Sun May 15 10:35:54 2016 UTC
Explicitly mention MP_TRAMPOLINE in these comments, so that NXR (the NetBSD source cross-reference) links them.


(maxv)
diff -r1.216 -r1.217 src/sys/arch/amd64/amd64/machdep.c
diff -r1.754 -r1.755 src/sys/arch/i386/i386/machdep.c
diff -r1.194 -r1.195 src/sys/arch/x86/x86/pmap.c
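
NXR hyperlinks an identifier only where it appears literally in the source text, so spelling out MP_TRAMPOLINE in a comment makes that comment show up among the symbol's cross-references. The hunks that actually reword the comments sit further down in these files than the excerpt below; as a purely hypothetical sketch (not the committed text), the change is of this form:

    /* Before: the symbol is only implied, so NXR cannot link the comment. */
    /* Copy the bootstrap trampoline code below the 1MB boundary. */

    /* After: naming MP_TRAMPOLINE lets NXR link the comment to the symbol. */
    /* Copy the bootstrap trampoline code to MP_TRAMPOLINE, below 1MB. */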

cvs diff -r1.216 -r1.217 src/sys/arch/amd64/amd64/machdep.c

--- src/sys/arch/amd64/amd64/machdep.c 2016/05/12 06:45:16 1.216
+++ src/sys/arch/amd64/amd64/machdep.c 2016/05/15 10:35:54 1.217
@@ -1,2218 +1,2218 @@
1/* $NetBSD: machdep.c,v 1.216 2016/05/12 06:45:16 maxv Exp $ */ 1/* $NetBSD: machdep.c,v 1.217 2016/05/15 10:35:54 maxv Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011 4 * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011
5 * The NetBSD Foundation, Inc. 5 * The NetBSD Foundation, Inc.
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * This code is derived from software contributed to The NetBSD Foundation 8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace 9 * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace
10 * Simulation Facility, NASA Ames Research Center. 10 * Simulation Facility, NASA Ames Research Center.
11 * 11 *
12 * This code is derived from software contributed to The NetBSD Foundation 12 * This code is derived from software contributed to The NetBSD Foundation
13 * by Coyote Point Systems, Inc. which was written under contract to Coyote 13 * by Coyote Point Systems, Inc. which was written under contract to Coyote
14 * Point by Jed Davis and Devon O'Dell. 14 * Point by Jed Davis and Devon O'Dell.
15 * 15 *
16 * Redistribution and use in source and binary forms, with or without 16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions 17 * modification, are permitted provided that the following conditions
18 * are met: 18 * are met:
19 * 1. Redistributions of source code must retain the above copyright 19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer. 20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright 21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the 22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution. 23 * documentation and/or other materials provided with the distribution.
24 * 24 *
25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 25 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE. 35 * POSSIBILITY OF SUCH DAMAGE.
36 */ 36 */
37 37
38/* 38/*
39 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> 39 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
40 * 40 *
41 * Permission to use, copy, modify, and distribute this software for any 41 * Permission to use, copy, modify, and distribute this software for any
42 * purpose with or without fee is hereby granted, provided that the above 42 * purpose with or without fee is hereby granted, provided that the above
43 * copyright notice and this permission notice appear in all copies. 43 * copyright notice and this permission notice appear in all copies.
44 * 44 *
45 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 45 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
46 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 46 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
47 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 47 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
48 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 48 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
49 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 49 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
50 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 50 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
51 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 51 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
52 */ 52 */
53 53
54/* 54/*
55 * Copyright (c) 2007 Manuel Bouyer. 55 * Copyright (c) 2007 Manuel Bouyer.
56 * 56 *
57 * Redistribution and use in source and binary forms, with or without 57 * Redistribution and use in source and binary forms, with or without
58 * modification, are permitted provided that the following conditions 58 * modification, are permitted provided that the following conditions
59 * are met: 59 * are met:
60 * 1. Redistributions of source code must retain the above copyright 60 * 1. Redistributions of source code must retain the above copyright
61 * notice, this list of conditions and the following disclaimer. 61 * notice, this list of conditions and the following disclaimer.
62 * 2. Redistributions in binary form must reproduce the above copyright 62 * 2. Redistributions in binary form must reproduce the above copyright
63 * notice, this list of conditions and the following disclaimer in the 63 * notice, this list of conditions and the following disclaimer in the
64 * documentation and/or other materials provided with the distribution. 64 * documentation and/or other materials provided with the distribution.
65 * 65 *
66 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 66 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
67 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 67 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
68 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 68 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
69 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 69 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
70 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 70 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
71 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 71 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
72 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 72 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
73 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 73 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
74 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 74 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
75 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 75 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
76 * 76 *
77 */ 77 */
78 78
79/*- 79/*-
80 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 80 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
81 * All rights reserved. 81 * All rights reserved.
82 * 82 *
83 * This code is derived from software contributed to Berkeley by 83 * This code is derived from software contributed to Berkeley by
84 * William Jolitz. 84 * William Jolitz.
85 * 85 *
86 * Redistribution and use in source and binary forms, with or without 86 * Redistribution and use in source and binary forms, with or without
87 * modification, are permitted provided that the following conditions 87 * modification, are permitted provided that the following conditions
88 * are met: 88 * are met:
89 * 1. Redistributions of source code must retain the above copyright 89 * 1. Redistributions of source code must retain the above copyright
90 * notice, this list of conditions and the following disclaimer. 90 * notice, this list of conditions and the following disclaimer.
91 * 2. Redistributions in binary form must reproduce the above copyright 91 * 2. Redistributions in binary form must reproduce the above copyright
92 * notice, this list of conditions and the following disclaimer in the 92 * notice, this list of conditions and the following disclaimer in the
93 * documentation and/or other materials provided with the distribution. 93 * documentation and/or other materials provided with the distribution.
94 * 3. Neither the name of the University nor the names of its contributors 94 * 3. Neither the name of the University nor the names of its contributors
95 * may be used to endorse or promote products derived from this software 95 * may be used to endorse or promote products derived from this software
96 * without specific prior written permission. 96 * without specific prior written permission.
97 * 97 *
98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 98 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 99 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 100 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 101 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 102 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 103 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 104 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 105 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 106 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 107 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
108 * SUCH DAMAGE. 108 * SUCH DAMAGE.
109 * 109 *
110 * @(#)machdep.c 7.4 (Berkeley) 6/3/91 110 * @(#)machdep.c 7.4 (Berkeley) 6/3/91
111 */ 111 */
112 112
113#include <sys/cdefs.h> 113#include <sys/cdefs.h>
114__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.216 2016/05/12 06:45:16 maxv Exp $"); 114__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.217 2016/05/15 10:35:54 maxv Exp $");
115 115
116/* #define XENDEBUG_LOW */ 116/* #define XENDEBUG_LOW */
117 117
118#include "opt_modular.h" 118#include "opt_modular.h"
119#include "opt_user_ldt.h" 119#include "opt_user_ldt.h"
120#include "opt_ddb.h" 120#include "opt_ddb.h"
121#include "opt_kgdb.h" 121#include "opt_kgdb.h"
122#include "opt_cpureset_delay.h" 122#include "opt_cpureset_delay.h"
123#include "opt_mtrr.h" 123#include "opt_mtrr.h"
124#include "opt_realmem.h" 124#include "opt_realmem.h"
125#include "opt_xen.h" 125#include "opt_xen.h"
126#ifndef XEN 126#ifndef XEN
127#include "opt_physmem.h" 127#include "opt_physmem.h"
128#endif 128#endif
129#include "isa.h" 129#include "isa.h"
130#include "pci.h" 130#include "pci.h"
131 131
132#include <sys/param.h> 132#include <sys/param.h>
133#include <sys/systm.h> 133#include <sys/systm.h>
134#include <sys/signal.h> 134#include <sys/signal.h>
135#include <sys/signalvar.h> 135#include <sys/signalvar.h>
136#include <sys/kernel.h> 136#include <sys/kernel.h>
137#include <sys/cpu.h> 137#include <sys/cpu.h>
138#include <sys/exec.h> 138#include <sys/exec.h>
139#include <sys/exec_aout.h> /* for MID_* */ 139#include <sys/exec_aout.h> /* for MID_* */
140#include <sys/reboot.h> 140#include <sys/reboot.h>
141#include <sys/conf.h> 141#include <sys/conf.h>
142#include <sys/mbuf.h> 142#include <sys/mbuf.h>
143#include <sys/msgbuf.h> 143#include <sys/msgbuf.h>
144#include <sys/mount.h> 144#include <sys/mount.h>
145#include <sys/core.h> 145#include <sys/core.h>
146#include <sys/kcore.h> 146#include <sys/kcore.h>
147#include <sys/ucontext.h> 147#include <sys/ucontext.h>
148#include <machine/kcore.h> 148#include <machine/kcore.h>
149#include <sys/ras.h> 149#include <sys/ras.h>
150#include <sys/syscallargs.h> 150#include <sys/syscallargs.h>
151#include <sys/ksyms.h> 151#include <sys/ksyms.h>
152#include <sys/device.h> 152#include <sys/device.h>
153#include <sys/lwp.h> 153#include <sys/lwp.h>
154#include <sys/proc.h> 154#include <sys/proc.h>
155 155
156#ifdef KGDB 156#ifdef KGDB
157#include <sys/kgdb.h> 157#include <sys/kgdb.h>
158#endif 158#endif
159 159
160#include <dev/cons.h> 160#include <dev/cons.h>
161#include <dev/mm.h> 161#include <dev/mm.h>
162 162
163#include <uvm/uvm.h> 163#include <uvm/uvm.h>
164#include <uvm/uvm_page.h> 164#include <uvm/uvm_page.h>
165 165
166#include <sys/sysctl.h> 166#include <sys/sysctl.h>
167 167
168#include <machine/cpu.h> 168#include <machine/cpu.h>
169#include <machine/cpufunc.h> 169#include <machine/cpufunc.h>
170#include <machine/gdt.h> 170#include <machine/gdt.h>
171#include <machine/intr.h> 171#include <machine/intr.h>
172#include <machine/pio.h> 172#include <machine/pio.h>
173#include <machine/psl.h> 173#include <machine/psl.h>
174#include <machine/reg.h> 174#include <machine/reg.h>
175#include <machine/specialreg.h> 175#include <machine/specialreg.h>
176#include <machine/bootinfo.h> 176#include <machine/bootinfo.h>
177#include <x86/fpu.h> 177#include <x86/fpu.h>
178#include <machine/mtrr.h> 178#include <machine/mtrr.h>
179#include <machine/mpbiosvar.h> 179#include <machine/mpbiosvar.h>
180 180
181#include <x86/cputypes.h> 181#include <x86/cputypes.h>
182#include <x86/cpuvar.h> 182#include <x86/cpuvar.h>
183#include <x86/machdep.h> 183#include <x86/machdep.h>
184 184
185#include <x86/x86/tsc.h> 185#include <x86/x86/tsc.h>
186 186
187#include <dev/isa/isareg.h> 187#include <dev/isa/isareg.h>
188#include <machine/isa_machdep.h> 188#include <machine/isa_machdep.h>
189#include <dev/ic/i8042reg.h> 189#include <dev/ic/i8042reg.h>
190 190
191#ifdef XEN 191#ifdef XEN
192#include <xen/xen.h> 192#include <xen/xen.h>
193#include <xen/hypervisor.h> 193#include <xen/hypervisor.h>
194#include <xen/evtchn.h> 194#include <xen/evtchn.h>
195#endif 195#endif
196 196
197#ifdef DDB 197#ifdef DDB
198#include <machine/db_machdep.h> 198#include <machine/db_machdep.h>
199#include <ddb/db_extern.h> 199#include <ddb/db_extern.h>
200#include <ddb/db_output.h> 200#include <ddb/db_output.h>
201#include <ddb/db_interface.h> 201#include <ddb/db_interface.h>
202#endif 202#endif
203 203
204#include "acpica.h" 204#include "acpica.h"
205 205
206#if NACPICA > 0 206#if NACPICA > 0
207#include <dev/acpi/acpivar.h> 207#include <dev/acpi/acpivar.h>
208#define ACPI_MACHDEP_PRIVATE 208#define ACPI_MACHDEP_PRIVATE
209#include <machine/acpi_machdep.h> 209#include <machine/acpi_machdep.h>
210#endif 210#endif
211 211
212#include "isa.h" 212#include "isa.h"
213#include "isadma.h" 213#include "isadma.h"
214#include "ksyms.h" 214#include "ksyms.h"
215 215
216/* the following is used externally (sysctl_hw) */ 216/* the following is used externally (sysctl_hw) */
217char machine[] = "amd64"; /* CPU "architecture" */ 217char machine[] = "amd64"; /* CPU "architecture" */
218char machine_arch[] = "x86_64"; /* machine == machine_arch */ 218char machine_arch[] = "x86_64"; /* machine == machine_arch */
219 219
220extern struct bi_devmatch *x86_alldisks; 220extern struct bi_devmatch *x86_alldisks;
221extern int x86_ndisks; 221extern int x86_ndisks;
222 222
223#ifdef CPURESET_DELAY 223#ifdef CPURESET_DELAY
224int cpureset_delay = CPURESET_DELAY; 224int cpureset_delay = CPURESET_DELAY;
225#else 225#else
226int cpureset_delay = 2000; /* default to 2s */ 226int cpureset_delay = 2000; /* default to 2s */
227#endif 227#endif
228 228
229int cpu_class = CPUCLASS_686; 229int cpu_class = CPUCLASS_686;
230 230
231#ifdef MTRR 231#ifdef MTRR
232struct mtrr_funcs *mtrr_funcs; 232struct mtrr_funcs *mtrr_funcs;
233#endif 233#endif
234 234
235uint64_t dumpmem_low; 235uint64_t dumpmem_low;
236uint64_t dumpmem_high; 236uint64_t dumpmem_high;
237int cpu_class; 237int cpu_class;
238int use_pae; 238int use_pae;
239 239
240#ifndef NO_SPARSE_DUMP 240#ifndef NO_SPARSE_DUMP
241int sparse_dump = 1; 241int sparse_dump = 1;
242 242
243paddr_t max_paddr = 0; 243paddr_t max_paddr = 0;
244unsigned char *sparse_dump_physmap; 244unsigned char *sparse_dump_physmap;
245#endif 245#endif
246 246
247char *dump_headerbuf, *dump_headerbuf_ptr; 247char *dump_headerbuf, *dump_headerbuf_ptr;
248#define dump_headerbuf_size PAGE_SIZE 248#define dump_headerbuf_size PAGE_SIZE
249#define dump_headerbuf_end (dump_headerbuf + dump_headerbuf_size) 249#define dump_headerbuf_end (dump_headerbuf + dump_headerbuf_size)
250#define dump_headerbuf_avail (dump_headerbuf_end - dump_headerbuf_ptr) 250#define dump_headerbuf_avail (dump_headerbuf_end - dump_headerbuf_ptr)
251daddr_t dump_header_blkno; 251daddr_t dump_header_blkno;
252 252
253size_t dump_nmemsegs; 253size_t dump_nmemsegs;
254size_t dump_npages; 254size_t dump_npages;
255size_t dump_header_size; 255size_t dump_header_size;
256size_t dump_totalbytesleft; 256size_t dump_totalbytesleft;
257 257
258vaddr_t msgbuf_vaddr; 258vaddr_t msgbuf_vaddr;
259paddr_t msgbuf_paddr; 259paddr_t msgbuf_paddr;
260 260
261struct { 261struct {
262 paddr_t paddr; 262 paddr_t paddr;
263 psize_t sz; 263 psize_t sz;
264} msgbuf_p_seg[VM_PHYSSEG_MAX]; 264} msgbuf_p_seg[VM_PHYSSEG_MAX];
265unsigned int msgbuf_p_cnt = 0; 265unsigned int msgbuf_p_cnt = 0;
266  266
267vaddr_t idt_vaddr; 267vaddr_t idt_vaddr;
268paddr_t idt_paddr; 268paddr_t idt_paddr;
269 269
270vaddr_t lo32_vaddr; 270vaddr_t lo32_vaddr;
271paddr_t lo32_paddr; 271paddr_t lo32_paddr;
272 272
273vaddr_t module_start, module_end; 273vaddr_t module_start, module_end;
274static struct vm_map module_map_store; 274static struct vm_map module_map_store;
275extern struct vm_map *module_map; 275extern struct vm_map *module_map;
276vaddr_t kern_end; 276vaddr_t kern_end;
277 277
278struct vm_map *phys_map = NULL; 278struct vm_map *phys_map = NULL;
279 279
280extern paddr_t avail_start, avail_end; 280extern paddr_t avail_start, avail_end;
281#ifdef XEN 281#ifdef XEN
282extern paddr_t pmap_pa_start, pmap_pa_end; 282extern paddr_t pmap_pa_start, pmap_pa_end;
283#endif 283#endif
284 284
285#ifndef XEN 285#ifndef XEN
286void (*delay_func)(unsigned int) = i8254_delay; 286void (*delay_func)(unsigned int) = i8254_delay;
287void (*initclock_func)(void) = i8254_initclocks; 287void (*initclock_func)(void) = i8254_initclocks;
288#else /* XEN */ 288#else /* XEN */
289void (*delay_func)(unsigned int) = xen_delay; 289void (*delay_func)(unsigned int) = xen_delay;
290void (*initclock_func)(void) = xen_initclocks; 290void (*initclock_func)(void) = xen_initclocks;
291#endif 291#endif
292 292
293 293
294/* 294/*
295 * Size of memory segments, before any memory is stolen. 295 * Size of memory segments, before any memory is stolen.
296 */ 296 */
297phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; 297phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
298int mem_cluster_cnt; 298int mem_cluster_cnt;
299 299
300char x86_64_doubleflt_stack[4096]; 300char x86_64_doubleflt_stack[4096];
301 301
302int cpu_dump(void); 302int cpu_dump(void);
303int cpu_dumpsize(void); 303int cpu_dumpsize(void);
304u_long cpu_dump_mempagecnt(void); 304u_long cpu_dump_mempagecnt(void);
305void dodumpsys(void); 305void dodumpsys(void);
306void dumpsys(void); 306void dumpsys(void);
307 307
308extern int time_adjusted; /* XXX no common header */ 308extern int time_adjusted; /* XXX no common header */
309 309
310void dump_misc_init(void); 310void dump_misc_init(void);
311void dump_seg_prep(void); 311void dump_seg_prep(void);
312int dump_seg_iter(int (*)(paddr_t, paddr_t)); 312int dump_seg_iter(int (*)(paddr_t, paddr_t));
313 313
314#ifndef NO_SPARSE_DUMP 314#ifndef NO_SPARSE_DUMP
315void sparse_dump_reset(void); 315void sparse_dump_reset(void);
316void sparse_dump_mark(void); 316void sparse_dump_mark(void);
317void cpu_dump_prep_sparse(void); 317void cpu_dump_prep_sparse(void);
318#endif 318#endif
319 319
320void dump_header_start(void); 320void dump_header_start(void);
321int dump_header_flush(void); 321int dump_header_flush(void);
322int dump_header_addbytes(const void*, size_t); 322int dump_header_addbytes(const void*, size_t);
323int dump_header_addseg(paddr_t, paddr_t); 323int dump_header_addseg(paddr_t, paddr_t);
324int dump_header_finish(void); 324int dump_header_finish(void);
325 325
326int dump_seg_count_range(paddr_t, paddr_t); 326int dump_seg_count_range(paddr_t, paddr_t);
327int dumpsys_seg(paddr_t, paddr_t); 327int dumpsys_seg(paddr_t, paddr_t);
328 328
329void init_x86_64(paddr_t); 329void init_x86_64(paddr_t);
330 330
331static int valid_user_selector(struct lwp *, uint64_t); 331static int valid_user_selector(struct lwp *, uint64_t);
332 332
333/* 333/*
334 * Machine-dependent startup code 334 * Machine-dependent startup code
335 */ 335 */
336void 336void
337cpu_startup(void) 337cpu_startup(void)
338{ 338{
339 int x, y; 339 int x, y;
340 vaddr_t minaddr, maxaddr; 340 vaddr_t minaddr, maxaddr;
341 psize_t sz; 341 psize_t sz;
342 342
343 /* 343 /*
344 * For console drivers that require uvm and pmap to be initialized, 344 * For console drivers that require uvm and pmap to be initialized,
345 * we'll give them one more chance here... 345 * we'll give them one more chance here...
346 */ 346 */
347 consinit(); 347 consinit();
348 348
349 /* 349 /*
350 * Initialize error message buffer (et end of core). 350 * Initialize error message buffer (et end of core).
351 */ 351 */
352 if (msgbuf_p_cnt == 0) 352 if (msgbuf_p_cnt == 0)
353 panic("msgbuf paddr map has not been set up"); 353 panic("msgbuf paddr map has not been set up");
354 for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz) 354 for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz)
355 continue; 355 continue;
356 356
357 msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, 357 msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0,
358 UVM_KMF_VAONLY); 358 UVM_KMF_VAONLY);
359 if (msgbuf_vaddr == 0) 359 if (msgbuf_vaddr == 0)
360 panic("failed to valloc msgbuf_vaddr"); 360 panic("failed to valloc msgbuf_vaddr");
361 361
362 /* msgbuf_paddr was init'd in pmap */ 362 /* msgbuf_paddr was init'd in pmap */
363 for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) { 363 for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) {
364 for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE) 364 for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE)
365 pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz, 365 pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz,
366 msgbuf_p_seg[y].paddr + x * PAGE_SIZE, 366 msgbuf_p_seg[y].paddr + x * PAGE_SIZE,
367 VM_PROT_READ | UVM_PROT_WRITE, 0); 367 VM_PROT_READ | UVM_PROT_WRITE, 0);
368 } 368 }
369 369
370 pmap_update(pmap_kernel()); 370 pmap_update(pmap_kernel());
371 371
372 initmsgbuf((void *)msgbuf_vaddr, round_page(sz)); 372 initmsgbuf((void *)msgbuf_vaddr, round_page(sz));
373 373
374 minaddr = 0; 374 minaddr = 0;
375 375
376 /* 376 /*
377 * Allocate a submap for physio 377 * Allocate a submap for physio
378 */ 378 */
379 phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, 379 phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
380 VM_PHYS_SIZE, 0, false, NULL); 380 VM_PHYS_SIZE, 0, false, NULL);
381 381
382 uvm_map_setup(&module_map_store, module_start, module_end, 0); 382 uvm_map_setup(&module_map_store, module_start, module_end, 0);
383 module_map_store.pmap = pmap_kernel(); 383 module_map_store.pmap = pmap_kernel();
384 module_map = &module_map_store; 384 module_map = &module_map_store;
385 385
386 /* Say hello. */ 386 /* Say hello. */
387 banner(); 387 banner();
388 388
389#if NISA > 0 || NPCI > 0 389#if NISA > 0 || NPCI > 0
390 /* Safe for i/o port / memory space allocation to use malloc now. */ 390 /* Safe for i/o port / memory space allocation to use malloc now. */
391 x86_bus_space_mallocok(); 391 x86_bus_space_mallocok();
392#endif 392#endif
393 393
394 gdt_init(); 394 gdt_init();
395 x86_64_proc0_tss_ldt_init(); 395 x86_64_proc0_tss_ldt_init();
396 396
397 cpu_init_tss(&cpu_info_primary); 397 cpu_init_tss(&cpu_info_primary);
398#if !defined(XEN) 398#if !defined(XEN)
399 ltr(cpu_info_primary.ci_tss_sel); 399 ltr(cpu_info_primary.ci_tss_sel);
400#endif /* !defined(XEN) */ 400#endif /* !defined(XEN) */
401 401
402 x86_startup(); 402 x86_startup();
403} 403}
404 404
405#ifdef XEN 405#ifdef XEN
406/* used in assembly */ 406/* used in assembly */
407void hypervisor_callback(void); 407void hypervisor_callback(void);
408void failsafe_callback(void); 408void failsafe_callback(void);
409void x86_64_switch_context(struct pcb *); 409void x86_64_switch_context(struct pcb *);
410void x86_64_tls_switch(struct lwp *); 410void x86_64_tls_switch(struct lwp *);
411 411
412void 412void
413x86_64_switch_context(struct pcb *new) 413x86_64_switch_context(struct pcb *new)
414{ 414{
415 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), new->pcb_rsp0); 415 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), new->pcb_rsp0);
416 struct physdev_op physop; 416 struct physdev_op physop;
417 physop.cmd = PHYSDEVOP_SET_IOPL; 417 physop.cmd = PHYSDEVOP_SET_IOPL;
418 physop.u.set_iopl.iopl = new->pcb_iopl; 418 physop.u.set_iopl.iopl = new->pcb_iopl;
419 HYPERVISOR_physdev_op(&physop); 419 HYPERVISOR_physdev_op(&physop);
420} 420}
421 421
422void 422void
423x86_64_tls_switch(struct lwp *l) 423x86_64_tls_switch(struct lwp *l)
424{ 424{
425 struct cpu_info *ci = curcpu(); 425 struct cpu_info *ci = curcpu();
426 struct pcb *pcb = lwp_getpcb(l); 426 struct pcb *pcb = lwp_getpcb(l);
427 struct trapframe *tf = l->l_md.md_regs; 427 struct trapframe *tf = l->l_md.md_regs;
428 428
429 /* 429 /*
430 * Raise the IPL to IPL_HIGH. 430 * Raise the IPL to IPL_HIGH.
431 * FPU IPIs can alter the LWP's saved cr0. Dropping the priority 431 * FPU IPIs can alter the LWP's saved cr0. Dropping the priority
432 * is deferred until mi_switch(), when cpu_switchto() returns. 432 * is deferred until mi_switch(), when cpu_switchto() returns.
433 */ 433 */
434 (void)splhigh(); 434 (void)splhigh();
435 /* 435 /*
436 * If our floating point registers are on a different CPU, 436 * If our floating point registers are on a different CPU,
437 * set CR0_TS so we'll trap rather than reuse bogus state. 437 * set CR0_TS so we'll trap rather than reuse bogus state.
438 */ 438 */
439 if (l != ci->ci_fpcurlwp) { 439 if (l != ci->ci_fpcurlwp) {
440 HYPERVISOR_fpu_taskswitch(1); 440 HYPERVISOR_fpu_taskswitch(1);
441 } 441 }
442 442
443 /* Update TLS segment pointers */ 443 /* Update TLS segment pointers */
444 if (pcb->pcb_flags & PCB_COMPAT32) { 444 if (pcb->pcb_flags & PCB_COMPAT32) {
445 update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs); 445 update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs);
446 update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs); 446 update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs);
447 setfs(tf->tf_fs); 447 setfs(tf->tf_fs);
448 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, tf->tf_gs); 448 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, tf->tf_gs);
449 } else { 449 } else {
450 setfs(0); 450 setfs(0);
451 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0); 451 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0);
452 HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs); 452 HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs);
453 HYPERVISOR_set_segment_base(SEGBASE_GS_USER, pcb->pcb_gs); 453 HYPERVISOR_set_segment_base(SEGBASE_GS_USER, pcb->pcb_gs);
454 } 454 }
455} 455}
456#endif /* XEN */ 456#endif /* XEN */
457 457
458/* 458/*
459 * Set up proc0's TSS and LDT. 459 * Set up proc0's TSS and LDT.
460 */ 460 */
461void 461void
462x86_64_proc0_tss_ldt_init(void) 462x86_64_proc0_tss_ldt_init(void)
463{ 463{
464 struct lwp *l = &lwp0; 464 struct lwp *l = &lwp0;
465 struct pcb *pcb = lwp_getpcb(l); 465 struct pcb *pcb = lwp_getpcb(l);
466 466
467 pcb->pcb_flags = 0; 467 pcb->pcb_flags = 0;
468 pcb->pcb_fs = 0; 468 pcb->pcb_fs = 0;
469 pcb->pcb_gs = 0; 469 pcb->pcb_gs = 0;
470 pcb->pcb_rsp0 = (uvm_lwp_getuarea(l) + USPACE - 16) & ~0xf; 470 pcb->pcb_rsp0 = (uvm_lwp_getuarea(l) + USPACE - 16) & ~0xf;
471 pcb->pcb_iopl = SEL_KPL; 471 pcb->pcb_iopl = SEL_KPL;
472 472
473 pmap_kernel()->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); 473 pmap_kernel()->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL);
474 pcb->pcb_cr0 = rcr0() & ~CR0_TS; 474 pcb->pcb_cr0 = rcr0() & ~CR0_TS;
475 l->l_md.md_regs = (struct trapframe *)pcb->pcb_rsp0 - 1; 475 l->l_md.md_regs = (struct trapframe *)pcb->pcb_rsp0 - 1;
476 476
477#if !defined(XEN) 477#if !defined(XEN)
478 lldt(pmap_kernel()->pm_ldt_sel); 478 lldt(pmap_kernel()->pm_ldt_sel);
479#else 479#else
480 { 480 {
481 struct physdev_op physop; 481 struct physdev_op physop;
482 xen_set_ldt((vaddr_t) ldtstore, LDT_SIZE >> 3); 482 xen_set_ldt((vaddr_t) ldtstore, LDT_SIZE >> 3);
483 /* Reset TS bit and set kernel stack for interrupt handlers */ 483 /* Reset TS bit and set kernel stack for interrupt handlers */
484 HYPERVISOR_fpu_taskswitch(1); 484 HYPERVISOR_fpu_taskswitch(1);
485 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_rsp0); 485 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_rsp0);
486 physop.cmd = PHYSDEVOP_SET_IOPL; 486 physop.cmd = PHYSDEVOP_SET_IOPL;
487 physop.u.set_iopl.iopl = pcb->pcb_iopl; 487 physop.u.set_iopl.iopl = pcb->pcb_iopl;
488 HYPERVISOR_physdev_op(&physop); 488 HYPERVISOR_physdev_op(&physop);
489 } 489 }
490#endif /* XEN */ 490#endif /* XEN */
491} 491}
492 492
493/* 493/*
494 * Set up TSS and I/O bitmap. 494 * Set up TSS and I/O bitmap.
495 */ 495 */
496void 496void
497cpu_init_tss(struct cpu_info *ci) 497cpu_init_tss(struct cpu_info *ci)
498{ 498{
499 struct x86_64_tss *tss = &ci->ci_tss; 499 struct x86_64_tss *tss = &ci->ci_tss;
500 uintptr_t p; 500 uintptr_t p;
501 501
502 tss->tss_iobase = IOMAP_INVALOFF << 16; 502 tss->tss_iobase = IOMAP_INVALOFF << 16;
503 /* tss->tss_ist[0] is filled by cpu_intr_init */ 503 /* tss->tss_ist[0] is filled by cpu_intr_init */
504 504
505 /* double fault */ 505 /* double fault */
506 tss->tss_ist[1] = (uint64_t)x86_64_doubleflt_stack + PAGE_SIZE - 16; 506 tss->tss_ist[1] = (uint64_t)x86_64_doubleflt_stack + PAGE_SIZE - 16;
507 507
508 /* NMI */ 508 /* NMI */
509 p = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED); 509 p = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED);
510 tss->tss_ist[2] = p + PAGE_SIZE - 16; 510 tss->tss_ist[2] = p + PAGE_SIZE - 16;
511 ci->ci_tss_sel = tss_alloc(tss); 511 ci->ci_tss_sel = tss_alloc(tss);
512} 512}
513 513
514void 514void
515buildcontext(struct lwp *l, void *catcher, void *f) 515buildcontext(struct lwp *l, void *catcher, void *f)
516{ 516{
517 struct trapframe *tf = l->l_md.md_regs; 517 struct trapframe *tf = l->l_md.md_regs;
518 518
519 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 519 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
520 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 520 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
521 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); 521 tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL);
522 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL); 522 tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL);
523 523
524 tf->tf_rip = (uint64_t)catcher; 524 tf->tf_rip = (uint64_t)catcher;
525 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 525 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
526 tf->tf_rflags &= ~PSL_CLEARSIG; 526 tf->tf_rflags &= ~PSL_CLEARSIG;
527 tf->tf_rsp = (uint64_t)f; 527 tf->tf_rsp = (uint64_t)f;
528 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 528 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
529 529
530 /* Ensure FP state is sane */ 530 /* Ensure FP state is sane */
531 fpu_save_area_reset(l); 531 fpu_save_area_reset(l);
532} 532}
533 533
534void 534void
535sendsig_sigcontext(const ksiginfo_t *ksi, const sigset_t *mask) 535sendsig_sigcontext(const ksiginfo_t *ksi, const sigset_t *mask)
536{ 536{
537 537
538 printf("sendsig_sigcontext: illegal\n"); 538 printf("sendsig_sigcontext: illegal\n");
539 sigexit(curlwp, SIGILL); 539 sigexit(curlwp, SIGILL);
540} 540}
541 541
542void 542void
543sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask) 543sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
544{ 544{
545 struct lwp *l = curlwp; 545 struct lwp *l = curlwp;
546 struct proc *p = l->l_proc; 546 struct proc *p = l->l_proc;
547 struct sigacts *ps = p->p_sigacts; 547 struct sigacts *ps = p->p_sigacts;
548 int onstack, error; 548 int onstack, error;
549 int sig = ksi->ksi_signo; 549 int sig = ksi->ksi_signo;
550 struct sigframe_siginfo *fp, frame; 550 struct sigframe_siginfo *fp, frame;
551 sig_t catcher = SIGACTION(p, sig).sa_handler; 551 sig_t catcher = SIGACTION(p, sig).sa_handler;
552 struct trapframe *tf = l->l_md.md_regs; 552 struct trapframe *tf = l->l_md.md_regs;
553 char *sp; 553 char *sp;
554 554
555 KASSERT(mutex_owned(p->p_lock)); 555 KASSERT(mutex_owned(p->p_lock));
556 556
557 /* Do we need to jump onto the signal stack? */ 557 /* Do we need to jump onto the signal stack? */
558 onstack = 558 onstack =
559 (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 && 559 (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
560 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 560 (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
561 561
562 /* Allocate space for the signal handler context. */ 562 /* Allocate space for the signal handler context. */
563 if (onstack) 563 if (onstack)
564 sp = ((char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size); 564 sp = ((char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size);
565 else 565 else
566 /* AMD64 ABI 128-bytes "red zone". */ 566 /* AMD64 ABI 128-bytes "red zone". */
567 sp = (char *)tf->tf_rsp - 128; 567 sp = (char *)tf->tf_rsp - 128;
568 568
569 sp -= sizeof(struct sigframe_siginfo); 569 sp -= sizeof(struct sigframe_siginfo);
570 /* Round down the stackpointer to a multiple of 16 for the ABI. */ 570 /* Round down the stackpointer to a multiple of 16 for the ABI. */
571 fp = (struct sigframe_siginfo *)(((unsigned long)sp & ~15) - 8); 571 fp = (struct sigframe_siginfo *)(((unsigned long)sp & ~15) - 8);
572 572
573 frame.sf_ra = (uint64_t)ps->sa_sigdesc[sig].sd_tramp; 573 frame.sf_ra = (uint64_t)ps->sa_sigdesc[sig].sd_tramp;
574 frame.sf_si._info = ksi->ksi_info; 574 frame.sf_si._info = ksi->ksi_info;
575 frame.sf_uc.uc_flags = _UC_SIGMASK; 575 frame.sf_uc.uc_flags = _UC_SIGMASK;
576 frame.sf_uc.uc_sigmask = *mask; 576 frame.sf_uc.uc_sigmask = *mask;
577 frame.sf_uc.uc_link = l->l_ctxlink; 577 frame.sf_uc.uc_link = l->l_ctxlink;
578 frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK) 578 frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK)
579 ? _UC_SETSTACK : _UC_CLRSTACK; 579 ? _UC_SETSTACK : _UC_CLRSTACK;
580 memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack)); 580 memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack));
581 sendsig_reset(l, sig); 581 sendsig_reset(l, sig);
582 582
583 mutex_exit(p->p_lock); 583 mutex_exit(p->p_lock);
584 cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags); 584 cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);
585 /* Copyout all the fp regs, the signal handler might expect them. */ 585 /* Copyout all the fp regs, the signal handler might expect them. */
586 error = copyout(&frame, fp, sizeof frame); 586 error = copyout(&frame, fp, sizeof frame);
587 mutex_enter(p->p_lock); 587 mutex_enter(p->p_lock);
588 588
589 if (error != 0) { 589 if (error != 0) {
590 /* 590 /*
591 * Process has trashed its stack; give it an illegal 591 * Process has trashed its stack; give it an illegal
592 * instruction to halt it in its tracks. 592 * instruction to halt it in its tracks.
593 */ 593 */
594 sigexit(l, SIGILL); 594 sigexit(l, SIGILL);
595 /* NOTREACHED */ 595 /* NOTREACHED */
596 } 596 }
597 597
598 buildcontext(l, catcher, fp); 598 buildcontext(l, catcher, fp);
599 599
600 tf->tf_rdi = sig; 600 tf->tf_rdi = sig;
601 tf->tf_rsi = (uint64_t)&fp->sf_si; 601 tf->tf_rsi = (uint64_t)&fp->sf_si;
602 tf->tf_rdx = tf->tf_r15 = (uint64_t)&fp->sf_uc; 602 tf->tf_rdx = tf->tf_r15 = (uint64_t)&fp->sf_uc;
603 603
604 /* Remember that we're now on the signal stack. */ 604 /* Remember that we're now on the signal stack. */
605 if (onstack) 605 if (onstack)
606 l->l_sigstk.ss_flags |= SS_ONSTACK; 606 l->l_sigstk.ss_flags |= SS_ONSTACK;
607 607
608 if ((vaddr_t)catcher >= VM_MAXUSER_ADDRESS) { 608 if ((vaddr_t)catcher >= VM_MAXUSER_ADDRESS) {
609 /* 609 /*
610 * process has given an invalid address for the 610 * process has given an invalid address for the
611 * handler. Stop it, but do not do it before so 611 * handler. Stop it, but do not do it before so
612 * we can return the right info to userland (or in core dump) 612 * we can return the right info to userland (or in core dump)
613 */ 613 */
614 sigexit(l, SIGILL); 614 sigexit(l, SIGILL);
615 /* NOTREACHED */ 615 /* NOTREACHED */
616 } 616 }
617} 617}
618 618
619struct pcb dumppcb; 619struct pcb dumppcb;
620 620
621void 621void
622cpu_reboot(int howto, char *bootstr) 622cpu_reboot(int howto, char *bootstr)
623{ 623{
624 static bool syncdone = false; 624 static bool syncdone = false;
625 int s = IPL_NONE; 625 int s = IPL_NONE;
626 __USE(s); /* ugly otherwise */ 626 __USE(s); /* ugly otherwise */
627 627
628 if (cold) { 628 if (cold) {
629 howto |= RB_HALT; 629 howto |= RB_HALT;
630 goto haltsys; 630 goto haltsys;
631 } 631 }
632 632
633 boothowto = howto; 633 boothowto = howto;
634 634
635 /* i386 maybe_dump() */ 635 /* i386 maybe_dump() */
636 636
637 /* 637 /*
638 * If we've panic'd, don't make the situation potentially 638 * If we've panic'd, don't make the situation potentially
639 * worse by syncing or unmounting the file systems. 639 * worse by syncing or unmounting the file systems.
640 */ 640 */
641 if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) { 641 if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) {
642 if (!syncdone) { 642 if (!syncdone) {
643 syncdone = true; 643 syncdone = true;
644 /* XXX used to force unmount as well, here */ 644 /* XXX used to force unmount as well, here */
645 vfs_sync_all(curlwp); 645 vfs_sync_all(curlwp);
646 /* 646 /*
647 * If we've been adjusting the clock, the todr 647 * If we've been adjusting the clock, the todr
648 * will be out of synch; adjust it now. 648 * will be out of synch; adjust it now.
649 * 649 *
650 * XXX used to do this after unmounting all 650 * XXX used to do this after unmounting all
651 * filesystems with vfs_shutdown(). 651 * filesystems with vfs_shutdown().
652 */ 652 */
653 if (time_adjusted != 0) 653 if (time_adjusted != 0)
654 resettodr(); 654 resettodr();
655 } 655 }
656 656
657 while (vfs_unmountall1(curlwp, false, false) || 657 while (vfs_unmountall1(curlwp, false, false) ||
658 config_detach_all(boothowto) || 658 config_detach_all(boothowto) ||
659 vfs_unmount_forceone(curlwp)) 659 vfs_unmount_forceone(curlwp))
660 ; /* do nothing */ 660 ; /* do nothing */
661 } else 661 } else
662 suspendsched(); 662 suspendsched();
663 663
664 pmf_system_shutdown(boothowto); 664 pmf_system_shutdown(boothowto);
665 665
666 /* Disable interrupts. */ 666 /* Disable interrupts. */
667 s = splhigh(); 667 s = splhigh();
668 668
669 /* Do a dump if requested. */ 669 /* Do a dump if requested. */
670 if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP) 670 if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
671 dumpsys(); 671 dumpsys();
672 672
673haltsys: 673haltsys:
674 doshutdownhooks(); 674 doshutdownhooks();
675 675
676 if ((howto & RB_POWERDOWN) == RB_POWERDOWN) { 676 if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
677#if NACPICA > 0 677#if NACPICA > 0
678 if (s != IPL_NONE) 678 if (s != IPL_NONE)
679 splx(s); 679 splx(s);
680 680
681 acpi_enter_sleep_state(ACPI_STATE_S5); 681 acpi_enter_sleep_state(ACPI_STATE_S5);
682#endif 682#endif
683#ifdef XEN 683#ifdef XEN
684 HYPERVISOR_shutdown(); 684 HYPERVISOR_shutdown();
685#endif /* XEN */ 685#endif /* XEN */
686 } 686 }
687 687
688 cpu_broadcast_halt(); 688 cpu_broadcast_halt();
689 689
690 if (howto & RB_HALT) { 690 if (howto & RB_HALT) {
691#if NACPICA > 0 691#if NACPICA > 0
692 acpi_disable(); 692 acpi_disable();
693#endif 693#endif
694 694
695 printf("\n"); 695 printf("\n");
696 printf("The operating system has halted.\n"); 696 printf("The operating system has halted.\n");
697 printf("Please press any key to reboot.\n\n"); 697 printf("Please press any key to reboot.\n\n");
698 cnpollc(1); /* for proper keyboard command handling */ 698 cnpollc(1); /* for proper keyboard command handling */
699 if (cngetc() == 0) { 699 if (cngetc() == 0) {
700 /* no console attached, so just hlt */ 700 /* no console attached, so just hlt */
701 printf("No keyboard - cannot reboot after all.\n"); 701 printf("No keyboard - cannot reboot after all.\n");
702 for(;;) { 702 for(;;) {
703 x86_hlt(); 703 x86_hlt();
704 } 704 }
705 } 705 }
706 cnpollc(0); 706 cnpollc(0);
707 } 707 }
708 708
709 printf("rebooting...\n"); 709 printf("rebooting...\n");
710 if (cpureset_delay > 0) 710 if (cpureset_delay > 0)
711 delay(cpureset_delay * 1000); 711 delay(cpureset_delay * 1000);
712 cpu_reset(); 712 cpu_reset();
713 for(;;) ; 713 for(;;) ;
714 /*NOTREACHED*/ 714 /*NOTREACHED*/
715} 715}
716 716
717/* 717/*
718 * XXXfvdl share dumpcode. 718 * XXXfvdl share dumpcode.
719 */ 719 */
720 720
721/* 721/*
722 * Perform assorted dump-related initialization tasks. Assumes that 722 * Perform assorted dump-related initialization tasks. Assumes that
723 * the maximum physical memory address will not increase afterwards. 723 * the maximum physical memory address will not increase afterwards.
724 */ 724 */
725void 725void
726dump_misc_init(void) 726dump_misc_init(void)
727{ 727{
728#ifndef NO_SPARSE_DUMP 728#ifndef NO_SPARSE_DUMP
729 int i; 729 int i;
730#endif 730#endif
731 731
732 if (dump_headerbuf != NULL) 732 if (dump_headerbuf != NULL)
733 return; /* already called */ 733 return; /* already called */
734 734
735#ifndef NO_SPARSE_DUMP 735#ifndef NO_SPARSE_DUMP
736 for (i = 0; i < mem_cluster_cnt; ++i) { 736 for (i = 0; i < mem_cluster_cnt; ++i) {
737 paddr_t top = mem_clusters[i].start + mem_clusters[i].size; 737 paddr_t top = mem_clusters[i].start + mem_clusters[i].size;
738 if (max_paddr < top) 738 if (max_paddr < top)
739 max_paddr = top; 739 max_paddr = top;
740 } 740 }
741#ifdef DEBUG 741#ifdef DEBUG
742 printf("dump_misc_init: max_paddr = 0x%lx\n", 742 printf("dump_misc_init: max_paddr = 0x%lx\n",
743 (unsigned long)max_paddr); 743 (unsigned long)max_paddr);
744#endif 744#endif
745 if (max_paddr == 0) { 745 if (max_paddr == 0) {
746 printf("Your machine does not initialize mem_clusters; " 746 printf("Your machine does not initialize mem_clusters; "
747 "sparse_dumps disabled\n"); 747 "sparse_dumps disabled\n");
748 sparse_dump = 0; 748 sparse_dump = 0;
749 } else { 749 } else {
750 sparse_dump_physmap = (void *)uvm_km_alloc(kernel_map, 750 sparse_dump_physmap = (void *)uvm_km_alloc(kernel_map,
751 roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE), 751 roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE),
752 PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO); 752 PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
753 } 753 }
754#endif 754#endif
755 dump_headerbuf = (void *)uvm_km_alloc(kernel_map, 755 dump_headerbuf = (void *)uvm_km_alloc(kernel_map,
756 dump_headerbuf_size, 756 dump_headerbuf_size,
757 PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO); 757 PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO);
758 /* XXXjld should check for failure here, disable dumps if so. */ 758 /* XXXjld should check for failure here, disable dumps if so. */
759} 759}
760 760
761#ifndef NO_SPARSE_DUMP 761#ifndef NO_SPARSE_DUMP
762/* 762/*
763 * Clear the set of pages to include in a sparse dump. 763 * Clear the set of pages to include in a sparse dump.
764 */ 764 */
765void 765void
766sparse_dump_reset(void) 766sparse_dump_reset(void)
767{ 767{
768 memset(sparse_dump_physmap, 0, 768 memset(sparse_dump_physmap, 0,
769 roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE)); 769 roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE));
770} 770}
771 771
772/* 772/*
773 * Include or exclude pages in a sparse dump. 773 * Include or exclude pages in a sparse dump.
774 */ 774 */
775void 775void
776sparse_dump_mark(void) 776sparse_dump_mark(void)
777{ 777{
778 paddr_t p, pstart, pend; 778 paddr_t p, pstart, pend;
779 struct vm_page *pg; 779 struct vm_page *pg;
780 int i; 780 int i;
781 781
782 /* 782 /*
783 * Mark all memory pages, then unmark pages that are uninteresting. 783 * Mark all memory pages, then unmark pages that are uninteresting.
784 * Dereferenceing pg->uobject might crash again if another CPU 784 * Dereferenceing pg->uobject might crash again if another CPU
785 * frees the object out from under us, but we can't lock anything 785 * frees the object out from under us, but we can't lock anything
786 * so it's a risk we have to take. 786 * so it's a risk we have to take.
787 */ 787 */
788 788
789 for (i = 0; i < mem_cluster_cnt; ++i) { 789 for (i = 0; i < mem_cluster_cnt; ++i) {
790 pstart = mem_clusters[i].start / PAGE_SIZE; 790 pstart = mem_clusters[i].start / PAGE_SIZE;
791 pend = pstart + mem_clusters[i].size / PAGE_SIZE; 791 pend = pstart + mem_clusters[i].size / PAGE_SIZE;
792 792
793 for (p = pstart; p < pend; p++) { 793 for (p = pstart; p < pend; p++) {
794 setbit(sparse_dump_physmap, p); 794 setbit(sparse_dump_physmap, p);
795 } 795 }
796 } 796 }
797 for (i = 0; i < vm_nphysseg; i++) { 797 for (i = 0; i < vm_nphysseg; i++) {
798 struct vm_physseg *seg = VM_PHYSMEM_PTR(i); 798 struct vm_physseg *seg = VM_PHYSMEM_PTR(i);
799 799
800 for (pg = seg->pgs; pg < seg->lastpg; pg++) { 800 for (pg = seg->pgs; pg < seg->lastpg; pg++) {
801 if (pg->uanon || (pg->pqflags & PQ_FREE) || 801 if (pg->uanon || (pg->pqflags & PQ_FREE) ||
802 (pg->uobject && pg->uobject->pgops)) { 802 (pg->uobject && pg->uobject->pgops)) {
803 p = VM_PAGE_TO_PHYS(pg) / PAGE_SIZE; 803 p = VM_PAGE_TO_PHYS(pg) / PAGE_SIZE;
804 clrbit(sparse_dump_physmap, p); 804 clrbit(sparse_dump_physmap, p);
805 } 805 }
806 } 806 }
807 } 807 }
808} 808}
809 809
810/* 810/*
811 * Machine-dependently decides on the contents of a sparse dump, using 811 * Machine-dependently decides on the contents of a sparse dump, using
812 * the above. 812 * the above.
813 */ 813 */
814void 814void
815cpu_dump_prep_sparse(void) 815cpu_dump_prep_sparse(void)
816{ 816{
817 sparse_dump_reset(); 817 sparse_dump_reset();
818 /* XXX could the alternate recursive page table be skipped? */ 818 /* XXX could the alternate recursive page table be skipped? */
819 sparse_dump_mark(); 819 sparse_dump_mark();
820 /* Memory for I/O buffers could be unmarked here, for example. */ 820 /* Memory for I/O buffers could be unmarked here, for example. */
821 /* The kernel text could also be unmarked, but gdb would be upset. */ 821 /* The kernel text could also be unmarked, but gdb would be upset. */
822} 822}
823#endif 823#endif
824 824
825/* 825/*
826 * Abstractly iterate over the collection of memory segments to be 826 * Abstractly iterate over the collection of memory segments to be
827 * dumped; the callback lacks the customary environment-pointer 827 * dumped; the callback lacks the customary environment-pointer
828 * argument because none of the current users really need one. 828 * argument because none of the current users really need one.
829 * 829 *
830 * To be used only after dump_seg_prep is called to set things up. 830 * To be used only after dump_seg_prep is called to set things up.
831 */ 831 */
832int 832int
833dump_seg_iter(int (*callback)(paddr_t, paddr_t)) 833dump_seg_iter(int (*callback)(paddr_t, paddr_t))
834{ 834{
835 int error, i; 835 int error, i;
836 836
837#define CALLBACK(start,size) do { \ 837#define CALLBACK(start,size) do { \
838 error = callback(start,size); \ 838 error = callback(start,size); \
839 if (error) \ 839 if (error) \
840 return error; \ 840 return error; \
841} while(0) 841} while(0)
842 842
843 for (i = 0; i < mem_cluster_cnt; ++i) { 843 for (i = 0; i < mem_cluster_cnt; ++i) {
844#ifndef NO_SPARSE_DUMP 844#ifndef NO_SPARSE_DUMP
845 /* 845 /*
846 * The bitmap is scanned within each memory segment, 846 * The bitmap is scanned within each memory segment,
847 * rather than over its entire domain, in case any 847 * rather than over its entire domain, in case any
848 * pages outside of the memory proper have been mapped 848 * pages outside of the memory proper have been mapped
849 * into kva; they might be devices that wouldn't 849 * into kva; they might be devices that wouldn't
850 * appreciate being arbitrarily read, and including 850 * appreciate being arbitrarily read, and including
851 * them could also break the assumption that a sparse 851 * them could also break the assumption that a sparse
852 * dump will always be smaller than a full one. 852 * dump will always be smaller than a full one.
853 */ 853 */
854 if (sparse_dump && sparse_dump_physmap) { 854 if (sparse_dump && sparse_dump_physmap) {
855 paddr_t p, start, end; 855 paddr_t p, start, end;
856 int lastset; 856 int lastset;
857 857
858 start = mem_clusters[i].start; 858 start = mem_clusters[i].start;
859 end = start + mem_clusters[i].size; 859 end = start + mem_clusters[i].size;
860 start = rounddown(start, PAGE_SIZE); /* unnecessary? */ 860 start = rounddown(start, PAGE_SIZE); /* unnecessary? */
861 lastset = 0; 861 lastset = 0;
862 for (p = start; p < end; p += PAGE_SIZE) { 862 for (p = start; p < end; p += PAGE_SIZE) {
863 int thisset = isset(sparse_dump_physmap, 863 int thisset = isset(sparse_dump_physmap,
864 p/PAGE_SIZE); 864 p/PAGE_SIZE);
865 865
866 if (!lastset && thisset) 866 if (!lastset && thisset)
867 start = p; 867 start = p;
868 if (lastset && !thisset) 868 if (lastset && !thisset)
869 CALLBACK(start, p - start); 869 CALLBACK(start, p - start);
870 lastset = thisset; 870 lastset = thisset;
871 } 871 }
872 if (lastset) 872 if (lastset)
873 CALLBACK(start, p - start); 873 CALLBACK(start, p - start);
874 } else 874 } else
875#endif 875#endif
876 CALLBACK(mem_clusters[i].start, mem_clusters[i].size); 876 CALLBACK(mem_clusters[i].start, mem_clusters[i].size);
877 } 877 }
878 return 0; 878 return 0;
879#undef CALLBACK 879#undef CALLBACK
880} 880}
881 881
882/* 882/*
883 * Prepare for an impending core dump: decide what's being dumped and 883 * Prepare for an impending core dump: decide what's being dumped and
884 * how much space it will take up. 884 * how much space it will take up.
885 */ 885 */
886void 886void
887dump_seg_prep(void) 887dump_seg_prep(void)
888{ 888{
889#ifndef NO_SPARSE_DUMP 889#ifndef NO_SPARSE_DUMP
890 if (sparse_dump && sparse_dump_physmap) 890 if (sparse_dump && sparse_dump_physmap)
891 cpu_dump_prep_sparse(); 891 cpu_dump_prep_sparse();
892#endif 892#endif
893 893
894 dump_nmemsegs = 0; 894 dump_nmemsegs = 0;
895 dump_npages = 0; 895 dump_npages = 0;
896 dump_seg_iter(dump_seg_count_range); 896 dump_seg_iter(dump_seg_count_range);
897 897
898 dump_header_size = ALIGN(sizeof(kcore_seg_t)) + 898 dump_header_size = ALIGN(sizeof(kcore_seg_t)) +
899 ALIGN(sizeof(cpu_kcore_hdr_t)) + 899 ALIGN(sizeof(cpu_kcore_hdr_t)) +
900 ALIGN(dump_nmemsegs * sizeof(phys_ram_seg_t)); 900 ALIGN(dump_nmemsegs * sizeof(phys_ram_seg_t));
901 dump_header_size = roundup(dump_header_size, dbtob(1)); 901 dump_header_size = roundup(dump_header_size, dbtob(1));
902 902
903 /* 903 /*
904 * savecore(8) will read this to decide how many pages to 904 * savecore(8) will read this to decide how many pages to
905 * copy, and cpu_dumpconf has already used the pessimistic 905 * copy, and cpu_dumpconf has already used the pessimistic
906 * value to set dumplo, so it's time to tell the truth. 906 * value to set dumplo, so it's time to tell the truth.
907 */ 907 */
908 dumpsize = dump_npages; /* XXX could these just be one variable? */ 908 dumpsize = dump_npages; /* XXX could these just be one variable? */
909} 909}
910 910
911int 911int
912dump_seg_count_range(paddr_t start, paddr_t size) 912dump_seg_count_range(paddr_t start, paddr_t size)
913{ 913{
914 ++dump_nmemsegs; 914 ++dump_nmemsegs;
915 dump_npages += size / PAGE_SIZE; 915 dump_npages += size / PAGE_SIZE;
916 return 0; 916 return 0;
917} 917}
918 918
919/* 919/*
920 * A sparse dump's header may be rather large, due to the number of 920 * A sparse dump's header may be rather large, due to the number of
921 * "segments" emitted. These routines manage a simple output buffer, 921 * "segments" emitted. These routines manage a simple output buffer,
922 * so that the header can be written to disk incrementally. 922 * so that the header can be written to disk incrementally.
923 */ 923 */
924void 924void
925dump_header_start(void) 925dump_header_start(void)
926{ 926{
927 dump_headerbuf_ptr = dump_headerbuf; 927 dump_headerbuf_ptr = dump_headerbuf;
928 dump_header_blkno = dumplo; 928 dump_header_blkno = dumplo;
929} 929}
930 930
931int 931int
932dump_header_flush(void) 932dump_header_flush(void)
933{ 933{
934 const struct bdevsw *bdev; 934 const struct bdevsw *bdev;
935 size_t to_write; 935 size_t to_write;
936 int error; 936 int error;
937 937
938 bdev = bdevsw_lookup(dumpdev); 938 bdev = bdevsw_lookup(dumpdev);
939 to_write = roundup(dump_headerbuf_ptr - dump_headerbuf, dbtob(1)); 939 to_write = roundup(dump_headerbuf_ptr - dump_headerbuf, dbtob(1));
940 error = bdev->d_dump(dumpdev, dump_header_blkno, 940 error = bdev->d_dump(dumpdev, dump_header_blkno,
941 dump_headerbuf, to_write); 941 dump_headerbuf, to_write);
942 dump_header_blkno += btodb(to_write); 942 dump_header_blkno += btodb(to_write);
943 dump_headerbuf_ptr = dump_headerbuf; 943 dump_headerbuf_ptr = dump_headerbuf;
944 return error; 944 return error;
945} 945}
946 946
947int 947int
948dump_header_addbytes(const void* vptr, size_t n) 948dump_header_addbytes(const void* vptr, size_t n)
949{ 949{
950 const char* ptr = vptr; 950 const char* ptr = vptr;
951 int error; 951 int error;
952 952
953 while (n > dump_headerbuf_avail) { 953 while (n > dump_headerbuf_avail) {
954 memcpy(dump_headerbuf_ptr, ptr, dump_headerbuf_avail); 954 memcpy(dump_headerbuf_ptr, ptr, dump_headerbuf_avail);
955 ptr += dump_headerbuf_avail; 955 ptr += dump_headerbuf_avail;
956 n -= dump_headerbuf_avail; 956 n -= dump_headerbuf_avail;
957 dump_headerbuf_ptr = dump_headerbuf_end; 957 dump_headerbuf_ptr = dump_headerbuf_end;
958 error = dump_header_flush(); 958 error = dump_header_flush();
959 if (error) 959 if (error)
960 return error; 960 return error;
961 } 961 }
962 memcpy(dump_headerbuf_ptr, ptr, n); 962 memcpy(dump_headerbuf_ptr, ptr, n);
963 dump_headerbuf_ptr += n; 963 dump_headerbuf_ptr += n;
964 964
965 return 0; 965 return 0;
966} 966}
967 967
968int 968int
969dump_header_addseg(paddr_t start, paddr_t size) 969dump_header_addseg(paddr_t start, paddr_t size)
970{ 970{
971 phys_ram_seg_t seg = { start, size }; 971 phys_ram_seg_t seg = { start, size };
972 972
973 return dump_header_addbytes(&seg, sizeof(seg)); 973 return dump_header_addbytes(&seg, sizeof(seg));
974} 974}
975 975
976int 976int
977dump_header_finish(void) 977dump_header_finish(void)
978{ 978{
979 memset(dump_headerbuf_ptr, 0, dump_headerbuf_avail); 979 memset(dump_headerbuf_ptr, 0, dump_headerbuf_avail);
980 return dump_header_flush(); 980 return dump_header_flush();
981} 981}
982 982
983 983
984/* 984/*
985 * These variables are needed by /sbin/savecore 985 * These variables are needed by /sbin/savecore
986 */ 986 */
987uint32_t dumpmag = 0x8fca0101; /* magic number */ 987uint32_t dumpmag = 0x8fca0101; /* magic number */
988int dumpsize = 0; /* pages */ 988int dumpsize = 0; /* pages */
989long dumplo = 0; /* blocks */ 989long dumplo = 0; /* blocks */
990 990
991/* 991/*
992 * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers 992 * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers
993 * for a full (non-sparse) dump. 993 * for a full (non-sparse) dump.
994 */ 994 */
995int 995int
996cpu_dumpsize(void) 996cpu_dumpsize(void)
997{ 997{
998 int size; 998 int size;
999 999
1000 size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) + 1000 size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) +
1001 ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t)); 1001 ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t));
1002 if (roundup(size, dbtob(1)) != dbtob(1)) 1002 if (roundup(size, dbtob(1)) != dbtob(1))
1003 return (-1); 1003 return (-1);
1004 1004
1005 return (1); 1005 return (1);
1006} 1006}
1007 1007
1008/* 1008/*
1009 * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped 1009 * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped
1010 * for a full (non-sparse) dump. 1010 * for a full (non-sparse) dump.
1011 */ 1011 */
1012u_long 1012u_long
1013cpu_dump_mempagecnt(void) 1013cpu_dump_mempagecnt(void)
1014{ 1014{
1015 u_long i, n; 1015 u_long i, n;
1016 1016
1017 n = 0; 1017 n = 0;
1018 for (i = 0; i < mem_cluster_cnt; i++) 1018 for (i = 0; i < mem_cluster_cnt; i++)
1019 n += atop(mem_clusters[i].size); 1019 n += atop(mem_clusters[i].size);
1020 return (n); 1020 return (n);
1021} 1021}
1022 1022
1023/* 1023/*
1024 * cpu_dump: dump the machine-dependent kernel core dump headers. 1024 * cpu_dump: dump the machine-dependent kernel core dump headers.
1025 */ 1025 */
1026int 1026int
1027cpu_dump(void) 1027cpu_dump(void)
1028{ 1028{
1029 kcore_seg_t seg; 1029 kcore_seg_t seg;
1030 cpu_kcore_hdr_t cpuhdr; 1030 cpu_kcore_hdr_t cpuhdr;
1031 const struct bdevsw *bdev; 1031 const struct bdevsw *bdev;
1032 1032
1033 bdev = bdevsw_lookup(dumpdev); 1033 bdev = bdevsw_lookup(dumpdev);
1034 if (bdev == NULL) 1034 if (bdev == NULL)
1035 return (ENXIO); 1035 return (ENXIO);
1036 1036
1037 /* 1037 /*
1038 * Generate a segment header. 1038 * Generate a segment header.
1039 */ 1039 */
1040 CORE_SETMAGIC(seg, KCORE_MAGIC, MID_MACHINE, CORE_CPU); 1040 CORE_SETMAGIC(seg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
1041 seg.c_size = dump_header_size - ALIGN(sizeof(seg)); 1041 seg.c_size = dump_header_size - ALIGN(sizeof(seg));
1042 (void)dump_header_addbytes(&seg, ALIGN(sizeof(seg))); 1042 (void)dump_header_addbytes(&seg, ALIGN(sizeof(seg)));
1043 1043
1044 /* 1044 /*
1045 * Add the machine-dependent header info. 1045 * Add the machine-dependent header info.
1046 */ 1046 */
1047 cpuhdr.ptdpaddr = PDPpaddr; 1047 cpuhdr.ptdpaddr = PDPpaddr;
1048 cpuhdr.nmemsegs = dump_nmemsegs; 1048 cpuhdr.nmemsegs = dump_nmemsegs;
1049 (void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr))); 1049 (void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr)));
1050 1050
1051 /* 1051 /*
1052 * Write out the memory segment descriptors. 1052 * Write out the memory segment descriptors.
1053 */ 1053 */
1054 return dump_seg_iter(dump_header_addseg); 1054 return dump_seg_iter(dump_header_addseg);
1055} 1055}
1056 1056
1057/* 1057/*
1058 * Doadump comes here after turning off memory management and 1058 * Doadump comes here after turning off memory management and
1059 * getting on the dump stack, either when called above, or by 1059 * getting on the dump stack, either when called above, or by
1060 * the auto-restart code. 1060 * the auto-restart code.
1061 */ 1061 */
1062#define BYTES_PER_DUMP PAGE_SIZE /* must be a multiple of pagesize XXX small */ 1062#define BYTES_PER_DUMP PAGE_SIZE /* must be a multiple of pagesize XXX small */
1063static vaddr_t dumpspace; 1063static vaddr_t dumpspace;
1064 1064
1065vaddr_t 1065vaddr_t
1066reserve_dumppages(vaddr_t p) 1066reserve_dumppages(vaddr_t p)
1067{ 1067{
1068 1068
1069 dumpspace = p; 1069 dumpspace = p;
1070 return (p + BYTES_PER_DUMP); 1070 return (p + BYTES_PER_DUMP);
1071} 1071}
1072 1072
1073int 1073int
1074dumpsys_seg(paddr_t maddr, paddr_t bytes) 1074dumpsys_seg(paddr_t maddr, paddr_t bytes)
1075{ 1075{
1076 u_long i, m, n; 1076 u_long i, m, n;
1077 daddr_t blkno; 1077 daddr_t blkno;
1078 const struct bdevsw *bdev; 1078 const struct bdevsw *bdev;
1079 int (*dump)(dev_t, daddr_t, void *, size_t); 1079 int (*dump)(dev_t, daddr_t, void *, size_t);
1080 int error; 1080 int error;
1081 1081
1082 if (dumpdev == NODEV) 1082 if (dumpdev == NODEV)
1083 return ENODEV; 1083 return ENODEV;
1084 bdev = bdevsw_lookup(dumpdev); 1084 bdev = bdevsw_lookup(dumpdev);
1085 if (bdev == NULL || bdev->d_psize == NULL) 1085 if (bdev == NULL || bdev->d_psize == NULL)
1086 return ENODEV; 1086 return ENODEV;
1087 1087
1088 dump = bdev->d_dump; 1088 dump = bdev->d_dump;
1089 1089
1090 blkno = dump_header_blkno; 1090 blkno = dump_header_blkno;
1091 for (i = 0; i < bytes; i += n, dump_totalbytesleft -= n) { 1091 for (i = 0; i < bytes; i += n, dump_totalbytesleft -= n) {
1092 /* Print out how many MBs we have left to go. */ 1092 /* Print out how many MBs we have left to go. */
1093 if ((dump_totalbytesleft % (1024*1024)) == 0) 1093 if ((dump_totalbytesleft % (1024*1024)) == 0)
1094 printf_nolog("%lu ", (unsigned long) 1094 printf_nolog("%lu ", (unsigned long)
1095 (dump_totalbytesleft / (1024 * 1024))); 1095 (dump_totalbytesleft / (1024 * 1024)));
1096 1096
1097 /* Limit size for next transfer. */ 1097 /* Limit size for next transfer. */
1098 n = bytes - i; 1098 n = bytes - i;
1099 if (n > BYTES_PER_DUMP) 1099 if (n > BYTES_PER_DUMP)
1100 n = BYTES_PER_DUMP; 1100 n = BYTES_PER_DUMP;
1101 1101
1102 for (m = 0; m < n; m += NBPG) 1102 for (m = 0; m < n; m += NBPG)
1103 pmap_kenter_pa(dumpspace + m, maddr + m, 1103 pmap_kenter_pa(dumpspace + m, maddr + m,
1104 VM_PROT_READ, 0); 1104 VM_PROT_READ, 0);
1105 pmap_update(pmap_kernel()); 1105 pmap_update(pmap_kernel());
1106 1106
1107 error = (*dump)(dumpdev, blkno, (void *)dumpspace, n); 1107 error = (*dump)(dumpdev, blkno, (void *)dumpspace, n);
1108 pmap_kremove_local(dumpspace, n); 1108 pmap_kremove_local(dumpspace, n);
1109 if (error) 1109 if (error)
1110 return error; 1110 return error;
1111 maddr += n; 1111 maddr += n;
1112 blkno += btodb(n); /* XXX? */ 1112 blkno += btodb(n); /* XXX? */
1113 1113
1114#if 0 /* XXX this doesn't work. grr. */ 1114#if 0 /* XXX this doesn't work. grr. */
1115 /* operator aborting dump? */ 1115 /* operator aborting dump? */
1116 if (sget() != NULL) 1116 if (sget() != NULL)
1117 return EINTR; 1117 return EINTR;
1118#endif 1118#endif
1119 } 1119 }
1120 dump_header_blkno = blkno; 1120 dump_header_blkno = blkno;
1121 1121
1122 return 0; 1122 return 0;
1123} 1123}
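
dumpsys_seg() streams each physical segment in BYTES_PER_DUMP windows: clamp the transfer size, map the window, write it, advance the block cursor. The shape of that loop, reduced to a userland sketch in which CHUNK and the 20480-byte segment length are hypothetical:

/*
 * Illustrative sketch of the chunking pattern only; the real code maps
 * each window with pmap_kenter_pa() and writes it via the dump driver.
 */
#include <stdio.h>

#define CHUNK           4096    /* plays the role of BYTES_PER_DUMP */
#define DEV_BSIZE       512
#define btodb(x)        ((x) / DEV_BSIZE)

int
main(void)
{
        unsigned long bytes = 20480;    /* hypothetical segment length */
        unsigned long i, n, blkno = 0;

        for (i = 0; i < bytes; i += n) {
                /* Limit size for next transfer. */
                n = bytes - i;
                if (n > CHUNK)
                        n = CHUNK;
                /* The kernel maps the window and calls the driver here. */
                blkno += btodb(n);
        }
        printf("%lu bytes written, blkno advanced by %lu blocks\n", i, blkno);
        return 0;
}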
1124 1124
1125void 1125void
1126dodumpsys(void) 1126dodumpsys(void)
1127{ 1127{
1128 const struct bdevsw *bdev; 1128 const struct bdevsw *bdev;
1129 int dumpend, psize; 1129 int dumpend, psize;
1130 int error; 1130 int error;
1131 1131
1132 if (dumpdev == NODEV) 1132 if (dumpdev == NODEV)
1133 return; 1133 return;
1134 1134
1135 bdev = bdevsw_lookup(dumpdev); 1135 bdev = bdevsw_lookup(dumpdev);
1136 if (bdev == NULL || bdev->d_psize == NULL) 1136 if (bdev == NULL || bdev->d_psize == NULL)
1137 return; 1137 return;
1138 /* 1138 /*
1139 * For dumps during autoconfiguration, 1139 * For dumps during autoconfiguration,
1140 * if the dump device has already been configured... 1140 * if the dump device has already been configured...
1141 */ 1141 */
1142 if (dumpsize == 0) 1142 if (dumpsize == 0)
1143 cpu_dumpconf(); 1143 cpu_dumpconf();
1144 1144
1145 printf("\ndumping to dev %llu,%llu (offset=%ld, size=%d):", 1145 printf("\ndumping to dev %llu,%llu (offset=%ld, size=%d):",
1146 (unsigned long long)major(dumpdev), 1146 (unsigned long long)major(dumpdev),
1147 (unsigned long long)minor(dumpdev), dumplo, dumpsize); 1147 (unsigned long long)minor(dumpdev), dumplo, dumpsize);
1148 1148
1149 if (dumplo <= 0 || dumpsize <= 0) { 1149 if (dumplo <= 0 || dumpsize <= 0) {
1150 printf(" not possible\n"); 1150 printf(" not possible\n");
1151 return; 1151 return;
1152 } 1152 }
1153 1153
1154 psize = bdev_size(dumpdev); 1154 psize = bdev_size(dumpdev);
1155 printf("\ndump "); 1155 printf("\ndump ");
1156 if (psize == -1) { 1156 if (psize == -1) {
1157 printf("area unavailable\n"); 1157 printf("area unavailable\n");
1158 return; 1158 return;
1159 } 1159 }
1160 1160
1161#if 0 /* XXX this doesn't work. grr. */ 1161#if 0 /* XXX this doesn't work. grr. */
1162 /* toss any characters present prior to dump */ 1162 /* toss any characters present prior to dump */
1163 while (sget() != NULL); /*syscons and pccons differ */ 1163 while (sget() != NULL); /*syscons and pccons differ */
1164#endif 1164#endif
1165 1165
1166 dump_seg_prep(); 1166 dump_seg_prep();
1167 dumpend = dumplo + btodb(dump_header_size) + ctod(dump_npages); 1167 dumpend = dumplo + btodb(dump_header_size) + ctod(dump_npages);
1168 if (dumpend > psize) { 1168 if (dumpend > psize) {
1169 printf("failed: insufficient space (%d < %d)\n", 1169 printf("failed: insufficient space (%d < %d)\n",
1170 psize, dumpend); 1170 psize, dumpend);
1171 goto failed; 1171 goto failed;
1172 } 1172 }
1173 1173
1174 dump_header_start(); 1174 dump_header_start();
1175 if ((error = cpu_dump()) != 0) 1175 if ((error = cpu_dump()) != 0)
1176 goto err; 1176 goto err;
1177 if ((error = dump_header_finish()) != 0) 1177 if ((error = dump_header_finish()) != 0)
1178 goto err; 1178 goto err;
1179 1179
1180 if (dump_header_blkno != dumplo + btodb(dump_header_size)) { 1180 if (dump_header_blkno != dumplo + btodb(dump_header_size)) {
1181 printf("BAD header size (%ld [written] != %ld [expected])\n", 1181 printf("BAD header size (%ld [written] != %ld [expected])\n",
1182 (long)(dump_header_blkno - dumplo), 1182 (long)(dump_header_blkno - dumplo),
1183 (long)btodb(dump_header_size)); 1183 (long)btodb(dump_header_size));
1184 goto failed; 1184 goto failed;
1185 } 1185 }
1186 1186
1187 dump_totalbytesleft = roundup(ptoa(dump_npages), BYTES_PER_DUMP); 1187 dump_totalbytesleft = roundup(ptoa(dump_npages), BYTES_PER_DUMP);
1188 error = dump_seg_iter(dumpsys_seg); 1188 error = dump_seg_iter(dumpsys_seg);
1189 1189
1190 if (error == 0 && dump_header_blkno != dumpend) { 1190 if (error == 0 && dump_header_blkno != dumpend) {
1191 printf("BAD dump size (%ld [written] != %ld [expected])\n", 1191 printf("BAD dump size (%ld [written] != %ld [expected])\n",
1192 (long)(dumpend - dumplo), 1192 (long)(dumpend - dumplo),
1193 (long)(dump_header_blkno - dumplo)); 1193 (long)(dump_header_blkno - dumplo));
1194 goto failed; 1194 goto failed;
1195 } 1195 }
1196 1196
1197err: 1197err:
1198 switch (error) { 1198 switch (error) {
1199 1199
1200 case ENXIO: 1200 case ENXIO:
1201 printf("device bad\n"); 1201 printf("device bad\n");
1202 break; 1202 break;
1203 1203
1204 case EFAULT: 1204 case EFAULT:
1205 printf("device not ready\n"); 1205 printf("device not ready\n");
1206 break; 1206 break;
1207 1207
1208 case EINVAL: 1208 case EINVAL:
1209 printf("area improper\n"); 1209 printf("area improper\n");
1210 break; 1210 break;
1211 1211
1212 case EIO: 1212 case EIO:
1213 printf("i/o error\n"); 1213 printf("i/o error\n");
1214 break; 1214 break;
1215 1215
1216 case EINTR: 1216 case EINTR:
1217 printf("aborted from console\n"); 1217 printf("aborted from console\n");
1218 break; 1218 break;
1219 1219
1220 case 0: 1220 case 0:
1221 printf("succeeded\n"); 1221 printf("succeeded\n");
1222 break; 1222 break;
1223 1223
1224 default: 1224 default:
1225 printf("error %d\n", error); 1225 printf("error %d\n", error);
1226 break; 1226 break;
1227 } 1227 }
1228failed: 1228failed:
1229 printf("\n\n"); 1229 printf("\n\n");
1230 delay(5000000); /* 5 seconds */ 1230 delay(5000000); /* 5 seconds */
1231} 1231}
1232 1232
1233/* 1233/*
1234 * This is called by main to set dumplo and dumpsize. 1234 * This is called by main to set dumplo and dumpsize.
1235 * Dumps always skip the first PAGE_SIZE of disk space 1235 * Dumps always skip the first PAGE_SIZE of disk space
1236 * in case there might be a disk label stored there. 1236 * in case there might be a disk label stored there.
1237 * If there is extra space, put dump at the end to 1237 * If there is extra space, put dump at the end to
1238 * reduce the chance that swapping trashes it. 1238 * reduce the chance that swapping trashes it.
1239 * 1239 *
1240 * Sparse dumps can't be placed as close to the end as possible, because 1240 * Sparse dumps can't be placed as close to the end as possible, because
1241 * savecore(8) has to know where to start reading in the dump device 1241 * savecore(8) has to know where to start reading in the dump device
1242 * before it has access to any of the crashed system's state. 1242 * before it has access to any of the crashed system's state.
1243 * 1243 *
1244 * Note also that a sparse dump will never be larger than a full one: 1244 * Note also that a sparse dump will never be larger than a full one:
1245 * in order to add a phys_ram_seg_t to the header, at least one page 1245 * in order to add a phys_ram_seg_t to the header, at least one page
1246 * must be removed. 1246 * must be removed.
1247 */ 1247 */
1248void 1248void
1249cpu_dumpconf(void) 1249cpu_dumpconf(void)
1250{ 1250{
1251 int nblks, dumpblks; /* size of dump area */ 1251 int nblks, dumpblks; /* size of dump area */
1252 1252
1253 if (dumpdev == NODEV) 1253 if (dumpdev == NODEV)
1254 goto bad; 1254 goto bad;
1255 nblks = bdev_size(dumpdev); 1255 nblks = bdev_size(dumpdev);
1256 if (nblks <= ctod(1)) 1256 if (nblks <= ctod(1))
1257 goto bad; 1257 goto bad;
1258 1258
1259 dumpblks = cpu_dumpsize(); 1259 dumpblks = cpu_dumpsize();
1260 if (dumpblks < 0) 1260 if (dumpblks < 0)
1261 goto bad; 1261 goto bad;
1262 1262
1263 /* dumpsize is in page units, and doesn't include headers. */ 1263 /* dumpsize is in page units, and doesn't include headers. */
1264 dumpsize = cpu_dump_mempagecnt(); 1264 dumpsize = cpu_dump_mempagecnt();
1265 1265
1266 dumpblks += ctod(dumpsize); 1266 dumpblks += ctod(dumpsize);
1267 1267
1268 /* If dump won't fit (incl. room for possible label), punt. */ 1268 /* If dump won't fit (incl. room for possible label), punt. */
1269 if (dumpblks > (nblks - ctod(1))) { 1269 if (dumpblks > (nblks - ctod(1))) {
1270#ifndef NO_SPARSE_DUMP 1270#ifndef NO_SPARSE_DUMP
1271 /* A sparse dump might (and hopefully will) fit. */ 1271 /* A sparse dump might (and hopefully will) fit. */
1272 dumplo = ctod(1); 1272 dumplo = ctod(1);
1273#else 1273#else
1274 /* But if we're not configured for that, punt. */ 1274 /* But if we're not configured for that, punt. */
1275 goto bad; 1275 goto bad;
1276#endif 1276#endif
1277 } else { 1277 } else {
1278 /* Put dump at end of partition */ 1278 /* Put dump at end of partition */
1279 dumplo = nblks - dumpblks; 1279 dumplo = nblks - dumpblks;
1280 } 1280 }
1281 1281
1282 1282
1283 /* Now that we've decided this will work, init ancillary stuff. */ 1283 /* Now that we've decided this will work, init ancillary stuff. */
1284 dump_misc_init(); 1284 dump_misc_init();
1285 return; 1285 return;
1286 1286
1287 bad: 1287 bad:
1288 dumpsize = 0; 1288 dumpsize = 0;
1289} 1289}
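
A worked sketch of the placement decision above, with invented device and dump sizes: a full dump goes at the very end of the partition, and only when it cannot fit does dumplo fall back to ctod(1) so that a sparse dump can still be attempted. All numbers below are hypothetical.

/*
 * Illustration only; PAGE_SIZE, DEV_BSIZE and ctod() mirror the usual
 * x86 definitions and the sizes are made up.
 */
#include <stdio.h>

#define PAGE_SIZE       4096
#define DEV_BSIZE       512
#define ctod(x)         ((x) * (PAGE_SIZE / DEV_BSIZE)) /* pages -> blocks */

int
main(void)
{
        long nblks = 2048;              /* hypothetical dump device size, in blocks */
        long dumpblks = 1 + ctod(200);  /* header block + 200 RAM pages, hypothetical */
        long dumplo;

        if (dumpblks > nblks - ctod(1))
                dumplo = ctod(1);       /* hope a sparse dump fits */
        else
                dumplo = nblks - dumpblks;

        printf("dumplo = %ld of %ld blocks\n", dumplo, nblks);
        return 0;
}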
1290 1290
1291/* 1291/*
1292 * Clear registers on exec 1292 * Clear registers on exec
1293 */ 1293 */
1294void 1294void
1295setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack) 1295setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
1296{ 1296{
1297 struct pcb *pcb = lwp_getpcb(l); 1297 struct pcb *pcb = lwp_getpcb(l);
1298 struct trapframe *tf; 1298 struct trapframe *tf;
1299 1299
1300#ifdef USER_LDT 1300#ifdef USER_LDT
1301 pmap_ldt_cleanup(l); 1301 pmap_ldt_cleanup(l);
1302#endif 1302#endif
1303 1303
1304 fpu_save_area_clear(l, pack->ep_osversion >= 699002600 1304 fpu_save_area_clear(l, pack->ep_osversion >= 699002600
1305 ? __NetBSD_NPXCW__ : __NetBSD_COMPAT_NPXCW__); 1305 ? __NetBSD_NPXCW__ : __NetBSD_COMPAT_NPXCW__);
1306 pcb->pcb_flags = 0; 1306 pcb->pcb_flags = 0;
1307 1307
1308 l->l_proc->p_flag &= ~PK_32; 1308 l->l_proc->p_flag &= ~PK_32;
1309 1309
1310 tf = l->l_md.md_regs; 1310 tf = l->l_md.md_regs;
1311 tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL); 1311 tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
1312 tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL); 1312 tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
1313 cpu_fsgs_zero(l); 1313 cpu_fsgs_zero(l);
1314 tf->tf_rdi = 0; 1314 tf->tf_rdi = 0;
1315 tf->tf_rsi = 0; 1315 tf->tf_rsi = 0;
1316 tf->tf_rbp = 0; 1316 tf->tf_rbp = 0;
1317 tf->tf_rbx = l->l_proc->p_psstrp; 1317 tf->tf_rbx = l->l_proc->p_psstrp;
1318 tf->tf_rdx = 0; 1318 tf->tf_rdx = 0;
1319 tf->tf_rcx = 0; 1319 tf->tf_rcx = 0;
1320 tf->tf_rax = 0; 1320 tf->tf_rax = 0;
1321 tf->tf_rip = pack->ep_entry; 1321 tf->tf_rip = pack->ep_entry;
1322 tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL); 1322 tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL);
1323 tf->tf_rflags = PSL_USERSET; 1323 tf->tf_rflags = PSL_USERSET;
1324 tf->tf_rsp = stack; 1324 tf->tf_rsp = stack;
1325 tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL); 1325 tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
1326} 1326}
1327 1327
1328/* 1328/*
1329 * Initialize segments and descriptor tables 1329 * Initialize segments and descriptor tables
1330 */ 1330 */
1331 1331
1332#ifdef XEN 1332#ifdef XEN
1333struct trap_info *xen_idt; 1333struct trap_info *xen_idt;
1334int xen_idt_idx; 1334int xen_idt_idx;
1335#endif 1335#endif
1336char *ldtstore; 1336char *ldtstore;
1337char *gdtstore; 1337char *gdtstore;
1338 1338
1339void 1339void
1340setgate(struct gate_descriptor *gd, void *func, int ist, int type, int dpl, int sel) 1340setgate(struct gate_descriptor *gd, void *func, int ist, int type, int dpl, int sel)
1341{ 1341{
1342 1342
1343 kpreempt_disable(); 1343 kpreempt_disable();
1344 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); 1344 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE);
1345 1345
1346 gd->gd_looffset = (uint64_t)func & 0xffff; 1346 gd->gd_looffset = (uint64_t)func & 0xffff;
1347 gd->gd_selector = sel; 1347 gd->gd_selector = sel;
1348 gd->gd_ist = ist; 1348 gd->gd_ist = ist;
1349 gd->gd_type = type; 1349 gd->gd_type = type;
1350 gd->gd_dpl = dpl; 1350 gd->gd_dpl = dpl;
1351 gd->gd_p = 1; 1351 gd->gd_p = 1;
1352 gd->gd_hioffset = (uint64_t)func >> 16; 1352 gd->gd_hioffset = (uint64_t)func >> 16;
1353 gd->gd_zero = 0; 1353 gd->gd_zero = 0;
1354 gd->gd_xx1 = 0; 1354 gd->gd_xx1 = 0;
1355 gd->gd_xx2 = 0; 1355 gd->gd_xx2 = 0;
1356 gd->gd_xx3 = 0; 1356 gd->gd_xx3 = 0;
1357 1357
1358 pmap_changeprot_local(idt_vaddr, VM_PROT_READ); 1358 pmap_changeprot_local(idt_vaddr, VM_PROT_READ);
1359 kpreempt_enable(); 1359 kpreempt_enable();
1360} 1360}
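
setgate() splits the 64-bit handler address across the descriptor's two offset fields: the low 16 bits land in gd_looffset and everything above in gd_hioffset. A small sketch showing that the split round-trips; the handler address below is made up for illustration.

/*
 * Sketch of the offset split only; the address is hypothetical.
 */
#include <assert.h>
#include <stdint.h>

int
main(void)
{
        uint64_t func = 0xffffffff80512340ULL;  /* hypothetical handler address */
        uint64_t looffset = func & 0xffff;
        uint64_t hioffset = func >> 16;

        /* Reassembling the two fields must give back the original address. */
        assert(((hioffset << 16) | looffset) == func);
        return 0;
}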
1361 1361
1362void 1362void
1363unsetgate(struct gate_descriptor *gd) 1363unsetgate(struct gate_descriptor *gd)
1364{ 1364{
1365 1365
1366 kpreempt_disable(); 1366 kpreempt_disable();
1367 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); 1367 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE);
1368 1368
1369 memset(gd, 0, sizeof (*gd)); 1369 memset(gd, 0, sizeof (*gd));
1370 1370
1371 pmap_changeprot_local(idt_vaddr, VM_PROT_READ); 1371 pmap_changeprot_local(idt_vaddr, VM_PROT_READ);
1372 kpreempt_enable(); 1372 kpreempt_enable();
1373} 1373}
1374 1374
1375void 1375void
1376setregion(struct region_descriptor *rd, void *base, uint16_t limit) 1376setregion(struct region_descriptor *rd, void *base, uint16_t limit)
1377{ 1377{
1378 rd->rd_limit = limit; 1378 rd->rd_limit = limit;
1379 rd->rd_base = (uint64_t)base; 1379 rd->rd_base = (uint64_t)base;
1380} 1380}
1381 1381
1382/* 1382/*
1383 * Note that the base and limit fields are ignored in long mode. 1383 * Note that the base and limit fields are ignored in long mode.
1384 */ 1384 */
1385void 1385void
1386set_mem_segment(struct mem_segment_descriptor *sd, void *base, size_t limit, 1386set_mem_segment(struct mem_segment_descriptor *sd, void *base, size_t limit,
1387 int type, int dpl, int gran, int def32, int is64) 1387 int type, int dpl, int gran, int def32, int is64)
1388{ 1388{
1389 sd->sd_lolimit = (unsigned)limit; 1389 sd->sd_lolimit = (unsigned)limit;
1390 sd->sd_lobase = (unsigned long)base; 1390 sd->sd_lobase = (unsigned long)base;
1391 sd->sd_type = type; 1391 sd->sd_type = type;
1392 sd->sd_dpl = dpl; 1392 sd->sd_dpl = dpl;
1393 sd->sd_p = 1; 1393 sd->sd_p = 1;
1394 sd->sd_hilimit = (unsigned)limit >> 16; 1394 sd->sd_hilimit = (unsigned)limit >> 16;
1395 sd->sd_avl = 0; 1395 sd->sd_avl = 0;
1396 sd->sd_long = is64; 1396 sd->sd_long = is64;
1397 sd->sd_def32 = def32; 1397 sd->sd_def32 = def32;
1398 sd->sd_gran = gran; 1398 sd->sd_gran = gran;
1399 sd->sd_hibase = (unsigned long)base >> 24; 1399 sd->sd_hibase = (unsigned long)base >> 24;
1400} 1400}
1401 1401
1402void 1402void
1403set_sys_segment(struct sys_segment_descriptor *sd, void *base, size_t limit, 1403set_sys_segment(struct sys_segment_descriptor *sd, void *base, size_t limit,
1404 int type, int dpl, int gran) 1404 int type, int dpl, int gran)
1405{ 1405{
1406 memset(sd, 0, sizeof *sd); 1406 memset(sd, 0, sizeof *sd);
1407 sd->sd_lolimit = (unsigned)limit; 1407 sd->sd_lolimit = (unsigned)limit;
1408 sd->sd_lobase = (uint64_t)base; 1408 sd->sd_lobase = (uint64_t)base;
1409 sd->sd_type = type; 1409 sd->sd_type = type;
1410 sd->sd_dpl = dpl; 1410 sd->sd_dpl = dpl;
1411 sd->sd_p = 1; 1411 sd->sd_p = 1;
1412 sd->sd_hilimit = (unsigned)limit >> 16; 1412 sd->sd_hilimit = (unsigned)limit >> 16;
1413 sd->sd_gran = gran; 1413 sd->sd_gran = gran;
1414 sd->sd_hibase = (uint64_t)base >> 24; 1414 sd->sd_hibase = (uint64_t)base >> 24;
1415} 1415}
1416 1416
1417void 1417void
1418cpu_init_idt(void) 1418cpu_init_idt(void)
1419{ 1419{
1420#ifndef XEN 1420#ifndef XEN
1421 struct region_descriptor region; 1421 struct region_descriptor region;
1422 1422
1423 setregion(&region, idt, NIDT * sizeof(idt[0]) - 1); 1423 setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
1424 lidt(&region);  1424 lidt(&region);
1425#else 1425#else
1426 if (HYPERVISOR_set_trap_table(xen_idt)) 1426 if (HYPERVISOR_set_trap_table(xen_idt))
1427 panic("HYPERVISOR_set_trap_table() failed"); 1427 panic("HYPERVISOR_set_trap_table() failed");
1428#endif 1428#endif
1429} 1429}
1430 1430
1431#define IDTVEC(name) __CONCAT(X, name) 1431#define IDTVEC(name) __CONCAT(X, name)
1432typedef void (vector)(void); 1432typedef void (vector)(void);
1433extern vector IDTVEC(syscall); 1433extern vector IDTVEC(syscall);
1434extern vector IDTVEC(syscall32); 1434extern vector IDTVEC(syscall32);
1435extern vector IDTVEC(osyscall); 1435extern vector IDTVEC(osyscall);
1436extern vector IDTVEC(oosyscall); 1436extern vector IDTVEC(oosyscall);
1437extern vector *IDTVEC(exceptions)[]; 1437extern vector *IDTVEC(exceptions)[];
1438 1438
1439static void 1439static void
1440init_x86_64_msgbuf(void) 1440init_x86_64_msgbuf(void)
1441{ 1441{
1442 /* Message buffer is located at end of core. */ 1442 /* Message buffer is located at end of core. */
1443 struct vm_physseg *vps; 1443 struct vm_physseg *vps;
1444 psize_t sz = round_page(MSGBUFSIZE); 1444 psize_t sz = round_page(MSGBUFSIZE);
1445 psize_t reqsz = sz; 1445 psize_t reqsz = sz;
1446 int x; 1446 int x;
1447  1447
1448 search_again: 1448 search_again:
1449 vps = NULL; 1449 vps = NULL;
1450 1450
1451 for (x = 0; x < vm_nphysseg; x++) { 1451 for (x = 0; x < vm_nphysseg; x++) {
1452 vps = VM_PHYSMEM_PTR(x); 1452 vps = VM_PHYSMEM_PTR(x);
1453 if (ctob(vps->avail_end) == avail_end) 1453 if (ctob(vps->avail_end) == avail_end)
1454 break; 1454 break;
1455 } 1455 }
1456 if (x == vm_nphysseg) 1456 if (x == vm_nphysseg)
1457 panic("init_x86_64: can't find end of memory"); 1457 panic("init_x86_64: can't find end of memory");
1458 1458
1459 /* Shrink so it'll fit in the last segment. */ 1459 /* Shrink so it'll fit in the last segment. */
1460 if ((vps->avail_end - vps->avail_start) < atop(sz)) 1460 if ((vps->avail_end - vps->avail_start) < atop(sz))
1461 sz = ctob(vps->avail_end - vps->avail_start); 1461 sz = ctob(vps->avail_end - vps->avail_start);
1462 1462
1463 vps->avail_end -= atop(sz); 1463 vps->avail_end -= atop(sz);
1464 vps->end -= atop(sz); 1464 vps->end -= atop(sz);
1465 msgbuf_p_seg[msgbuf_p_cnt].sz = sz; 1465 msgbuf_p_seg[msgbuf_p_cnt].sz = sz;
1466 msgbuf_p_seg[msgbuf_p_cnt++].paddr = ctob(vps->avail_end); 1466 msgbuf_p_seg[msgbuf_p_cnt++].paddr = ctob(vps->avail_end);
1467 1467
1468 /* Remove the last segment if it now has no pages. */ 1468 /* Remove the last segment if it now has no pages. */
1469 if (vps->start == vps->end) { 1469 if (vps->start == vps->end) {
1470 for (vm_nphysseg--; x < vm_nphysseg; x++) 1470 for (vm_nphysseg--; x < vm_nphysseg; x++)
1471 VM_PHYSMEM_PTR_SWAP(x, x + 1); 1471 VM_PHYSMEM_PTR_SWAP(x, x + 1);
1472 } 1472 }
1473 1473
1474 /* Now find where the new avail_end is. */ 1474 /* Now find where the new avail_end is. */
1475 for (avail_end = 0, x = 0; x < vm_nphysseg; x++) 1475 for (avail_end = 0, x = 0; x < vm_nphysseg; x++)
1476 if (VM_PHYSMEM_PTR(x)->avail_end > avail_end) 1476 if (VM_PHYSMEM_PTR(x)->avail_end > avail_end)
1477 avail_end = VM_PHYSMEM_PTR(x)->avail_end; 1477 avail_end = VM_PHYSMEM_PTR(x)->avail_end;
1478 avail_end = ctob(avail_end); 1478 avail_end = ctob(avail_end);
1479 1479
1480 if (sz == reqsz) 1480 if (sz == reqsz)
1481 return; 1481 return;
1482 1482
1483 reqsz -= sz; 1483 reqsz -= sz;
1484 if (msgbuf_p_cnt == VM_PHYSSEG_MAX) { 1484 if (msgbuf_p_cnt == VM_PHYSSEG_MAX) {
1485 /* No more segments available, bail out. */ 1485 /* No more segments available, bail out. */
1486 printf("WARNING: MSGBUFSIZE (%zu) too large, using %zu.\n", 1486 printf("WARNING: MSGBUFSIZE (%zu) too large, using %zu.\n",
1487 (size_t)MSGBUFSIZE, (size_t)(MSGBUFSIZE - reqsz)); 1487 (size_t)MSGBUFSIZE, (size_t)(MSGBUFSIZE - reqsz));
1488 return; 1488 return;
1489 } 1489 }
1490 1490
1491 sz = reqsz; 1491 sz = reqsz;
1492 goto search_again; 1492 goto search_again;
1493} 1493}
1494 1494
1495static void 1495static void
1496init_x86_64_ksyms(void) 1496init_x86_64_ksyms(void)
1497{ 1497{
1498#if NKSYMS || defined(DDB) || defined(MODULAR) 1498#if NKSYMS || defined(DDB) || defined(MODULAR)
1499 extern int end; 1499 extern int end;
1500 extern int *esym; 1500 extern int *esym;
1501#ifndef XEN 1501#ifndef XEN
1502 struct btinfo_symtab *symtab; 1502 struct btinfo_symtab *symtab;
1503 vaddr_t tssym, tesym; 1503 vaddr_t tssym, tesym;
1504#endif 1504#endif
1505 1505
1506#ifdef DDB 1506#ifdef DDB
1507 db_machine_init(); 1507 db_machine_init();
1508#endif 1508#endif
1509 1509
1510#ifndef XEN 1510#ifndef XEN
1511 symtab = lookup_bootinfo(BTINFO_SYMTAB); 1511 symtab = lookup_bootinfo(BTINFO_SYMTAB);
1512 if (symtab) { 1512 if (symtab) {
1513 tssym = (vaddr_t)symtab->ssym + KERNBASE; 1513 tssym = (vaddr_t)symtab->ssym + KERNBASE;
1514 tesym = (vaddr_t)symtab->esym + KERNBASE; 1514 tesym = (vaddr_t)symtab->esym + KERNBASE;
1515 ksyms_addsyms_elf(symtab->nsym, (void *)tssym, (void *)tesym); 1515 ksyms_addsyms_elf(symtab->nsym, (void *)tssym, (void *)tesym);
1516 } else 1516 } else
1517 ksyms_addsyms_elf(*(long *)(void *)&end, 1517 ksyms_addsyms_elf(*(long *)(void *)&end,
1518 ((long *)(void *)&end) + 1, esym); 1518 ((long *)(void *)&end) + 1, esym);
1519#else /* XEN */ 1519#else /* XEN */
1520 esym = xen_start_info.mod_start ? 1520 esym = xen_start_info.mod_start ?
1521 (void *)xen_start_info.mod_start : 1521 (void *)xen_start_info.mod_start :
1522 (void *)xen_start_info.mfn_list; 1522 (void *)xen_start_info.mfn_list;
1523 ksyms_addsyms_elf(*(int *)(void *)&end, 1523 ksyms_addsyms_elf(*(int *)(void *)&end,
1524 ((int *)(void *)&end) + 1, esym); 1524 ((int *)(void *)&end) + 1, esym);
1525#endif /* XEN */ 1525#endif /* XEN */
1526#endif 1526#endif
1527} 1527}
1528 1528
1529void 1529void
1530init_x86_64(paddr_t first_avail) 1530init_x86_64(paddr_t first_avail)
1531{ 1531{
1532 extern void consinit(void); 1532 extern void consinit(void);
1533 struct region_descriptor region; 1533 struct region_descriptor region;
1534 struct mem_segment_descriptor *ldt_segp; 1534 struct mem_segment_descriptor *ldt_segp;
1535 int x; 1535 int x;
1536#ifndef XEN 1536#ifndef XEN
1537 int ist; 1537 int ist;
1538 extern struct extent *iomem_ex; 1538 extern struct extent *iomem_ex;
1539#if !defined(REALEXTMEM) && !defined(REALBASEMEM) 1539#if !defined(REALEXTMEM) && !defined(REALBASEMEM)
1540 struct btinfo_memmap *bim; 1540 struct btinfo_memmap *bim;
1541#endif 1541#endif
1542#endif /* !XEN */ 1542#endif /* !XEN */
1543 1543
1544 cpu_probe(&cpu_info_primary); 1544 cpu_probe(&cpu_info_primary);
1545 1545
1546#ifdef XEN 1546#ifdef XEN
1547 KASSERT(HYPERVISOR_shared_info != NULL); 1547 KASSERT(HYPERVISOR_shared_info != NULL);
1548 cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0]; 1548 cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0];
1549 1549
1550 __PRINTK(("init_x86_64(0x%lx)\n", first_avail)); 1550 __PRINTK(("init_x86_64(0x%lx)\n", first_avail));
1551#endif /* XEN */ 1551#endif /* XEN */
1552 1552
1553 cpu_init_msrs(&cpu_info_primary, true); 1553 cpu_init_msrs(&cpu_info_primary, true);
1554 1554
1555 use_pae = 1; /* PAE always enabled in long mode */ 1555 use_pae = 1; /* PAE always enabled in long mode */
1556 1556
1557#ifdef XEN 1557#ifdef XEN
1558 struct pcb *pcb = lwp_getpcb(&lwp0); 1558 struct pcb *pcb = lwp_getpcb(&lwp0);
1559 mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM); 1559 mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM);
1560 pcb->pcb_cr3 = xen_start_info.pt_base - KERNBASE; 1560 pcb->pcb_cr3 = xen_start_info.pt_base - KERNBASE;
1561 __PRINTK(("pcb_cr3 0x%lx\n", xen_start_info.pt_base - KERNBASE)); 1561 __PRINTK(("pcb_cr3 0x%lx\n", xen_start_info.pt_base - KERNBASE));
1562#endif 1562#endif
1563 1563
1564#if NISA > 0 || NPCI > 0 1564#if NISA > 0 || NPCI > 0
1565 x86_bus_space_init(); 1565 x86_bus_space_init();
1566#endif 1566#endif
1567 1567
1568 consinit(); /* XXX SHOULD NOT BE DONE HERE */ 1568 consinit(); /* XXX SHOULD NOT BE DONE HERE */
1569 1569
1570 /* 1570 /*
1571 * Initialize PAGE_SIZE-dependent variables. 1571 * Initialize PAGE_SIZE-dependent variables.
1572 */ 1572 */
1573 uvm_setpagesize(); 1573 uvm_setpagesize();
1574 1574
1575 uvmexp.ncolors = 2; 1575 uvmexp.ncolors = 2;
1576 1576
1577#ifndef XEN 1577#ifndef XEN
1578 /* 1578 /*
1579 * Low memory reservations: 1579 * Low memory reservations:
1580 * Page 0: BIOS data 1580 * Page 0: BIOS data
1581 * Page 1: BIOS callback (not used yet, for symmetry with i386) 1581 * Page 1: BIOS callback (not used yet, for symmetry with i386)
1582 * Page 2: MP bootstrap 1582 * Page 2: MP bootstrap code (MP_TRAMPOLINE)
1583 * Page 3: ACPI wakeup code (ACPI_WAKEUP_ADDR) 1583 * Page 3: ACPI wakeup code (ACPI_WAKEUP_ADDR)
1584 * Page 4: Temporary page table for 0MB-4MB 1584 * Page 4: Temporary page table for 0MB-4MB
1585 * Page 5: Temporary page directory 1585 * Page 5: Temporary page directory
1586 * Page 6: Temporary page map level 3 1586 * Page 6: Temporary page map level 3
1587 * Page 7: Temporary page map level 4 1587 * Page 7: Temporary page map level 4
1588 */ 1588 */
1589 avail_start = 8 * PAGE_SIZE; 1589 avail_start = 8 * PAGE_SIZE;
1590 1590
1591#if !defined(REALBASEMEM) && !defined(REALEXTMEM) 1591#if !defined(REALBASEMEM) && !defined(REALEXTMEM)
1592 /* 1592 /*
1593 * Check to see if we have a memory map from the BIOS (passed 1593 * Check to see if we have a memory map from the BIOS (passed
1594 * to us by the boot program). 1594 * to us by the boot program).
1595 */ 1595 */
1596 bim = lookup_bootinfo(BTINFO_MEMMAP); 1596 bim = lookup_bootinfo(BTINFO_MEMMAP);
1597 if (bim != NULL && bim->num > 0) 1597 if (bim != NULL && bim->num > 0)
1598 initx86_parse_memmap(bim, iomem_ex); 1598 initx86_parse_memmap(bim, iomem_ex);
1599#endif /* ! REALBASEMEM && ! REALEXTMEM */ 1599#endif /* ! REALBASEMEM && ! REALEXTMEM */
1600 1600
1601 /* 1601 /*
1602 * If the loop above didn't find any valid segment, fall back to 1602 * If the loop above didn't find any valid segment, fall back to
1603 * former code. 1603 * former code.
1604 */ 1604 */
1605 if (mem_cluster_cnt == 0) 1605 if (mem_cluster_cnt == 0)
1606 initx86_fake_memmap(iomem_ex); 1606 initx86_fake_memmap(iomem_ex);
1607 1607
1608#else /* XEN */ 1608#else /* XEN */
1609 /* Parse Xen command line (replaces bootinfo) */ 1609 /* Parse Xen command line (replaces bootinfo) */
1610 xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL); 1610 xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL);
1611 1611
1612 /* Determine physical address space */ 1612 /* Determine physical address space */
1613 avail_start = first_avail; 1613 avail_start = first_avail;
1614 avail_end = ctob(xen_start_info.nr_pages); 1614 avail_end = ctob(xen_start_info.nr_pages);
1615 pmap_pa_start = (KERNTEXTOFF - KERNBASE); 1615 pmap_pa_start = (KERNTEXTOFF - KERNBASE);
1616 pmap_pa_end = avail_end; 1616 pmap_pa_end = avail_end;
1617 __PRINTK(("pmap_pa_start 0x%lx avail_start 0x%lx avail_end 0x%lx\n", 1617 __PRINTK(("pmap_pa_start 0x%lx avail_start 0x%lx avail_end 0x%lx\n",
1618 pmap_pa_start, avail_start, avail_end)); 1618 pmap_pa_start, avail_start, avail_end));
1619#endif /* !XEN */ 1619#endif /* !XEN */
1620 1620
1621 /* 1621 /*
1622 * Call pmap initialization to make new kernel address space. 1622 * Call pmap initialization to make new kernel address space.
1623 * We must do this before loading pages into the VM system. 1623 * We must do this before loading pages into the VM system.
1624 */ 1624 */
1625 pmap_bootstrap(VM_MIN_KERNEL_ADDRESS); 1625 pmap_bootstrap(VM_MIN_KERNEL_ADDRESS);
1626 1626
1627 if (avail_start != PAGE_SIZE) 1627 if (avail_start != PAGE_SIZE)
1628 pmap_prealloc_lowmem_ptps(); 1628 pmap_prealloc_lowmem_ptps();
1629 1629
1630#ifndef XEN 1630#ifndef XEN
1631 initx86_load_memmap(first_avail); 1631 initx86_load_memmap(first_avail);
1632#else /* XEN */ 1632#else /* XEN */
1633 kern_end = KERNBASE + first_avail; 1633 kern_end = KERNBASE + first_avail;
1634 physmem = xen_start_info.nr_pages; 1634 physmem = xen_start_info.nr_pages;
1635 1635
1636 uvm_page_physload(atop(avail_start), 1636 uvm_page_physload(atop(avail_start),
1637 atop(avail_end), atop(avail_start), 1637 atop(avail_end), atop(avail_start),
1638 atop(avail_end), VM_FREELIST_DEFAULT); 1638 atop(avail_end), VM_FREELIST_DEFAULT);
1639#endif /* !XEN */ 1639#endif /* !XEN */
1640 1640
1641 init_x86_64_msgbuf(); 1641 init_x86_64_msgbuf();
1642 1642
1643 pmap_growkernel(VM_MIN_KERNEL_ADDRESS + 32 * 1024 * 1024); 1643 pmap_growkernel(VM_MIN_KERNEL_ADDRESS + 32 * 1024 * 1024);
1644 1644
1645 kpreempt_disable(); 1645 kpreempt_disable();
1646 pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 1646 pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
1647 pmap_update(pmap_kernel()); 1647 pmap_update(pmap_kernel());
1648 memset((void *)idt_vaddr, 0, PAGE_SIZE); 1648 memset((void *)idt_vaddr, 0, PAGE_SIZE);
1649 1649
1650#ifndef XEN 1650#ifndef XEN
1651 pmap_changeprot_local(idt_vaddr, VM_PROT_READ); 1651 pmap_changeprot_local(idt_vaddr, VM_PROT_READ);
1652#endif 1652#endif
1653 pmap_kenter_pa(idt_vaddr + PAGE_SIZE, idt_paddr + PAGE_SIZE, 1653 pmap_kenter_pa(idt_vaddr + PAGE_SIZE, idt_paddr + PAGE_SIZE,
1654 VM_PROT_READ|VM_PROT_WRITE, 0); 1654 VM_PROT_READ|VM_PROT_WRITE, 0);
1655#ifdef XEN 1655#ifdef XEN
1656 /* Steal one more page for LDT */ 1656 /* Steal one more page for LDT */
1657 pmap_kenter_pa(idt_vaddr + 2 * PAGE_SIZE, idt_paddr + 2 * PAGE_SIZE, 1657 pmap_kenter_pa(idt_vaddr + 2 * PAGE_SIZE, idt_paddr + 2 * PAGE_SIZE,
1658 VM_PROT_READ|VM_PROT_WRITE, 0); 1658 VM_PROT_READ|VM_PROT_WRITE, 0);
1659#endif 1659#endif
1660 pmap_kenter_pa(lo32_vaddr, lo32_paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 1660 pmap_kenter_pa(lo32_vaddr, lo32_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
1661 pmap_update(pmap_kernel()); 1661 pmap_update(pmap_kernel());
1662 1662
1663#ifndef XEN 1663#ifndef XEN
1664 idt_init(); 1664 idt_init();
1665 idt = (struct gate_descriptor *)idt_vaddr; 1665 idt = (struct gate_descriptor *)idt_vaddr;
1666 gdtstore = (char *)(idt + NIDT); 1666 gdtstore = (char *)(idt + NIDT);
1667 ldtstore = gdtstore + DYNSEL_START; 1667 ldtstore = gdtstore + DYNSEL_START;
1668#else 1668#else
1669 xen_idt = (struct trap_info *)idt_vaddr; 1669 xen_idt = (struct trap_info *)idt_vaddr;
1670 xen_idt_idx = 0; 1670 xen_idt_idx = 0;
1671 /* Xen wants page-aligned GDT/LDT in separate pages */ 1671 /* Xen wants page-aligned GDT/LDT in separate pages */
1672 ldtstore = (char *) roundup((vaddr_t) (xen_idt + NIDT), PAGE_SIZE); 1672 ldtstore = (char *) roundup((vaddr_t) (xen_idt + NIDT), PAGE_SIZE);
1673 gdtstore = (char *) (ldtstore + PAGE_SIZE); 1673 gdtstore = (char *) (ldtstore + PAGE_SIZE);
1674#endif /* XEN */ 1674#endif /* XEN */
1675 1675
1676 /* make gdt gates and memory segments */ 1676 /* make gdt gates and memory segments */
1677 set_mem_segment(GDT_ADDR_MEM(gdtstore, GCODE_SEL), 0, 1677 set_mem_segment(GDT_ADDR_MEM(gdtstore, GCODE_SEL), 0,
1678 0xfffff, SDT_MEMERA, SEL_KPL, 1, 0, 1); 1678 0xfffff, SDT_MEMERA, SEL_KPL, 1, 0, 1);
1679 1679
1680 set_mem_segment(GDT_ADDR_MEM(gdtstore, GDATA_SEL), 0, 1680 set_mem_segment(GDT_ADDR_MEM(gdtstore, GDATA_SEL), 0,
1681 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 0, 1); 1681 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 0, 1);
1682 1682
1683#ifndef XEN 1683#ifndef XEN
1684 set_sys_segment(GDT_ADDR_SYS(gdtstore, GLDT_SEL), ldtstore, 1684 set_sys_segment(GDT_ADDR_SYS(gdtstore, GLDT_SEL), ldtstore,
1685 LDT_SIZE - 1, SDT_SYSLDT, SEL_KPL, 0); 1685 LDT_SIZE - 1, SDT_SYSLDT, SEL_KPL, 0);
1686#endif 1686#endif
1687 1687
1688 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE_SEL), 0, 1688 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE_SEL), 0,
1689 x86_btop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1); 1689 x86_btop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1);
1690 1690
1691 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA_SEL), 0, 1691 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA_SEL), 0,
1692 x86_btop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 0, 1); 1692 x86_btop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 0, 1);
1693 1693
1694 /* make ldt gates and memory segments */ 1694 /* make ldt gates and memory segments */
1695 setgate((struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL), 1695 setgate((struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
1696 &IDTVEC(oosyscall), 0, SDT_SYS386CGT, SEL_UPL, 1696 &IDTVEC(oosyscall), 0, SDT_SYS386CGT, SEL_UPL,
1697 GSEL(GCODE_SEL, SEL_KPL)); 1697 GSEL(GCODE_SEL, SEL_KPL));
1698 *(struct mem_segment_descriptor *)(ldtstore + LUCODE_SEL) = 1698 *(struct mem_segment_descriptor *)(ldtstore + LUCODE_SEL) =
1699 *GDT_ADDR_MEM(gdtstore, GUCODE_SEL); 1699 *GDT_ADDR_MEM(gdtstore, GUCODE_SEL);
1700 *(struct mem_segment_descriptor *)(ldtstore + LUDATA_SEL) = 1700 *(struct mem_segment_descriptor *)(ldtstore + LUDATA_SEL) =
1701 *GDT_ADDR_MEM(gdtstore, GUDATA_SEL); 1701 *GDT_ADDR_MEM(gdtstore, GUDATA_SEL);
1702 1702
1703 /* 1703 /*
1704 * 32 bit GDT entries. 1704 * 32 bit GDT entries.
1705 */ 1705 */
1706 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE32_SEL), 0, 1706 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE32_SEL), 0,
1707 x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0); 1707 x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0);
1708 1708
1709 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA32_SEL), 0, 1709 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA32_SEL), 0,
1710 x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); 1710 x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0);
1711 1711
1712 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUFS_SEL), 0, 1712 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUFS_SEL), 0,
1713 x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); 1713 x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0);
1714 1714
1715 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUGS_SEL), 0, 1715 set_mem_segment(GDT_ADDR_MEM(gdtstore, GUGS_SEL), 0,
1716 x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); 1716 x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0);
1717 1717
1718 /* 1718 /*
1719 * 32 bit LDT entries. 1719 * 32 bit LDT entries.
1720 */ 1720 */
1721 ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUCODE32_SEL); 1721 ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUCODE32_SEL);
1722 set_mem_segment(ldt_segp, 0, x86_btop(VM_MAXUSER_ADDRESS32) - 1, 1722 set_mem_segment(ldt_segp, 0, x86_btop(VM_MAXUSER_ADDRESS32) - 1,
1723 SDT_MEMERA, SEL_UPL, 1, 1, 0); 1723 SDT_MEMERA, SEL_UPL, 1, 1, 0);
1724 ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUDATA32_SEL); 1724 ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUDATA32_SEL);
1725 set_mem_segment(ldt_segp, 0, x86_btop(VM_MAXUSER_ADDRESS32) - 1, 1725 set_mem_segment(ldt_segp, 0, x86_btop(VM_MAXUSER_ADDRESS32) - 1,
1726 SDT_MEMRWA, SEL_UPL, 1, 1, 0); 1726 SDT_MEMRWA, SEL_UPL, 1, 1, 0);
1727 1727
1728 /* 1728 /*
1729 * Other entries. 1729 * Other entries.
1730 */ 1730 */
1731 memcpy((struct gate_descriptor *)(ldtstore + LSOL26CALLS_SEL), 1731 memcpy((struct gate_descriptor *)(ldtstore + LSOL26CALLS_SEL),
1732 (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL), 1732 (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
1733 sizeof (struct gate_descriptor)); 1733 sizeof (struct gate_descriptor));
1734 memcpy((struct gate_descriptor *)(ldtstore + LBSDICALLS_SEL), 1734 memcpy((struct gate_descriptor *)(ldtstore + LBSDICALLS_SEL),
1735 (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL), 1735 (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL),
1736 sizeof (struct gate_descriptor)); 1736 sizeof (struct gate_descriptor));
1737 1737
1738 /* exceptions */ 1738 /* exceptions */
1739 for (x = 0; x < 32; x++) { 1739 for (x = 0; x < 32; x++) {
1740#ifndef XEN 1740#ifndef XEN
1741 idt_vec_reserve(x); 1741 idt_vec_reserve(x);
1742 switch (x) { 1742 switch (x) {
1743 case 2: /* NMI */ 1743 case 2: /* NMI */
1744 ist = 3; 1744 ist = 3;
1745 break; 1745 break;
1746 case 8: /* double fault */ 1746 case 8: /* double fault */
1747 ist = 2; 1747 ist = 2;
1748 break; 1748 break;
1749 default: 1749 default:
1750 ist = 0; 1750 ist = 0;
1751 break; 1751 break;
1752 } 1752 }
1753 setgate(&idt[x], IDTVEC(exceptions)[x], ist, SDT_SYS386IGT, 1753 setgate(&idt[x], IDTVEC(exceptions)[x], ist, SDT_SYS386IGT,
1754 (x == 3 || x == 4) ? SEL_UPL : SEL_KPL, 1754 (x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
1755 GSEL(GCODE_SEL, SEL_KPL)); 1755 GSEL(GCODE_SEL, SEL_KPL));
1756#else /* XEN */ 1756#else /* XEN */
1757 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); 1757 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE);
1758 xen_idt[xen_idt_idx].vector = x; 1758 xen_idt[xen_idt_idx].vector = x;
1759 1759
1760 switch (x) { 1760 switch (x) {
1761 case 2: /* NMI */ 1761 case 2: /* NMI */
1762 case 18: /* MCA */ 1762 case 18: /* MCA */
1763 TI_SET_IF(&(xen_idt[xen_idt_idx]), 2); 1763 TI_SET_IF(&(xen_idt[xen_idt_idx]), 2);
1764 break; 1764 break;
1765 case 3: 1765 case 3:
1766 case 4: 1766 case 4:
1767 xen_idt[xen_idt_idx].flags = SEL_UPL; 1767 xen_idt[xen_idt_idx].flags = SEL_UPL;
1768 break; 1768 break;
1769 default: 1769 default:
1770 xen_idt[xen_idt_idx].flags = SEL_KPL; 1770 xen_idt[xen_idt_idx].flags = SEL_KPL;
1771 break; 1771 break;
1772 } 1772 }
1773 1773
1774 xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); 1774 xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
1775 xen_idt[xen_idt_idx].address = 1775 xen_idt[xen_idt_idx].address =
1776 (unsigned long)IDTVEC(exceptions)[x]; 1776 (unsigned long)IDTVEC(exceptions)[x];
1777 xen_idt_idx++; 1777 xen_idt_idx++;
1778#endif /* XEN */ 1778#endif /* XEN */
1779 } 1779 }
1780 1780
1781 /* new-style interrupt gate for syscalls */ 1781 /* new-style interrupt gate for syscalls */
1782#ifndef XEN 1782#ifndef XEN
1783 idt_vec_reserve(128); 1783 idt_vec_reserve(128);
1784 setgate(&idt[128], &IDTVEC(osyscall), 0, SDT_SYS386IGT, SEL_UPL, 1784 setgate(&idt[128], &IDTVEC(osyscall), 0, SDT_SYS386IGT, SEL_UPL,
1785 GSEL(GCODE_SEL, SEL_KPL)); 1785 GSEL(GCODE_SEL, SEL_KPL));
1786#else 1786#else
1787 xen_idt[xen_idt_idx].vector = 128; 1787 xen_idt[xen_idt_idx].vector = 128;
1788 xen_idt[xen_idt_idx].flags = SEL_KPL; 1788 xen_idt[xen_idt_idx].flags = SEL_KPL;
1789 xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); 1789 xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
1790 xen_idt[xen_idt_idx].address = (unsigned long) &IDTVEC(osyscall); 1790 xen_idt[xen_idt_idx].address = (unsigned long) &IDTVEC(osyscall);
1791 xen_idt_idx++; 1791 xen_idt_idx++;
1792 pmap_changeprot_local(idt_vaddr, VM_PROT_READ); 1792 pmap_changeprot_local(idt_vaddr, VM_PROT_READ);
1793#endif /* XEN */ 1793#endif /* XEN */
1794 kpreempt_enable(); 1794 kpreempt_enable();
1795 1795
1796 setregion(&region, gdtstore, DYNSEL_START - 1); 1796 setregion(&region, gdtstore, DYNSEL_START - 1);
1797 lgdt(&region); 1797 lgdt(&region);
1798 1798
1799#ifdef XEN 1799#ifdef XEN
1800 /* Init Xen callbacks and syscall handlers */ 1800 /* Init Xen callbacks and syscall handlers */
1801 if (HYPERVISOR_set_callbacks( 1801 if (HYPERVISOR_set_callbacks(
1802 (unsigned long) hypervisor_callback, 1802 (unsigned long) hypervisor_callback,
1803 (unsigned long) failsafe_callback, 1803 (unsigned long) failsafe_callback,
1804 (unsigned long) Xsyscall)) 1804 (unsigned long) Xsyscall))
1805 panic("HYPERVISOR_set_callbacks() failed"); 1805 panic("HYPERVISOR_set_callbacks() failed");
1806#endif /* XEN */ 1806#endif /* XEN */
1807 cpu_init_idt(); 1807 cpu_init_idt();
1808 1808
1809 init_x86_64_ksyms(); 1809 init_x86_64_ksyms();
1810 1810
1811#ifndef XEN 1811#ifndef XEN
1812 intr_default_setup(); 1812 intr_default_setup();
1813#else 1813#else
1814 events_default_setup(); 1814 events_default_setup();
1815#endif 1815#endif
1816 1816
1817 splraise(IPL_HIGH); 1817 splraise(IPL_HIGH);
1818 x86_enable_intr(); 1818 x86_enable_intr();
1819 1819
1820#ifdef DDB 1820#ifdef DDB
1821 if (boothowto & RB_KDB) 1821 if (boothowto & RB_KDB)
1822 Debugger(); 1822 Debugger();
1823#endif 1823#endif
1824#ifdef KGDB 1824#ifdef KGDB
1825 kgdb_port_init(); 1825 kgdb_port_init();
1826 if (boothowto & RB_KDB) { 1826 if (boothowto & RB_KDB) {
1827 kgdb_debug_init = 1; 1827 kgdb_debug_init = 1;
1828 kgdb_connect(1); 1828 kgdb_connect(1);
1829 } 1829 }
1830#endif 1830#endif
1831} 1831}
1832 1832
1833void 1833void
1834cpu_reset(void) 1834cpu_reset(void)
1835{ 1835{
1836 x86_disable_intr(); 1836 x86_disable_intr();
1837 1837
1838#ifdef XEN 1838#ifdef XEN
1839 HYPERVISOR_reboot(); 1839 HYPERVISOR_reboot();
1840#else 1840#else
1841 1841
1842 x86_reset(); 1842 x86_reset();
1843 1843
1844 /* 1844 /*
1845 * Try to cause a triple fault and watchdog reset by making the IDT 1845 * Try to cause a triple fault and watchdog reset by making the IDT
1846 * invalid and causing a fault. 1846 * invalid and causing a fault.
1847 */ 1847 */
1848 kpreempt_disable(); 1848 kpreempt_disable();
1849 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE);  1849 pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE);
1850 pmap_changeprot_local(idt_vaddr + PAGE_SIZE, 1850 pmap_changeprot_local(idt_vaddr + PAGE_SIZE,
1851 VM_PROT_READ|VM_PROT_WRITE); 1851 VM_PROT_READ|VM_PROT_WRITE);
1852 memset((void *)idt, 0, NIDT * sizeof(idt[0])); 1852 memset((void *)idt, 0, NIDT * sizeof(idt[0]));
1853 kpreempt_enable(); 1853 kpreempt_enable();
1854 breakpoint(); 1854 breakpoint();
1855 1855
1856#if 0 1856#if 0
1857 /* 1857 /*
1858 * Try to cause a triple fault and watchdog reset by unmapping the 1858 * Try to cause a triple fault and watchdog reset by unmapping the
1859 * entire address space and doing a TLB flush. 1859 * entire address space and doing a TLB flush.
1860 */ 1860 */
1861 memset((void *)PTD, 0, PAGE_SIZE); 1861 memset((void *)PTD, 0, PAGE_SIZE);
1862 tlbflush();  1862 tlbflush();
1863#endif 1863#endif
1864#endif /* XEN */ 1864#endif /* XEN */
1865 1865
1866 for (;;); 1866 for (;;);
1867} 1867}
1868 1868
1869void 1869void
1870cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags) 1870cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
1871{ 1871{
1872 const struct trapframe *tf = l->l_md.md_regs; 1872 const struct trapframe *tf = l->l_md.md_regs;
1873 __greg_t ras_rip; 1873 __greg_t ras_rip;
1874 1874
1875 /* Copy general registers member by member */ 1875 /* Copy general registers member by member */
1876#define copy_from_tf(reg, REG, idx) mcp->__gregs[_REG_##REG] = tf->tf_##reg; 1876#define copy_from_tf(reg, REG, idx) mcp->__gregs[_REG_##REG] = tf->tf_##reg;
1877 _FRAME_GREG(copy_from_tf) 1877 _FRAME_GREG(copy_from_tf)
1878#undef copy_from_tf 1878#undef copy_from_tf
1879 1879
1880 if ((ras_rip = (__greg_t)ras_lookup(l->l_proc, 1880 if ((ras_rip = (__greg_t)ras_lookup(l->l_proc,
1881 (void *) mcp->__gregs[_REG_RIP])) != -1) 1881 (void *) mcp->__gregs[_REG_RIP])) != -1)
1882 mcp->__gregs[_REG_RIP] = ras_rip; 1882 mcp->__gregs[_REG_RIP] = ras_rip;
1883 1883
1884 *flags |= _UC_CPU; 1884 *flags |= _UC_CPU;
1885 1885
1886 mcp->_mc_tlsbase = (uintptr_t)l->l_private; 1886 mcp->_mc_tlsbase = (uintptr_t)l->l_private;
1887 *flags |= _UC_TLSBASE; 1887 *flags |= _UC_TLSBASE;
1888 1888
1889 process_read_fpregs_xmm(l, (struct fxsave *)&mcp->__fpregs); 1889 process_read_fpregs_xmm(l, (struct fxsave *)&mcp->__fpregs);
1890 *flags |= _UC_FPU; 1890 *flags |= _UC_FPU;
1891} 1891}
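
The copy_from_tf/copy_to_tf pairs rely on an X-macro: _FRAME_GREG() names each general register once, and each caller supplies a one-line per-register expansion. A self-contained sketch of the pattern; the register list and struct members here are invented, the real list lives in the machine frame headers.

/*
 * Illustration of the X-macro technique only; names are hypothetical.
 */
#include <stdio.h>

#define FRAME_GREG(_)   _(rdi) _(rsi) _(rbx) _(rip) _(rsp)

struct trapframe { long tf_rdi, tf_rsi, tf_rbx, tf_rip, tf_rsp; };
struct mcontext  { long mc_rdi, mc_rsi, mc_rbx, mc_rip, mc_rsp; };

int
main(void)
{
        struct trapframe tf = { 1, 2, 3, 4, 5 };
        struct mcontext mc;

        /* Expand the list once, copying member by member. */
#define copy_from_tf(reg)       mc.mc_##reg = tf.tf_##reg;
        FRAME_GREG(copy_from_tf)
#undef copy_from_tf

        printf("rip copied: %ld\n", mc.mc_rip);
        return 0;
}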
1892 1892
1893int 1893int
1894cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags) 1894cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
1895{ 1895{
1896 struct trapframe *tf = l->l_md.md_regs; 1896 struct trapframe *tf = l->l_md.md_regs;
1897 const __greg_t *gr = mcp->__gregs; 1897 const __greg_t *gr = mcp->__gregs;
1898 struct proc *p = l->l_proc; 1898 struct proc *p = l->l_proc;
1899 int error; 1899 int error;
1900 int err, trapno; 1900 int err, trapno;
1901 int64_t rflags; 1901 int64_t rflags;
1902 1902
1903 CTASSERT(sizeof (mcontext_t) == 26 * 8 + 8 + 512); 1903 CTASSERT(sizeof (mcontext_t) == 26 * 8 + 8 + 512);
1904 1904
1905 if ((flags & _UC_CPU) != 0) { 1905 if ((flags & _UC_CPU) != 0) {
1906 error = cpu_mcontext_validate(l, mcp); 1906 error = cpu_mcontext_validate(l, mcp);
1907 if (error != 0) 1907 if (error != 0)
1908 return error; 1908 return error;
1909 /* 1909 /*
1910 * save and restore some values we don't want to change. 1910 * save and restore some values we don't want to change.
1911 * _FRAME_GREG(copy_to_tf) below overwrites them. 1911 * _FRAME_GREG(copy_to_tf) below overwrites them.
1912 * 1912 *
1913 * XXX maybe inline this. 1913 * XXX maybe inline this.
1914 */ 1914 */
1915 rflags = tf->tf_rflags; 1915 rflags = tf->tf_rflags;
1916 err = tf->tf_err; 1916 err = tf->tf_err;
1917 trapno = tf->tf_trapno; 1917 trapno = tf->tf_trapno;
1918 1918
1919 /* Copy general registers member by member */ 1919 /* Copy general registers member by member */
1920#define copy_to_tf(reg, REG, idx) tf->tf_##reg = gr[_REG_##REG]; 1920#define copy_to_tf(reg, REG, idx) tf->tf_##reg = gr[_REG_##REG];
1921 _FRAME_GREG(copy_to_tf) 1921 _FRAME_GREG(copy_to_tf)
1922#undef copy_to_tf 1922#undef copy_to_tf
1923 1923
1924#ifdef XEN 1924#ifdef XEN
1925 /* 1925 /*
1926 * Xen has its own way of dealing with %cs and %ss, 1926 * Xen has its own way of dealing with %cs and %ss,
1927 * reset them to proper values. 1927 * reset them to proper values.
1928 */ 1928 */
1929 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 1929 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
1930 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 1930 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
1931#endif 1931#endif
1932 rflags &= ~PSL_USER; 1932 rflags &= ~PSL_USER;
1933 tf->tf_rflags = rflags | (gr[_REG_RFLAGS] & PSL_USER); 1933 tf->tf_rflags = rflags | (gr[_REG_RFLAGS] & PSL_USER);
1934 tf->tf_err = err; 1934 tf->tf_err = err;
1935 tf->tf_trapno = trapno; 1935 tf->tf_trapno = trapno;
1936 1936
1937 l->l_md.md_flags |= MDL_IRET; 1937 l->l_md.md_flags |= MDL_IRET;
1938 } 1938 }
1939 1939
1940 if ((flags & _UC_FPU) != 0) 1940 if ((flags & _UC_FPU) != 0)
1941 process_write_fpregs_xmm(l, (const struct fxsave *)&mcp->__fpregs); 1941 process_write_fpregs_xmm(l, (const struct fxsave *)&mcp->__fpregs);
1942 1942
1943 if ((flags & _UC_TLSBASE) != 0) 1943 if ((flags & _UC_TLSBASE) != 0)
1944 lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase); 1944 lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
1945 1945
1946 mutex_enter(p->p_lock); 1946 mutex_enter(p->p_lock);
1947 if (flags & _UC_SETSTACK) 1947 if (flags & _UC_SETSTACK)
1948 l->l_sigstk.ss_flags |= SS_ONSTACK; 1948 l->l_sigstk.ss_flags |= SS_ONSTACK;
1949 if (flags & _UC_CLRSTACK) 1949 if (flags & _UC_CLRSTACK)
1950 l->l_sigstk.ss_flags &= ~SS_ONSTACK; 1950 l->l_sigstk.ss_flags &= ~SS_ONSTACK;
1951 mutex_exit(p->p_lock); 1951 mutex_exit(p->p_lock);
1952 1952
1953 return 0; 1953 return 0;
1954} 1954}
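
The rflags handling in cpu_setmcontext() is a masked merge: bits outside PSL_USER keep their current, kernel-controlled values, and only the user-writable bits are taken from the supplied context. A sketch of the same bit merge; the PSL_USER value used here is a placeholder, not the real machine definition.

/*
 * Illustration only; PSL_USER below is a made-up mask.
 */
#include <assert.h>
#include <stdint.h>

#define PSL_USER        0x0cd5ULL       /* placeholder mask of user-writable flag bits */

int
main(void)
{
        uint64_t cur = 0x0202;                  /* current tf_rflags (hypothetical) */
        uint64_t req = 0xffffffffffffffffULL;   /* flags requested by userland */
        uint64_t merged = (cur & ~PSL_USER) | (req & PSL_USER);

        /* Nothing outside PSL_USER can be changed by the request. */
        assert((merged & ~PSL_USER) == (cur & ~PSL_USER));
        return 0;
}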
1955 1955
1956int 1956int
1957cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp) 1957cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
1958{ 1958{
1959 const __greg_t *gr; 1959 const __greg_t *gr;
1960 uint16_t sel; 1960 uint16_t sel;
1961 int error; 1961 int error;
1962 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; 1962 struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
1963 struct proc *p = l->l_proc; 1963 struct proc *p = l->l_proc;
1964 struct trapframe *tf = l->l_md.md_regs; 1964 struct trapframe *tf = l->l_md.md_regs;
1965 1965
1966 gr = mcp->__gregs; 1966 gr = mcp->__gregs;
1967 1967
1968 if (((gr[_REG_RFLAGS] ^ tf->tf_rflags) & PSL_USERSTATIC) != 0) 1968 if (((gr[_REG_RFLAGS] ^ tf->tf_rflags) & PSL_USERSTATIC) != 0)
1969 return EINVAL; 1969 return EINVAL;
1970 1970
1971 if (__predict_false(pmap->pm_ldt != NULL)) { 1971 if (__predict_false(pmap->pm_ldt != NULL)) {
1972 error = valid_user_selector(l, gr[_REG_ES]); 1972 error = valid_user_selector(l, gr[_REG_ES]);
1973 if (error != 0) 1973 if (error != 0)
1974 return error; 1974 return error;
1975 1975
1976 error = valid_user_selector(l, gr[_REG_FS]); 1976 error = valid_user_selector(l, gr[_REG_FS]);
1977 if (error != 0) 1977 if (error != 0)
1978 return error; 1978 return error;
1979 1979
1980 error = valid_user_selector(l, gr[_REG_GS]); 1980 error = valid_user_selector(l, gr[_REG_GS]);
1981 if (error != 0) 1981 if (error != 0)
1982 return error; 1982 return error;
1983 1983
1984 if ((gr[_REG_DS] & 0xffff) == 0) 1984 if ((gr[_REG_DS] & 0xffff) == 0)
1985 return EINVAL; 1985 return EINVAL;
1986 error = valid_user_selector(l, gr[_REG_DS]); 1986 error = valid_user_selector(l, gr[_REG_DS]);
1987 if (error != 0) 1987 if (error != 0)
1988 return error; 1988 return error;
1989 1989
1990#ifndef XEN 1990#ifndef XEN
1991 if ((gr[_REG_SS] & 0xffff) == 0) 1991 if ((gr[_REG_SS] & 0xffff) == 0)
1992 return EINVAL; 1992 return EINVAL;
1993 error = valid_user_selector(l, gr[_REG_SS]); 1993 error = valid_user_selector(l, gr[_REG_SS]);
1994 if (error != 0) 1994 if (error != 0)
1995 return error; 1995 return error;
1996#endif 1996#endif
1997 } else { 1997 } else {
1998#define VUD(sel) \ 1998#define VUD(sel) \
1999 ((p->p_flag & PK_32) ? VALID_USER_DSEL32(sel) : VALID_USER_DSEL(sel)) 1999 ((p->p_flag & PK_32) ? VALID_USER_DSEL32(sel) : VALID_USER_DSEL(sel))
2000 sel = gr[_REG_ES] & 0xffff; 2000 sel = gr[_REG_ES] & 0xffff;
2001 if (sel != 0 && !VUD(sel)) 2001 if (sel != 0 && !VUD(sel))
2002 return EINVAL; 2002 return EINVAL;
2003 2003
2004/* XXX: Shouldn't this be FSEL32? */ 2004/* XXX: Shouldn't this be FSEL32? */
2005#define VUF(sel) \ 2005#define VUF(sel) \
2006 ((p->p_flag & PK_32) ? VALID_USER_DSEL32(sel) : VALID_USER_DSEL(sel)) 2006 ((p->p_flag & PK_32) ? VALID_USER_DSEL32(sel) : VALID_USER_DSEL(sel))
2007 sel = gr[_REG_FS] & 0xffff; 2007 sel = gr[_REG_FS] & 0xffff;
2008 if (sel != 0 && !VUF(sel)) 2008 if (sel != 0 && !VUF(sel))
2009 return EINVAL; 2009 return EINVAL;
2010 2010
2011#define VUG(sel) \ 2011#define VUG(sel) \
2012 ((p->p_flag & PK_32) ? VALID_USER_GSEL32(sel) : VALID_USER_DSEL(sel)) 2012 ((p->p_flag & PK_32) ? VALID_USER_GSEL32(sel) : VALID_USER_DSEL(sel))
2013 sel = gr[_REG_GS] & 0xffff; 2013 sel = gr[_REG_GS] & 0xffff;
2014 if (sel != 0 && !VUG(sel)) 2014 if (sel != 0 && !VUG(sel))
2015 return EINVAL; 2015 return EINVAL;
2016 2016
2017 sel = gr[_REG_DS] & 0xffff; 2017 sel = gr[_REG_DS] & 0xffff;
2018 if (!VUD(sel)) 2018 if (!VUD(sel))
2019 return EINVAL; 2019 return EINVAL;
2020 2020
2021#ifndef XEN 2021#ifndef XEN
2022 sel = gr[_REG_SS] & 0xffff; 2022 sel = gr[_REG_SS] & 0xffff;
2023 if (!VUD(sel)) 2023 if (!VUD(sel))
2024 return EINVAL; 2024 return EINVAL;
2025#endif 2025#endif
2026 2026
2027 } 2027 }
2028 2028
2029#ifndef XEN 2029#ifndef XEN
2030#define VUC(sel) \ 2030#define VUC(sel) \
2031 ((p->p_flag & PK_32) ? VALID_USER_CSEL32(sel) : VALID_USER_CSEL(sel)) 2031 ((p->p_flag & PK_32) ? VALID_USER_CSEL32(sel) : VALID_USER_CSEL(sel))
2032 sel = gr[_REG_CS] & 0xffff; 2032 sel = gr[_REG_CS] & 0xffff;
2033 if (!VUC(sel)) 2033 if (!VUC(sel))
2034 return EINVAL; 2034 return EINVAL;
2035#endif 2035#endif
2036 2036
2037 if (gr[_REG_RIP] >= VM_MAXUSER_ADDRESS) 2037 if (gr[_REG_RIP] >= VM_MAXUSER_ADDRESS)
2038 return EINVAL; 2038 return EINVAL;
2039 return 0; 2039 return 0;
2040} 2040}
2041 2041
2042void 2042void
2043cpu_initclocks(void) 2043cpu_initclocks(void)
2044{ 2044{
2045 (*initclock_func)(); 2045 (*initclock_func)();
2046} 2046}
2047 2047
2048static int 2048static int
2049valid_user_selector(struct lwp *l, uint64_t seg) 2049valid_user_selector(struct lwp *l, uint64_t seg)
2050{ 2050{
2051 int off, len; 2051 int off, len;
2052 char *dt; 2052 char *dt;
2053 struct mem_segment_descriptor *sdp; 2053 struct mem_segment_descriptor *sdp;
2054 struct proc *p = l->l_proc; 2054 struct proc *p = l->l_proc;
2055 struct pmap *pmap= p->p_vmspace->vm_map.pmap; 2055 struct pmap *pmap= p->p_vmspace->vm_map.pmap;
2056 uint64_t base; 2056 uint64_t base;
2057 2057
2058 seg &= 0xffff; 2058 seg &= 0xffff;
2059 2059
2060 if (seg == 0) 2060 if (seg == 0)
2061 return 0; 2061 return 0;
2062 2062
2063 off = (seg & 0xfff8); 2063 off = (seg & 0xfff8);
2064 if (seg & SEL_LDT) { 2064 if (seg & SEL_LDT) {
2065 if (pmap->pm_ldt != NULL) { 2065 if (pmap->pm_ldt != NULL) {
2066 len = pmap->pm_ldt_len; /* XXX broken */ 2066 len = pmap->pm_ldt_len; /* XXX broken */
2067 dt = (char *)pmap->pm_ldt; 2067 dt = (char *)pmap->pm_ldt;
2068 } else { 2068 } else {
2069 dt = ldtstore; 2069 dt = ldtstore;
2070 len = LDT_SIZE; 2070 len = LDT_SIZE;
2071 } 2071 }
2072 2072
2073 if (off > (len - 8)) 2073 if (off > (len - 8))
2074 return EINVAL; 2074 return EINVAL;
2075 } else { 2075 } else {
2076 CTASSERT(GUDATA_SEL & SEL_LDT); 2076 CTASSERT(GUDATA_SEL & SEL_LDT);
2077 KASSERT(seg != GUDATA_SEL); 2077 KASSERT(seg != GUDATA_SEL);
2078 CTASSERT(GUDATA32_SEL & SEL_LDT); 2078 CTASSERT(GUDATA32_SEL & SEL_LDT);
2079 KASSERT(seg != GUDATA32_SEL); 2079 KASSERT(seg != GUDATA32_SEL);
2080 return EINVAL; 2080 return EINVAL;
2081 } 2081 }
2082 2082
2083 sdp = (struct mem_segment_descriptor *)(dt + off); 2083 sdp = (struct mem_segment_descriptor *)(dt + off);
2084 if (sdp->sd_type < SDT_MEMRO || sdp->sd_p == 0) 2084 if (sdp->sd_type < SDT_MEMRO || sdp->sd_p == 0)
2085 return EINVAL; 2085 return EINVAL;
2086 2086
2087 base = ((uint64_t)sdp->sd_hibase << 32) | ((uint64_t)sdp->sd_lobase); 2087 base = ((uint64_t)sdp->sd_hibase << 32) | ((uint64_t)sdp->sd_lobase);
2088 if (sdp->sd_gran == 1) 2088 if (sdp->sd_gran == 1)
2089 base <<= PAGE_SHIFT; 2089 base <<= PAGE_SHIFT;
2090 2090
2091 if (base >= VM_MAXUSER_ADDRESS) 2091 if (base >= VM_MAXUSER_ADDRESS)
2092 return EINVAL; 2092 return EINVAL;
2093 2093
2094 return 0; 2094 return 0;
2095} 2095}
2096 2096
2097int 2097int
2098mm_md_kernacc(void *ptr, vm_prot_t prot, bool *handled) 2098mm_md_kernacc(void *ptr, vm_prot_t prot, bool *handled)
2099{ 2099{
2100 extern int start, __data_start; 2100 extern int start, __data_start;
2101 const vaddr_t v = (vaddr_t)ptr; 2101 const vaddr_t v = (vaddr_t)ptr;
2102 2102
2103 if (v >= (vaddr_t)&start && v < (vaddr_t)kern_end) { 2103 if (v >= (vaddr_t)&start && v < (vaddr_t)kern_end) {
2104 *handled = true; 2104 *handled = true;
2105 /* Either the text or rodata segment */ 2105 /* Either the text or rodata segment */
2106 if (v < (vaddr_t)&__data_start && (prot & VM_PROT_WRITE)) 2106 if (v < (vaddr_t)&__data_start && (prot & VM_PROT_WRITE))
2107 return EFAULT; 2107 return EFAULT;
2108 2108
2109 } else if (v >= module_start && v < module_end) { 2109 } else if (v >= module_start && v < module_end) {
2110 *handled = true; 2110 *handled = true;
2111 if (!uvm_map_checkprot(module_map, v, v + 1, prot)) 2111 if (!uvm_map_checkprot(module_map, v, v + 1, prot))
2112 return EFAULT; 2112 return EFAULT;
2113 } else { 2113 } else {
2114 *handled = false; 2114 *handled = false;
2115 } 2115 }
2116 return 0; 2116 return 0;
2117} 2117}
2118 2118
2119/* 2119/*
2120 * Zero out an LWP's TLS context (%fs and %gs and associated stuff). 2120 * Zero out an LWP's TLS context (%fs and %gs and associated stuff).
2121 * Used when exec'ing a new program. 2121 * Used when exec'ing a new program.
2122 */ 2122 */
2123 2123
2124void 2124void
2125cpu_fsgs_zero(struct lwp *l) 2125cpu_fsgs_zero(struct lwp *l)
2126{ 2126{
2127 struct trapframe * const tf = l->l_md.md_regs; 2127 struct trapframe * const tf = l->l_md.md_regs;
2128 struct pcb *pcb; 2128 struct pcb *pcb;
2129 uint64_t zero = 0; 2129 uint64_t zero = 0;
2130 2130
2131 pcb = lwp_getpcb(l); 2131 pcb = lwp_getpcb(l);
2132 if (l == curlwp) { 2132 if (l == curlwp) {
2133 kpreempt_disable(); 2133 kpreempt_disable();
2134 tf->tf_fs = 0; 2134 tf->tf_fs = 0;
2135 tf->tf_gs = 0; 2135 tf->tf_gs = 0;
2136 setfs(0); 2136 setfs(0);
2137#ifndef XEN 2137#ifndef XEN
2138 setusergs(0); 2138 setusergs(0);
2139#else 2139#else
2140 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0); 2140 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0);
2141#endif 2141#endif
2142 if ((l->l_proc->p_flag & PK_32) == 0) { 2142 if ((l->l_proc->p_flag & PK_32) == 0) {
2143#ifndef XEN 2143#ifndef XEN
2144 wrmsr(MSR_FSBASE, 0); 2144 wrmsr(MSR_FSBASE, 0);
2145 wrmsr(MSR_KERNELGSBASE, 0); 2145 wrmsr(MSR_KERNELGSBASE, 0);
2146#else 2146#else
2147 HYPERVISOR_set_segment_base(SEGBASE_FS, 0); 2147 HYPERVISOR_set_segment_base(SEGBASE_FS, 0);
2148 HYPERVISOR_set_segment_base(SEGBASE_GS_USER, 0); 2148 HYPERVISOR_set_segment_base(SEGBASE_GS_USER, 0);
2149#endif 2149#endif
2150 } 2150 }
2151 pcb->pcb_fs = 0; 2151 pcb->pcb_fs = 0;
2152 pcb->pcb_gs = 0; 2152 pcb->pcb_gs = 0;
2153 update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &zero); 2153 update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &zero);
2154 update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &zero); 2154 update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &zero);
2155 kpreempt_enable(); 2155 kpreempt_enable();
2156 } else { 2156 } else {
2157 tf->tf_fs = 0; 2157 tf->tf_fs = 0;
2158 tf->tf_gs = 0; 2158 tf->tf_gs = 0;
2159 pcb->pcb_fs = 0; 2159 pcb->pcb_fs = 0;
2160 pcb->pcb_gs = 0; 2160 pcb->pcb_gs = 0;
2161 } 2161 }
2162 2162
2163} 2163}
2164 2164
2165/* 2165/*
2166 * Load an LWP's TLS context, possibly changing the %fs and %gs selectors. 2166 * Load an LWP's TLS context, possibly changing the %fs and %gs selectors.
2167 * Used only for 32-bit processes. 2167 * Used only for 32-bit processes.
2168 */ 2168 */
2169 2169
2170void 2170void
2171cpu_fsgs_reload(struct lwp *l, int fssel, int gssel) 2171cpu_fsgs_reload(struct lwp *l, int fssel, int gssel)
2172{ 2172{
2173 struct trapframe *tf; 2173 struct trapframe *tf;
2174 struct pcb *pcb; 2174 struct pcb *pcb;
2175 2175
2176 KASSERT(l->l_proc->p_flag & PK_32); 2176 KASSERT(l->l_proc->p_flag & PK_32);
2177 tf = l->l_md.md_regs; 2177 tf = l->l_md.md_regs;
2178 if (l == curlwp) { 2178 if (l == curlwp) {
2179 pcb = lwp_getpcb(l); 2179 pcb = lwp_getpcb(l);
2180 kpreempt_disable(); 2180 kpreempt_disable();
2181 update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs); 2181 update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs);
2182 update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs); 2182 update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs);
2183 setfs(fssel); 2183 setfs(fssel);
2184#ifndef XEN 2184#ifndef XEN
2185 setusergs(gssel); 2185 setusergs(gssel);
2186#else 2186#else
2187 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gssel); 2187 HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gssel);
2188#endif 2188#endif
2189 tf->tf_fs = fssel; 2189 tf->tf_fs = fssel;
2190 tf->tf_gs = gssel; 2190 tf->tf_gs = gssel;
2191 kpreempt_enable(); 2191 kpreempt_enable();
2192 } else { 2192 } else {
2193 tf->tf_fs = fssel; 2193 tf->tf_fs = fssel;
2194 tf->tf_gs = gssel; 2194 tf->tf_gs = gssel;
2195 } 2195 }
2196} 2196}
2197 2197
2198 2198
2199#ifdef __HAVE_DIRECT_MAP 2199#ifdef __HAVE_DIRECT_MAP
2200bool 2200bool
2201mm_md_direct_mapped_io(void *addr, paddr_t *paddr) 2201mm_md_direct_mapped_io(void *addr, paddr_t *paddr)
2202{ 2202{
2203 vaddr_t va = (vaddr_t)addr; 2203 vaddr_t va = (vaddr_t)addr;
2204 2204
2205 if (va >= PMAP_DIRECT_BASE && va < PMAP_DIRECT_END) { 2205 if (va >= PMAP_DIRECT_BASE && va < PMAP_DIRECT_END) {
2206 *paddr = PMAP_DIRECT_UNMAP(va); 2206 *paddr = PMAP_DIRECT_UNMAP(va);
2207 return true; 2207 return true;
2208 } 2208 }
2209 return false; 2209 return false;
2210} 2210}
2211 2211
2212bool 2212bool
2213mm_md_direct_mapped_phys(paddr_t paddr, vaddr_t *vaddr) 2213mm_md_direct_mapped_phys(paddr_t paddr, vaddr_t *vaddr)
2214{ 2214{
2215 *vaddr = PMAP_DIRECT_MAP(paddr); 2215 *vaddr = PMAP_DIRECT_MAP(paddr);
2216 return true; 2216 return true;
2217} 2217}
2218#endif 2218#endif
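
An aside on the selector checks in cpu_mcontext_validate() and valid_user_selector() above: both rely on the standard x86 segment-selector layout, where the low two bits hold the requested privilege level, bit 2 is the table indicator (the SEL_LDT test), and the remaining bits index the descriptor table in 8-byte units, which is why the descriptor offset is computed as seg & 0xfff8. The following minimal sketch (plain userland C, with illustrative macro names rather than the kernel's own headers) just decodes those fields to make the masking explicit; it is an editorial illustration, not part of the committed source.

	#include <stdint.h>
	#include <stdio.h>

	#define SEL_RPL_MASK	0x0003	/* requested privilege level (bits 0-1) */
	#define SEL_TI_LDT	0x0004	/* table indicator: set = LDT, clear = GDT */
	#define SEL_OFF_MASK	0xfff8	/* descriptor byte offset, i.e. index * 8 */

	/* Decode a 16-bit x86 segment selector into its three fields. */
	static void
	decode_selector(uint16_t sel)
	{
		unsigned int rpl = sel & SEL_RPL_MASK;
		int ldt = (sel & SEL_TI_LDT) != 0;
		unsigned int off = sel & SEL_OFF_MASK;	/* same mask as valid_user_selector() */

		printf("selector 0x%04x: rpl=%u table=%s offset=%u (index %u)\n",
		    sel, rpl, ldt ? "LDT" : "GDT", off, off >> 3);
	}

	int
	main(void)
	{
		decode_selector(0x0023);	/* GDT index 4, RPL 3: a typical flat user data selector */
		decode_selector(0x0007);	/* LDT index 0, RPL 3 */
		return 0;
	}

With that layout in mind, the off > (len - 8) test in valid_user_selector() simply rejects any index that would point past the last descriptor in the table.
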

cvs diff -r1.754 -r1.755 src/sys/arch/i386/i386/machdep.c

--- src/sys/arch/i386/i386/machdep.c 2015/04/24 00:04:04 1.754
+++ src/sys/arch/i386/i386/machdep.c 2016/05/15 10:35:54 1.755
@@ -1,1766 +1,1766 @@ @@ -1,1766 +1,1766 @@
1/* $NetBSD: machdep.c,v 1.754 2015/04/24 00:04:04 khorben Exp $ */ 1/* $NetBSD: machdep.c,v 1.755 2016/05/15 10:35:54 maxv Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009 4 * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009
5 * The NetBSD Foundation, Inc. 5 * The NetBSD Foundation, Inc.
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * This code is derived from software contributed to The NetBSD Foundation 8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace 9 * by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace
10 * Simulation Facility NASA Ames Research Center, by Julio M. Merino Vidal, 10 * Simulation Facility NASA Ames Research Center, by Julio M. Merino Vidal,
11 * and by Andrew Doran. 11 * and by Andrew Doran.
12 * 12 *
13 * Redistribution and use in source and binary forms, with or without 13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions 14 * modification, are permitted provided that the following conditions
15 * are met: 15 * are met:
16 * 1. Redistributions of source code must retain the above copyright 16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer. 17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright 18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the 19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution. 20 * documentation and/or other materials provided with the distribution.
21 * 21 *
22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE. 32 * POSSIBILITY OF SUCH DAMAGE.
33 */ 33 */
34 34
35/*- 35/*-
36 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. 36 * Copyright (c) 1982, 1987, 1990 The Regents of the University of California.
37 * All rights reserved. 37 * All rights reserved.
38 * 38 *
39 * This code is derived from software contributed to Berkeley by 39 * This code is derived from software contributed to Berkeley by
40 * William Jolitz. 40 * William Jolitz.
41 * 41 *
42 * Redistribution and use in source and binary forms, with or without 42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions 43 * modification, are permitted provided that the following conditions
44 * are met: 44 * are met:
45 * 1. Redistributions of source code must retain the above copyright 45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer. 46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright 47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the 48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution. 49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors 50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software 51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission. 52 * without specific prior written permission.
53 * 53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE. 64 * SUCH DAMAGE.
65 * 65 *
66 * @(#)machdep.c 7.4 (Berkeley) 6/3/91 66 * @(#)machdep.c 7.4 (Berkeley) 6/3/91
67 */ 67 */
68 68
69#include <sys/cdefs.h> 69#include <sys/cdefs.h>
70__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.754 2015/04/24 00:04:04 khorben Exp $"); 70__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.755 2016/05/15 10:35:54 maxv Exp $");
71 71
72#include "opt_beep.h" 72#include "opt_beep.h"
73#include "opt_compat_ibcs2.h" 73#include "opt_compat_ibcs2.h"
74#include "opt_compat_freebsd.h" 74#include "opt_compat_freebsd.h"
75#include "opt_compat_netbsd.h" 75#include "opt_compat_netbsd.h"
76#include "opt_compat_svr4.h" 76#include "opt_compat_svr4.h"
77#include "opt_cpureset_delay.h" 77#include "opt_cpureset_delay.h"
78#include "opt_ddb.h" 78#include "opt_ddb.h"
79#include "opt_ipkdb.h" 79#include "opt_ipkdb.h"
80#include "opt_kgdb.h" 80#include "opt_kgdb.h"
81#include "opt_mtrr.h" 81#include "opt_mtrr.h"
82#include "opt_modular.h" 82#include "opt_modular.h"
83#include "opt_multiboot.h" 83#include "opt_multiboot.h"
84#include "opt_multiprocessor.h" 84#include "opt_multiprocessor.h"
85#include "opt_physmem.h" 85#include "opt_physmem.h"
86#include "opt_realmem.h" 86#include "opt_realmem.h"
87#include "opt_user_ldt.h" 87#include "opt_user_ldt.h"
88#include "opt_vm86.h" 88#include "opt_vm86.h"
89#include "opt_xen.h" 89#include "opt_xen.h"
90#include "isa.h" 90#include "isa.h"
91#include "pci.h" 91#include "pci.h"
92 92
93#include <sys/param.h> 93#include <sys/param.h>
94#include <sys/systm.h> 94#include <sys/systm.h>
95#include <sys/signal.h> 95#include <sys/signal.h>
96#include <sys/signalvar.h> 96#include <sys/signalvar.h>
97#include <sys/kernel.h> 97#include <sys/kernel.h>
98#include <sys/cpu.h> 98#include <sys/cpu.h>
99#include <sys/exec.h> 99#include <sys/exec.h>
100#include <sys/fcntl.h> 100#include <sys/fcntl.h>
101#include <sys/reboot.h> 101#include <sys/reboot.h>
102#include <sys/conf.h> 102#include <sys/conf.h>
103#include <sys/kauth.h> 103#include <sys/kauth.h>
104#include <sys/mbuf.h> 104#include <sys/mbuf.h>
105#include <sys/msgbuf.h> 105#include <sys/msgbuf.h>
106#include <sys/mount.h> 106#include <sys/mount.h>
107#include <sys/syscallargs.h> 107#include <sys/syscallargs.h>
108#include <sys/core.h> 108#include <sys/core.h>
109#include <sys/kcore.h> 109#include <sys/kcore.h>
110#include <sys/ucontext.h> 110#include <sys/ucontext.h>
111#include <sys/ras.h> 111#include <sys/ras.h>
112#include <sys/ksyms.h> 112#include <sys/ksyms.h>
113#include <sys/device.h> 113#include <sys/device.h>
114 114
115#ifdef IPKDB 115#ifdef IPKDB
116#include <ipkdb/ipkdb.h> 116#include <ipkdb/ipkdb.h>
117#endif 117#endif
118 118
119#ifdef KGDB 119#ifdef KGDB
120#include <sys/kgdb.h> 120#include <sys/kgdb.h>
121#endif 121#endif
122 122
123#include <dev/cons.h> 123#include <dev/cons.h>
124#include <dev/mm.h> 124#include <dev/mm.h>
125 125
126#include <uvm/uvm.h> 126#include <uvm/uvm.h>
127#include <uvm/uvm_page.h> 127#include <uvm/uvm_page.h>
128 128
129#include <sys/sysctl.h> 129#include <sys/sysctl.h>
130 130
131#include <machine/cpu.h> 131#include <machine/cpu.h>
132#include <machine/cpufunc.h> 132#include <machine/cpufunc.h>
133#include <machine/cpuvar.h> 133#include <machine/cpuvar.h>
134#include <machine/gdt.h> 134#include <machine/gdt.h>
135#include <machine/intr.h> 135#include <machine/intr.h>
136#include <machine/kcore.h> 136#include <machine/kcore.h>
137#include <machine/pio.h> 137#include <machine/pio.h>
138#include <machine/psl.h> 138#include <machine/psl.h>
139#include <machine/reg.h> 139#include <machine/reg.h>
140#include <machine/specialreg.h> 140#include <machine/specialreg.h>
141#include <machine/bootinfo.h> 141#include <machine/bootinfo.h>
142#include <machine/mtrr.h> 142#include <machine/mtrr.h>
143#include <x86/x86/tsc.h> 143#include <x86/x86/tsc.h>
144 144
145#include <x86/fpu.h> 145#include <x86/fpu.h>
146#include <x86/machdep.h> 146#include <x86/machdep.h>
147 147
148#include <machine/multiboot.h> 148#include <machine/multiboot.h>
149#ifdef XEN 149#ifdef XEN
150#include <xen/evtchn.h> 150#include <xen/evtchn.h>
151#include <xen/xen.h> 151#include <xen/xen.h>
152#include <xen/hypervisor.h> 152#include <xen/hypervisor.h>
153 153
154/* #define XENDEBUG */ 154/* #define XENDEBUG */
155/* #define XENDEBUG_LOW */ 155/* #define XENDEBUG_LOW */
156 156
157#ifdef XENDEBUG 157#ifdef XENDEBUG
158#define XENPRINTF(x) printf x 158#define XENPRINTF(x) printf x
159#define XENPRINTK(x) printk x 159#define XENPRINTK(x) printk x
160#else 160#else
161#define XENPRINTF(x) 161#define XENPRINTF(x)
162#define XENPRINTK(x) 162#define XENPRINTK(x)
163#endif 163#endif
164#define PRINTK(x) printf x 164#define PRINTK(x) printf x
165#endif /* XEN */ 165#endif /* XEN */
166 166
167#include <dev/isa/isareg.h> 167#include <dev/isa/isareg.h>
168#include <machine/isa_machdep.h> 168#include <machine/isa_machdep.h>
169#include <dev/ic/i8042reg.h> 169#include <dev/ic/i8042reg.h>
170 170
171#ifdef DDB 171#ifdef DDB
172#include <machine/db_machdep.h> 172#include <machine/db_machdep.h>
173#include <ddb/db_extern.h> 173#include <ddb/db_extern.h>
174#endif 174#endif
175 175
176#ifdef VM86 176#ifdef VM86
177#include <machine/vm86.h> 177#include <machine/vm86.h>
178#endif 178#endif
179 179
180#include "acpica.h" 180#include "acpica.h"
181#include "bioscall.h" 181#include "bioscall.h"
182 182
183#if NBIOSCALL > 0 183#if NBIOSCALL > 0
184#include <machine/bioscall.h> 184#include <machine/bioscall.h>
185#endif 185#endif
186 186
187#if NACPICA > 0 187#if NACPICA > 0
188#include <dev/acpi/acpivar.h> 188#include <dev/acpi/acpivar.h>
189#define ACPI_MACHDEP_PRIVATE 189#define ACPI_MACHDEP_PRIVATE
190#include <machine/acpi_machdep.h> 190#include <machine/acpi_machdep.h>
191#endif 191#endif
192 192
193#include "isa.h" 193#include "isa.h"
194#include "isadma.h" 194#include "isadma.h"
195#include "ksyms.h" 195#include "ksyms.h"
196 196
197#include "cardbus.h" 197#include "cardbus.h"
198#if NCARDBUS > 0 198#if NCARDBUS > 0
199/* For rbus_min_start hint. */ 199/* For rbus_min_start hint. */
200#include <sys/bus.h> 200#include <sys/bus.h>
201#include <dev/cardbus/rbus.h> 201#include <dev/cardbus/rbus.h>
202#include <machine/rbus_machdep.h> 202#include <machine/rbus_machdep.h>
203#endif 203#endif
204 204
205#include "mca.h" 205#include "mca.h"
206#if NMCA > 0 206#if NMCA > 0
207#include <machine/mca_machdep.h> /* for mca_busprobe() */ 207#include <machine/mca_machdep.h> /* for mca_busprobe() */
208#endif 208#endif
209 209
210#ifdef MULTIPROCESSOR /* XXX */ 210#ifdef MULTIPROCESSOR /* XXX */
211#include <machine/mpbiosvar.h> /* XXX */ 211#include <machine/mpbiosvar.h> /* XXX */
212#endif /* XXX */ 212#endif /* XXX */
213 213
214/* the following is used externally (sysctl_hw) */ 214/* the following is used externally (sysctl_hw) */
215char machine[] = "i386"; /* CPU "architecture" */ 215char machine[] = "i386"; /* CPU "architecture" */
216char machine_arch[] = "i386"; /* machine == machine_arch */ 216char machine_arch[] = "i386"; /* machine == machine_arch */
217 217
218extern struct bi_devmatch *x86_alldisks; 218extern struct bi_devmatch *x86_alldisks;
219extern int x86_ndisks; 219extern int x86_ndisks;
220 220
221#ifdef CPURESET_DELAY 221#ifdef CPURESET_DELAY
222int cpureset_delay = CPURESET_DELAY; 222int cpureset_delay = CPURESET_DELAY;
223#else 223#else
224int cpureset_delay = 2000; /* default to 2s */ 224int cpureset_delay = 2000; /* default to 2s */
225#endif 225#endif
226 226
227#ifdef MTRR 227#ifdef MTRR
228struct mtrr_funcs *mtrr_funcs; 228struct mtrr_funcs *mtrr_funcs;
229#endif 229#endif
230 230
231int cpu_class; 231int cpu_class;
232int use_pae; 232int use_pae;
233int i386_fpu_present = 1; 233int i386_fpu_present = 1;
234int i386_fpu_fdivbug; 234int i386_fpu_fdivbug;
235 235
236int i386_use_fxsave; 236int i386_use_fxsave;
237int i386_has_sse; 237int i386_has_sse;
238int i386_has_sse2; 238int i386_has_sse2;
239 239
240vaddr_t msgbuf_vaddr; 240vaddr_t msgbuf_vaddr;
241struct { 241struct {
242 paddr_t paddr; 242 paddr_t paddr;
243 psize_t sz; 243 psize_t sz;
244} msgbuf_p_seg[VM_PHYSSEG_MAX]; 244} msgbuf_p_seg[VM_PHYSSEG_MAX];
245unsigned int msgbuf_p_cnt = 0; 245unsigned int msgbuf_p_cnt = 0;
246 246
247vaddr_t idt_vaddr; 247vaddr_t idt_vaddr;
248paddr_t idt_paddr; 248paddr_t idt_paddr;
249vaddr_t pentium_idt_vaddr; 249vaddr_t pentium_idt_vaddr;
250 250
251struct vm_map *phys_map = NULL; 251struct vm_map *phys_map = NULL;
252 252
253extern paddr_t avail_start, avail_end; 253extern paddr_t avail_start, avail_end;
254#ifdef XEN 254#ifdef XEN
255extern paddr_t pmap_pa_start, pmap_pa_end; 255extern paddr_t pmap_pa_start, pmap_pa_end;
256void hypervisor_callback(void); 256void hypervisor_callback(void);
257void failsafe_callback(void); 257void failsafe_callback(void);
258#endif 258#endif
259 259
260#ifdef XEN 260#ifdef XEN
261void (*delay_func)(unsigned int) = xen_delay; 261void (*delay_func)(unsigned int) = xen_delay;
262void (*initclock_func)(void) = xen_initclocks; 262void (*initclock_func)(void) = xen_initclocks;
263#else 263#else
264void (*delay_func)(unsigned int) = i8254_delay; 264void (*delay_func)(unsigned int) = i8254_delay;
265void (*initclock_func)(void) = i8254_initclocks; 265void (*initclock_func)(void) = i8254_initclocks;
266#endif 266#endif
267 267
268 268
269/* 269/*
270 * Size of memory segments, before any memory is stolen. 270 * Size of memory segments, before any memory is stolen.
271 */ 271 */
272phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; 272phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];
273int mem_cluster_cnt = 0; 273int mem_cluster_cnt = 0;
274 274
275void init386(paddr_t); 275void init386(paddr_t);
276void initgdt(union descriptor *); 276void initgdt(union descriptor *);
277 277
278extern int time_adjusted; 278extern int time_adjusted;
279 279
280int *esym; 280int *esym;
281int *eblob; 281int *eblob;
282extern int boothowto; 282extern int boothowto;
283 283
284#ifndef XEN 284#ifndef XEN
285 285
286/* Base memory reported by BIOS. */ 286/* Base memory reported by BIOS. */
287#ifndef REALBASEMEM 287#ifndef REALBASEMEM
288int biosbasemem = 0; 288int biosbasemem = 0;
289#else 289#else
290int biosbasemem = REALBASEMEM; 290int biosbasemem = REALBASEMEM;
291#endif 291#endif
292 292
293/* Extended memory reported by BIOS. */ 293/* Extended memory reported by BIOS. */
294#ifndef REALEXTMEM 294#ifndef REALEXTMEM
295int biosextmem = 0; 295int biosextmem = 0;
296#else 296#else
297int biosextmem = REALEXTMEM; 297int biosextmem = REALEXTMEM;
298#endif 298#endif
299 299
300/* Set if any boot-loader set biosbasemem/biosextmem. */ 300/* Set if any boot-loader set biosbasemem/biosextmem. */
301int biosmem_implicit; 301int biosmem_implicit;
302 302
303/* Representation of the bootinfo structure constructed by a NetBSD native 303/* Representation of the bootinfo structure constructed by a NetBSD native
304 * boot loader. Only used by native_loader(). */ 304 * boot loader. Only used by native_loader(). */
305struct bootinfo_source { 305struct bootinfo_source {
306 uint32_t bs_naddrs; 306 uint32_t bs_naddrs;
307 void *bs_addrs[1]; /* Actually longer. */ 307 void *bs_addrs[1]; /* Actually longer. */
308}; 308};
309 309
310/* Only called by locore.h; no need to be in a header file. */ 310/* Only called by locore.h; no need to be in a header file. */
311void native_loader(int, int, struct bootinfo_source *, paddr_t, int, int); 311void native_loader(int, int, struct bootinfo_source *, paddr_t, int, int);
312 312
313/* 313/*
314 * Called as one of the very first things during system startup (just after 314 * Called as one of the very first things during system startup (just after
315 * the boot loader gave control to the kernel image), this routine is in 315 * the boot loader gave control to the kernel image), this routine is in
316 * charge of retrieving the parameters passed in by the boot loader and 316 * charge of retrieving the parameters passed in by the boot loader and
317 * storing them in the appropriate kernel variables. 317 * storing them in the appropriate kernel variables.
318 * 318 *
319 * WARNING: Because the kernel has not yet relocated itself to KERNBASE, 319 * WARNING: Because the kernel has not yet relocated itself to KERNBASE,
320 * special care has to be taken when accessing memory because absolute 320 * special care has to be taken when accessing memory because absolute
321 * addresses (referring to kernel symbols) do not work. So: 321 * addresses (referring to kernel symbols) do not work. So:
322 * 322 *
323 * 1) Avoid jumps to absolute addresses (such as gotos and switches). 323 * 1) Avoid jumps to absolute addresses (such as gotos and switches).
324 * 2) To access global variables use their physical address, which 324 * 2) To access global variables use their physical address, which
325 * can be obtained using the RELOC macro. 325 * can be obtained using the RELOC macro.
326 */ 326 */
327void 327void
328native_loader(int bl_boothowto, int bl_bootdev, 328native_loader(int bl_boothowto, int bl_bootdev,
329 struct bootinfo_source *bl_bootinfo, paddr_t bl_esym, 329 struct bootinfo_source *bl_bootinfo, paddr_t bl_esym,
330 int bl_biosextmem, int bl_biosbasemem) 330 int bl_biosextmem, int bl_biosbasemem)
331{ 331{
332#define RELOC(type, x) ((type)((vaddr_t)(x) - KERNBASE)) 332#define RELOC(type, x) ((type)((vaddr_t)(x) - KERNBASE))
333 333
334 *RELOC(int *, &boothowto) = bl_boothowto; 334 *RELOC(int *, &boothowto) = bl_boothowto;
335 335
336#ifdef COMPAT_OLDBOOT 336#ifdef COMPAT_OLDBOOT
337 /* 337 /*
338 * Pre-1.3 boot loaders gave the boot device as a parameter 338 * Pre-1.3 boot loaders gave the boot device as a parameter
339 * (instead of a bootinfo entry). 339 * (instead of a bootinfo entry).
340 */ 340 */
341 *RELOC(int *, &bootdev) = bl_bootdev; 341 *RELOC(int *, &bootdev) = bl_bootdev;
342#endif 342#endif
343 343
344 /* 344 /*
345 * The boot loader provides a physical, non-relocated address 345 * The boot loader provides a physical, non-relocated address
346 * for the symbols table's end. We need to convert it to a 346 * for the symbols table's end. We need to convert it to a
347 * virtual address. 347 * virtual address.
348 */ 348 */
349 if (bl_esym != 0) 349 if (bl_esym != 0)
350 *RELOC(int **, &esym) = (int *)((vaddr_t)bl_esym + KERNBASE); 350 *RELOC(int **, &esym) = (int *)((vaddr_t)bl_esym + KERNBASE);
351 else 351 else
352 *RELOC(int **, &esym) = 0; 352 *RELOC(int **, &esym) = 0;
353 353
354 /* 354 /*
355 * Copy bootinfo entries (if any) from the boot loader's 355 * Copy bootinfo entries (if any) from the boot loader's
356 * representation to the kernel's bootinfo space. 356 * representation to the kernel's bootinfo space.
357 */ 357 */
358 if (bl_bootinfo != NULL) { 358 if (bl_bootinfo != NULL) {
359 size_t i; 359 size_t i;
360 uint8_t *data; 360 uint8_t *data;
361 struct bootinfo *bidest; 361 struct bootinfo *bidest;
362 struct btinfo_modulelist *bi; 362 struct btinfo_modulelist *bi;
363 363
364 bidest = RELOC(struct bootinfo *, &bootinfo); 364 bidest = RELOC(struct bootinfo *, &bootinfo);
365 365
366 data = &bidest->bi_data[0]; 366 data = &bidest->bi_data[0];
367 367
368 for (i = 0; i < bl_bootinfo->bs_naddrs; i++) { 368 for (i = 0; i < bl_bootinfo->bs_naddrs; i++) {
369 struct btinfo_common *bc; 369 struct btinfo_common *bc;
370 370
371 bc = bl_bootinfo->bs_addrs[i]; 371 bc = bl_bootinfo->bs_addrs[i];
372 372
373 if ((data + bc->len) > 373 if ((data + bc->len) >
374 (&bidest->bi_data[0] + BOOTINFO_MAXSIZE)) 374 (&bidest->bi_data[0] + BOOTINFO_MAXSIZE))
375 break; 375 break;
376 376
377 memcpy(data, bc, bc->len); 377 memcpy(data, bc, bc->len);
378 /* 378 /*
379 * If any modules were loaded, record where they 379 * If any modules were loaded, record where they
380 * end. We'll need to skip over them. 380 * end. We'll need to skip over them.
381 */ 381 */
382 bi = (struct btinfo_modulelist *)data; 382 bi = (struct btinfo_modulelist *)data;
383 if (bi->common.type == BTINFO_MODULELIST) { 383 if (bi->common.type == BTINFO_MODULELIST) {
384 *RELOC(int **, &eblob) = 384 *RELOC(int **, &eblob) =
385 (int *)(bi->endpa + KERNBASE); 385 (int *)(bi->endpa + KERNBASE);
386 } 386 }
387 data += bc->len; 387 data += bc->len;
388 } 388 }
389 bidest->bi_nentries = i; 389 bidest->bi_nentries = i;
390 } 390 }
391 391
392 /* 392 /*
393 * Configure biosbasemem and biosextmem only if they were not 393 * Configure biosbasemem and biosextmem only if they were not
394 * explicitly given during the kernel's build. 394 * explicitly given during the kernel's build.
395 */ 395 */
396 if (*RELOC(int *, &biosbasemem) == 0) { 396 if (*RELOC(int *, &biosbasemem) == 0) {
397 *RELOC(int *, &biosbasemem) = bl_biosbasemem; 397 *RELOC(int *, &biosbasemem) = bl_biosbasemem;
398 *RELOC(int *, &biosmem_implicit) = 1; 398 *RELOC(int *, &biosmem_implicit) = 1;
399 } 399 }
400 if (*RELOC(int *, &biosextmem) == 0) { 400 if (*RELOC(int *, &biosextmem) == 0) {
401 *RELOC(int *, &biosextmem) = bl_biosextmem; 401 *RELOC(int *, &biosextmem) = bl_biosextmem;
402 *RELOC(int *, &biosmem_implicit) = 1; 402 *RELOC(int *, &biosmem_implicit) = 1;
403 } 403 }
404#undef RELOC 404#undef RELOC
405} 405}
406 406
407#endif /* XEN */ 407#endif /* XEN */
408 408
409/* 409/*
410 * Machine-dependent startup code 410 * Machine-dependent startup code
411 */ 411 */
412void 412void
413cpu_startup(void) 413cpu_startup(void)
414{ 414{
415 int x, y; 415 int x, y;
416 vaddr_t minaddr, maxaddr; 416 vaddr_t minaddr, maxaddr;
417 psize_t sz; 417 psize_t sz;
418 418
419 /* 419 /*
420 * For console drivers that require uvm and pmap to be initialized, 420 * For console drivers that require uvm and pmap to be initialized,
421 * we'll give them one more chance here... 421 * we'll give them one more chance here...
422 */ 422 */
423 consinit(); 423 consinit();
424 424
425 /* 425 /*
426 * Initialize error message buffer (at end of core). 426 * Initialize error message buffer (at end of core).
427 */ 427 */
428 if (msgbuf_p_cnt == 0) 428 if (msgbuf_p_cnt == 0)
429 panic("msgbuf paddr map has not been set up"); 429 panic("msgbuf paddr map has not been set up");
430 for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz) 430 for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz)
431 continue; 431 continue;
432 msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_VAONLY); 432 msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_VAONLY);
433 if (msgbuf_vaddr == 0) 433 if (msgbuf_vaddr == 0)
434 panic("failed to valloc msgbuf_vaddr"); 434 panic("failed to valloc msgbuf_vaddr");
435 435
436 /* msgbuf_paddr was init'd in pmap */ 436 /* msgbuf_paddr was init'd in pmap */
437 for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) { 437 for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) {
438 for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE) 438 for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE)
439 pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz, 439 pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz,
440 msgbuf_p_seg[y].paddr + x * PAGE_SIZE, 440 msgbuf_p_seg[y].paddr + x * PAGE_SIZE,
441 VM_PROT_READ|VM_PROT_WRITE, 0); 441 VM_PROT_READ|VM_PROT_WRITE, 0);
442 } 442 }
443 pmap_update(pmap_kernel()); 443 pmap_update(pmap_kernel());
444 444
445 initmsgbuf((void *)msgbuf_vaddr, sz); 445 initmsgbuf((void *)msgbuf_vaddr, sz);
446 446
447#ifdef MULTIBOOT 447#ifdef MULTIBOOT
448 multiboot_print_info(); 448 multiboot_print_info();
449#endif 449#endif
450 450
451#ifdef TRAPLOG 451#ifdef TRAPLOG
452 /* 452 /*
453 * Enable recording of branch from/to in MSR's 453 * Enable recording of branch from/to in MSR's
454 */ 454 */
455 wrmsr(MSR_DEBUGCTLMSR, 0x1); 455 wrmsr(MSR_DEBUGCTLMSR, 0x1);
456#endif 456#endif
457 457
458#if NCARDBUS > 0 458#if NCARDBUS > 0
459 /* Tell RBUS how much RAM we have, so it can use heuristics. */ 459 /* Tell RBUS how much RAM we have, so it can use heuristics. */
460 rbus_min_start_hint(ctob((psize_t)physmem)); 460 rbus_min_start_hint(ctob((psize_t)physmem));
461#endif 461#endif
462 462
463 minaddr = 0; 463 minaddr = 0;
464 464
465 /* 465 /*
466 * Allocate a submap for physio 466 * Allocate a submap for physio
467 */ 467 */
468 phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, 468 phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
469 VM_PHYS_SIZE, 0, false, NULL); 469 VM_PHYS_SIZE, 0, false, NULL);
470 470
471 /* Say hello. */ 471 /* Say hello. */
472 banner(); 472 banner();
473 473
474 /* Safe for i/o port / memory space allocation to use malloc now. */ 474 /* Safe for i/o port / memory space allocation to use malloc now. */
475#if NISA > 0 || NPCI > 0 475#if NISA > 0 || NPCI > 0
476 x86_bus_space_mallocok(); 476 x86_bus_space_mallocok();
477#endif 477#endif
478 478
479 gdt_init(); 479 gdt_init();
480 i386_proc0_tss_ldt_init(); 480 i386_proc0_tss_ldt_init();
481 481
482#ifndef XEN 482#ifndef XEN
483 cpu_init_tss(&cpu_info_primary); 483 cpu_init_tss(&cpu_info_primary);
484 ltr(cpu_info_primary.ci_tss_sel); 484 ltr(cpu_info_primary.ci_tss_sel);
485#endif 485#endif
486 486
487 x86_startup(); 487 x86_startup();
488} 488}
489 489
490/* 490/*
491 * Set up proc0's TSS and LDT. 491 * Set up proc0's TSS and LDT.
492 */ 492 */
493void 493void
494i386_proc0_tss_ldt_init(void) 494i386_proc0_tss_ldt_init(void)
495{ 495{
496 struct lwp *l; 496 struct lwp *l;
497 struct pcb *pcb __diagused; 497 struct pcb *pcb __diagused;
498 498
499 l = &lwp0; 499 l = &lwp0;
500 pcb = lwp_getpcb(l); 500 pcb = lwp_getpcb(l);
501 501
502 pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); 502 pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL);
503 pcb->pcb_cr0 = rcr0() & ~CR0_TS; 503 pcb->pcb_cr0 = rcr0() & ~CR0_TS;
504 pcb->pcb_esp0 = uvm_lwp_getuarea(l) + USPACE - 16; 504 pcb->pcb_esp0 = uvm_lwp_getuarea(l) + USPACE - 16;
505 pcb->pcb_iopl = SEL_KPL; 505 pcb->pcb_iopl = SEL_KPL;
506 l->l_md.md_regs = (struct trapframe *)pcb->pcb_esp0 - 1; 506 l->l_md.md_regs = (struct trapframe *)pcb->pcb_esp0 - 1;
507 memcpy(&pcb->pcb_fsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_fsd)); 507 memcpy(&pcb->pcb_fsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_fsd));
508 memcpy(&pcb->pcb_gsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_gsd)); 508 memcpy(&pcb->pcb_gsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_gsd));
509 509
510#ifndef XEN 510#ifndef XEN
511 lldt(pmap_kernel()->pm_ldt_sel); 511 lldt(pmap_kernel()->pm_ldt_sel);
512#else 512#else
513 HYPERVISOR_fpu_taskswitch(1); 513 HYPERVISOR_fpu_taskswitch(1);
514 XENPRINTF(("lwp tss sp %p ss %04x/%04x\n", 514 XENPRINTF(("lwp tss sp %p ss %04x/%04x\n",
515 (void *)pcb->pcb_esp0, 515 (void *)pcb->pcb_esp0,
516 GSEL(GDATA_SEL, SEL_KPL), 516 GSEL(GDATA_SEL, SEL_KPL),
517 IDXSEL(GSEL(GDATA_SEL, SEL_KPL)))); 517 IDXSEL(GSEL(GDATA_SEL, SEL_KPL))));
518 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0); 518 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0);
519#endif 519#endif
520} 520}
521 521
522#ifdef XEN 522#ifdef XEN
523/* used in assembly */ 523/* used in assembly */
524void i386_switch_context(lwp_t *); 524void i386_switch_context(lwp_t *);
525void i386_tls_switch(lwp_t *); 525void i386_tls_switch(lwp_t *);
526 526
527/* 527/*
528 * Switch context: 528 * Switch context:
529 * - switch stack pointer for user->kernel transition 529 * - switch stack pointer for user->kernel transition
530 */ 530 */
531void 531void
532i386_switch_context(lwp_t *l) 532i386_switch_context(lwp_t *l)
533{ 533{
534 struct pcb *pcb; 534 struct pcb *pcb;
535 struct physdev_op physop; 535 struct physdev_op physop;
536 536
537 pcb = lwp_getpcb(l); 537 pcb = lwp_getpcb(l);
538 538
539 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0); 539 HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0);
540 540
541 physop.cmd = PHYSDEVOP_SET_IOPL; 541 physop.cmd = PHYSDEVOP_SET_IOPL;
542 physop.u.set_iopl.iopl = pcb->pcb_iopl; 542 physop.u.set_iopl.iopl = pcb->pcb_iopl;
543 HYPERVISOR_physdev_op(&physop); 543 HYPERVISOR_physdev_op(&physop);
544} 544}
545 545
546void 546void
547i386_tls_switch(lwp_t *l) 547i386_tls_switch(lwp_t *l)
548{ 548{
549 struct cpu_info *ci = curcpu(); 549 struct cpu_info *ci = curcpu();
550 struct pcb *pcb = lwp_getpcb(l); 550 struct pcb *pcb = lwp_getpcb(l);
551 /* 551 /*
552 * Raise the IPL to IPL_HIGH. 552 * Raise the IPL to IPL_HIGH.
553 * FPU IPIs can alter the LWP's saved cr0. Dropping the priority 553 * FPU IPIs can alter the LWP's saved cr0. Dropping the priority
554 * is deferred until mi_switch(), when cpu_switchto() returns. 554 * is deferred until mi_switch(), when cpu_switchto() returns.
555 */ 555 */
556 (void)splhigh(); 556 (void)splhigh();
557 557
558 /* 558 /*
559 * If our floating point registers are on a different CPU, 559 * If our floating point registers are on a different CPU,
560 * set CR0_TS so we'll trap rather than reuse bogus state. 560 * set CR0_TS so we'll trap rather than reuse bogus state.
561 */ 561 */
562 562
563 if (l != ci->ci_fpcurlwp) { 563 if (l != ci->ci_fpcurlwp) {
564 HYPERVISOR_fpu_taskswitch(1); 564 HYPERVISOR_fpu_taskswitch(1);
565 } 565 }
566 566
567 /* Update TLS segment pointers */ 567 /* Update TLS segment pointers */
568 update_descriptor(&ci->ci_gdt[GUFS_SEL], 568 update_descriptor(&ci->ci_gdt[GUFS_SEL],
569 (union descriptor *) &pcb->pcb_fsd); 569 (union descriptor *) &pcb->pcb_fsd);
570 update_descriptor(&ci->ci_gdt[GUGS_SEL],  570 update_descriptor(&ci->ci_gdt[GUGS_SEL],
571 (union descriptor *) &pcb->pcb_gsd); 571 (union descriptor *) &pcb->pcb_gsd);
572 572
573} 573}
574#endif /* XEN */ 574#endif /* XEN */
575 575
576#ifndef XEN 576#ifndef XEN
577/* 577/*
578 * Set up TSS and I/O bitmap. 578 * Set up TSS and I/O bitmap.
579 */ 579 */
580void 580void
581cpu_init_tss(struct cpu_info *ci) 581cpu_init_tss(struct cpu_info *ci)
582{ 582{
583 struct i386tss *tss = &ci->ci_tss; 583 struct i386tss *tss = &ci->ci_tss;
584 584
585 tss->tss_iobase = IOMAP_INVALOFF << 16; 585 tss->tss_iobase = IOMAP_INVALOFF << 16;
586 tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); 586 tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
587 tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL); 587 tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
588 tss->tss_cr3 = rcr3(); 588 tss->tss_cr3 = rcr3();
589 ci->ci_tss_sel = tss_alloc(tss); 589 ci->ci_tss_sel = tss_alloc(tss);
590} 590}
591#endif /* XEN */ 591#endif /* XEN */
592 592
593void * 593void *
594getframe(struct lwp *l, int sig, int *onstack) 594getframe(struct lwp *l, int sig, int *onstack)
595{ 595{
596 struct proc *p = l->l_proc; 596 struct proc *p = l->l_proc;
597 struct trapframe *tf = l->l_md.md_regs; 597 struct trapframe *tf = l->l_md.md_regs;
598 598
599 /* Do we need to jump onto the signal stack? */ 599 /* Do we need to jump onto the signal stack? */
600 *onstack = (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 600 *onstack = (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0
601 && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; 601 && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;
602 if (*onstack) 602 if (*onstack)
603 return (char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size; 603 return (char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size;
604#ifdef VM86 604#ifdef VM86
605 if (tf->tf_eflags & PSL_VM) 605 if (tf->tf_eflags & PSL_VM)
606 return (void *)(tf->tf_esp + (tf->tf_ss << 4)); 606 return (void *)(tf->tf_esp + (tf->tf_ss << 4));
607 else 607 else
608#endif 608#endif
609 return (void *)tf->tf_esp; 609 return (void *)tf->tf_esp;
610} 610}
611 611
612/* 612/*
613 * Build context to run handler in. We invoke the handler 613 * Build context to run handler in. We invoke the handler
614 * directly, only returning via the trampoline. Note the 614 * directly, only returning via the trampoline. Note the
615 * trampoline version numbers are coordinated with machine- 615 * trampoline version numbers are coordinated with machine-
616 * dependent code in libc. 616 * dependent code in libc.
617 */ 617 */
618void 618void
619buildcontext(struct lwp *l, int sel, void *catcher, void *fp) 619buildcontext(struct lwp *l, int sel, void *catcher, void *fp)
620{ 620{
621 struct trapframe *tf = l->l_md.md_regs; 621 struct trapframe *tf = l->l_md.md_regs;
622 622
623 tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL); 623 tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
624 tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL); 624 tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
625 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); 625 tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL);
626 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); 626 tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL);
627 tf->tf_eip = (int)catcher; 627 tf->tf_eip = (int)catcher;
628 tf->tf_cs = GSEL(sel, SEL_UPL); 628 tf->tf_cs = GSEL(sel, SEL_UPL);
629 tf->tf_eflags &= ~PSL_CLEARSIG; 629 tf->tf_eflags &= ~PSL_CLEARSIG;
630 tf->tf_esp = (int)fp; 630 tf->tf_esp = (int)fp;
631 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); 631 tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL);
632 632
633 /* Ensure FP state is reset. */ 633 /* Ensure FP state is reset. */
634 fpu_save_area_reset(l); 634 fpu_save_area_reset(l);
635} 635}
636 636
637void 637void
638sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask) 638sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
639{ 639{
640 struct lwp *l = curlwp; 640 struct lwp *l = curlwp;
641 struct proc *p = l->l_proc; 641 struct proc *p = l->l_proc;
642 struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map); 642 struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map);
643 int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ? 643 int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
644 GUCODEBIG_SEL : GUCODE_SEL; 644 GUCODEBIG_SEL : GUCODE_SEL;
645 struct sigacts *ps = p->p_sigacts; 645 struct sigacts *ps = p->p_sigacts;
646 int onstack, error; 646 int onstack, error;
647 int sig = ksi->ksi_signo; 647 int sig = ksi->ksi_signo;
648 struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame; 648 struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame;
649 sig_t catcher = SIGACTION(p, sig).sa_handler; 649 sig_t catcher = SIGACTION(p, sig).sa_handler;
650 struct trapframe *tf = l->l_md.md_regs; 650 struct trapframe *tf = l->l_md.md_regs;
651 651
652 KASSERT(mutex_owned(p->p_lock)); 652 KASSERT(mutex_owned(p->p_lock));
653 653
654 fp--; 654 fp--;
655 655
656 frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp; 656 frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp;
657 frame.sf_signum = sig; 657 frame.sf_signum = sig;
658 frame.sf_sip = &fp->sf_si; 658 frame.sf_sip = &fp->sf_si;
659 frame.sf_ucp = &fp->sf_uc; 659 frame.sf_ucp = &fp->sf_uc;
660 frame.sf_si._info = ksi->ksi_info; 660 frame.sf_si._info = ksi->ksi_info;
661 frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM; 661 frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM;
662 frame.sf_uc.uc_sigmask = *mask; 662 frame.sf_uc.uc_sigmask = *mask;
663 frame.sf_uc.uc_link = l->l_ctxlink; 663 frame.sf_uc.uc_link = l->l_ctxlink;
664 frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK) 664 frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK)
665 ? _UC_SETSTACK : _UC_CLRSTACK; 665 ? _UC_SETSTACK : _UC_CLRSTACK;
666 memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack)); 666 memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack));
667 667
668 if (tf->tf_eflags & PSL_VM) 668 if (tf->tf_eflags & PSL_VM)
669 (*p->p_emul->e_syscall_intern)(p); 669 (*p->p_emul->e_syscall_intern)(p);
670 sendsig_reset(l, sig); 670 sendsig_reset(l, sig);
671 671
672 mutex_exit(p->p_lock); 672 mutex_exit(p->p_lock);
673 cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags); 673 cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);
674 error = copyout(&frame, fp, sizeof(frame)); 674 error = copyout(&frame, fp, sizeof(frame));
675 mutex_enter(p->p_lock); 675 mutex_enter(p->p_lock);
676 676
677 if (error != 0) { 677 if (error != 0) {
678 /* 678 /*
679 * Process has trashed its stack; give it an illegal 679 * Process has trashed its stack; give it an illegal
680 * instruction to halt it in its tracks. 680 * instruction to halt it in its tracks.
681 */ 681 */
682 sigexit(l, SIGILL); 682 sigexit(l, SIGILL);
683 /* NOTREACHED */ 683 /* NOTREACHED */
684 } 684 }
685 685
686 buildcontext(l, sel, catcher, fp); 686 buildcontext(l, sel, catcher, fp);
687 687
688 /* Remember that we're now on the signal stack. */ 688 /* Remember that we're now on the signal stack. */
689 if (onstack) 689 if (onstack)
690 l->l_sigstk.ss_flags |= SS_ONSTACK; 690 l->l_sigstk.ss_flags |= SS_ONSTACK;
691} 691}
692 692
693static void 693static void
694maybe_dump(int howto) 694maybe_dump(int howto)
695{ 695{
696 int s; 696 int s;
697 697
698 /* Disable interrupts. */ 698 /* Disable interrupts. */
699 s = splhigh(); 699 s = splhigh();
700 700
701 /* Do a dump if requested. */ 701 /* Do a dump if requested. */
702 if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP) 702 if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
703 dumpsys(); 703 dumpsys();
704 704
705 splx(s); 705 splx(s);
706} 706}
707 707
708void 708void
709cpu_reboot(int howto, char *bootstr) 709cpu_reboot(int howto, char *bootstr)
710{ 710{
711 static bool syncdone = false; 711 static bool syncdone = false;
712 int s = IPL_NONE; 712 int s = IPL_NONE;
713 713
714 if (cold) { 714 if (cold) {
715 howto |= RB_HALT; 715 howto |= RB_HALT;
716 goto haltsys; 716 goto haltsys;
717 } 717 }
718 718
719 boothowto = howto; 719 boothowto = howto;
720 720
721 /* XXX used to dump after vfs_shutdown() and before 721 /* XXX used to dump after vfs_shutdown() and before
722 * detaching devices / shutdown hooks / pmf_system_shutdown(). 722 * detaching devices / shutdown hooks / pmf_system_shutdown().
723 */ 723 */
724 maybe_dump(howto); 724 maybe_dump(howto);
725 725
726 /* 726 /*
727 * If we've panic'd, don't make the situation potentially 727 * If we've panic'd, don't make the situation potentially
728 * worse by syncing or unmounting the file systems. 728 * worse by syncing or unmounting the file systems.
729 */ 729 */
730 if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) { 730 if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) {
731 if (!syncdone) { 731 if (!syncdone) {
732 syncdone = true; 732 syncdone = true;
733 /* XXX used to force unmount as well, here */ 733 /* XXX used to force unmount as well, here */
734 vfs_sync_all(curlwp); 734 vfs_sync_all(curlwp);
735 /* 735 /*
736 * If we've been adjusting the clock, the todr 736 * If we've been adjusting the clock, the todr
737 * will be out of synch; adjust it now. 737 * will be out of synch; adjust it now.
738 * 738 *
739 * XXX used to do this after unmounting all 739 * XXX used to do this after unmounting all
740 * filesystems with vfs_shutdown(). 740 * filesystems with vfs_shutdown().
741 */ 741 */
742 if (time_adjusted != 0) 742 if (time_adjusted != 0)
743 resettodr(); 743 resettodr();
744 } 744 }
745 745
746 while (vfs_unmountall1(curlwp, false, false) || 746 while (vfs_unmountall1(curlwp, false, false) ||
747 config_detach_all(boothowto) || 747 config_detach_all(boothowto) ||
748 vfs_unmount_forceone(curlwp)) 748 vfs_unmount_forceone(curlwp))
749 ; /* do nothing */ 749 ; /* do nothing */
750 } else 750 } else
751 suspendsched(); 751 suspendsched();
752 752
753 pmf_system_shutdown(boothowto); 753 pmf_system_shutdown(boothowto);
754 754
755 s = splhigh(); 755 s = splhigh();
756 756
757 /* amd64 maybe_dump() */ 757 /* amd64 maybe_dump() */
758 758
759haltsys: 759haltsys:
760 doshutdownhooks(); 760 doshutdownhooks();
761 761
762 if ((howto & RB_POWERDOWN) == RB_POWERDOWN) { 762 if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
763#if NACPICA > 0 763#if NACPICA > 0
764 if (s != IPL_NONE) 764 if (s != IPL_NONE)
765 splx(s); 765 splx(s);
766 766
767 acpi_enter_sleep_state(ACPI_STATE_S5); 767 acpi_enter_sleep_state(ACPI_STATE_S5);
768#else 768#else
769 __USE(s); 769 __USE(s);
770#endif 770#endif
771#ifdef XEN 771#ifdef XEN
772 HYPERVISOR_shutdown(); 772 HYPERVISOR_shutdown();
773 for (;;); 773 for (;;);
774#endif 774#endif
775 } 775 }
776 776
777#ifdef MULTIPROCESSOR 777#ifdef MULTIPROCESSOR
778 cpu_broadcast_halt(); 778 cpu_broadcast_halt();
779#endif /* MULTIPROCESSOR */ 779#endif /* MULTIPROCESSOR */
780 780
781 if (howto & RB_HALT) { 781 if (howto & RB_HALT) {
782#if NACPICA > 0 782#if NACPICA > 0
783 acpi_disable(); 783 acpi_disable();
784#endif 784#endif
785 785
786 printf("\n"); 786 printf("\n");
787 printf("The operating system has halted.\n"); 787 printf("The operating system has halted.\n");
788 printf("Please press any key to reboot.\n\n"); 788 printf("Please press any key to reboot.\n\n");
789 789
790#ifdef BEEP_ONHALT 790#ifdef BEEP_ONHALT
791 { 791 {
792 int c; 792 int c;
793 for (c = BEEP_ONHALT_COUNT; c > 0; c--) { 793 for (c = BEEP_ONHALT_COUNT; c > 0; c--) {
794 sysbeep(BEEP_ONHALT_PITCH, 794 sysbeep(BEEP_ONHALT_PITCH,
795 BEEP_ONHALT_PERIOD * hz / 1000); 795 BEEP_ONHALT_PERIOD * hz / 1000);
796 delay(BEEP_ONHALT_PERIOD * 1000); 796 delay(BEEP_ONHALT_PERIOD * 1000);
797 sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000); 797 sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000);
798 delay(BEEP_ONHALT_PERIOD * 1000); 798 delay(BEEP_ONHALT_PERIOD * 1000);
799 } 799 }
800 } 800 }
801#endif 801#endif
802 802
803 cnpollc(1); /* for proper keyboard command handling */ 803 cnpollc(1); /* for proper keyboard command handling */
804 if (cngetc() == 0) { 804 if (cngetc() == 0) {
805 /* no console attached, so just hlt */ 805 /* no console attached, so just hlt */
806 printf("No keyboard - cannot reboot after all.\n"); 806 printf("No keyboard - cannot reboot after all.\n");
807 for(;;) { 807 for(;;) {
808 x86_hlt(); 808 x86_hlt();
809 } 809 }
810 } 810 }
811 cnpollc(0); 811 cnpollc(0);
812 } 812 }
813 813
814 printf("rebooting...\n"); 814 printf("rebooting...\n");
815 if (cpureset_delay > 0) 815 if (cpureset_delay > 0)
816 delay(cpureset_delay * 1000); 816 delay(cpureset_delay * 1000);
817 cpu_reset(); 817 cpu_reset();
818 for(;;) ; 818 for(;;) ;
819 /*NOTREACHED*/ 819 /*NOTREACHED*/
820} 820}
821 821
822/* 822/*
823 * Clear registers on exec 823 * Clear registers on exec
824 */ 824 */
825void 825void
826setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack) 826setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
827{ 827{
828 struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); 828 struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map);
829 struct pcb *pcb = lwp_getpcb(l); 829 struct pcb *pcb = lwp_getpcb(l);
830 struct trapframe *tf; 830 struct trapframe *tf;
831 831
832#ifdef USER_LDT 832#ifdef USER_LDT
833 pmap_ldt_cleanup(l); 833 pmap_ldt_cleanup(l);
834#endif 834#endif
835 835
836 fpu_save_area_clear(l, pack->ep_osversion >= 699002600 836 fpu_save_area_clear(l, pack->ep_osversion >= 699002600
837 ? __INITIAL_NPXCW__ : __NetBSD_COMPAT_NPXCW__); 837 ? __INITIAL_NPXCW__ : __NetBSD_COMPAT_NPXCW__);
838 838
839 memcpy(&pcb->pcb_fsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_fsd)); 839 memcpy(&pcb->pcb_fsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_fsd));
840 memcpy(&pcb->pcb_gsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_gsd)); 840 memcpy(&pcb->pcb_gsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_gsd));
841 841
842 tf = l->l_md.md_regs; 842 tf = l->l_md.md_regs;
843 tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL); 843 tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL);
844 tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL); 844 tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL);
845 tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL); 845 tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
846 tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL); 846 tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
847 tf->tf_edi = 0; 847 tf->tf_edi = 0;
848 tf->tf_esi = 0; 848 tf->tf_esi = 0;
849 tf->tf_ebp = 0; 849 tf->tf_ebp = 0;
850 tf->tf_ebx = l->l_proc->p_psstrp; 850 tf->tf_ebx = l->l_proc->p_psstrp;
851 tf->tf_edx = 0; 851 tf->tf_edx = 0;
852 tf->tf_ecx = 0; 852 tf->tf_ecx = 0;
853 tf->tf_eax = 0; 853 tf->tf_eax = 0;
854 tf->tf_eip = pack->ep_entry; 854 tf->tf_eip = pack->ep_entry;
855 tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ? 855 tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ?
856 LSEL(LUCODEBIG_SEL, SEL_UPL) : LSEL(LUCODE_SEL, SEL_UPL); 856 LSEL(LUCODEBIG_SEL, SEL_UPL) : LSEL(LUCODE_SEL, SEL_UPL);
857 tf->tf_eflags = PSL_USERSET; 857 tf->tf_eflags = PSL_USERSET;
858 tf->tf_esp = stack; 858 tf->tf_esp = stack;
859 tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL); 859 tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
860} 860}
861 861
862/* 862/*
863 * Initialize segments and descriptor tables 863 * Initialize segments and descriptor tables
864 */ 864 */
865 865
866union descriptor *gdt, *ldt; 866union descriptor *gdt, *ldt;
867union descriptor *pentium_idt; 867union descriptor *pentium_idt;
868extern vaddr_t lwp0uarea; 868extern vaddr_t lwp0uarea;
869 869
870void 870void
871setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl, 871setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl,
872 int sel) 872 int sel)
873{ 873{
874 874
875 gd->gd_looffset = (int)func; 875 gd->gd_looffset = (int)func;
876 gd->gd_selector = sel; 876 gd->gd_selector = sel;
877 gd->gd_stkcpy = args; 877 gd->gd_stkcpy = args;
878 gd->gd_xx = 0; 878 gd->gd_xx = 0;
879 gd->gd_type = type; 879 gd->gd_type = type;
880 gd->gd_dpl = dpl; 880 gd->gd_dpl = dpl;
881 gd->gd_p = 1; 881 gd->gd_p = 1;
882 gd->gd_hioffset = (int)func >> 16; 882 gd->gd_hioffset = (int)func >> 16;
883} 883}
884 884
885void 885void
886unsetgate(struct gate_descriptor *gd) 886unsetgate(struct gate_descriptor *gd)
887{ 887{
888 gd->gd_p = 0; 888 gd->gd_p = 0;
889 gd->gd_hioffset = 0; 889 gd->gd_hioffset = 0;
890 gd->gd_looffset = 0; 890 gd->gd_looffset = 0;
891 gd->gd_selector = 0; 891 gd->gd_selector = 0;
892 gd->gd_xx = 0; 892 gd->gd_xx = 0;
893 gd->gd_stkcpy = 0; 893 gd->gd_stkcpy = 0;
894 gd->gd_type = 0; 894 gd->gd_type = 0;
895 gd->gd_dpl = 0; 895 gd->gd_dpl = 0;
896} 896}
897 897
898 898
899void 899void
900setregion(struct region_descriptor *rd, void *base, size_t limit) 900setregion(struct region_descriptor *rd, void *base, size_t limit)
901{ 901{
902 902
903 rd->rd_limit = (int)limit; 903 rd->rd_limit = (int)limit;
904 rd->rd_base = (int)base; 904 rd->rd_base = (int)base;
905} 905}
906 906
907void 907void
908setsegment(struct segment_descriptor *sd, const void *base, size_t limit, 908setsegment(struct segment_descriptor *sd, const void *base, size_t limit,
909 int type, int dpl, int def32, int gran) 909 int type, int dpl, int def32, int gran)
910{ 910{
911 911
912 sd->sd_lolimit = (int)limit; 912 sd->sd_lolimit = (int)limit;
913 sd->sd_lobase = (int)base; 913 sd->sd_lobase = (int)base;
914 sd->sd_type = type; 914 sd->sd_type = type;
915 sd->sd_dpl = dpl; 915 sd->sd_dpl = dpl;
916 sd->sd_p = 1; 916 sd->sd_p = 1;
917 sd->sd_hilimit = (int)limit >> 16; 917 sd->sd_hilimit = (int)limit >> 16;
918 sd->sd_xx = 0; 918 sd->sd_xx = 0;
919 sd->sd_def32 = def32; 919 sd->sd_def32 = def32;
920 sd->sd_gran = gran; 920 sd->sd_gran = gran;
921 sd->sd_hibase = (int)base >> 24; 921 sd->sd_hibase = (int)base >> 24;
922} 922}
923 923
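
The setsegment() helper above relies on the descriptor bitfields to truncate the 32-bit base and the 20-bit limit into their split low/high fields. As a rough standalone sketch (the base value below is made up for illustration, not taken from the kernel), the packing and the 4 KiB granularity implied by gran=1 work out as follows:

#include <stdio.h>

int
main(void)
{
	unsigned base = 0xc0100000u;	/* hypothetical segment base */
	unsigned limit = 0xfffffu;	/* 20-bit limit, gran=1 => 4 KiB units */

	/* Same split that the sd_* bitfields perform by truncation. */
	printf("sd_lobase  = 0x%06x\n", base & 0xffffffu);
	printf("sd_hibase  = 0x%02x\n", base >> 24);
	printf("sd_lolimit = 0x%04x\n", limit & 0xffffu);
	printf("sd_hilimit = 0x%x\n", (limit >> 16) & 0xfu);
	/* With gran=1 the limit counts 4 KiB pages, so 0xfffff spans 4 GiB. */
	printf("span = %llu bytes\n", ((unsigned long long)limit + 1) * 4096);
	return 0;
}
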
924#define IDTVEC(name) __CONCAT(X, name) 924#define IDTVEC(name) __CONCAT(X, name)
925typedef void (vector)(void); 925typedef void (vector)(void);
926extern vector IDTVEC(syscall); 926extern vector IDTVEC(syscall);
927extern vector IDTVEC(osyscall); 927extern vector IDTVEC(osyscall);
928extern vector *IDTVEC(exceptions)[]; 928extern vector *IDTVEC(exceptions)[];
929extern vector IDTVEC(svr4_fasttrap); 929extern vector IDTVEC(svr4_fasttrap);
930void (*svr4_fasttrap_vec)(void) = (void (*)(void))nullop; 930void (*svr4_fasttrap_vec)(void) = (void (*)(void))nullop;
931krwlock_t svr4_fasttrap_lock; 931krwlock_t svr4_fasttrap_lock;
932#ifdef XEN 932#ifdef XEN
933#define MAX_XEN_IDT 128 933#define MAX_XEN_IDT 128
934trap_info_t xen_idt[MAX_XEN_IDT]; 934trap_info_t xen_idt[MAX_XEN_IDT];
935int xen_idt_idx; 935int xen_idt_idx;
936extern union descriptor tmpgdt[]; 936extern union descriptor tmpgdt[];
937#endif 937#endif
938 938
939void  939void
940cpu_init_idt(void) 940cpu_init_idt(void)
941{ 941{
942#ifndef XEN 942#ifndef XEN
943 struct region_descriptor region; 943 struct region_descriptor region;
944 setregion(&region, pentium_idt, NIDT * sizeof(idt[0]) - 1); 944 setregion(&region, pentium_idt, NIDT * sizeof(idt[0]) - 1);
945 lidt(&region); 945 lidt(&region);
946#else /* XEN */ 946#else /* XEN */
947 XENPRINTF(("HYPERVISOR_set_trap_table %p\n", xen_idt)); 947 XENPRINTF(("HYPERVISOR_set_trap_table %p\n", xen_idt));
948 if (HYPERVISOR_set_trap_table(xen_idt)) 948 if (HYPERVISOR_set_trap_table(xen_idt))
949 panic("HYPERVISOR_set_trap_table %p failed\n", xen_idt); 949 panic("HYPERVISOR_set_trap_table %p failed\n", xen_idt);
950#endif /* !XEN */ 950#endif /* !XEN */
951} 951}
952 952
953void 953void
954initgdt(union descriptor *tgdt) 954initgdt(union descriptor *tgdt)
955{ 955{
956 KASSERT(tgdt != NULL); 956 KASSERT(tgdt != NULL);
957  957
958 gdt = tgdt; 958 gdt = tgdt;
959#ifdef XEN 959#ifdef XEN
960 u_long frames[16]; 960 u_long frames[16];
961#else 961#else
962 struct region_descriptor region; 962 struct region_descriptor region;
963 memset(gdt, 0, NGDT*sizeof(*gdt)); 963 memset(gdt, 0, NGDT*sizeof(*gdt));
964#endif /* XEN */ 964#endif /* XEN */
965 /* make gdt gates and memory segments */ 965 /* make gdt gates and memory segments */
966 setsegment(&gdt[GCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1); 966 setsegment(&gdt[GCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1);
967 setsegment(&gdt[GDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 1); 967 setsegment(&gdt[GDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 1);
968 setsegment(&gdt[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1, 968 setsegment(&gdt[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1,
969 SDT_MEMERA, SEL_UPL, 1, 1); 969 SDT_MEMERA, SEL_UPL, 1, 1);
970 setsegment(&gdt[GUCODEBIG_SEL].sd, 0, 0xfffff, 970 setsegment(&gdt[GUCODEBIG_SEL].sd, 0, 0xfffff,
971 SDT_MEMERA, SEL_UPL, 1, 1); 971 SDT_MEMERA, SEL_UPL, 1, 1);
972 setsegment(&gdt[GUDATA_SEL].sd, 0, 0xfffff, 972 setsegment(&gdt[GUDATA_SEL].sd, 0, 0xfffff,
973 SDT_MEMRWA, SEL_UPL, 1, 1); 973 SDT_MEMRWA, SEL_UPL, 1, 1);
974#if NBIOSCALL > 0 974#if NBIOSCALL > 0
975 /* bios trampoline GDT entries */ 975 /* bios trampoline GDT entries */
976 setsegment(&gdt[GBIOSCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 0, 976 setsegment(&gdt[GBIOSCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 0,
977 0); 977 0);
978 setsegment(&gdt[GBIOSDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 0, 978 setsegment(&gdt[GBIOSDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 0,
979 0); 979 0);
980#endif 980#endif
981 setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary, 0xfffff, 981 setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary, 0xfffff,
982 SDT_MEMRWA, SEL_KPL, 1, 1); 982 SDT_MEMRWA, SEL_KPL, 1, 1);
983 983
984#ifndef XEN 984#ifndef XEN
985 setregion(&region, gdt, NGDT * sizeof(gdt[0]) - 1); 985 setregion(&region, gdt, NGDT * sizeof(gdt[0]) - 1);
986 lgdt(&region); 986 lgdt(&region);
987#else /* !XEN */ 987#else /* !XEN */
988 /* 988 /*
989 * We jumpstart the bootstrap process a bit so we can update 989 * We jumpstart the bootstrap process a bit so we can update
990 * page permissions. This is done redundantly later from 990 * page permissions. This is done redundantly later from
991 * x86_xpmap.c:xen_pmap_bootstrap() - harmless. 991 * x86_xpmap.c:xen_pmap_bootstrap() - harmless.
992 */ 992 */
993 xpmap_phys_to_machine_mapping = 993 xpmap_phys_to_machine_mapping =
994 (unsigned long *)xen_start_info.mfn_list; 994 (unsigned long *)xen_start_info.mfn_list;
995 995
996 frames[0] = xpmap_ptom((uint32_t)gdt - KERNBASE) >> PAGE_SHIFT; 996 frames[0] = xpmap_ptom((uint32_t)gdt - KERNBASE) >> PAGE_SHIFT;
997 { /* 997 { /*
998 * Enter the gdt page RO into the kernel map. We can't 998 * Enter the gdt page RO into the kernel map. We can't
999 * use pmap_kenter_pa() here, because %fs is not 999 * use pmap_kenter_pa() here, because %fs is not
1000 * usable until the gdt is loaded, and %fs is used as 1000 * usable until the gdt is loaded, and %fs is used as
1001 * the base pointer for curcpu() and curlwp(), both of 1001 * the base pointer for curcpu() and curlwp(), both of
1002 * which are in the callpath of pmap_kenter_pa(). 1002 * which are in the callpath of pmap_kenter_pa().
1003 * So we mash up our own - this is MD code anyway. 1003 * So we mash up our own - this is MD code anyway.
1004 */ 1004 */
1005 pt_entry_t pte; 1005 pt_entry_t pte;
1006 pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); 1006 pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0);
1007 1007
1008 pte = pmap_pa2pte((vaddr_t)gdt - KERNBASE); 1008 pte = pmap_pa2pte((vaddr_t)gdt - KERNBASE);
1009 pte |= PG_k | PG_RO | pg_nx | PG_V; 1009 pte |= PG_k | PG_RO | pg_nx | PG_V;
1010 1010
1011 if (HYPERVISOR_update_va_mapping((vaddr_t)gdt, pte, UVMF_INVLPG) < 0) { 1011 if (HYPERVISOR_update_va_mapping((vaddr_t)gdt, pte, UVMF_INVLPG) < 0) {
1012 panic("gdt page RO update failed.\n"); 1012 panic("gdt page RO update failed.\n");
1013 } 1013 }
1014 1014
1015 } 1015 }
1016 1016
1017 XENPRINTK(("loading gdt %lx, %d entries\n", frames[0] << PAGE_SHIFT, 1017 XENPRINTK(("loading gdt %lx, %d entries\n", frames[0] << PAGE_SHIFT,
1018 NGDT)); 1018 NGDT));
1019 if (HYPERVISOR_set_gdt(frames, NGDT /* XXX is it right ? */)) 1019 if (HYPERVISOR_set_gdt(frames, NGDT /* XXX is it right ? */))
1020 panic("HYPERVISOR_set_gdt failed!\n"); 1020 panic("HYPERVISOR_set_gdt failed!\n");
1021 1021
1022 lgdt_finish(); 1022 lgdt_finish();
1023#endif /* !XEN */ 1023#endif /* !XEN */
1024} 1024}
1025 1025
1026static void 1026static void
1027init386_msgbuf(void) 1027init386_msgbuf(void)
1028{ 1028{
1029 /* Message buffer is located at end of core. */ 1029 /* Message buffer is located at end of core. */
1030 struct vm_physseg *vps; 1030 struct vm_physseg *vps;
1031 psize_t sz = round_page(MSGBUFSIZE); 1031 psize_t sz = round_page(MSGBUFSIZE);
1032 psize_t reqsz = sz; 1032 psize_t reqsz = sz;
1033 unsigned int x; 1033 unsigned int x;
1034 1034
1035 search_again: 1035 search_again:
1036 vps = NULL; 1036 vps = NULL;
1037 for (x = 0; x < vm_nphysseg; ++x) { 1037 for (x = 0; x < vm_nphysseg; ++x) {
1038 vps = VM_PHYSMEM_PTR(x); 1038 vps = VM_PHYSMEM_PTR(x);
1039 if (ctob(vps->avail_end) == avail_end) { 1039 if (ctob(vps->avail_end) == avail_end) {
1040 break; 1040 break;
1041 } 1041 }
1042 } 1042 }
1043 if (x == vm_nphysseg) 1043 if (x == vm_nphysseg)
1044 panic("init386: can't find end of memory"); 1044 panic("init386: can't find end of memory");
1045 1045
1046 /* Shrink so it'll fit in the last segment. */ 1046 /* Shrink so it'll fit in the last segment. */
1047 if (vps->avail_end - vps->avail_start < atop(sz)) 1047 if (vps->avail_end - vps->avail_start < atop(sz))
1048 sz = ctob(vps->avail_end - vps->avail_start); 1048 sz = ctob(vps->avail_end - vps->avail_start);
1049 1049
1050 vps->avail_end -= atop(sz); 1050 vps->avail_end -= atop(sz);
1051 vps->end -= atop(sz); 1051 vps->end -= atop(sz);
1052 msgbuf_p_seg[msgbuf_p_cnt].sz = sz; 1052 msgbuf_p_seg[msgbuf_p_cnt].sz = sz;
1053 msgbuf_p_seg[msgbuf_p_cnt++].paddr = ctob(vps->avail_end); 1053 msgbuf_p_seg[msgbuf_p_cnt++].paddr = ctob(vps->avail_end);
1054 1054
1055 /* Remove the last segment if it now has no pages. */ 1055 /* Remove the last segment if it now has no pages. */
1056 if (vps->start == vps->end) { 1056 if (vps->start == vps->end) {
1057 for (--vm_nphysseg; x < vm_nphysseg; x++) 1057 for (--vm_nphysseg; x < vm_nphysseg; x++)
1058 VM_PHYSMEM_PTR_SWAP(x, x + 1); 1058 VM_PHYSMEM_PTR_SWAP(x, x + 1);
1059 } 1059 }
1060 1060
1061 /* Now find where the new avail_end is. */ 1061 /* Now find where the new avail_end is. */
1062 for (avail_end = 0, x = 0; x < vm_nphysseg; x++) 1062 for (avail_end = 0, x = 0; x < vm_nphysseg; x++)
1063 if (VM_PHYSMEM_PTR(x)->avail_end > avail_end) 1063 if (VM_PHYSMEM_PTR(x)->avail_end > avail_end)
1064 avail_end = VM_PHYSMEM_PTR(x)->avail_end; 1064 avail_end = VM_PHYSMEM_PTR(x)->avail_end;
1065 avail_end = ctob(avail_end); 1065 avail_end = ctob(avail_end);
1066 1066
1067 if (sz == reqsz) 1067 if (sz == reqsz)
1068 return; 1068 return;
1069 1069
1070 reqsz -= sz; 1070 reqsz -= sz;
1071 if (msgbuf_p_cnt == VM_PHYSSEG_MAX) { 1071 if (msgbuf_p_cnt == VM_PHYSSEG_MAX) {
1072 /* No more segments available, bail out. */ 1072 /* No more segments available, bail out. */
1073 printf("WARNING: MSGBUFSIZE (%zu) too large, using %zu.\n", 1073 printf("WARNING: MSGBUFSIZE (%zu) too large, using %zu.\n",
1074 (size_t)MSGBUFSIZE, (size_t)(MSGBUFSIZE - reqsz)); 1074 (size_t)MSGBUFSIZE, (size_t)(MSGBUFSIZE - reqsz));
1075 return; 1075 return;
1076 } 1076 }
1077 1077
1078 sz = reqsz; 1078 sz = reqsz;
1079 goto search_again; 1079 goto search_again;
1080} 1080}
1081 1081
1082#ifndef XEN 1082#ifndef XEN
1083static void 1083static void
1084init386_pte0(void) 1084init386_pte0(void)
1085{ 1085{
1086 paddr_t paddr; 1086 paddr_t paddr;
1087 vaddr_t vaddr; 1087 vaddr_t vaddr;
1088 1088
1089 paddr = 4 * PAGE_SIZE; 1089 paddr = 4 * PAGE_SIZE;
1090 vaddr = (vaddr_t)vtopte(0); 1090 vaddr = (vaddr_t)vtopte(0);
1091 pmap_kenter_pa(vaddr, paddr, VM_PROT_ALL, 0); 1091 pmap_kenter_pa(vaddr, paddr, VM_PROT_ALL, 0);
1092 pmap_update(pmap_kernel()); 1092 pmap_update(pmap_kernel());
1093 /* make sure it is clean before using */ 1093 /* make sure it is clean before using */
1094 memset((void *)vaddr, 0, PAGE_SIZE); 1094 memset((void *)vaddr, 0, PAGE_SIZE);
1095} 1095}
1096#endif /* !XEN */ 1096#endif /* !XEN */
1097 1097
1098static void 1098static void
1099init386_ksyms(void) 1099init386_ksyms(void)
1100{ 1100{
1101#if NKSYMS || defined(DDB) || defined(MODULAR) 1101#if NKSYMS || defined(DDB) || defined(MODULAR)
1102 extern int end; 1102 extern int end;
1103 struct btinfo_symtab *symtab; 1103 struct btinfo_symtab *symtab;
1104 1104
1105#ifdef DDB 1105#ifdef DDB
1106 db_machine_init(); 1106 db_machine_init();
1107#endif 1107#endif
1108 1108
1109#if defined(MULTIBOOT) 1109#if defined(MULTIBOOT)
1110 if (multiboot_ksyms_addsyms_elf()) 1110 if (multiboot_ksyms_addsyms_elf())
1111 return; 1111 return;
1112#endif 1112#endif
1113 1113
1114 if ((symtab = lookup_bootinfo(BTINFO_SYMTAB)) == NULL) { 1114 if ((symtab = lookup_bootinfo(BTINFO_SYMTAB)) == NULL) {
1115 ksyms_addsyms_elf(*(int *)&end, ((int *)&end) + 1, esym); 1115 ksyms_addsyms_elf(*(int *)&end, ((int *)&end) + 1, esym);
1116 return; 1116 return;
1117 } 1117 }
1118 1118
1119 symtab->ssym += KERNBASE; 1119 symtab->ssym += KERNBASE;
1120 symtab->esym += KERNBASE; 1120 symtab->esym += KERNBASE;
1121 ksyms_addsyms_elf(symtab->nsym, (int *)symtab->ssym, (int *)symtab->esym); 1121 ksyms_addsyms_elf(symtab->nsym, (int *)symtab->ssym, (int *)symtab->esym);
1122#endif 1122#endif
1123} 1123}
1124 1124
1125void 1125void
1126init386(paddr_t first_avail) 1126init386(paddr_t first_avail)
1127{ 1127{
1128 extern void consinit(void); 1128 extern void consinit(void);
1129 int x; 1129 int x;
1130#ifndef XEN 1130#ifndef XEN
1131 union descriptor *tgdt; 1131 union descriptor *tgdt;
1132 extern struct extent *iomem_ex; 1132 extern struct extent *iomem_ex;
1133 struct region_descriptor region; 1133 struct region_descriptor region;
1134 struct btinfo_memmap *bim; 1134 struct btinfo_memmap *bim;
1135#endif 1135#endif
1136#if NBIOSCALL > 0 1136#if NBIOSCALL > 0
1137 extern int biostramp_image_size; 1137 extern int biostramp_image_size;
1138 extern u_char biostramp_image[]; 1138 extern u_char biostramp_image[];
1139#endif 1139#endif
1140 1140
1141#ifdef XEN 1141#ifdef XEN
1142 XENPRINTK(("HYPERVISOR_shared_info %p (%x)\n", HYPERVISOR_shared_info, 1142 XENPRINTK(("HYPERVISOR_shared_info %p (%x)\n", HYPERVISOR_shared_info,
1143 xen_start_info.shared_info)); 1143 xen_start_info.shared_info));
1144 KASSERT(HYPERVISOR_shared_info != NULL); 1144 KASSERT(HYPERVISOR_shared_info != NULL);
1145 cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0]; 1145 cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0];
1146#endif 1146#endif
1147 cpu_probe(&cpu_info_primary); 1147 cpu_probe(&cpu_info_primary);
1148 1148
1149 uvm_lwp_setuarea(&lwp0, lwp0uarea); 1149 uvm_lwp_setuarea(&lwp0, lwp0uarea);
1150 1150
1151 cpu_init_msrs(&cpu_info_primary, true); 1151 cpu_init_msrs(&cpu_info_primary, true);
1152 1152
1153#ifdef PAE 1153#ifdef PAE
1154 use_pae = 1; 1154 use_pae = 1;
1155#else 1155#else
1156 use_pae = 0; 1156 use_pae = 0;
1157#endif 1157#endif
1158 1158
1159#ifdef XEN 1159#ifdef XEN
1160 struct pcb *pcb = lwp_getpcb(&lwp0); 1160 struct pcb *pcb = lwp_getpcb(&lwp0);
1161 pcb->pcb_cr3 = PDPpaddr; 1161 pcb->pcb_cr3 = PDPpaddr;
1162 __PRINTK(("pcb_cr3 0x%lx cr3 0x%lx\n", 1162 __PRINTK(("pcb_cr3 0x%lx cr3 0x%lx\n",
1163 PDPpaddr, xpmap_ptom(PDPpaddr))); 1163 PDPpaddr, xpmap_ptom(PDPpaddr)));
1164 XENPRINTK(("lwp0uarea %p first_avail %p\n", 1164 XENPRINTK(("lwp0uarea %p first_avail %p\n",
1165 lwp0uarea, (void *)(long)first_avail)); 1165 lwp0uarea, (void *)(long)first_avail));
1166 XENPRINTK(("ptdpaddr %p atdevbase %p\n", (void *)PDPpaddr, 1166 XENPRINTK(("ptdpaddr %p atdevbase %p\n", (void *)PDPpaddr,
1167 (void *)atdevbase)); 1167 (void *)atdevbase));
1168#endif 1168#endif
1169 1169
1170#if defined(PAE) && !defined(XEN) 1170#if defined(PAE) && !defined(XEN)
1171 /* 1171 /*
1172 * Save VA and PA of L3 PD of boot processor (for Xen, this is done 1172 * Save VA and PA of L3 PD of boot processor (for Xen, this is done
1173 * in xen_pmap_bootstrap()) 1173 * in xen_pmap_bootstrap())
1174 */ 1174 */
1175 cpu_info_primary.ci_pae_l3_pdirpa = rcr3(); 1175 cpu_info_primary.ci_pae_l3_pdirpa = rcr3();
1176 cpu_info_primary.ci_pae_l3_pdir = (pd_entry_t *)(rcr3() + KERNBASE); 1176 cpu_info_primary.ci_pae_l3_pdir = (pd_entry_t *)(rcr3() + KERNBASE);
1177#endif /* PAE && !XEN */ 1177#endif /* PAE && !XEN */
1178 1178
1179#ifdef XEN 1179#ifdef XEN
1180 xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL); 1180 xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL);
1181#endif 1181#endif
1182 1182
1183 /* 1183 /*
1184 * Initialize PAGE_SIZE-dependent variables. 1184 * Initialize PAGE_SIZE-dependent variables.
1185 */ 1185 */
1186 uvm_setpagesize(); 1186 uvm_setpagesize();
1187 1187
1188 /* 1188 /*
1189 * Start with 2 color bins -- this is just a guess to get us 1189 * Start with 2 color bins -- this is just a guess to get us
1190 * started. We'll recolor when we determine the largest cache 1190 * started. We'll recolor when we determine the largest cache
1191 * sizes on the system. 1191 * sizes on the system.
1192 */ 1192 */
1193 uvmexp.ncolors = 2; 1193 uvmexp.ncolors = 2;
1194 1194
1195#ifndef XEN 1195#ifndef XEN
1196 /* 1196 /*
1197 * Low memory reservations: 1197 * Low memory reservations:
1198 * Page 0: BIOS data 1198 * Page 0: BIOS data
1199 * Page 1: BIOS callback 1199 * Page 1: BIOS callback
1200 * Page 2: MP bootstrap 1200 * Page 2: MP bootstrap code (MP_TRAMPOLINE)
1201 * Page 3: ACPI wakeup code 1201 * Page 3: ACPI wakeup code (ACPI_WAKEUP_ADDR)
1202 * Page 4: Temporary page table for 0MB-4MB 1202 * Page 4: Temporary page table for 0MB-4MB
1203 * Page 5: Temporary page directory 1203 * Page 5: Temporary page directory
1204 */ 1204 */
1205 avail_start = 6 * PAGE_SIZE; 1205 avail_start = 6 * PAGE_SIZE;
1206#else /* !XEN */ 1206#else /* !XEN */
1207 /* steal one page for gdt */ 1207 /* steal one page for gdt */
1208 gdt = (void *)((u_long)first_avail + KERNBASE); 1208 gdt = (void *)((u_long)first_avail + KERNBASE);
1209 first_avail += PAGE_SIZE; 1209 first_avail += PAGE_SIZE;
1210 /* Make sure the end of the space used by the kernel is rounded. */ 1210 /* Make sure the end of the space used by the kernel is rounded. */
1211 first_avail = round_page(first_avail); 1211 first_avail = round_page(first_avail);
1212 avail_start = first_avail; 1212 avail_start = first_avail;
1213 avail_end = ctob((paddr_t)xen_start_info.nr_pages); 1213 avail_end = ctob((paddr_t)xen_start_info.nr_pages);
1214 pmap_pa_start = (KERNTEXTOFF - KERNBASE); 1214 pmap_pa_start = (KERNTEXTOFF - KERNBASE);
1215 pmap_pa_end = pmap_pa_start + ctob((paddr_t)xen_start_info.nr_pages); 1215 pmap_pa_end = pmap_pa_start + ctob((paddr_t)xen_start_info.nr_pages);
1216 mem_clusters[0].start = avail_start; 1216 mem_clusters[0].start = avail_start;
1217 mem_clusters[0].size = avail_end - avail_start; 1217 mem_clusters[0].size = avail_end - avail_start;
1218 mem_cluster_cnt++; 1218 mem_cluster_cnt++;
1219 physmem += xen_start_info.nr_pages; 1219 physmem += xen_start_info.nr_pages;
1220 uvmexp.wired += atop(avail_start); 1220 uvmexp.wired += atop(avail_start);
1221 /* 1221 /*
1222 * initgdt() has to be done before consinit(), so that %fs is properly 1222 * initgdt() has to be done before consinit(), so that %fs is properly
1223 * initialised. initgdt() uses pmap_kenter_pa so it can't be called 1223 * initialised. initgdt() uses pmap_kenter_pa so it can't be called
1224 * before the above variables are set. 1224 * before the above variables are set.
1225 */ 1225 */
1226 1226
1227 initgdt(gdt); 1227 initgdt(gdt);
1228 1228
1229 mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM); 1229 mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM);
1230#endif /* XEN */ 1230#endif /* XEN */
1231 1231
1232#if NISA > 0 || NPCI > 0 1232#if NISA > 0 || NPCI > 0
1233 x86_bus_space_init(); 1233 x86_bus_space_init();
1234#endif /* NISA > 0 || NPCI > 0 */ 1234#endif /* NISA > 0 || NPCI > 0 */
1235  1235
1236 consinit(); /* XXX SHOULD NOT BE DONE HERE */ 1236 consinit(); /* XXX SHOULD NOT BE DONE HERE */
1237 1237
1238#ifdef DEBUG_MEMLOAD 1238#ifdef DEBUG_MEMLOAD
1239 printf("mem_cluster_count: %d\n", mem_cluster_cnt); 1239 printf("mem_cluster_count: %d\n", mem_cluster_cnt);
1240#endif 1240#endif
1241 1241
1242 /* 1242 /*
1243 * Call pmap initialization to make new kernel address space. 1243 * Call pmap initialization to make new kernel address space.
1244 * We must do this before loading pages into the VM system. 1244 * We must do this before loading pages into the VM system.
1245 */ 1245 */
1246 pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE); 1246 pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE);
1247 1247
1248#ifndef XEN 1248#ifndef XEN
1249 /* 1249 /*
1250 * Check to see if we have a memory map from the BIOS (passed 1250 * Check to see if we have a memory map from the BIOS (passed
 1251 * to us by the boot program). 1251 * to us by the boot program).
1252 */ 1252 */
1253 bim = lookup_bootinfo(BTINFO_MEMMAP); 1253 bim = lookup_bootinfo(BTINFO_MEMMAP);
1254 if ((biosmem_implicit || (biosbasemem == 0 && biosextmem == 0)) && 1254 if ((biosmem_implicit || (biosbasemem == 0 && biosextmem == 0)) &&
1255 bim != NULL && bim->num > 0) 1255 bim != NULL && bim->num > 0)
1256 initx86_parse_memmap(bim, iomem_ex); 1256 initx86_parse_memmap(bim, iomem_ex);
1257 1257
1258 /* 1258 /*
1259 * If the loop above didn't find any valid segment, fall back to 1259 * If the loop above didn't find any valid segment, fall back to
1260 * former code. 1260 * former code.
1261 */ 1261 */
1262 if (mem_cluster_cnt == 0) 1262 if (mem_cluster_cnt == 0)
1263 initx86_fake_memmap(iomem_ex); 1263 initx86_fake_memmap(iomem_ex);
1264 1264
1265 initx86_load_memmap(first_avail); 1265 initx86_load_memmap(first_avail);
1266 1266
1267#else /* !XEN */ 1267#else /* !XEN */
1268 XENPRINTK(("load the memory cluster 0x%" PRIx64 " (%" PRId64 ") - " 1268 XENPRINTK(("load the memory cluster 0x%" PRIx64 " (%" PRId64 ") - "
1269 "0x%" PRIx64 " (%" PRId64 ")\n", 1269 "0x%" PRIx64 " (%" PRId64 ")\n",
1270 (uint64_t)avail_start, (uint64_t)atop(avail_start), 1270 (uint64_t)avail_start, (uint64_t)atop(avail_start),
1271 (uint64_t)avail_end, (uint64_t)atop(avail_end))); 1271 (uint64_t)avail_end, (uint64_t)atop(avail_end)));
1272 uvm_page_physload(atop(avail_start), atop(avail_end), 1272 uvm_page_physload(atop(avail_start), atop(avail_end),
1273 atop(avail_start), atop(avail_end), 1273 atop(avail_start), atop(avail_end),
1274 VM_FREELIST_DEFAULT); 1274 VM_FREELIST_DEFAULT);
1275 1275
1276 /* Reclaim the boot gdt page - see locore.s */ 1276 /* Reclaim the boot gdt page - see locore.s */
1277 { 1277 {
1278 pt_entry_t pte; 1278 pt_entry_t pte;
1279 pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); 1279 pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0);
1280 1280
1281 pte = pmap_pa2pte((vaddr_t)tmpgdt - KERNBASE); 1281 pte = pmap_pa2pte((vaddr_t)tmpgdt - KERNBASE);
1282 pte |= PG_k | PG_RW | pg_nx | PG_V; 1282 pte |= PG_k | PG_RW | pg_nx | PG_V;
1283 1283
1284 if (HYPERVISOR_update_va_mapping((vaddr_t)tmpgdt, pte, UVMF_INVLPG) < 0) { 1284 if (HYPERVISOR_update_va_mapping((vaddr_t)tmpgdt, pte, UVMF_INVLPG) < 0) {
 1285 panic("tmpgdt page reclaim RW update failed.\n"); 1285 panic("tmpgdt page reclaim RW update failed.\n");
1286 } 1286 }
1287 } 1287 }
1288 1288
1289#endif /* !XEN */ 1289#endif /* !XEN */
1290 1290
1291 init386_msgbuf(); 1291 init386_msgbuf();
1292 1292
1293#ifndef XEN 1293#ifndef XEN
1294 /* 1294 /*
1295 * XXX Remove this 1295 * XXX Remove this
1296 * 1296 *
 1297 * Set up a temporary Page Table Entry to allow identity mappings of 1297 * Set up a temporary Page Table Entry to allow identity mappings of
1298 * the real mode address. This is required by: 1298 * the real mode address. This is required by:
1299 * - bioscall 1299 * - bioscall
1300 * - MP bootstrap 1300 * - MP bootstrap
1301 * - ACPI wakecode 1301 * - ACPI wakecode
1302 */ 1302 */
1303 init386_pte0(); 1303 init386_pte0();
1304 1304
1305#if NBIOSCALL > 0 1305#if NBIOSCALL > 0
1306 KASSERT(biostramp_image_size <= PAGE_SIZE); 1306 KASSERT(biostramp_image_size <= PAGE_SIZE);
1307 pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE, /* virtual */ 1307 pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE, /* virtual */
1308 (paddr_t)BIOSTRAMP_BASE, /* physical */ 1308 (paddr_t)BIOSTRAMP_BASE, /* physical */
1309 VM_PROT_ALL, 0); /* protection */ 1309 VM_PROT_ALL, 0); /* protection */
1310 pmap_update(pmap_kernel()); 1310 pmap_update(pmap_kernel());
1311 memcpy((void *)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size); 1311 memcpy((void *)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size);
1312 1312
1313 /* Needed early, for bioscall() */ 1313 /* Needed early, for bioscall() */
1314 cpu_info_primary.ci_pmap = pmap_kernel(); 1314 cpu_info_primary.ci_pmap = pmap_kernel();
1315#endif 1315#endif
1316#endif /* !XEN */ 1316#endif /* !XEN */
1317 1317
1318 pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0); 1318 pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0);
1319 pmap_update(pmap_kernel()); 1319 pmap_update(pmap_kernel());
1320 memset((void *)idt_vaddr, 0, PAGE_SIZE); 1320 memset((void *)idt_vaddr, 0, PAGE_SIZE);
1321 1321
1322 1322
1323#ifndef XEN 1323#ifndef XEN
1324 idt_init(); 1324 idt_init();
1325 1325
1326 idt = (struct gate_descriptor *)idt_vaddr; 1326 idt = (struct gate_descriptor *)idt_vaddr;
1327 pmap_kenter_pa(pentium_idt_vaddr, idt_paddr, VM_PROT_READ, 0); 1327 pmap_kenter_pa(pentium_idt_vaddr, idt_paddr, VM_PROT_READ, 0);
1328 pmap_update(pmap_kernel()); 1328 pmap_update(pmap_kernel());
1329 pentium_idt = (union descriptor *)pentium_idt_vaddr; 1329 pentium_idt = (union descriptor *)pentium_idt_vaddr;
1330 1330
1331 tgdt = gdt; 1331 tgdt = gdt;
1332 gdt = (union descriptor *) 1332 gdt = (union descriptor *)
1333 ((char *)idt + NIDT * sizeof (struct gate_descriptor)); 1333 ((char *)idt + NIDT * sizeof (struct gate_descriptor));
1334 ldt = gdt + NGDT; 1334 ldt = gdt + NGDT;
1335 1335
1336 memcpy(gdt, tgdt, NGDT*sizeof(*gdt)); 1336 memcpy(gdt, tgdt, NGDT*sizeof(*gdt));
1337 1337
1338 setsegment(&gdt[GLDT_SEL].sd, ldt, NLDT * sizeof(ldt[0]) - 1, 1338 setsegment(&gdt[GLDT_SEL].sd, ldt, NLDT * sizeof(ldt[0]) - 1,
1339 SDT_SYSLDT, SEL_KPL, 0, 0); 1339 SDT_SYSLDT, SEL_KPL, 0, 0);
1340#else 1340#else
1341 HYPERVISOR_set_callbacks( 1341 HYPERVISOR_set_callbacks(
1342 GSEL(GCODE_SEL, SEL_KPL), (unsigned long)hypervisor_callback, 1342 GSEL(GCODE_SEL, SEL_KPL), (unsigned long)hypervisor_callback,
1343 GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); 1343 GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback);
1344 1344
1345 ldt = (union descriptor *)idt_vaddr; 1345 ldt = (union descriptor *)idt_vaddr;
1346#endif /* XEN */ 1346#endif /* XEN */
1347 1347
1348 /* make ldt gates and memory segments */ 1348 /* make ldt gates and memory segments */
1349 setgate(&ldt[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1, 1349 setgate(&ldt[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1,
1350 SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1350 SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1351 1351
1352 ldt[LUCODE_SEL] = gdt[GUCODE_SEL]; 1352 ldt[LUCODE_SEL] = gdt[GUCODE_SEL];
1353 ldt[LUCODEBIG_SEL] = gdt[GUCODEBIG_SEL]; 1353 ldt[LUCODEBIG_SEL] = gdt[GUCODEBIG_SEL];
1354 ldt[LUDATA_SEL] = gdt[GUDATA_SEL]; 1354 ldt[LUDATA_SEL] = gdt[GUDATA_SEL];
1355 ldt[LSOL26CALLS_SEL] = ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; 1355 ldt[LSOL26CALLS_SEL] = ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL];
1356 1356
1357#ifndef XEN 1357#ifndef XEN
1358 /* exceptions */ 1358 /* exceptions */
1359 for (x = 0; x < 32; x++) { 1359 for (x = 0; x < 32; x++) {
1360 idt_vec_reserve(x); 1360 idt_vec_reserve(x);
1361 setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386IGT, 1361 setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386IGT,
1362 (x == 3 || x == 4) ? SEL_UPL : SEL_KPL, 1362 (x == 3 || x == 4) ? SEL_UPL : SEL_KPL,
1363 GSEL(GCODE_SEL, SEL_KPL)); 1363 GSEL(GCODE_SEL, SEL_KPL));
1364 } 1364 }
1365 1365
1366 /* new-style interrupt gate for syscalls */ 1366 /* new-style interrupt gate for syscalls */
1367 idt_vec_reserve(128); 1367 idt_vec_reserve(128);
1368 setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL, 1368 setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL,
1369 GSEL(GCODE_SEL, SEL_KPL)); 1369 GSEL(GCODE_SEL, SEL_KPL));
1370 idt_vec_reserve(0xd2); 1370 idt_vec_reserve(0xd2);
1371 setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386IGT, 1371 setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386IGT,
1372 SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); 1372 SEL_UPL, GSEL(GCODE_SEL, SEL_KPL));
1373 1373
1374 setregion(&region, gdt, NGDT * sizeof(gdt[0]) - 1); 1374 setregion(&region, gdt, NGDT * sizeof(gdt[0]) - 1);
1375 lgdt(&region); 1375 lgdt(&region);
1376 1376
1377 cpu_init_idt(); 1377 cpu_init_idt();
1378#else /* !XEN */ 1378#else /* !XEN */
1379 memset(xen_idt, 0, sizeof(trap_info_t) * MAX_XEN_IDT); 1379 memset(xen_idt, 0, sizeof(trap_info_t) * MAX_XEN_IDT);
1380 xen_idt_idx = 0; 1380 xen_idt_idx = 0;
1381 for (x = 0; x < 32; x++) { 1381 for (x = 0; x < 32; x++) {
1382 KASSERT(xen_idt_idx < MAX_XEN_IDT); 1382 KASSERT(xen_idt_idx < MAX_XEN_IDT);
1383 xen_idt[xen_idt_idx].vector = x; 1383 xen_idt[xen_idt_idx].vector = x;
1384 1384
1385 switch (x) { 1385 switch (x) {
1386 case 2: /* NMI */ 1386 case 2: /* NMI */
1387 case 18: /* MCA */ 1387 case 18: /* MCA */
1388 TI_SET_IF(&(xen_idt[xen_idt_idx]), 2); 1388 TI_SET_IF(&(xen_idt[xen_idt_idx]), 2);
1389 break; 1389 break;
1390 case 3: 1390 case 3:
1391 case 4: 1391 case 4:
1392 xen_idt[xen_idt_idx].flags = SEL_UPL; 1392 xen_idt[xen_idt_idx].flags = SEL_UPL;
1393 break; 1393 break;
1394 default: 1394 default:
1395 xen_idt[xen_idt_idx].flags = SEL_XEN; 1395 xen_idt[xen_idt_idx].flags = SEL_XEN;
1396 break; 1396 break;
1397 } 1397 }
1398 1398
1399 xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); 1399 xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
1400 xen_idt[xen_idt_idx].address = 1400 xen_idt[xen_idt_idx].address =
1401 (uint32_t)IDTVEC(exceptions)[x]; 1401 (uint32_t)IDTVEC(exceptions)[x];
1402 xen_idt_idx++; 1402 xen_idt_idx++;
1403 } 1403 }
1404 KASSERT(xen_idt_idx < MAX_XEN_IDT); 1404 KASSERT(xen_idt_idx < MAX_XEN_IDT);
1405 xen_idt[xen_idt_idx].vector = 128; 1405 xen_idt[xen_idt_idx].vector = 128;
1406 xen_idt[xen_idt_idx].flags = SEL_UPL; 1406 xen_idt[xen_idt_idx].flags = SEL_UPL;
1407 xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); 1407 xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
1408 xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(syscall); 1408 xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(syscall);
1409 xen_idt_idx++; 1409 xen_idt_idx++;
1410 KASSERT(xen_idt_idx < MAX_XEN_IDT); 1410 KASSERT(xen_idt_idx < MAX_XEN_IDT);
1411 xen_idt[xen_idt_idx].vector = 0xd2; 1411 xen_idt[xen_idt_idx].vector = 0xd2;
1412 xen_idt[xen_idt_idx].flags = SEL_UPL; 1412 xen_idt[xen_idt_idx].flags = SEL_UPL;
1413 xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); 1413 xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL);
1414 xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(svr4_fasttrap); 1414 xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(svr4_fasttrap);
1415 xen_idt_idx++; 1415 xen_idt_idx++;
1416 lldt(GSEL(GLDT_SEL, SEL_KPL)); 1416 lldt(GSEL(GLDT_SEL, SEL_KPL));
1417 cpu_init_idt(); 1417 cpu_init_idt();
1418#endif /* XEN */ 1418#endif /* XEN */
1419 1419
1420 init386_ksyms(); 1420 init386_ksyms();
1421 1421
1422#if NMCA > 0 1422#if NMCA > 0
1423 /* check for MCA bus, needed to be done before ISA stuff - if 1423 /* check for MCA bus, needed to be done before ISA stuff - if
1424 * MCA is detected, ISA needs to use level triggered interrupts 1424 * MCA is detected, ISA needs to use level triggered interrupts
1425 * by default */ 1425 * by default */
1426 mca_busprobe(); 1426 mca_busprobe();
1427#endif 1427#endif
1428 1428
1429#ifdef XEN 1429#ifdef XEN
1430 XENPRINTF(("events_default_setup\n")); 1430 XENPRINTF(("events_default_setup\n"));
1431 events_default_setup(); 1431 events_default_setup();
1432#else 1432#else
1433 intr_default_setup(); 1433 intr_default_setup();
1434#endif 1434#endif
1435 1435
1436 splraise(IPL_HIGH); 1436 splraise(IPL_HIGH);
1437 x86_enable_intr(); 1437 x86_enable_intr();
1438 1438
1439#ifdef DDB 1439#ifdef DDB
1440 if (boothowto & RB_KDB) 1440 if (boothowto & RB_KDB)
1441 Debugger(); 1441 Debugger();
1442#endif 1442#endif
1443#ifdef IPKDB 1443#ifdef IPKDB
1444 ipkdb_init(); 1444 ipkdb_init();
1445 if (boothowto & RB_KDB) 1445 if (boothowto & RB_KDB)
1446 ipkdb_connect(0); 1446 ipkdb_connect(0);
1447#endif 1447#endif
1448#ifdef KGDB 1448#ifdef KGDB
1449 kgdb_port_init(); 1449 kgdb_port_init();
1450 if (boothowto & RB_KDB) { 1450 if (boothowto & RB_KDB) {
1451 kgdb_debug_init = 1; 1451 kgdb_debug_init = 1;
1452 kgdb_connect(1); 1452 kgdb_connect(1);
1453 } 1453 }
1454#endif 1454#endif
1455 1455
1456 if (physmem < btoc(2 * 1024 * 1024)) { 1456 if (physmem < btoc(2 * 1024 * 1024)) {
1457 printf("warning: too little memory available; " 1457 printf("warning: too little memory available; "
1458 "have %lu bytes, want %lu bytes\n" 1458 "have %lu bytes, want %lu bytes\n"
1459 "running in degraded mode\n" 1459 "running in degraded mode\n"
1460 "press a key to confirm\n\n", 1460 "press a key to confirm\n\n",
1461 (unsigned long)ptoa(physmem), 2*1024*1024UL); 1461 (unsigned long)ptoa(physmem), 2*1024*1024UL);
1462 cngetc(); 1462 cngetc();
1463 } 1463 }
1464 1464
1465 rw_init(&svr4_fasttrap_lock); 1465 rw_init(&svr4_fasttrap_lock);
1466} 1466}
1467 1467
1468#include <dev/ic/mc146818reg.h> /* for NVRAM POST */ 1468#include <dev/ic/mc146818reg.h> /* for NVRAM POST */
1469#include <i386/isa/nvram.h> /* for NVRAM POST */ 1469#include <i386/isa/nvram.h> /* for NVRAM POST */
1470 1470
1471void 1471void
1472cpu_reset(void) 1472cpu_reset(void)
1473{ 1473{
1474#ifdef XEN 1474#ifdef XEN
1475 HYPERVISOR_reboot(); 1475 HYPERVISOR_reboot();
1476 for (;;); 1476 for (;;);
1477#else /* XEN */ 1477#else /* XEN */
1478 struct region_descriptor region; 1478 struct region_descriptor region;
1479 1479
1480 x86_disable_intr(); 1480 x86_disable_intr();
1481 1481
1482 /* 1482 /*
1483 * Ensure the NVRAM reset byte contains something vaguely sane. 1483 * Ensure the NVRAM reset byte contains something vaguely sane.
1484 */ 1484 */
1485 1485
1486 outb(IO_RTC, NVRAM_RESET); 1486 outb(IO_RTC, NVRAM_RESET);
1487 outb(IO_RTC+1, NVRAM_RESET_RST); 1487 outb(IO_RTC+1, NVRAM_RESET_RST);
1488 1488
1489 /* 1489 /*
1490 * Reset AMD Geode SC1100. 1490 * Reset AMD Geode SC1100.
1491 * 1491 *
1492 * 1) Write PCI Configuration Address Register (0xcf8) to 1492 * 1) Write PCI Configuration Address Register (0xcf8) to
1493 * select Function 0, Register 0x44: Bridge Configuration, 1493 * select Function 0, Register 0x44: Bridge Configuration,
1494 * GPIO and LPC Configuration Register Space, Reset 1494 * GPIO and LPC Configuration Register Space, Reset
1495 * Control Register. 1495 * Control Register.
1496 * 1496 *
1497 * 2) Write 0xf to PCI Configuration Data Register (0xcfc) 1497 * 2) Write 0xf to PCI Configuration Data Register (0xcfc)
1498 * to reset IDE controller, IDE bus, and PCI bus, and 1498 * to reset IDE controller, IDE bus, and PCI bus, and
1499 * to trigger a system-wide reset. 1499 * to trigger a system-wide reset.
1500 *  1500 *
1501 * See AMD Geode SC1100 Processor Data Book, Revision 2.0, 1501 * See AMD Geode SC1100 Processor Data Book, Revision 2.0,
1502 * sections 6.3.1, 6.3.2, and 6.4.1. 1502 * sections 6.3.1, 6.3.2, and 6.4.1.
1503 */ 1503 */
1504 if (cpu_info_primary.ci_signature == 0x540) { 1504 if (cpu_info_primary.ci_signature == 0x540) {
1505 outl(0xcf8, 0x80009044); 1505 outl(0xcf8, 0x80009044);
1506 outl(0xcfc, 0xf); 1506 outl(0xcfc, 0xf);
1507 } 1507 }
1508 1508
1509 x86_reset(); 1509 x86_reset();
1510 1510
1511 /* 1511 /*
1512 * Try to cause a triple fault and watchdog reset by making the IDT 1512 * Try to cause a triple fault and watchdog reset by making the IDT
1513 * invalid and causing a fault. 1513 * invalid and causing a fault.
1514 */ 1514 */
1515 memset((void *)idt, 0, NIDT * sizeof(idt[0])); 1515 memset((void *)idt, 0, NIDT * sizeof(idt[0]));
1516 setregion(&region, idt, NIDT * sizeof(idt[0]) - 1); 1516 setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
1517 lidt(&region); 1517 lidt(&region);
1518 breakpoint(); 1518 breakpoint();
1519 1519
1520#if 0 1520#if 0
1521 /* 1521 /*
1522 * Try to cause a triple fault and watchdog reset by unmapping the 1522 * Try to cause a triple fault and watchdog reset by unmapping the
1523 * entire address space and doing a TLB flush. 1523 * entire address space and doing a TLB flush.
1524 */ 1524 */
1525 memset((void *)PTD, 0, PAGE_SIZE); 1525 memset((void *)PTD, 0, PAGE_SIZE);
1526 tlbflush(); 1526 tlbflush();
1527#endif 1527#endif
1528 1528
1529 for (;;); 1529 for (;;);
1530#endif /* XEN */ 1530#endif /* XEN */
1531} 1531}
1532 1532
1533void 1533void
1534cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags) 1534cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags)
1535{ 1535{
1536 const struct trapframe *tf = l->l_md.md_regs; 1536 const struct trapframe *tf = l->l_md.md_regs;
1537 __greg_t *gr = mcp->__gregs; 1537 __greg_t *gr = mcp->__gregs;
1538 __greg_t ras_eip; 1538 __greg_t ras_eip;
1539 1539
1540 /* Save register context. */ 1540 /* Save register context. */
1541#ifdef VM86 1541#ifdef VM86
1542 if (tf->tf_eflags & PSL_VM) { 1542 if (tf->tf_eflags & PSL_VM) {
1543 gr[_REG_GS] = tf->tf_vm86_gs; 1543 gr[_REG_GS] = tf->tf_vm86_gs;
1544 gr[_REG_FS] = tf->tf_vm86_fs; 1544 gr[_REG_FS] = tf->tf_vm86_fs;
1545 gr[_REG_ES] = tf->tf_vm86_es; 1545 gr[_REG_ES] = tf->tf_vm86_es;
1546 gr[_REG_DS] = tf->tf_vm86_ds; 1546 gr[_REG_DS] = tf->tf_vm86_ds;
1547 gr[_REG_EFL] = get_vflags(l); 1547 gr[_REG_EFL] = get_vflags(l);
1548 } else 1548 } else
1549#endif 1549#endif
1550 { 1550 {
1551 gr[_REG_GS] = tf->tf_gs; 1551 gr[_REG_GS] = tf->tf_gs;
1552 gr[_REG_FS] = tf->tf_fs; 1552 gr[_REG_FS] = tf->tf_fs;
1553 gr[_REG_ES] = tf->tf_es; 1553 gr[_REG_ES] = tf->tf_es;
1554 gr[_REG_DS] = tf->tf_ds; 1554 gr[_REG_DS] = tf->tf_ds;
1555 gr[_REG_EFL] = tf->tf_eflags; 1555 gr[_REG_EFL] = tf->tf_eflags;
1556 } 1556 }
1557 gr[_REG_EDI] = tf->tf_edi; 1557 gr[_REG_EDI] = tf->tf_edi;
1558 gr[_REG_ESI] = tf->tf_esi; 1558 gr[_REG_ESI] = tf->tf_esi;
1559 gr[_REG_EBP] = tf->tf_ebp; 1559 gr[_REG_EBP] = tf->tf_ebp;
1560 gr[_REG_EBX] = tf->tf_ebx; 1560 gr[_REG_EBX] = tf->tf_ebx;
1561 gr[_REG_EDX] = tf->tf_edx; 1561 gr[_REG_EDX] = tf->tf_edx;
1562 gr[_REG_ECX] = tf->tf_ecx; 1562 gr[_REG_ECX] = tf->tf_ecx;
1563 gr[_REG_EAX] = tf->tf_eax; 1563 gr[_REG_EAX] = tf->tf_eax;
1564 gr[_REG_EIP] = tf->tf_eip; 1564 gr[_REG_EIP] = tf->tf_eip;
1565 gr[_REG_CS] = tf->tf_cs; 1565 gr[_REG_CS] = tf->tf_cs;
1566 gr[_REG_ESP] = tf->tf_esp; 1566 gr[_REG_ESP] = tf->tf_esp;
1567 gr[_REG_UESP] = tf->tf_esp; 1567 gr[_REG_UESP] = tf->tf_esp;
1568 gr[_REG_SS] = tf->tf_ss; 1568 gr[_REG_SS] = tf->tf_ss;
1569 gr[_REG_TRAPNO] = tf->tf_trapno; 1569 gr[_REG_TRAPNO] = tf->tf_trapno;
1570 gr[_REG_ERR] = tf->tf_err; 1570 gr[_REG_ERR] = tf->tf_err;
1571 1571
1572 if ((ras_eip = (__greg_t)ras_lookup(l->l_proc, 1572 if ((ras_eip = (__greg_t)ras_lookup(l->l_proc,
1573 (void *) gr[_REG_EIP])) != -1) 1573 (void *) gr[_REG_EIP])) != -1)
1574 gr[_REG_EIP] = ras_eip; 1574 gr[_REG_EIP] = ras_eip;
1575 1575
1576 *flags |= _UC_CPU; 1576 *flags |= _UC_CPU;
1577 1577
1578 mcp->_mc_tlsbase = (uintptr_t)l->l_private; 1578 mcp->_mc_tlsbase = (uintptr_t)l->l_private;
1579 *flags |= _UC_TLSBASE; 1579 *flags |= _UC_TLSBASE;
1580 1580
1581 /* 1581 /*
1582 * Save floating point register context. 1582 * Save floating point register context.
1583 * 1583 *
1584 * If the cpu doesn't support fxsave we must still write to 1584 * If the cpu doesn't support fxsave we must still write to
1585 * the entire 512 byte area - otherwise we leak kernel memory 1585 * the entire 512 byte area - otherwise we leak kernel memory
1586 * contents to userspace. 1586 * contents to userspace.
1587 * It wouldn't matter if we were doing the copyout here. 1587 * It wouldn't matter if we were doing the copyout here.
1588 * So we might as well convert to fxsave format. 1588 * So we might as well convert to fxsave format.
1589 */ 1589 */
1590 __CTASSERT(sizeof (struct fxsave) == 1590 __CTASSERT(sizeof (struct fxsave) ==
1591 sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state); 1591 sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
1592 process_read_fpregs_xmm(l, (struct fxsave *) 1592 process_read_fpregs_xmm(l, (struct fxsave *)
1593 &mcp->__fpregs.__fp_reg_set.__fp_xmm_state); 1593 &mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
1594 memset(&mcp->__fpregs.__fp_pad, 0, sizeof mcp->__fpregs.__fp_pad); 1594 memset(&mcp->__fpregs.__fp_pad, 0, sizeof mcp->__fpregs.__fp_pad);
1595 *flags |= _UC_FXSAVE | _UC_FPU; 1595 *flags |= _UC_FXSAVE | _UC_FPU;
1596} 1596}
1597 1597
1598int 1598int
1599cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp) 1599cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp)
1600{ 1600{
1601 const __greg_t *gr = mcp->__gregs; 1601 const __greg_t *gr = mcp->__gregs;
1602 struct trapframe *tf = l->l_md.md_regs; 1602 struct trapframe *tf = l->l_md.md_regs;
1603 1603
1604 /* 1604 /*
1605 * Check for security violations. If we're returning 1605 * Check for security violations. If we're returning
1606 * to protected mode, the CPU will validate the segment 1606 * to protected mode, the CPU will validate the segment
1607 * registers automatically and generate a trap on 1607 * registers automatically and generate a trap on
1608 * violations. We handle the trap, rather than doing 1608 * violations. We handle the trap, rather than doing
1609 * all of the checking here. 1609 * all of the checking here.
1610 */ 1610 */
1611 if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) || 1611 if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) ||
1612 !USERMODE(gr[_REG_CS], gr[_REG_EFL])) 1612 !USERMODE(gr[_REG_CS], gr[_REG_EFL]))
1613 return EINVAL; 1613 return EINVAL;
1614 1614
1615 return 0; 1615 return 0;
1616} 1616}
1617 1617
1618int 1618int
1619cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags) 1619cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags)
1620{ 1620{
1621 struct trapframe *tf = l->l_md.md_regs; 1621 struct trapframe *tf = l->l_md.md_regs;
1622 const __greg_t *gr = mcp->__gregs; 1622 const __greg_t *gr = mcp->__gregs;
1623 struct proc *p = l->l_proc; 1623 struct proc *p = l->l_proc;
1624 int error; 1624 int error;
1625 1625
1626 /* Restore register context, if any. */ 1626 /* Restore register context, if any. */
1627 if ((flags & _UC_CPU) != 0) { 1627 if ((flags & _UC_CPU) != 0) {
1628#ifdef VM86 1628#ifdef VM86
1629 if (gr[_REG_EFL] & PSL_VM) { 1629 if (gr[_REG_EFL] & PSL_VM) {
1630 tf->tf_vm86_gs = gr[_REG_GS]; 1630 tf->tf_vm86_gs = gr[_REG_GS];
1631 tf->tf_vm86_fs = gr[_REG_FS]; 1631 tf->tf_vm86_fs = gr[_REG_FS];
1632 tf->tf_vm86_es = gr[_REG_ES]; 1632 tf->tf_vm86_es = gr[_REG_ES];
1633 tf->tf_vm86_ds = gr[_REG_DS]; 1633 tf->tf_vm86_ds = gr[_REG_DS];
1634 set_vflags(l, gr[_REG_EFL]); 1634 set_vflags(l, gr[_REG_EFL]);
1635 if (flags & _UC_VM) { 1635 if (flags & _UC_VM) {
1636 void syscall_vm86(struct trapframe *); 1636 void syscall_vm86(struct trapframe *);
1637 l->l_proc->p_md.md_syscall = syscall_vm86; 1637 l->l_proc->p_md.md_syscall = syscall_vm86;
1638 } 1638 }
1639 } else 1639 } else
1640#endif 1640#endif
1641 { 1641 {
1642 error = cpu_mcontext_validate(l, mcp); 1642 error = cpu_mcontext_validate(l, mcp);
1643 if (error) 1643 if (error)
1644 return error; 1644 return error;
1645 1645
1646 tf->tf_gs = gr[_REG_GS]; 1646 tf->tf_gs = gr[_REG_GS];
1647 tf->tf_fs = gr[_REG_FS]; 1647 tf->tf_fs = gr[_REG_FS];
1648 tf->tf_es = gr[_REG_ES]; 1648 tf->tf_es = gr[_REG_ES];
1649 tf->tf_ds = gr[_REG_DS]; 1649 tf->tf_ds = gr[_REG_DS];
1650 /* Only change the user-alterable part of eflags */ 1650 /* Only change the user-alterable part of eflags */
1651 tf->tf_eflags &= ~PSL_USER; 1651 tf->tf_eflags &= ~PSL_USER;
1652 tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER); 1652 tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER);
1653 } 1653 }
1654 tf->tf_edi = gr[_REG_EDI]; 1654 tf->tf_edi = gr[_REG_EDI];
1655 tf->tf_esi = gr[_REG_ESI]; 1655 tf->tf_esi = gr[_REG_ESI];
1656 tf->tf_ebp = gr[_REG_EBP]; 1656 tf->tf_ebp = gr[_REG_EBP];
1657 tf->tf_ebx = gr[_REG_EBX]; 1657 tf->tf_ebx = gr[_REG_EBX];
1658 tf->tf_edx = gr[_REG_EDX]; 1658 tf->tf_edx = gr[_REG_EDX];
1659 tf->tf_ecx = gr[_REG_ECX]; 1659 tf->tf_ecx = gr[_REG_ECX];
1660 tf->tf_eax = gr[_REG_EAX]; 1660 tf->tf_eax = gr[_REG_EAX];
1661 tf->tf_eip = gr[_REG_EIP]; 1661 tf->tf_eip = gr[_REG_EIP];
1662 tf->tf_cs = gr[_REG_CS]; 1662 tf->tf_cs = gr[_REG_CS];
1663 tf->tf_esp = gr[_REG_UESP]; 1663 tf->tf_esp = gr[_REG_UESP];
1664 tf->tf_ss = gr[_REG_SS]; 1664 tf->tf_ss = gr[_REG_SS];
1665 } 1665 }
1666 1666
1667 if ((flags & _UC_TLSBASE) != 0) 1667 if ((flags & _UC_TLSBASE) != 0)
1668 lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase); 1668 lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase);
1669 1669
1670 /* Restore floating point register context, if given. */ 1670 /* Restore floating point register context, if given. */
1671 if ((flags & _UC_FPU) != 0) { 1671 if ((flags & _UC_FPU) != 0) {
1672 __CTASSERT(sizeof (struct fxsave) == 1672 __CTASSERT(sizeof (struct fxsave) ==
1673 sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state); 1673 sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
1674 __CTASSERT(sizeof (struct save87) == 1674 __CTASSERT(sizeof (struct save87) ==
1675 sizeof mcp->__fpregs.__fp_reg_set.__fpchip_state); 1675 sizeof mcp->__fpregs.__fp_reg_set.__fpchip_state);
1676 1676
1677 if (flags & _UC_FXSAVE) { 1677 if (flags & _UC_FXSAVE) {
1678 process_write_fpregs_xmm(l, (const struct fxsave *) 1678 process_write_fpregs_xmm(l, (const struct fxsave *)
1679 &mcp->__fpregs.__fp_reg_set.__fp_xmm_state); 1679 &mcp->__fpregs.__fp_reg_set.__fp_xmm_state);
1680 } else { 1680 } else {
1681 process_write_fpregs_s87(l, (const struct save87 *) 1681 process_write_fpregs_s87(l, (const struct save87 *)
1682 &mcp->__fpregs.__fp_reg_set.__fpchip_state); 1682 &mcp->__fpregs.__fp_reg_set.__fpchip_state);
1683 } 1683 }
1684 } 1684 }
1685 1685
1686 mutex_enter(p->p_lock); 1686 mutex_enter(p->p_lock);
1687 if (flags & _UC_SETSTACK) 1687 if (flags & _UC_SETSTACK)
1688 l->l_sigstk.ss_flags |= SS_ONSTACK; 1688 l->l_sigstk.ss_flags |= SS_ONSTACK;
1689 if (flags & _UC_CLRSTACK) 1689 if (flags & _UC_CLRSTACK)
1690 l->l_sigstk.ss_flags &= ~SS_ONSTACK; 1690 l->l_sigstk.ss_flags &= ~SS_ONSTACK;
1691 mutex_exit(p->p_lock); 1691 mutex_exit(p->p_lock);
1692 return (0); 1692 return (0);
1693} 1693}
1694 1694
1695void 1695void
1696cpu_initclocks(void) 1696cpu_initclocks(void)
1697{ 1697{
1698 1698
1699 (*initclock_func)(); 1699 (*initclock_func)();
1700} 1700}
1701 1701
1702#define DEV_IO 14 /* iopl for compat_10 */ 1702#define DEV_IO 14 /* iopl for compat_10 */
1703 1703
1704int 1704int
1705mm_md_open(dev_t dev, int flag, int mode, struct lwp *l) 1705mm_md_open(dev_t dev, int flag, int mode, struct lwp *l)
1706{ 1706{
1707 1707
1708 switch (minor(dev)) { 1708 switch (minor(dev)) {
1709 case DEV_IO: 1709 case DEV_IO:
1710 /* 1710 /*
1711 * This is done by i386_iopl(3) now. 1711 * This is done by i386_iopl(3) now.
1712 * 1712 *
1713 * #if defined(COMPAT_10) || defined(COMPAT_FREEBSD) 1713 * #if defined(COMPAT_10) || defined(COMPAT_FREEBSD)
1714 */ 1714 */
1715 if (flag & FWRITE) { 1715 if (flag & FWRITE) {
1716 struct trapframe *fp; 1716 struct trapframe *fp;
1717 int error; 1717 int error;
1718 1718
1719 error = kauth_authorize_machdep(l->l_cred, 1719 error = kauth_authorize_machdep(l->l_cred,
1720 KAUTH_MACHDEP_IOPL, NULL, NULL, NULL, NULL); 1720 KAUTH_MACHDEP_IOPL, NULL, NULL, NULL, NULL);
1721 if (error) 1721 if (error)
1722 return (error); 1722 return (error);
1723 fp = curlwp->l_md.md_regs; 1723 fp = curlwp->l_md.md_regs;
1724 fp->tf_eflags |= PSL_IOPL; 1724 fp->tf_eflags |= PSL_IOPL;
1725 } 1725 }
1726 break; 1726 break;
1727 default: 1727 default:
1728 break; 1728 break;
1729 } 1729 }
1730 return 0; 1730 return 0;
1731} 1731}
1732 1732
1733#ifdef PAE 1733#ifdef PAE
1734void 1734void
1735cpu_alloc_l3_page(struct cpu_info *ci) 1735cpu_alloc_l3_page(struct cpu_info *ci)
1736{ 1736{
1737 int ret; 1737 int ret;
1738 struct pglist pg; 1738 struct pglist pg;
1739 struct vm_page *vmap; 1739 struct vm_page *vmap;
1740 1740
1741 KASSERT(ci != NULL); 1741 KASSERT(ci != NULL);
1742 /* 1742 /*
 1743 * Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, the PA must 1743 * Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, the PA must
 1744 * reside below the 4GB boundary. 1744 * reside below the 4GB boundary.
1745 */ 1745 */
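	/*
	 * The 32-byte alignment passed to uvm_pglistalloc() below presumably
	 * reflects the PAE requirement that the L3 page directory pointer
	 * table (four 8-byte entries) be 32-byte aligned, since %cr3 carries
	 * only the upper bits of its address in PAE mode.
	 */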
1746 ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0); 1746 ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0);
1747 vmap = TAILQ_FIRST(&pg); 1747 vmap = TAILQ_FIRST(&pg);
1748 1748
1749 if (ret != 0 || vmap == NULL) 1749 if (ret != 0 || vmap == NULL)
1750 panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n", 1750 panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n",
1751 __func__, cpu_index(ci), ret); 1751 __func__, cpu_index(ci), ret);
1752 1752
1753 ci->ci_pae_l3_pdirpa = vmap->phys_addr; 1753 ci->ci_pae_l3_pdirpa = vmap->phys_addr;
1754 1754
1755 ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 1755 ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
1756 UVM_KMF_VAONLY | UVM_KMF_NOWAIT); 1756 UVM_KMF_VAONLY | UVM_KMF_NOWAIT);
1757 if (ci->ci_pae_l3_pdir == NULL) 1757 if (ci->ci_pae_l3_pdir == NULL)
1758 panic("%s: failed to allocate L3 PD for CPU %d\n", 1758 panic("%s: failed to allocate L3 PD for CPU %d\n",
1759 __func__, cpu_index(ci)); 1759 __func__, cpu_index(ci));
1760 1760
1761 pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa, 1761 pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa,
1762 VM_PROT_READ | VM_PROT_WRITE, 0); 1762 VM_PROT_READ | VM_PROT_WRITE, 0);
1763 1763
1764 pmap_update(pmap_kernel()); 1764 pmap_update(pmap_kernel());
1765} 1765}
1766#endif /* PAE */ 1766#endif /* PAE */

cvs diff -r1.194 -r1.195 src/sys/arch/x86/x86/pmap.c (switch to unified diff)

--- src/sys/arch/x86/x86/pmap.c 2016/05/14 09:37:21 1.194
+++ src/sys/arch/x86/x86/pmap.c 2016/05/15 10:35:54 1.195
@@ -1,1173 +1,1173 @@ @@ -1,1173 +1,1173 @@
1/* $NetBSD: pmap.c,v 1.194 2016/05/14 09:37:21 maxv Exp $ */ 1/* $NetBSD: pmap.c,v 1.195 2016/05/15 10:35:54 maxv Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc. 4 * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Andrew Doran. 8 * by Andrew Doran.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Copyright (c) 2007 Manuel Bouyer. 33 * Copyright (c) 2007 Manuel Bouyer.
34 * 34 *
35 * Redistribution and use in source and binary forms, with or without 35 * Redistribution and use in source and binary forms, with or without
36 * modification, are permitted provided that the following conditions 36 * modification, are permitted provided that the following conditions
37 * are met: 37 * are met:
38 * 1. Redistributions of source code must retain the above copyright 38 * 1. Redistributions of source code must retain the above copyright
39 * notice, this list of conditions and the following disclaimer. 39 * notice, this list of conditions and the following disclaimer.
40 * 2. Redistributions in binary form must reproduce the above copyright 40 * 2. Redistributions in binary form must reproduce the above copyright
41 * notice, this list of conditions and the following disclaimer in the 41 * notice, this list of conditions and the following disclaimer in the
42 * documentation and/or other materials provided with the distribution. 42 * documentation and/or other materials provided with the distribution.
43 * 43 *
44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 44 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 45 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 46 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 47 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 48 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 49 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 50 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 51 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 52 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 53 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
54 * 54 *
55 */ 55 */
56 56
57/* 57/*
58 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> 58 * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr>
59 * 59 *
60 * Permission to use, copy, modify, and distribute this software for any 60 * Permission to use, copy, modify, and distribute this software for any
61 * purpose with or without fee is hereby granted, provided that the above 61 * purpose with or without fee is hereby granted, provided that the above
62 * copyright notice and this permission notice appear in all copies. 62 * copyright notice and this permission notice appear in all copies.
63 * 63 *
64 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 64 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
65 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 65 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
66 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 66 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
67 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 67 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
68 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 68 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
69 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 69 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
70 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 70 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
71 */ 71 */
72 72
73/* 73/*
74 * Copyright (c) 1997 Charles D. Cranor and Washington University. 74 * Copyright (c) 1997 Charles D. Cranor and Washington University.
75 * All rights reserved. 75 * All rights reserved.
76 * 76 *
77 * Redistribution and use in source and binary forms, with or without 77 * Redistribution and use in source and binary forms, with or without
78 * modification, are permitted provided that the following conditions 78 * modification, are permitted provided that the following conditions
79 * are met: 79 * are met:
80 * 1. Redistributions of source code must retain the above copyright 80 * 1. Redistributions of source code must retain the above copyright
81 * notice, this list of conditions and the following disclaimer. 81 * notice, this list of conditions and the following disclaimer.
82 * 2. Redistributions in binary form must reproduce the above copyright 82 * 2. Redistributions in binary form must reproduce the above copyright
83 * notice, this list of conditions and the following disclaimer in the 83 * notice, this list of conditions and the following disclaimer in the
84 * documentation and/or other materials provided with the distribution. 84 * documentation and/or other materials provided with the distribution.
85 * 85 *
86 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 86 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
87 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 87 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
88 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 88 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
89 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 89 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
90 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 90 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
91 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 91 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
92 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 92 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
93 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 93 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
94 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 94 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
95 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 95 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
96 */ 96 */
97 97
98/* 98/*
99 * Copyright 2001 (c) Wasabi Systems, Inc. 99 * Copyright 2001 (c) Wasabi Systems, Inc.
100 * All rights reserved. 100 * All rights reserved.
101 * 101 *
102 * Written by Frank van der Linden for Wasabi Systems, Inc. 102 * Written by Frank van der Linden for Wasabi Systems, Inc.
103 * 103 *
104 * Redistribution and use in source and binary forms, with or without 104 * Redistribution and use in source and binary forms, with or without
105 * modification, are permitted provided that the following conditions 105 * modification, are permitted provided that the following conditions
106 * are met: 106 * are met:
107 * 1. Redistributions of source code must retain the above copyright 107 * 1. Redistributions of source code must retain the above copyright
108 * notice, this list of conditions and the following disclaimer. 108 * notice, this list of conditions and the following disclaimer.
109 * 2. Redistributions in binary form must reproduce the above copyright 109 * 2. Redistributions in binary form must reproduce the above copyright
110 * notice, this list of conditions and the following disclaimer in the 110 * notice, this list of conditions and the following disclaimer in the
111 * documentation and/or other materials provided with the distribution. 111 * documentation and/or other materials provided with the distribution.
112 * 3. All advertising materials mentioning features or use of this software 112 * 3. All advertising materials mentioning features or use of this software
113 * must display the following acknowledgement: 113 * must display the following acknowledgement:
114 * This product includes software developed for the NetBSD Project by 114 * This product includes software developed for the NetBSD Project by
115 * Wasabi Systems, Inc. 115 * Wasabi Systems, Inc.
116 * 4. The name of Wasabi Systems, Inc. may not be used to endorse 116 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
117 * or promote products derived from this software without specific prior 117 * or promote products derived from this software without specific prior
118 * written permission. 118 * written permission.
119 * 119 *
120 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND 120 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
121 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 121 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
122 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 122 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
123 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC 123 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
124 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 124 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
125 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 125 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
126 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 126 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
127 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 127 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
128 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 128 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
129 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 129 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
130 * POSSIBILITY OF SUCH DAMAGE. 130 * POSSIBILITY OF SUCH DAMAGE.
131 */ 131 */
132 132
133/* 133/*
134 * This is the i386 pmap modified and generalized to support x86-64 134 * This is the i386 pmap modified and generalized to support x86-64
135 * as well. The idea is to hide the upper N levels of the page tables 135 * as well. The idea is to hide the upper N levels of the page tables
136 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest 136 * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest
137 * is mostly untouched, except that it uses some more generalized 137 * is mostly untouched, except that it uses some more generalized
138 * macros and interfaces. 138 * macros and interfaces.
139 * 139 *
140 * This pmap has been tested on the i386 as well, and it can be easily 140 * This pmap has been tested on the i386 as well, and it can be easily
141 * adapted to PAE. 141 * adapted to PAE.
142 * 142 *
143 * fvdl@wasabisystems.com 18-Jun-2001 143 * fvdl@wasabisystems.com 18-Jun-2001
144 */ 144 */
145 145
146/* 146/*
147 * pmap.c: i386 pmap module rewrite 147 * pmap.c: i386 pmap module rewrite
148 * Chuck Cranor <chuck@netbsd> 148 * Chuck Cranor <chuck@netbsd>
149 * 11-Aug-97 149 * 11-Aug-97
150 * 150 *
151 * history of this pmap module: in addition to my own input, i used 151 * history of this pmap module: in addition to my own input, i used
152 * the following references for this rewrite of the i386 pmap: 152 * the following references for this rewrite of the i386 pmap:
153 * 153 *
154 * [1] the NetBSD i386 pmap. this pmap appears to be based on the 154 * [1] the NetBSD i386 pmap. this pmap appears to be based on the
155 * BSD hp300 pmap done by Mike Hibler at University of Utah. 155 * BSD hp300 pmap done by Mike Hibler at University of Utah.
156 * it was then ported to the i386 by William Jolitz of UUNET 156 * it was then ported to the i386 by William Jolitz of UUNET
157 * Technologies, Inc. Then Charles M. Hannum of the NetBSD 157 * Technologies, Inc. Then Charles M. Hannum of the NetBSD
158 * project fixed some bugs and provided some speed ups. 158 * project fixed some bugs and provided some speed ups.
159 * 159 *
160 * [2] the FreeBSD i386 pmap. this pmap seems to be the 160 * [2] the FreeBSD i386 pmap. this pmap seems to be the
161 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson 161 * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson
162 * and David Greenman. 162 * and David Greenman.
163 * 163 *
164 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated 164 * [3] the Mach pmap. this pmap, from CMU, seems to have migrated
165 * between several processors. the VAX version was done by 165 * between several processors. the VAX version was done by
166 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 166 * Avadis Tevanian, Jr., and Michael Wayne Young. the i386
167 * version was done by Lance Berc, Mike Kupfer, Bob Baron, 167 * version was done by Lance Berc, Mike Kupfer, Bob Baron,
168 * David Golub, and Richard Draves. the alpha version was 168 * David Golub, and Richard Draves. the alpha version was
169 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou 169 * done by Alessandro Forin (CMU/Mach) and Chris Demetriou
170 * (NetBSD/alpha). 170 * (NetBSD/alpha).
171 */ 171 */
172 172
173#include <sys/cdefs.h> 173#include <sys/cdefs.h>
174__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.194 2016/05/14 09:37:21 maxv Exp $"); 174__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.195 2016/05/15 10:35:54 maxv Exp $");
175 175
176#include "opt_user_ldt.h" 176#include "opt_user_ldt.h"
177#include "opt_lockdebug.h" 177#include "opt_lockdebug.h"
178#include "opt_multiprocessor.h" 178#include "opt_multiprocessor.h"
179#include "opt_xen.h" 179#include "opt_xen.h"
180#if !defined(__x86_64__) 180#if !defined(__x86_64__)
181#include "opt_kstack_dr0.h" 181#include "opt_kstack_dr0.h"
182#endif /* !defined(__x86_64__) */ 182#endif /* !defined(__x86_64__) */
183 183
184#include <sys/param.h> 184#include <sys/param.h>
185#include <sys/systm.h> 185#include <sys/systm.h>
186#include <sys/proc.h> 186#include <sys/proc.h>
187#include <sys/pool.h> 187#include <sys/pool.h>
188#include <sys/kernel.h> 188#include <sys/kernel.h>
189#include <sys/atomic.h> 189#include <sys/atomic.h>
190#include <sys/cpu.h> 190#include <sys/cpu.h>
191#include <sys/intr.h> 191#include <sys/intr.h>
192#include <sys/xcall.h> 192#include <sys/xcall.h>
193#include <sys/kcore.h> 193#include <sys/kcore.h>
194 194
195#include <uvm/uvm.h> 195#include <uvm/uvm.h>
196#include <uvm/pmap/pmap_pvt.h> 196#include <uvm/pmap/pmap_pvt.h>
197 197
198#include <dev/isa/isareg.h> 198#include <dev/isa/isareg.h>
199 199
200#include <machine/specialreg.h> 200#include <machine/specialreg.h>
201#include <machine/gdt.h> 201#include <machine/gdt.h>
202#include <machine/isa_machdep.h> 202#include <machine/isa_machdep.h>
203#include <machine/cpuvar.h> 203#include <machine/cpuvar.h>
204#include <machine/cputypes.h> 204#include <machine/cputypes.h>
205 205
206#include <x86/pmap.h> 206#include <x86/pmap.h>
207#include <x86/pmap_pv.h> 207#include <x86/pmap_pv.h>
208 208
209#include <x86/i82489reg.h> 209#include <x86/i82489reg.h>
210#include <x86/i82489var.h> 210#include <x86/i82489var.h>
211 211
212#ifdef XEN 212#ifdef XEN
213#include <xen/xen-public/xen.h> 213#include <xen/xen-public/xen.h>
214#include <xen/hypervisor.h> 214#include <xen/hypervisor.h>
215#endif 215#endif
216 216
217/* 217/*
218 * general info: 218 * general info:
219 * 219 *
220 * - for an explanation of how the i386 MMU hardware works see 220 * - for an explanation of how the i386 MMU hardware works see
221 * the comments in <machine/pte.h>. 221 * the comments in <machine/pte.h>.
222 * 222 *
223 * - for an explanation of the general memory structure used by 223 * - for an explanation of the general memory structure used by
224 * this pmap (including the recursive mapping), see the comments 224 * this pmap (including the recursive mapping), see the comments
225 * in <machine/pmap.h>. 225 * in <machine/pmap.h>.
226 * 226 *
227 * this file contains the code for the "pmap module." the module's 227 * this file contains the code for the "pmap module." the module's
228 * job is to manage the hardware's virtual to physical address mappings. 228 * job is to manage the hardware's virtual to physical address mappings.
229 * note that there are two levels of mapping in the VM system: 229 * note that there are two levels of mapping in the VM system:
230 * 230 *
231 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's 231 * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's
232 * to map ranges of virtual address space to objects/files. for 232 * to map ranges of virtual address space to objects/files. for
233 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only 233 * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only
234 * to the file /bin/ls starting at offset zero." note that 234 * to the file /bin/ls starting at offset zero." note that
235 * the upper layer mapping is not concerned with how individual 235 * the upper layer mapping is not concerned with how individual
236 * vm_pages are mapped. 236 * vm_pages are mapped.
237 * 237 *
238 * [2] the lower layer of the VM system (the pmap) maintains the mappings 238 * [2] the lower layer of the VM system (the pmap) maintains the mappings
239 * from virtual addresses. it is concerned with which vm_page is 239 * from virtual addresses. it is concerned with which vm_page is
240 * mapped where. for example, when you run /bin/ls and start 240 * mapped where. for example, when you run /bin/ls and start
241 * at page 0x1000 the fault routine may lookup the correct page 241 * at page 0x1000 the fault routine may lookup the correct page
242 * of the /bin/ls file and then ask the pmap layer to establish 242 * of the /bin/ls file and then ask the pmap layer to establish
243 * a mapping for it. 243 * a mapping for it.
244 * 244 *
245 * note that information in the lower layer of the VM system can be 245 * note that information in the lower layer of the VM system can be
246 * thrown away since it can easily be reconstructed from the info 246 * thrown away since it can easily be reconstructed from the info
247 * in the upper layer. 247 * in the upper layer.
248 * 248 *
249 * data structures we use include: 249 * data structures we use include:
250 * 250 *
251 * - struct pmap: describes the address space of one thread 251 * - struct pmap: describes the address space of one thread
252 * - struct pmap_page: describes one pv-tracked page, without 252 * - struct pmap_page: describes one pv-tracked page, without
253 * necessarily a corresponding vm_page 253 * necessarily a corresponding vm_page
254 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA 254 * - struct pv_entry: describes one <PMAP,VA> mapping of a PA
255 * - struct pv_head: there is one pv_head per pv-tracked page of 255 * - struct pv_head: there is one pv_head per pv-tracked page of
256 * physical memory. the pv_head points to a list of pv_entry 256 * physical memory. the pv_head points to a list of pv_entry
257 * structures which describe all the <PMAP,VA> pairs that this 257 * structures which describe all the <PMAP,VA> pairs that this
258 * page is mapped in. this is critical for page based operations 258 * page is mapped in. this is critical for page based operations
259 * such as pmap_page_protect() [change protection on _all_ mappings 259 * such as pmap_page_protect() [change protection on _all_ mappings
260 * of a page] 260 * of a page]
261 */ 261 */
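
An illustrative sketch (not part of this change): the lower-layer interface described in [2] above is what a fault handler or driver ultimately calls. Assuming the caller has already resolved a physical page, establishing and flushing a single mapping looks roughly like this; the function name is hypothetical.

        /* Hypothetical caller sketch, for illustration only. */
        static int
        example_map_one_page(struct pmap *pmap, vaddr_t va, paddr_t pa)
        {
                int error;

                /* lower layer: enter one read-only mapping of pa at va */
                error = pmap_enter(pmap, va, pa, VM_PROT_READ, PMAP_CANFAIL);
                if (error == 0)
                        pmap_update(pmap);      /* process deferred pmap work */
                return error;
        }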
262 262
263/* 263/*
264 * memory allocation 264 * memory allocation
265 * 265 *
266 * - there are three data structures that we must dynamically allocate: 266 * - there are three data structures that we must dynamically allocate:
267 * 267 *
268 * [A] new process' page directory page (PDP) 268 * [A] new process' page directory page (PDP)
269 * - plan 1: done at pmap_create() we use 269 * - plan 1: done at pmap_create() we use
270 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this 270 * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this
271 * allocation. 271 * allocation.
272 * 272 *
273 * if we are low in free physical memory then we sleep in 273 * if we are low in free physical memory then we sleep in
274 * uvm_km_alloc -- in this case this is ok since we are creating 274 * uvm_km_alloc -- in this case this is ok since we are creating
275 * a new pmap and should not be holding any locks. 275 * a new pmap and should not be holding any locks.
276 * 276 *
277 * if the kernel is totally out of virtual space 277 * if the kernel is totally out of virtual space
278 * (i.e. uvm_km_alloc returns NULL), then we panic. 278 * (i.e. uvm_km_alloc returns NULL), then we panic.
279 * 279 *
280 * [B] new page tables pages (PTP) 280 * [B] new page tables pages (PTP)
281 * - call uvm_pagealloc() 281 * - call uvm_pagealloc()
282 * => success: zero page, add to pm_pdir 282 * => success: zero page, add to pm_pdir
283 * => failure: we are out of free vm_pages, let pmap_enter() 283 * => failure: we are out of free vm_pages, let pmap_enter()
284 * tell UVM about it. 284 * tell UVM about it.
285 * 285 *
286 * note: for kernel PTPs, we start with NKPTP of them. as we map 286 * note: for kernel PTPs, we start with NKPTP of them. as we map
287 * kernel memory (at uvm_map time) we check to see if we've grown 287 * kernel memory (at uvm_map time) we check to see if we've grown
288 * the kernel pmap. if so, we call the optional function 288 * the kernel pmap. if so, we call the optional function
289 * pmap_growkernel() to grow the kernel PTPs in advance. 289 * pmap_growkernel() to grow the kernel PTPs in advance.
290 * 290 *
291 * [C] pv_entry structures 291 * [C] pv_entry structures
292 */ 292 */
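
A minimal sketch of case [B] above (illustrative only, not the committed code): a page table page is a zeroed vm_page taken from UVM and owned by the pmap's per-level uvm_object, and a NULL return means we are out of free vm_pages so pmap_enter() must report the failure to UVM. The function name and the way the object offset is computed by the caller are assumptions.

        /* Illustrative sketch of PTP allocation; names are hypothetical. */
        static struct vm_page *
        example_alloc_ptp(struct pmap *pmap, int lidx, voff_t off)
        {
                struct vm_page *ptp;

                /* zeroed page, owned by the pmap's uvm_object for this level */
                ptp = uvm_pagealloc(&pmap->pm_obj[lidx], off, NULL,
                    UVM_PGA_USERESERVE | UVM_PGA_ZERO);
                return ptp;     /* NULL => out of free vm_pages */
        }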
293 293
294/* 294/*
295 * locking 295 * locking
296 * 296 *
297 * we have the following locks that we must contend with: 297 * we have the following locks that we must contend with:
298 * 298 *
299 * mutexes: 299 * mutexes:
300 * 300 *
301 * - pmap lock (per pmap, part of uvm_object) 301 * - pmap lock (per pmap, part of uvm_object)
302 * this lock protects the fields in the pmap structure including 302 * this lock protects the fields in the pmap structure including
303 * the non-kernel PDEs in the PDP, and the PTEs. it also locks 303 * the non-kernel PDEs in the PDP, and the PTEs. it also locks
304 * in the alternate PTE space (since that is determined by the 304 * in the alternate PTE space (since that is determined by the
305 * entry in the PDP). 305 * entry in the PDP).
306 * 306 *
307 * - pvh_lock (per pv_head) 307 * - pvh_lock (per pv_head)
308 * this lock protects the pv_entry list which is chained off the 308 * this lock protects the pv_entry list which is chained off the
309 * pv_head structure for a specific pv-tracked PA. it is locked 309 * pv_head structure for a specific pv-tracked PA. it is locked
310 * when traversing the list (e.g. adding/removing mappings, 310 * when traversing the list (e.g. adding/removing mappings,
311 * syncing R/M bits, etc.) 311 * syncing R/M bits, etc.)
312 * 312 *
313 * - pmaps_lock 313 * - pmaps_lock
314 * this lock protects the list of active pmaps (headed by "pmaps"). 314 * this lock protects the list of active pmaps (headed by "pmaps").
315 * we lock it when adding or removing pmaps from this list. 315 * we lock it when adding or removing pmaps from this list.
316 */ 316 */
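
A short sketch of the pmaps_lock convention above (illustrative only): a newly created pmap is published on the global list under pmaps_lock and removed the same way on destruction. The link member name pm_list is an assumption here.

        /* Illustrative only. */
        static void
        example_register_pmap(struct pmap *pmap)
        {

                mutex_enter(&pmaps_lock);
                LIST_INSERT_HEAD(&pmaps, pmap, pm_list);        /* assumed link member */
                mutex_exit(&pmaps_lock);
        }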
317 317
318const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; 318const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER;
319const int ptp_shifts[] = PTP_SHIFT_INITIALIZER; 319const int ptp_shifts[] = PTP_SHIFT_INITIALIZER;
320const long nkptpmax[] = NKPTPMAX_INITIALIZER; 320const long nkptpmax[] = NKPTPMAX_INITIALIZER;
321const long nbpd[] = NBPD_INITIALIZER; 321const long nbpd[] = NBPD_INITIALIZER;
322pd_entry_t * const normal_pdes[] = PDES_INITIALIZER; 322pd_entry_t * const normal_pdes[] = PDES_INITIALIZER;
323 323
324long nkptp[] = NKPTP_INITIALIZER; 324long nkptp[] = NKPTP_INITIALIZER;
325 325
326struct pmap_head pmaps; 326struct pmap_head pmaps;
327kmutex_t pmaps_lock; 327kmutex_t pmaps_lock;
328 328
329static vaddr_t pmap_maxkvaddr; 329static vaddr_t pmap_maxkvaddr;
330 330
331/* 331/*
332 * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable. 332 * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable.
333 * actual locking is done by pm_lock. 333 * actual locking is done by pm_lock.
334 */ 334 */
335#if defined(DIAGNOSTIC) 335#if defined(DIAGNOSTIC)
336#define PMAP_SUBOBJ_LOCK(pm, idx) \ 336#define PMAP_SUBOBJ_LOCK(pm, idx) \
337 KASSERT(mutex_owned((pm)->pm_lock)); \ 337 KASSERT(mutex_owned((pm)->pm_lock)); \
338 if ((idx) != 0) \ 338 if ((idx) != 0) \
339 mutex_enter((pm)->pm_obj[(idx)].vmobjlock) 339 mutex_enter((pm)->pm_obj[(idx)].vmobjlock)
340#define PMAP_SUBOBJ_UNLOCK(pm, idx) \ 340#define PMAP_SUBOBJ_UNLOCK(pm, idx) \
341 KASSERT(mutex_owned((pm)->pm_lock)); \ 341 KASSERT(mutex_owned((pm)->pm_lock)); \
342 if ((idx) != 0) \ 342 if ((idx) != 0) \
343 mutex_exit((pm)->pm_obj[(idx)].vmobjlock) 343 mutex_exit((pm)->pm_obj[(idx)].vmobjlock)
344#else /* defined(DIAGNOSTIC) */ 344#else /* defined(DIAGNOSTIC) */
345#define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */ 345#define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */
346#define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */ 346#define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */
347#endif /* defined(DIAGNOSTIC) */ 347#endif /* defined(DIAGNOSTIC) */
348 348
349/* 349/*
350 * Misc. event counters. 350 * Misc. event counters.
351 */ 351 */
352struct evcnt pmap_iobmp_evcnt; 352struct evcnt pmap_iobmp_evcnt;
353struct evcnt pmap_ldt_evcnt; 353struct evcnt pmap_ldt_evcnt;
354 354
355/* 355/*
356 * PAT 356 * PAT
357 */ 357 */
358#define PATENTRY(n, type) (type << ((n) * 8)) 358#define PATENTRY(n, type) (type << ((n) * 8))
359#define PAT_UC 0x0ULL 359#define PAT_UC 0x0ULL
360#define PAT_WC 0x1ULL 360#define PAT_WC 0x1ULL
361#define PAT_WT 0x4ULL 361#define PAT_WT 0x4ULL
362#define PAT_WP 0x5ULL 362#define PAT_WP 0x5ULL
363#define PAT_WB 0x6ULL 363#define PAT_WB 0x6ULL
364#define PAT_UCMINUS 0x7ULL 364#define PAT_UCMINUS 0x7ULL
365 365
366static bool cpu_pat_enabled __read_mostly = false; 366static bool cpu_pat_enabled __read_mostly = false;
367 367
368/* 368/*
369 * global data structures 369 * global data structures
370 */ 370 */
371 371
372static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ 372static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */
373struct pmap *const kernel_pmap_ptr = &kernel_pmap_store; 373struct pmap *const kernel_pmap_ptr = &kernel_pmap_store;
374 374
375/* 375/*
376 * pmap_pg_g: if our processor supports PG_G in the PTE then we 376 * pmap_pg_g: if our processor supports PG_G in the PTE then we
377 * set pmap_pg_g to PG_G (otherwise it is zero). 377 * set pmap_pg_g to PG_G (otherwise it is zero).
378 */ 378 */
379 379
380int pmap_pg_g __read_mostly = 0; 380int pmap_pg_g __read_mostly = 0;
381 381
382/* 382/*
383 * pmap_largepages: if our processor supports PG_PS and we are 383 * pmap_largepages: if our processor supports PG_PS and we are
384 * using it, this is set to true. 384 * using it, this is set to true.
385 */ 385 */
386 386
387int pmap_largepages __read_mostly; 387int pmap_largepages __read_mostly;
388 388
389/* 389/*
390 * i386 physical memory comes in a big contig chunk with a small 390 * i386 physical memory comes in a big contig chunk with a small
391 * hole toward the front of it... the following two paddr_t's 391 * hole toward the front of it... the following two paddr_t's
392 * (shared with machdep.c) describe the physical address space 392 * (shared with machdep.c) describe the physical address space
393 * of this machine. 393 * of this machine.
394 */ 394 */
395paddr_t avail_start __read_mostly; /* PA of first available physical page */ 395paddr_t avail_start __read_mostly; /* PA of first available physical page */
396paddr_t avail_end __read_mostly; /* PA of last available physical page */ 396paddr_t avail_end __read_mostly; /* PA of last available physical page */
397 397
398#ifdef XEN 398#ifdef XEN
399#ifdef __x86_64__ 399#ifdef __x86_64__
400/* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */ 400/* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */
401static paddr_t xen_dummy_user_pgd; 401static paddr_t xen_dummy_user_pgd;
402#endif /* __x86_64__ */ 402#endif /* __x86_64__ */
403paddr_t pmap_pa_start; /* PA of first physical page for this domain */ 403paddr_t pmap_pa_start; /* PA of first physical page for this domain */
404paddr_t pmap_pa_end; /* PA of last physical page for this domain */ 404paddr_t pmap_pa_end; /* PA of last physical page for this domain */
405#endif /* XEN */ 405#endif /* XEN */
406 406
407#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp) 407#define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp)
408 408
409#define PV_HASH_SIZE 32768 409#define PV_HASH_SIZE 32768
410#define PV_HASH_LOCK_CNT 32 410#define PV_HASH_LOCK_CNT 32
411 411
412struct pv_hash_lock { 412struct pv_hash_lock {
413 kmutex_t lock; 413 kmutex_t lock;
414} __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT] 414} __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT]
415 __aligned(CACHE_LINE_SIZE); 415 __aligned(CACHE_LINE_SIZE);
416 416
417struct pv_hash_head { 417struct pv_hash_head {
418 SLIST_HEAD(, pv_entry) hh_list; 418 SLIST_HEAD(, pv_entry) hh_list;
419} pv_hash_heads[PV_HASH_SIZE]; 419} pv_hash_heads[PV_HASH_SIZE];
420 420
421static u_int 421static u_int
422pvhash_hash(struct vm_page *ptp, vaddr_t va) 422pvhash_hash(struct vm_page *ptp, vaddr_t va)
423{ 423{
424 424
425 return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT); 425 return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT);
426} 426}
427 427
428static struct pv_hash_head * 428static struct pv_hash_head *
429pvhash_head(u_int hash) 429pvhash_head(u_int hash)
430{ 430{
431 431
432 return &pv_hash_heads[hash % PV_HASH_SIZE]; 432 return &pv_hash_heads[hash % PV_HASH_SIZE];
433} 433}
434 434
435static kmutex_t * 435static kmutex_t *
436pvhash_lock(u_int hash) 436pvhash_lock(u_int hash)
437{ 437{
438 438
439 return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock; 439 return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock;
440} 440}
441 441
442static struct pv_entry * 442static struct pv_entry *
443pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va) 443pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va)
444{ 444{
445 struct pv_entry *pve; 445 struct pv_entry *pve;
446 struct pv_entry *prev; 446 struct pv_entry *prev;
447 447
448 prev = NULL; 448 prev = NULL;
449 SLIST_FOREACH(pve, &hh->hh_list, pve_hash) { 449 SLIST_FOREACH(pve, &hh->hh_list, pve_hash) {
450 if (pve->pve_pte.pte_ptp == ptp && 450 if (pve->pve_pte.pte_ptp == ptp &&
451 pve->pve_pte.pte_va == va) { 451 pve->pve_pte.pte_va == va) {
452 if (prev != NULL) { 452 if (prev != NULL) {
453 SLIST_REMOVE_AFTER(prev, pve_hash); 453 SLIST_REMOVE_AFTER(prev, pve_hash);
454 } else { 454 } else {
455 SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash); 455 SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash);
456 } 456 }
457 break; 457 break;
458 } 458 }
459 prev = pve; 459 prev = pve;
460 } 460 }
461 return pve; 461 return pve;
462} 462}
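
The three helpers above only select a bucket and its lock; serializing access to the bucket is up to the caller. A hedged caller sketch (not part of this change, function name hypothetical):

        /* Illustrative caller sketch. */
        static struct pv_entry *
        example_pvhash_lookup_remove(struct vm_page *ptp, vaddr_t va)
        {
                struct pv_hash_head *hh;
                struct pv_entry *pve;
                kmutex_t *lock;
                u_int hash;

                hash = pvhash_hash(ptp, va);    /* bucket index from <ptp,va> */
                lock = pvhash_lock(hash);       /* one lock covers several buckets */
                hh = pvhash_head(hash);

                mutex_enter(lock);
                pve = pvhash_remove(hh, ptp, va);       /* NULL if not hashed */
                mutex_exit(lock);
                return pve;
        }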
463 463
464/* 464/*
465 * other data structures 465 * other data structures
466 */ 466 */
467 467
468static pt_entry_t protection_codes[8] __read_mostly; /* maps MI prot to i386 468static pt_entry_t protection_codes[8] __read_mostly; /* maps MI prot to i386
469 prot code */ 469 prot code */
470static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */ 470static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */
471 471
472/* 472/*
473 * the following two vaddr_t's are used during system startup 473 * the following two vaddr_t's are used during system startup
474 * to keep track of how much of the kernel's VM space we have used. 474 * to keep track of how much of the kernel's VM space we have used.
475 * once the system is started, the management of the remaining kernel 475 * once the system is started, the management of the remaining kernel
476 * VM space is turned over to the kernel_map vm_map. 476 * VM space is turned over to the kernel_map vm_map.
477 */ 477 */
478 478
479static vaddr_t virtual_avail __read_mostly; /* VA of first free KVA */ 479static vaddr_t virtual_avail __read_mostly; /* VA of first free KVA */
480static vaddr_t virtual_end __read_mostly; /* VA of last free KVA */ 480static vaddr_t virtual_end __read_mostly; /* VA of last free KVA */
481 481
482/* 482/*
483 * pool that pmap structures are allocated from 483 * pool that pmap structures are allocated from
484 */ 484 */
485 485
486static struct pool_cache pmap_cache; 486static struct pool_cache pmap_cache;
487 487
488/* 488/*
489 * pv_entry cache 489 * pv_entry cache
490 */ 490 */
491 491
492static struct pool_cache pmap_pv_cache; 492static struct pool_cache pmap_pv_cache;
493 493
494#ifdef __HAVE_DIRECT_MAP 494#ifdef __HAVE_DIRECT_MAP
495 495
496extern phys_ram_seg_t mem_clusters[]; 496extern phys_ram_seg_t mem_clusters[];
497extern int mem_cluster_cnt; 497extern int mem_cluster_cnt;
498 498
499#else 499#else
500 500
501/* 501/*
502 * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a 502 * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a
503 * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing 503 * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing
504 * due to false sharing. 504 * due to false sharing.
505 */ 505 */
506 506
507#ifdef MULTIPROCESSOR 507#ifdef MULTIPROCESSOR
508#define PTESLEW(pte, id) ((pte)+(id)*NPTECL) 508#define PTESLEW(pte, id) ((pte)+(id)*NPTECL)
509#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE) 509#define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE)
510#else 510#else
511#define PTESLEW(pte, id) ((void)id, pte) 511#define PTESLEW(pte, id) ((void)id, pte)
512#define VASLEW(va,id) ((void)id, va) 512#define VASLEW(va,id) ((void)id, va)
513#endif 513#endif
514 514
515/* 515/*
516 * special VAs and the PTEs that map them 516 * special VAs and the PTEs that map them
517 */ 517 */
518static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte; 518static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte;
519static char *csrcp, *cdstp, *zerop, *ptpp; 519static char *csrcp, *cdstp, *zerop, *ptpp;
520#ifdef XEN 520#ifdef XEN
521char *early_zerop; /* also referenced from xen_pmap_bootstrap() */ 521char *early_zerop; /* also referenced from xen_pmap_bootstrap() */
522#else 522#else
523static char *early_zerop; 523static char *early_zerop;
524#endif 524#endif
525 525
526#endif 526#endif
527 527
528int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int); 528int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int);
529 529
530/* PDP pool_cache(9) and its callbacks */ 530/* PDP pool_cache(9) and its callbacks */
531struct pool_cache pmap_pdp_cache; 531struct pool_cache pmap_pdp_cache;
532static int pmap_pdp_ctor(void *, void *, int); 532static int pmap_pdp_ctor(void *, void *, int);
533static void pmap_pdp_dtor(void *, void *); 533static void pmap_pdp_dtor(void *, void *);
534#ifdef PAE 534#ifdef PAE
535/* need to allocate items of 4 pages */ 535/* need to allocate items of 4 pages */
536static void *pmap_pdp_alloc(struct pool *, int); 536static void *pmap_pdp_alloc(struct pool *, int);
537static void pmap_pdp_free(struct pool *, void *); 537static void pmap_pdp_free(struct pool *, void *);
538static struct pool_allocator pmap_pdp_allocator = { 538static struct pool_allocator pmap_pdp_allocator = {
539 .pa_alloc = pmap_pdp_alloc, 539 .pa_alloc = pmap_pdp_alloc,
540 .pa_free = pmap_pdp_free, 540 .pa_free = pmap_pdp_free,
541 .pa_pagesz = PAGE_SIZE * PDP_SIZE, 541 .pa_pagesz = PAGE_SIZE * PDP_SIZE,
542}; 542};
543#endif /* PAE */ 543#endif /* PAE */
544 544
545extern vaddr_t idt_vaddr; /* we allocate IDT early */ 545extern vaddr_t idt_vaddr; /* we allocate IDT early */
546extern paddr_t idt_paddr; 546extern paddr_t idt_paddr;
547 547
548#ifdef _LP64 548#ifdef _LP64
549extern vaddr_t lo32_vaddr; 549extern vaddr_t lo32_vaddr;
550extern vaddr_t lo32_paddr; 550extern vaddr_t lo32_paddr;
551#endif 551#endif
552 552
553extern int end; 553extern int end;
554 554
555#ifdef i386 555#ifdef i386
556/* stuff to fix the pentium f00f bug */ 556/* stuff to fix the pentium f00f bug */
557extern vaddr_t pentium_idt_vaddr; 557extern vaddr_t pentium_idt_vaddr;
558#endif 558#endif
559 559
560 560
561/* 561/*
562 * local prototypes 562 * local prototypes
563 */ 563 */
564 564
565static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, 565static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t,
566 pd_entry_t * const *); 566 pd_entry_t * const *);
567static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); 567static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int);
568static void pmap_freepage(struct pmap *, struct vm_page *, int); 568static void pmap_freepage(struct pmap *, struct vm_page *, int);
569static void pmap_free_ptp(struct pmap *, struct vm_page *, 569static void pmap_free_ptp(struct pmap *, struct vm_page *,
570 vaddr_t, pt_entry_t *, 570 vaddr_t, pt_entry_t *,
571 pd_entry_t * const *); 571 pd_entry_t * const *);
572static bool pmap_remove_pte(struct pmap *, struct vm_page *, 572static bool pmap_remove_pte(struct pmap *, struct vm_page *,
573 pt_entry_t *, vaddr_t, 573 pt_entry_t *, vaddr_t,
574 struct pv_entry **); 574 struct pv_entry **);
575static void pmap_remove_ptes(struct pmap *, struct vm_page *, 575static void pmap_remove_ptes(struct pmap *, struct vm_page *,
576 vaddr_t, vaddr_t, vaddr_t, 576 vaddr_t, vaddr_t, vaddr_t,
577 struct pv_entry **); 577 struct pv_entry **);
578 578
579static bool pmap_get_physpage(vaddr_t, int, paddr_t *); 579static bool pmap_get_physpage(vaddr_t, int, paddr_t *);
580static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int, 580static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int,
581 long *); 581 long *);
582 582
583static bool pmap_reactivate(struct pmap *); 583static bool pmap_reactivate(struct pmap *);
584 584
585/* 585/*
586 * p m a p h e l p e r f u n c t i o n s 586 * p m a p h e l p e r f u n c t i o n s
587 */ 587 */
588 588
589static inline void 589static inline void
590pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff) 590pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff)
591{ 591{
592 592
593 if (pmap == pmap_kernel()) { 593 if (pmap == pmap_kernel()) {
594 atomic_add_long(&pmap->pm_stats.resident_count, resid_diff); 594 atomic_add_long(&pmap->pm_stats.resident_count, resid_diff);
595 atomic_add_long(&pmap->pm_stats.wired_count, wired_diff); 595 atomic_add_long(&pmap->pm_stats.wired_count, wired_diff);
596 } else { 596 } else {
597 KASSERT(mutex_owned(pmap->pm_lock)); 597 KASSERT(mutex_owned(pmap->pm_lock));
598 pmap->pm_stats.resident_count += resid_diff; 598 pmap->pm_stats.resident_count += resid_diff;
599 pmap->pm_stats.wired_count += wired_diff; 599 pmap->pm_stats.wired_count += wired_diff;
600 } 600 }
601} 601}
602 602
603static inline void 603static inline void
604pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) 604pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte)
605{ 605{
606 int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0); 606 int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0);
607 int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0); 607 int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0);
608 608
609 KASSERT((npte & (PG_V | PG_W)) != PG_W); 609 KASSERT((npte & (PG_V | PG_W)) != PG_W);
610 KASSERT((opte & (PG_V | PG_W)) != PG_W); 610 KASSERT((opte & (PG_V | PG_W)) != PG_W);
611 611
612 pmap_stats_update(pmap, resid_diff, wired_diff); 612 pmap_stats_update(pmap, resid_diff, wired_diff);
613} 613}
614 614
615/* 615/*
616 * ptp_to_pmap: lookup pmap by ptp 616 * ptp_to_pmap: lookup pmap by ptp
617 */ 617 */
618 618
619static struct pmap * 619static struct pmap *
620ptp_to_pmap(struct vm_page *ptp) 620ptp_to_pmap(struct vm_page *ptp)
621{ 621{
622 struct pmap *pmap; 622 struct pmap *pmap;
623 623
624 if (ptp == NULL) { 624 if (ptp == NULL) {
625 return pmap_kernel(); 625 return pmap_kernel();
626 } 626 }
627 pmap = (struct pmap *)ptp->uobject; 627 pmap = (struct pmap *)ptp->uobject;
628 KASSERT(pmap != NULL); 628 KASSERT(pmap != NULL);
629 KASSERT(&pmap->pm_obj[0] == ptp->uobject); 629 KASSERT(&pmap->pm_obj[0] == ptp->uobject);
630 return pmap; 630 return pmap;
631} 631}
632 632
633static inline struct pv_pte * 633static inline struct pv_pte *
634pve_to_pvpte(struct pv_entry *pve) 634pve_to_pvpte(struct pv_entry *pve)
635{ 635{
636 636
637 KASSERT((void *)&pve->pve_pte == (void *)pve); 637 KASSERT((void *)&pve->pve_pte == (void *)pve);
638 return &pve->pve_pte; 638 return &pve->pve_pte;
639} 639}
640 640
641static inline struct pv_entry * 641static inline struct pv_entry *
642pvpte_to_pve(struct pv_pte *pvpte) 642pvpte_to_pve(struct pv_pte *pvpte)
643{ 643{
644 struct pv_entry *pve = (void *)pvpte; 644 struct pv_entry *pve = (void *)pvpte;
645 645
646 KASSERT(pve_to_pvpte(pve) == pvpte); 646 KASSERT(pve_to_pvpte(pve) == pvpte);
647 return pve; 647 return pve;
648} 648}
649 649
650/* 650/*
651 * pv_pte_first, pv_pte_next: PV list iterator. 651 * pv_pte_first, pv_pte_next: PV list iterator.
652 */ 652 */
653 653
654static struct pv_pte * 654static struct pv_pte *
655pv_pte_first(struct pmap_page *pp) 655pv_pte_first(struct pmap_page *pp)
656{ 656{
657 657
658 if ((pp->pp_flags & PP_EMBEDDED) != 0) { 658 if ((pp->pp_flags & PP_EMBEDDED) != 0) {
659 return &pp->pp_pte; 659 return &pp->pp_pte;
660 } 660 }
661 return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list)); 661 return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list));
662} 662}
663 663
664static struct pv_pte * 664static struct pv_pte *
665pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte) 665pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte)
666{ 666{
667 667
668 KASSERT(pvpte != NULL); 668 KASSERT(pvpte != NULL);
669 if (pvpte == &pp->pp_pte) { 669 if (pvpte == &pp->pp_pte) {
670 KASSERT((pp->pp_flags & PP_EMBEDDED) != 0); 670 KASSERT((pp->pp_flags & PP_EMBEDDED) != 0);
671 return NULL; 671 return NULL;
672 } 672 }
673 KASSERT((pp->pp_flags & PP_EMBEDDED) == 0); 673 KASSERT((pp->pp_flags & PP_EMBEDDED) == 0);
674 return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list)); 674 return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list));
675} 675}
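
The iterator pair above yields the embedded pv_pte first (when PP_EMBEDDED is set) and then walks the chained pv_entry list. A minimal usage sketch, assuming the caller already holds whatever lock protects the pv list:

        /* Illustrative only. */
        static u_int
        example_count_mappings(struct pmap_page *pp)
        {
                struct pv_pte *pvpte;
                u_int n = 0;

                for (pvpte = pv_pte_first(pp); pvpte != NULL;
                    pvpte = pv_pte_next(pp, pvpte))
                        n++;
                return n;       /* number of <PMAP,VA> mappings of this page */
        }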
676 676
677/* 677/*
678 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? 678 * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]?
679 * of course the kernel is always loaded 679 * of course the kernel is always loaded
680 */ 680 */
681 681
682bool 682bool
683pmap_is_curpmap(struct pmap *pmap) 683pmap_is_curpmap(struct pmap *pmap)
684{ 684{
685 return((pmap == pmap_kernel()) || 685 return((pmap == pmap_kernel()) ||
686 (pmap == curcpu()->ci_pmap)); 686 (pmap == curcpu()->ci_pmap));
687} 687}
688 688
689/* 689/*
690 * Add a reference to the specified pmap. 690 * Add a reference to the specified pmap.
691 */ 691 */
692 692
693void 693void
694pmap_reference(struct pmap *pmap) 694pmap_reference(struct pmap *pmap)
695{ 695{
696 696
697 atomic_inc_uint(&pmap->pm_obj[0].uo_refs); 697 atomic_inc_uint(&pmap->pm_obj[0].uo_refs);
698} 698}
699 699
700/* 700/*
701 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in 701 * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in
702 * 702 *
703 * there are several pmaps involved. some or all of them might be same. 703 * there are several pmaps involved. some or all of them might be same.
704 * 704 *
705 * - the pmap given by the first argument 705 * - the pmap given by the first argument
706 * our caller wants to access this pmap's PTEs. 706 * our caller wants to access this pmap's PTEs.
707 * 707 *
708 * - pmap_kernel() 708 * - pmap_kernel()
709 * the kernel pmap. note that it only contains the kernel part 709 * the kernel pmap. note that it only contains the kernel part
710 * of the address space which is shared by any pmap. ie. any 710 * of the address space which is shared by any pmap. ie. any
711 * pmap can be used instead of pmap_kernel() for our purpose. 711 * pmap can be used instead of pmap_kernel() for our purpose.
712 * 712 *
713 * - ci->ci_pmap 713 * - ci->ci_pmap
714 * pmap currently loaded on the cpu. 714 * pmap currently loaded on the cpu.
715 * 715 *
716 * - vm_map_pmap(&curproc->p_vmspace->vm_map) 716 * - vm_map_pmap(&curproc->p_vmspace->vm_map)
717 * current process' pmap. 717 * current process' pmap.
718 * 718 *
719 * => we lock enough pmaps to keep things locked in 719 * => we lock enough pmaps to keep things locked in
720 * => must be undone with pmap_unmap_ptes before returning 720 * => must be undone with pmap_unmap_ptes before returning
721 */ 721 */
722 722
723void 723void
724pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, 724pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2,
725 pd_entry_t **ptepp, pd_entry_t * const **pdeppp) 725 pd_entry_t **ptepp, pd_entry_t * const **pdeppp)
726{ 726{
727 struct pmap *curpmap; 727 struct pmap *curpmap;
728 struct cpu_info *ci; 728 struct cpu_info *ci;
729 lwp_t *l; 729 lwp_t *l;
730 730
731 /* The kernel's pmap is always accessible. */ 731 /* The kernel's pmap is always accessible. */
732 if (pmap == pmap_kernel()) { 732 if (pmap == pmap_kernel()) {
733 *pmap2 = NULL; 733 *pmap2 = NULL;
734 *ptepp = PTE_BASE; 734 *ptepp = PTE_BASE;
735 *pdeppp = normal_pdes; 735 *pdeppp = normal_pdes;
736 return; 736 return;
737 } 737 }
738 KASSERT(kpreempt_disabled()); 738 KASSERT(kpreempt_disabled());
739 739
740 l = curlwp; 740 l = curlwp;
741 retry: 741 retry:
742 mutex_enter(pmap->pm_lock); 742 mutex_enter(pmap->pm_lock);
743 ci = curcpu(); 743 ci = curcpu();
744 curpmap = ci->ci_pmap; 744 curpmap = ci->ci_pmap;
745 if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) { 745 if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) {
746 /* Our own pmap so just load it: easy. */ 746 /* Our own pmap so just load it: easy. */
747 if (__predict_false(ci->ci_want_pmapload)) { 747 if (__predict_false(ci->ci_want_pmapload)) {
748 mutex_exit(pmap->pm_lock); 748 mutex_exit(pmap->pm_lock);
749 pmap_load(); 749 pmap_load();
750 goto retry; 750 goto retry;
751 } 751 }
752 KASSERT(pmap == curpmap); 752 KASSERT(pmap == curpmap);
753 } else if (pmap == curpmap) { 753 } else if (pmap == curpmap) {
754 /* 754 /*
755 * Already on the CPU: make it valid. This is very 755 * Already on the CPU: make it valid. This is very
756 * often the case during exit(), when we have switched 756 * often the case during exit(), when we have switched
757 * to the kernel pmap in order to destroy a user pmap. 757 * to the kernel pmap in order to destroy a user pmap.
758 */ 758 */
759 if (!pmap_reactivate(pmap)) { 759 if (!pmap_reactivate(pmap)) {
760 u_int gen = uvm_emap_gen_return(); 760 u_int gen = uvm_emap_gen_return();
761 tlbflush(); 761 tlbflush();
762 uvm_emap_update(gen); 762 uvm_emap_update(gen);
763 } 763 }
764 } else { 764 } else {
765 /* 765 /*
766 * Toss current pmap from CPU, but keep a reference to it. 766 * Toss current pmap from CPU, but keep a reference to it.
767 * The reference will be dropped by pmap_unmap_ptes(). 767 * The reference will be dropped by pmap_unmap_ptes().
768 * Can happen if we block during exit(). 768 * Can happen if we block during exit().
769 */ 769 */
770 const cpuid_t cid = cpu_index(ci); 770 const cpuid_t cid = cpu_index(ci);
771 771
772 kcpuset_atomic_clear(curpmap->pm_cpus, cid); 772 kcpuset_atomic_clear(curpmap->pm_cpus, cid);
773 kcpuset_atomic_clear(curpmap->pm_kernel_cpus, cid); 773 kcpuset_atomic_clear(curpmap->pm_kernel_cpus, cid);
774 ci->ci_pmap = pmap; 774 ci->ci_pmap = pmap;
775 ci->ci_tlbstate = TLBSTATE_VALID; 775 ci->ci_tlbstate = TLBSTATE_VALID;
776 kcpuset_atomic_set(pmap->pm_cpus, cid); 776 kcpuset_atomic_set(pmap->pm_cpus, cid);
777 kcpuset_atomic_set(pmap->pm_kernel_cpus, cid); 777 kcpuset_atomic_set(pmap->pm_kernel_cpus, cid);
778 cpu_load_pmap(pmap, curpmap); 778 cpu_load_pmap(pmap, curpmap);
779 } 779 }
780 pmap->pm_ncsw = l->l_ncsw; 780 pmap->pm_ncsw = l->l_ncsw;
781 *pmap2 = curpmap; 781 *pmap2 = curpmap;
782 *ptepp = PTE_BASE; 782 *ptepp = PTE_BASE;
783#if defined(XEN) && defined(__x86_64__) 783#if defined(XEN) && defined(__x86_64__)
784 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE); 784 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE);
785 ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir; 785 ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir;
786 *pdeppp = ci->ci_normal_pdes; 786 *pdeppp = ci->ci_normal_pdes;
787#else /* XEN && __x86_64__ */ 787#else /* XEN && __x86_64__ */
788 *pdeppp = normal_pdes; 788 *pdeppp = normal_pdes;
789#endif /* XEN && __x86_64__ */ 789#endif /* XEN && __x86_64__ */
790} 790}
791 791
792/* 792/*
793 * pmap_unmap_ptes: unlock the PTE mapping of "pmap" 793 * pmap_unmap_ptes: unlock the PTE mapping of "pmap"
794 */ 794 */
795 795
796void 796void
797pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2) 797pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2)
798{ 798{
799 struct cpu_info *ci; 799 struct cpu_info *ci;
800 struct pmap *mypmap; 800 struct pmap *mypmap;
801 801
802 KASSERT(kpreempt_disabled()); 802 KASSERT(kpreempt_disabled());
803 803
804 /* The kernel's pmap is always accessible. */ 804 /* The kernel's pmap is always accessible. */
805 if (pmap == pmap_kernel()) { 805 if (pmap == pmap_kernel()) {
806 return; 806 return;
807 } 807 }
808 808
809 ci = curcpu(); 809 ci = curcpu();
810#if defined(XEN) && defined(__x86_64__) 810#if defined(XEN) && defined(__x86_64__)
811 /* Reset per-cpu normal_pdes */ 811 /* Reset per-cpu normal_pdes */
812 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE); 812 KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE);
813 ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE; 813 ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE;
814#endif /* XEN && __x86_64__ */ 814#endif /* XEN && __x86_64__ */
815 /* 815 /*
816 * We cannot tolerate context switches while mapped in. 816 * We cannot tolerate context switches while mapped in.
817 * If it is our own pmap all we have to do is unlock. 817 * If it is our own pmap all we have to do is unlock.
818 */ 818 */
819 KASSERT(pmap->pm_ncsw == curlwp->l_ncsw); 819 KASSERT(pmap->pm_ncsw == curlwp->l_ncsw);
820 mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map); 820 mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map);
821 if (pmap == mypmap) { 821 if (pmap == mypmap) {
822 mutex_exit(pmap->pm_lock); 822 mutex_exit(pmap->pm_lock);
823 return; 823 return;
824 } 824 }
825 825
826 /* 826 /*
827 * Mark whatever's on the CPU now as lazy and unlock. 827 * Mark whatever's on the CPU now as lazy and unlock.
828 * If the pmap was already installed, we are done. 828 * If the pmap was already installed, we are done.
829 */ 829 */
830 ci->ci_tlbstate = TLBSTATE_LAZY; 830 ci->ci_tlbstate = TLBSTATE_LAZY;
831 ci->ci_want_pmapload = (mypmap != pmap_kernel()); 831 ci->ci_want_pmapload = (mypmap != pmap_kernel());
832 mutex_exit(pmap->pm_lock); 832 mutex_exit(pmap->pm_lock);
833 if (pmap == pmap2) { 833 if (pmap == pmap2) {
834 return; 834 return;
835 } 835 }
836 836
837 /* 837 /*
838 * We installed another pmap on the CPU. Grab a reference to 838 * We installed another pmap on the CPU. Grab a reference to
839 * it and leave in place. Toss the evicted pmap (can block). 839 * it and leave in place. Toss the evicted pmap (can block).
840 */ 840 */
841 pmap_reference(pmap); 841 pmap_reference(pmap);
842 pmap_destroy(pmap2); 842 pmap_destroy(pmap2);
843} 843}
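
A sketch of the bracket the two functions above form (illustrative, not part of this change): the caller disables kernel preemption, maps the target pmap's PTEs in, works on them through the returned pointers, and undoes everything before returning.

        /* Illustrative caller sketch; the function name is hypothetical. */
        static void
        example_with_mapped_ptes(struct pmap *pmap)
        {
                pd_entry_t * const *pdes;
                pd_entry_t *ptes;
                struct pmap *pmap2;

                kpreempt_disable();             /* no context switch while mapped in */
                pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);

                /* ... look up or modify PTEs of interest via ptes/pdes ... */

                pmap_unmap_ptes(pmap, pmap2);   /* must be undone before returning */
                kpreempt_enable();
        }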
844 844
845 845
846inline static void 846inline static void
847pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte) 847pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte)
848{ 848{
849 849
850#if !defined(__x86_64__) 850#if !defined(__x86_64__)
851 if (curproc == NULL || curproc->p_vmspace == NULL || 851 if (curproc == NULL || curproc->p_vmspace == NULL ||
852 pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) 852 pm != vm_map_pmap(&curproc->p_vmspace->vm_map))
853 return; 853 return;
854 854
855 if ((opte ^ npte) & PG_X) 855 if ((opte ^ npte) & PG_X)
856 pmap_update_pg(va); 856 pmap_update_pg(va);
857 857
858 /* 858 /*
859 * Executability was removed on the last executable change. 859 * Executability was removed on the last executable change.
860 * Reset the code segment to something conservative and 860 * Reset the code segment to something conservative and
861 * let the trap handler deal with setting the right limit. 861 * let the trap handler deal with setting the right limit.
862 * We can't do that because of locking constraints on the vm map. 862 * We can't do that because of locking constraints on the vm map.
863 */ 863 */
864 864
865 if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) { 865 if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) {
866 struct trapframe *tf = curlwp->l_md.md_regs; 866 struct trapframe *tf = curlwp->l_md.md_regs;
867 867
868 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 868 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
869 pm->pm_hiexec = I386_MAX_EXE_ADDR; 869 pm->pm_hiexec = I386_MAX_EXE_ADDR;
870 } 870 }
871#endif /* !defined(__x86_64__) */ 871#endif /* !defined(__x86_64__) */
872} 872}
873 873
874#if !defined(__x86_64__) 874#if !defined(__x86_64__)
875/* 875/*
876 * Fixup the code segment to cover all potential executable mappings. 876 * Fixup the code segment to cover all potential executable mappings.
877 * returns 0 if no changes to the code segment were made. 877 * returns 0 if no changes to the code segment were made.
878 */ 878 */
879 879
880int 880int
881pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) 881pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb)
882{ 882{
883 struct vm_map_entry *ent; 883 struct vm_map_entry *ent;
884 struct pmap *pm = vm_map_pmap(map); 884 struct pmap *pm = vm_map_pmap(map);
885 vaddr_t va = 0; 885 vaddr_t va = 0;
886 886
887 vm_map_lock_read(map); 887 vm_map_lock_read(map);
888 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { 888 for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) {
889 889
890 /* 890 /*
891 * This entry has greater va than the entries before. 891 * This entry has greater va than the entries before.
892 * We need to make it point to the last page, not past it. 892 * We need to make it point to the last page, not past it.
893 */ 893 */
894 894
895 if (ent->protection & VM_PROT_EXECUTE) 895 if (ent->protection & VM_PROT_EXECUTE)
896 va = trunc_page(ent->end) - PAGE_SIZE; 896 va = trunc_page(ent->end) - PAGE_SIZE;
897 } 897 }
898 vm_map_unlock_read(map); 898 vm_map_unlock_read(map);
899 if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL)) 899 if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL))
900 return (0); 900 return (0);
901 901
902 pm->pm_hiexec = va; 902 pm->pm_hiexec = va;
903 if (pm->pm_hiexec > I386_MAX_EXE_ADDR) { 903 if (pm->pm_hiexec > I386_MAX_EXE_ADDR) {
904 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); 904 tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL);
905 } else { 905 } else {
906 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); 906 tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL);
907 return (0); 907 return (0);
908 } 908 }
909 return (1); 909 return (1);
910} 910}
911#endif /* !defined(__x86_64__) */ 911#endif /* !defined(__x86_64__) */
912 912
913void 913void
914pat_init(struct cpu_info *ci) 914pat_init(struct cpu_info *ci)
915{ 915{
916 uint64_t pat; 916 uint64_t pat;
917 917
918 if (!(ci->ci_feat_val[0] & CPUID_PAT)) 918 if (!(ci->ci_feat_val[0] & CPUID_PAT))
919 return; 919 return;
920 920
921 /* We change WT to WC. Leave all other entries the default values. */ 921 /* We change WT to WC. Leave all other entries the default values. */
922 pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) | 922 pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
923 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) | 923 PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
924 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) | 924 PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
925 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC); 925 PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
926 926
927 wrmsr(MSR_CR_PAT, pat); 927 wrmsr(MSR_CR_PAT, pat);
928 cpu_pat_enabled = true; 928 cpu_pat_enabled = true;
929 aprint_debug_dev(ci->ci_dev, "PAT enabled\n"); 929 aprint_debug_dev(ci->ci_dev, "PAT enabled\n");
930} 930}
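
A standalone worked example of the value pat_init() programs (illustrative; the macros are duplicated from above so the snippet compiles on its own): entry n occupies byte n of the MSR, so WB/WC/UC-/UC repeated twice encodes as 0x0007010600070106.

        /* Standalone check of the PAT layout; illustrative only. */
        #include <assert.h>
        #include <stdint.h>

        #define PATENTRY(n, type)       ((uint64_t)(type) << ((n) * 8))
        #define PAT_UC          0x0ULL
        #define PAT_WC          0x1ULL
        #define PAT_WB          0x6ULL
        #define PAT_UCMINUS     0x7ULL

        int
        main(void)
        {
                uint64_t pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
                    PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
                    PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
                    PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);

                assert(pat == 0x0007010600070106ULL);   /* entry n lands in byte n */
                return 0;
        }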
931 931
932static pt_entry_t 932static pt_entry_t
933pmap_pat_flags(u_int flags) 933pmap_pat_flags(u_int flags)
934{ 934{
935 u_int cacheflags = (flags & PMAP_CACHE_MASK); 935 u_int cacheflags = (flags & PMAP_CACHE_MASK);
936 936
937 if (!cpu_pat_enabled) { 937 if (!cpu_pat_enabled) {
938 switch (cacheflags) { 938 switch (cacheflags) {
939 case PMAP_NOCACHE: 939 case PMAP_NOCACHE:
940 case PMAP_NOCACHE_OVR: 940 case PMAP_NOCACHE_OVR:
941 /* results in PGC_UCMINUS on cpus which have 941 /* results in PGC_UCMINUS on cpus which have
942 * the cpuid PAT but PAT "disabled" 942 * the cpuid PAT but PAT "disabled"
943 */ 943 */
944 return PG_N; 944 return PG_N;
945 default: 945 default:
946 return 0; 946 return 0;
947 } 947 }
948 } 948 }
949 949
950 switch (cacheflags) { 950 switch (cacheflags) {
951 case PMAP_NOCACHE: 951 case PMAP_NOCACHE:
952 return PGC_UC; 952 return PGC_UC;
953 case PMAP_WRITE_COMBINE: 953 case PMAP_WRITE_COMBINE:
954 return PGC_WC; 954 return PGC_WC;
955 case PMAP_WRITE_BACK: 955 case PMAP_WRITE_BACK:
956 return PGC_WB; 956 return PGC_WB;
957 case PMAP_NOCACHE_OVR: 957 case PMAP_NOCACHE_OVR:
958 return PGC_UCMINUS; 958 return PGC_UCMINUS;
959 } 959 }
960 960
961 return 0; 961 return 0;
962} 962}
963 963
964/* 964/*
965 * p m a p k e n t e r f u n c t i o n s 965 * p m a p k e n t e r f u n c t i o n s
966 * 966 *
967 * functions to quickly enter/remove pages from the kernel address 967 * functions to quickly enter/remove pages from the kernel address
968 * space. pmap_kremove is exported to MI kernel. we make use of 968 * space. pmap_kremove is exported to MI kernel. we make use of
969 * the recursive PTE mappings. 969 * the recursive PTE mappings.
970 */ 970 */
971 971
972/* 972/*
973 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking 973 * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking
974 * 974 *
975 * => no need to lock anything, assume va is already allocated 975 * => no need to lock anything, assume va is already allocated
976 * => should be faster than normal pmap enter function 976 * => should be faster than normal pmap enter function
977 */ 977 */
978 978
979void 979void
980pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) 980pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
981{ 981{
982 pt_entry_t *pte, opte, npte; 982 pt_entry_t *pte, opte, npte;
983 983
984 KASSERT(!(prot & ~VM_PROT_ALL)); 984 KASSERT(!(prot & ~VM_PROT_ALL));
985 985
986 if (va < VM_MIN_KERNEL_ADDRESS) 986 if (va < VM_MIN_KERNEL_ADDRESS)
987 pte = vtopte(va); 987 pte = vtopte(va);
988 else 988 else
989 pte = kvtopte(va); 989 pte = kvtopte(va);
990#ifdef DOM0OPS 990#ifdef DOM0OPS
991 if (pa < pmap_pa_start || pa >= pmap_pa_end) { 991 if (pa < pmap_pa_start || pa >= pmap_pa_end) {
992#ifdef DEBUG 992#ifdef DEBUG
993 printf_nolog("%s: pa 0x%" PRIx64 " for va 0x%" PRIx64 993 printf_nolog("%s: pa 0x%" PRIx64 " for va 0x%" PRIx64
994 " outside range\n", __func__, (int64_t)pa, (int64_t)va); 994 " outside range\n", __func__, (int64_t)pa, (int64_t)va);
995#endif /* DEBUG */ 995#endif /* DEBUG */
996 npte = pa; 996 npte = pa;
997 } else 997 } else
998#endif /* DOM0OPS */ 998#endif /* DOM0OPS */
999 npte = pmap_pa2pte(pa); 999 npte = pmap_pa2pte(pa);
1000 npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g; 1000 npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g;
1001 npte |= pmap_pat_flags(flags); 1001 npte |= pmap_pat_flags(flags);
1002 opte = pmap_pte_testset(pte, npte); /* zap! */ 1002 opte = pmap_pte_testset(pte, npte); /* zap! */
1003#if defined(DIAGNOSTIC) 1003#if defined(DIAGNOSTIC)
1004 /* XXX For now... */ 1004 /* XXX For now... */
1005 if (opte & PG_PS) 1005 if (opte & PG_PS)
1006 panic("%s: PG_PS", __func__); 1006 panic("%s: PG_PS", __func__);
1007#endif 1007#endif
1008 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { 1008 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) {
1009 /* This should not happen. */ 1009 /* This should not happen. */
1010 printf_nolog("%s: mapping already present\n", __func__); 1010 printf_nolog("%s: mapping already present\n", __func__);
1011 kpreempt_disable(); 1011 kpreempt_disable();
1012 pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER); 1012 pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER);
1013 kpreempt_enable(); 1013 kpreempt_enable();
1014 } 1014 }
1015} 1015}
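/*
 * Illustrative sketch (editorial, not part of the source above): typical use
 * of the kenter/kremove pair.  "va" and "fb_pa" are assumed to be a free
 * kernel virtual page and a device (e.g. framebuffer) physical page obtained
 * elsewhere; PMAP_WRITE_COMBINE is what pmap_pat_flags() above turns into
 * PGC_WC when the PAT is enabled.
 */
static void
kenter_sketch(vaddr_t va, paddr_t fb_pa)
{

	/* enter the mapping; no pv tracking and no locking required */
	pmap_kenter_pa(va, fb_pa, VM_PROT_READ | VM_PROT_WRITE,
	    PMAP_WRITE_COMBINE);

	/* ... use the mapping through va ... */

	/* per the comment below, kremove must be followed by pmap_update() */
	pmap_kremove(va, PAGE_SIZE);
	pmap_update(pmap_kernel());
}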
1016 1016
1017void 1017void
1018pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot) 1018pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot)
1019{ 1019{
1020 pt_entry_t *pte, npte; 1020 pt_entry_t *pte, npte;
1021 1021
1022 KASSERT((prot & ~VM_PROT_ALL) == 0); 1022 KASSERT((prot & ~VM_PROT_ALL) == 0);
1023 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); 1023 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va);
1024 1024
1025#ifdef DOM0OPS 1025#ifdef DOM0OPS
1026 if (pa < pmap_pa_start || pa >= pmap_pa_end) { 1026 if (pa < pmap_pa_start || pa >= pmap_pa_end) {
1027 npte = pa; 1027 npte = pa;
1028 } else 1028 } else
1029#endif 1029#endif
1030 npte = pmap_pa2pte(pa); 1030 npte = pmap_pa2pte(pa);
1031 1031
 1032                                                          1032
1033 npte |= protection_codes[prot] | PG_k | PG_V; 1033 npte |= protection_codes[prot] | PG_k | PG_V;
1034 pmap_pte_set(pte, npte); 1034 pmap_pte_set(pte, npte);
1035} 1035}
1036 1036
1037/* 1037/*
1038 * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred. 1038 * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred.
1039 */ 1039 */
1040void 1040void
1041pmap_emap_sync(bool canload) 1041pmap_emap_sync(bool canload)
1042{ 1042{
1043 struct cpu_info *ci = curcpu(); 1043 struct cpu_info *ci = curcpu();
1044 struct pmap *pmap; 1044 struct pmap *pmap;
1045 1045
1046 KASSERT(kpreempt_disabled()); 1046 KASSERT(kpreempt_disabled());
1047 if (__predict_true(ci->ci_want_pmapload && canload)) { 1047 if (__predict_true(ci->ci_want_pmapload && canload)) {
1048 /* 1048 /*
1049 * XXX: Hint for pmap_reactivate(), which might suggest to 1049 * XXX: Hint for pmap_reactivate(), which might suggest to
1050 * not perform TLB flush, if state has not changed. 1050 * not perform TLB flush, if state has not changed.
1051 */ 1051 */
1052 pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); 1052 pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map);
1053 if (__predict_false(pmap == ci->ci_pmap)) { 1053 if (__predict_false(pmap == ci->ci_pmap)) {
1054 kcpuset_atomic_clear(pmap->pm_cpus, cpu_index(ci)); 1054 kcpuset_atomic_clear(pmap->pm_cpus, cpu_index(ci));
1055 } 1055 }
1056 pmap_load(); 1056 pmap_load();
1057 KASSERT(ci->ci_want_pmapload == 0); 1057 KASSERT(ci->ci_want_pmapload == 0);
1058 } else { 1058 } else {
1059 tlbflush(); 1059 tlbflush();
1060 } 1060 }
1061 1061
1062} 1062}
1063 1063
1064void 1064void
1065pmap_emap_remove(vaddr_t sva, vsize_t len) 1065pmap_emap_remove(vaddr_t sva, vsize_t len)
1066{ 1066{
1067 pt_entry_t *pte; 1067 pt_entry_t *pte;
1068 vaddr_t va, eva = sva + len; 1068 vaddr_t va, eva = sva + len;
1069 1069
1070 for (va = sva; va < eva; va += PAGE_SIZE) { 1070 for (va = sva; va < eva; va += PAGE_SIZE) {
1071 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); 1071 pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va);
1072 pmap_pte_set(pte, 0); 1072 pmap_pte_set(pte, 0);
1073 } 1073 }
1074} 1074}
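/*
 * Illustrative sketch (editorial, not part of the source above): how the
 * ephemeral-mapping ("emap") primitives fit together.  "va" and "pa" are
 * assumed to come from the caller.  pmap_emap_enter() only writes the PTE;
 * the deferred TLB flush (or full pmap_load()) happens in pmap_emap_sync(),
 * which must run with preemption disabled.
 */
static void
emap_sketch(vaddr_t va, paddr_t pa)
{

	pmap_emap_enter(va, pa, VM_PROT_READ | VM_PROT_WRITE);

	kpreempt_disable();
	pmap_emap_sync(false);	/* flush only, never switch pmaps here */
	kpreempt_enable();

	/* ... short-lived access through va ... */

	pmap_emap_remove(va, PAGE_SIZE);
}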
1075 1075
1076__strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa); 1076__strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa);
1077 1077
1078#if defined(__x86_64__) 1078#if defined(__x86_64__)
1079/* 1079/*
1080 * Change protection for a virtual address. Local for a CPU only, don't 1080 * Change protection for a virtual address. Local for a CPU only, don't
1081 * care about TLB shootdowns. 1081 * care about TLB shootdowns.
1082 * 1082 *
1083 * => must be called with preemption disabled 1083 * => must be called with preemption disabled
1084 */ 1084 */
1085void 1085void
1086pmap_changeprot_local(vaddr_t va, vm_prot_t prot) 1086pmap_changeprot_local(vaddr_t va, vm_prot_t prot)
1087{ 1087{
1088 pt_entry_t *pte, opte, npte; 1088 pt_entry_t *pte, opte, npte;
1089 1089
1090 KASSERT(kpreempt_disabled()); 1090 KASSERT(kpreempt_disabled());
1091 1091
1092 if (va < VM_MIN_KERNEL_ADDRESS) 1092 if (va < VM_MIN_KERNEL_ADDRESS)
1093 pte = vtopte(va); 1093 pte = vtopte(va);
1094 else 1094 else
1095 pte = kvtopte(va); 1095 pte = kvtopte(va);
1096 1096
1097 npte = opte = *pte; 1097 npte = opte = *pte;
1098 1098
1099 if ((prot & VM_PROT_WRITE) != 0) 1099 if ((prot & VM_PROT_WRITE) != 0)
1100 npte |= PG_RW; 1100 npte |= PG_RW;
1101 else 1101 else
1102 npte &= ~PG_RW; 1102 npte &= ~PG_RW;
1103 1103
1104 if (opte != npte) { 1104 if (opte != npte) {
1105 pmap_pte_set(pte, npte); 1105 pmap_pte_set(pte, npte);
1106 pmap_pte_flush(); 1106 pmap_pte_flush();
1107 invlpg(va); 1107 invlpg(va);
1108 } 1108 }
1109} 1109}
1110#endif /* defined(__x86_64__) */ 1110#endif /* defined(__x86_64__) */
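/*
 * Illustrative sketch (editorial, not part of the source above): the kind of
 * short, CPU-local protection flip pmap_changeprot_local() is intended for,
 * e.g. patching an otherwise read-only kernel page.  "va", "dst", "src" and
 * "len" are assumed to describe the page and the bytes being patched;
 * preemption stays disabled because no TLB shootdown is sent to other CPUs.
 */
#if defined(__x86_64__)
static void
patch_sketch(vaddr_t va, uint8_t *dst, const uint8_t *src, size_t len)
{

	kpreempt_disable();
	pmap_changeprot_local(va, VM_PROT_READ | VM_PROT_WRITE);
	memcpy(dst, src, len);
	pmap_changeprot_local(va, VM_PROT_READ);
	kpreempt_enable();
}
#endif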
1111 1111
1112/* 1112/*
1113 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking 1113 * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking
1114 * 1114 *
1115 * => no need to lock anything 1115 * => no need to lock anything
1116 * => caller must dispose of any vm_page mapped in the va range 1116 * => caller must dispose of any vm_page mapped in the va range
1117 * => note: not an inline function 1117 * => note: not an inline function
1118 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE 1118 * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE
1119 * => we assume kernel only unmaps valid addresses and thus don't bother 1119 * => we assume kernel only unmaps valid addresses and thus don't bother
1120 * checking the valid bit before doing TLB flushing 1120 * checking the valid bit before doing TLB flushing
1121 * => must be followed by call to pmap_update() before reuse of page 1121 * => must be followed by call to pmap_update() before reuse of page
1122 */ 1122 */
1123 1123
1124static inline void 1124static inline void
1125pmap_kremove1(vaddr_t sva, vsize_t len, bool localonly) 1125pmap_kremove1(vaddr_t sva, vsize_t len, bool localonly)
1126{ 1126{
1127 pt_entry_t *pte, opte; 1127 pt_entry_t *pte, opte;
1128 vaddr_t va, eva; 1128 vaddr_t va, eva;
1129 1129
1130 eva = sva + len; 1130 eva = sva + len;
1131 1131
1132 kpreempt_disable(); 1132 kpreempt_disable();
1133 for (va = sva; va < eva; va += PAGE_SIZE) { 1133 for (va = sva; va < eva; va += PAGE_SIZE) {
1134 pte = kvtopte(va); 1134 pte = kvtopte(va);
1135 opte = pmap_pte_testset(pte, 0); /* zap! */ 1135 opte = pmap_pte_testset(pte, 0); /* zap! */
1136 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U) && !localonly) { 1136 if ((opte & (PG_V | PG_U)) == (PG_V | PG_U) && !localonly) {
1137 pmap_tlb_shootdown(pmap_kernel(), va, opte, 1137 pmap_tlb_shootdown(pmap_kernel(), va, opte,
1138 TLBSHOOT_KREMOVE); 1138 TLBSHOOT_KREMOVE);
1139 } 1139 }
1140 KASSERT((opte & PG_PS) == 0); 1140 KASSERT((opte & PG_PS) == 0);
1141 KASSERT((opte & PG_PVLIST) == 0); 1141 KASSERT((opte & PG_PVLIST) == 0);
1142 } 1142 }
1143 if (localonly) { 1143 if (localonly) {
1144 tlbflushg(); 1144 tlbflushg();
1145 } 1145 }
1146 kpreempt_enable(); 1146 kpreempt_enable();
1147} 1147}
1148 1148
1149void 1149void
1150pmap_kremove(vaddr_t sva, vsize_t len) 1150pmap_kremove(vaddr_t sva, vsize_t len)
1151{ 1151{
1152 1152
1153 pmap_kremove1(sva, len, false); 1153 pmap_kremove1(sva, len, false);
1154} 1154}
1155 1155
1156/* 1156/*
1157 * pmap_kremove_local: like pmap_kremove(), but only worry about 1157 * pmap_kremove_local: like pmap_kremove(), but only worry about
1158 * TLB invalidations on the current CPU. this is only intended 1158 * TLB invalidations on the current CPU. this is only intended
1159 * for use while writing kernel crash dumps. 1159 * for use while writing kernel crash dumps.
1160 */ 1160 */
1161 1161
1162void 1162void
1163pmap_kremove_local(vaddr_t sva, vsize_t len) 1163pmap_kremove_local(vaddr_t sva, vsize_t len)
1164{ 1164{
1165 1165
1166 KASSERT(panicstr != NULL); 1166 KASSERT(panicstr != NULL);
1167 pmap_kremove1(sva, len, true); 1167 pmap_kremove1(sva, len, true);
1168} 1168}
1169 1169
1170/* 1170/*
1171 * p m a p i n i t f u n c t i o n s 1171 * p m a p i n i t f u n c t i o n s
1172 * 1172 *
1173 * pmap_bootstrap and pmap_init are called during system startup 1173 * pmap_bootstrap and pmap_init are called during system startup
@@ -3535,1081 +3535,1081 @@ pmap_sync_pv(struct pv_pte *pvpte, pt_en @@ -3535,1081 +3535,1081 @@ pmap_sync_pv(struct pv_pte *pvpte, pt_en
3535 3535
3536 /* 3536 /*
3537 * check if there's anything to do on this pte. 3537 * check if there's anything to do on this pte.
3538 */ 3538 */
3539 3539
3540 if ((opte & clearbits) == 0) { 3540 if ((opte & clearbits) == 0) {
3541 need_shootdown = false; 3541 need_shootdown = false;
3542 break; 3542 break;
3543 } 3543 }
3544 3544
3545 /* 3545 /*
3546 * we need a shootdown if the pte is cached. (PG_U) 3546 * we need a shootdown if the pte is cached. (PG_U)
3547 * 3547 *
3548 * ...unless we are clearing only the PG_RW bit and 3548 * ...unless we are clearing only the PG_RW bit and
3549 * it isn't cached as RW. (PG_M) 3549 * it isn't cached as RW. (PG_M)
3550 */ 3550 */
3551 3551
3552 need_shootdown = (opte & PG_U) != 0 && 3552 need_shootdown = (opte & PG_U) != 0 &&
3553 !(clearbits == PG_RW && (opte & PG_M) == 0); 3553 !(clearbits == PG_RW && (opte & PG_M) == 0);
3554 3554
3555 npte = opte & ~clearbits; 3555 npte = opte & ~clearbits;
3556 3556
3557 /* 3557 /*
3558 * if we need a shootdown anyway, clear PG_U and PG_M. 3558 * if we need a shootdown anyway, clear PG_U and PG_M.
3559 */ 3559 */
3560 3560
3561 if (need_shootdown) { 3561 if (need_shootdown) {
3562 npte &= ~(PG_U | PG_M); 3562 npte &= ~(PG_U | PG_M);
3563 } 3563 }
3564 KASSERT((npte & (PG_M | PG_U)) != PG_M); 3564 KASSERT((npte & (PG_M | PG_U)) != PG_M);
3565 KASSERT((npte & (PG_U | PG_V)) != PG_U); 3565 KASSERT((npte & (PG_U | PG_V)) != PG_U);
3566 KASSERT(npte == 0 || (opte & PG_V) != 0); 3566 KASSERT(npte == 0 || (opte & PG_V) != 0);
3567 } while (pmap_pte_cas(ptep, opte, npte) != opte); 3567 } while (pmap_pte_cas(ptep, opte, npte) != opte);
3568 3568
3569 if (need_shootdown) { 3569 if (need_shootdown) {
3570 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV2); 3570 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV2);
3571 } 3571 }
3572 pmap_unmap_pte(); 3572 pmap_unmap_pte();
3573 3573
3574 *optep = opte; 3574 *optep = opte;
3575 return 0; 3575 return 0;
3576} 3576}
3577 3577
3578static void 3578static void
3579pmap_pp_remove(struct pmap_page *pp, paddr_t pa) 3579pmap_pp_remove(struct pmap_page *pp, paddr_t pa)
3580{ 3580{
3581 struct pv_pte *pvpte; 3581 struct pv_pte *pvpte;
3582 struct pv_entry *killlist = NULL; 3582 struct pv_entry *killlist = NULL;
3583 struct vm_page *ptp; 3583 struct vm_page *ptp;
3584 pt_entry_t expect; 3584 pt_entry_t expect;
3585 int count; 3585 int count;
3586 3586
3587 expect = pmap_pa2pte(pa) | PG_V; 3587 expect = pmap_pa2pte(pa) | PG_V;
3588 count = SPINLOCK_BACKOFF_MIN; 3588 count = SPINLOCK_BACKOFF_MIN;
3589 kpreempt_disable(); 3589 kpreempt_disable();
3590startover: 3590startover:
3591 while ((pvpte = pv_pte_first(pp)) != NULL) { 3591 while ((pvpte = pv_pte_first(pp)) != NULL) {
3592 struct pmap *pmap; 3592 struct pmap *pmap;
3593 struct pv_entry *pve; 3593 struct pv_entry *pve;
3594 pt_entry_t opte; 3594 pt_entry_t opte;
3595 vaddr_t va; 3595 vaddr_t va;
3596 int error; 3596 int error;
3597 3597
3598 /* 3598 /*
3599 * add a reference to the pmap before clearing the pte. 3599 * add a reference to the pmap before clearing the pte.
3600 * otherwise the pmap can disappear behind us. 3600 * otherwise the pmap can disappear behind us.
3601 */ 3601 */
3602 3602
3603 ptp = pvpte->pte_ptp; 3603 ptp = pvpte->pte_ptp;
3604 pmap = ptp_to_pmap(ptp); 3604 pmap = ptp_to_pmap(ptp);
3605 if (ptp != NULL) { 3605 if (ptp != NULL) {
3606 pmap_reference(pmap); 3606 pmap_reference(pmap);
3607 } 3607 }
3608 3608
3609 error = pmap_sync_pv(pvpte, expect, ~0, &opte); 3609 error = pmap_sync_pv(pvpte, expect, ~0, &opte);
3610 if (error == EAGAIN) { 3610 if (error == EAGAIN) {
3611 int hold_count; 3611 int hold_count;
3612 KERNEL_UNLOCK_ALL(curlwp, &hold_count); 3612 KERNEL_UNLOCK_ALL(curlwp, &hold_count);
3613 if (ptp != NULL) { 3613 if (ptp != NULL) {
3614 pmap_destroy(pmap); 3614 pmap_destroy(pmap);
3615 } 3615 }
3616 SPINLOCK_BACKOFF(count); 3616 SPINLOCK_BACKOFF(count);
3617 KERNEL_LOCK(hold_count, curlwp); 3617 KERNEL_LOCK(hold_count, curlwp);
3618 goto startover; 3618 goto startover;
3619 } 3619 }
3620 3620
3621 pp->pp_attrs |= opte; 3621 pp->pp_attrs |= opte;
3622 va = pvpte->pte_va; 3622 va = pvpte->pte_va;
3623 pve = pmap_remove_pv(pp, ptp, va); 3623 pve = pmap_remove_pv(pp, ptp, va);
3624 3624
3625 /* update the PTP reference count. free if last reference. */ 3625 /* update the PTP reference count. free if last reference. */
3626 if (ptp != NULL) { 3626 if (ptp != NULL) {
3627 struct pmap *pmap2; 3627 struct pmap *pmap2;
3628 pt_entry_t *ptes; 3628 pt_entry_t *ptes;
3629 pd_entry_t * const *pdes; 3629 pd_entry_t * const *pdes;
3630 3630
3631 KASSERT(pmap != pmap_kernel()); 3631 KASSERT(pmap != pmap_kernel());
3632 3632
3633 pmap_tlb_shootnow(); 3633 pmap_tlb_shootnow();
3634 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 3634 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
3635 pmap_stats_update_bypte(pmap, 0, opte); 3635 pmap_stats_update_bypte(pmap, 0, opte);
3636 ptp->wire_count--; 3636 ptp->wire_count--;
3637 if (ptp->wire_count <= 1) { 3637 if (ptp->wire_count <= 1) {
3638 pmap_free_ptp(pmap, ptp, va, ptes, pdes); 3638 pmap_free_ptp(pmap, ptp, va, ptes, pdes);
3639 } 3639 }
3640 pmap_unmap_ptes(pmap, pmap2); 3640 pmap_unmap_ptes(pmap, pmap2);
3641 pmap_destroy(pmap); 3641 pmap_destroy(pmap);
3642 } else { 3642 } else {
3643 KASSERT(pmap == pmap_kernel()); 3643 KASSERT(pmap == pmap_kernel());
3644 pmap_stats_update_bypte(pmap, 0, opte); 3644 pmap_stats_update_bypte(pmap, 0, opte);
3645 } 3645 }
3646 3646
3647 if (pve != NULL) { 3647 if (pve != NULL) {
3648 pve->pve_next = killlist; /* mark it for death */ 3648 pve->pve_next = killlist; /* mark it for death */
3649 killlist = pve; 3649 killlist = pve;
3650 } 3650 }
3651 } 3651 }
3652 pmap_tlb_shootnow(); 3652 pmap_tlb_shootnow();
3653 kpreempt_enable(); 3653 kpreempt_enable();
3654 3654
3655 /* Now free unused pvs. */ 3655 /* Now free unused pvs. */
3656 pmap_free_pvs(killlist); 3656 pmap_free_pvs(killlist);
3657} 3657}
3658 3658
3659/* 3659/*
3660 * pmap_page_remove: remove a managed vm_page from all pmaps that map it 3660 * pmap_page_remove: remove a managed vm_page from all pmaps that map it
3661 * 3661 *
3662 * => R/M bits are sync'd back to attrs 3662 * => R/M bits are sync'd back to attrs
3663 */ 3663 */
3664 3664
3665void 3665void
3666pmap_page_remove(struct vm_page *pg) 3666pmap_page_remove(struct vm_page *pg)
3667{ 3667{
3668 struct pmap_page *pp; 3668 struct pmap_page *pp;
3669 paddr_t pa; 3669 paddr_t pa;
3670 3670
3671 KASSERT(uvm_page_locked_p(pg)); 3671 KASSERT(uvm_page_locked_p(pg));
3672 3672
3673 pp = VM_PAGE_TO_PP(pg); 3673 pp = VM_PAGE_TO_PP(pg);
3674 pa = VM_PAGE_TO_PHYS(pg); 3674 pa = VM_PAGE_TO_PHYS(pg);
3675 pmap_pp_remove(pp, pa); 3675 pmap_pp_remove(pp, pa);
3676} 3676}
3677 3677
3678/* 3678/*
3679 * pmap_pv_remove: remove an unmanaged pv-tracked page from all pmaps 3679 * pmap_pv_remove: remove an unmanaged pv-tracked page from all pmaps
3680 * that map it 3680 * that map it
3681 */ 3681 */
3682 3682
3683void 3683void
3684pmap_pv_remove(paddr_t pa) 3684pmap_pv_remove(paddr_t pa)
3685{ 3685{
3686 struct pmap_page *pp; 3686 struct pmap_page *pp;
3687 3687
3688 pp = pmap_pv_tracked(pa); 3688 pp = pmap_pv_tracked(pa);
3689 if (pp == NULL) 3689 if (pp == NULL)
 3690 		panic("pmap_pv_remove: page not pv-tracked: 0x%"PRIxPADDR, 3690 		panic("pmap_pv_remove: page not pv-tracked: 0x%"PRIxPADDR,
3691 pa); 3691 pa);
3692 pmap_pp_remove(pp, pa); 3692 pmap_pp_remove(pp, pa);
3693} 3693}
3694 3694
3695/* 3695/*
3696 * p m a p a t t r i b u t e f u n c t i o n s 3696 * p m a p a t t r i b u t e f u n c t i o n s
3697 * functions that test/change managed page's attributes 3697 * functions that test/change managed page's attributes
3698 * since a page can be mapped multiple times we must check each PTE that 3698 * since a page can be mapped multiple times we must check each PTE that
3699 * maps it by going down the pv lists. 3699 * maps it by going down the pv lists.
3700 */ 3700 */
3701 3701
3702/* 3702/*
3703 * pmap_test_attrs: test a page's attributes 3703 * pmap_test_attrs: test a page's attributes
3704 */ 3704 */
3705 3705
3706bool 3706bool
3707pmap_test_attrs(struct vm_page *pg, unsigned testbits) 3707pmap_test_attrs(struct vm_page *pg, unsigned testbits)
3708{ 3708{
3709 struct pmap_page *pp; 3709 struct pmap_page *pp;
3710 struct pv_pte *pvpte; 3710 struct pv_pte *pvpte;
3711 pt_entry_t expect; 3711 pt_entry_t expect;
3712 u_int result; 3712 u_int result;
3713 3713
3714 KASSERT(uvm_page_locked_p(pg)); 3714 KASSERT(uvm_page_locked_p(pg));
3715 3715
3716 pp = VM_PAGE_TO_PP(pg); 3716 pp = VM_PAGE_TO_PP(pg);
3717 if ((pp->pp_attrs & testbits) != 0) { 3717 if ((pp->pp_attrs & testbits) != 0) {
3718 return true; 3718 return true;
3719 } 3719 }
3720 expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V; 3720 expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V;
3721 kpreempt_disable(); 3721 kpreempt_disable();
3722 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) { 3722 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
3723 pt_entry_t opte; 3723 pt_entry_t opte;
3724 int error; 3724 int error;
3725 3725
3726 if ((pp->pp_attrs & testbits) != 0) { 3726 if ((pp->pp_attrs & testbits) != 0) {
3727 break; 3727 break;
3728 } 3728 }
3729 error = pmap_sync_pv(pvpte, expect, 0, &opte); 3729 error = pmap_sync_pv(pvpte, expect, 0, &opte);
3730 if (error == 0) { 3730 if (error == 0) {
3731 pp->pp_attrs |= opte; 3731 pp->pp_attrs |= opte;
3732 } 3732 }
3733 } 3733 }
3734 result = pp->pp_attrs & testbits; 3734 result = pp->pp_attrs & testbits;
3735 kpreempt_enable(); 3735 kpreempt_enable();
3736 3736
3737 /* 3737 /*
 3738 	 * note that we will exit the for loop with a non-null pvpte if 3738 	 * note that we will exit the for loop with a non-null pvpte if
3739 * we have found the bits we are testing for. 3739 * we have found the bits we are testing for.
3740 */ 3740 */
3741 3741
3742 return result != 0; 3742 return result != 0;
3743} 3743}
3744 3744
3745static bool 3745static bool
3746pmap_pp_clear_attrs(struct pmap_page *pp, paddr_t pa, unsigned clearbits) 3746pmap_pp_clear_attrs(struct pmap_page *pp, paddr_t pa, unsigned clearbits)
3747{ 3747{
3748 struct pv_pte *pvpte; 3748 struct pv_pte *pvpte;
3749 u_int result; 3749 u_int result;
3750 pt_entry_t expect; 3750 pt_entry_t expect;
3751 int count; 3751 int count;
3752 3752
3753 expect = pmap_pa2pte(pa) | PG_V; 3753 expect = pmap_pa2pte(pa) | PG_V;
3754 count = SPINLOCK_BACKOFF_MIN; 3754 count = SPINLOCK_BACKOFF_MIN;
3755 kpreempt_disable(); 3755 kpreempt_disable();
3756startover: 3756startover:
3757 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) { 3757 for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) {
3758 pt_entry_t opte; 3758 pt_entry_t opte;
3759 int error; 3759 int error;
3760 3760
3761 error = pmap_sync_pv(pvpte, expect, clearbits, &opte); 3761 error = pmap_sync_pv(pvpte, expect, clearbits, &opte);
3762 if (error == EAGAIN) { 3762 if (error == EAGAIN) {
3763 int hold_count; 3763 int hold_count;
3764 KERNEL_UNLOCK_ALL(curlwp, &hold_count); 3764 KERNEL_UNLOCK_ALL(curlwp, &hold_count);
3765 SPINLOCK_BACKOFF(count); 3765 SPINLOCK_BACKOFF(count);
3766 KERNEL_LOCK(hold_count, curlwp); 3766 KERNEL_LOCK(hold_count, curlwp);
3767 goto startover; 3767 goto startover;
3768 } 3768 }
3769 pp->pp_attrs |= opte; 3769 pp->pp_attrs |= opte;
3770 } 3770 }
3771 result = pp->pp_attrs & clearbits; 3771 result = pp->pp_attrs & clearbits;
3772 pp->pp_attrs &= ~clearbits; 3772 pp->pp_attrs &= ~clearbits;
3773 pmap_tlb_shootnow(); 3773 pmap_tlb_shootnow();
3774 kpreempt_enable(); 3774 kpreempt_enable();
3775 3775
3776 return result != 0; 3776 return result != 0;
3777} 3777}
3778 3778
3779/* 3779/*
3780 * pmap_clear_attrs: clear the specified attribute for a page. 3780 * pmap_clear_attrs: clear the specified attribute for a page.
3781 * 3781 *
3782 * => we return true if we cleared one of the bits we were asked to 3782 * => we return true if we cleared one of the bits we were asked to
3783 */ 3783 */
3784 3784
3785bool 3785bool
3786pmap_clear_attrs(struct vm_page *pg, unsigned clearbits) 3786pmap_clear_attrs(struct vm_page *pg, unsigned clearbits)
3787{ 3787{
3788 struct pmap_page *pp; 3788 struct pmap_page *pp;
3789 paddr_t pa; 3789 paddr_t pa;
3790 3790
3791 KASSERT(uvm_page_locked_p(pg)); 3791 KASSERT(uvm_page_locked_p(pg));
3792 3792
3793 pp = VM_PAGE_TO_PP(pg); 3793 pp = VM_PAGE_TO_PP(pg);
3794 pa = VM_PAGE_TO_PHYS(pg); 3794 pa = VM_PAGE_TO_PHYS(pg);
3795 3795
3796 return pmap_pp_clear_attrs(pp, pa, clearbits); 3796 return pmap_pp_clear_attrs(pp, pa, clearbits);
3797} 3797}
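/*
 * Illustrative sketch (editorial, not part of the source above): how the
 * attribute functions are typically driven.  The page is assumed to be
 * locked by the caller (the KASSERTs above check uvm_page_locked_p());
 * PG_M/PG_U are the modified/referenced bits collected into pp_attrs.
 */
static bool
modified_sketch(struct vm_page *pg)
{
	bool modified;

	/* was the page dirtied through any of its mappings? */
	modified = pmap_test_attrs(pg, PG_M);

	/* reset modified/referenced state, e.g. before pageout */
	pmap_clear_attrs(pg, PG_M | PG_U);

	return modified;
}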
3798 3798
3799/* 3799/*
3800 * pmap_pv_clear_attrs: clear the specified attributes for an unmanaged 3800 * pmap_pv_clear_attrs: clear the specified attributes for an unmanaged
3801 * pv-tracked page. 3801 * pv-tracked page.
3802 */ 3802 */
3803 3803
3804bool 3804bool
3805pmap_pv_clear_attrs(paddr_t pa, unsigned clearbits) 3805pmap_pv_clear_attrs(paddr_t pa, unsigned clearbits)
3806{ 3806{
3807 struct pmap_page *pp; 3807 struct pmap_page *pp;
3808 3808
3809 pp = pmap_pv_tracked(pa); 3809 pp = pmap_pv_tracked(pa);
3810 if (pp == NULL) 3810 if (pp == NULL)
 3811 		panic("pmap_pv_clear_attrs: page not pv-tracked: 0x%"PRIxPADDR, 3811 		panic("pmap_pv_clear_attrs: page not pv-tracked: 0x%"PRIxPADDR,
3812 pa); 3812 pa);
3813 3813
3814 return pmap_pp_clear_attrs(pp, pa, clearbits); 3814 return pmap_pp_clear_attrs(pp, pa, clearbits);
3815} 3815}
3816 3816
3817/* 3817/*
3818 * p m a p p r o t e c t i o n f u n c t i o n s 3818 * p m a p p r o t e c t i o n f u n c t i o n s
3819 */ 3819 */
3820 3820
3821/* 3821/*
3822 * pmap_page_protect: change the protection of all recorded mappings 3822 * pmap_page_protect: change the protection of all recorded mappings
3823 * of a managed page 3823 * of a managed page
3824 * 3824 *
3825 * => NOTE: this is an inline function in pmap.h 3825 * => NOTE: this is an inline function in pmap.h
3826 */ 3826 */
3827 3827
3828/* see pmap.h */ 3828/* see pmap.h */
3829 3829
3830/* 3830/*
3831 * pmap_pv_protect: change the protection of all recorded mappings 3831 * pmap_pv_protect: change the protection of all recorded mappings
3832 * of an unmanaged pv-tracked page 3832 * of an unmanaged pv-tracked page
3833 * 3833 *
3834 * => NOTE: this is an inline function in pmap.h 3834 * => NOTE: this is an inline function in pmap.h
3835 */ 3835 */
3836 3836
3837/* see pmap.h */ 3837/* see pmap.h */
3838 3838
3839/* 3839/*
3840 * pmap_protect: set the protection in of the pages in a pmap 3840 * pmap_protect: set the protection in of the pages in a pmap
3841 * 3841 *
3842 * => NOTE: this is an inline function in pmap.h 3842 * => NOTE: this is an inline function in pmap.h
3843 */ 3843 */
3844 3844
3845/* see pmap.h */ 3845/* see pmap.h */
3846 3846
3847/* 3847/*
3848 * pmap_write_protect: write-protect pages in a pmap. 3848 * pmap_write_protect: write-protect pages in a pmap.
3849 */ 3849 */
3850void 3850void
3851pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) 3851pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
3852{ 3852{
3853 pt_entry_t *ptes; 3853 pt_entry_t *ptes;
3854 pt_entry_t * const *pdes; 3854 pt_entry_t * const *pdes;
3855 struct pmap *pmap2; 3855 struct pmap *pmap2;
3856 vaddr_t blockend, va; 3856 vaddr_t blockend, va;
3857 3857
3858 KASSERT(curlwp->l_md.md_gc_pmap != pmap); 3858 KASSERT(curlwp->l_md.md_gc_pmap != pmap);
3859 3859
3860 sva &= PG_FRAME; 3860 sva &= PG_FRAME;
3861 eva &= PG_FRAME; 3861 eva &= PG_FRAME;
3862 3862
3863 /* Acquire pmap. */ 3863 /* Acquire pmap. */
3864 kpreempt_disable(); 3864 kpreempt_disable();
3865 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 3865 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
3866 3866
3867 for (va = sva ; va < eva ; va = blockend) { 3867 for (va = sva ; va < eva ; va = blockend) {
3868 pt_entry_t *spte, *epte; 3868 pt_entry_t *spte, *epte;
3869 int i; 3869 int i;
3870 3870
3871 blockend = x86_round_pdr(va + 1); 3871 blockend = x86_round_pdr(va + 1);
3872 if (blockend > eva) 3872 if (blockend > eva)
3873 blockend = eva; 3873 blockend = eva;
3874 3874
3875 /* 3875 /*
3876 * XXXCDC: our PTE mappings should never be write-protected! 3876 * XXXCDC: our PTE mappings should never be write-protected!
3877 * 3877 *
3878 * long term solution is to move the PTEs out of user 3878 * long term solution is to move the PTEs out of user
3879 * address space. and into kernel address space (up 3879 * address space. and into kernel address space (up
3880 * with APTE). then we can set VM_MAXUSER_ADDRESS to 3880 * with APTE). then we can set VM_MAXUSER_ADDRESS to
3881 * be VM_MAX_ADDRESS. 3881 * be VM_MAX_ADDRESS.
3882 */ 3882 */
3883 3883
3884 /* XXXCDC: ugly hack to avoid freeing PDP here */ 3884 /* XXXCDC: ugly hack to avoid freeing PDP here */
3885 for (i = 0; i < PDP_SIZE; i++) { 3885 for (i = 0; i < PDP_SIZE; i++) {
3886 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE+i) 3886 if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE+i)
3887 continue; 3887 continue;
3888 } 3888 }
3889 3889
3890 /* Is it a valid block? */ 3890 /* Is it a valid block? */
3891 if (!pmap_pdes_valid(va, pdes, NULL)) { 3891 if (!pmap_pdes_valid(va, pdes, NULL)) {
3892 continue; 3892 continue;
3893 } 3893 }
3894 KASSERT(va < VM_MAXUSER_ADDRESS || va >= VM_MAX_ADDRESS); 3894 KASSERT(va < VM_MAXUSER_ADDRESS || va >= VM_MAX_ADDRESS);
3895 3895
3896 spte = &ptes[pl1_i(va)]; 3896 spte = &ptes[pl1_i(va)];
3897 epte = &ptes[pl1_i(blockend)]; 3897 epte = &ptes[pl1_i(blockend)];
3898 3898
3899 for (/*null */; spte < epte ; spte++) { 3899 for (/*null */; spte < epte ; spte++) {
3900 pt_entry_t opte, npte; 3900 pt_entry_t opte, npte;
3901 3901
3902 do { 3902 do {
3903 opte = *spte; 3903 opte = *spte;
3904 if ((~opte & (PG_RW | PG_V)) != 0) { 3904 if ((~opte & (PG_RW | PG_V)) != 0) {
3905 goto next; 3905 goto next;
3906 } 3906 }
3907 npte = opte & ~PG_RW; 3907 npte = opte & ~PG_RW;
3908 } while (pmap_pte_cas(spte, opte, npte) != opte); 3908 } while (pmap_pte_cas(spte, opte, npte) != opte);
3909 3909
3910 if ((opte & PG_M) != 0) { 3910 if ((opte & PG_M) != 0) {
3911 vaddr_t tva = x86_ptob(spte - ptes); 3911 vaddr_t tva = x86_ptob(spte - ptes);
3912 pmap_tlb_shootdown(pmap, tva, opte, 3912 pmap_tlb_shootdown(pmap, tva, opte,
3913 TLBSHOOT_WRITE_PROTECT); 3913 TLBSHOOT_WRITE_PROTECT);
3914 } 3914 }
3915next:; 3915next:;
3916 } 3916 }
3917 } 3917 }
3918 3918
3919 /* Release pmap. */ 3919 /* Release pmap. */
3920 pmap_unmap_ptes(pmap, pmap2); 3920 pmap_unmap_ptes(pmap, pmap2);
3921 kpreempt_enable(); 3921 kpreempt_enable();
3922} 3922}
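/*
 * Illustrative sketch (editorial, not part of the source above): the usual
 * calling pattern for pmap_write_protect().  It only queues TLB shootdowns
 * for PTEs that were dirty (PG_M), so the caller finishes with pmap_update()
 * like any other modifying operation.  The range is assumed page-aligned.
 */
static void
write_protect_sketch(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
{

	/* drop write permission; read/execute mappings are left alone */
	pmap_write_protect(pmap, sva, eva, VM_PROT_READ);
	pmap_update(pmap);
}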
3923 3923
3924/* 3924/*
3925 * pmap_unwire: clear the wired bit in the PTE. 3925 * pmap_unwire: clear the wired bit in the PTE.
3926 * 3926 *
3927 * => Mapping should already be present. 3927 * => Mapping should already be present.
3928 */ 3928 */
3929void 3929void
3930pmap_unwire(struct pmap *pmap, vaddr_t va) 3930pmap_unwire(struct pmap *pmap, vaddr_t va)
3931{ 3931{
3932 pt_entry_t *ptes, *ptep, opte; 3932 pt_entry_t *ptes, *ptep, opte;
3933 pd_entry_t * const *pdes; 3933 pd_entry_t * const *pdes;
3934 struct pmap *pmap2; 3934 struct pmap *pmap2;
3935 3935
3936 /* Acquire pmap. */ 3936 /* Acquire pmap. */
3937 kpreempt_disable(); 3937 kpreempt_disable();
3938 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); 3938 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes);
3939 3939
3940 if (!pmap_pdes_valid(va, pdes, NULL)) { 3940 if (!pmap_pdes_valid(va, pdes, NULL)) {
3941 panic("pmap_unwire: invalid PDE"); 3941 panic("pmap_unwire: invalid PDE");
3942 } 3942 }
3943 3943
3944 ptep = &ptes[pl1_i(va)]; 3944 ptep = &ptes[pl1_i(va)];
3945 opte = *ptep; 3945 opte = *ptep;
3946 KASSERT(pmap_valid_entry(opte)); 3946 KASSERT(pmap_valid_entry(opte));
3947 3947
3948 if (opte & PG_W) { 3948 if (opte & PG_W) {
3949 pt_entry_t npte = opte & ~PG_W; 3949 pt_entry_t npte = opte & ~PG_W;
3950 3950
3951 opte = pmap_pte_testset(ptep, npte); 3951 opte = pmap_pte_testset(ptep, npte);
3952 pmap_stats_update_bypte(pmap, npte, opte); 3952 pmap_stats_update_bypte(pmap, npte, opte);
3953 } else { 3953 } else {
3954 printf("pmap_unwire: wiring for pmap %p va 0x%lx " 3954 printf("pmap_unwire: wiring for pmap %p va 0x%lx "
3955 "did not change!\n", pmap, va); 3955 "did not change!\n", pmap, va);
3956 } 3956 }
3957 3957
3958 /* Release pmap. */ 3958 /* Release pmap. */
3959 pmap_unmap_ptes(pmap, pmap2); 3959 pmap_unmap_ptes(pmap, pmap2);
3960 kpreempt_enable(); 3960 kpreempt_enable();
3961} 3961}
3962 3962
3963/* 3963/*
3964 * pmap_copy: copy mappings from one pmap to another 3964 * pmap_copy: copy mappings from one pmap to another
3965 * 3965 *
3966 * => optional function 3966 * => optional function
3967 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) 3967 * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
3968 */ 3968 */
3969 3969
3970/* 3970/*
3971 * defined as macro in pmap.h 3971 * defined as macro in pmap.h
3972 */ 3972 */
3973 3973
3974__strict_weak_alias(pmap_enter, pmap_enter_default); 3974__strict_weak_alias(pmap_enter, pmap_enter_default);
3975 3975
3976int 3976int
3977pmap_enter_default(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, 3977pmap_enter_default(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot,
3978 u_int flags) 3978 u_int flags)
3979{ 3979{
3980 return pmap_enter_ma(pmap, va, pa, pa, prot, flags, 0); 3980 return pmap_enter_ma(pmap, va, pa, pa, prot, flags, 0);
3981} 3981}
3982 3982
3983/* 3983/*
3984 * pmap_enter: enter a mapping into a pmap 3984 * pmap_enter: enter a mapping into a pmap
3985 * 3985 *
3986 * => must be done "now" ... no lazy-evaluation 3986 * => must be done "now" ... no lazy-evaluation
3987 * => we set pmap => pv_head locking 3987 * => we set pmap => pv_head locking
3988 */ 3988 */
3989int 3989int
3990pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa, 3990pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa,
3991 vm_prot_t prot, u_int flags, int domid) 3991 vm_prot_t prot, u_int flags, int domid)
3992{ 3992{
3993 pt_entry_t *ptes, opte, npte; 3993 pt_entry_t *ptes, opte, npte;
3994 pt_entry_t *ptep; 3994 pt_entry_t *ptep;
3995 pd_entry_t * const *pdes; 3995 pd_entry_t * const *pdes;
3996 struct vm_page *ptp; 3996 struct vm_page *ptp;
3997 struct vm_page *new_pg, *old_pg; 3997 struct vm_page *new_pg, *old_pg;
3998 struct pmap_page *new_pp, *old_pp; 3998 struct pmap_page *new_pp, *old_pp;
3999 struct pv_entry *old_pve = NULL; 3999 struct pv_entry *old_pve = NULL;
4000 struct pv_entry *new_pve; 4000 struct pv_entry *new_pve;
4001 struct pv_entry *new_pve2; 4001 struct pv_entry *new_pve2;
4002 int error; 4002 int error;
4003 bool wired = (flags & PMAP_WIRED) != 0; 4003 bool wired = (flags & PMAP_WIRED) != 0;
4004 struct pmap *pmap2; 4004 struct pmap *pmap2;
4005 4005
4006 KASSERT(pmap_initialized); 4006 KASSERT(pmap_initialized);
4007 KASSERT(curlwp->l_md.md_gc_pmap != pmap); 4007 KASSERT(curlwp->l_md.md_gc_pmap != pmap);
4008 KASSERT(va < VM_MAX_KERNEL_ADDRESS); 4008 KASSERT(va < VM_MAX_KERNEL_ADDRESS);
4009 KASSERTMSG(va != (vaddr_t)PDP_BASE, 4009 KASSERTMSG(va != (vaddr_t)PDP_BASE,
4010 "pmap_enter: trying to map over PDP!"); 4010 "pmap_enter: trying to map over PDP!");
4011 KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS || 4011 KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS ||
4012 pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]), 4012 pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]),
4013 "pmap_enter: missing kernel PTP for VA %lx!", va); 4013 "pmap_enter: missing kernel PTP for VA %lx!", va);
4014 4014
4015#ifdef XEN 4015#ifdef XEN
4016 KASSERT(domid == DOMID_SELF || pa == 0); 4016 KASSERT(domid == DOMID_SELF || pa == 0);
4017#endif /* XEN */ 4017#endif /* XEN */
4018 4018
4019 npte = ma | protection_codes[prot] | PG_V; 4019 npte = ma | protection_codes[prot] | PG_V;
4020 npte |= pmap_pat_flags(flags); 4020 npte |= pmap_pat_flags(flags);
4021 if (wired) 4021 if (wired)
4022 npte |= PG_W; 4022 npte |= PG_W;
4023 if (va < VM_MAXUSER_ADDRESS) 4023 if (va < VM_MAXUSER_ADDRESS)
4024 npte |= PG_u; 4024 npte |= PG_u;
4025 else if (va < VM_MAX_ADDRESS) 4025 else if (va < VM_MAX_ADDRESS)
4026 npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ 4026 npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */
4027 else 4027 else
4028 npte |= PG_k; 4028 npte |= PG_k;
4029 if (pmap == pmap_kernel()) 4029 if (pmap == pmap_kernel())
4030 npte |= pmap_pg_g; 4030 npte |= pmap_pg_g;
4031 if (flags & VM_PROT_ALL) { 4031 if (flags & VM_PROT_ALL) {
4032 npte |= PG_U; 4032 npte |= PG_U;
4033 if (flags & VM_PROT_WRITE) { 4033 if (flags & VM_PROT_WRITE) {
4034 KASSERT((npte & PG_RW) != 0); 4034 KASSERT((npte & PG_RW) != 0);
4035 npte |= PG_M; 4035 npte |= PG_M;
4036 } 4036 }
4037 } 4037 }
4038 4038
4039#ifdef XEN 4039#ifdef XEN
4040 if (domid != DOMID_SELF) 4040 if (domid != DOMID_SELF)
4041 new_pg = NULL; 4041 new_pg = NULL;
4042 else 4042 else
4043#endif 4043#endif
4044 new_pg = PHYS_TO_VM_PAGE(pa); 4044 new_pg = PHYS_TO_VM_PAGE(pa);
4045 if (new_pg != NULL) { 4045 if (new_pg != NULL) {
4046 /* This is a managed page */ 4046 /* This is a managed page */
4047 npte |= PG_PVLIST; 4047 npte |= PG_PVLIST;
4048 new_pp = VM_PAGE_TO_PP(new_pg); 4048 new_pp = VM_PAGE_TO_PP(new_pg);
4049 } else if ((new_pp = pmap_pv_tracked(pa)) != NULL) { 4049 } else if ((new_pp = pmap_pv_tracked(pa)) != NULL) {
4050 /* This is an unmanaged pv-tracked page */ 4050 /* This is an unmanaged pv-tracked page */
4051 npte |= PG_PVLIST; 4051 npte |= PG_PVLIST;
4052 } else { 4052 } else {
4053 new_pp = NULL; 4053 new_pp = NULL;
4054 } 4054 }
4055 4055
4056 /* get pves. */ 4056 /* get pves. */
4057 new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); 4057 new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
4058 new_pve2 = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); 4058 new_pve2 = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
4059 if (new_pve == NULL || new_pve2 == NULL) { 4059 if (new_pve == NULL || new_pve2 == NULL) {
4060 if (flags & PMAP_CANFAIL) { 4060 if (flags & PMAP_CANFAIL) {
4061 error = ENOMEM; 4061 error = ENOMEM;
4062 goto out2; 4062 goto out2;
4063 } 4063 }
4064 panic("pmap_enter: pve allocation failed"); 4064 panic("pmap_enter: pve allocation failed");
4065 } 4065 }
4066 4066
4067 kpreempt_disable(); 4067 kpreempt_disable();
4068 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ 4068 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */
4069 if (pmap == pmap_kernel()) { 4069 if (pmap == pmap_kernel()) {
4070 ptp = NULL; 4070 ptp = NULL;
4071 } else { 4071 } else {
4072 ptp = pmap_get_ptp(pmap, va, pdes); 4072 ptp = pmap_get_ptp(pmap, va, pdes);
4073 if (ptp == NULL) { 4073 if (ptp == NULL) {
4074 pmap_unmap_ptes(pmap, pmap2); 4074 pmap_unmap_ptes(pmap, pmap2);
4075 if (flags & PMAP_CANFAIL) { 4075 if (flags & PMAP_CANFAIL) {
4076 error = ENOMEM; 4076 error = ENOMEM;
4077 goto out; 4077 goto out;
4078 } 4078 }
4079 panic("pmap_enter: get ptp failed"); 4079 panic("pmap_enter: get ptp failed");
4080 } 4080 }
4081 } 4081 }
4082 4082
4083 /* 4083 /*
4084 * update the pte. 4084 * update the pte.
4085 */ 4085 */
4086 4086
4087 ptep = &ptes[pl1_i(va)]; 4087 ptep = &ptes[pl1_i(va)];
4088 do { 4088 do {
4089 opte = *ptep; 4089 opte = *ptep;
4090 4090
4091 /* 4091 /*
4092 * if the same page, inherit PG_U and PG_M. 4092 * if the same page, inherit PG_U and PG_M.
4093 */ 4093 */
4094 if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) { 4094 if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) {
4095 npte |= opte & (PG_U | PG_M); 4095 npte |= opte & (PG_U | PG_M);
4096 } 4096 }
4097#if defined(XEN) 4097#if defined(XEN)
4098 if (domid != DOMID_SELF) { 4098 if (domid != DOMID_SELF) {
4099 /* pmap_pte_cas with error handling */ 4099 /* pmap_pte_cas with error handling */
4100 int s = splvm(); 4100 int s = splvm();
4101 if (opte != *ptep) { 4101 if (opte != *ptep) {
4102 splx(s); 4102 splx(s);
4103 continue; 4103 continue;
4104 } 4104 }
4105 error = xpq_update_foreign( 4105 error = xpq_update_foreign(
4106 vtomach((vaddr_t)ptep), npte, domid); 4106 vtomach((vaddr_t)ptep), npte, domid);
4107 splx(s); 4107 splx(s);
4108 if (error) { 4108 if (error) {
4109 if (ptp != NULL && ptp->wire_count <= 1) { 4109 if (ptp != NULL && ptp->wire_count <= 1) {
4110 pmap_free_ptp(pmap, ptp, va, ptes, pdes); 4110 pmap_free_ptp(pmap, ptp, va, ptes, pdes);
4111 } 4111 }
4112 pmap_unmap_ptes(pmap, pmap2); 4112 pmap_unmap_ptes(pmap, pmap2);
4113 goto out; 4113 goto out;
4114 } 4114 }
4115 break; 4115 break;
4116 } 4116 }
4117#endif /* defined(XEN) */ 4117#endif /* defined(XEN) */
4118 } while (pmap_pte_cas(ptep, opte, npte) != opte); 4118 } while (pmap_pte_cas(ptep, opte, npte) != opte);
4119 4119
4120 /* 4120 /*
4121 * update statistics and PTP's reference count. 4121 * update statistics and PTP's reference count.
4122 */ 4122 */
4123 4123
4124 pmap_stats_update_bypte(pmap, npte, opte); 4124 pmap_stats_update_bypte(pmap, npte, opte);
4125 if (ptp != NULL && !pmap_valid_entry(opte)) { 4125 if (ptp != NULL && !pmap_valid_entry(opte)) {
4126 ptp->wire_count++; 4126 ptp->wire_count++;
4127 } 4127 }
4128 KASSERT(ptp == NULL || ptp->wire_count > 1); 4128 KASSERT(ptp == NULL || ptp->wire_count > 1);
4129 4129
4130 /* 4130 /*
4131 * if the same page, we can skip pv_entry handling. 4131 * if the same page, we can skip pv_entry handling.
4132 */ 4132 */
4133 4133
4134 if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) { 4134 if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) {
4135 KASSERT(((opte ^ npte) & PG_PVLIST) == 0); 4135 KASSERT(((opte ^ npte) & PG_PVLIST) == 0);
4136 goto same_pa; 4136 goto same_pa;
4137 } 4137 }
4138 4138
4139 /* 4139 /*
4140 * if old page is pv-tracked, remove pv_entry from its list. 4140 * if old page is pv-tracked, remove pv_entry from its list.
4141 */ 4141 */
4142 4142
4143 if ((~opte & (PG_V | PG_PVLIST)) == 0) { 4143 if ((~opte & (PG_V | PG_PVLIST)) == 0) {
4144 if ((old_pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte))) != NULL) { 4144 if ((old_pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte))) != NULL) {
4145 KASSERT(uvm_page_locked_p(old_pg)); 4145 KASSERT(uvm_page_locked_p(old_pg));
4146 old_pp = VM_PAGE_TO_PP(old_pg); 4146 old_pp = VM_PAGE_TO_PP(old_pg);
4147 } else if ((old_pp = pmap_pv_tracked(pmap_pte2pa(opte))) 4147 } else if ((old_pp = pmap_pv_tracked(pmap_pte2pa(opte)))
4148 == NULL) { 4148 == NULL) {
4149 pa = pmap_pte2pa(opte); 4149 pa = pmap_pte2pa(opte);
4150 panic("pmap_enter: PG_PVLIST with pv-untracked page" 4150 panic("pmap_enter: PG_PVLIST with pv-untracked page"
4151 " va = 0x%"PRIxVADDR 4151 " va = 0x%"PRIxVADDR
4152 " pa = 0x%" PRIxPADDR " (0x%" PRIxPADDR ")", 4152 " pa = 0x%" PRIxPADDR " (0x%" PRIxPADDR ")",
4153 va, pa, atop(pa)); 4153 va, pa, atop(pa));
4154 } 4154 }
4155 4155
4156 old_pve = pmap_remove_pv(old_pp, ptp, va); 4156 old_pve = pmap_remove_pv(old_pp, ptp, va);
4157 old_pp->pp_attrs |= opte; 4157 old_pp->pp_attrs |= opte;
4158 } 4158 }
4159 4159
4160 /* 4160 /*
4161 * if new page is pv-tracked, insert pv_entry into its list. 4161 * if new page is pv-tracked, insert pv_entry into its list.
4162 */ 4162 */
4163 4163
4164 if (new_pp) { 4164 if (new_pp) {
4165 new_pve = pmap_enter_pv(new_pp, new_pve, &new_pve2, ptp, va); 4165 new_pve = pmap_enter_pv(new_pp, new_pve, &new_pve2, ptp, va);
4166 } 4166 }
4167 4167
4168same_pa: 4168same_pa:
4169 pmap_unmap_ptes(pmap, pmap2); 4169 pmap_unmap_ptes(pmap, pmap2);
4170 4170
4171 /* 4171 /*
4172 * shootdown tlb if necessary. 4172 * shootdown tlb if necessary.
4173 */ 4173 */
4174 4174
4175 if ((~opte & (PG_V | PG_U)) == 0 && 4175 if ((~opte & (PG_V | PG_U)) == 0 &&
4176 ((opte ^ npte) & (PG_FRAME | PG_RW)) != 0) { 4176 ((opte ^ npte) & (PG_FRAME | PG_RW)) != 0) {
4177 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER); 4177 pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER);
4178 } 4178 }
4179 4179
4180 error = 0; 4180 error = 0;
4181out: 4181out:
4182 kpreempt_enable(); 4182 kpreempt_enable();
4183out2: 4183out2:
4184 if (old_pve != NULL) { 4184 if (old_pve != NULL) {
4185 pool_cache_put(&pmap_pv_cache, old_pve); 4185 pool_cache_put(&pmap_pv_cache, old_pve);
4186 } 4186 }
4187 if (new_pve != NULL) { 4187 if (new_pve != NULL) {
4188 pool_cache_put(&pmap_pv_cache, new_pve); 4188 pool_cache_put(&pmap_pv_cache, new_pve);
4189 } 4189 }
4190 if (new_pve2 != NULL) { 4190 if (new_pve2 != NULL) {
4191 pool_cache_put(&pmap_pv_cache, new_pve2); 4191 pool_cache_put(&pmap_pv_cache, new_pve2);
4192 } 4192 }
4193 4193
4194 return error; 4194 return error;
4195} 4195}
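/*
 * Illustrative sketch (editorial, not part of the source above): the usual
 * MI calling pattern for pmap_enter() (the weak alias above).  With
 * PMAP_CANFAIL the caller must handle ENOMEM -- typically by waiting for
 * memory and retrying -- instead of having pmap_enter_ma() panic when a
 * pv_entry or PTP cannot be allocated.
 */
static int
enter_sketch(struct pmap *pmap, vaddr_t va, paddr_t pa)
{
	int error;

	error = pmap_enter(pmap, va, pa, VM_PROT_READ | VM_PROT_WRITE,
	    VM_PROT_WRITE | PMAP_CANFAIL);
	if (error)
		return error;	/* caller unwinds, sleeps for memory, ... */

	pmap_update(pmap);
	return 0;
}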
4196 4196
4197static bool 4197static bool
4198pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp) 4198pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp)
4199{ 4199{
4200 struct vm_page *ptp; 4200 struct vm_page *ptp;
4201 struct pmap *kpm = pmap_kernel(); 4201 struct pmap *kpm = pmap_kernel();
4202 4202
4203 if (!uvm.page_init_done) { 4203 if (!uvm.page_init_done) {
4204 4204
4205 /* 4205 /*
4206 * we're growing the kernel pmap early (from 4206 * we're growing the kernel pmap early (from
4207 * uvm_pageboot_alloc()). this case must be 4207 * uvm_pageboot_alloc()). this case must be
4208 * handled a little differently. 4208 * handled a little differently.
4209 */ 4209 */
4210 4210
4211 if (!uvm_page_physget(paddrp)) 4211 if (!uvm_page_physget(paddrp))
4212 panic("pmap_get_physpage: out of memory"); 4212 panic("pmap_get_physpage: out of memory");
4213#if defined(__HAVE_DIRECT_MAP) 4213#if defined(__HAVE_DIRECT_MAP)
4214 pagezero(PMAP_DIRECT_MAP(*paddrp)); 4214 pagezero(PMAP_DIRECT_MAP(*paddrp));
4215#else 4215#else
4216#if defined(XEN) 4216#if defined(XEN)
4217 if (XEN_VERSION_SUPPORTED(3, 4)) { 4217 if (XEN_VERSION_SUPPORTED(3, 4)) {
4218 xen_pagezero(*paddrp); 4218 xen_pagezero(*paddrp);
4219 return true; 4219 return true;
4220 } 4220 }
4221#endif 4221#endif
4222 kpreempt_disable(); 4222 kpreempt_disable();
4223 pmap_pte_set(early_zero_pte, 4223 pmap_pte_set(early_zero_pte,
4224 pmap_pa2pte(*paddrp) | PG_V | PG_RW | PG_k); 4224 pmap_pa2pte(*paddrp) | PG_V | PG_RW | PG_k);
4225 pmap_pte_flush(); 4225 pmap_pte_flush();
4226 pmap_update_pg((vaddr_t)early_zerop); 4226 pmap_update_pg((vaddr_t)early_zerop);
4227 memset(early_zerop, 0, PAGE_SIZE); 4227 memset(early_zerop, 0, PAGE_SIZE);
4228#if defined(DIAGNOSTIC) || defined(XEN) 4228#if defined(DIAGNOSTIC) || defined(XEN)
4229 pmap_pte_set(early_zero_pte, 0); 4229 pmap_pte_set(early_zero_pte, 0);
4230 pmap_pte_flush(); 4230 pmap_pte_flush();
4231#endif /* defined(DIAGNOSTIC) */ 4231#endif /* defined(DIAGNOSTIC) */
4232 kpreempt_enable(); 4232 kpreempt_enable();
4233#endif /* defined(__HAVE_DIRECT_MAP) */ 4233#endif /* defined(__HAVE_DIRECT_MAP) */
4234 } else { 4234 } else {
4235 /* XXX */ 4235 /* XXX */
4236 ptp = uvm_pagealloc(NULL, 0, NULL, 4236 ptp = uvm_pagealloc(NULL, 0, NULL,
4237 UVM_PGA_USERESERVE|UVM_PGA_ZERO); 4237 UVM_PGA_USERESERVE|UVM_PGA_ZERO);
4238 if (ptp == NULL) 4238 if (ptp == NULL)
4239 panic("pmap_get_physpage: out of memory"); 4239 panic("pmap_get_physpage: out of memory");
4240 ptp->flags &= ~PG_BUSY; 4240 ptp->flags &= ~PG_BUSY;
4241 ptp->wire_count = 1; 4241 ptp->wire_count = 1;
4242 *paddrp = VM_PAGE_TO_PHYS(ptp); 4242 *paddrp = VM_PAGE_TO_PHYS(ptp);
4243 } 4243 }
4244 pmap_stats_update(kpm, 1, 0); 4244 pmap_stats_update(kpm, 1, 0);
4245 return true; 4245 return true;
4246} 4246}
4247 4247
4248/* 4248/*
 4249 * Allocate the specified number of PTPs for a given level, and populate 4249 * Allocate the specified number of PTPs for a given level, and populate
4250 * all levels below accordingly, mapping virtual addresses starting at 4250 * all levels below accordingly, mapping virtual addresses starting at
4251 * kva. 4251 * kva.
4252 * 4252 *
4253 * Used by pmap_growkernel. 4253 * Used by pmap_growkernel.
4254 */ 4254 */
4255static void 4255static void
4256pmap_alloc_level(pd_entry_t * const *pdes, vaddr_t kva, int lvl, 4256pmap_alloc_level(pd_entry_t * const *pdes, vaddr_t kva, int lvl,
4257 long *needed_ptps) 4257 long *needed_ptps)
4258{ 4258{
4259 unsigned long i; 4259 unsigned long i;
4260 vaddr_t va; 4260 vaddr_t va;
4261 paddr_t pa; 4261 paddr_t pa;
4262 unsigned long index, endindex; 4262 unsigned long index, endindex;
4263 int level; 4263 int level;
4264 pd_entry_t *pdep; 4264 pd_entry_t *pdep;
4265#ifdef XEN 4265#ifdef XEN
4266 int s = splvm(); /* protect xpq_* */ 4266 int s = splvm(); /* protect xpq_* */
4267#endif 4267#endif
4268 4268
4269 for (level = lvl; level > 1; level--) { 4269 for (level = lvl; level > 1; level--) {
4270 if (level == PTP_LEVELS) 4270 if (level == PTP_LEVELS)
4271 pdep = pmap_kernel()->pm_pdir; 4271 pdep = pmap_kernel()->pm_pdir;
4272 else 4272 else
4273 pdep = pdes[level - 2]; 4273 pdep = pdes[level - 2];
4274 va = kva; 4274 va = kva;
4275 index = pl_i_roundup(kva, level); 4275 index = pl_i_roundup(kva, level);
4276 endindex = index + needed_ptps[level - 1] - 1; 4276 endindex = index + needed_ptps[level - 1] - 1;
4277 4277
4278 4278
4279 for (i = index; i <= endindex; i++) { 4279 for (i = index; i <= endindex; i++) {
4280 pt_entry_t pte; 4280 pt_entry_t pte;
4281 4281
4282 KASSERT(!pmap_valid_entry(pdep[i])); 4282 KASSERT(!pmap_valid_entry(pdep[i]));
4283 pmap_get_physpage(va, level - 1, &pa); 4283 pmap_get_physpage(va, level - 1, &pa);
4284 pte = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW; 4284 pte = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW;
4285#ifdef XEN 4285#ifdef XEN
4286 pmap_pte_set(&pdep[i], pte); 4286 pmap_pte_set(&pdep[i], pte);
4287#if defined(PAE) || defined(__x86_64__) 4287#if defined(PAE) || defined(__x86_64__)
4288 if (level == PTP_LEVELS && i >= PDIR_SLOT_KERN) { 4288 if (level == PTP_LEVELS && i >= PDIR_SLOT_KERN) {
4289 if (__predict_true( 4289 if (__predict_true(
4290 cpu_info_primary.ci_flags & CPUF_PRESENT)) { 4290 cpu_info_primary.ci_flags & CPUF_PRESENT)) {
4291 /* update per-cpu PMDs on all cpus */ 4291 /* update per-cpu PMDs on all cpus */
4292 xen_kpm_sync(pmap_kernel(), i); 4292 xen_kpm_sync(pmap_kernel(), i);
4293 } else { 4293 } else {
4294 /* 4294 /*
4295 * too early; update primary CPU 4295 * too early; update primary CPU
4296 * PMD only (without locks) 4296 * PMD only (without locks)
4297 */ 4297 */
4298#ifdef PAE 4298#ifdef PAE
4299 pd_entry_t *cpu_pdep = 4299 pd_entry_t *cpu_pdep =
4300 &cpu_info_primary.ci_kpm_pdir[l2tol2(i)]; 4300 &cpu_info_primary.ci_kpm_pdir[l2tol2(i)];
4301#endif 4301#endif
4302#ifdef __x86_64__ 4302#ifdef __x86_64__
4303 pd_entry_t *cpu_pdep = 4303 pd_entry_t *cpu_pdep =
4304 &cpu_info_primary.ci_kpm_pdir[i]; 4304 &cpu_info_primary.ci_kpm_pdir[i];
4305#endif 4305#endif
4306 pmap_pte_set(cpu_pdep, pte); 4306 pmap_pte_set(cpu_pdep, pte);
4307 } 4307 }
4308 } 4308 }
4309#endif /* PAE || __x86_64__ */ 4309#endif /* PAE || __x86_64__ */
4310#else /* XEN */ 4310#else /* XEN */
4311 pdep[i] = pte; 4311 pdep[i] = pte;
4312#endif /* XEN */ 4312#endif /* XEN */
4313 KASSERT(level != PTP_LEVELS || nkptp[level - 1] + 4313 KASSERT(level != PTP_LEVELS || nkptp[level - 1] +
4314 pl_i(VM_MIN_KERNEL_ADDRESS, level) == i); 4314 pl_i(VM_MIN_KERNEL_ADDRESS, level) == i);
4315 nkptp[level - 1]++; 4315 nkptp[level - 1]++;
4316 va += nbpd[level - 1]; 4316 va += nbpd[level - 1];
4317 } 4317 }
4318 pmap_pte_flush(); 4318 pmap_pte_flush();
4319 } 4319 }
4320#ifdef XEN 4320#ifdef XEN
4321 splx(s); 4321 splx(s);
4322#endif 4322#endif
4323} 4323}
4324 4324
4325/* 4325/*
4326 * pmap_growkernel: increase usage of KVM space 4326 * pmap_growkernel: increase usage of KVM space
4327 * 4327 *
4328 * => we allocate new PTPs for the kernel and install them in all 4328 * => we allocate new PTPs for the kernel and install them in all
4329 * the pmaps on the system. 4329 * the pmaps on the system.
4330 */ 4330 */
4331 4331
4332vaddr_t 4332vaddr_t
4333pmap_growkernel(vaddr_t maxkvaddr) 4333pmap_growkernel(vaddr_t maxkvaddr)
4334{ 4334{
4335 struct pmap *kpm = pmap_kernel(); 4335 struct pmap *kpm = pmap_kernel();
4336#if !defined(XEN) || !defined(__x86_64__) 4336#if !defined(XEN) || !defined(__x86_64__)
4337 struct pmap *pm; 4337 struct pmap *pm;
4338 long old; 4338 long old;
4339#endif 4339#endif
4340 int s, i; 4340 int s, i;
4341 long needed_kptp[PTP_LEVELS], target_nptp; 4341 long needed_kptp[PTP_LEVELS], target_nptp;
4342 bool invalidate = false; 4342 bool invalidate = false;
4343 4343
4344 s = splvm(); /* to be safe */ 4344 s = splvm(); /* to be safe */
4345 mutex_enter(kpm->pm_lock); 4345 mutex_enter(kpm->pm_lock);
4346 4346
4347 if (maxkvaddr <= pmap_maxkvaddr) { 4347 if (maxkvaddr <= pmap_maxkvaddr) {
4348 mutex_exit(kpm->pm_lock); 4348 mutex_exit(kpm->pm_lock);
4349 splx(s); 4349 splx(s);
4350 return pmap_maxkvaddr; 4350 return pmap_maxkvaddr;
4351 } 4351 }
4352 4352
4353 maxkvaddr = x86_round_pdr(maxkvaddr); 4353 maxkvaddr = x86_round_pdr(maxkvaddr);
4354#if !defined(XEN) || !defined(__x86_64__) 4354#if !defined(XEN) || !defined(__x86_64__)
4355 old = nkptp[PTP_LEVELS - 1]; 4355 old = nkptp[PTP_LEVELS - 1];
4356#endif 4356#endif
4357 4357
4358 /* 4358 /*
4359 * This loop could be optimized more, but pmap_growkernel() 4359 * This loop could be optimized more, but pmap_growkernel()
4360 * is called infrequently. 4360 * is called infrequently.
4361 */ 4361 */
4362 for (i = PTP_LEVELS - 1; i >= 1; i--) { 4362 for (i = PTP_LEVELS - 1; i >= 1; i--) {
4363 target_nptp = pl_i_roundup(maxkvaddr, i + 1) - 4363 target_nptp = pl_i_roundup(maxkvaddr, i + 1) -
4364 pl_i_roundup(VM_MIN_KERNEL_ADDRESS, i + 1); 4364 pl_i_roundup(VM_MIN_KERNEL_ADDRESS, i + 1);
4365 /* 4365 /*
4366 * XXX only need to check toplevel. 4366 * XXX only need to check toplevel.
4367 */ 4367 */
4368 if (target_nptp > nkptpmax[i]) 4368 if (target_nptp > nkptpmax[i])
4369 panic("out of KVA space"); 4369 panic("out of KVA space");
4370 KASSERT(target_nptp >= nkptp[i]); 4370 KASSERT(target_nptp >= nkptp[i]);
4371 needed_kptp[i] = target_nptp - nkptp[i]; 4371 needed_kptp[i] = target_nptp - nkptp[i];
4372 } 4372 }
4373 4373
4374 pmap_alloc_level(normal_pdes, pmap_maxkvaddr, PTP_LEVELS, needed_kptp); 4374 pmap_alloc_level(normal_pdes, pmap_maxkvaddr, PTP_LEVELS, needed_kptp);
4375 4375
4376 /* 4376 /*
4377 * If the number of top level entries changed, update all 4377 * If the number of top level entries changed, update all
4378 * pmaps. 4378 * pmaps.
4379 */ 4379 */
4380 if (needed_kptp[PTP_LEVELS - 1] != 0) { 4380 if (needed_kptp[PTP_LEVELS - 1] != 0) {
4381#ifdef XEN 4381#ifdef XEN
4382#ifdef __x86_64__ 4382#ifdef __x86_64__
4383 /* nothing, kernel entries are never entered in user pmap */ 4383 /* nothing, kernel entries are never entered in user pmap */
4384#else /* __x86_64__ */ 4384#else /* __x86_64__ */
4385 mutex_enter(&pmaps_lock); 4385 mutex_enter(&pmaps_lock);
4386 LIST_FOREACH(pm, &pmaps, pm_list) { 4386 LIST_FOREACH(pm, &pmaps, pm_list) {
4387 int pdkidx; 4387 int pdkidx;
4388 for (pdkidx = PDIR_SLOT_KERN + old; 4388 for (pdkidx = PDIR_SLOT_KERN + old;
4389 pdkidx < PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1]; 4389 pdkidx < PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1];
4390 pdkidx++) { 4390 pdkidx++) {
4391 pmap_pte_set(&pm->pm_pdir[pdkidx], 4391 pmap_pte_set(&pm->pm_pdir[pdkidx],
4392 kpm->pm_pdir[pdkidx]); 4392 kpm->pm_pdir[pdkidx]);
4393 } 4393 }
4394 pmap_pte_flush(); 4394 pmap_pte_flush();
4395 } 4395 }
4396 mutex_exit(&pmaps_lock); 4396 mutex_exit(&pmaps_lock);
4397#endif /* __x86_64__ */ 4397#endif /* __x86_64__ */
4398#else /* XEN */ 4398#else /* XEN */
4399 unsigned newpdes; 4399 unsigned newpdes;
4400 newpdes = nkptp[PTP_LEVELS - 1] - old; 4400 newpdes = nkptp[PTP_LEVELS - 1] - old;
4401 mutex_enter(&pmaps_lock); 4401 mutex_enter(&pmaps_lock);
4402 LIST_FOREACH(pm, &pmaps, pm_list) { 4402 LIST_FOREACH(pm, &pmaps, pm_list) {
4403 memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old], 4403 memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old],
4404 &kpm->pm_pdir[PDIR_SLOT_KERN + old], 4404 &kpm->pm_pdir[PDIR_SLOT_KERN + old],
4405 newpdes * sizeof (pd_entry_t)); 4405 newpdes * sizeof (pd_entry_t));
4406 } 4406 }
4407 mutex_exit(&pmaps_lock); 4407 mutex_exit(&pmaps_lock);
4408#endif 4408#endif
4409 invalidate = true; 4409 invalidate = true;
4410 } 4410 }
4411 pmap_maxkvaddr = maxkvaddr; 4411 pmap_maxkvaddr = maxkvaddr;
4412 mutex_exit(kpm->pm_lock); 4412 mutex_exit(kpm->pm_lock);
4413 splx(s); 4413 splx(s);
4414 4414
4415 if (invalidate && pmap_initialized) { 4415 if (invalidate && pmap_initialized) {
4416 /* Invalidate the PDP cache. */ 4416 /* Invalidate the PDP cache. */
4417 pool_cache_invalidate(&pmap_pdp_cache); 4417 pool_cache_invalidate(&pmap_pdp_cache);
4418 } 4418 }
4419 4419
4420 return maxkvaddr; 4420 return maxkvaddr;
4421} 4421}
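/*
 * Illustrative sketch (editorial, not part of the source above): how the MI
 * KVA allocator typically drives pmap_growkernel() when a mapping would
 * extend past the managed kernel address space.  "kernel_kva_end" is a
 * hypothetical cached copy of the limit returned by the last call.
 */
static vaddr_t kernel_kva_end;		/* hypothetical cached limit */

static void
growkernel_sketch(vaddr_t new_end)
{

	if (new_end > kernel_kva_end)
		kernel_kva_end = pmap_growkernel(new_end);
}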
4422 4422
4423#ifdef DEBUG 4423#ifdef DEBUG
4424void pmap_dump(struct pmap *, vaddr_t, vaddr_t); 4424void pmap_dump(struct pmap *, vaddr_t, vaddr_t);
4425 4425
4426/* 4426/*
4427 * pmap_dump: dump all the mappings from a pmap 4427 * pmap_dump: dump all the mappings from a pmap
4428 * 4428 *
4429 * => caller should not be holding any pmap locks 4429 * => caller should not be holding any pmap locks
4430 */ 4430 */
4431 4431
4432void 4432void
4433pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) 4433pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva)
4434{ 4434{
4435 pt_entry_t *ptes, *pte; 4435 pt_entry_t *ptes, *pte;
4436 pd_entry_t * const *pdes; 4436 pd_entry_t * const *pdes;
4437 struct pmap *pmap2; 4437 struct pmap *pmap2;
4438 vaddr_t blkendva; 4438 vaddr_t blkendva;
4439 4439
4440 /* 4440 /*
4441 * if end is out of range truncate. 4441 * if end is out of range truncate.
4442 * if (end == start) update to max. 4442 * if (end == start) update to max.
4443 */ 4443 */
4444 4444
4445 if (eva > VM_MAXUSER_ADDRESS || eva <= sva) 4445 if (eva > VM_MAXUSER_ADDRESS || eva <= sva)
4446 eva = VM_MAXUSER_ADDRESS; 4446 eva = VM_MAXUSER_ADDRESS;
4447 4447
4448 /* 4448 /*
4449 * we lock in the pmap => pv_head direction 4449 * we lock in the pmap => pv_head direction
4450 */ 4450 */
4451 4451
4452 kpreempt_disable(); 4452 kpreempt_disable();
4453 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ 4453 pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */
4454 4454
4455 /* 4455 /*
 4456	 * dumping a range of pages: we dump in PTP-sized blocks (2MB/4MB) 4456	 * dumping a range of pages: we dump in PTP-sized blocks (2MB/4MB)
4457 */ 4457 */
4458 4458
4459 for (/* null */ ; sva < eva ; sva = blkendva) { 4459 for (/* null */ ; sva < eva ; sva = blkendva) {
4460 4460
4461 /* determine range of block */ 4461 /* determine range of block */
4462 blkendva = x86_round_pdr(sva+1); 4462 blkendva = x86_round_pdr(sva+1);
4463 if (blkendva > eva) 4463 if (blkendva > eva)
4464 blkendva = eva; 4464 blkendva = eva;
4465 4465
4466 /* valid block? */ 4466 /* valid block? */
4467 if (!pmap_pdes_valid(sva, pdes, NULL)) 4467 if (!pmap_pdes_valid(sva, pdes, NULL))
4468 continue; 4468 continue;
4469 4469
4470 pte = &ptes[pl1_i(sva)]; 4470 pte = &ptes[pl1_i(sva)];
4471 for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) { 4471 for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) {
4472 if (!pmap_valid_entry(*pte)) 4472 if (!pmap_valid_entry(*pte))
4473 continue; 4473 continue;
4474 printf("va %#" PRIxVADDR " -> pa %#" PRIxPADDR 4474 printf("va %#" PRIxVADDR " -> pa %#" PRIxPADDR
4475 " (pte=%#" PRIxPADDR ")\n", 4475 " (pte=%#" PRIxPADDR ")\n",
4476 sva, (paddr_t)pmap_pte2pa(*pte), (paddr_t)*pte); 4476 sva, (paddr_t)pmap_pte2pa(*pte), (paddr_t)*pte);
4477 } 4477 }
4478 } 4478 }
4479 pmap_unmap_ptes(pmap, pmap2); 4479 pmap_unmap_ptes(pmap, pmap2);
4480 kpreempt_enable(); 4480 kpreempt_enable();
4481} 4481}
4482#endif 4482#endif
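pmap_dump() walks the requested range one PTP at a time: x86_round_pdr(sva + 1) rounds up to the next page-directory boundary, blocks whose PDEs are not valid are skipped wholesale, and only valid PTEs inside a valid block are printed. Below is a minimal userland model of that block rounding, assuming the 2 MB L2 span of amd64; the constants and round_pdr() are illustrative stand-ins for the real macros.

/* Illustrative model of the per-PTP block rounding used by pmap_dump(). */
#include <stdint.h>
#include <stdio.h>

typedef uint64_t vaddr_t;

#define PAGE_SIZE	4096UL
#define NBPD_L2		(512UL * PAGE_SIZE)	/* VA span of one PTP (amd64, 2 MB) */

/* Round up to the next page-directory (PTP) boundary, like x86_round_pdr(). */
static vaddr_t
round_pdr(vaddr_t va)
{
	return (va + NBPD_L2 - 1) & ~(NBPD_L2 - 1);
}

int
main(void)
{
	vaddr_t sva = 0x200000 - PAGE_SIZE;	/* last page of the first block */
	vaddr_t eva = 0x600000;
	vaddr_t blkendva;

	for (; sva < eva; sva = blkendva) {
		blkendva = round_pdr(sva + 1);
		if (blkendva > eva)
			blkendva = eva;
		printf("block: %#lx .. %#lx\n",
		    (unsigned long)sva, (unsigned long)blkendva);
	}
	return 0;
}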
4483 4483
4484/* 4484/*
4485 * pmap_update: process deferred invalidations and frees. 4485 * pmap_update: process deferred invalidations and frees.
4486 */ 4486 */
4487 4487
4488void 4488void
4489pmap_update(struct pmap *pmap) 4489pmap_update(struct pmap *pmap)
4490{ 4490{
4491 struct vm_page *empty_ptps; 4491 struct vm_page *empty_ptps;
4492 lwp_t *l = curlwp; 4492 lwp_t *l = curlwp;
4493 4493
4494 /* 4494 /*
4495 * If we have torn down this pmap, invalidate non-global TLB 4495 * If we have torn down this pmap, invalidate non-global TLB
4496 * entries on any processors using it. 4496 * entries on any processors using it.
4497 */ 4497 */
4498 kpreempt_disable(); 4498 kpreempt_disable();
4499 if (__predict_false(l->l_md.md_gc_pmap == pmap)) { 4499 if (__predict_false(l->l_md.md_gc_pmap == pmap)) {
4500 l->l_md.md_gc_pmap = NULL; 4500 l->l_md.md_gc_pmap = NULL;
4501 pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_UPDATE); 4501 pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_UPDATE);
4502 } 4502 }
4503 /* 4503 /*
4504 * Initiate any pending TLB shootdowns. Wait for them to 4504 * Initiate any pending TLB shootdowns. Wait for them to
4505 * complete before returning control to the caller. 4505 * complete before returning control to the caller.
4506 */ 4506 */
4507 pmap_tlb_shootnow(); 4507 pmap_tlb_shootnow();
4508 kpreempt_enable(); 4508 kpreempt_enable();
4509 4509
4510 /* 4510 /*
4511 * Now that shootdowns are complete, process deferred frees, 4511 * Now that shootdowns are complete, process deferred frees,
4512 * but not from interrupt context. 4512 * but not from interrupt context.
4513 */ 4513 */
4514 if (l->l_md.md_gc_ptp != NULL) { 4514 if (l->l_md.md_gc_ptp != NULL) {
4515 KASSERT((l->l_pflag & LP_INTR) == 0); 4515 KASSERT((l->l_pflag & LP_INTR) == 0);
4516 if (cpu_intr_p()) { 4516 if (cpu_intr_p()) {
4517 return; 4517 return;
4518 } 4518 }
4519 empty_ptps = l->l_md.md_gc_ptp; 4519 empty_ptps = l->l_md.md_gc_ptp;
4520 l->l_md.md_gc_ptp = NULL; 4520 l->l_md.md_gc_ptp = NULL;
4521 pmap_free_ptps(empty_ptps); 4521 pmap_free_ptps(empty_ptps);
4522 } 4522 }
4523} 4523}
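pmap_update() handles the deferred PTP frees with a detach-then-free pattern: the per-LWP list head (l_md.md_gc_ptp) is snapshotted and cleared before any page is released, and the whole step is simply skipped in interrupt context so the pages are picked up by a later call. A minimal sketch of that pattern follows, using a plain singly linked list; the list head and in_interrupt_context() are placeholders standing in for the LWP fields and cpu_intr_p().

/* Illustrative detach-then-free pattern used for the deferred PTP list. */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct page {
	struct page *next;
};

static struct page *deferred_head;	/* stands in for l->l_md.md_gc_ptp */

static bool
in_interrupt_context(void)
{
	return false;			/* placeholder for cpu_intr_p() */
}

static void
process_deferred_frees(void)
{
	struct page *list, *pg;

	if (deferred_head == NULL)
		return;
	if (in_interrupt_context())
		return;			/* retried on the next call */

	list = deferred_head;		/* detach the whole list first... */
	deferred_head = NULL;
	while ((pg = list) != NULL) {	/* ...then free it at leisure */
		list = pg->next;
		free(pg);
	}
}

int
main(void)
{
	struct page *pg = calloc(1, sizeof(*pg));

	pg->next = deferred_head;
	deferred_head = pg;
	process_deferred_frees();
	printf("deferred list empty: %s\n", deferred_head == NULL ? "yes" : "no");
	return 0;
}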
4524 4524
4525#if PTP_LEVELS > 4 4525#if PTP_LEVELS > 4
4526#error "Unsupported number of page table mappings" 4526#error "Unsupported number of page table mappings"
4527#endif 4527#endif
4528 4528
4529paddr_t 4529paddr_t
4530pmap_init_tmp_pgtbl(paddr_t pg) 4530pmap_init_tmp_pgtbl(paddr_t pg)
4531{ 4531{
4532 static bool maps_loaded; 4532 static bool maps_loaded;
4533 static const paddr_t x86_tmp_pml_paddr[] = { 4533 static const paddr_t x86_tmp_pml_paddr[] = {
4534 4 * PAGE_SIZE, 4534 4 * PAGE_SIZE, /* L1 */
4535 5 * PAGE_SIZE, 4535 5 * PAGE_SIZE, /* L2 */
4536 6 * PAGE_SIZE, 4536 6 * PAGE_SIZE, /* L3 */
4537 7 * PAGE_SIZE 4537 7 * PAGE_SIZE /* L4 */
4538 }; 4538 };
4539 static vaddr_t x86_tmp_pml_vaddr[] = { 0, 0, 0, 0 }; 4539 static vaddr_t x86_tmp_pml_vaddr[] = { 0, 0, 0, 0 };
4540 4540
4541 pd_entry_t *tmp_pml, *kernel_pml; 4541 pd_entry_t *tmp_pml, *kernel_pml;
4542  4542
4543 int level; 4543 int level;
4544 4544
4545 if (!maps_loaded) { 4545 if (!maps_loaded) {
4546 for (level = 0; level < PTP_LEVELS; ++level) { 4546 for (level = 0; level < PTP_LEVELS; ++level) {
4547 x86_tmp_pml_vaddr[level] = 4547 x86_tmp_pml_vaddr[level] =
4548 uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 4548 uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
4549 UVM_KMF_VAONLY); 4549 UVM_KMF_VAONLY);
4550 4550
4551 if (x86_tmp_pml_vaddr[level] == 0) 4551 if (x86_tmp_pml_vaddr[level] == 0)
4552 panic("mapping of real mode PML failed\n"); 4552 panic("mapping of real mode PML failed\n");
4553 pmap_kenter_pa(x86_tmp_pml_vaddr[level], 4553 pmap_kenter_pa(x86_tmp_pml_vaddr[level],
4554 x86_tmp_pml_paddr[level], 4554 x86_tmp_pml_paddr[level],
4555 VM_PROT_READ | VM_PROT_WRITE, 0); 4555 VM_PROT_READ | VM_PROT_WRITE, 0);
4556 pmap_update(pmap_kernel()); 4556 pmap_update(pmap_kernel());
4557 } 4557 }
4558 maps_loaded = true; 4558 maps_loaded = true;
4559 } 4559 }
4560 4560
4561 /* Zero levels 1-3 */ 4561 /* Zero levels 1-3 */
4562 for (level = 0; level < PTP_LEVELS - 1; ++level) { 4562 for (level = 0; level < PTP_LEVELS - 1; ++level) {
4563 tmp_pml = (void *)x86_tmp_pml_vaddr[level]; 4563 tmp_pml = (void *)x86_tmp_pml_vaddr[level];
4564 memset(tmp_pml, 0, PAGE_SIZE); 4564 memset(tmp_pml, 0, PAGE_SIZE);
4565 } 4565 }
4566 4566
4567 /* Copy PML4 */ 4567 /* Copy PML4 */
4568 kernel_pml = pmap_kernel()->pm_pdir; 4568 kernel_pml = pmap_kernel()->pm_pdir;
4569 tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1]; 4569 tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1];
4570 memcpy(tmp_pml, kernel_pml, PAGE_SIZE); 4570 memcpy(tmp_pml, kernel_pml, PAGE_SIZE);
4571 4571
4572#ifdef PAE 4572#ifdef PAE
4573 /* 4573 /*
4574 * Use the last 4 entries of the L2 page as L3 PD entries. These 4574 * Use the last 4 entries of the L2 page as L3 PD entries. These
4575 * last entries are unlikely to be used for temporary mappings. 4575 * last entries are unlikely to be used for temporary mappings.
4576 * 508: maps 0->1GB (userland) 4576 * 508: maps 0->1GB (userland)
4577 * 509: unused 4577 * 509: unused
4578 * 510: unused 4578 * 510: unused
4579 * 511: maps 3->4GB (kernel) 4579 * 511: maps 3->4GB (kernel)
4580 */ 4580 */
4581 tmp_pml[508] = x86_tmp_pml_paddr[PTP_LEVELS - 1] | PG_V; 4581 tmp_pml[508] = x86_tmp_pml_paddr[PTP_LEVELS - 1] | PG_V;
4582 tmp_pml[509] = 0; 4582 tmp_pml[509] = 0;
4583 tmp_pml[510] = 0; 4583 tmp_pml[510] = 0;
4584 tmp_pml[511] = pmap_pdirpa(pmap_kernel(), PDIR_SLOT_KERN) | PG_V; 4584 tmp_pml[511] = pmap_pdirpa(pmap_kernel(), PDIR_SLOT_KERN) | PG_V;
4585#endif 4585#endif
4586 4586
4587 for (level = PTP_LEVELS - 1; level > 0; --level) { 4587 for (level = PTP_LEVELS - 1; level > 0; --level) {
4588 tmp_pml = (void *)x86_tmp_pml_vaddr[level]; 4588 tmp_pml = (void *)x86_tmp_pml_vaddr[level];
4589 4589
4590 tmp_pml[pl_i(pg, level + 1)] = 4590 tmp_pml[pl_i(pg, level + 1)] =
4591 (x86_tmp_pml_paddr[level - 1] & PG_FRAME) | PG_RW | PG_V; 4591 (x86_tmp_pml_paddr[level - 1] & PG_FRAME) | PG_RW | PG_V;
4592 } 4592 }
4593 4593
4594 tmp_pml = (void *)x86_tmp_pml_vaddr[0]; 4594 tmp_pml = (void *)x86_tmp_pml_vaddr[0];
4595 tmp_pml[pl_i(pg, 1)] = (pg & PG_FRAME) | PG_RW | PG_V; 4595 tmp_pml[pl_i(pg, 1)] = (pg & PG_FRAME) | PG_RW | PG_V;
4596 4596
4597#ifdef PAE 4597#ifdef PAE
4598 /* Return the PA of the L3 page (entry 508 of the L2 page) */ 4598 /* Return the PA of the L3 page (entry 508 of the L2 page) */
4599 return x86_tmp_pml_paddr[PTP_LEVELS - 1] + 508 * sizeof(pd_entry_t); 4599 return x86_tmp_pml_paddr[PTP_LEVELS - 1] + 508 * sizeof(pd_entry_t);
4600#endif 4600#endif
4601 4601
4602 return x86_tmp_pml_paddr[PTP_LEVELS - 1]; 4602 return x86_tmp_pml_paddr[PTP_LEVELS - 1];
4603} 4603}
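pmap_init_tmp_pgtbl() links the four scratch pages top-down: at each level the entry selected by pl_i(pg, level + 1) points to the physical page holding the next lower level, and the final L1 entry maps pg itself; this temporary tree is what low-memory trampoline code such as the MP trampoline runs on. In the PAE case the returned address is that of entry 508 in the L2 page, i.e. the page's physical base plus 508 * sizeof(pd_entry_t) = 508 * 8 = 4064 (0xfe0) bytes. The small sketch below shows the per-level index arithmetic, assuming 4-level amd64 paging with 9 index bits per level; pl_index() and the sample page address are illustrative, not the kernel's macros.

/* Illustrative per-level index computation for a 4-level x86-64 page table. */
#include <stdint.h>
#include <stdio.h>

#define PGSHIFT		12
#define NBITS_LVL	9		/* 512 entries per table */

/* Index of 'va' at 'level' (1 = L1 PTE, 4 = L4/PML4), like pl_i(). */
static unsigned
pl_index(uint64_t va, int level)
{
	return (va >> (PGSHIFT + NBITS_LVL * (level - 1))) & 511;
}

int
main(void)
{
	uint64_t pg = 0x9f000;		/* hypothetical trampoline page below 1 MB */
	int level;

	for (level = 4; level >= 1; --level)
		printf("L%d index for %#lx: %u\n",
		    level, (unsigned long)pg, pl_index(pg, level));

	/* PAE return value offset: entry 508 of the L2 page. */
	printf("PAE L3 offset: %u bytes\n", 508U * (unsigned)sizeof(uint64_t));
	return 0;
}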
4604 4604
4605u_int 4605u_int
4606x86_mmap_flags(paddr_t mdpgno) 4606x86_mmap_flags(paddr_t mdpgno)
4607{ 4607{
4608 u_int nflag = (mdpgno >> X86_MMAP_FLAG_SHIFT) & X86_MMAP_FLAG_MASK; 4608 u_int nflag = (mdpgno >> X86_MMAP_FLAG_SHIFT) & X86_MMAP_FLAG_MASK;
4609 u_int pflag = 0; 4609 u_int pflag = 0;
4610 4610
4611 if (nflag & X86_MMAP_FLAG_PREFETCH) 4611 if (nflag & X86_MMAP_FLAG_PREFETCH)
4612 pflag |= PMAP_WRITE_COMBINE; 4612 pflag |= PMAP_WRITE_COMBINE;
4613 4613
4614 return pflag; 4614 return pflag;
4615} 4615}
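x86_mmap_flags() recovers machine-dependent flags that a driver's mmap routine packed into the high bits of the returned page number: bits above X86_MMAP_FLAG_SHIFT are extracted, and X86_MMAP_FLAG_PREFETCH is translated into the MI PMAP_WRITE_COMBINE caching hint. The standalone sketch below shows the general pack/unpack scheme; the shift, mask and flag values are illustrative placeholders, not the real X86_MMAP_FLAG_* constants from the x86 headers.

/* Illustrative pack/unpack of mmap flags carried in the high bits of a pgno. */
#include <stdint.h>
#include <stdio.h>

#define MMAP_FLAG_SHIFT		33	/* illustrative value */
#define MMAP_FLAG_MASK		0xf
#define MMAP_FLAG_PREFETCH	0x1
#define PMAP_WC_HINT		0x100	/* stands in for PMAP_WRITE_COMBINE */

static uint64_t
pack_pgno(uint64_t pgno, unsigned flags)
{
	return pgno | ((uint64_t)(flags & MMAP_FLAG_MASK) << MMAP_FLAG_SHIFT);
}

static unsigned
unpack_flags(uint64_t mdpgno)
{
	unsigned nflag = (mdpgno >> MMAP_FLAG_SHIFT) & MMAP_FLAG_MASK;
	unsigned pflag = 0;

	if (nflag & MMAP_FLAG_PREFETCH)
		pflag |= PMAP_WC_HINT;
	return pflag;
}

int
main(void)
{
	uint64_t mdpgno = pack_pgno(0x1234, MMAP_FLAG_PREFETCH);

	printf("pmap flags: %#x\n", unpack_flags(mdpgno));
	return 0;
}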