Explicitly mention MP_TRAMPOLINE in these comments, so that NXR links them.diff -r1.216 -r1.217 src/sys/arch/amd64/amd64/machdep.c
(maxv)
--- src/sys/arch/amd64/amd64/machdep.c 2016/05/12 06:45:16 1.216
+++ src/sys/arch/amd64/amd64/machdep.c 2016/05/15 10:35:54 1.217
@@ -1,2218 +1,2218 @@ | @@ -1,2218 +1,2218 @@ | |||
1 | /* $NetBSD: machdep.c,v 1.216 2016/05/12 06:45:16 maxv Exp $ */ | 1 | /* $NetBSD: machdep.c,v 1.217 2016/05/15 10:35:54 maxv Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011 | 4 | * Copyright (c) 1996, 1997, 1998, 2000, 2006, 2007, 2008, 2011 | |
5 | * The NetBSD Foundation, Inc. | 5 | * The NetBSD Foundation, Inc. | |
6 | * All rights reserved. | 6 | * All rights reserved. | |
7 | * | 7 | * | |
8 | * This code is derived from software contributed to The NetBSD Foundation | 8 | * This code is derived from software contributed to The NetBSD Foundation | |
9 | * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace | 9 | * by Charles M. Hannum and by Jason R. Thorpe of the Numerical Aerospace | |
10 | * Simulation Facility, NASA Ames Research Center. | 10 | * Simulation Facility, NASA Ames Research Center. | |
11 | * | 11 | * | |
12 | * This code is derived from software contributed to The NetBSD Foundation | 12 | * This code is derived from software contributed to The NetBSD Foundation | |
13 | * by Coyote Point Systems, Inc. which was written under contract to Coyote | 13 | * by Coyote Point Systems, Inc. which was written under contract to Coyote | |
14 | * Point by Jed Davis and Devon O'Dell. | 14 | * Point by Jed Davis and Devon O'Dell. | |
15 | * | 15 | * | |
16 | * Redistribution and use in source and binary forms, with or without | 16 | * Redistribution and use in source and binary forms, with or without | |
17 | * modification, are permitted provided that the following conditions | 17 | * modification, are permitted provided that the following conditions | |
18 | * are met: | 18 | * are met: | |
19 | * 1. Redistributions of source code must retain the above copyright | 19 | * 1. Redistributions of source code must retain the above copyright | |
20 | * notice, this list of conditions and the following disclaimer. | 20 | * notice, this list of conditions and the following disclaimer. | |
21 | * 2. Redistributions in binary form must reproduce the above copyright | 21 | * 2. Redistributions in binary form must reproduce the above copyright | |
22 | * notice, this list of conditions and the following disclaimer in the | 22 | * notice, this list of conditions and the following disclaimer in the | |
23 | * documentation and/or other materials provided with the distribution. | 23 | * documentation and/or other materials provided with the distribution. | |
24 | * | 24 | * | |
25 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 25 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
26 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 26 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
27 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 27 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
28 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 28 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
29 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 29 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
30 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 30 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
31 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 31 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
32 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 32 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
33 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 33 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 34 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
35 | * POSSIBILITY OF SUCH DAMAGE. | 35 | * POSSIBILITY OF SUCH DAMAGE. | |
36 | */ | 36 | */ | |
37 | 37 | |||
38 | /* | 38 | /* | |
39 | * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> | 39 | * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> | |
40 | * | 40 | * | |
41 | * Permission to use, copy, modify, and distribute this software for any | 41 | * Permission to use, copy, modify, and distribute this software for any | |
42 | * purpose with or without fee is hereby granted, provided that the above | 42 | * purpose with or without fee is hereby granted, provided that the above | |
43 | * copyright notice and this permission notice appear in all copies. | 43 | * copyright notice and this permission notice appear in all copies. | |
44 | * | 44 | * | |
45 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | 45 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
46 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | 46 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
47 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | 47 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
48 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 48 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
49 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | 49 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
50 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | 50 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
51 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 51 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
52 | */ | 52 | */ | |
53 | 53 | |||
54 | /* | 54 | /* | |
55 | * Copyright (c) 2007 Manuel Bouyer. | 55 | * Copyright (c) 2007 Manuel Bouyer. | |
56 | * | 56 | * | |
57 | * Redistribution and use in source and binary forms, with or without | 57 | * Redistribution and use in source and binary forms, with or without | |
58 | * modification, are permitted provided that the following conditions | 58 | * modification, are permitted provided that the following conditions | |
59 | * are met: | 59 | * are met: | |
60 | * 1. Redistributions of source code must retain the above copyright | 60 | * 1. Redistributions of source code must retain the above copyright | |
61 | * notice, this list of conditions and the following disclaimer. | 61 | * notice, this list of conditions and the following disclaimer. | |
62 | * 2. Redistributions in binary form must reproduce the above copyright | 62 | * 2. Redistributions in binary form must reproduce the above copyright | |
63 | * notice, this list of conditions and the following disclaimer in the | 63 | * notice, this list of conditions and the following disclaimer in the | |
64 | * documentation and/or other materials provided with the distribution. | 64 | * documentation and/or other materials provided with the distribution. | |
65 | * | 65 | * | |
66 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | 66 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
67 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | 67 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
68 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | 68 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
69 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | 69 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
70 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | 70 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
71 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 71 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
72 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 72 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
73 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 73 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
74 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 74 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
75 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 75 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
76 | * | 76 | * | |
77 | */ | 77 | */ | |
78 | 78 | |||
79 | /*- | 79 | /*- | |
80 | * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. | 80 | * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. | |
81 | * All rights reserved. | 81 | * All rights reserved. | |
82 | * | 82 | * | |
83 | * This code is derived from software contributed to Berkeley by | 83 | * This code is derived from software contributed to Berkeley by | |
84 | * William Jolitz. | 84 | * William Jolitz. | |
85 | * | 85 | * | |
86 | * Redistribution and use in source and binary forms, with or without | 86 | * Redistribution and use in source and binary forms, with or without | |
87 | * modification, are permitted provided that the following conditions | 87 | * modification, are permitted provided that the following conditions | |
88 | * are met: | 88 | * are met: | |
89 | * 1. Redistributions of source code must retain the above copyright | 89 | * 1. Redistributions of source code must retain the above copyright | |
90 | * notice, this list of conditions and the following disclaimer. | 90 | * notice, this list of conditions and the following disclaimer. | |
91 | * 2. Redistributions in binary form must reproduce the above copyright | 91 | * 2. Redistributions in binary form must reproduce the above copyright | |
92 | * notice, this list of conditions and the following disclaimer in the | 92 | * notice, this list of conditions and the following disclaimer in the | |
93 | * documentation and/or other materials provided with the distribution. | 93 | * documentation and/or other materials provided with the distribution. | |
94 | * 3. Neither the name of the University nor the names of its contributors | 94 | * 3. Neither the name of the University nor the names of its contributors | |
95 | * may be used to endorse or promote products derived from this software | 95 | * may be used to endorse or promote products derived from this software | |
96 | * without specific prior written permission. | 96 | * without specific prior written permission. | |
97 | * | 97 | * | |
98 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 98 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
99 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 99 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
100 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 100 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
101 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 101 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
102 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 102 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
103 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 103 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
104 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 104 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
105 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 105 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
106 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 106 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
107 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 107 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
108 | * SUCH DAMAGE. | 108 | * SUCH DAMAGE. | |
109 | * | 109 | * | |
110 | * @(#)machdep.c 7.4 (Berkeley) 6/3/91 | 110 | * @(#)machdep.c 7.4 (Berkeley) 6/3/91 | |
111 | */ | 111 | */ | |
112 | 112 | |||
113 | #include <sys/cdefs.h> | 113 | #include <sys/cdefs.h> | |
114 | __KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.216 2016/05/12 06:45:16 maxv Exp $"); | 114 | __KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.217 2016/05/15 10:35:54 maxv Exp $"); | |
115 | 115 | |||
116 | /* #define XENDEBUG_LOW */ | 116 | /* #define XENDEBUG_LOW */ | |
117 | 117 | |||
118 | #include "opt_modular.h" | 118 | #include "opt_modular.h" | |
119 | #include "opt_user_ldt.h" | 119 | #include "opt_user_ldt.h" | |
120 | #include "opt_ddb.h" | 120 | #include "opt_ddb.h" | |
121 | #include "opt_kgdb.h" | 121 | #include "opt_kgdb.h" | |
122 | #include "opt_cpureset_delay.h" | 122 | #include "opt_cpureset_delay.h" | |
123 | #include "opt_mtrr.h" | 123 | #include "opt_mtrr.h" | |
124 | #include "opt_realmem.h" | 124 | #include "opt_realmem.h" | |
125 | #include "opt_xen.h" | 125 | #include "opt_xen.h" | |
126 | #ifndef XEN | 126 | #ifndef XEN | |
127 | #include "opt_physmem.h" | 127 | #include "opt_physmem.h" | |
128 | #endif | 128 | #endif | |
129 | #include "isa.h" | 129 | #include "isa.h" | |
130 | #include "pci.h" | 130 | #include "pci.h" | |
131 | 131 | |||
132 | #include <sys/param.h> | 132 | #include <sys/param.h> | |
133 | #include <sys/systm.h> | 133 | #include <sys/systm.h> | |
134 | #include <sys/signal.h> | 134 | #include <sys/signal.h> | |
135 | #include <sys/signalvar.h> | 135 | #include <sys/signalvar.h> | |
136 | #include <sys/kernel.h> | 136 | #include <sys/kernel.h> | |
137 | #include <sys/cpu.h> | 137 | #include <sys/cpu.h> | |
138 | #include <sys/exec.h> | 138 | #include <sys/exec.h> | |
139 | #include <sys/exec_aout.h> /* for MID_* */ | 139 | #include <sys/exec_aout.h> /* for MID_* */ | |
140 | #include <sys/reboot.h> | 140 | #include <sys/reboot.h> | |
141 | #include <sys/conf.h> | 141 | #include <sys/conf.h> | |
142 | #include <sys/mbuf.h> | 142 | #include <sys/mbuf.h> | |
143 | #include <sys/msgbuf.h> | 143 | #include <sys/msgbuf.h> | |
144 | #include <sys/mount.h> | 144 | #include <sys/mount.h> | |
145 | #include <sys/core.h> | 145 | #include <sys/core.h> | |
146 | #include <sys/kcore.h> | 146 | #include <sys/kcore.h> | |
147 | #include <sys/ucontext.h> | 147 | #include <sys/ucontext.h> | |
148 | #include <machine/kcore.h> | 148 | #include <machine/kcore.h> | |
149 | #include <sys/ras.h> | 149 | #include <sys/ras.h> | |
150 | #include <sys/syscallargs.h> | 150 | #include <sys/syscallargs.h> | |
151 | #include <sys/ksyms.h> | 151 | #include <sys/ksyms.h> | |
152 | #include <sys/device.h> | 152 | #include <sys/device.h> | |
153 | #include <sys/lwp.h> | 153 | #include <sys/lwp.h> | |
154 | #include <sys/proc.h> | 154 | #include <sys/proc.h> | |
155 | 155 | |||
156 | #ifdef KGDB | 156 | #ifdef KGDB | |
157 | #include <sys/kgdb.h> | 157 | #include <sys/kgdb.h> | |
158 | #endif | 158 | #endif | |
159 | 159 | |||
160 | #include <dev/cons.h> | 160 | #include <dev/cons.h> | |
161 | #include <dev/mm.h> | 161 | #include <dev/mm.h> | |
162 | 162 | |||
163 | #include <uvm/uvm.h> | 163 | #include <uvm/uvm.h> | |
164 | #include <uvm/uvm_page.h> | 164 | #include <uvm/uvm_page.h> | |
165 | 165 | |||
166 | #include <sys/sysctl.h> | 166 | #include <sys/sysctl.h> | |
167 | 167 | |||
168 | #include <machine/cpu.h> | 168 | #include <machine/cpu.h> | |
169 | #include <machine/cpufunc.h> | 169 | #include <machine/cpufunc.h> | |
170 | #include <machine/gdt.h> | 170 | #include <machine/gdt.h> | |
171 | #include <machine/intr.h> | 171 | #include <machine/intr.h> | |
172 | #include <machine/pio.h> | 172 | #include <machine/pio.h> | |
173 | #include <machine/psl.h> | 173 | #include <machine/psl.h> | |
174 | #include <machine/reg.h> | 174 | #include <machine/reg.h> | |
175 | #include <machine/specialreg.h> | 175 | #include <machine/specialreg.h> | |
176 | #include <machine/bootinfo.h> | 176 | #include <machine/bootinfo.h> | |
177 | #include <x86/fpu.h> | 177 | #include <x86/fpu.h> | |
178 | #include <machine/mtrr.h> | 178 | #include <machine/mtrr.h> | |
179 | #include <machine/mpbiosvar.h> | 179 | #include <machine/mpbiosvar.h> | |
180 | 180 | |||
181 | #include <x86/cputypes.h> | 181 | #include <x86/cputypes.h> | |
182 | #include <x86/cpuvar.h> | 182 | #include <x86/cpuvar.h> | |
183 | #include <x86/machdep.h> | 183 | #include <x86/machdep.h> | |
184 | 184 | |||
185 | #include <x86/x86/tsc.h> | 185 | #include <x86/x86/tsc.h> | |
186 | 186 | |||
187 | #include <dev/isa/isareg.h> | 187 | #include <dev/isa/isareg.h> | |
188 | #include <machine/isa_machdep.h> | 188 | #include <machine/isa_machdep.h> | |
189 | #include <dev/ic/i8042reg.h> | 189 | #include <dev/ic/i8042reg.h> | |
190 | 190 | |||
191 | #ifdef XEN | 191 | #ifdef XEN | |
192 | #include <xen/xen.h> | 192 | #include <xen/xen.h> | |
193 | #include <xen/hypervisor.h> | 193 | #include <xen/hypervisor.h> | |
194 | #include <xen/evtchn.h> | 194 | #include <xen/evtchn.h> | |
195 | #endif | 195 | #endif | |
196 | 196 | |||
197 | #ifdef DDB | 197 | #ifdef DDB | |
198 | #include <machine/db_machdep.h> | 198 | #include <machine/db_machdep.h> | |
199 | #include <ddb/db_extern.h> | 199 | #include <ddb/db_extern.h> | |
200 | #include <ddb/db_output.h> | 200 | #include <ddb/db_output.h> | |
201 | #include <ddb/db_interface.h> | 201 | #include <ddb/db_interface.h> | |
202 | #endif | 202 | #endif | |
203 | 203 | |||
204 | #include "acpica.h" | 204 | #include "acpica.h" | |
205 | 205 | |||
206 | #if NACPICA > 0 | 206 | #if NACPICA > 0 | |
207 | #include <dev/acpi/acpivar.h> | 207 | #include <dev/acpi/acpivar.h> | |
208 | #define ACPI_MACHDEP_PRIVATE | 208 | #define ACPI_MACHDEP_PRIVATE | |
209 | #include <machine/acpi_machdep.h> | 209 | #include <machine/acpi_machdep.h> | |
210 | #endif | 210 | #endif | |
211 | 211 | |||
212 | #include "isa.h" | 212 | #include "isa.h" | |
213 | #include "isadma.h" | 213 | #include "isadma.h" | |
214 | #include "ksyms.h" | 214 | #include "ksyms.h" | |
215 | 215 | |||
216 | /* the following is used externally (sysctl_hw) */ | 216 | /* the following is used externally (sysctl_hw) */ | |
217 | char machine[] = "amd64"; /* CPU "architecture" */ | 217 | char machine[] = "amd64"; /* CPU "architecture" */ | |
218 | char machine_arch[] = "x86_64"; /* machine == machine_arch */ | 218 | char machine_arch[] = "x86_64"; /* machine == machine_arch */ | |
219 | 219 | |||
220 | extern struct bi_devmatch *x86_alldisks; | 220 | extern struct bi_devmatch *x86_alldisks; | |
221 | extern int x86_ndisks; | 221 | extern int x86_ndisks; | |
222 | 222 | |||
223 | #ifdef CPURESET_DELAY | 223 | #ifdef CPURESET_DELAY | |
224 | int cpureset_delay = CPURESET_DELAY; | 224 | int cpureset_delay = CPURESET_DELAY; | |
225 | #else | 225 | #else | |
226 | int cpureset_delay = 2000; /* default to 2s */ | 226 | int cpureset_delay = 2000; /* default to 2s */ | |
227 | #endif | 227 | #endif | |
228 | 228 | |||
229 | int cpu_class = CPUCLASS_686; | 229 | int cpu_class = CPUCLASS_686; | |
230 | 230 | |||
231 | #ifdef MTRR | 231 | #ifdef MTRR | |
232 | struct mtrr_funcs *mtrr_funcs; | 232 | struct mtrr_funcs *mtrr_funcs; | |
233 | #endif | 233 | #endif | |
234 | 234 | |||
235 | uint64_t dumpmem_low; | 235 | uint64_t dumpmem_low; | |
236 | uint64_t dumpmem_high; | 236 | uint64_t dumpmem_high; | |
237 | int cpu_class; | 237 | int cpu_class; | |
238 | int use_pae; | 238 | int use_pae; | |
239 | 239 | |||
240 | #ifndef NO_SPARSE_DUMP | 240 | #ifndef NO_SPARSE_DUMP | |
241 | int sparse_dump = 1; | 241 | int sparse_dump = 1; | |
242 | 242 | |||
243 | paddr_t max_paddr = 0; | 243 | paddr_t max_paddr = 0; | |
244 | unsigned char *sparse_dump_physmap; | 244 | unsigned char *sparse_dump_physmap; | |
245 | #endif | 245 | #endif | |
246 | 246 | |||
247 | char *dump_headerbuf, *dump_headerbuf_ptr; | 247 | char *dump_headerbuf, *dump_headerbuf_ptr; | |
248 | #define dump_headerbuf_size PAGE_SIZE | 248 | #define dump_headerbuf_size PAGE_SIZE | |
249 | #define dump_headerbuf_end (dump_headerbuf + dump_headerbuf_size) | 249 | #define dump_headerbuf_end (dump_headerbuf + dump_headerbuf_size) | |
250 | #define dump_headerbuf_avail (dump_headerbuf_end - dump_headerbuf_ptr) | 250 | #define dump_headerbuf_avail (dump_headerbuf_end - dump_headerbuf_ptr) | |
251 | daddr_t dump_header_blkno; | 251 | daddr_t dump_header_blkno; | |
252 | 252 | |||
253 | size_t dump_nmemsegs; | 253 | size_t dump_nmemsegs; | |
254 | size_t dump_npages; | 254 | size_t dump_npages; | |
255 | size_t dump_header_size; | 255 | size_t dump_header_size; | |
256 | size_t dump_totalbytesleft; | 256 | size_t dump_totalbytesleft; | |
257 | 257 | |||
258 | vaddr_t msgbuf_vaddr; | 258 | vaddr_t msgbuf_vaddr; | |
259 | paddr_t msgbuf_paddr; | 259 | paddr_t msgbuf_paddr; | |
260 | 260 | |||
261 | struct { | 261 | struct { | |
262 | paddr_t paddr; | 262 | paddr_t paddr; | |
263 | psize_t sz; | 263 | psize_t sz; | |
264 | } msgbuf_p_seg[VM_PHYSSEG_MAX]; | 264 | } msgbuf_p_seg[VM_PHYSSEG_MAX]; | |
265 | unsigned int msgbuf_p_cnt = 0; | 265 | unsigned int msgbuf_p_cnt = 0; | |
266 | 266 | |||
267 | vaddr_t idt_vaddr; | 267 | vaddr_t idt_vaddr; | |
268 | paddr_t idt_paddr; | 268 | paddr_t idt_paddr; | |
269 | 269 | |||
270 | vaddr_t lo32_vaddr; | 270 | vaddr_t lo32_vaddr; | |
271 | paddr_t lo32_paddr; | 271 | paddr_t lo32_paddr; | |
272 | 272 | |||
273 | vaddr_t module_start, module_end; | 273 | vaddr_t module_start, module_end; | |
274 | static struct vm_map module_map_store; | 274 | static struct vm_map module_map_store; | |
275 | extern struct vm_map *module_map; | 275 | extern struct vm_map *module_map; | |
276 | vaddr_t kern_end; | 276 | vaddr_t kern_end; | |
277 | 277 | |||
278 | struct vm_map *phys_map = NULL; | 278 | struct vm_map *phys_map = NULL; | |
279 | 279 | |||
280 | extern paddr_t avail_start, avail_end; | 280 | extern paddr_t avail_start, avail_end; | |
281 | #ifdef XEN | 281 | #ifdef XEN | |
282 | extern paddr_t pmap_pa_start, pmap_pa_end; | 282 | extern paddr_t pmap_pa_start, pmap_pa_end; | |
283 | #endif | 283 | #endif | |
284 | 284 | |||
285 | #ifndef XEN | 285 | #ifndef XEN | |
286 | void (*delay_func)(unsigned int) = i8254_delay; | 286 | void (*delay_func)(unsigned int) = i8254_delay; | |
287 | void (*initclock_func)(void) = i8254_initclocks; | 287 | void (*initclock_func)(void) = i8254_initclocks; | |
288 | #else /* XEN */ | 288 | #else /* XEN */ | |
289 | void (*delay_func)(unsigned int) = xen_delay; | 289 | void (*delay_func)(unsigned int) = xen_delay; | |
290 | void (*initclock_func)(void) = xen_initclocks; | 290 | void (*initclock_func)(void) = xen_initclocks; | |
291 | #endif | 291 | #endif | |
292 | 292 | |||
293 | 293 | |||
294 | /* | 294 | /* | |
295 | * Size of memory segments, before any memory is stolen. | 295 | * Size of memory segments, before any memory is stolen. | |
296 | */ | 296 | */ | |
297 | phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; | 297 | phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; | |
298 | int mem_cluster_cnt; | 298 | int mem_cluster_cnt; | |
299 | 299 | |||
300 | char x86_64_doubleflt_stack[4096]; | 300 | char x86_64_doubleflt_stack[4096]; | |
301 | 301 | |||
302 | int cpu_dump(void); | 302 | int cpu_dump(void); | |
303 | int cpu_dumpsize(void); | 303 | int cpu_dumpsize(void); | |
304 | u_long cpu_dump_mempagecnt(void); | 304 | u_long cpu_dump_mempagecnt(void); | |
305 | void dodumpsys(void); | 305 | void dodumpsys(void); | |
306 | void dumpsys(void); | 306 | void dumpsys(void); | |
307 | 307 | |||
308 | extern int time_adjusted; /* XXX no common header */ | 308 | extern int time_adjusted; /* XXX no common header */ | |
309 | 309 | |||
310 | void dump_misc_init(void); | 310 | void dump_misc_init(void); | |
311 | void dump_seg_prep(void); | 311 | void dump_seg_prep(void); | |
312 | int dump_seg_iter(int (*)(paddr_t, paddr_t)); | 312 | int dump_seg_iter(int (*)(paddr_t, paddr_t)); | |
313 | 313 | |||
314 | #ifndef NO_SPARSE_DUMP | 314 | #ifndef NO_SPARSE_DUMP | |
315 | void sparse_dump_reset(void); | 315 | void sparse_dump_reset(void); | |
316 | void sparse_dump_mark(void); | 316 | void sparse_dump_mark(void); | |
317 | void cpu_dump_prep_sparse(void); | 317 | void cpu_dump_prep_sparse(void); | |
318 | #endif | 318 | #endif | |
319 | 319 | |||
320 | void dump_header_start(void); | 320 | void dump_header_start(void); | |
321 | int dump_header_flush(void); | 321 | int dump_header_flush(void); | |
322 | int dump_header_addbytes(const void*, size_t); | 322 | int dump_header_addbytes(const void*, size_t); | |
323 | int dump_header_addseg(paddr_t, paddr_t); | 323 | int dump_header_addseg(paddr_t, paddr_t); | |
324 | int dump_header_finish(void); | 324 | int dump_header_finish(void); | |
325 | 325 | |||
326 | int dump_seg_count_range(paddr_t, paddr_t); | 326 | int dump_seg_count_range(paddr_t, paddr_t); | |
327 | int dumpsys_seg(paddr_t, paddr_t); | 327 | int dumpsys_seg(paddr_t, paddr_t); | |
328 | 328 | |||
329 | void init_x86_64(paddr_t); | 329 | void init_x86_64(paddr_t); | |
330 | 330 | |||
331 | static int valid_user_selector(struct lwp *, uint64_t); | 331 | static int valid_user_selector(struct lwp *, uint64_t); | |
332 | 332 | |||
333 | /* | 333 | /* | |
334 | * Machine-dependent startup code | 334 | * Machine-dependent startup code | |
335 | */ | 335 | */ | |
336 | void | 336 | void | |
337 | cpu_startup(void) | 337 | cpu_startup(void) | |
338 | { | 338 | { | |
339 | int x, y; | 339 | int x, y; | |
340 | vaddr_t minaddr, maxaddr; | 340 | vaddr_t minaddr, maxaddr; | |
341 | psize_t sz; | 341 | psize_t sz; | |
342 | 342 | |||
343 | /* | 343 | /* | |
344 | * For console drivers that require uvm and pmap to be initialized, | 344 | * For console drivers that require uvm and pmap to be initialized, | |
345 | * we'll give them one more chance here... | 345 | * we'll give them one more chance here... | |
346 | */ | 346 | */ | |
347 | consinit(); | 347 | consinit(); | |
348 | 348 | |||
349 | /* | 349 | /* | |
350 | * Initialize error message buffer (et end of core). | 350 | * Initialize error message buffer (et end of core). | |
351 | */ | 351 | */ | |
352 | if (msgbuf_p_cnt == 0) | 352 | if (msgbuf_p_cnt == 0) | |
353 | panic("msgbuf paddr map has not been set up"); | 353 | panic("msgbuf paddr map has not been set up"); | |
354 | for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz) | 354 | for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz) | |
355 | continue; | 355 | continue; | |
356 | 356 | |||
357 | msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, | 357 | msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, | |
358 | UVM_KMF_VAONLY); | 358 | UVM_KMF_VAONLY); | |
359 | if (msgbuf_vaddr == 0) | 359 | if (msgbuf_vaddr == 0) | |
360 | panic("failed to valloc msgbuf_vaddr"); | 360 | panic("failed to valloc msgbuf_vaddr"); | |
361 | 361 | |||
362 | /* msgbuf_paddr was init'd in pmap */ | 362 | /* msgbuf_paddr was init'd in pmap */ | |
363 | for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) { | 363 | for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) { | |
364 | for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE) | 364 | for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE) | |
365 | pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz, | 365 | pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz, | |
366 | msgbuf_p_seg[y].paddr + x * PAGE_SIZE, | 366 | msgbuf_p_seg[y].paddr + x * PAGE_SIZE, | |
367 | VM_PROT_READ | UVM_PROT_WRITE, 0); | 367 | VM_PROT_READ | UVM_PROT_WRITE, 0); | |
368 | } | 368 | } | |
369 | 369 | |||
370 | pmap_update(pmap_kernel()); | 370 | pmap_update(pmap_kernel()); | |
371 | 371 | |||
372 | initmsgbuf((void *)msgbuf_vaddr, round_page(sz)); | 372 | initmsgbuf((void *)msgbuf_vaddr, round_page(sz)); | |
373 | 373 | |||
374 | minaddr = 0; | 374 | minaddr = 0; | |
375 | 375 | |||
376 | /* | 376 | /* | |
377 | * Allocate a submap for physio | 377 | * Allocate a submap for physio | |
378 | */ | 378 | */ | |
379 | phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, | 379 | phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, | |
380 | VM_PHYS_SIZE, 0, false, NULL); | 380 | VM_PHYS_SIZE, 0, false, NULL); | |
381 | 381 | |||
382 | uvm_map_setup(&module_map_store, module_start, module_end, 0); | 382 | uvm_map_setup(&module_map_store, module_start, module_end, 0); | |
383 | module_map_store.pmap = pmap_kernel(); | 383 | module_map_store.pmap = pmap_kernel(); | |
384 | module_map = &module_map_store; | 384 | module_map = &module_map_store; | |
385 | 385 | |||
386 | /* Say hello. */ | 386 | /* Say hello. */ | |
387 | banner(); | 387 | banner(); | |
388 | 388 | |||
389 | #if NISA > 0 || NPCI > 0 | 389 | #if NISA > 0 || NPCI > 0 | |
390 | /* Safe for i/o port / memory space allocation to use malloc now. */ | 390 | /* Safe for i/o port / memory space allocation to use malloc now. */ | |
391 | x86_bus_space_mallocok(); | 391 | x86_bus_space_mallocok(); | |
392 | #endif | 392 | #endif | |
393 | 393 | |||
394 | gdt_init(); | 394 | gdt_init(); | |
395 | x86_64_proc0_tss_ldt_init(); | 395 | x86_64_proc0_tss_ldt_init(); | |
396 | 396 | |||
397 | cpu_init_tss(&cpu_info_primary); | 397 | cpu_init_tss(&cpu_info_primary); | |
398 | #if !defined(XEN) | 398 | #if !defined(XEN) | |
399 | ltr(cpu_info_primary.ci_tss_sel); | 399 | ltr(cpu_info_primary.ci_tss_sel); | |
400 | #endif /* !defined(XEN) */ | 400 | #endif /* !defined(XEN) */ | |
401 | 401 | |||
402 | x86_startup(); | 402 | x86_startup(); | |
403 | } | 403 | } | |
404 | 404 | |||
405 | #ifdef XEN | 405 | #ifdef XEN | |
406 | /* used in assembly */ | 406 | /* used in assembly */ | |
407 | void hypervisor_callback(void); | 407 | void hypervisor_callback(void); | |
408 | void failsafe_callback(void); | 408 | void failsafe_callback(void); | |
409 | void x86_64_switch_context(struct pcb *); | 409 | void x86_64_switch_context(struct pcb *); | |
410 | void x86_64_tls_switch(struct lwp *); | 410 | void x86_64_tls_switch(struct lwp *); | |
411 | 411 | |||
412 | void | 412 | void | |
413 | x86_64_switch_context(struct pcb *new) | 413 | x86_64_switch_context(struct pcb *new) | |
414 | { | 414 | { | |
415 | HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), new->pcb_rsp0); | 415 | HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), new->pcb_rsp0); | |
416 | struct physdev_op physop; | 416 | struct physdev_op physop; | |
417 | physop.cmd = PHYSDEVOP_SET_IOPL; | 417 | physop.cmd = PHYSDEVOP_SET_IOPL; | |
418 | physop.u.set_iopl.iopl = new->pcb_iopl; | 418 | physop.u.set_iopl.iopl = new->pcb_iopl; | |
419 | HYPERVISOR_physdev_op(&physop); | 419 | HYPERVISOR_physdev_op(&physop); | |
420 | } | 420 | } | |
421 | 421 | |||
422 | void | 422 | void | |
423 | x86_64_tls_switch(struct lwp *l) | 423 | x86_64_tls_switch(struct lwp *l) | |
424 | { | 424 | { | |
425 | struct cpu_info *ci = curcpu(); | 425 | struct cpu_info *ci = curcpu(); | |
426 | struct pcb *pcb = lwp_getpcb(l); | 426 | struct pcb *pcb = lwp_getpcb(l); | |
427 | struct trapframe *tf = l->l_md.md_regs; | 427 | struct trapframe *tf = l->l_md.md_regs; | |
428 | 428 | |||
429 | /* | 429 | /* | |
430 | * Raise the IPL to IPL_HIGH. | 430 | * Raise the IPL to IPL_HIGH. | |
431 | * FPU IPIs can alter the LWP's saved cr0. Dropping the priority | 431 | * FPU IPIs can alter the LWP's saved cr0. Dropping the priority | |
432 | * is deferred until mi_switch(), when cpu_switchto() returns. | 432 | * is deferred until mi_switch(), when cpu_switchto() returns. | |
433 | */ | 433 | */ | |
434 | (void)splhigh(); | 434 | (void)splhigh(); | |
435 | /* | 435 | /* | |
436 | * If our floating point registers are on a different CPU, | 436 | * If our floating point registers are on a different CPU, | |
437 | * set CR0_TS so we'll trap rather than reuse bogus state. | 437 | * set CR0_TS so we'll trap rather than reuse bogus state. | |
438 | */ | 438 | */ | |
439 | if (l != ci->ci_fpcurlwp) { | 439 | if (l != ci->ci_fpcurlwp) { | |
440 | HYPERVISOR_fpu_taskswitch(1); | 440 | HYPERVISOR_fpu_taskswitch(1); | |
441 | } | 441 | } | |
442 | 442 | |||
443 | /* Update TLS segment pointers */ | 443 | /* Update TLS segment pointers */ | |
444 | if (pcb->pcb_flags & PCB_COMPAT32) { | 444 | if (pcb->pcb_flags & PCB_COMPAT32) { | |
445 | update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs); | 445 | update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs); | |
446 | update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs); | 446 | update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs); | |
447 | setfs(tf->tf_fs); | 447 | setfs(tf->tf_fs); | |
448 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, tf->tf_gs); | 448 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, tf->tf_gs); | |
449 | } else { | 449 | } else { | |
450 | setfs(0); | 450 | setfs(0); | |
451 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0); | 451 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0); | |
452 | HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs); | 452 | HYPERVISOR_set_segment_base(SEGBASE_FS, pcb->pcb_fs); | |
453 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER, pcb->pcb_gs); | 453 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER, pcb->pcb_gs); | |
454 | } | 454 | } | |
455 | } | 455 | } | |
456 | #endif /* XEN */ | 456 | #endif /* XEN */ | |
457 | 457 | |||
458 | /* | 458 | /* | |
459 | * Set up proc0's TSS and LDT. | 459 | * Set up proc0's TSS and LDT. | |
460 | */ | 460 | */ | |
461 | void | 461 | void | |
462 | x86_64_proc0_tss_ldt_init(void) | 462 | x86_64_proc0_tss_ldt_init(void) | |
463 | { | 463 | { | |
464 | struct lwp *l = &lwp0; | 464 | struct lwp *l = &lwp0; | |
465 | struct pcb *pcb = lwp_getpcb(l); | 465 | struct pcb *pcb = lwp_getpcb(l); | |
466 | 466 | |||
467 | pcb->pcb_flags = 0; | 467 | pcb->pcb_flags = 0; | |
468 | pcb->pcb_fs = 0; | 468 | pcb->pcb_fs = 0; | |
469 | pcb->pcb_gs = 0; | 469 | pcb->pcb_gs = 0; | |
470 | pcb->pcb_rsp0 = (uvm_lwp_getuarea(l) + USPACE - 16) & ~0xf; | 470 | pcb->pcb_rsp0 = (uvm_lwp_getuarea(l) + USPACE - 16) & ~0xf; | |
471 | pcb->pcb_iopl = SEL_KPL; | 471 | pcb->pcb_iopl = SEL_KPL; | |
472 | 472 | |||
473 | pmap_kernel()->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); | 473 | pmap_kernel()->pm_ldt_sel = GSYSSEL(GLDT_SEL, SEL_KPL); | |
474 | pcb->pcb_cr0 = rcr0() & ~CR0_TS; | 474 | pcb->pcb_cr0 = rcr0() & ~CR0_TS; | |
475 | l->l_md.md_regs = (struct trapframe *)pcb->pcb_rsp0 - 1; | 475 | l->l_md.md_regs = (struct trapframe *)pcb->pcb_rsp0 - 1; | |
476 | 476 | |||
477 | #if !defined(XEN) | 477 | #if !defined(XEN) | |
478 | lldt(pmap_kernel()->pm_ldt_sel); | 478 | lldt(pmap_kernel()->pm_ldt_sel); | |
479 | #else | 479 | #else | |
480 | { | 480 | { | |
481 | struct physdev_op physop; | 481 | struct physdev_op physop; | |
482 | xen_set_ldt((vaddr_t) ldtstore, LDT_SIZE >> 3); | 482 | xen_set_ldt((vaddr_t) ldtstore, LDT_SIZE >> 3); | |
483 | /* Reset TS bit and set kernel stack for interrupt handlers */ | 483 | /* Reset TS bit and set kernel stack for interrupt handlers */ | |
484 | HYPERVISOR_fpu_taskswitch(1); | 484 | HYPERVISOR_fpu_taskswitch(1); | |
485 | HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_rsp0); | 485 | HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_rsp0); | |
486 | physop.cmd = PHYSDEVOP_SET_IOPL; | 486 | physop.cmd = PHYSDEVOP_SET_IOPL; | |
487 | physop.u.set_iopl.iopl = pcb->pcb_iopl; | 487 | physop.u.set_iopl.iopl = pcb->pcb_iopl; | |
488 | HYPERVISOR_physdev_op(&physop); | 488 | HYPERVISOR_physdev_op(&physop); | |
489 | } | 489 | } | |
490 | #endif /* XEN */ | 490 | #endif /* XEN */ | |
491 | } | 491 | } | |
492 | 492 | |||
493 | /* | 493 | /* | |
494 | * Set up TSS and I/O bitmap. | 494 | * Set up TSS and I/O bitmap. | |
495 | */ | 495 | */ | |
496 | void | 496 | void | |
497 | cpu_init_tss(struct cpu_info *ci) | 497 | cpu_init_tss(struct cpu_info *ci) | |
498 | { | 498 | { | |
499 | struct x86_64_tss *tss = &ci->ci_tss; | 499 | struct x86_64_tss *tss = &ci->ci_tss; | |
500 | uintptr_t p; | 500 | uintptr_t p; | |
501 | 501 | |||
502 | tss->tss_iobase = IOMAP_INVALOFF << 16; | 502 | tss->tss_iobase = IOMAP_INVALOFF << 16; | |
503 | /* tss->tss_ist[0] is filled by cpu_intr_init */ | 503 | /* tss->tss_ist[0] is filled by cpu_intr_init */ | |
504 | 504 | |||
505 | /* double fault */ | 505 | /* double fault */ | |
506 | tss->tss_ist[1] = (uint64_t)x86_64_doubleflt_stack + PAGE_SIZE - 16; | 506 | tss->tss_ist[1] = (uint64_t)x86_64_doubleflt_stack + PAGE_SIZE - 16; | |
507 | 507 | |||
508 | /* NMI */ | 508 | /* NMI */ | |
509 | p = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED); | 509 | p = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED); | |
510 | tss->tss_ist[2] = p + PAGE_SIZE - 16; | 510 | tss->tss_ist[2] = p + PAGE_SIZE - 16; | |
511 | ci->ci_tss_sel = tss_alloc(tss); | 511 | ci->ci_tss_sel = tss_alloc(tss); | |
512 | } | 512 | } | |
513 | 513 | |||
514 | void | 514 | void | |
515 | buildcontext(struct lwp *l, void *catcher, void *f) | 515 | buildcontext(struct lwp *l, void *catcher, void *f) | |
516 | { | 516 | { | |
517 | struct trapframe *tf = l->l_md.md_regs; | 517 | struct trapframe *tf = l->l_md.md_regs; | |
518 | 518 | |||
519 | tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); | 519 | tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); | |
520 | tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); | 520 | tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); | |
521 | tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); | 521 | tf->tf_fs = GSEL(GUDATA_SEL, SEL_UPL); | |
522 | tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL); | 522 | tf->tf_gs = GSEL(GUDATA_SEL, SEL_UPL); | |
523 | 523 | |||
524 | tf->tf_rip = (uint64_t)catcher; | 524 | tf->tf_rip = (uint64_t)catcher; | |
525 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); | 525 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); | |
526 | tf->tf_rflags &= ~PSL_CLEARSIG; | 526 | tf->tf_rflags &= ~PSL_CLEARSIG; | |
527 | tf->tf_rsp = (uint64_t)f; | 527 | tf->tf_rsp = (uint64_t)f; | |
528 | tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); | 528 | tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); | |
529 | 529 | |||
530 | /* Ensure FP state is sane */ | 530 | /* Ensure FP state is sane */ | |
531 | fpu_save_area_reset(l); | 531 | fpu_save_area_reset(l); | |
532 | } | 532 | } | |
533 | 533 | |||
534 | void | 534 | void | |
535 | sendsig_sigcontext(const ksiginfo_t *ksi, const sigset_t *mask) | 535 | sendsig_sigcontext(const ksiginfo_t *ksi, const sigset_t *mask) | |
536 | { | 536 | { | |
537 | 537 | |||
538 | printf("sendsig_sigcontext: illegal\n"); | 538 | printf("sendsig_sigcontext: illegal\n"); | |
539 | sigexit(curlwp, SIGILL); | 539 | sigexit(curlwp, SIGILL); | |
540 | } | 540 | } | |
541 | 541 | |||
/*
 * Deliver a signal with siginfo: build a sigframe on the user stack
 * (or alternate signal stack), copy it out, and redirect the LWP to
 * the handler via the signal trampoline.  Called with p_lock held.
 */
void
sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask)
{
	struct lwp *l = curlwp;
	struct proc *p = l->l_proc;
	struct sigacts *ps = p->p_sigacts;
	int onstack, error;
	int sig = ksi->ksi_signo;
	struct sigframe_siginfo *fp, frame;
	sig_t catcher = SIGACTION(p, sig).sa_handler;
	struct trapframe *tf = l->l_md.md_regs;
	char *sp;

	KASSERT(mutex_owned(p->p_lock));

	/* Do we need to jump onto the signal stack? */
	onstack =
	    (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 &&
	    (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0;

	/* Allocate space for the signal handler context. */
	if (onstack)
		sp = ((char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size);
	else
		/* AMD64 ABI 128-bytes "red zone". */
		sp = (char *)tf->tf_rsp - 128;

	sp -= sizeof(struct sigframe_siginfo);
	/*
	 * Round down the stackpointer to a multiple of 16 for the ABI.
	 * The extra -8 makes %rsp+8 (i.e. after the return address is
	 * consumed) 16-byte aligned at handler entry, as the ABI requires.
	 */
	fp = (struct sigframe_siginfo *)(((unsigned long)sp & ~15) - 8);

	/* Fake return address: the userland signal trampoline. */
	frame.sf_ra = (uint64_t)ps->sa_sigdesc[sig].sd_tramp;
	frame.sf_si._info = ksi->ksi_info;
	frame.sf_uc.uc_flags = _UC_SIGMASK;
	frame.sf_uc.uc_sigmask = *mask;
	frame.sf_uc.uc_link = l->l_ctxlink;
	frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK)
	    ? _UC_SETSTACK : _UC_CLRSTACK;
	memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack));
	sendsig_reset(l, sig);

	/* Drop p_lock across copyout: it may fault/sleep. */
	mutex_exit(p->p_lock);
	cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags);
	/* Copyout all the fp regs, the signal handler might expect them. */
	error = copyout(&frame, fp, sizeof frame);
	mutex_enter(p->p_lock);

	if (error != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}

	buildcontext(l, catcher, fp);

	/* Handler arguments: signo, siginfo *, ucontext *.  The ucontext
	 * pointer is also stashed in %r15 — presumably for the signal
	 * trampoline; NOTE(review): confirm against the sigcode. */
	tf->tf_rdi = sig;
	tf->tf_rsi = (uint64_t)&fp->sf_si;
	tf->tf_rdx = tf->tf_r15 = (uint64_t)&fp->sf_uc;

	/* Remember that we're now on the signal stack. */
	if (onstack)
		l->l_sigstk.ss_flags |= SS_ONSTACK;

	if ((vaddr_t)catcher >= VM_MAXUSER_ADDRESS) {
		/*
		 * process has given an invalid address for the
		 * handler. Stop it, but do not do it before so
		 * we can return the right info to userland (or in core dump)
		 */
		sigexit(l, SIGILL);
		/* NOTREACHED */
	}
}
618 | 618 | |||
619 | struct pcb dumppcb; | 619 | struct pcb dumppcb; | |
620 | 620 | |||
/*
 * Halt, power down, or reboot the machine.  The steps below are
 * order-critical: sync/unmount first, then suspend other CPUs and
 * interrupts, then dump if asked, then halt/power-off/reset.
 */
void
cpu_reboot(int howto, char *bootstr)
{
	static bool syncdone = false;
	int s = IPL_NONE;
	__USE(s);	/* ugly otherwise */

	/* Too early to sync or dump: just halt. */
	if (cold) {
		howto |= RB_HALT;
		goto haltsys;
	}

	boothowto = howto;

	/* i386 maybe_dump() */

	/*
	 * If we've panic'd, don't make the situation potentially
	 * worse by syncing or unmounting the file systems.
	 */
	if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) {
		/* syncdone guards against re-entry if the sync panics. */
		if (!syncdone) {
			syncdone = true;
			/* XXX used to force unmount as well, here */
			vfs_sync_all(curlwp);
			/*
			 * If we've been adjusting the clock, the todr
			 * will be out of synch; adjust it now.
			 *
			 * XXX used to do this after unmounting all
			 * filesystems with vfs_shutdown().
			 */
			if (time_adjusted != 0)
				resettodr();
		}

		/* Retry until unmounts and device detaches all settle. */
		while (vfs_unmountall1(curlwp, false, false) ||
		    config_detach_all(boothowto) ||
		    vfs_unmount_forceone(curlwp))
			;	/* do nothing */
	} else
		suspendsched();

	pmf_system_shutdown(boothowto);

	/* Disable interrupts. */
	s = splhigh();

	/* Do a dump if requested. */
	if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP)
		dumpsys();

haltsys:
	doshutdownhooks();

	if ((howto & RB_POWERDOWN) == RB_POWERDOWN) {
#if NACPICA > 0
		if (s != IPL_NONE)
			splx(s);

		/* ACPI S5 = soft-off; does not return on success. */
		acpi_enter_sleep_state(ACPI_STATE_S5);
#endif
#ifdef XEN
		HYPERVISOR_shutdown();
#endif /* XEN */
	}

	/* Stop the other CPUs before halting or resetting. */
	cpu_broadcast_halt();

	if (howto & RB_HALT) {
#if NACPICA > 0
		acpi_disable();
#endif

		printf("\n");
		printf("The operating system has halted.\n");
		printf("Please press any key to reboot.\n\n");
		cnpollc(1);	/* for proper keyboard command handling */
		if (cngetc() == 0) {
			/* no console attached, so just hlt */
			printf("No keyboard - cannot reboot after all.\n");
			for(;;) {
				x86_hlt();
			}
		}
		cnpollc(0);
	}

	printf("rebooting...\n");
	if (cpureset_delay > 0)
		delay(cpureset_delay * 1000);
	cpu_reset();
	for(;;) ;
	/*NOTREACHED*/
}
716 | 716 | |||
717 | /* | 717 | /* | |
718 | * XXXfvdl share dumpcode. | 718 | * XXXfvdl share dumpcode. | |
719 | */ | 719 | */ | |
720 | 720 | |||
721 | /* | 721 | /* | |
722 | * Perform assorted dump-related initialization tasks. Assumes that | 722 | * Perform assorted dump-related initialization tasks. Assumes that | |
723 | * the maximum physical memory address will not increase afterwards. | 723 | * the maximum physical memory address will not increase afterwards. | |
724 | */ | 724 | */ | |
725 | void | 725 | void | |
726 | dump_misc_init(void) | 726 | dump_misc_init(void) | |
727 | { | 727 | { | |
728 | #ifndef NO_SPARSE_DUMP | 728 | #ifndef NO_SPARSE_DUMP | |
729 | int i; | 729 | int i; | |
730 | #endif | 730 | #endif | |
731 | 731 | |||
732 | if (dump_headerbuf != NULL) | 732 | if (dump_headerbuf != NULL) | |
733 | return; /* already called */ | 733 | return; /* already called */ | |
734 | 734 | |||
735 | #ifndef NO_SPARSE_DUMP | 735 | #ifndef NO_SPARSE_DUMP | |
736 | for (i = 0; i < mem_cluster_cnt; ++i) { | 736 | for (i = 0; i < mem_cluster_cnt; ++i) { | |
737 | paddr_t top = mem_clusters[i].start + mem_clusters[i].size; | 737 | paddr_t top = mem_clusters[i].start + mem_clusters[i].size; | |
738 | if (max_paddr < top) | 738 | if (max_paddr < top) | |
739 | max_paddr = top; | 739 | max_paddr = top; | |
740 | } | 740 | } | |
741 | #ifdef DEBUG | 741 | #ifdef DEBUG | |
742 | printf("dump_misc_init: max_paddr = 0x%lx\n", | 742 | printf("dump_misc_init: max_paddr = 0x%lx\n", | |
743 | (unsigned long)max_paddr); | 743 | (unsigned long)max_paddr); | |
744 | #endif | 744 | #endif | |
745 | if (max_paddr == 0) { | 745 | if (max_paddr == 0) { | |
746 | printf("Your machine does not initialize mem_clusters; " | 746 | printf("Your machine does not initialize mem_clusters; " | |
747 | "sparse_dumps disabled\n"); | 747 | "sparse_dumps disabled\n"); | |
748 | sparse_dump = 0; | 748 | sparse_dump = 0; | |
749 | } else { | 749 | } else { | |
750 | sparse_dump_physmap = (void *)uvm_km_alloc(kernel_map, | 750 | sparse_dump_physmap = (void *)uvm_km_alloc(kernel_map, | |
751 | roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE), | 751 | roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE), | |
752 | PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO); | 752 | PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO); | |
753 | } | 753 | } | |
754 | #endif | 754 | #endif | |
755 | dump_headerbuf = (void *)uvm_km_alloc(kernel_map, | 755 | dump_headerbuf = (void *)uvm_km_alloc(kernel_map, | |
756 | dump_headerbuf_size, | 756 | dump_headerbuf_size, | |
757 | PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO); | 757 | PAGE_SIZE, UVM_KMF_WIRED|UVM_KMF_ZERO); | |
758 | /* XXXjld should check for failure here, disable dumps if so. */ | 758 | /* XXXjld should check for failure here, disable dumps if so. */ | |
759 | } | 759 | } | |
760 | 760 | |||
761 | #ifndef NO_SPARSE_DUMP | 761 | #ifndef NO_SPARSE_DUMP | |
762 | /* | 762 | /* | |
763 | * Clear the set of pages to include in a sparse dump. | 763 | * Clear the set of pages to include in a sparse dump. | |
764 | */ | 764 | */ | |
765 | void | 765 | void | |
766 | sparse_dump_reset(void) | 766 | sparse_dump_reset(void) | |
767 | { | 767 | { | |
768 | memset(sparse_dump_physmap, 0, | 768 | memset(sparse_dump_physmap, 0, | |
769 | roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE)); | 769 | roundup(max_paddr / (PAGE_SIZE * NBBY), PAGE_SIZE)); | |
770 | } | 770 | } | |
771 | 771 | |||
772 | /* | 772 | /* | |
773 | * Include or exclude pages in a sparse dump. | 773 | * Include or exclude pages in a sparse dump. | |
774 | */ | 774 | */ | |
775 | void | 775 | void | |
776 | sparse_dump_mark(void) | 776 | sparse_dump_mark(void) | |
777 | { | 777 | { | |
778 | paddr_t p, pstart, pend; | 778 | paddr_t p, pstart, pend; | |
779 | struct vm_page *pg; | 779 | struct vm_page *pg; | |
780 | int i; | 780 | int i; | |
781 | 781 | |||
782 | /* | 782 | /* | |
783 | * Mark all memory pages, then unmark pages that are uninteresting. | 783 | * Mark all memory pages, then unmark pages that are uninteresting. | |
784 | * Dereferenceing pg->uobject might crash again if another CPU | 784 | * Dereferenceing pg->uobject might crash again if another CPU | |
785 | * frees the object out from under us, but we can't lock anything | 785 | * frees the object out from under us, but we can't lock anything | |
786 | * so it's a risk we have to take. | 786 | * so it's a risk we have to take. | |
787 | */ | 787 | */ | |
788 | 788 | |||
789 | for (i = 0; i < mem_cluster_cnt; ++i) { | 789 | for (i = 0; i < mem_cluster_cnt; ++i) { | |
790 | pstart = mem_clusters[i].start / PAGE_SIZE; | 790 | pstart = mem_clusters[i].start / PAGE_SIZE; | |
791 | pend = pstart + mem_clusters[i].size / PAGE_SIZE; | 791 | pend = pstart + mem_clusters[i].size / PAGE_SIZE; | |
792 | 792 | |||
793 | for (p = pstart; p < pend; p++) { | 793 | for (p = pstart; p < pend; p++) { | |
794 | setbit(sparse_dump_physmap, p); | 794 | setbit(sparse_dump_physmap, p); | |
795 | } | 795 | } | |
796 | } | 796 | } | |
797 | for (i = 0; i < vm_nphysseg; i++) { | 797 | for (i = 0; i < vm_nphysseg; i++) { | |
798 | struct vm_physseg *seg = VM_PHYSMEM_PTR(i); | 798 | struct vm_physseg *seg = VM_PHYSMEM_PTR(i); | |
799 | 799 | |||
800 | for (pg = seg->pgs; pg < seg->lastpg; pg++) { | 800 | for (pg = seg->pgs; pg < seg->lastpg; pg++) { | |
801 | if (pg->uanon || (pg->pqflags & PQ_FREE) || | 801 | if (pg->uanon || (pg->pqflags & PQ_FREE) || | |
802 | (pg->uobject && pg->uobject->pgops)) { | 802 | (pg->uobject && pg->uobject->pgops)) { | |
803 | p = VM_PAGE_TO_PHYS(pg) / PAGE_SIZE; | 803 | p = VM_PAGE_TO_PHYS(pg) / PAGE_SIZE; | |
804 | clrbit(sparse_dump_physmap, p); | 804 | clrbit(sparse_dump_physmap, p); | |
805 | } | 805 | } | |
806 | } | 806 | } | |
807 | } | 807 | } | |
808 | } | 808 | } | |
809 | 809 | |||
810 | /* | 810 | /* | |
811 | * Machine-dependently decides on the contents of a sparse dump, using | 811 | * Machine-dependently decides on the contents of a sparse dump, using | |
812 | * the above. | 812 | * the above. | |
813 | */ | 813 | */ | |
814 | void | 814 | void | |
815 | cpu_dump_prep_sparse(void) | 815 | cpu_dump_prep_sparse(void) | |
816 | { | 816 | { | |
817 | sparse_dump_reset(); | 817 | sparse_dump_reset(); | |
818 | /* XXX could the alternate recursive page table be skipped? */ | 818 | /* XXX could the alternate recursive page table be skipped? */ | |
819 | sparse_dump_mark(); | 819 | sparse_dump_mark(); | |
820 | /* Memory for I/O buffers could be unmarked here, for example. */ | 820 | /* Memory for I/O buffers could be unmarked here, for example. */ | |
821 | /* The kernel text could also be unmarked, but gdb would be upset. */ | 821 | /* The kernel text could also be unmarked, but gdb would be upset. */ | |
822 | } | 822 | } | |
823 | #endif | 823 | #endif | |
824 | 824 | |||
825 | /* | 825 | /* | |
826 | * Abstractly iterate over the collection of memory segments to be | 826 | * Abstractly iterate over the collection of memory segments to be | |
827 | * dumped; the callback lacks the customary environment-pointer | 827 | * dumped; the callback lacks the customary environment-pointer | |
828 | * argument because none of the current users really need one. | 828 | * argument because none of the current users really need one. | |
829 | * | 829 | * | |
830 | * To be used only after dump_seg_prep is called to set things up. | 830 | * To be used only after dump_seg_prep is called to set things up. | |
831 | */ | 831 | */ | |
832 | int | 832 | int | |
833 | dump_seg_iter(int (*callback)(paddr_t, paddr_t)) | 833 | dump_seg_iter(int (*callback)(paddr_t, paddr_t)) | |
834 | { | 834 | { | |
835 | int error, i; | 835 | int error, i; | |
836 | 836 | |||
837 | #define CALLBACK(start,size) do { \ | 837 | #define CALLBACK(start,size) do { \ | |
838 | error = callback(start,size); \ | 838 | error = callback(start,size); \ | |
839 | if (error) \ | 839 | if (error) \ | |
840 | return error; \ | 840 | return error; \ | |
841 | } while(0) | 841 | } while(0) | |
842 | 842 | |||
843 | for (i = 0; i < mem_cluster_cnt; ++i) { | 843 | for (i = 0; i < mem_cluster_cnt; ++i) { | |
844 | #ifndef NO_SPARSE_DUMP | 844 | #ifndef NO_SPARSE_DUMP | |
845 | /* | 845 | /* | |
846 | * The bitmap is scanned within each memory segment, | 846 | * The bitmap is scanned within each memory segment, | |
847 | * rather than over its entire domain, in case any | 847 | * rather than over its entire domain, in case any | |
848 | * pages outside of the memory proper have been mapped | 848 | * pages outside of the memory proper have been mapped | |
849 | * into kva; they might be devices that wouldn't | 849 | * into kva; they might be devices that wouldn't | |
850 | * appreciate being arbitrarily read, and including | 850 | * appreciate being arbitrarily read, and including | |
851 | * them could also break the assumption that a sparse | 851 | * them could also break the assumption that a sparse | |
852 | * dump will always be smaller than a full one. | 852 | * dump will always be smaller than a full one. | |
853 | */ | 853 | */ | |
854 | if (sparse_dump && sparse_dump_physmap) { | 854 | if (sparse_dump && sparse_dump_physmap) { | |
855 | paddr_t p, start, end; | 855 | paddr_t p, start, end; | |
856 | int lastset; | 856 | int lastset; | |
857 | 857 | |||
858 | start = mem_clusters[i].start; | 858 | start = mem_clusters[i].start; | |
859 | end = start + mem_clusters[i].size; | 859 | end = start + mem_clusters[i].size; | |
860 | start = rounddown(start, PAGE_SIZE); /* unnecessary? */ | 860 | start = rounddown(start, PAGE_SIZE); /* unnecessary? */ | |
861 | lastset = 0; | 861 | lastset = 0; | |
862 | for (p = start; p < end; p += PAGE_SIZE) { | 862 | for (p = start; p < end; p += PAGE_SIZE) { | |
863 | int thisset = isset(sparse_dump_physmap, | 863 | int thisset = isset(sparse_dump_physmap, | |
864 | p/PAGE_SIZE); | 864 | p/PAGE_SIZE); | |
865 | 865 | |||
866 | if (!lastset && thisset) | 866 | if (!lastset && thisset) | |
867 | start = p; | 867 | start = p; | |
868 | if (lastset && !thisset) | 868 | if (lastset && !thisset) | |
869 | CALLBACK(start, p - start); | 869 | CALLBACK(start, p - start); | |
870 | lastset = thisset; | 870 | lastset = thisset; | |
871 | } | 871 | } | |
872 | if (lastset) | 872 | if (lastset) | |
873 | CALLBACK(start, p - start); | 873 | CALLBACK(start, p - start); | |
874 | } else | 874 | } else | |
875 | #endif | 875 | #endif | |
876 | CALLBACK(mem_clusters[i].start, mem_clusters[i].size); | 876 | CALLBACK(mem_clusters[i].start, mem_clusters[i].size); | |
877 | } | 877 | } | |
878 | return 0; | 878 | return 0; | |
879 | #undef CALLBACK | 879 | #undef CALLBACK | |
880 | } | 880 | } | |
881 | 881 | |||
882 | /* | 882 | /* | |
883 | * Prepare for an impending core dump: decide what's being dumped and | 883 | * Prepare for an impending core dump: decide what's being dumped and | |
884 | * how much space it will take up. | 884 | * how much space it will take up. | |
885 | */ | 885 | */ | |
886 | void | 886 | void | |
887 | dump_seg_prep(void) | 887 | dump_seg_prep(void) | |
888 | { | 888 | { | |
889 | #ifndef NO_SPARSE_DUMP | 889 | #ifndef NO_SPARSE_DUMP | |
890 | if (sparse_dump && sparse_dump_physmap) | 890 | if (sparse_dump && sparse_dump_physmap) | |
891 | cpu_dump_prep_sparse(); | 891 | cpu_dump_prep_sparse(); | |
892 | #endif | 892 | #endif | |
893 | 893 | |||
894 | dump_nmemsegs = 0; | 894 | dump_nmemsegs = 0; | |
895 | dump_npages = 0; | 895 | dump_npages = 0; | |
896 | dump_seg_iter(dump_seg_count_range); | 896 | dump_seg_iter(dump_seg_count_range); | |
897 | 897 | |||
898 | dump_header_size = ALIGN(sizeof(kcore_seg_t)) + | 898 | dump_header_size = ALIGN(sizeof(kcore_seg_t)) + | |
899 | ALIGN(sizeof(cpu_kcore_hdr_t)) + | 899 | ALIGN(sizeof(cpu_kcore_hdr_t)) + | |
900 | ALIGN(dump_nmemsegs * sizeof(phys_ram_seg_t)); | 900 | ALIGN(dump_nmemsegs * sizeof(phys_ram_seg_t)); | |
901 | dump_header_size = roundup(dump_header_size, dbtob(1)); | 901 | dump_header_size = roundup(dump_header_size, dbtob(1)); | |
902 | 902 | |||
903 | /* | 903 | /* | |
904 | * savecore(8) will read this to decide how many pages to | 904 | * savecore(8) will read this to decide how many pages to | |
905 | * copy, and cpu_dumpconf has already used the pessimistic | 905 | * copy, and cpu_dumpconf has already used the pessimistic | |
906 | * value to set dumplo, so it's time to tell the truth. | 906 | * value to set dumplo, so it's time to tell the truth. | |
907 | */ | 907 | */ | |
908 | dumpsize = dump_npages; /* XXX could these just be one variable? */ | 908 | dumpsize = dump_npages; /* XXX could these just be one variable? */ | |
909 | } | 909 | } | |
910 | 910 | |||
911 | int | 911 | int | |
912 | dump_seg_count_range(paddr_t start, paddr_t size) | 912 | dump_seg_count_range(paddr_t start, paddr_t size) | |
913 | { | 913 | { | |
914 | ++dump_nmemsegs; | 914 | ++dump_nmemsegs; | |
915 | dump_npages += size / PAGE_SIZE; | 915 | dump_npages += size / PAGE_SIZE; | |
916 | return 0; | 916 | return 0; | |
917 | } | 917 | } | |
918 | 918 | |||
919 | /* | 919 | /* | |
920 | * A sparse dump's header may be rather large, due to the number of | 920 | * A sparse dump's header may be rather large, due to the number of | |
921 | * "segments" emitted. These routines manage a simple output buffer, | 921 | * "segments" emitted. These routines manage a simple output buffer, | |
922 | * so that the header can be written to disk incrementally. | 922 | * so that the header can be written to disk incrementally. | |
923 | */ | 923 | */ | |
924 | void | 924 | void | |
925 | dump_header_start(void) | 925 | dump_header_start(void) | |
926 | { | 926 | { | |
927 | dump_headerbuf_ptr = dump_headerbuf; | 927 | dump_headerbuf_ptr = dump_headerbuf; | |
928 | dump_header_blkno = dumplo; | 928 | dump_header_blkno = dumplo; | |
929 | } | 929 | } | |
930 | 930 | |||
931 | int | 931 | int | |
932 | dump_header_flush(void) | 932 | dump_header_flush(void) | |
933 | { | 933 | { | |
934 | const struct bdevsw *bdev; | 934 | const struct bdevsw *bdev; | |
935 | size_t to_write; | 935 | size_t to_write; | |
936 | int error; | 936 | int error; | |
937 | 937 | |||
938 | bdev = bdevsw_lookup(dumpdev); | 938 | bdev = bdevsw_lookup(dumpdev); | |
939 | to_write = roundup(dump_headerbuf_ptr - dump_headerbuf, dbtob(1)); | 939 | to_write = roundup(dump_headerbuf_ptr - dump_headerbuf, dbtob(1)); | |
940 | error = bdev->d_dump(dumpdev, dump_header_blkno, | 940 | error = bdev->d_dump(dumpdev, dump_header_blkno, | |
941 | dump_headerbuf, to_write); | 941 | dump_headerbuf, to_write); | |
942 | dump_header_blkno += btodb(to_write); | 942 | dump_header_blkno += btodb(to_write); | |
943 | dump_headerbuf_ptr = dump_headerbuf; | 943 | dump_headerbuf_ptr = dump_headerbuf; | |
944 | return error; | 944 | return error; | |
945 | } | 945 | } | |
946 | 946 | |||
/*
 * Append n bytes to the dump header output buffer, flushing the
 * buffer to disk each time it fills.  Returns 0 on success or the
 * error from dump_header_flush().
 */
int
dump_header_addbytes(const void* vptr, size_t n)
{
	const char* ptr = vptr;
	int error;

	/*
	 * While the data won't fit, top off the buffer and flush it.
	 * NOTE(review): dump_headerbuf_avail appears to track the space
	 * remaining before dump_headerbuf_end (presumably derived from
	 * dump_headerbuf_ptr, hence re-read after each adjustment) --
	 * confirm against its definition.
	 */
	while (n > dump_headerbuf_avail) {
		memcpy(dump_headerbuf_ptr, ptr, dump_headerbuf_avail);
		ptr += dump_headerbuf_avail;
		n -= dump_headerbuf_avail;
		dump_headerbuf_ptr = dump_headerbuf_end;
		/* dump_header_flush() rewinds dump_headerbuf_ptr. */
		error = dump_header_flush();
		if (error)
			return error;
	}
	/* The remainder fits; buffer it for a later flush. */
	memcpy(dump_headerbuf_ptr, ptr, n);
	dump_headerbuf_ptr += n;

	return 0;
}
967 | 967 | |||
968 | int | 968 | int | |
969 | dump_header_addseg(paddr_t start, paddr_t size) | 969 | dump_header_addseg(paddr_t start, paddr_t size) | |
970 | { | 970 | { | |
971 | phys_ram_seg_t seg = { start, size }; | 971 | phys_ram_seg_t seg = { start, size }; | |
972 | 972 | |||
973 | return dump_header_addbytes(&seg, sizeof(seg)); | 973 | return dump_header_addbytes(&seg, sizeof(seg)); | |
974 | } | 974 | } | |
975 | 975 | |||
976 | int | 976 | int | |
977 | dump_header_finish(void) | 977 | dump_header_finish(void) | |
978 | { | 978 | { | |
979 | memset(dump_headerbuf_ptr, 0, dump_headerbuf_avail); | 979 | memset(dump_headerbuf_ptr, 0, dump_headerbuf_avail); | |
980 | return dump_header_flush(); | 980 | return dump_header_flush(); | |
981 | } | 981 | } | |
982 | 982 | |||
983 | 983 | |||
984 | /* | 984 | /* | |
985 | * These variables are needed by /sbin/savecore | 985 | * These variables are needed by /sbin/savecore | |
986 | */ | 986 | */ | |
987 | uint32_t dumpmag = 0x8fca0101; /* magic number */ | 987 | uint32_t dumpmag = 0x8fca0101; /* magic number */ | |
988 | int dumpsize = 0; /* pages */ | 988 | int dumpsize = 0; /* pages */ | |
989 | long dumplo = 0; /* blocks */ | 989 | long dumplo = 0; /* blocks */ | |
990 | 990 | |||
991 | /* | 991 | /* | |
992 | * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers | 992 | * cpu_dumpsize: calculate size of machine-dependent kernel core dump headers | |
993 | * for a full (non-sparse) dump. | 993 | * for a full (non-sparse) dump. | |
994 | */ | 994 | */ | |
995 | int | 995 | int | |
996 | cpu_dumpsize(void) | 996 | cpu_dumpsize(void) | |
997 | { | 997 | { | |
998 | int size; | 998 | int size; | |
999 | 999 | |||
1000 | size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) + | 1000 | size = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t)) + | |
1001 | ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t)); | 1001 | ALIGN(mem_cluster_cnt * sizeof(phys_ram_seg_t)); | |
1002 | if (roundup(size, dbtob(1)) != dbtob(1)) | 1002 | if (roundup(size, dbtob(1)) != dbtob(1)) | |
1003 | return (-1); | 1003 | return (-1); | |
1004 | 1004 | |||
1005 | return (1); | 1005 | return (1); | |
1006 | } | 1006 | } | |
1007 | 1007 | |||
1008 | /* | 1008 | /* | |
1009 | * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped | 1009 | * cpu_dump_mempagecnt: calculate the size of RAM (in pages) to be dumped | |
1010 | * for a full (non-sparse) dump. | 1010 | * for a full (non-sparse) dump. | |
1011 | */ | 1011 | */ | |
1012 | u_long | 1012 | u_long | |
1013 | cpu_dump_mempagecnt(void) | 1013 | cpu_dump_mempagecnt(void) | |
1014 | { | 1014 | { | |
1015 | u_long i, n; | 1015 | u_long i, n; | |
1016 | 1016 | |||
1017 | n = 0; | 1017 | n = 0; | |
1018 | for (i = 0; i < mem_cluster_cnt; i++) | 1018 | for (i = 0; i < mem_cluster_cnt; i++) | |
1019 | n += atop(mem_clusters[i].size); | 1019 | n += atop(mem_clusters[i].size); | |
1020 | return (n); | 1020 | return (n); | |
1021 | } | 1021 | } | |
1022 | 1022 | |||
1023 | /* | 1023 | /* | |
1024 | * cpu_dump: dump the machine-dependent kernel core dump headers. | 1024 | * cpu_dump: dump the machine-dependent kernel core dump headers. | |
1025 | */ | 1025 | */ | |
1026 | int | 1026 | int | |
1027 | cpu_dump(void) | 1027 | cpu_dump(void) | |
1028 | { | 1028 | { | |
1029 | kcore_seg_t seg; | 1029 | kcore_seg_t seg; | |
1030 | cpu_kcore_hdr_t cpuhdr; | 1030 | cpu_kcore_hdr_t cpuhdr; | |
1031 | const struct bdevsw *bdev; | 1031 | const struct bdevsw *bdev; | |
1032 | 1032 | |||
1033 | bdev = bdevsw_lookup(dumpdev); | 1033 | bdev = bdevsw_lookup(dumpdev); | |
1034 | if (bdev == NULL) | 1034 | if (bdev == NULL) | |
1035 | return (ENXIO); | 1035 | return (ENXIO); | |
1036 | 1036 | |||
1037 | /* | 1037 | /* | |
1038 | * Generate a segment header. | 1038 | * Generate a segment header. | |
1039 | */ | 1039 | */ | |
1040 | CORE_SETMAGIC(seg, KCORE_MAGIC, MID_MACHINE, CORE_CPU); | 1040 | CORE_SETMAGIC(seg, KCORE_MAGIC, MID_MACHINE, CORE_CPU); | |
1041 | seg.c_size = dump_header_size - ALIGN(sizeof(seg)); | 1041 | seg.c_size = dump_header_size - ALIGN(sizeof(seg)); | |
1042 | (void)dump_header_addbytes(&seg, ALIGN(sizeof(seg))); | 1042 | (void)dump_header_addbytes(&seg, ALIGN(sizeof(seg))); | |
1043 | 1043 | |||
1044 | /* | 1044 | /* | |
1045 | * Add the machine-dependent header info. | 1045 | * Add the machine-dependent header info. | |
1046 | */ | 1046 | */ | |
1047 | cpuhdr.ptdpaddr = PDPpaddr; | 1047 | cpuhdr.ptdpaddr = PDPpaddr; | |
1048 | cpuhdr.nmemsegs = dump_nmemsegs; | 1048 | cpuhdr.nmemsegs = dump_nmemsegs; | |
1049 | (void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr))); | 1049 | (void)dump_header_addbytes(&cpuhdr, ALIGN(sizeof(cpuhdr))); | |
1050 | 1050 | |||
1051 | /* | 1051 | /* | |
1052 | * Write out the memory segment descriptors. | 1052 | * Write out the memory segment descriptors. | |
1053 | */ | 1053 | */ | |
1054 | return dump_seg_iter(dump_header_addseg); | 1054 | return dump_seg_iter(dump_header_addseg); | |
1055 | } | 1055 | } | |
1056 | 1056 | |||
1057 | /* | 1057 | /* | |
1058 | * Doadump comes here after turning off memory management and | 1058 | * Doadump comes here after turning off memory management and | |
1059 | * getting on the dump stack, either when called above, or by | 1059 | * getting on the dump stack, either when called above, or by | |
1060 | * the auto-restart code. | 1060 | * the auto-restart code. | |
1061 | */ | 1061 | */ | |
1062 | #define BYTES_PER_DUMP PAGE_SIZE /* must be a multiple of pagesize XXX small */ | 1062 | #define BYTES_PER_DUMP PAGE_SIZE /* must be a multiple of pagesize XXX small */ | |
1063 | static vaddr_t dumpspace; | 1063 | static vaddr_t dumpspace; | |
1064 | 1064 | |||
1065 | vaddr_t | 1065 | vaddr_t | |
1066 | reserve_dumppages(vaddr_t p) | 1066 | reserve_dumppages(vaddr_t p) | |
1067 | { | 1067 | { | |
1068 | 1068 | |||
1069 | dumpspace = p; | 1069 | dumpspace = p; | |
1070 | return (p + BYTES_PER_DUMP); | 1070 | return (p + BYTES_PER_DUMP); | |
1071 | } | 1071 | } | |
1072 | 1072 | |||
/*
 * Write one physical memory segment [maddr, maddr + bytes) to the
 * dump device, BYTES_PER_DUMP bytes at a time, mapping each chunk
 * read-only through the reserved dumpspace VA window.  Advances the
 * global on-disk position (dump_header_blkno) and decrements the
 * progress counter (dump_totalbytesleft).  Returns 0 or an errno.
 */
int
dumpsys_seg(paddr_t maddr, paddr_t bytes)
{
	u_long i, m, n;
	daddr_t blkno;
	const struct bdevsw *bdev;
	int (*dump)(dev_t, daddr_t, void *, size_t);
	int error;

	if (dumpdev == NODEV)
		return ENODEV;
	bdev = bdevsw_lookup(dumpdev);
	if (bdev == NULL || bdev->d_psize == NULL)
		return ENODEV;

	dump = bdev->d_dump;

	blkno = dump_header_blkno;
	for (i = 0; i < bytes; i += n, dump_totalbytesleft -= n) {
		/* Print out how many MBs we have left to go. */
		if ((dump_totalbytesleft % (1024*1024)) == 0)
			printf_nolog("%lu ", (unsigned long)
			    (dump_totalbytesleft / (1024 * 1024)));

		/* Limit size for next transfer. */
		n = bytes - i;
		if (n > BYTES_PER_DUMP)
			n = BYTES_PER_DUMP;

		/* Map the chunk into the dumpspace window, page by page. */
		for (m = 0; m < n; m += NBPG)
			pmap_kenter_pa(dumpspace + m, maddr + m,
			    VM_PROT_READ, 0);
		pmap_update(pmap_kernel());

		/* Push the chunk to disk, then tear the mapping down. */
		error = (*dump)(dumpdev, blkno, (void *)dumpspace, n);
		pmap_kremove_local(dumpspace, n);
		if (error)
			return error;
		maddr += n;
		blkno += btodb(n);		/* XXX? */

#if 0	/* XXX this doesn't work.  grr. */
		/* operator aborting dump? */
		if (sget() != NULL)
			return EINTR;
#endif
	}
	dump_header_blkno = blkno;

	return 0;
}
1124 | 1124 | |||
1125 | void | 1125 | void | |
1126 | dodumpsys(void) | 1126 | dodumpsys(void) | |
1127 | { | 1127 | { | |
1128 | const struct bdevsw *bdev; | 1128 | const struct bdevsw *bdev; | |
1129 | int dumpend, psize; | 1129 | int dumpend, psize; | |
1130 | int error; | 1130 | int error; | |
1131 | 1131 | |||
1132 | if (dumpdev == NODEV) | 1132 | if (dumpdev == NODEV) | |
1133 | return; | 1133 | return; | |
1134 | 1134 | |||
1135 | bdev = bdevsw_lookup(dumpdev); | 1135 | bdev = bdevsw_lookup(dumpdev); | |
1136 | if (bdev == NULL || bdev->d_psize == NULL) | 1136 | if (bdev == NULL || bdev->d_psize == NULL) | |
1137 | return; | 1137 | return; | |
1138 | /* | 1138 | /* | |
1139 | * For dumps during autoconfiguration, | 1139 | * For dumps during autoconfiguration, | |
1140 | * if dump device has already configured... | 1140 | * if dump device has already configured... | |
1141 | */ | 1141 | */ | |
1142 | if (dumpsize == 0) | 1142 | if (dumpsize == 0) | |
1143 | cpu_dumpconf(); | 1143 | cpu_dumpconf(); | |
1144 | 1144 | |||
1145 | printf("\ndumping to dev %llu,%llu (offset=%ld, size=%d):", | 1145 | printf("\ndumping to dev %llu,%llu (offset=%ld, size=%d):", | |
1146 | (unsigned long long)major(dumpdev), | 1146 | (unsigned long long)major(dumpdev), | |
1147 | (unsigned long long)minor(dumpdev), dumplo, dumpsize); | 1147 | (unsigned long long)minor(dumpdev), dumplo, dumpsize); | |
1148 | 1148 | |||
1149 | if (dumplo <= 0 || dumpsize <= 0) { | 1149 | if (dumplo <= 0 || dumpsize <= 0) { | |
1150 | printf(" not possible\n"); | 1150 | printf(" not possible\n"); | |
1151 | return; | 1151 | return; | |
1152 | } | 1152 | } | |
1153 | 1153 | |||
1154 | psize = bdev_size(dumpdev); | 1154 | psize = bdev_size(dumpdev); | |
1155 | printf("\ndump "); | 1155 | printf("\ndump "); | |
1156 | if (psize == -1) { | 1156 | if (psize == -1) { | |
1157 | printf("area unavailable\n"); | 1157 | printf("area unavailable\n"); | |
1158 | return; | 1158 | return; | |
1159 | } | 1159 | } | |
1160 | 1160 | |||
1161 | #if 0 /* XXX this doesn't work. grr. */ | 1161 | #if 0 /* XXX this doesn't work. grr. */ | |
1162 | /* toss any characters present prior to dump */ | 1162 | /* toss any characters present prior to dump */ | |
1163 | while (sget() != NULL); /*syscons and pccons differ */ | 1163 | while (sget() != NULL); /*syscons and pccons differ */ | |
1164 | #endif | 1164 | #endif | |
1165 | 1165 | |||
1166 | dump_seg_prep(); | 1166 | dump_seg_prep(); | |
1167 | dumpend = dumplo + btodb(dump_header_size) + ctod(dump_npages); | 1167 | dumpend = dumplo + btodb(dump_header_size) + ctod(dump_npages); | |
1168 | if (dumpend > psize) { | 1168 | if (dumpend > psize) { | |
1169 | printf("failed: insufficient space (%d < %d)\n", | 1169 | printf("failed: insufficient space (%d < %d)\n", | |
1170 | psize, dumpend); | 1170 | psize, dumpend); | |
1171 | goto failed; | 1171 | goto failed; | |
1172 | } | 1172 | } | |
1173 | 1173 | |||
1174 | dump_header_start(); | 1174 | dump_header_start(); | |
1175 | if ((error = cpu_dump()) != 0) | 1175 | if ((error = cpu_dump()) != 0) | |
1176 | goto err; | 1176 | goto err; | |
1177 | if ((error = dump_header_finish()) != 0) | 1177 | if ((error = dump_header_finish()) != 0) | |
1178 | goto err; | 1178 | goto err; | |
1179 | 1179 | |||
1180 | if (dump_header_blkno != dumplo + btodb(dump_header_size)) { | 1180 | if (dump_header_blkno != dumplo + btodb(dump_header_size)) { | |
1181 | printf("BAD header size (%ld [written] != %ld [expected])\n", | 1181 | printf("BAD header size (%ld [written] != %ld [expected])\n", | |
1182 | (long)(dump_header_blkno - dumplo), | 1182 | (long)(dump_header_blkno - dumplo), | |
1183 | (long)btodb(dump_header_size)); | 1183 | (long)btodb(dump_header_size)); | |
1184 | goto failed; | 1184 | goto failed; | |
1185 | } | 1185 | } | |
1186 | 1186 | |||
1187 | dump_totalbytesleft = roundup(ptoa(dump_npages), BYTES_PER_DUMP); | 1187 | dump_totalbytesleft = roundup(ptoa(dump_npages), BYTES_PER_DUMP); | |
1188 | error = dump_seg_iter(dumpsys_seg); | 1188 | error = dump_seg_iter(dumpsys_seg); | |
1189 | 1189 | |||
1190 | if (error == 0 && dump_header_blkno != dumpend) { | 1190 | if (error == 0 && dump_header_blkno != dumpend) { | |
1191 | printf("BAD dump size (%ld [written] != %ld [expected])\n", | 1191 | printf("BAD dump size (%ld [written] != %ld [expected])\n", | |
1192 | (long)(dumpend - dumplo), | 1192 | (long)(dumpend - dumplo), | |
1193 | (long)(dump_header_blkno - dumplo)); | 1193 | (long)(dump_header_blkno - dumplo)); | |
1194 | goto failed; | 1194 | goto failed; | |
1195 | } | 1195 | } | |
1196 | 1196 | |||
1197 | err: | 1197 | err: | |
1198 | switch (error) { | 1198 | switch (error) { | |
1199 | 1199 | |||
1200 | case ENXIO: | 1200 | case ENXIO: | |
1201 | printf("device bad\n"); | 1201 | printf("device bad\n"); | |
1202 | break; | 1202 | break; | |
1203 | 1203 | |||
1204 | case EFAULT: | 1204 | case EFAULT: | |
1205 | printf("device not ready\n"); | 1205 | printf("device not ready\n"); | |
1206 | break; | 1206 | break; | |
1207 | 1207 | |||
1208 | case EINVAL: | 1208 | case EINVAL: | |
1209 | printf("area improper\n"); | 1209 | printf("area improper\n"); | |
1210 | break; | 1210 | break; | |
1211 | 1211 | |||
1212 | case EIO: | 1212 | case EIO: | |
1213 | printf("i/o error\n"); | 1213 | printf("i/o error\n"); | |
1214 | break; | 1214 | break; | |
1215 | 1215 | |||
1216 | case EINTR: | 1216 | case EINTR: | |
1217 | printf("aborted from console\n"); | 1217 | printf("aborted from console\n"); | |
1218 | break; | 1218 | break; | |
1219 | 1219 | |||
1220 | case 0: | 1220 | case 0: | |
1221 | printf("succeeded\n"); | 1221 | printf("succeeded\n"); | |
1222 | break; | 1222 | break; | |
1223 | 1223 | |||
1224 | default: | 1224 | default: | |
1225 | printf("error %d\n", error); | 1225 | printf("error %d\n", error); | |
1226 | break; | 1226 | break; | |
1227 | } | 1227 | } | |
1228 | failed: | 1228 | failed: | |
1229 | printf("\n\n"); | 1229 | printf("\n\n"); | |
1230 | delay(5000000); /* 5 seconds */ | 1230 | delay(5000000); /* 5 seconds */ | |
1231 | } | 1231 | } | |
1232 | 1232 | |||
1233 | /* | 1233 | /* | |
1234 | * This is called by main to set dumplo and dumpsize. | 1234 | * This is called by main to set dumplo and dumpsize. | |
1235 | * Dumps always skip the first PAGE_SIZE of disk space | 1235 | * Dumps always skip the first PAGE_SIZE of disk space | |
1236 | * in case there might be a disk label stored there. | 1236 | * in case there might be a disk label stored there. | |
1237 | * If there is extra space, put dump at the end to | 1237 | * If there is extra space, put dump at the end to | |
1238 | * reduce the chance that swapping trashes it. | 1238 | * reduce the chance that swapping trashes it. | |
1239 | * | 1239 | * | |
 * Sparse dumps can't be placed as close to the end as possible, because
1241 | * savecore(8) has to know where to start reading in the dump device | 1241 | * savecore(8) has to know where to start reading in the dump device | |
1242 | * before it has access to any of the crashed system's state. | 1242 | * before it has access to any of the crashed system's state. | |
1243 | * | 1243 | * | |
1244 | * Note also that a sparse dump will never be larger than a full one: | 1244 | * Note also that a sparse dump will never be larger than a full one: | |
1245 | * in order to add a phys_ram_seg_t to the header, at least one page | 1245 | * in order to add a phys_ram_seg_t to the header, at least one page | |
1246 | * must be removed. | 1246 | * must be removed. | |
1247 | */ | 1247 | */ | |
void
cpu_dumpconf(void)
{
	int nblks, dumpblks;	/* size of dump area */

	if (dumpdev == NODEV)
		goto bad;
	/* The device must hold more than one page plus the label area. */
	nblks = bdev_size(dumpdev);
	if (nblks <= ctod(1))
		goto bad;

	/* Blocks needed by the (full-dump) headers; -1 if they don't fit. */
	dumpblks = cpu_dumpsize();
	if (dumpblks < 0)
		goto bad;

	/* dumpsize is in page units, and doesn't include headers. */
	dumpsize = cpu_dump_mempagecnt();

	dumpblks += ctod(dumpsize);

	/* If dump won't fit (incl. room for possible label), punt. */
	if (dumpblks > (nblks - ctod(1))) {
#ifndef NO_SPARSE_DUMP
		/* A sparse dump might (and hopefully will) fit. */
		dumplo = ctod(1);
#else
		/* But if we're not configured for that, punt. */
		goto bad;
#endif
	} else {
		/* Put dump at end of partition */
		dumplo = nblks - dumpblks;
	}


	/* Now that we've decided this will work, init ancillary stuff. */
	dump_misc_init();
	return;

 bad:
	dumpsize = 0;
}
1290 | 1290 | |||
/*
 * Clear registers on exec: build the initial user trap frame and
 * reset the FPU state for a freshly exec'd 64-bit process.
 */
void
setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack)
{
	struct pcb *pcb = lwp_getpcb(l);
	struct trapframe *tf;

#ifdef USER_LDT
	pmap_ldt_cleanup(l);
#endif

	/*
	 * Reset the FPU save area.  Binaries built against version
	 * 699002600 or newer (apparently 6.99.26) get the native x87
	 * control word; older ones get the compat value.
	 */
	fpu_save_area_clear(l, pack->ep_osversion >= 699002600
	    ? __NetBSD_NPXCW__ : __NetBSD_COMPAT_NPXCW__);
	pcb->pcb_flags = 0;

	/* Exec of a native 64-bit image: clear any 32-bit compat flag. */
	l->l_proc->p_flag &= ~PK_32;

	tf = l->l_md.md_regs;
	tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL);
	tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL);
	cpu_fsgs_zero(l);
	tf->tf_rdi = 0;
	tf->tf_rsi = 0;
	tf->tf_rbp = 0;
	tf->tf_rbx = l->l_proc->p_psstrp;	/* ps_strings address */
	tf->tf_rdx = 0;
	tf->tf_rcx = 0;
	tf->tf_rax = 0;
	tf->tf_rip = pack->ep_entry;		/* start at the entry point */
	tf->tf_cs = LSEL(LUCODE_SEL, SEL_UPL);
	tf->tf_rflags = PSL_USERSET;
	tf->tf_rsp = stack;
	tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL);
}
1327 | 1327 | |||
1328 | /* | 1328 | /* | |
1329 | * Initialize segments and descriptor tables | 1329 | * Initialize segments and descriptor tables | |
1330 | */ | 1330 | */ | |
1331 | 1331 | |||
1332 | #ifdef XEN | 1332 | #ifdef XEN | |
1333 | struct trap_info *xen_idt; | 1333 | struct trap_info *xen_idt; | |
1334 | int xen_idt_idx; | 1334 | int xen_idt_idx; | |
1335 | #endif | 1335 | #endif | |
1336 | char *ldtstore; | 1336 | char *ldtstore; | |
1337 | char *gdtstore; | 1337 | char *gdtstore; | |
1338 | 1338 | |||
/*
 * Install an interrupt/trap gate descriptor.  The IDT page is kept
 * read-only, so it is temporarily made writable around the update;
 * preemption is disabled so the local protection change cannot leak
 * to another CPU's context.
 */
void
setgate(struct gate_descriptor *gd, void *func, int ist, int type, int dpl, int sel)
{

	kpreempt_disable();
	pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE);

	/* Split the 64-bit handler address across the descriptor fields. */
	gd->gd_looffset = (uint64_t)func & 0xffff;
	gd->gd_selector = sel;
	gd->gd_ist = ist;
	gd->gd_type = type;
	gd->gd_dpl = dpl;
	gd->gd_p = 1;			/* mark the gate present */
	gd->gd_hioffset = (uint64_t)func >> 16;
	gd->gd_zero = 0;
	gd->gd_xx1 = 0;
	gd->gd_xx2 = 0;
	gd->gd_xx3 = 0;

	pmap_changeprot_local(idt_vaddr, VM_PROT_READ);
	kpreempt_enable();
}
1361 | 1361 | |||
1362 | void | 1362 | void | |
1363 | unsetgate(struct gate_descriptor *gd) | 1363 | unsetgate(struct gate_descriptor *gd) | |
1364 | { | 1364 | { | |
1365 | 1365 | |||
1366 | kpreempt_disable(); | 1366 | kpreempt_disable(); | |
1367 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); | 1367 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); | |
1368 | 1368 | |||
1369 | memset(gd, 0, sizeof (*gd)); | 1369 | memset(gd, 0, sizeof (*gd)); | |
1370 | 1370 | |||
1371 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ); | 1371 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ); | |
1372 | kpreempt_enable(); | 1372 | kpreempt_enable(); | |
1373 | } | 1373 | } | |
1374 | 1374 | |||
1375 | void | 1375 | void | |
1376 | setregion(struct region_descriptor *rd, void *base, uint16_t limit) | 1376 | setregion(struct region_descriptor *rd, void *base, uint16_t limit) | |
1377 | { | 1377 | { | |
1378 | rd->rd_limit = limit; | 1378 | rd->rd_limit = limit; | |
1379 | rd->rd_base = (uint64_t)base; | 1379 | rd->rd_base = (uint64_t)base; | |
1380 | } | 1380 | } | |
1381 | 1381 | |||
/*
 * Build a memory segment descriptor.
 * Note that the base and limit fields are ignored in long mode.
 */
void
set_mem_segment(struct mem_segment_descriptor *sd, void *base, size_t limit,
	int type, int dpl, int gran, int def32, int is64)
{
	/* Limit and base are split across low/high descriptor fields. */
	sd->sd_lolimit = (unsigned)limit;
	sd->sd_lobase = (unsigned long)base;
	sd->sd_type = type;
	sd->sd_dpl = dpl;
	sd->sd_p = 1;			/* segment present */
	sd->sd_hilimit = (unsigned)limit >> 16;
	sd->sd_avl = 0;
	sd->sd_long = is64;		/* 64-bit code segment flag */
	sd->sd_def32 = def32;		/* default operand size (32-bit) */
	sd->sd_gran = gran;		/* granularity (byte vs. page) */
	sd->sd_hibase = (unsigned long)base >> 24;
}
1401 | 1401 | |||
/*
 * Build a system segment descriptor (e.g. TSS/LDT entry), which is
 * 16 bytes in long mode; start from a zeroed descriptor.
 */
void
set_sys_segment(struct sys_segment_descriptor *sd, void *base, size_t limit,
	int type, int dpl, int gran)
{
	memset(sd, 0, sizeof *sd);
	/* Limit and base are split across low/high descriptor fields. */
	sd->sd_lolimit = (unsigned)limit;
	sd->sd_lobase = (uint64_t)base;
	sd->sd_type = type;
	sd->sd_dpl = dpl;
	sd->sd_p = 1;			/* segment present */
	sd->sd_hilimit = (unsigned)limit >> 16;
	sd->sd_gran = gran;		/* granularity (byte vs. page) */
	sd->sd_hibase = (uint64_t)base >> 24;
}
1416 | 1416 | |||
/*
 * Load the interrupt descriptor table: via lidt on bare metal, or by
 * handing the trap table to the hypervisor under Xen.
 */
void
cpu_init_idt(void)
{
#ifndef XEN
	struct region_descriptor region;

	setregion(&region, idt, NIDT * sizeof(idt[0]) - 1);
	lidt(&region);
#else
	if (HYPERVISOR_set_trap_table(xen_idt))
		panic("HYPERVISOR_set_trap_table() failed");
#endif
}
1430 | 1430 | |||
1431 | #define IDTVEC(name) __CONCAT(X, name) | 1431 | #define IDTVEC(name) __CONCAT(X, name) | |
1432 | typedef void (vector)(void); | 1432 | typedef void (vector)(void); | |
1433 | extern vector IDTVEC(syscall); | 1433 | extern vector IDTVEC(syscall); | |
1434 | extern vector IDTVEC(syscall32); | 1434 | extern vector IDTVEC(syscall32); | |
1435 | extern vector IDTVEC(osyscall); | 1435 | extern vector IDTVEC(osyscall); | |
1436 | extern vector IDTVEC(oosyscall); | 1436 | extern vector IDTVEC(oosyscall); | |
1437 | extern vector *IDTVEC(exceptions)[]; | 1437 | extern vector *IDTVEC(exceptions)[]; | |
1438 | 1438 | |||
1439 | static void | 1439 | static void | |
1440 | init_x86_64_msgbuf(void) | 1440 | init_x86_64_msgbuf(void) | |
1441 | { | 1441 | { | |
1442 | /* Message buffer is located at end of core. */ | 1442 | /* Message buffer is located at end of core. */ | |
1443 | struct vm_physseg *vps; | 1443 | struct vm_physseg *vps; | |
1444 | psize_t sz = round_page(MSGBUFSIZE); | 1444 | psize_t sz = round_page(MSGBUFSIZE); | |
1445 | psize_t reqsz = sz; | 1445 | psize_t reqsz = sz; | |
1446 | int x; | 1446 | int x; | |
1447 | 1447 | |||
1448 | search_again: | 1448 | search_again: | |
1449 | vps = NULL; | 1449 | vps = NULL; | |
1450 | 1450 | |||
1451 | for (x = 0; x < vm_nphysseg; x++) { | 1451 | for (x = 0; x < vm_nphysseg; x++) { | |
1452 | vps = VM_PHYSMEM_PTR(x); | 1452 | vps = VM_PHYSMEM_PTR(x); | |
1453 | if (ctob(vps->avail_end) == avail_end) | 1453 | if (ctob(vps->avail_end) == avail_end) | |
1454 | break; | 1454 | break; | |
1455 | } | 1455 | } | |
1456 | if (x == vm_nphysseg) | 1456 | if (x == vm_nphysseg) | |
1457 | panic("init_x86_64: can't find end of memory"); | 1457 | panic("init_x86_64: can't find end of memory"); | |
1458 | 1458 | |||
1459 | /* Shrink so it'll fit in the last segment. */ | 1459 | /* Shrink so it'll fit in the last segment. */ | |
1460 | if ((vps->avail_end - vps->avail_start) < atop(sz)) | 1460 | if ((vps->avail_end - vps->avail_start) < atop(sz)) | |
1461 | sz = ctob(vps->avail_end - vps->avail_start); | 1461 | sz = ctob(vps->avail_end - vps->avail_start); | |
1462 | 1462 | |||
1463 | vps->avail_end -= atop(sz); | 1463 | vps->avail_end -= atop(sz); | |
1464 | vps->end -= atop(sz); | 1464 | vps->end -= atop(sz); | |
1465 | msgbuf_p_seg[msgbuf_p_cnt].sz = sz; | 1465 | msgbuf_p_seg[msgbuf_p_cnt].sz = sz; | |
1466 | msgbuf_p_seg[msgbuf_p_cnt++].paddr = ctob(vps->avail_end); | 1466 | msgbuf_p_seg[msgbuf_p_cnt++].paddr = ctob(vps->avail_end); | |
1467 | 1467 | |||
1468 | /* Remove the last segment if it now has no pages. */ | 1468 | /* Remove the last segment if it now has no pages. */ | |
1469 | if (vps->start == vps->end) { | 1469 | if (vps->start == vps->end) { | |
1470 | for (vm_nphysseg--; x < vm_nphysseg; x++) | 1470 | for (vm_nphysseg--; x < vm_nphysseg; x++) | |
1471 | VM_PHYSMEM_PTR_SWAP(x, x + 1); | 1471 | VM_PHYSMEM_PTR_SWAP(x, x + 1); | |
1472 | } | 1472 | } | |
1473 | 1473 | |||
1474 | /* Now find where the new avail_end is. */ | 1474 | /* Now find where the new avail_end is. */ | |
1475 | for (avail_end = 0, x = 0; x < vm_nphysseg; x++) | 1475 | for (avail_end = 0, x = 0; x < vm_nphysseg; x++) | |
1476 | if (VM_PHYSMEM_PTR(x)->avail_end > avail_end) | 1476 | if (VM_PHYSMEM_PTR(x)->avail_end > avail_end) | |
1477 | avail_end = VM_PHYSMEM_PTR(x)->avail_end; | 1477 | avail_end = VM_PHYSMEM_PTR(x)->avail_end; | |
1478 | avail_end = ctob(avail_end); | 1478 | avail_end = ctob(avail_end); | |
1479 | 1479 | |||
1480 | if (sz == reqsz) | 1480 | if (sz == reqsz) | |
1481 | return; | 1481 | return; | |
1482 | 1482 | |||
1483 | reqsz -= sz; | 1483 | reqsz -= sz; | |
1484 | if (msgbuf_p_cnt == VM_PHYSSEG_MAX) { | 1484 | if (msgbuf_p_cnt == VM_PHYSSEG_MAX) { | |
1485 | /* No more segments available, bail out. */ | 1485 | /* No more segments available, bail out. */ | |
1486 | printf("WARNING: MSGBUFSIZE (%zu) too large, using %zu.\n", | 1486 | printf("WARNING: MSGBUFSIZE (%zu) too large, using %zu.\n", | |
1487 | (size_t)MSGBUFSIZE, (size_t)(MSGBUFSIZE - reqsz)); | 1487 | (size_t)MSGBUFSIZE, (size_t)(MSGBUFSIZE - reqsz)); | |
1488 | return; | 1488 | return; | |
1489 | } | 1489 | } | |
1490 | 1490 | |||
1491 | sz = reqsz; | 1491 | sz = reqsz; | |
1492 | goto search_again; | 1492 | goto search_again; | |
1493 | } | 1493 | } | |
1494 | 1494 | |||
1495 | static void | 1495 | static void | |
1496 | init_x86_64_ksyms(void) | 1496 | init_x86_64_ksyms(void) | |
1497 | { | 1497 | { | |
1498 | #if NKSYMS || defined(DDB) || defined(MODULAR) | 1498 | #if NKSYMS || defined(DDB) || defined(MODULAR) | |
1499 | extern int end; | 1499 | extern int end; | |
1500 | extern int *esym; | 1500 | extern int *esym; | |
1501 | #ifndef XEN | 1501 | #ifndef XEN | |
1502 | struct btinfo_symtab *symtab; | 1502 | struct btinfo_symtab *symtab; | |
1503 | vaddr_t tssym, tesym; | 1503 | vaddr_t tssym, tesym; | |
1504 | #endif | 1504 | #endif | |
1505 | 1505 | |||
1506 | #ifdef DDB | 1506 | #ifdef DDB | |
1507 | db_machine_init(); | 1507 | db_machine_init(); | |
1508 | #endif | 1508 | #endif | |
1509 | 1509 | |||
1510 | #ifndef XEN | 1510 | #ifndef XEN | |
1511 | symtab = lookup_bootinfo(BTINFO_SYMTAB); | 1511 | symtab = lookup_bootinfo(BTINFO_SYMTAB); | |
1512 | if (symtab) { | 1512 | if (symtab) { | |
1513 | tssym = (vaddr_t)symtab->ssym + KERNBASE; | 1513 | tssym = (vaddr_t)symtab->ssym + KERNBASE; | |
1514 | tesym = (vaddr_t)symtab->esym + KERNBASE; | 1514 | tesym = (vaddr_t)symtab->esym + KERNBASE; | |
1515 | ksyms_addsyms_elf(symtab->nsym, (void *)tssym, (void *)tesym); | 1515 | ksyms_addsyms_elf(symtab->nsym, (void *)tssym, (void *)tesym); | |
1516 | } else | 1516 | } else | |
1517 | ksyms_addsyms_elf(*(long *)(void *)&end, | 1517 | ksyms_addsyms_elf(*(long *)(void *)&end, | |
1518 | ((long *)(void *)&end) + 1, esym); | 1518 | ((long *)(void *)&end) + 1, esym); | |
1519 | #else /* XEN */ | 1519 | #else /* XEN */ | |
1520 | esym = xen_start_info.mod_start ? | 1520 | esym = xen_start_info.mod_start ? | |
1521 | (void *)xen_start_info.mod_start : | 1521 | (void *)xen_start_info.mod_start : | |
1522 | (void *)xen_start_info.mfn_list; | 1522 | (void *)xen_start_info.mfn_list; | |
1523 | ksyms_addsyms_elf(*(int *)(void *)&end, | 1523 | ksyms_addsyms_elf(*(int *)(void *)&end, | |
1524 | ((int *)(void *)&end) + 1, esym); | 1524 | ((int *)(void *)&end) + 1, esym); | |
1525 | #endif /* XEN */ | 1525 | #endif /* XEN */ | |
1526 | #endif | 1526 | #endif | |
1527 | } | 1527 | } | |
1528 | 1528 | |||
1529 | void | 1529 | void | |
1530 | init_x86_64(paddr_t first_avail) | 1530 | init_x86_64(paddr_t first_avail) | |
1531 | { | 1531 | { | |
1532 | extern void consinit(void); | 1532 | extern void consinit(void); | |
1533 | struct region_descriptor region; | 1533 | struct region_descriptor region; | |
1534 | struct mem_segment_descriptor *ldt_segp; | 1534 | struct mem_segment_descriptor *ldt_segp; | |
1535 | int x; | 1535 | int x; | |
1536 | #ifndef XEN | 1536 | #ifndef XEN | |
1537 | int ist; | 1537 | int ist; | |
1538 | extern struct extent *iomem_ex; | 1538 | extern struct extent *iomem_ex; | |
1539 | #if !defined(REALEXTMEM) && !defined(REALBASEMEM) | 1539 | #if !defined(REALEXTMEM) && !defined(REALBASEMEM) | |
1540 | struct btinfo_memmap *bim; | 1540 | struct btinfo_memmap *bim; | |
1541 | #endif | 1541 | #endif | |
1542 | #endif /* !XEN */ | 1542 | #endif /* !XEN */ | |
1543 | 1543 | |||
1544 | cpu_probe(&cpu_info_primary); | 1544 | cpu_probe(&cpu_info_primary); | |
1545 | 1545 | |||
1546 | #ifdef XEN | 1546 | #ifdef XEN | |
1547 | KASSERT(HYPERVISOR_shared_info != NULL); | 1547 | KASSERT(HYPERVISOR_shared_info != NULL); | |
1548 | cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0]; | 1548 | cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0]; | |
1549 | 1549 | |||
1550 | __PRINTK(("init_x86_64(0x%lx)\n", first_avail)); | 1550 | __PRINTK(("init_x86_64(0x%lx)\n", first_avail)); | |
1551 | #endif /* XEN */ | 1551 | #endif /* XEN */ | |
1552 | 1552 | |||
1553 | cpu_init_msrs(&cpu_info_primary, true); | 1553 | cpu_init_msrs(&cpu_info_primary, true); | |
1554 | 1554 | |||
1555 | use_pae = 1; /* PAE always enabled in long mode */ | 1555 | use_pae = 1; /* PAE always enabled in long mode */ | |
1556 | 1556 | |||
1557 | #ifdef XEN | 1557 | #ifdef XEN | |
1558 | struct pcb *pcb = lwp_getpcb(&lwp0); | 1558 | struct pcb *pcb = lwp_getpcb(&lwp0); | |
1559 | mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM); | 1559 | mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM); | |
1560 | pcb->pcb_cr3 = xen_start_info.pt_base - KERNBASE; | 1560 | pcb->pcb_cr3 = xen_start_info.pt_base - KERNBASE; | |
1561 | __PRINTK(("pcb_cr3 0x%lx\n", xen_start_info.pt_base - KERNBASE)); | 1561 | __PRINTK(("pcb_cr3 0x%lx\n", xen_start_info.pt_base - KERNBASE)); | |
1562 | #endif | 1562 | #endif | |
1563 | 1563 | |||
1564 | #if NISA > 0 || NPCI > 0 | 1564 | #if NISA > 0 || NPCI > 0 | |
1565 | x86_bus_space_init(); | 1565 | x86_bus_space_init(); | |
1566 | #endif | 1566 | #endif | |
1567 | 1567 | |||
1568 | consinit(); /* XXX SHOULD NOT BE DONE HERE */ | 1568 | consinit(); /* XXX SHOULD NOT BE DONE HERE */ | |
1569 | 1569 | |||
1570 | /* | 1570 | /* | |
1571 | * Initialize PAGE_SIZE-dependent variables. | 1571 | * Initialize PAGE_SIZE-dependent variables. | |
1572 | */ | 1572 | */ | |
1573 | uvm_setpagesize(); | 1573 | uvm_setpagesize(); | |
1574 | 1574 | |||
1575 | uvmexp.ncolors = 2; | 1575 | uvmexp.ncolors = 2; | |
1576 | 1576 | |||
1577 | #ifndef XEN | 1577 | #ifndef XEN | |
1578 | /* | 1578 | /* | |
1579 | * Low memory reservations: | 1579 | * Low memory reservations: | |
1580 | * Page 0: BIOS data | 1580 | * Page 0: BIOS data | |
1581 | * Page 1: BIOS callback (not used yet, for symmetry with i386) | 1581 | * Page 1: BIOS callback (not used yet, for symmetry with i386) | |
1582 | * Page 2: MP bootstrap | 1582 | * Page 2: MP bootstrap code (MP_TRAMPOLINE) | |
1583 | * Page 3: ACPI wakeup code (ACPI_WAKEUP_ADDR) | 1583 | * Page 3: ACPI wakeup code (ACPI_WAKEUP_ADDR) | |
1584 | * Page 4: Temporary page table for 0MB-4MB | 1584 | * Page 4: Temporary page table for 0MB-4MB | |
1585 | * Page 5: Temporary page directory | 1585 | * Page 5: Temporary page directory | |
1586 | * Page 6: Temporary page map level 3 | 1586 | * Page 6: Temporary page map level 3 | |
1587 | * Page 7: Temporary page map level 4 | 1587 | * Page 7: Temporary page map level 4 | |
1588 | */ | 1588 | */ | |
1589 | avail_start = 8 * PAGE_SIZE; | 1589 | avail_start = 8 * PAGE_SIZE; | |
1590 | 1590 | |||
1591 | #if !defined(REALBASEMEM) && !defined(REALEXTMEM) | 1591 | #if !defined(REALBASEMEM) && !defined(REALEXTMEM) | |
1592 | /* | 1592 | /* | |
1593 | * Check to see if we have a memory map from the BIOS (passed | 1593 | * Check to see if we have a memory map from the BIOS (passed | |
1594 | * to us by the boot program. | 1594 | * to us by the boot program. | |
1595 | */ | 1595 | */ | |
1596 | bim = lookup_bootinfo(BTINFO_MEMMAP); | 1596 | bim = lookup_bootinfo(BTINFO_MEMMAP); | |
1597 | if (bim != NULL && bim->num > 0) | 1597 | if (bim != NULL && bim->num > 0) | |
1598 | initx86_parse_memmap(bim, iomem_ex); | 1598 | initx86_parse_memmap(bim, iomem_ex); | |
1599 | #endif /* ! REALBASEMEM && ! REALEXTMEM */ | 1599 | #endif /* ! REALBASEMEM && ! REALEXTMEM */ | |
1600 | 1600 | |||
1601 | /* | 1601 | /* | |
1602 | * If the loop above didn't find any valid segment, fall back to | 1602 | * If the loop above didn't find any valid segment, fall back to | |
1603 | * former code. | 1603 | * former code. | |
1604 | */ | 1604 | */ | |
1605 | if (mem_cluster_cnt == 0) | 1605 | if (mem_cluster_cnt == 0) | |
1606 | initx86_fake_memmap(iomem_ex); | 1606 | initx86_fake_memmap(iomem_ex); | |
1607 | 1607 | |||
1608 | #else /* XEN */ | 1608 | #else /* XEN */ | |
1609 | /* Parse Xen command line (replace bootinfo */ | 1609 | /* Parse Xen command line (replace bootinfo */ | |
1610 | xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL); | 1610 | xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL); | |
1611 | 1611 | |||
1612 | /* Determine physical address space */ | 1612 | /* Determine physical address space */ | |
1613 | avail_start = first_avail; | 1613 | avail_start = first_avail; | |
1614 | avail_end = ctob(xen_start_info.nr_pages); | 1614 | avail_end = ctob(xen_start_info.nr_pages); | |
1615 | pmap_pa_start = (KERNTEXTOFF - KERNBASE); | 1615 | pmap_pa_start = (KERNTEXTOFF - KERNBASE); | |
1616 | pmap_pa_end = avail_end; | 1616 | pmap_pa_end = avail_end; | |
1617 | __PRINTK(("pmap_pa_start 0x%lx avail_start 0x%lx avail_end 0x%lx\n", | 1617 | __PRINTK(("pmap_pa_start 0x%lx avail_start 0x%lx avail_end 0x%lx\n", | |
1618 | pmap_pa_start, avail_start, avail_end)); | 1618 | pmap_pa_start, avail_start, avail_end)); | |
1619 | #endif /* !XEN */ | 1619 | #endif /* !XEN */ | |
1620 | 1620 | |||
1621 | /* | 1621 | /* | |
1622 | * Call pmap initialization to make new kernel address space. | 1622 | * Call pmap initialization to make new kernel address space. | |
1623 | * We must do this before loading pages into the VM system. | 1623 | * We must do this before loading pages into the VM system. | |
1624 | */ | 1624 | */ | |
1625 | pmap_bootstrap(VM_MIN_KERNEL_ADDRESS); | 1625 | pmap_bootstrap(VM_MIN_KERNEL_ADDRESS); | |
1626 | 1626 | |||
1627 | if (avail_start != PAGE_SIZE) | 1627 | if (avail_start != PAGE_SIZE) | |
1628 | pmap_prealloc_lowmem_ptps(); | 1628 | pmap_prealloc_lowmem_ptps(); | |
1629 | 1629 | |||
1630 | #ifndef XEN | 1630 | #ifndef XEN | |
1631 | initx86_load_memmap(first_avail); | 1631 | initx86_load_memmap(first_avail); | |
1632 | #else /* XEN */ | 1632 | #else /* XEN */ | |
1633 | kern_end = KERNBASE + first_avail; | 1633 | kern_end = KERNBASE + first_avail; | |
1634 | physmem = xen_start_info.nr_pages; | 1634 | physmem = xen_start_info.nr_pages; | |
1635 | 1635 | |||
1636 | uvm_page_physload(atop(avail_start), | 1636 | uvm_page_physload(atop(avail_start), | |
1637 | atop(avail_end), atop(avail_start), | 1637 | atop(avail_end), atop(avail_start), | |
1638 | atop(avail_end), VM_FREELIST_DEFAULT); | 1638 | atop(avail_end), VM_FREELIST_DEFAULT); | |
1639 | #endif /* !XEN */ | 1639 | #endif /* !XEN */ | |
1640 | 1640 | |||
1641 | init_x86_64_msgbuf(); | 1641 | init_x86_64_msgbuf(); | |
1642 | 1642 | |||
1643 | pmap_growkernel(VM_MIN_KERNEL_ADDRESS + 32 * 1024 * 1024); | 1643 | pmap_growkernel(VM_MIN_KERNEL_ADDRESS + 32 * 1024 * 1024); | |
1644 | 1644 | |||
1645 | kpreempt_disable(); | 1645 | kpreempt_disable(); | |
1646 | pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0); | 1646 | pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0); | |
1647 | pmap_update(pmap_kernel()); | 1647 | pmap_update(pmap_kernel()); | |
1648 | memset((void *)idt_vaddr, 0, PAGE_SIZE); | 1648 | memset((void *)idt_vaddr, 0, PAGE_SIZE); | |
1649 | 1649 | |||
1650 | #ifndef XEN | 1650 | #ifndef XEN | |
1651 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ); | 1651 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ); | |
1652 | #endif | 1652 | #endif | |
1653 | pmap_kenter_pa(idt_vaddr + PAGE_SIZE, idt_paddr + PAGE_SIZE, | 1653 | pmap_kenter_pa(idt_vaddr + PAGE_SIZE, idt_paddr + PAGE_SIZE, | |
1654 | VM_PROT_READ|VM_PROT_WRITE, 0); | 1654 | VM_PROT_READ|VM_PROT_WRITE, 0); | |
1655 | #ifdef XEN | 1655 | #ifdef XEN | |
1656 | /* Steal one more page for LDT */ | 1656 | /* Steal one more page for LDT */ | |
1657 | pmap_kenter_pa(idt_vaddr + 2 * PAGE_SIZE, idt_paddr + 2 * PAGE_SIZE, | 1657 | pmap_kenter_pa(idt_vaddr + 2 * PAGE_SIZE, idt_paddr + 2 * PAGE_SIZE, | |
1658 | VM_PROT_READ|VM_PROT_WRITE, 0); | 1658 | VM_PROT_READ|VM_PROT_WRITE, 0); | |
1659 | #endif | 1659 | #endif | |
1660 | pmap_kenter_pa(lo32_vaddr, lo32_paddr, VM_PROT_READ|VM_PROT_WRITE, 0); | 1660 | pmap_kenter_pa(lo32_vaddr, lo32_paddr, VM_PROT_READ|VM_PROT_WRITE, 0); | |
1661 | pmap_update(pmap_kernel()); | 1661 | pmap_update(pmap_kernel()); | |
1662 | 1662 | |||
1663 | #ifndef XEN | 1663 | #ifndef XEN | |
1664 | idt_init(); | 1664 | idt_init(); | |
1665 | idt = (struct gate_descriptor *)idt_vaddr; | 1665 | idt = (struct gate_descriptor *)idt_vaddr; | |
1666 | gdtstore = (char *)(idt + NIDT); | 1666 | gdtstore = (char *)(idt + NIDT); | |
1667 | ldtstore = gdtstore + DYNSEL_START; | 1667 | ldtstore = gdtstore + DYNSEL_START; | |
1668 | #else | 1668 | #else | |
1669 | xen_idt = (struct trap_info *)idt_vaddr; | 1669 | xen_idt = (struct trap_info *)idt_vaddr; | |
1670 | xen_idt_idx = 0; | 1670 | xen_idt_idx = 0; | |
1671 | /* Xen wants page aligned GDT/LDT in separated pages */ | 1671 | /* Xen wants page aligned GDT/LDT in separated pages */ | |
1672 | ldtstore = (char *) roundup((vaddr_t) (xen_idt + NIDT), PAGE_SIZE); | 1672 | ldtstore = (char *) roundup((vaddr_t) (xen_idt + NIDT), PAGE_SIZE); | |
1673 | gdtstore = (char *) (ldtstore + PAGE_SIZE); | 1673 | gdtstore = (char *) (ldtstore + PAGE_SIZE); | |
1674 | #endif /* XEN */ | 1674 | #endif /* XEN */ | |
1675 | 1675 | |||
1676 | /* make gdt gates and memory segments */ | 1676 | /* make gdt gates and memory segments */ | |
1677 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GCODE_SEL), 0, | 1677 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GCODE_SEL), 0, | |
1678 | 0xfffff, SDT_MEMERA, SEL_KPL, 1, 0, 1); | 1678 | 0xfffff, SDT_MEMERA, SEL_KPL, 1, 0, 1); | |
1679 | 1679 | |||
1680 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GDATA_SEL), 0, | 1680 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GDATA_SEL), 0, | |
1681 | 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 0, 1); | 1681 | 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 0, 1); | |
1682 | 1682 | |||
1683 | #ifndef XEN | 1683 | #ifndef XEN | |
1684 | set_sys_segment(GDT_ADDR_SYS(gdtstore, GLDT_SEL), ldtstore, | 1684 | set_sys_segment(GDT_ADDR_SYS(gdtstore, GLDT_SEL), ldtstore, | |
1685 | LDT_SIZE - 1, SDT_SYSLDT, SEL_KPL, 0); | 1685 | LDT_SIZE - 1, SDT_SYSLDT, SEL_KPL, 0); | |
1686 | #endif | 1686 | #endif | |
1687 | 1687 | |||
1688 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE_SEL), 0, | 1688 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE_SEL), 0, | |
1689 | x86_btop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1); | 1689 | x86_btop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMERA, SEL_UPL, 1, 0, 1); | |
1690 | 1690 | |||
1691 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA_SEL), 0, | 1691 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA_SEL), 0, | |
1692 | x86_btop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 0, 1); | 1692 | x86_btop(VM_MAXUSER_ADDRESS) - 1, SDT_MEMRWA, SEL_UPL, 1, 0, 1); | |
1693 | 1693 | |||
1694 | /* make ldt gates and memory segments */ | 1694 | /* make ldt gates and memory segments */ | |
1695 | setgate((struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL), | 1695 | setgate((struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL), | |
1696 | &IDTVEC(oosyscall), 0, SDT_SYS386CGT, SEL_UPL, | 1696 | &IDTVEC(oosyscall), 0, SDT_SYS386CGT, SEL_UPL, | |
1697 | GSEL(GCODE_SEL, SEL_KPL)); | 1697 | GSEL(GCODE_SEL, SEL_KPL)); | |
1698 | *(struct mem_segment_descriptor *)(ldtstore + LUCODE_SEL) = | 1698 | *(struct mem_segment_descriptor *)(ldtstore + LUCODE_SEL) = | |
1699 | *GDT_ADDR_MEM(gdtstore, GUCODE_SEL); | 1699 | *GDT_ADDR_MEM(gdtstore, GUCODE_SEL); | |
1700 | *(struct mem_segment_descriptor *)(ldtstore + LUDATA_SEL) = | 1700 | *(struct mem_segment_descriptor *)(ldtstore + LUDATA_SEL) = | |
1701 | *GDT_ADDR_MEM(gdtstore, GUDATA_SEL); | 1701 | *GDT_ADDR_MEM(gdtstore, GUDATA_SEL); | |
1702 | 1702 | |||
1703 | /* | 1703 | /* | |
1704 | * 32 bit GDT entries. | 1704 | * 32 bit GDT entries. | |
1705 | */ | 1705 | */ | |
1706 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE32_SEL), 0, | 1706 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUCODE32_SEL), 0, | |
1707 | x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0); | 1707 | x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMERA, SEL_UPL, 1, 1, 0); | |
1708 | 1708 | |||
1709 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA32_SEL), 0, | 1709 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUDATA32_SEL), 0, | |
1710 | x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); | 1710 | x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); | |
1711 | 1711 | |||
1712 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUFS_SEL), 0, | 1712 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUFS_SEL), 0, | |
1713 | x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); | 1713 | x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); | |
1714 | 1714 | |||
1715 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUGS_SEL), 0, | 1715 | set_mem_segment(GDT_ADDR_MEM(gdtstore, GUGS_SEL), 0, | |
1716 | x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); | 1716 | x86_btop(VM_MAXUSER_ADDRESS32) - 1, SDT_MEMRWA, SEL_UPL, 1, 1, 0); | |
1717 | 1717 | |||
1718 | /* | 1718 | /* | |
1719 | * 32 bit LDT entries. | 1719 | * 32 bit LDT entries. | |
1720 | */ | 1720 | */ | |
1721 | ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUCODE32_SEL); | 1721 | ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUCODE32_SEL); | |
1722 | set_mem_segment(ldt_segp, 0, x86_btop(VM_MAXUSER_ADDRESS32) - 1, | 1722 | set_mem_segment(ldt_segp, 0, x86_btop(VM_MAXUSER_ADDRESS32) - 1, | |
1723 | SDT_MEMERA, SEL_UPL, 1, 1, 0); | 1723 | SDT_MEMERA, SEL_UPL, 1, 1, 0); | |
1724 | ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUDATA32_SEL); | 1724 | ldt_segp = (struct mem_segment_descriptor *)(ldtstore + LUDATA32_SEL); | |
1725 | set_mem_segment(ldt_segp, 0, x86_btop(VM_MAXUSER_ADDRESS32) - 1, | 1725 | set_mem_segment(ldt_segp, 0, x86_btop(VM_MAXUSER_ADDRESS32) - 1, | |
1726 | SDT_MEMRWA, SEL_UPL, 1, 1, 0); | 1726 | SDT_MEMRWA, SEL_UPL, 1, 1, 0); | |
1727 | 1727 | |||
1728 | /* | 1728 | /* | |
1729 | * Other entries. | 1729 | * Other entries. | |
1730 | */ | 1730 | */ | |
1731 | memcpy((struct gate_descriptor *)(ldtstore + LSOL26CALLS_SEL), | 1731 | memcpy((struct gate_descriptor *)(ldtstore + LSOL26CALLS_SEL), | |
1732 | (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL), | 1732 | (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL), | |
1733 | sizeof (struct gate_descriptor)); | 1733 | sizeof (struct gate_descriptor)); | |
1734 | memcpy((struct gate_descriptor *)(ldtstore + LBSDICALLS_SEL), | 1734 | memcpy((struct gate_descriptor *)(ldtstore + LBSDICALLS_SEL), | |
1735 | (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL), | 1735 | (struct gate_descriptor *)(ldtstore + LSYS5CALLS_SEL), | |
1736 | sizeof (struct gate_descriptor)); | 1736 | sizeof (struct gate_descriptor)); | |
1737 | 1737 | |||
1738 | /* exceptions */ | 1738 | /* exceptions */ | |
1739 | for (x = 0; x < 32; x++) { | 1739 | for (x = 0; x < 32; x++) { | |
1740 | #ifndef XEN | 1740 | #ifndef XEN | |
1741 | idt_vec_reserve(x); | 1741 | idt_vec_reserve(x); | |
1742 | switch (x) { | 1742 | switch (x) { | |
1743 | case 2: /* NMI */ | 1743 | case 2: /* NMI */ | |
1744 | ist = 3; | 1744 | ist = 3; | |
1745 | break; | 1745 | break; | |
1746 | case 8: /* double fault */ | 1746 | case 8: /* double fault */ | |
1747 | ist = 2; | 1747 | ist = 2; | |
1748 | break; | 1748 | break; | |
1749 | default: | 1749 | default: | |
1750 | ist = 0; | 1750 | ist = 0; | |
1751 | break; | 1751 | break; | |
1752 | } | 1752 | } | |
1753 | setgate(&idt[x], IDTVEC(exceptions)[x], ist, SDT_SYS386IGT, | 1753 | setgate(&idt[x], IDTVEC(exceptions)[x], ist, SDT_SYS386IGT, | |
1754 | (x == 3 || x == 4) ? SEL_UPL : SEL_KPL, | 1754 | (x == 3 || x == 4) ? SEL_UPL : SEL_KPL, | |
1755 | GSEL(GCODE_SEL, SEL_KPL)); | 1755 | GSEL(GCODE_SEL, SEL_KPL)); | |
1756 | #else /* XEN */ | 1756 | #else /* XEN */ | |
1757 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); | 1757 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); | |
1758 | xen_idt[xen_idt_idx].vector = x; | 1758 | xen_idt[xen_idt_idx].vector = x; | |
1759 | 1759 | |||
1760 | switch (x) { | 1760 | switch (x) { | |
1761 | case 2: /* NMI */ | 1761 | case 2: /* NMI */ | |
1762 | case 18: /* MCA */ | 1762 | case 18: /* MCA */ | |
1763 | TI_SET_IF(&(xen_idt[xen_idt_idx]), 2); | 1763 | TI_SET_IF(&(xen_idt[xen_idt_idx]), 2); | |
1764 | break; | 1764 | break; | |
1765 | case 3: | 1765 | case 3: | |
1766 | case 4: | 1766 | case 4: | |
1767 | xen_idt[xen_idt_idx].flags = SEL_UPL; | 1767 | xen_idt[xen_idt_idx].flags = SEL_UPL; | |
1768 | break; | 1768 | break; | |
1769 | default: | 1769 | default: | |
1770 | xen_idt[xen_idt_idx].flags = SEL_KPL; | 1770 | xen_idt[xen_idt_idx].flags = SEL_KPL; | |
1771 | break; | 1771 | break; | |
1772 | } | 1772 | } | |
1773 | 1773 | |||
1774 | xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); | 1774 | xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); | |
1775 | xen_idt[xen_idt_idx].address = | 1775 | xen_idt[xen_idt_idx].address = | |
1776 | (unsigned long)IDTVEC(exceptions)[x]; | 1776 | (unsigned long)IDTVEC(exceptions)[x]; | |
1777 | xen_idt_idx++; | 1777 | xen_idt_idx++; | |
1778 | #endif /* XEN */ | 1778 | #endif /* XEN */ | |
1779 | } | 1779 | } | |
1780 | 1780 | |||
1781 | /* new-style interrupt gate for syscalls */ | 1781 | /* new-style interrupt gate for syscalls */ | |
1782 | #ifndef XEN | 1782 | #ifndef XEN | |
1783 | idt_vec_reserve(128); | 1783 | idt_vec_reserve(128); | |
1784 | setgate(&idt[128], &IDTVEC(osyscall), 0, SDT_SYS386IGT, SEL_UPL, | 1784 | setgate(&idt[128], &IDTVEC(osyscall), 0, SDT_SYS386IGT, SEL_UPL, | |
1785 | GSEL(GCODE_SEL, SEL_KPL)); | 1785 | GSEL(GCODE_SEL, SEL_KPL)); | |
1786 | #else | 1786 | #else | |
1787 | xen_idt[xen_idt_idx].vector = 128; | 1787 | xen_idt[xen_idt_idx].vector = 128; | |
1788 | xen_idt[xen_idt_idx].flags = SEL_KPL; | 1788 | xen_idt[xen_idt_idx].flags = SEL_KPL; | |
1789 | xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); | 1789 | xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); | |
1790 | xen_idt[xen_idt_idx].address = (unsigned long) &IDTVEC(osyscall); | 1790 | xen_idt[xen_idt_idx].address = (unsigned long) &IDTVEC(osyscall); | |
1791 | xen_idt_idx++; | 1791 | xen_idt_idx++; | |
1792 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ); | 1792 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ); | |
1793 | #endif /* XEN */ | 1793 | #endif /* XEN */ | |
1794 | kpreempt_enable(); | 1794 | kpreempt_enable(); | |
1795 | 1795 | |||
1796 | setregion(®ion, gdtstore, DYNSEL_START - 1); | 1796 | setregion(®ion, gdtstore, DYNSEL_START - 1); | |
1797 | lgdt(®ion); | 1797 | lgdt(®ion); | |
1798 | 1798 | |||
1799 | #ifdef XEN | 1799 | #ifdef XEN | |
1800 | /* Init Xen callbacks and syscall handlers */ | 1800 | /* Init Xen callbacks and syscall handlers */ | |
1801 | if (HYPERVISOR_set_callbacks( | 1801 | if (HYPERVISOR_set_callbacks( | |
1802 | (unsigned long) hypervisor_callback, | 1802 | (unsigned long) hypervisor_callback, | |
1803 | (unsigned long) failsafe_callback, | 1803 | (unsigned long) failsafe_callback, | |
1804 | (unsigned long) Xsyscall)) | 1804 | (unsigned long) Xsyscall)) | |
1805 | panic("HYPERVISOR_set_callbacks() failed"); | 1805 | panic("HYPERVISOR_set_callbacks() failed"); | |
1806 | #endif /* XEN */ | 1806 | #endif /* XEN */ | |
1807 | cpu_init_idt(); | 1807 | cpu_init_idt(); | |
1808 | 1808 | |||
1809 | init_x86_64_ksyms(); | 1809 | init_x86_64_ksyms(); | |
1810 | 1810 | |||
1811 | #ifndef XEN | 1811 | #ifndef XEN | |
1812 | intr_default_setup(); | 1812 | intr_default_setup(); | |
1813 | #else | 1813 | #else | |
1814 | events_default_setup(); | 1814 | events_default_setup(); | |
1815 | #endif | 1815 | #endif | |
1816 | 1816 | |||
1817 | splraise(IPL_HIGH); | 1817 | splraise(IPL_HIGH); | |
1818 | x86_enable_intr(); | 1818 | x86_enable_intr(); | |
1819 | 1819 | |||
1820 | #ifdef DDB | 1820 | #ifdef DDB | |
1821 | if (boothowto & RB_KDB) | 1821 | if (boothowto & RB_KDB) | |
1822 | Debugger(); | 1822 | Debugger(); | |
1823 | #endif | 1823 | #endif | |
1824 | #ifdef KGDB | 1824 | #ifdef KGDB | |
1825 | kgdb_port_init(); | 1825 | kgdb_port_init(); | |
1826 | if (boothowto & RB_KDB) { | 1826 | if (boothowto & RB_KDB) { | |
1827 | kgdb_debug_init = 1; | 1827 | kgdb_debug_init = 1; | |
1828 | kgdb_connect(1); | 1828 | kgdb_connect(1); | |
1829 | } | 1829 | } | |
1830 | #endif | 1830 | #endif | |
1831 | } | 1831 | } | |
1832 | 1832 | |||
1833 | void | 1833 | void | |
1834 | cpu_reset(void) | 1834 | cpu_reset(void) | |
1835 | { | 1835 | { | |
1836 | x86_disable_intr(); | 1836 | x86_disable_intr(); | |
1837 | 1837 | |||
1838 | #ifdef XEN | 1838 | #ifdef XEN | |
1839 | HYPERVISOR_reboot(); | 1839 | HYPERVISOR_reboot(); | |
1840 | #else | 1840 | #else | |
1841 | 1841 | |||
1842 | x86_reset(); | 1842 | x86_reset(); | |
1843 | 1843 | |||
1844 | /* | 1844 | /* | |
1845 | * Try to cause a triple fault and watchdog reset by making the IDT | 1845 | * Try to cause a triple fault and watchdog reset by making the IDT | |
1846 | * invalid and causing a fault. | 1846 | * invalid and causing a fault. | |
1847 | */ | 1847 | */ | |
1848 | kpreempt_disable(); | 1848 | kpreempt_disable(); | |
1849 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); | 1849 | pmap_changeprot_local(idt_vaddr, VM_PROT_READ|VM_PROT_WRITE); | |
1850 | pmap_changeprot_local(idt_vaddr + PAGE_SIZE, | 1850 | pmap_changeprot_local(idt_vaddr + PAGE_SIZE, | |
1851 | VM_PROT_READ|VM_PROT_WRITE); | 1851 | VM_PROT_READ|VM_PROT_WRITE); | |
1852 | memset((void *)idt, 0, NIDT * sizeof(idt[0])); | 1852 | memset((void *)idt, 0, NIDT * sizeof(idt[0])); | |
1853 | kpreempt_enable(); | 1853 | kpreempt_enable(); | |
1854 | breakpoint(); | 1854 | breakpoint(); | |
1855 | 1855 | |||
1856 | #if 0 | 1856 | #if 0 | |
1857 | /* | 1857 | /* | |
1858 | * Try to cause a triple fault and watchdog reset by unmapping the | 1858 | * Try to cause a triple fault and watchdog reset by unmapping the | |
1859 | * entire address space and doing a TLB flush. | 1859 | * entire address space and doing a TLB flush. | |
1860 | */ | 1860 | */ | |
1861 | memset((void *)PTD, 0, PAGE_SIZE); | 1861 | memset((void *)PTD, 0, PAGE_SIZE); | |
1862 | tlbflush(); | 1862 | tlbflush(); | |
1863 | #endif | 1863 | #endif | |
1864 | #endif /* XEN */ | 1864 | #endif /* XEN */ | |
1865 | 1865 | |||
1866 | for (;;); | 1866 | for (;;); | |
1867 | } | 1867 | } | |
1868 | 1868 | |||
1869 | void | 1869 | void | |
1870 | cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags) | 1870 | cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags) | |
1871 | { | 1871 | { | |
1872 | const struct trapframe *tf = l->l_md.md_regs; | 1872 | const struct trapframe *tf = l->l_md.md_regs; | |
1873 | __greg_t ras_rip; | 1873 | __greg_t ras_rip; | |
1874 | 1874 | |||
1875 | /* Copy general registers member by member */ | 1875 | /* Copy general registers member by member */ | |
1876 | #define copy_from_tf(reg, REG, idx) mcp->__gregs[_REG_##REG] = tf->tf_##reg; | 1876 | #define copy_from_tf(reg, REG, idx) mcp->__gregs[_REG_##REG] = tf->tf_##reg; | |
1877 | _FRAME_GREG(copy_from_tf) | 1877 | _FRAME_GREG(copy_from_tf) | |
1878 | #undef copy_from_tf | 1878 | #undef copy_from_tf | |
1879 | 1879 | |||
1880 | if ((ras_rip = (__greg_t)ras_lookup(l->l_proc, | 1880 | if ((ras_rip = (__greg_t)ras_lookup(l->l_proc, | |
1881 | (void *) mcp->__gregs[_REG_RIP])) != -1) | 1881 | (void *) mcp->__gregs[_REG_RIP])) != -1) | |
1882 | mcp->__gregs[_REG_RIP] = ras_rip; | 1882 | mcp->__gregs[_REG_RIP] = ras_rip; | |
1883 | 1883 | |||
1884 | *flags |= _UC_CPU; | 1884 | *flags |= _UC_CPU; | |
1885 | 1885 | |||
1886 | mcp->_mc_tlsbase = (uintptr_t)l->l_private; | 1886 | mcp->_mc_tlsbase = (uintptr_t)l->l_private; | |
1887 | *flags |= _UC_TLSBASE; | 1887 | *flags |= _UC_TLSBASE; | |
1888 | 1888 | |||
1889 | process_read_fpregs_xmm(l, (struct fxsave *)&mcp->__fpregs); | 1889 | process_read_fpregs_xmm(l, (struct fxsave *)&mcp->__fpregs); | |
1890 | *flags |= _UC_FPU; | 1890 | *flags |= _UC_FPU; | |
1891 | } | 1891 | } | |
1892 | 1892 | |||
1893 | int | 1893 | int | |
1894 | cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags) | 1894 | cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags) | |
1895 | { | 1895 | { | |
1896 | struct trapframe *tf = l->l_md.md_regs; | 1896 | struct trapframe *tf = l->l_md.md_regs; | |
1897 | const __greg_t *gr = mcp->__gregs; | 1897 | const __greg_t *gr = mcp->__gregs; | |
1898 | struct proc *p = l->l_proc; | 1898 | struct proc *p = l->l_proc; | |
1899 | int error; | 1899 | int error; | |
1900 | int err, trapno; | 1900 | int err, trapno; | |
1901 | int64_t rflags; | 1901 | int64_t rflags; | |
1902 | 1902 | |||
1903 | CTASSERT(sizeof (mcontext_t) == 26 * 8 + 8 + 512); | 1903 | CTASSERT(sizeof (mcontext_t) == 26 * 8 + 8 + 512); | |
1904 | 1904 | |||
1905 | if ((flags & _UC_CPU) != 0) { | 1905 | if ((flags & _UC_CPU) != 0) { | |
1906 | error = cpu_mcontext_validate(l, mcp); | 1906 | error = cpu_mcontext_validate(l, mcp); | |
1907 | if (error != 0) | 1907 | if (error != 0) | |
1908 | return error; | 1908 | return error; | |
1909 | /* | 1909 | /* | |
1910 | * save and restore some values we don't want to change. | 1910 | * save and restore some values we don't want to change. | |
1911 | * _FRAME_GREG(copy_to_tf) below overwrites them. | 1911 | * _FRAME_GREG(copy_to_tf) below overwrites them. | |
1912 | * | 1912 | * | |
1913 | * XXX maybe inline this. | 1913 | * XXX maybe inline this. | |
1914 | */ | 1914 | */ | |
1915 | rflags = tf->tf_rflags; | 1915 | rflags = tf->tf_rflags; | |
1916 | err = tf->tf_err; | 1916 | err = tf->tf_err; | |
1917 | trapno = tf->tf_trapno; | 1917 | trapno = tf->tf_trapno; | |
1918 | 1918 | |||
1919 | /* Copy general registers member by member */ | 1919 | /* Copy general registers member by member */ | |
1920 | #define copy_to_tf(reg, REG, idx) tf->tf_##reg = gr[_REG_##REG]; | 1920 | #define copy_to_tf(reg, REG, idx) tf->tf_##reg = gr[_REG_##REG]; | |
1921 | _FRAME_GREG(copy_to_tf) | 1921 | _FRAME_GREG(copy_to_tf) | |
1922 | #undef copy_to_tf | 1922 | #undef copy_to_tf | |
1923 | 1923 | |||
1924 | #ifdef XEN | 1924 | #ifdef XEN | |
1925 | /* | 1925 | /* | |
1926 | * Xen has its own way of dealing with %cs and %ss, | 1926 | * Xen has its own way of dealing with %cs and %ss, | |
1927 | * reset it to proper values. | 1927 | * reset it to proper values. | |
1928 | */ | 1928 | */ | |
1929 | tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); | 1929 | tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); | |
1930 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); | 1930 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); | |
1931 | #endif | 1931 | #endif | |
1932 | rflags &= ~PSL_USER; | 1932 | rflags &= ~PSL_USER; | |
1933 | tf->tf_rflags = rflags | (gr[_REG_RFLAGS] & PSL_USER); | 1933 | tf->tf_rflags = rflags | (gr[_REG_RFLAGS] & PSL_USER); | |
1934 | tf->tf_err = err; | 1934 | tf->tf_err = err; | |
1935 | tf->tf_trapno = trapno; | 1935 | tf->tf_trapno = trapno; | |
1936 | 1936 | |||
1937 | l->l_md.md_flags |= MDL_IRET; | 1937 | l->l_md.md_flags |= MDL_IRET; | |
1938 | } | 1938 | } | |
1939 | 1939 | |||
1940 | if ((flags & _UC_FPU) != 0) | 1940 | if ((flags & _UC_FPU) != 0) | |
1941 | process_write_fpregs_xmm(l, (const struct fxsave *)&mcp->__fpregs); | 1941 | process_write_fpregs_xmm(l, (const struct fxsave *)&mcp->__fpregs); | |
1942 | 1942 | |||
1943 | if ((flags & _UC_TLSBASE) != 0) | 1943 | if ((flags & _UC_TLSBASE) != 0) | |
1944 | lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase); | 1944 | lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase); | |
1945 | 1945 | |||
1946 | mutex_enter(p->p_lock); | 1946 | mutex_enter(p->p_lock); | |
1947 | if (flags & _UC_SETSTACK) | 1947 | if (flags & _UC_SETSTACK) | |
1948 | l->l_sigstk.ss_flags |= SS_ONSTACK; | 1948 | l->l_sigstk.ss_flags |= SS_ONSTACK; | |
1949 | if (flags & _UC_CLRSTACK) | 1949 | if (flags & _UC_CLRSTACK) | |
1950 | l->l_sigstk.ss_flags &= ~SS_ONSTACK; | 1950 | l->l_sigstk.ss_flags &= ~SS_ONSTACK; | |
1951 | mutex_exit(p->p_lock); | 1951 | mutex_exit(p->p_lock); | |
1952 | 1952 | |||
1953 | return 0; | 1953 | return 0; | |
1954 | } | 1954 | } | |
1955 | 1955 | |||
1956 | int | 1956 | int | |
1957 | cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp) | 1957 | cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp) | |
1958 | { | 1958 | { | |
1959 | const __greg_t *gr; | 1959 | const __greg_t *gr; | |
1960 | uint16_t sel; | 1960 | uint16_t sel; | |
1961 | int error; | 1961 | int error; | |
1962 | struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; | 1962 | struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap; | |
1963 | struct proc *p = l->l_proc; | 1963 | struct proc *p = l->l_proc; | |
1964 | struct trapframe *tf = l->l_md.md_regs; | 1964 | struct trapframe *tf = l->l_md.md_regs; | |
1965 | 1965 | |||
1966 | gr = mcp->__gregs; | 1966 | gr = mcp->__gregs; | |
1967 | 1967 | |||
1968 | if (((gr[_REG_RFLAGS] ^ tf->tf_rflags) & PSL_USERSTATIC) != 0) | 1968 | if (((gr[_REG_RFLAGS] ^ tf->tf_rflags) & PSL_USERSTATIC) != 0) | |
1969 | return EINVAL; | 1969 | return EINVAL; | |
1970 | 1970 | |||
1971 | if (__predict_false(pmap->pm_ldt != NULL)) { | 1971 | if (__predict_false(pmap->pm_ldt != NULL)) { | |
1972 | error = valid_user_selector(l, gr[_REG_ES]); | 1972 | error = valid_user_selector(l, gr[_REG_ES]); | |
1973 | if (error != 0) | 1973 | if (error != 0) | |
1974 | return error; | 1974 | return error; | |
1975 | 1975 | |||
1976 | error = valid_user_selector(l, gr[_REG_FS]); | 1976 | error = valid_user_selector(l, gr[_REG_FS]); | |
1977 | if (error != 0) | 1977 | if (error != 0) | |
1978 | return error; | 1978 | return error; | |
1979 | 1979 | |||
1980 | error = valid_user_selector(l, gr[_REG_GS]); | 1980 | error = valid_user_selector(l, gr[_REG_GS]); | |
1981 | if (error != 0) | 1981 | if (error != 0) | |
1982 | return error; | 1982 | return error; | |
1983 | 1983 | |||
1984 | if ((gr[_REG_DS] & 0xffff) == 0) | 1984 | if ((gr[_REG_DS] & 0xffff) == 0) | |
1985 | return EINVAL; | 1985 | return EINVAL; | |
1986 | error = valid_user_selector(l, gr[_REG_DS]); | 1986 | error = valid_user_selector(l, gr[_REG_DS]); | |
1987 | if (error != 0) | 1987 | if (error != 0) | |
1988 | return error; | 1988 | return error; | |
1989 | 1989 | |||
1990 | #ifndef XEN | 1990 | #ifndef XEN | |
1991 | if ((gr[_REG_SS] & 0xffff) == 0) | 1991 | if ((gr[_REG_SS] & 0xffff) == 0) | |
1992 | return EINVAL; | 1992 | return EINVAL; | |
1993 | error = valid_user_selector(l, gr[_REG_SS]); | 1993 | error = valid_user_selector(l, gr[_REG_SS]); | |
1994 | if (error != 0) | 1994 | if (error != 0) | |
1995 | return error; | 1995 | return error; | |
1996 | #endif | 1996 | #endif | |
1997 | } else { | 1997 | } else { | |
1998 | #define VUD(sel) \ | 1998 | #define VUD(sel) \ | |
1999 | ((p->p_flag & PK_32) ? VALID_USER_DSEL32(sel) : VALID_USER_DSEL(sel)) | 1999 | ((p->p_flag & PK_32) ? VALID_USER_DSEL32(sel) : VALID_USER_DSEL(sel)) | |
2000 | sel = gr[_REG_ES] & 0xffff; | 2000 | sel = gr[_REG_ES] & 0xffff; | |
2001 | if (sel != 0 && !VUD(sel)) | 2001 | if (sel != 0 && !VUD(sel)) | |
2002 | return EINVAL; | 2002 | return EINVAL; | |
2003 | 2003 | |||
2004 | /* XXX: Shouldn't this be FSEL32? */ | 2004 | /* XXX: Shouldn't this be FSEL32? */ | |
2005 | #define VUF(sel) \ | 2005 | #define VUF(sel) \ | |
2006 | ((p->p_flag & PK_32) ? VALID_USER_DSEL32(sel) : VALID_USER_DSEL(sel)) | 2006 | ((p->p_flag & PK_32) ? VALID_USER_DSEL32(sel) : VALID_USER_DSEL(sel)) | |
2007 | sel = gr[_REG_FS] & 0xffff; | 2007 | sel = gr[_REG_FS] & 0xffff; | |
2008 | if (sel != 0 && !VUF(sel)) | 2008 | if (sel != 0 && !VUF(sel)) | |
2009 | return EINVAL; | 2009 | return EINVAL; | |
2010 | 2010 | |||
2011 | #define VUG(sel) \ | 2011 | #define VUG(sel) \ | |
2012 | ((p->p_flag & PK_32) ? VALID_USER_GSEL32(sel) : VALID_USER_DSEL(sel)) | 2012 | ((p->p_flag & PK_32) ? VALID_USER_GSEL32(sel) : VALID_USER_DSEL(sel)) | |
2013 | sel = gr[_REG_GS] & 0xffff; | 2013 | sel = gr[_REG_GS] & 0xffff; | |
2014 | if (sel != 0 && !VUG(sel)) | 2014 | if (sel != 0 && !VUG(sel)) | |
2015 | return EINVAL; | 2015 | return EINVAL; | |
2016 | 2016 | |||
2017 | sel = gr[_REG_DS] & 0xffff; | 2017 | sel = gr[_REG_DS] & 0xffff; | |
2018 | if (!VUD(sel)) | 2018 | if (!VUD(sel)) | |
2019 | return EINVAL; | 2019 | return EINVAL; | |
2020 | 2020 | |||
2021 | #ifndef XEN | 2021 | #ifndef XEN | |
2022 | sel = gr[_REG_SS] & 0xffff; | 2022 | sel = gr[_REG_SS] & 0xffff; | |
2023 | if (!VUD(sel)) | 2023 | if (!VUD(sel)) | |
2024 | return EINVAL; | 2024 | return EINVAL; | |
2025 | #endif | 2025 | #endif | |
2026 | 2026 | |||
2027 | } | 2027 | } | |
2028 | 2028 | |||
2029 | #ifndef XEN | 2029 | #ifndef XEN | |
2030 | #define VUC(sel) \ | 2030 | #define VUC(sel) \ | |
2031 | ((p->p_flag & PK_32) ? VALID_USER_CSEL32(sel) : VALID_USER_CSEL(sel)) | 2031 | ((p->p_flag & PK_32) ? VALID_USER_CSEL32(sel) : VALID_USER_CSEL(sel)) | |
2032 | sel = gr[_REG_CS] & 0xffff; | 2032 | sel = gr[_REG_CS] & 0xffff; | |
2033 | if (!VUC(sel)) | 2033 | if (!VUC(sel)) | |
2034 | return EINVAL; | 2034 | return EINVAL; | |
2035 | #endif | 2035 | #endif | |
2036 | 2036 | |||
2037 | if (gr[_REG_RIP] >= VM_MAXUSER_ADDRESS) | 2037 | if (gr[_REG_RIP] >= VM_MAXUSER_ADDRESS) | |
2038 | return EINVAL; | 2038 | return EINVAL; | |
2039 | return 0; | 2039 | return 0; | |
2040 | } | 2040 | } | |
2041 | 2041 | |||
2042 | void | 2042 | void | |
2043 | cpu_initclocks(void) | 2043 | cpu_initclocks(void) | |
2044 | { | 2044 | { | |
2045 | (*initclock_func)(); | 2045 | (*initclock_func)(); | |
2046 | } | 2046 | } | |
2047 | 2047 | |||
2048 | static int | 2048 | static int | |
2049 | valid_user_selector(struct lwp *l, uint64_t seg) | 2049 | valid_user_selector(struct lwp *l, uint64_t seg) | |
2050 | { | 2050 | { | |
2051 | int off, len; | 2051 | int off, len; | |
2052 | char *dt; | 2052 | char *dt; | |
2053 | struct mem_segment_descriptor *sdp; | 2053 | struct mem_segment_descriptor *sdp; | |
2054 | struct proc *p = l->l_proc; | 2054 | struct proc *p = l->l_proc; | |
2055 | struct pmap *pmap= p->p_vmspace->vm_map.pmap; | 2055 | struct pmap *pmap= p->p_vmspace->vm_map.pmap; | |
2056 | uint64_t base; | 2056 | uint64_t base; | |
2057 | 2057 | |||
2058 | seg &= 0xffff; | 2058 | seg &= 0xffff; | |
2059 | 2059 | |||
2060 | if (seg == 0) | 2060 | if (seg == 0) | |
2061 | return 0; | 2061 | return 0; | |
2062 | 2062 | |||
2063 | off = (seg & 0xfff8); | 2063 | off = (seg & 0xfff8); | |
2064 | if (seg & SEL_LDT) { | 2064 | if (seg & SEL_LDT) { | |
2065 | if (pmap->pm_ldt != NULL) { | 2065 | if (pmap->pm_ldt != NULL) { | |
2066 | len = pmap->pm_ldt_len; /* XXX broken */ | 2066 | len = pmap->pm_ldt_len; /* XXX broken */ | |
2067 | dt = (char *)pmap->pm_ldt; | 2067 | dt = (char *)pmap->pm_ldt; | |
2068 | } else { | 2068 | } else { | |
2069 | dt = ldtstore; | 2069 | dt = ldtstore; | |
2070 | len = LDT_SIZE; | 2070 | len = LDT_SIZE; | |
2071 | } | 2071 | } | |
2072 | 2072 | |||
2073 | if (off > (len - 8)) | 2073 | if (off > (len - 8)) | |
2074 | return EINVAL; | 2074 | return EINVAL; | |
2075 | } else { | 2075 | } else { | |
2076 | CTASSERT(GUDATA_SEL & SEL_LDT); | 2076 | CTASSERT(GUDATA_SEL & SEL_LDT); | |
2077 | KASSERT(seg != GUDATA_SEL); | 2077 | KASSERT(seg != GUDATA_SEL); | |
2078 | CTASSERT(GUDATA32_SEL & SEL_LDT); | 2078 | CTASSERT(GUDATA32_SEL & SEL_LDT); | |
2079 | KASSERT(seg != GUDATA32_SEL); | 2079 | KASSERT(seg != GUDATA32_SEL); | |
2080 | return EINVAL; | 2080 | return EINVAL; | |
2081 | } | 2081 | } | |
2082 | 2082 | |||
2083 | sdp = (struct mem_segment_descriptor *)(dt + off); | 2083 | sdp = (struct mem_segment_descriptor *)(dt + off); | |
2084 | if (sdp->sd_type < SDT_MEMRO || sdp->sd_p == 0) | 2084 | if (sdp->sd_type < SDT_MEMRO || sdp->sd_p == 0) | |
2085 | return EINVAL; | 2085 | return EINVAL; | |
2086 | 2086 | |||
2087 | base = ((uint64_t)sdp->sd_hibase << 32) | ((uint64_t)sdp->sd_lobase); | 2087 | base = ((uint64_t)sdp->sd_hibase << 32) | ((uint64_t)sdp->sd_lobase); | |
2088 | if (sdp->sd_gran == 1) | 2088 | if (sdp->sd_gran == 1) | |
2089 | base <<= PAGE_SHIFT; | 2089 | base <<= PAGE_SHIFT; | |
2090 | 2090 | |||
2091 | if (base >= VM_MAXUSER_ADDRESS) | 2091 | if (base >= VM_MAXUSER_ADDRESS) | |
2092 | return EINVAL; | 2092 | return EINVAL; | |
2093 | 2093 | |||
2094 | return 0; | 2094 | return 0; | |
2095 | } | 2095 | } | |
2096 | 2096 | |||
2097 | int | 2097 | int | |
2098 | mm_md_kernacc(void *ptr, vm_prot_t prot, bool *handled) | 2098 | mm_md_kernacc(void *ptr, vm_prot_t prot, bool *handled) | |
2099 | { | 2099 | { | |
2100 | extern int start, __data_start; | 2100 | extern int start, __data_start; | |
2101 | const vaddr_t v = (vaddr_t)ptr; | 2101 | const vaddr_t v = (vaddr_t)ptr; | |
2102 | 2102 | |||
2103 | if (v >= (vaddr_t)&start && v < (vaddr_t)kern_end) { | 2103 | if (v >= (vaddr_t)&start && v < (vaddr_t)kern_end) { | |
2104 | *handled = true; | 2104 | *handled = true; | |
2105 | /* Either the text or rodata segment */ | 2105 | /* Either the text or rodata segment */ | |
2106 | if (v < (vaddr_t)&__data_start && (prot & VM_PROT_WRITE)) | 2106 | if (v < (vaddr_t)&__data_start && (prot & VM_PROT_WRITE)) | |
2107 | return EFAULT; | 2107 | return EFAULT; | |
2108 | 2108 | |||
2109 | } else if (v >= module_start && v < module_end) { | 2109 | } else if (v >= module_start && v < module_end) { | |
2110 | *handled = true; | 2110 | *handled = true; | |
2111 | if (!uvm_map_checkprot(module_map, v, v + 1, prot)) | 2111 | if (!uvm_map_checkprot(module_map, v, v + 1, prot)) | |
2112 | return EFAULT; | 2112 | return EFAULT; | |
2113 | } else { | 2113 | } else { | |
2114 | *handled = false; | 2114 | *handled = false; | |
2115 | } | 2115 | } | |
2116 | return 0; | 2116 | return 0; | |
2117 | } | 2117 | } | |
2118 | 2118 | |||
2119 | /* | 2119 | /* | |
2120 | * Zero out an LWP's TLS context (%fs and %gs and associated stuff). | 2120 | * Zero out an LWP's TLS context (%fs and %gs and associated stuff). | |
2121 | * Used when exec'ing a new program. | 2121 | * Used when exec'ing a new program. | |
2122 | */ | 2122 | */ | |
2123 | 2123 | |||
2124 | void | 2124 | void | |
2125 | cpu_fsgs_zero(struct lwp *l) | 2125 | cpu_fsgs_zero(struct lwp *l) | |
2126 | { | 2126 | { | |
2127 | struct trapframe * const tf = l->l_md.md_regs; | 2127 | struct trapframe * const tf = l->l_md.md_regs; | |
2128 | struct pcb *pcb; | 2128 | struct pcb *pcb; | |
2129 | uint64_t zero = 0; | 2129 | uint64_t zero = 0; | |
2130 | 2130 | |||
2131 | pcb = lwp_getpcb(l); | 2131 | pcb = lwp_getpcb(l); | |
2132 | if (l == curlwp) { | 2132 | if (l == curlwp) { | |
2133 | kpreempt_disable(); | 2133 | kpreempt_disable(); | |
2134 | tf->tf_fs = 0; | 2134 | tf->tf_fs = 0; | |
2135 | tf->tf_gs = 0; | 2135 | tf->tf_gs = 0; | |
2136 | setfs(0); | 2136 | setfs(0); | |
2137 | #ifndef XEN | 2137 | #ifndef XEN | |
2138 | setusergs(0); | 2138 | setusergs(0); | |
2139 | #else | 2139 | #else | |
2140 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0); | 2140 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, 0); | |
2141 | #endif | 2141 | #endif | |
2142 | if ((l->l_proc->p_flag & PK_32) == 0) { | 2142 | if ((l->l_proc->p_flag & PK_32) == 0) { | |
2143 | #ifndef XEN | 2143 | #ifndef XEN | |
2144 | wrmsr(MSR_FSBASE, 0); | 2144 | wrmsr(MSR_FSBASE, 0); | |
2145 | wrmsr(MSR_KERNELGSBASE, 0); | 2145 | wrmsr(MSR_KERNELGSBASE, 0); | |
2146 | #else | 2146 | #else | |
2147 | HYPERVISOR_set_segment_base(SEGBASE_FS, 0); | 2147 | HYPERVISOR_set_segment_base(SEGBASE_FS, 0); | |
2148 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER, 0); | 2148 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER, 0); | |
2149 | #endif | 2149 | #endif | |
2150 | } | 2150 | } | |
2151 | pcb->pcb_fs = 0; | 2151 | pcb->pcb_fs = 0; | |
2152 | pcb->pcb_gs = 0; | 2152 | pcb->pcb_gs = 0; | |
2153 | update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &zero); | 2153 | update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &zero); | |
2154 | update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &zero); | 2154 | update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &zero); | |
2155 | kpreempt_enable(); | 2155 | kpreempt_enable(); | |
2156 | } else { | 2156 | } else { | |
2157 | tf->tf_fs = 0; | 2157 | tf->tf_fs = 0; | |
2158 | tf->tf_gs = 0; | 2158 | tf->tf_gs = 0; | |
2159 | pcb->pcb_fs = 0; | 2159 | pcb->pcb_fs = 0; | |
2160 | pcb->pcb_gs = 0; | 2160 | pcb->pcb_gs = 0; | |
2161 | } | 2161 | } | |
2162 | 2162 | |||
2163 | } | 2163 | } | |
2164 | 2164 | |||
2165 | /* | 2165 | /* | |
2166 | * Load an LWP's TLS context, possibly changing the %fs and %gs selectors. | 2166 | * Load an LWP's TLS context, possibly changing the %fs and %gs selectors. | |
2167 | * Used only for 32-bit processes. | 2167 | * Used only for 32-bit processes. | |
2168 | */ | 2168 | */ | |
2169 | 2169 | |||
2170 | void | 2170 | void | |
2171 | cpu_fsgs_reload(struct lwp *l, int fssel, int gssel) | 2171 | cpu_fsgs_reload(struct lwp *l, int fssel, int gssel) | |
2172 | { | 2172 | { | |
2173 | struct trapframe *tf; | 2173 | struct trapframe *tf; | |
2174 | struct pcb *pcb; | 2174 | struct pcb *pcb; | |
2175 | 2175 | |||
2176 | KASSERT(l->l_proc->p_flag & PK_32); | 2176 | KASSERT(l->l_proc->p_flag & PK_32); | |
2177 | tf = l->l_md.md_regs; | 2177 | tf = l->l_md.md_regs; | |
2178 | if (l == curlwp) { | 2178 | if (l == curlwp) { | |
2179 | pcb = lwp_getpcb(l); | 2179 | pcb = lwp_getpcb(l); | |
2180 | kpreempt_disable(); | 2180 | kpreempt_disable(); | |
2181 | update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs); | 2181 | update_descriptor(&curcpu()->ci_gdt[GUFS_SEL], &pcb->pcb_fs); | |
2182 | update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs); | 2182 | update_descriptor(&curcpu()->ci_gdt[GUGS_SEL], &pcb->pcb_gs); | |
2183 | setfs(fssel); | 2183 | setfs(fssel); | |
2184 | #ifndef XEN | 2184 | #ifndef XEN | |
2185 | setusergs(gssel); | 2185 | setusergs(gssel); | |
2186 | #else | 2186 | #else | |
2187 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gssel); | 2187 | HYPERVISOR_set_segment_base(SEGBASE_GS_USER_SEL, gssel); | |
2188 | #endif | 2188 | #endif | |
2189 | tf->tf_fs = fssel; | 2189 | tf->tf_fs = fssel; | |
2190 | tf->tf_gs = gssel; | 2190 | tf->tf_gs = gssel; | |
2191 | kpreempt_enable(); | 2191 | kpreempt_enable(); | |
2192 | } else { | 2192 | } else { | |
2193 | tf->tf_fs = fssel; | 2193 | tf->tf_fs = fssel; | |
2194 | tf->tf_gs = gssel; | 2194 | tf->tf_gs = gssel; | |
2195 | } | 2195 | } | |
2196 | } | 2196 | } | |
2197 | 2197 | |||
2198 | 2198 | |||
2199 | #ifdef __HAVE_DIRECT_MAP | 2199 | #ifdef __HAVE_DIRECT_MAP | |
2200 | bool | 2200 | bool | |
2201 | mm_md_direct_mapped_io(void *addr, paddr_t *paddr) | 2201 | mm_md_direct_mapped_io(void *addr, paddr_t *paddr) | |
2202 | { | 2202 | { | |
2203 | vaddr_t va = (vaddr_t)addr; | 2203 | vaddr_t va = (vaddr_t)addr; | |
2204 | 2204 | |||
2205 | if (va >= PMAP_DIRECT_BASE && va < PMAP_DIRECT_END) { | 2205 | if (va >= PMAP_DIRECT_BASE && va < PMAP_DIRECT_END) { | |
2206 | *paddr = PMAP_DIRECT_UNMAP(va); | 2206 | *paddr = PMAP_DIRECT_UNMAP(va); | |
2207 | return true; | 2207 | return true; | |
2208 | } | 2208 | } | |
2209 | return false; | 2209 | return false; | |
2210 | } | 2210 | } | |
2211 | 2211 | |||
2212 | bool | 2212 | bool | |
2213 | mm_md_direct_mapped_phys(paddr_t paddr, vaddr_t *vaddr) | 2213 | mm_md_direct_mapped_phys(paddr_t paddr, vaddr_t *vaddr) | |
2214 | { | 2214 | { | |
2215 | *vaddr = PMAP_DIRECT_MAP(paddr); | 2215 | *vaddr = PMAP_DIRECT_MAP(paddr); | |
2216 | return true; | 2216 | return true; | |
2217 | } | 2217 | } | |
2218 | #endif | 2218 | #endif |
--- src/sys/arch/i386/i386/machdep.c 2015/04/24 00:04:04 1.754
+++ src/sys/arch/i386/i386/machdep.c 2016/05/15 10:35:54 1.755
@@ -1,1766 +1,1766 @@ | @@ -1,1766 +1,1766 @@ | |||
1 | /* $NetBSD: machdep.c,v 1.754 2015/04/24 00:04:04 khorben Exp $ */ | 1 | /* $NetBSD: machdep.c,v 1.755 2016/05/15 10:35:54 maxv Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009 | 4 | * Copyright (c) 1996, 1997, 1998, 2000, 2004, 2006, 2008, 2009 | |
5 | * The NetBSD Foundation, Inc. | 5 | * The NetBSD Foundation, Inc. | |
6 | * All rights reserved. | 6 | * All rights reserved. | |
7 | * | 7 | * | |
8 | * This code is derived from software contributed to The NetBSD Foundation | 8 | * This code is derived from software contributed to The NetBSD Foundation | |
9 | * by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace | 9 | * by Charles M. Hannum, by Jason R. Thorpe of the Numerical Aerospace | |
10 | * Simulation Facility NASA Ames Research Center, by Julio M. Merino Vidal, | 10 | * Simulation Facility NASA Ames Research Center, by Julio M. Merino Vidal, | |
11 | * and by Andrew Doran. | 11 | * and by Andrew Doran. | |
12 | * | 12 | * | |
13 | * Redistribution and use in source and binary forms, with or without | 13 | * Redistribution and use in source and binary forms, with or without | |
14 | * modification, are permitted provided that the following conditions | 14 | * modification, are permitted provided that the following conditions | |
15 | * are met: | 15 | * are met: | |
16 | * 1. Redistributions of source code must retain the above copyright | 16 | * 1. Redistributions of source code must retain the above copyright | |
17 | * notice, this list of conditions and the following disclaimer. | 17 | * notice, this list of conditions and the following disclaimer. | |
18 | * 2. Redistributions in binary form must reproduce the above copyright | 18 | * 2. Redistributions in binary form must reproduce the above copyright | |
19 | * notice, this list of conditions and the following disclaimer in the | 19 | * notice, this list of conditions and the following disclaimer in the | |
20 | * documentation and/or other materials provided with the distribution. | 20 | * documentation and/or other materials provided with the distribution. | |
21 | * | 21 | * | |
22 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 22 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
23 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 23 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
25 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 25 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
26 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 26 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
29 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 29 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
30 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 30 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
31 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 31 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
32 | * POSSIBILITY OF SUCH DAMAGE. | 32 | * POSSIBILITY OF SUCH DAMAGE. | |
33 | */ | 33 | */ | |
34 | 34 | |||
35 | /*- | 35 | /*- | |
36 | * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. | 36 | * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. | |
37 | * All rights reserved. | 37 | * All rights reserved. | |
38 | * | 38 | * | |
39 | * This code is derived from software contributed to Berkeley by | 39 | * This code is derived from software contributed to Berkeley by | |
40 | * William Jolitz. | 40 | * William Jolitz. | |
41 | * | 41 | * | |
42 | * Redistribution and use in source and binary forms, with or without | 42 | * Redistribution and use in source and binary forms, with or without | |
43 | * modification, are permitted provided that the following conditions | 43 | * modification, are permitted provided that the following conditions | |
44 | * are met: | 44 | * are met: | |
45 | * 1. Redistributions of source code must retain the above copyright | 45 | * 1. Redistributions of source code must retain the above copyright | |
46 | * notice, this list of conditions and the following disclaimer. | 46 | * notice, this list of conditions and the following disclaimer. | |
47 | * 2. Redistributions in binary form must reproduce the above copyright | 47 | * 2. Redistributions in binary form must reproduce the above copyright | |
48 | * notice, this list of conditions and the following disclaimer in the | 48 | * notice, this list of conditions and the following disclaimer in the | |
49 | * documentation and/or other materials provided with the distribution. | 49 | * documentation and/or other materials provided with the distribution. | |
50 | * 3. Neither the name of the University nor the names of its contributors | 50 | * 3. Neither the name of the University nor the names of its contributors | |
51 | * may be used to endorse or promote products derived from this software | 51 | * may be used to endorse or promote products derived from this software | |
52 | * without specific prior written permission. | 52 | * without specific prior written permission. | |
53 | * | 53 | * | |
54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | 54 | * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND | |
55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 55 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | 57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | |
58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |
60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |
61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |
62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |
63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |
64 | * SUCH DAMAGE. | 64 | * SUCH DAMAGE. | |
65 | * | 65 | * | |
66 | * @(#)machdep.c 7.4 (Berkeley) 6/3/91 | 66 | * @(#)machdep.c 7.4 (Berkeley) 6/3/91 | |
67 | */ | 67 | */ | |
68 | 68 | |||
69 | #include <sys/cdefs.h> | 69 | #include <sys/cdefs.h> | |
70 | __KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.754 2015/04/24 00:04:04 khorben Exp $"); | 70 | __KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.755 2016/05/15 10:35:54 maxv Exp $"); | |
71 | 71 | |||
72 | #include "opt_beep.h" | 72 | #include "opt_beep.h" | |
73 | #include "opt_compat_ibcs2.h" | 73 | #include "opt_compat_ibcs2.h" | |
74 | #include "opt_compat_freebsd.h" | 74 | #include "opt_compat_freebsd.h" | |
75 | #include "opt_compat_netbsd.h" | 75 | #include "opt_compat_netbsd.h" | |
76 | #include "opt_compat_svr4.h" | 76 | #include "opt_compat_svr4.h" | |
77 | #include "opt_cpureset_delay.h" | 77 | #include "opt_cpureset_delay.h" | |
78 | #include "opt_ddb.h" | 78 | #include "opt_ddb.h" | |
79 | #include "opt_ipkdb.h" | 79 | #include "opt_ipkdb.h" | |
80 | #include "opt_kgdb.h" | 80 | #include "opt_kgdb.h" | |
81 | #include "opt_mtrr.h" | 81 | #include "opt_mtrr.h" | |
82 | #include "opt_modular.h" | 82 | #include "opt_modular.h" | |
83 | #include "opt_multiboot.h" | 83 | #include "opt_multiboot.h" | |
84 | #include "opt_multiprocessor.h" | 84 | #include "opt_multiprocessor.h" | |
85 | #include "opt_physmem.h" | 85 | #include "opt_physmem.h" | |
86 | #include "opt_realmem.h" | 86 | #include "opt_realmem.h" | |
87 | #include "opt_user_ldt.h" | 87 | #include "opt_user_ldt.h" | |
88 | #include "opt_vm86.h" | 88 | #include "opt_vm86.h" | |
89 | #include "opt_xen.h" | 89 | #include "opt_xen.h" | |
90 | #include "isa.h" | 90 | #include "isa.h" | |
91 | #include "pci.h" | 91 | #include "pci.h" | |
92 | 92 | |||
93 | #include <sys/param.h> | 93 | #include <sys/param.h> | |
94 | #include <sys/systm.h> | 94 | #include <sys/systm.h> | |
95 | #include <sys/signal.h> | 95 | #include <sys/signal.h> | |
96 | #include <sys/signalvar.h> | 96 | #include <sys/signalvar.h> | |
97 | #include <sys/kernel.h> | 97 | #include <sys/kernel.h> | |
98 | #include <sys/cpu.h> | 98 | #include <sys/cpu.h> | |
99 | #include <sys/exec.h> | 99 | #include <sys/exec.h> | |
100 | #include <sys/fcntl.h> | 100 | #include <sys/fcntl.h> | |
101 | #include <sys/reboot.h> | 101 | #include <sys/reboot.h> | |
102 | #include <sys/conf.h> | 102 | #include <sys/conf.h> | |
103 | #include <sys/kauth.h> | 103 | #include <sys/kauth.h> | |
104 | #include <sys/mbuf.h> | 104 | #include <sys/mbuf.h> | |
105 | #include <sys/msgbuf.h> | 105 | #include <sys/msgbuf.h> | |
106 | #include <sys/mount.h> | 106 | #include <sys/mount.h> | |
107 | #include <sys/syscallargs.h> | 107 | #include <sys/syscallargs.h> | |
108 | #include <sys/core.h> | 108 | #include <sys/core.h> | |
109 | #include <sys/kcore.h> | 109 | #include <sys/kcore.h> | |
110 | #include <sys/ucontext.h> | 110 | #include <sys/ucontext.h> | |
111 | #include <sys/ras.h> | 111 | #include <sys/ras.h> | |
112 | #include <sys/ksyms.h> | 112 | #include <sys/ksyms.h> | |
113 | #include <sys/device.h> | 113 | #include <sys/device.h> | |
114 | 114 | |||
115 | #ifdef IPKDB | 115 | #ifdef IPKDB | |
116 | #include <ipkdb/ipkdb.h> | 116 | #include <ipkdb/ipkdb.h> | |
117 | #endif | 117 | #endif | |
118 | 118 | |||
119 | #ifdef KGDB | 119 | #ifdef KGDB | |
120 | #include <sys/kgdb.h> | 120 | #include <sys/kgdb.h> | |
121 | #endif | 121 | #endif | |
122 | 122 | |||
123 | #include <dev/cons.h> | 123 | #include <dev/cons.h> | |
124 | #include <dev/mm.h> | 124 | #include <dev/mm.h> | |
125 | 125 | |||
126 | #include <uvm/uvm.h> | 126 | #include <uvm/uvm.h> | |
127 | #include <uvm/uvm_page.h> | 127 | #include <uvm/uvm_page.h> | |
128 | 128 | |||
129 | #include <sys/sysctl.h> | 129 | #include <sys/sysctl.h> | |
130 | 130 | |||
131 | #include <machine/cpu.h> | 131 | #include <machine/cpu.h> | |
132 | #include <machine/cpufunc.h> | 132 | #include <machine/cpufunc.h> | |
133 | #include <machine/cpuvar.h> | 133 | #include <machine/cpuvar.h> | |
134 | #include <machine/gdt.h> | 134 | #include <machine/gdt.h> | |
135 | #include <machine/intr.h> | 135 | #include <machine/intr.h> | |
136 | #include <machine/kcore.h> | 136 | #include <machine/kcore.h> | |
137 | #include <machine/pio.h> | 137 | #include <machine/pio.h> | |
138 | #include <machine/psl.h> | 138 | #include <machine/psl.h> | |
139 | #include <machine/reg.h> | 139 | #include <machine/reg.h> | |
140 | #include <machine/specialreg.h> | 140 | #include <machine/specialreg.h> | |
141 | #include <machine/bootinfo.h> | 141 | #include <machine/bootinfo.h> | |
142 | #include <machine/mtrr.h> | 142 | #include <machine/mtrr.h> | |
143 | #include <x86/x86/tsc.h> | 143 | #include <x86/x86/tsc.h> | |
144 | 144 | |||
145 | #include <x86/fpu.h> | 145 | #include <x86/fpu.h> | |
146 | #include <x86/machdep.h> | 146 | #include <x86/machdep.h> | |
147 | 147 | |||
148 | #include <machine/multiboot.h> | 148 | #include <machine/multiboot.h> | |
149 | #ifdef XEN | 149 | #ifdef XEN | |
150 | #include <xen/evtchn.h> | 150 | #include <xen/evtchn.h> | |
151 | #include <xen/xen.h> | 151 | #include <xen/xen.h> | |
152 | #include <xen/hypervisor.h> | 152 | #include <xen/hypervisor.h> | |
153 | 153 | |||
154 | /* #define XENDEBUG */ | 154 | /* #define XENDEBUG */ | |
155 | /* #define XENDEBUG_LOW */ | 155 | /* #define XENDEBUG_LOW */ | |
156 | 156 | |||
157 | #ifdef XENDEBUG | 157 | #ifdef XENDEBUG | |
158 | #define XENPRINTF(x) printf x | 158 | #define XENPRINTF(x) printf x | |
159 | #define XENPRINTK(x) printk x | 159 | #define XENPRINTK(x) printk x | |
160 | #else | 160 | #else | |
161 | #define XENPRINTF(x) | 161 | #define XENPRINTF(x) | |
162 | #define XENPRINTK(x) | 162 | #define XENPRINTK(x) | |
163 | #endif | 163 | #endif | |
164 | #define PRINTK(x) printf x | 164 | #define PRINTK(x) printf x | |
165 | #endif /* XEN */ | 165 | #endif /* XEN */ | |
166 | 166 | |||
167 | #include <dev/isa/isareg.h> | 167 | #include <dev/isa/isareg.h> | |
168 | #include <machine/isa_machdep.h> | 168 | #include <machine/isa_machdep.h> | |
169 | #include <dev/ic/i8042reg.h> | 169 | #include <dev/ic/i8042reg.h> | |
170 | 170 | |||
171 | #ifdef DDB | 171 | #ifdef DDB | |
172 | #include <machine/db_machdep.h> | 172 | #include <machine/db_machdep.h> | |
173 | #include <ddb/db_extern.h> | 173 | #include <ddb/db_extern.h> | |
174 | #endif | 174 | #endif | |
175 | 175 | |||
176 | #ifdef VM86 | 176 | #ifdef VM86 | |
177 | #include <machine/vm86.h> | 177 | #include <machine/vm86.h> | |
178 | #endif | 178 | #endif | |
179 | 179 | |||
180 | #include "acpica.h" | 180 | #include "acpica.h" | |
181 | #include "bioscall.h" | 181 | #include "bioscall.h" | |
182 | 182 | |||
183 | #if NBIOSCALL > 0 | 183 | #if NBIOSCALL > 0 | |
184 | #include <machine/bioscall.h> | 184 | #include <machine/bioscall.h> | |
185 | #endif | 185 | #endif | |
186 | 186 | |||
187 | #if NACPICA > 0 | 187 | #if NACPICA > 0 | |
188 | #include <dev/acpi/acpivar.h> | 188 | #include <dev/acpi/acpivar.h> | |
189 | #define ACPI_MACHDEP_PRIVATE | 189 | #define ACPI_MACHDEP_PRIVATE | |
190 | #include <machine/acpi_machdep.h> | 190 | #include <machine/acpi_machdep.h> | |
191 | #endif | 191 | #endif | |
192 | 192 | |||
193 | #include "isa.h" | 193 | #include "isa.h" | |
194 | #include "isadma.h" | 194 | #include "isadma.h" | |
195 | #include "ksyms.h" | 195 | #include "ksyms.h" | |
196 | 196 | |||
197 | #include "cardbus.h" | 197 | #include "cardbus.h" | |
198 | #if NCARDBUS > 0 | 198 | #if NCARDBUS > 0 | |
199 | /* For rbus_min_start hint. */ | 199 | /* For rbus_min_start hint. */ | |
200 | #include <sys/bus.h> | 200 | #include <sys/bus.h> | |
201 | #include <dev/cardbus/rbus.h> | 201 | #include <dev/cardbus/rbus.h> | |
202 | #include <machine/rbus_machdep.h> | 202 | #include <machine/rbus_machdep.h> | |
203 | #endif | 203 | #endif | |
204 | 204 | |||
205 | #include "mca.h" | 205 | #include "mca.h" | |
206 | #if NMCA > 0 | 206 | #if NMCA > 0 | |
207 | #include <machine/mca_machdep.h> /* for mca_busprobe() */ | 207 | #include <machine/mca_machdep.h> /* for mca_busprobe() */ | |
208 | #endif | 208 | #endif | |
209 | 209 | |||
210 | #ifdef MULTIPROCESSOR /* XXX */ | 210 | #ifdef MULTIPROCESSOR /* XXX */ | |
211 | #include <machine/mpbiosvar.h> /* XXX */ | 211 | #include <machine/mpbiosvar.h> /* XXX */ | |
212 | #endif /* XXX */ | 212 | #endif /* XXX */ | |
213 | 213 | |||
214 | /* the following is used externally (sysctl_hw) */ | 214 | /* the following is used externally (sysctl_hw) */ | |
215 | char machine[] = "i386"; /* CPU "architecture" */ | 215 | char machine[] = "i386"; /* CPU "architecture" */ | |
216 | char machine_arch[] = "i386"; /* machine == machine_arch */ | 216 | char machine_arch[] = "i386"; /* machine == machine_arch */ | |
217 | 217 | |||
218 | extern struct bi_devmatch *x86_alldisks; | 218 | extern struct bi_devmatch *x86_alldisks; | |
219 | extern int x86_ndisks; | 219 | extern int x86_ndisks; | |
220 | 220 | |||
221 | #ifdef CPURESET_DELAY | 221 | #ifdef CPURESET_DELAY | |
222 | int cpureset_delay = CPURESET_DELAY; | 222 | int cpureset_delay = CPURESET_DELAY; | |
223 | #else | 223 | #else | |
224 | int cpureset_delay = 2000; /* default to 2s */ | 224 | int cpureset_delay = 2000; /* default to 2s */ | |
225 | #endif | 225 | #endif | |
226 | 226 | |||
227 | #ifdef MTRR | 227 | #ifdef MTRR | |
228 | struct mtrr_funcs *mtrr_funcs; | 228 | struct mtrr_funcs *mtrr_funcs; | |
229 | #endif | 229 | #endif | |
230 | 230 | |||
231 | int cpu_class; | 231 | int cpu_class; | |
232 | int use_pae; | 232 | int use_pae; | |
233 | int i386_fpu_present = 1; | 233 | int i386_fpu_present = 1; | |
234 | int i386_fpu_fdivbug; | 234 | int i386_fpu_fdivbug; | |
235 | 235 | |||
236 | int i386_use_fxsave; | 236 | int i386_use_fxsave; | |
237 | int i386_has_sse; | 237 | int i386_has_sse; | |
238 | int i386_has_sse2; | 238 | int i386_has_sse2; | |
239 | 239 | |||
240 | vaddr_t msgbuf_vaddr; | 240 | vaddr_t msgbuf_vaddr; | |
241 | struct { | 241 | struct { | |
242 | paddr_t paddr; | 242 | paddr_t paddr; | |
243 | psize_t sz; | 243 | psize_t sz; | |
244 | } msgbuf_p_seg[VM_PHYSSEG_MAX]; | 244 | } msgbuf_p_seg[VM_PHYSSEG_MAX]; | |
245 | unsigned int msgbuf_p_cnt = 0; | 245 | unsigned int msgbuf_p_cnt = 0; | |
246 | 246 | |||
247 | vaddr_t idt_vaddr; | 247 | vaddr_t idt_vaddr; | |
248 | paddr_t idt_paddr; | 248 | paddr_t idt_paddr; | |
249 | vaddr_t pentium_idt_vaddr; | 249 | vaddr_t pentium_idt_vaddr; | |
250 | 250 | |||
251 | struct vm_map *phys_map = NULL; | 251 | struct vm_map *phys_map = NULL; | |
252 | 252 | |||
253 | extern paddr_t avail_start, avail_end; | 253 | extern paddr_t avail_start, avail_end; | |
254 | #ifdef XEN | 254 | #ifdef XEN | |
255 | extern paddr_t pmap_pa_start, pmap_pa_end; | 255 | extern paddr_t pmap_pa_start, pmap_pa_end; | |
256 | void hypervisor_callback(void); | 256 | void hypervisor_callback(void); | |
257 | void failsafe_callback(void); | 257 | void failsafe_callback(void); | |
258 | #endif | 258 | #endif | |
259 | 259 | |||
260 | #ifdef XEN | 260 | #ifdef XEN | |
261 | void (*delay_func)(unsigned int) = xen_delay; | 261 | void (*delay_func)(unsigned int) = xen_delay; | |
262 | void (*initclock_func)(void) = xen_initclocks; | 262 | void (*initclock_func)(void) = xen_initclocks; | |
263 | #else | 263 | #else | |
264 | void (*delay_func)(unsigned int) = i8254_delay; | 264 | void (*delay_func)(unsigned int) = i8254_delay; | |
265 | void (*initclock_func)(void) = i8254_initclocks; | 265 | void (*initclock_func)(void) = i8254_initclocks; | |
266 | #endif | 266 | #endif | |
267 | 267 | |||
268 | 268 | |||
269 | /* | 269 | /* | |
270 | * Size of memory segments, before any memory is stolen. | 270 | * Size of memory segments, before any memory is stolen. | |
271 | */ | 271 | */ | |
272 | phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; | 272 | phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; | |
273 | int mem_cluster_cnt = 0; | 273 | int mem_cluster_cnt = 0; | |
274 | 274 | |||
275 | void init386(paddr_t); | 275 | void init386(paddr_t); | |
276 | void initgdt(union descriptor *); | 276 | void initgdt(union descriptor *); | |
277 | 277 | |||
278 | extern int time_adjusted; | 278 | extern int time_adjusted; | |
279 | 279 | |||
280 | int *esym; | 280 | int *esym; | |
281 | int *eblob; | 281 | int *eblob; | |
282 | extern int boothowto; | 282 | extern int boothowto; | |
283 | 283 | |||
284 | #ifndef XEN | 284 | #ifndef XEN | |
285 | 285 | |||
286 | /* Base memory reported by BIOS. */ | 286 | /* Base memory reported by BIOS. */ | |
287 | #ifndef REALBASEMEM | 287 | #ifndef REALBASEMEM | |
288 | int biosbasemem = 0; | 288 | int biosbasemem = 0; | |
289 | #else | 289 | #else | |
290 | int biosbasemem = REALBASEMEM; | 290 | int biosbasemem = REALBASEMEM; | |
291 | #endif | 291 | #endif | |
292 | 292 | |||
293 | /* Extended memory reported by BIOS. */ | 293 | /* Extended memory reported by BIOS. */ | |
294 | #ifndef REALEXTMEM | 294 | #ifndef REALEXTMEM | |
295 | int biosextmem = 0; | 295 | int biosextmem = 0; | |
296 | #else | 296 | #else | |
297 | int biosextmem = REALEXTMEM; | 297 | int biosextmem = REALEXTMEM; | |
298 | #endif | 298 | #endif | |
299 | 299 | |||
300 | /* Set if any boot-loader set biosbasemem/biosextmem. */ | 300 | /* Set if any boot-loader set biosbasemem/biosextmem. */ | |
301 | int biosmem_implicit; | 301 | int biosmem_implicit; | |
302 | 302 | |||
303 | /* Representation of the bootinfo structure constructed by a NetBSD native | 303 | /* Representation of the bootinfo structure constructed by a NetBSD native | |
304 | * boot loader. Only be used by native_loader(). */ | 304 | * boot loader. Only be used by native_loader(). */ | |
305 | struct bootinfo_source { | 305 | struct bootinfo_source { | |
306 | uint32_t bs_naddrs; | 306 | uint32_t bs_naddrs; | |
307 | void *bs_addrs[1]; /* Actually longer. */ | 307 | void *bs_addrs[1]; /* Actually longer. */ | |
308 | }; | 308 | }; | |
309 | 309 | |||
310 | /* Only called by locore.h; no need to be in a header file. */ | 310 | /* Only called by locore.h; no need to be in a header file. */ | |
311 | void native_loader(int, int, struct bootinfo_source *, paddr_t, int, int); | 311 | void native_loader(int, int, struct bootinfo_source *, paddr_t, int, int); | |
312 | 312 | |||
313 | /* | 313 | /* | |
314 | * Called as one of the very first things during system startup (just after | 314 | * Called as one of the very first things during system startup (just after | |
315 | * the boot loader gave control to the kernel image), this routine is in | 315 | * the boot loader gave control to the kernel image), this routine is in | |
316 | * charge of retrieving the parameters passed in by the boot loader and | 316 | * charge of retrieving the parameters passed in by the boot loader and | |
317 | * storing them in the appropriate kernel variables. | 317 | * storing them in the appropriate kernel variables. | |
318 | * | 318 | * | |
319 | * WARNING: Because the kernel has not yet relocated itself to KERNBASE, | 319 | * WARNING: Because the kernel has not yet relocated itself to KERNBASE, | |
320 | * special care has to be taken when accessing memory because absolute | 320 | * special care has to be taken when accessing memory because absolute | |
321 | * addresses (referring to kernel symbols) do not work. So: | 321 | * addresses (referring to kernel symbols) do not work. So: | |
322 | * | 322 | * | |
323 | * 1) Avoid jumps to absolute addresses (such as gotos and switches). | 323 | * 1) Avoid jumps to absolute addresses (such as gotos and switches). | |
324 | * 2) To access global variables use their physical address, which | 324 | * 2) To access global variables use their physical address, which | |
325 | * can be obtained using the RELOC macro. | 325 | * can be obtained using the RELOC macro. | |
326 | */ | 326 | */ | |
327 | void | 327 | void | |
328 | native_loader(int bl_boothowto, int bl_bootdev, | 328 | native_loader(int bl_boothowto, int bl_bootdev, | |
329 | struct bootinfo_source *bl_bootinfo, paddr_t bl_esym, | 329 | struct bootinfo_source *bl_bootinfo, paddr_t bl_esym, | |
330 | int bl_biosextmem, int bl_biosbasemem) | 330 | int bl_biosextmem, int bl_biosbasemem) | |
331 | { | 331 | { | |
332 | #define RELOC(type, x) ((type)((vaddr_t)(x) - KERNBASE)) | 332 | #define RELOC(type, x) ((type)((vaddr_t)(x) - KERNBASE)) | |
333 | 333 | |||
334 | *RELOC(int *, &boothowto) = bl_boothowto; | 334 | *RELOC(int *, &boothowto) = bl_boothowto; | |
335 | 335 | |||
336 | #ifdef COMPAT_OLDBOOT | 336 | #ifdef COMPAT_OLDBOOT | |
337 | /* | 337 | /* | |
338 | * Pre-1.3 boot loaders gave the boot device as a parameter | 338 | * Pre-1.3 boot loaders gave the boot device as a parameter | |
339 | * (instead of a bootinfo entry). | 339 | * (instead of a bootinfo entry). | |
340 | */ | 340 | */ | |
341 | *RELOC(int *, &bootdev) = bl_bootdev; | 341 | *RELOC(int *, &bootdev) = bl_bootdev; | |
342 | #endif | 342 | #endif | |
343 | 343 | |||
344 | /* | 344 | /* | |
345 | * The boot loader provides a physical, non-relocated address | 345 | * The boot loader provides a physical, non-relocated address | |
346 | * for the symbols table's end. We need to convert it to a | 346 | * for the symbols table's end. We need to convert it to a | |
347 | * virtual address. | 347 | * virtual address. | |
348 | */ | 348 | */ | |
349 | if (bl_esym != 0) | 349 | if (bl_esym != 0) | |
350 | *RELOC(int **, &esym) = (int *)((vaddr_t)bl_esym + KERNBASE); | 350 | *RELOC(int **, &esym) = (int *)((vaddr_t)bl_esym + KERNBASE); | |
351 | else | 351 | else | |
352 | *RELOC(int **, &esym) = 0; | 352 | *RELOC(int **, &esym) = 0; | |
353 | 353 | |||
354 | /* | 354 | /* | |
355 | * Copy bootinfo entries (if any) from the boot loader's | 355 | * Copy bootinfo entries (if any) from the boot loader's | |
356 | * representation to the kernel's bootinfo space. | 356 | * representation to the kernel's bootinfo space. | |
357 | */ | 357 | */ | |
358 | if (bl_bootinfo != NULL) { | 358 | if (bl_bootinfo != NULL) { | |
359 | size_t i; | 359 | size_t i; | |
360 | uint8_t *data; | 360 | uint8_t *data; | |
361 | struct bootinfo *bidest; | 361 | struct bootinfo *bidest; | |
362 | struct btinfo_modulelist *bi; | 362 | struct btinfo_modulelist *bi; | |
363 | 363 | |||
364 | bidest = RELOC(struct bootinfo *, &bootinfo); | 364 | bidest = RELOC(struct bootinfo *, &bootinfo); | |
365 | 365 | |||
366 | data = &bidest->bi_data[0]; | 366 | data = &bidest->bi_data[0]; | |
367 | 367 | |||
368 | for (i = 0; i < bl_bootinfo->bs_naddrs; i++) { | 368 | for (i = 0; i < bl_bootinfo->bs_naddrs; i++) { | |
369 | struct btinfo_common *bc; | 369 | struct btinfo_common *bc; | |
370 | 370 | |||
371 | bc = bl_bootinfo->bs_addrs[i]; | 371 | bc = bl_bootinfo->bs_addrs[i]; | |
372 | 372 | |||
373 | if ((data + bc->len) > | 373 | if ((data + bc->len) > | |
374 | (&bidest->bi_data[0] + BOOTINFO_MAXSIZE)) | 374 | (&bidest->bi_data[0] + BOOTINFO_MAXSIZE)) | |
375 | break; | 375 | break; | |
376 | 376 | |||
377 | memcpy(data, bc, bc->len); | 377 | memcpy(data, bc, bc->len); | |
378 | /* | 378 | /* | |
379 | * If any modules were loaded, record where they | 379 | * If any modules were loaded, record where they | |
380 | * end. We'll need to skip over them. | 380 | * end. We'll need to skip over them. | |
381 | */ | 381 | */ | |
382 | bi = (struct btinfo_modulelist *)data; | 382 | bi = (struct btinfo_modulelist *)data; | |
383 | if (bi->common.type == BTINFO_MODULELIST) { | 383 | if (bi->common.type == BTINFO_MODULELIST) { | |
384 | *RELOC(int **, &eblob) = | 384 | *RELOC(int **, &eblob) = | |
385 | (int *)(bi->endpa + KERNBASE); | 385 | (int *)(bi->endpa + KERNBASE); | |
386 | } | 386 | } | |
387 | data += bc->len; | 387 | data += bc->len; | |
388 | } | 388 | } | |
389 | bidest->bi_nentries = i; | 389 | bidest->bi_nentries = i; | |
390 | } | 390 | } | |
391 | 391 | |||
392 | /* | 392 | /* | |
393 | * Configure biosbasemem and biosextmem only if they were not | 393 | * Configure biosbasemem and biosextmem only if they were not | |
394 | * explicitly given during the kernel's build. | 394 | * explicitly given during the kernel's build. | |
395 | */ | 395 | */ | |
396 | if (*RELOC(int *, &biosbasemem) == 0) { | 396 | if (*RELOC(int *, &biosbasemem) == 0) { | |
397 | *RELOC(int *, &biosbasemem) = bl_biosbasemem; | 397 | *RELOC(int *, &biosbasemem) = bl_biosbasemem; | |
398 | *RELOC(int *, &biosmem_implicit) = 1; | 398 | *RELOC(int *, &biosmem_implicit) = 1; | |
399 | } | 399 | } | |
400 | if (*RELOC(int *, &biosextmem) == 0) { | 400 | if (*RELOC(int *, &biosextmem) == 0) { | |
401 | *RELOC(int *, &biosextmem) = bl_biosextmem; | 401 | *RELOC(int *, &biosextmem) = bl_biosextmem; | |
402 | *RELOC(int *, &biosmem_implicit) = 1; | 402 | *RELOC(int *, &biosmem_implicit) = 1; | |
403 | } | 403 | } | |
404 | #undef RELOC | 404 | #undef RELOC | |
405 | } | 405 | } | |
406 | 406 | |||
407 | #endif /* XEN */ | 407 | #endif /* XEN */ | |
408 | 408 | |||
409 | /* | 409 | /* | |
410 | * Machine-dependent startup code | 410 | * Machine-dependent startup code | |
411 | */ | 411 | */ | |
412 | void | 412 | void | |
413 | cpu_startup(void) | 413 | cpu_startup(void) | |
414 | { | 414 | { | |
415 | int x, y; | 415 | int x, y; | |
416 | vaddr_t minaddr, maxaddr; | 416 | vaddr_t minaddr, maxaddr; | |
417 | psize_t sz; | 417 | psize_t sz; | |
418 | 418 | |||
419 | /* | 419 | /* | |
420 | * For console drivers that require uvm and pmap to be initialized, | 420 | * For console drivers that require uvm and pmap to be initialized, | |
421 | * we'll give them one more chance here... | 421 | * we'll give them one more chance here... | |
422 | */ | 422 | */ | |
423 | consinit(); | 423 | consinit(); | |
424 | 424 | |||
425 | /* | 425 | /* | |
426 | * Initialize error message buffer (et end of core). | 426 | * Initialize error message buffer (et end of core). | |
427 | */ | 427 | */ | |
428 | if (msgbuf_p_cnt == 0) | 428 | if (msgbuf_p_cnt == 0) | |
429 | panic("msgbuf paddr map has not been set up"); | 429 | panic("msgbuf paddr map has not been set up"); | |
430 | for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz) | 430 | for (x = 0, sz = 0; x < msgbuf_p_cnt; sz += msgbuf_p_seg[x++].sz) | |
431 | continue; | 431 | continue; | |
432 | msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_VAONLY); | 432 | msgbuf_vaddr = uvm_km_alloc(kernel_map, sz, 0, UVM_KMF_VAONLY); | |
433 | if (msgbuf_vaddr == 0) | 433 | if (msgbuf_vaddr == 0) | |
434 | panic("failed to valloc msgbuf_vaddr"); | 434 | panic("failed to valloc msgbuf_vaddr"); | |
435 | 435 | |||
436 | /* msgbuf_paddr was init'd in pmap */ | 436 | /* msgbuf_paddr was init'd in pmap */ | |
437 | for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) { | 437 | for (y = 0, sz = 0; y < msgbuf_p_cnt; y++) { | |
438 | for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE) | 438 | for (x = 0; x < btoc(msgbuf_p_seg[y].sz); x++, sz += PAGE_SIZE) | |
439 | pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz, | 439 | pmap_kenter_pa((vaddr_t)msgbuf_vaddr + sz, | |
440 | msgbuf_p_seg[y].paddr + x * PAGE_SIZE, | 440 | msgbuf_p_seg[y].paddr + x * PAGE_SIZE, | |
441 | VM_PROT_READ|VM_PROT_WRITE, 0); | 441 | VM_PROT_READ|VM_PROT_WRITE, 0); | |
442 | } | 442 | } | |
443 | pmap_update(pmap_kernel()); | 443 | pmap_update(pmap_kernel()); | |
444 | 444 | |||
445 | initmsgbuf((void *)msgbuf_vaddr, sz); | 445 | initmsgbuf((void *)msgbuf_vaddr, sz); | |
446 | 446 | |||
447 | #ifdef MULTIBOOT | 447 | #ifdef MULTIBOOT | |
448 | multiboot_print_info(); | 448 | multiboot_print_info(); | |
449 | #endif | 449 | #endif | |
450 | 450 | |||
451 | #ifdef TRAPLOG | 451 | #ifdef TRAPLOG | |
452 | /* | 452 | /* | |
453 | * Enable recording of branch from/to in MSR's | 453 | * Enable recording of branch from/to in MSR's | |
454 | */ | 454 | */ | |
455 | wrmsr(MSR_DEBUGCTLMSR, 0x1); | 455 | wrmsr(MSR_DEBUGCTLMSR, 0x1); | |
456 | #endif | 456 | #endif | |
457 | 457 | |||
458 | #if NCARDBUS > 0 | 458 | #if NCARDBUS > 0 | |
459 | /* Tell RBUS how much RAM we have, so it can use heuristics. */ | 459 | /* Tell RBUS how much RAM we have, so it can use heuristics. */ | |
460 | rbus_min_start_hint(ctob((psize_t)physmem)); | 460 | rbus_min_start_hint(ctob((psize_t)physmem)); | |
461 | #endif | 461 | #endif | |
462 | 462 | |||
463 | minaddr = 0; | 463 | minaddr = 0; | |
464 | 464 | |||
465 | /* | 465 | /* | |
466 | * Allocate a submap for physio | 466 | * Allocate a submap for physio | |
467 | */ | 467 | */ | |
468 | phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, | 468 | phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, | |
469 | VM_PHYS_SIZE, 0, false, NULL); | 469 | VM_PHYS_SIZE, 0, false, NULL); | |
470 | 470 | |||
471 | /* Say hello. */ | 471 | /* Say hello. */ | |
472 | banner(); | 472 | banner(); | |
473 | 473 | |||
474 | /* Safe for i/o port / memory space allocation to use malloc now. */ | 474 | /* Safe for i/o port / memory space allocation to use malloc now. */ | |
475 | #if NISA > 0 || NPCI > 0 | 475 | #if NISA > 0 || NPCI > 0 | |
476 | x86_bus_space_mallocok(); | 476 | x86_bus_space_mallocok(); | |
477 | #endif | 477 | #endif | |
478 | 478 | |||
479 | gdt_init(); | 479 | gdt_init(); | |
480 | i386_proc0_tss_ldt_init(); | 480 | i386_proc0_tss_ldt_init(); | |
481 | 481 | |||
482 | #ifndef XEN | 482 | #ifndef XEN | |
483 | cpu_init_tss(&cpu_info_primary); | 483 | cpu_init_tss(&cpu_info_primary); | |
484 | ltr(cpu_info_primary.ci_tss_sel); | 484 | ltr(cpu_info_primary.ci_tss_sel); | |
485 | #endif | 485 | #endif | |
486 | 486 | |||
487 | x86_startup(); | 487 | x86_startup(); | |
488 | } | 488 | } | |
489 | 489 | |||
490 | /* | 490 | /* | |
491 | * Set up proc0's TSS and LDT. | 491 | * Set up proc0's TSS and LDT. | |
492 | */ | 492 | */ | |
493 | void | 493 | void | |
494 | i386_proc0_tss_ldt_init(void) | 494 | i386_proc0_tss_ldt_init(void) | |
495 | { | 495 | { | |
496 | struct lwp *l; | 496 | struct lwp *l; | |
497 | struct pcb *pcb __diagused; | 497 | struct pcb *pcb __diagused; | |
498 | 498 | |||
499 | l = &lwp0; | 499 | l = &lwp0; | |
500 | pcb = lwp_getpcb(l); | 500 | pcb = lwp_getpcb(l); | |
501 | 501 | |||
502 | pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); | 502 | pmap_kernel()->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); | |
503 | pcb->pcb_cr0 = rcr0() & ~CR0_TS; | 503 | pcb->pcb_cr0 = rcr0() & ~CR0_TS; | |
504 | pcb->pcb_esp0 = uvm_lwp_getuarea(l) + USPACE - 16; | 504 | pcb->pcb_esp0 = uvm_lwp_getuarea(l) + USPACE - 16; | |
505 | pcb->pcb_iopl = SEL_KPL; | 505 | pcb->pcb_iopl = SEL_KPL; | |
506 | l->l_md.md_regs = (struct trapframe *)pcb->pcb_esp0 - 1; | 506 | l->l_md.md_regs = (struct trapframe *)pcb->pcb_esp0 - 1; | |
507 | memcpy(&pcb->pcb_fsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_fsd)); | 507 | memcpy(&pcb->pcb_fsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_fsd)); | |
508 | memcpy(&pcb->pcb_gsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_gsd)); | 508 | memcpy(&pcb->pcb_gsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_gsd)); | |
509 | 509 | |||
510 | #ifndef XEN | 510 | #ifndef XEN | |
511 | lldt(pmap_kernel()->pm_ldt_sel); | 511 | lldt(pmap_kernel()->pm_ldt_sel); | |
512 | #else | 512 | #else | |
513 | HYPERVISOR_fpu_taskswitch(1); | 513 | HYPERVISOR_fpu_taskswitch(1); | |
514 | XENPRINTF(("lwp tss sp %p ss %04x/%04x\n", | 514 | XENPRINTF(("lwp tss sp %p ss %04x/%04x\n", | |
515 | (void *)pcb->pcb_esp0, | 515 | (void *)pcb->pcb_esp0, | |
516 | GSEL(GDATA_SEL, SEL_KPL), | 516 | GSEL(GDATA_SEL, SEL_KPL), | |
517 | IDXSEL(GSEL(GDATA_SEL, SEL_KPL)))); | 517 | IDXSEL(GSEL(GDATA_SEL, SEL_KPL)))); | |
518 | HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0); | 518 | HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0); | |
519 | #endif | 519 | #endif | |
520 | } | 520 | } | |
521 | 521 | |||
522 | #ifdef XEN | 522 | #ifdef XEN | |
523 | /* used in assembly */ | 523 | /* used in assembly */ | |
524 | void i386_switch_context(lwp_t *); | 524 | void i386_switch_context(lwp_t *); | |
525 | void i386_tls_switch(lwp_t *); | 525 | void i386_tls_switch(lwp_t *); | |
526 | 526 | |||
527 | /* | 527 | /* | |
528 | * Switch context: | 528 | * Switch context: | |
529 | * - switch stack pointer for user->kernel transition | 529 | * - switch stack pointer for user->kernel transition | |
530 | */ | 530 | */ | |
531 | void | 531 | void | |
532 | i386_switch_context(lwp_t *l) | 532 | i386_switch_context(lwp_t *l) | |
533 | { | 533 | { | |
534 | struct pcb *pcb; | 534 | struct pcb *pcb; | |
535 | struct physdev_op physop; | 535 | struct physdev_op physop; | |
536 | 536 | |||
537 | pcb = lwp_getpcb(l); | 537 | pcb = lwp_getpcb(l); | |
538 | 538 | |||
539 | HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0); | 539 | HYPERVISOR_stack_switch(GSEL(GDATA_SEL, SEL_KPL), pcb->pcb_esp0); | |
540 | 540 | |||
541 | physop.cmd = PHYSDEVOP_SET_IOPL; | 541 | physop.cmd = PHYSDEVOP_SET_IOPL; | |
542 | physop.u.set_iopl.iopl = pcb->pcb_iopl; | 542 | physop.u.set_iopl.iopl = pcb->pcb_iopl; | |
543 | HYPERVISOR_physdev_op(&physop); | 543 | HYPERVISOR_physdev_op(&physop); | |
544 | } | 544 | } | |
545 | 545 | |||
546 | void | 546 | void | |
547 | i386_tls_switch(lwp_t *l) | 547 | i386_tls_switch(lwp_t *l) | |
548 | { | 548 | { | |
549 | struct cpu_info *ci = curcpu(); | 549 | struct cpu_info *ci = curcpu(); | |
550 | struct pcb *pcb = lwp_getpcb(l); | 550 | struct pcb *pcb = lwp_getpcb(l); | |
551 | /* | 551 | /* | |
552 | * Raise the IPL to IPL_HIGH. | 552 | * Raise the IPL to IPL_HIGH. | |
553 | * FPU IPIs can alter the LWP's saved cr0. Dropping the priority | 553 | * FPU IPIs can alter the LWP's saved cr0. Dropping the priority | |
554 | * is deferred until mi_switch(), when cpu_switchto() returns. | 554 | * is deferred until mi_switch(), when cpu_switchto() returns. | |
555 | */ | 555 | */ | |
556 | (void)splhigh(); | 556 | (void)splhigh(); | |
557 | 557 | |||
558 | /* | 558 | /* | |
559 | * If our floating point registers are on a different CPU, | 559 | * If our floating point registers are on a different CPU, | |
560 | * set CR0_TS so we'll trap rather than reuse bogus state. | 560 | * set CR0_TS so we'll trap rather than reuse bogus state. | |
561 | */ | 561 | */ | |
562 | 562 | |||
563 | if (l != ci->ci_fpcurlwp) { | 563 | if (l != ci->ci_fpcurlwp) { | |
564 | HYPERVISOR_fpu_taskswitch(1); | 564 | HYPERVISOR_fpu_taskswitch(1); | |
565 | } | 565 | } | |
566 | 566 | |||
567 | /* Update TLS segment pointers */ | 567 | /* Update TLS segment pointers */ | |
568 | update_descriptor(&ci->ci_gdt[GUFS_SEL], | 568 | update_descriptor(&ci->ci_gdt[GUFS_SEL], | |
569 | (union descriptor *) &pcb->pcb_fsd); | 569 | (union descriptor *) &pcb->pcb_fsd); | |
570 | update_descriptor(&ci->ci_gdt[GUGS_SEL], | 570 | update_descriptor(&ci->ci_gdt[GUGS_SEL], | |
571 | (union descriptor *) &pcb->pcb_gsd); | 571 | (union descriptor *) &pcb->pcb_gsd); | |
572 | 572 | |||
573 | } | 573 | } | |
574 | #endif /* XEN */ | 574 | #endif /* XEN */ | |
575 | 575 | |||
576 | #ifndef XEN | 576 | #ifndef XEN | |
577 | /* | 577 | /* | |
578 | * Set up TSS and I/O bitmap. | 578 | * Set up TSS and I/O bitmap. | |
579 | */ | 579 | */ | |
580 | void | 580 | void | |
581 | cpu_init_tss(struct cpu_info *ci) | 581 | cpu_init_tss(struct cpu_info *ci) | |
582 | { | 582 | { | |
583 | struct i386tss *tss = &ci->ci_tss; | 583 | struct i386tss *tss = &ci->ci_tss; | |
584 | 584 | |||
585 | tss->tss_iobase = IOMAP_INVALOFF << 16; | 585 | tss->tss_iobase = IOMAP_INVALOFF << 16; | |
586 | tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); | 586 | tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL); | |
587 | tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL); | 587 | tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL); | |
588 | tss->tss_cr3 = rcr3(); | 588 | tss->tss_cr3 = rcr3(); | |
589 | ci->ci_tss_sel = tss_alloc(tss); | 589 | ci->ci_tss_sel = tss_alloc(tss); | |
590 | } | 590 | } | |
591 | #endif /* XEN */ | 591 | #endif /* XEN */ | |
592 | 592 | |||
593 | void * | 593 | void * | |
594 | getframe(struct lwp *l, int sig, int *onstack) | 594 | getframe(struct lwp *l, int sig, int *onstack) | |
595 | { | 595 | { | |
596 | struct proc *p = l->l_proc; | 596 | struct proc *p = l->l_proc; | |
597 | struct trapframe *tf = l->l_md.md_regs; | 597 | struct trapframe *tf = l->l_md.md_regs; | |
598 | 598 | |||
599 | /* Do we need to jump onto the signal stack? */ | 599 | /* Do we need to jump onto the signal stack? */ | |
600 | *onstack = (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 | 600 | *onstack = (l->l_sigstk.ss_flags & (SS_DISABLE | SS_ONSTACK)) == 0 | |
601 | && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; | 601 | && (SIGACTION(p, sig).sa_flags & SA_ONSTACK) != 0; | |
602 | if (*onstack) | 602 | if (*onstack) | |
603 | return (char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size; | 603 | return (char *)l->l_sigstk.ss_sp + l->l_sigstk.ss_size; | |
604 | #ifdef VM86 | 604 | #ifdef VM86 | |
605 | if (tf->tf_eflags & PSL_VM) | 605 | if (tf->tf_eflags & PSL_VM) | |
606 | return (void *)(tf->tf_esp + (tf->tf_ss << 4)); | 606 | return (void *)(tf->tf_esp + (tf->tf_ss << 4)); | |
607 | else | 607 | else | |
608 | #endif | 608 | #endif | |
609 | return (void *)tf->tf_esp; | 609 | return (void *)tf->tf_esp; | |
610 | } | 610 | } | |
611 | 611 | |||
612 | /* | 612 | /* | |
613 | * Build context to run handler in. We invoke the handler | 613 | * Build context to run handler in. We invoke the handler | |
614 | * directly, only returning via the trampoline. Note the | 614 | * directly, only returning via the trampoline. Note the | |
615 | * trampoline version numbers are coordinated with machine- | 615 | * trampoline version numbers are coordinated with machine- | |
616 | * dependent code in libc. | 616 | * dependent code in libc. | |
617 | */ | 617 | */ | |
618 | void | 618 | void | |
619 | buildcontext(struct lwp *l, int sel, void *catcher, void *fp) | 619 | buildcontext(struct lwp *l, int sel, void *catcher, void *fp) | |
620 | { | 620 | { | |
621 | struct trapframe *tf = l->l_md.md_regs; | 621 | struct trapframe *tf = l->l_md.md_regs; | |
622 | 622 | |||
623 | tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL); | 623 | tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL); | |
624 | tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL); | 624 | tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL); | |
625 | tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); | 625 | tf->tf_es = GSEL(GUDATA_SEL, SEL_UPL); | |
626 | tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); | 626 | tf->tf_ds = GSEL(GUDATA_SEL, SEL_UPL); | |
627 | tf->tf_eip = (int)catcher; | 627 | tf->tf_eip = (int)catcher; | |
628 | tf->tf_cs = GSEL(sel, SEL_UPL); | 628 | tf->tf_cs = GSEL(sel, SEL_UPL); | |
629 | tf->tf_eflags &= ~PSL_CLEARSIG; | 629 | tf->tf_eflags &= ~PSL_CLEARSIG; | |
630 | tf->tf_esp = (int)fp; | 630 | tf->tf_esp = (int)fp; | |
631 | tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); | 631 | tf->tf_ss = GSEL(GUDATA_SEL, SEL_UPL); | |
632 | 632 | |||
633 | /* Ensure FP state is reset. */ | 633 | /* Ensure FP state is reset. */ | |
634 | fpu_save_area_reset(l); | 634 | fpu_save_area_reset(l); | |
635 | } | 635 | } | |
636 | 636 | |||
637 | void | 637 | void | |
638 | sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask) | 638 | sendsig_siginfo(const ksiginfo_t *ksi, const sigset_t *mask) | |
639 | { | 639 | { | |
640 | struct lwp *l = curlwp; | 640 | struct lwp *l = curlwp; | |
641 | struct proc *p = l->l_proc; | 641 | struct proc *p = l->l_proc; | |
642 | struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map); | 642 | struct pmap *pmap = vm_map_pmap(&p->p_vmspace->vm_map); | |
643 | int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ? | 643 | int sel = pmap->pm_hiexec > I386_MAX_EXE_ADDR ? | |
644 | GUCODEBIG_SEL : GUCODE_SEL; | 644 | GUCODEBIG_SEL : GUCODE_SEL; | |
645 | struct sigacts *ps = p->p_sigacts; | 645 | struct sigacts *ps = p->p_sigacts; | |
646 | int onstack, error; | 646 | int onstack, error; | |
647 | int sig = ksi->ksi_signo; | 647 | int sig = ksi->ksi_signo; | |
648 | struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame; | 648 | struct sigframe_siginfo *fp = getframe(l, sig, &onstack), frame; | |
649 | sig_t catcher = SIGACTION(p, sig).sa_handler; | 649 | sig_t catcher = SIGACTION(p, sig).sa_handler; | |
650 | struct trapframe *tf = l->l_md.md_regs; | 650 | struct trapframe *tf = l->l_md.md_regs; | |
651 | 651 | |||
652 | KASSERT(mutex_owned(p->p_lock)); | 652 | KASSERT(mutex_owned(p->p_lock)); | |
653 | 653 | |||
654 | fp--; | 654 | fp--; | |
655 | 655 | |||
656 | frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp; | 656 | frame.sf_ra = (int)ps->sa_sigdesc[sig].sd_tramp; | |
657 | frame.sf_signum = sig; | 657 | frame.sf_signum = sig; | |
658 | frame.sf_sip = &fp->sf_si; | 658 | frame.sf_sip = &fp->sf_si; | |
659 | frame.sf_ucp = &fp->sf_uc; | 659 | frame.sf_ucp = &fp->sf_uc; | |
660 | frame.sf_si._info = ksi->ksi_info; | 660 | frame.sf_si._info = ksi->ksi_info; | |
661 | frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM; | 661 | frame.sf_uc.uc_flags = _UC_SIGMASK|_UC_VM; | |
662 | frame.sf_uc.uc_sigmask = *mask; | 662 | frame.sf_uc.uc_sigmask = *mask; | |
663 | frame.sf_uc.uc_link = l->l_ctxlink; | 663 | frame.sf_uc.uc_link = l->l_ctxlink; | |
664 | frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK) | 664 | frame.sf_uc.uc_flags |= (l->l_sigstk.ss_flags & SS_ONSTACK) | |
665 | ? _UC_SETSTACK : _UC_CLRSTACK; | 665 | ? _UC_SETSTACK : _UC_CLRSTACK; | |
666 | memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack)); | 666 | memset(&frame.sf_uc.uc_stack, 0, sizeof(frame.sf_uc.uc_stack)); | |
667 | 667 | |||
668 | if (tf->tf_eflags & PSL_VM) | 668 | if (tf->tf_eflags & PSL_VM) | |
669 | (*p->p_emul->e_syscall_intern)(p); | 669 | (*p->p_emul->e_syscall_intern)(p); | |
670 | sendsig_reset(l, sig); | 670 | sendsig_reset(l, sig); | |
671 | 671 | |||
672 | mutex_exit(p->p_lock); | 672 | mutex_exit(p->p_lock); | |
673 | cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags); | 673 | cpu_getmcontext(l, &frame.sf_uc.uc_mcontext, &frame.sf_uc.uc_flags); | |
674 | error = copyout(&frame, fp, sizeof(frame)); | 674 | error = copyout(&frame, fp, sizeof(frame)); | |
675 | mutex_enter(p->p_lock); | 675 | mutex_enter(p->p_lock); | |
676 | 676 | |||
677 | if (error != 0) { | 677 | if (error != 0) { | |
678 | /* | 678 | /* | |
679 | * Process has trashed its stack; give it an illegal | 679 | * Process has trashed its stack; give it an illegal | |
680 | * instruction to halt it in its tracks. | 680 | * instruction to halt it in its tracks. | |
681 | */ | 681 | */ | |
682 | sigexit(l, SIGILL); | 682 | sigexit(l, SIGILL); | |
683 | /* NOTREACHED */ | 683 | /* NOTREACHED */ | |
684 | } | 684 | } | |
685 | 685 | |||
686 | buildcontext(l, sel, catcher, fp); | 686 | buildcontext(l, sel, catcher, fp); | |
687 | 687 | |||
688 | /* Remember that we're now on the signal stack. */ | 688 | /* Remember that we're now on the signal stack. */ | |
689 | if (onstack) | 689 | if (onstack) | |
690 | l->l_sigstk.ss_flags |= SS_ONSTACK; | 690 | l->l_sigstk.ss_flags |= SS_ONSTACK; | |
691 | } | 691 | } | |
692 | 692 | |||
693 | static void | 693 | static void | |
694 | maybe_dump(int howto) | 694 | maybe_dump(int howto) | |
695 | { | 695 | { | |
696 | int s; | 696 | int s; | |
697 | 697 | |||
698 | /* Disable interrupts. */ | 698 | /* Disable interrupts. */ | |
699 | s = splhigh(); | 699 | s = splhigh(); | |
700 | 700 | |||
701 | /* Do a dump if requested. */ | 701 | /* Do a dump if requested. */ | |
702 | if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP) | 702 | if ((howto & (RB_DUMP | RB_HALT)) == RB_DUMP) | |
703 | dumpsys(); | 703 | dumpsys(); | |
704 | 704 | |||
705 | splx(s); | 705 | splx(s); | |
706 | } | 706 | } | |
707 | 707 | |||
708 | void | 708 | void | |
709 | cpu_reboot(int howto, char *bootstr) | 709 | cpu_reboot(int howto, char *bootstr) | |
710 | { | 710 | { | |
711 | static bool syncdone = false; | 711 | static bool syncdone = false; | |
712 | int s = IPL_NONE; | 712 | int s = IPL_NONE; | |
713 | 713 | |||
714 | if (cold) { | 714 | if (cold) { | |
715 | howto |= RB_HALT; | 715 | howto |= RB_HALT; | |
716 | goto haltsys; | 716 | goto haltsys; | |
717 | } | 717 | } | |
718 | 718 | |||
719 | boothowto = howto; | 719 | boothowto = howto; | |
720 | 720 | |||
721 | /* XXX used to dump after vfs_shutdown() and before | 721 | /* XXX used to dump after vfs_shutdown() and before | |
722 | * detaching devices / shutdown hooks / pmf_system_shutdown(). | 722 | * detaching devices / shutdown hooks / pmf_system_shutdown(). | |
723 | */ | 723 | */ | |
724 | maybe_dump(howto); | 724 | maybe_dump(howto); | |
725 | 725 | |||
726 | /* | 726 | /* | |
727 | * If we've panic'd, don't make the situation potentially | 727 | * If we've panic'd, don't make the situation potentially | |
728 | * worse by syncing or unmounting the file systems. | 728 | * worse by syncing or unmounting the file systems. | |
729 | */ | 729 | */ | |
730 | if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) { | 730 | if ((howto & RB_NOSYNC) == 0 && panicstr == NULL) { | |
731 | if (!syncdone) { | 731 | if (!syncdone) { | |
732 | syncdone = true; | 732 | syncdone = true; | |
733 | /* XXX used to force unmount as well, here */ | 733 | /* XXX used to force unmount as well, here */ | |
734 | vfs_sync_all(curlwp); | 734 | vfs_sync_all(curlwp); | |
735 | /* | 735 | /* | |
736 | * If we've been adjusting the clock, the todr | 736 | * If we've been adjusting the clock, the todr | |
737 | * will be out of synch; adjust it now. | 737 | * will be out of synch; adjust it now. | |
738 | * | 738 | * | |
739 | * XXX used to do this after unmounting all | 739 | * XXX used to do this after unmounting all | |
740 | * filesystems with vfs_shutdown(). | 740 | * filesystems with vfs_shutdown(). | |
741 | */ | 741 | */ | |
742 | if (time_adjusted != 0) | 742 | if (time_adjusted != 0) | |
743 | resettodr(); | 743 | resettodr(); | |
744 | } | 744 | } | |
745 | 745 | |||
746 | while (vfs_unmountall1(curlwp, false, false) || | 746 | while (vfs_unmountall1(curlwp, false, false) || | |
747 | config_detach_all(boothowto) || | 747 | config_detach_all(boothowto) || | |
748 | vfs_unmount_forceone(curlwp)) | 748 | vfs_unmount_forceone(curlwp)) | |
749 | ; /* do nothing */ | 749 | ; /* do nothing */ | |
750 | } else | 750 | } else | |
751 | suspendsched(); | 751 | suspendsched(); | |
752 | 752 | |||
753 | pmf_system_shutdown(boothowto); | 753 | pmf_system_shutdown(boothowto); | |
754 | 754 | |||
755 | s = splhigh(); | 755 | s = splhigh(); | |
756 | 756 | |||
757 | /* amd64 maybe_dump() */ | 757 | /* amd64 maybe_dump() */ | |
758 | 758 | |||
759 | haltsys: | 759 | haltsys: | |
760 | doshutdownhooks(); | 760 | doshutdownhooks(); | |
761 | 761 | |||
762 | if ((howto & RB_POWERDOWN) == RB_POWERDOWN) { | 762 | if ((howto & RB_POWERDOWN) == RB_POWERDOWN) { | |
763 | #if NACPICA > 0 | 763 | #if NACPICA > 0 | |
764 | if (s != IPL_NONE) | 764 | if (s != IPL_NONE) | |
765 | splx(s); | 765 | splx(s); | |
766 | 766 | |||
767 | acpi_enter_sleep_state(ACPI_STATE_S5); | 767 | acpi_enter_sleep_state(ACPI_STATE_S5); | |
768 | #else | 768 | #else | |
769 | __USE(s); | 769 | __USE(s); | |
770 | #endif | 770 | #endif | |
771 | #ifdef XEN | 771 | #ifdef XEN | |
772 | HYPERVISOR_shutdown(); | 772 | HYPERVISOR_shutdown(); | |
773 | for (;;); | 773 | for (;;); | |
774 | #endif | 774 | #endif | |
775 | } | 775 | } | |
776 | 776 | |||
777 | #ifdef MULTIPROCESSOR | 777 | #ifdef MULTIPROCESSOR | |
778 | cpu_broadcast_halt(); | 778 | cpu_broadcast_halt(); | |
779 | #endif /* MULTIPROCESSOR */ | 779 | #endif /* MULTIPROCESSOR */ | |
780 | 780 | |||
781 | if (howto & RB_HALT) { | 781 | if (howto & RB_HALT) { | |
782 | #if NACPICA > 0 | 782 | #if NACPICA > 0 | |
783 | acpi_disable(); | 783 | acpi_disable(); | |
784 | #endif | 784 | #endif | |
785 | 785 | |||
786 | printf("\n"); | 786 | printf("\n"); | |
787 | printf("The operating system has halted.\n"); | 787 | printf("The operating system has halted.\n"); | |
788 | printf("Please press any key to reboot.\n\n"); | 788 | printf("Please press any key to reboot.\n\n"); | |
789 | 789 | |||
790 | #ifdef BEEP_ONHALT | 790 | #ifdef BEEP_ONHALT | |
791 | { | 791 | { | |
792 | int c; | 792 | int c; | |
793 | for (c = BEEP_ONHALT_COUNT; c > 0; c--) { | 793 | for (c = BEEP_ONHALT_COUNT; c > 0; c--) { | |
794 | sysbeep(BEEP_ONHALT_PITCH, | 794 | sysbeep(BEEP_ONHALT_PITCH, | |
795 | BEEP_ONHALT_PERIOD * hz / 1000); | 795 | BEEP_ONHALT_PERIOD * hz / 1000); | |
796 | delay(BEEP_ONHALT_PERIOD * 1000); | 796 | delay(BEEP_ONHALT_PERIOD * 1000); | |
797 | sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000); | 797 | sysbeep(0, BEEP_ONHALT_PERIOD * hz / 1000); | |
798 | delay(BEEP_ONHALT_PERIOD * 1000); | 798 | delay(BEEP_ONHALT_PERIOD * 1000); | |
799 | } | 799 | } | |
800 | } | 800 | } | |
801 | #endif | 801 | #endif | |
802 | 802 | |||
803 | cnpollc(1); /* for proper keyboard command handling */ | 803 | cnpollc(1); /* for proper keyboard command handling */ | |
804 | if (cngetc() == 0) { | 804 | if (cngetc() == 0) { | |
805 | /* no console attached, so just hlt */ | 805 | /* no console attached, so just hlt */ | |
806 | printf("No keyboard - cannot reboot after all.\n"); | 806 | printf("No keyboard - cannot reboot after all.\n"); | |
807 | for(;;) { | 807 | for(;;) { | |
808 | x86_hlt(); | 808 | x86_hlt(); | |
809 | } | 809 | } | |
810 | } | 810 | } | |
811 | cnpollc(0); | 811 | cnpollc(0); | |
812 | } | 812 | } | |
813 | 813 | |||
814 | printf("rebooting...\n"); | 814 | printf("rebooting...\n"); | |
815 | if (cpureset_delay > 0) | 815 | if (cpureset_delay > 0) | |
816 | delay(cpureset_delay * 1000); | 816 | delay(cpureset_delay * 1000); | |
817 | cpu_reset(); | 817 | cpu_reset(); | |
818 | for(;;) ; | 818 | for(;;) ; | |
819 | /*NOTREACHED*/ | 819 | /*NOTREACHED*/ | |
820 | } | 820 | } | |
821 | 821 | |||
822 | /* | 822 | /* | |
823 | * Clear registers on exec | 823 | * Clear registers on exec | |
824 | */ | 824 | */ | |
825 | void | 825 | void | |
826 | setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack) | 826 | setregs(struct lwp *l, struct exec_package *pack, vaddr_t stack) | |
827 | { | 827 | { | |
828 | struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); | 828 | struct pmap *pmap = vm_map_pmap(&l->l_proc->p_vmspace->vm_map); | |
829 | struct pcb *pcb = lwp_getpcb(l); | 829 | struct pcb *pcb = lwp_getpcb(l); | |
830 | struct trapframe *tf; | 830 | struct trapframe *tf; | |
831 | 831 | |||
832 | #ifdef USER_LDT | 832 | #ifdef USER_LDT | |
833 | pmap_ldt_cleanup(l); | 833 | pmap_ldt_cleanup(l); | |
834 | #endif | 834 | #endif | |
835 | 835 | |||
836 | fpu_save_area_clear(l, pack->ep_osversion >= 699002600 | 836 | fpu_save_area_clear(l, pack->ep_osversion >= 699002600 | |
837 | ? __INITIAL_NPXCW__ : __NetBSD_COMPAT_NPXCW__); | 837 | ? __INITIAL_NPXCW__ : __NetBSD_COMPAT_NPXCW__); | |
838 | 838 | |||
839 | memcpy(&pcb->pcb_fsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_fsd)); | 839 | memcpy(&pcb->pcb_fsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_fsd)); | |
840 | memcpy(&pcb->pcb_gsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_gsd)); | 840 | memcpy(&pcb->pcb_gsd, &gdt[GUDATA_SEL], sizeof(pcb->pcb_gsd)); | |
841 | 841 | |||
842 | tf = l->l_md.md_regs; | 842 | tf = l->l_md.md_regs; | |
843 | tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL); | 843 | tf->tf_gs = GSEL(GUGS_SEL, SEL_UPL); | |
844 | tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL); | 844 | tf->tf_fs = GSEL(GUFS_SEL, SEL_UPL); | |
845 | tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL); | 845 | tf->tf_es = LSEL(LUDATA_SEL, SEL_UPL); | |
846 | tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL); | 846 | tf->tf_ds = LSEL(LUDATA_SEL, SEL_UPL); | |
847 | tf->tf_edi = 0; | 847 | tf->tf_edi = 0; | |
848 | tf->tf_esi = 0; | 848 | tf->tf_esi = 0; | |
849 | tf->tf_ebp = 0; | 849 | tf->tf_ebp = 0; | |
850 | tf->tf_ebx = l->l_proc->p_psstrp; | 850 | tf->tf_ebx = l->l_proc->p_psstrp; | |
851 | tf->tf_edx = 0; | 851 | tf->tf_edx = 0; | |
852 | tf->tf_ecx = 0; | 852 | tf->tf_ecx = 0; | |
853 | tf->tf_eax = 0; | 853 | tf->tf_eax = 0; | |
854 | tf->tf_eip = pack->ep_entry; | 854 | tf->tf_eip = pack->ep_entry; | |
855 | tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ? | 855 | tf->tf_cs = pmap->pm_hiexec > I386_MAX_EXE_ADDR ? | |
856 | LSEL(LUCODEBIG_SEL, SEL_UPL) : LSEL(LUCODE_SEL, SEL_UPL); | 856 | LSEL(LUCODEBIG_SEL, SEL_UPL) : LSEL(LUCODE_SEL, SEL_UPL); | |
857 | tf->tf_eflags = PSL_USERSET; | 857 | tf->tf_eflags = PSL_USERSET; | |
858 | tf->tf_esp = stack; | 858 | tf->tf_esp = stack; | |
859 | tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL); | 859 | tf->tf_ss = LSEL(LUDATA_SEL, SEL_UPL); | |
860 | } | 860 | } | |
861 | 861 | |||
862 | /* | 862 | /* | |
863 | * Initialize segments and descriptor tables | 863 | * Initialize segments and descriptor tables | |
864 | */ | 864 | */ | |
865 | 865 | |||
866 | union descriptor *gdt, *ldt; | 866 | union descriptor *gdt, *ldt; | |
867 | union descriptor *pentium_idt; | 867 | union descriptor *pentium_idt; | |
868 | extern vaddr_t lwp0uarea; | 868 | extern vaddr_t lwp0uarea; | |
869 | 869 | |||
870 | void | 870 | void | |
871 | setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl, | 871 | setgate(struct gate_descriptor *gd, void *func, int args, int type, int dpl, | |
872 | int sel) | 872 | int sel) | |
873 | { | 873 | { | |
874 | 874 | |||
875 | gd->gd_looffset = (int)func; | 875 | gd->gd_looffset = (int)func; | |
876 | gd->gd_selector = sel; | 876 | gd->gd_selector = sel; | |
877 | gd->gd_stkcpy = args; | 877 | gd->gd_stkcpy = args; | |
878 | gd->gd_xx = 0; | 878 | gd->gd_xx = 0; | |
879 | gd->gd_type = type; | 879 | gd->gd_type = type; | |
880 | gd->gd_dpl = dpl; | 880 | gd->gd_dpl = dpl; | |
881 | gd->gd_p = 1; | 881 | gd->gd_p = 1; | |
882 | gd->gd_hioffset = (int)func >> 16; | 882 | gd->gd_hioffset = (int)func >> 16; | |
883 | } | 883 | } | |
884 | 884 | |||
885 | void | 885 | void | |
886 | unsetgate(struct gate_descriptor *gd) | 886 | unsetgate(struct gate_descriptor *gd) | |
887 | { | 887 | { | |
888 | gd->gd_p = 0; | 888 | gd->gd_p = 0; | |
889 | gd->gd_hioffset = 0; | 889 | gd->gd_hioffset = 0; | |
890 | gd->gd_looffset = 0; | 890 | gd->gd_looffset = 0; | |
891 | gd->gd_selector = 0; | 891 | gd->gd_selector = 0; | |
892 | gd->gd_xx = 0; | 892 | gd->gd_xx = 0; | |
893 | gd->gd_stkcpy = 0; | 893 | gd->gd_stkcpy = 0; | |
894 | gd->gd_type = 0; | 894 | gd->gd_type = 0; | |
895 | gd->gd_dpl = 0; | 895 | gd->gd_dpl = 0; | |
896 | } | 896 | } | |
897 | 897 | |||
898 | 898 | |||
899 | void | 899 | void | |
900 | setregion(struct region_descriptor *rd, void *base, size_t limit) | 900 | setregion(struct region_descriptor *rd, void *base, size_t limit) | |
901 | { | 901 | { | |
902 | 902 | |||
903 | rd->rd_limit = (int)limit; | 903 | rd->rd_limit = (int)limit; | |
904 | rd->rd_base = (int)base; | 904 | rd->rd_base = (int)base; | |
905 | } | 905 | } | |
906 | 906 | |||
907 | void | 907 | void | |
908 | setsegment(struct segment_descriptor *sd, const void *base, size_t limit, | 908 | setsegment(struct segment_descriptor *sd, const void *base, size_t limit, | |
909 | int type, int dpl, int def32, int gran) | 909 | int type, int dpl, int def32, int gran) | |
910 | { | 910 | { | |
911 | 911 | |||
912 | sd->sd_lolimit = (int)limit; | 912 | sd->sd_lolimit = (int)limit; | |
913 | sd->sd_lobase = (int)base; | 913 | sd->sd_lobase = (int)base; | |
914 | sd->sd_type = type; | 914 | sd->sd_type = type; | |
915 | sd->sd_dpl = dpl; | 915 | sd->sd_dpl = dpl; | |
916 | sd->sd_p = 1; | 916 | sd->sd_p = 1; | |
917 | sd->sd_hilimit = (int)limit >> 16; | 917 | sd->sd_hilimit = (int)limit >> 16; | |
918 | sd->sd_xx = 0; | 918 | sd->sd_xx = 0; | |
919 | sd->sd_def32 = def32; | 919 | sd->sd_def32 = def32; | |
920 | sd->sd_gran = gran; | 920 | sd->sd_gran = gran; | |
921 | sd->sd_hibase = (int)base >> 24; | 921 | sd->sd_hibase = (int)base >> 24; | |
922 | } | 922 | } | |
923 | 923 | |||
924 | #define IDTVEC(name) __CONCAT(X, name) | 924 | #define IDTVEC(name) __CONCAT(X, name) | |
925 | typedef void (vector)(void); | 925 | typedef void (vector)(void); | |
926 | extern vector IDTVEC(syscall); | 926 | extern vector IDTVEC(syscall); | |
927 | extern vector IDTVEC(osyscall); | 927 | extern vector IDTVEC(osyscall); | |
928 | extern vector *IDTVEC(exceptions)[]; | 928 | extern vector *IDTVEC(exceptions)[]; | |
929 | extern vector IDTVEC(svr4_fasttrap); | 929 | extern vector IDTVEC(svr4_fasttrap); | |
930 | void (*svr4_fasttrap_vec)(void) = (void (*)(void))nullop; | 930 | void (*svr4_fasttrap_vec)(void) = (void (*)(void))nullop; | |
931 | krwlock_t svr4_fasttrap_lock; | 931 | krwlock_t svr4_fasttrap_lock; | |
932 | #ifdef XEN | 932 | #ifdef XEN | |
933 | #define MAX_XEN_IDT 128 | 933 | #define MAX_XEN_IDT 128 | |
934 | trap_info_t xen_idt[MAX_XEN_IDT]; | 934 | trap_info_t xen_idt[MAX_XEN_IDT]; | |
935 | int xen_idt_idx; | 935 | int xen_idt_idx; | |
936 | extern union descriptor tmpgdt[]; | 936 | extern union descriptor tmpgdt[]; | |
937 | #endif | 937 | #endif | |
938 | 938 | |||
939 | void | 939 | void | |
940 | cpu_init_idt(void) | 940 | cpu_init_idt(void) | |
941 | { | 941 | { | |
942 | #ifndef XEN | 942 | #ifndef XEN | |
943 | struct region_descriptor region; | 943 | struct region_descriptor region; | |
944 | setregion(®ion, pentium_idt, NIDT * sizeof(idt[0]) - 1); | 944 | setregion(®ion, pentium_idt, NIDT * sizeof(idt[0]) - 1); | |
945 | lidt(®ion); | 945 | lidt(®ion); | |
946 | #else /* XEN */ | 946 | #else /* XEN */ | |
947 | XENPRINTF(("HYPERVISOR_set_trap_table %p\n", xen_idt)); | 947 | XENPRINTF(("HYPERVISOR_set_trap_table %p\n", xen_idt)); | |
948 | if (HYPERVISOR_set_trap_table(xen_idt)) | 948 | if (HYPERVISOR_set_trap_table(xen_idt)) | |
949 | panic("HYPERVISOR_set_trap_table %p failed\n", xen_idt); | 949 | panic("HYPERVISOR_set_trap_table %p failed\n", xen_idt); | |
950 | #endif /* !XEN */ | 950 | #endif /* !XEN */ | |
951 | } | 951 | } | |
952 | 952 | |||
953 | void | 953 | void | |
954 | initgdt(union descriptor *tgdt) | 954 | initgdt(union descriptor *tgdt) | |
955 | { | 955 | { | |
956 | KASSERT(tgdt != NULL); | 956 | KASSERT(tgdt != NULL); | |
957 | 957 | |||
958 | gdt = tgdt; | 958 | gdt = tgdt; | |
959 | #ifdef XEN | 959 | #ifdef XEN | |
960 | u_long frames[16]; | 960 | u_long frames[16]; | |
961 | #else | 961 | #else | |
962 | struct region_descriptor region; | 962 | struct region_descriptor region; | |
963 | memset(gdt, 0, NGDT*sizeof(*gdt)); | 963 | memset(gdt, 0, NGDT*sizeof(*gdt)); | |
964 | #endif /* XEN */ | 964 | #endif /* XEN */ | |
965 | /* make gdt gates and memory segments */ | 965 | /* make gdt gates and memory segments */ | |
966 | setsegment(&gdt[GCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1); | 966 | setsegment(&gdt[GCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 1, 1); | |
967 | setsegment(&gdt[GDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 1); | 967 | setsegment(&gdt[GDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 1, 1); | |
968 | setsegment(&gdt[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1, | 968 | setsegment(&gdt[GUCODE_SEL].sd, 0, x86_btop(I386_MAX_EXE_ADDR) - 1, | |
969 | SDT_MEMERA, SEL_UPL, 1, 1); | 969 | SDT_MEMERA, SEL_UPL, 1, 1); | |
970 | setsegment(&gdt[GUCODEBIG_SEL].sd, 0, 0xfffff, | 970 | setsegment(&gdt[GUCODEBIG_SEL].sd, 0, 0xfffff, | |
971 | SDT_MEMERA, SEL_UPL, 1, 1); | 971 | SDT_MEMERA, SEL_UPL, 1, 1); | |
972 | setsegment(&gdt[GUDATA_SEL].sd, 0, 0xfffff, | 972 | setsegment(&gdt[GUDATA_SEL].sd, 0, 0xfffff, | |
973 | SDT_MEMRWA, SEL_UPL, 1, 1); | 973 | SDT_MEMRWA, SEL_UPL, 1, 1); | |
974 | #if NBIOSCALL > 0 | 974 | #if NBIOSCALL > 0 | |
975 | /* bios trampoline GDT entries */ | 975 | /* bios trampoline GDT entries */ | |
976 | setsegment(&gdt[GBIOSCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 0, | 976 | setsegment(&gdt[GBIOSCODE_SEL].sd, 0, 0xfffff, SDT_MEMERA, SEL_KPL, 0, | |
977 | 0); | 977 | 0); | |
978 | setsegment(&gdt[GBIOSDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 0, | 978 | setsegment(&gdt[GBIOSDATA_SEL].sd, 0, 0xfffff, SDT_MEMRWA, SEL_KPL, 0, | |
979 | 0); | 979 | 0); | |
980 | #endif | 980 | #endif | |
981 | setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary, 0xfffff, | 981 | setsegment(&gdt[GCPU_SEL].sd, &cpu_info_primary, 0xfffff, | |
982 | SDT_MEMRWA, SEL_KPL, 1, 1); | 982 | SDT_MEMRWA, SEL_KPL, 1, 1); | |
983 | 983 | |||
984 | #ifndef XEN | 984 | #ifndef XEN | |
985 | setregion(®ion, gdt, NGDT * sizeof(gdt[0]) - 1); | 985 | setregion(®ion, gdt, NGDT * sizeof(gdt[0]) - 1); | |
986 | lgdt(®ion); | 986 | lgdt(®ion); | |
987 | #else /* !XEN */ | 987 | #else /* !XEN */ | |
988 | /* | 988 | /* | |
989 | * We jumpstart the bootstrap process a bit so we can update | 989 | * We jumpstart the bootstrap process a bit so we can update | |
990 | * page permissions. This is done redundantly later from | 990 | * page permissions. This is done redundantly later from | |
991 | * x86_xpmap.c:xen_pmap_bootstrap() - harmless. | 991 | * x86_xpmap.c:xen_pmap_bootstrap() - harmless. | |
992 | */ | 992 | */ | |
993 | xpmap_phys_to_machine_mapping = | 993 | xpmap_phys_to_machine_mapping = | |
994 | (unsigned long *)xen_start_info.mfn_list; | 994 | (unsigned long *)xen_start_info.mfn_list; | |
995 | 995 | |||
996 | frames[0] = xpmap_ptom((uint32_t)gdt - KERNBASE) >> PAGE_SHIFT; | 996 | frames[0] = xpmap_ptom((uint32_t)gdt - KERNBASE) >> PAGE_SHIFT; | |
997 | { /* | 997 | { /* | |
998 | * Enter the gdt page RO into the kernel map. We can't | 998 | * Enter the gdt page RO into the kernel map. We can't | |
999 | * use pmap_kenter_pa() here, because %fs is not | 999 | * use pmap_kenter_pa() here, because %fs is not | |
1000 | * usable until the gdt is loaded, and %fs is used as | 1000 | * usable until the gdt is loaded, and %fs is used as | |
1001 | * the base pointer for curcpu() and curlwp(), both of | 1001 | * the base pointer for curcpu() and curlwp(), both of | |
1002 | * which are in the callpath of pmap_kenter_pa(). | 1002 | * which are in the callpath of pmap_kenter_pa(). | |
1003 | * So we mash up our own - this is MD code anyway. | 1003 | * So we mash up our own - this is MD code anyway. | |
1004 | */ | 1004 | */ | |
1005 | pt_entry_t pte; | 1005 | pt_entry_t pte; | |
1006 | pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); | 1006 | pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); | |
1007 | 1007 | |||
1008 | pte = pmap_pa2pte((vaddr_t)gdt - KERNBASE); | 1008 | pte = pmap_pa2pte((vaddr_t)gdt - KERNBASE); | |
1009 | pte |= PG_k | PG_RO | pg_nx | PG_V; | 1009 | pte |= PG_k | PG_RO | pg_nx | PG_V; | |
1010 | 1010 | |||
1011 | if (HYPERVISOR_update_va_mapping((vaddr_t)gdt, pte, UVMF_INVLPG) < 0) { | 1011 | if (HYPERVISOR_update_va_mapping((vaddr_t)gdt, pte, UVMF_INVLPG) < 0) { | |
1012 | panic("gdt page RO update failed.\n"); | 1012 | panic("gdt page RO update failed.\n"); | |
1013 | } | 1013 | } | |
1014 | 1014 | |||
1015 | } | 1015 | } | |
1016 | 1016 | |||
1017 | XENPRINTK(("loading gdt %lx, %d entries\n", frames[0] << PAGE_SHIFT, | 1017 | XENPRINTK(("loading gdt %lx, %d entries\n", frames[0] << PAGE_SHIFT, | |
1018 | NGDT)); | 1018 | NGDT)); | |
1019 | if (HYPERVISOR_set_gdt(frames, NGDT /* XXX is it right ? */)) | 1019 | if (HYPERVISOR_set_gdt(frames, NGDT /* XXX is it right ? */)) | |
1020 | panic("HYPERVISOR_set_gdt failed!\n"); | 1020 | panic("HYPERVISOR_set_gdt failed!\n"); | |
1021 | 1021 | |||
1022 | lgdt_finish(); | 1022 | lgdt_finish(); | |
1023 | #endif /* !XEN */ | 1023 | #endif /* !XEN */ | |
1024 | } | 1024 | } | |
1025 | 1025 | |||
1026 | static void | 1026 | static void | |
1027 | init386_msgbuf(void) | 1027 | init386_msgbuf(void) | |
1028 | { | 1028 | { | |
1029 | /* Message buffer is located at end of core. */ | 1029 | /* Message buffer is located at end of core. */ | |
1030 | struct vm_physseg *vps; | 1030 | struct vm_physseg *vps; | |
1031 | psize_t sz = round_page(MSGBUFSIZE); | 1031 | psize_t sz = round_page(MSGBUFSIZE); | |
1032 | psize_t reqsz = sz; | 1032 | psize_t reqsz = sz; | |
1033 | unsigned int x; | 1033 | unsigned int x; | |
1034 | 1034 | |||
1035 | search_again: | 1035 | search_again: | |
1036 | vps = NULL; | 1036 | vps = NULL; | |
1037 | for (x = 0; x < vm_nphysseg; ++x) { | 1037 | for (x = 0; x < vm_nphysseg; ++x) { | |
1038 | vps = VM_PHYSMEM_PTR(x); | 1038 | vps = VM_PHYSMEM_PTR(x); | |
1039 | if (ctob(vps->avail_end) == avail_end) { | 1039 | if (ctob(vps->avail_end) == avail_end) { | |
1040 | break; | 1040 | break; | |
1041 | } | 1041 | } | |
1042 | } | 1042 | } | |
1043 | if (x == vm_nphysseg) | 1043 | if (x == vm_nphysseg) | |
1044 | panic("init386: can't find end of memory"); | 1044 | panic("init386: can't find end of memory"); | |
1045 | 1045 | |||
1046 | /* Shrink so it'll fit in the last segment. */ | 1046 | /* Shrink so it'll fit in the last segment. */ | |
1047 | if (vps->avail_end - vps->avail_start < atop(sz)) | 1047 | if (vps->avail_end - vps->avail_start < atop(sz)) | |
1048 | sz = ctob(vps->avail_end - vps->avail_start); | 1048 | sz = ctob(vps->avail_end - vps->avail_start); | |
1049 | 1049 | |||
1050 | vps->avail_end -= atop(sz); | 1050 | vps->avail_end -= atop(sz); | |
1051 | vps->end -= atop(sz); | 1051 | vps->end -= atop(sz); | |
1052 | msgbuf_p_seg[msgbuf_p_cnt].sz = sz; | 1052 | msgbuf_p_seg[msgbuf_p_cnt].sz = sz; | |
1053 | msgbuf_p_seg[msgbuf_p_cnt++].paddr = ctob(vps->avail_end); | 1053 | msgbuf_p_seg[msgbuf_p_cnt++].paddr = ctob(vps->avail_end); | |
1054 | 1054 | |||
1055 | /* Remove the last segment if it now has no pages. */ | 1055 | /* Remove the last segment if it now has no pages. */ | |
1056 | if (vps->start == vps->end) { | 1056 | if (vps->start == vps->end) { | |
1057 | for (--vm_nphysseg; x < vm_nphysseg; x++) | 1057 | for (--vm_nphysseg; x < vm_nphysseg; x++) | |
1058 | VM_PHYSMEM_PTR_SWAP(x, x + 1); | 1058 | VM_PHYSMEM_PTR_SWAP(x, x + 1); | |
1059 | } | 1059 | } | |
1060 | 1060 | |||
1061 | /* Now find where the new avail_end is. */ | 1061 | /* Now find where the new avail_end is. */ | |
1062 | for (avail_end = 0, x = 0; x < vm_nphysseg; x++) | 1062 | for (avail_end = 0, x = 0; x < vm_nphysseg; x++) | |
1063 | if (VM_PHYSMEM_PTR(x)->avail_end > avail_end) | 1063 | if (VM_PHYSMEM_PTR(x)->avail_end > avail_end) | |
1064 | avail_end = VM_PHYSMEM_PTR(x)->avail_end; | 1064 | avail_end = VM_PHYSMEM_PTR(x)->avail_end; | |
1065 | avail_end = ctob(avail_end); | 1065 | avail_end = ctob(avail_end); | |
1066 | 1066 | |||
1067 | if (sz == reqsz) | 1067 | if (sz == reqsz) | |
1068 | return; | 1068 | return; | |
1069 | 1069 | |||
1070 | reqsz -= sz; | 1070 | reqsz -= sz; | |
1071 | if (msgbuf_p_cnt == VM_PHYSSEG_MAX) { | 1071 | if (msgbuf_p_cnt == VM_PHYSSEG_MAX) { | |
1072 | /* No more segments available, bail out. */ | 1072 | /* No more segments available, bail out. */ | |
1073 | printf("WARNING: MSGBUFSIZE (%zu) too large, using %zu.\n", | 1073 | printf("WARNING: MSGBUFSIZE (%zu) too large, using %zu.\n", | |
1074 | (size_t)MSGBUFSIZE, (size_t)(MSGBUFSIZE - reqsz)); | 1074 | (size_t)MSGBUFSIZE, (size_t)(MSGBUFSIZE - reqsz)); | |
1075 | return; | 1075 | return; | |
1076 | } | 1076 | } | |
1077 | 1077 | |||
1078 | sz = reqsz; | 1078 | sz = reqsz; | |
1079 | goto search_again; | 1079 | goto search_again; | |
1080 | } | 1080 | } | |
1081 | 1081 | |||
1082 | #ifndef XEN | 1082 | #ifndef XEN | |
1083 | static void | 1083 | static void | |
1084 | init386_pte0(void) | 1084 | init386_pte0(void) | |
1085 | { | 1085 | { | |
1086 | paddr_t paddr; | 1086 | paddr_t paddr; | |
1087 | vaddr_t vaddr; | 1087 | vaddr_t vaddr; | |
1088 | 1088 | |||
1089 | paddr = 4 * PAGE_SIZE; | 1089 | paddr = 4 * PAGE_SIZE; | |
1090 | vaddr = (vaddr_t)vtopte(0); | 1090 | vaddr = (vaddr_t)vtopte(0); | |
1091 | pmap_kenter_pa(vaddr, paddr, VM_PROT_ALL, 0); | 1091 | pmap_kenter_pa(vaddr, paddr, VM_PROT_ALL, 0); | |
1092 | pmap_update(pmap_kernel()); | 1092 | pmap_update(pmap_kernel()); | |
1093 | /* make sure it is clean before using */ | 1093 | /* make sure it is clean before using */ | |
1094 | memset((void *)vaddr, 0, PAGE_SIZE); | 1094 | memset((void *)vaddr, 0, PAGE_SIZE); | |
1095 | } | 1095 | } | |
1096 | #endif /* !XEN */ | 1096 | #endif /* !XEN */ | |
1097 | 1097 | |||
1098 | static void | 1098 | static void | |
1099 | init386_ksyms(void) | 1099 | init386_ksyms(void) | |
1100 | { | 1100 | { | |
1101 | #if NKSYMS || defined(DDB) || defined(MODULAR) | 1101 | #if NKSYMS || defined(DDB) || defined(MODULAR) | |
1102 | extern int end; | 1102 | extern int end; | |
1103 | struct btinfo_symtab *symtab; | 1103 | struct btinfo_symtab *symtab; | |
1104 | 1104 | |||
1105 | #ifdef DDB | 1105 | #ifdef DDB | |
1106 | db_machine_init(); | 1106 | db_machine_init(); | |
1107 | #endif | 1107 | #endif | |
1108 | 1108 | |||
1109 | #if defined(MULTIBOOT) | 1109 | #if defined(MULTIBOOT) | |
1110 | if (multiboot_ksyms_addsyms_elf()) | 1110 | if (multiboot_ksyms_addsyms_elf()) | |
1111 | return; | 1111 | return; | |
1112 | #endif | 1112 | #endif | |
1113 | 1113 | |||
1114 | if ((symtab = lookup_bootinfo(BTINFO_SYMTAB)) == NULL) { | 1114 | if ((symtab = lookup_bootinfo(BTINFO_SYMTAB)) == NULL) { | |
1115 | ksyms_addsyms_elf(*(int *)&end, ((int *)&end) + 1, esym); | 1115 | ksyms_addsyms_elf(*(int *)&end, ((int *)&end) + 1, esym); | |
1116 | return; | 1116 | return; | |
1117 | } | 1117 | } | |
1118 | 1118 | |||
1119 | symtab->ssym += KERNBASE; | 1119 | symtab->ssym += KERNBASE; | |
1120 | symtab->esym += KERNBASE; | 1120 | symtab->esym += KERNBASE; | |
1121 | ksyms_addsyms_elf(symtab->nsym, (int *)symtab->ssym, (int *)symtab->esym); | 1121 | ksyms_addsyms_elf(symtab->nsym, (int *)symtab->ssym, (int *)symtab->esym); | |
1122 | #endif | 1122 | #endif | |
1123 | } | 1123 | } | |
1124 | 1124 | |||
1125 | void | 1125 | void | |
1126 | init386(paddr_t first_avail) | 1126 | init386(paddr_t first_avail) | |
1127 | { | 1127 | { | |
1128 | extern void consinit(void); | 1128 | extern void consinit(void); | |
1129 | int x; | 1129 | int x; | |
1130 | #ifndef XEN | 1130 | #ifndef XEN | |
1131 | union descriptor *tgdt; | 1131 | union descriptor *tgdt; | |
1132 | extern struct extent *iomem_ex; | 1132 | extern struct extent *iomem_ex; | |
1133 | struct region_descriptor region; | 1133 | struct region_descriptor region; | |
1134 | struct btinfo_memmap *bim; | 1134 | struct btinfo_memmap *bim; | |
1135 | #endif | 1135 | #endif | |
1136 | #if NBIOSCALL > 0 | 1136 | #if NBIOSCALL > 0 | |
1137 | extern int biostramp_image_size; | 1137 | extern int biostramp_image_size; | |
1138 | extern u_char biostramp_image[]; | 1138 | extern u_char biostramp_image[]; | |
1139 | #endif | 1139 | #endif | |
1140 | 1140 | |||
1141 | #ifdef XEN | 1141 | #ifdef XEN | |
1142 | XENPRINTK(("HYPERVISOR_shared_info %p (%x)\n", HYPERVISOR_shared_info, | 1142 | XENPRINTK(("HYPERVISOR_shared_info %p (%x)\n", HYPERVISOR_shared_info, | |
1143 | xen_start_info.shared_info)); | 1143 | xen_start_info.shared_info)); | |
1144 | KASSERT(HYPERVISOR_shared_info != NULL); | 1144 | KASSERT(HYPERVISOR_shared_info != NULL); | |
1145 | cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0]; | 1145 | cpu_info_primary.ci_vcpu = &HYPERVISOR_shared_info->vcpu_info[0]; | |
1146 | #endif | 1146 | #endif | |
1147 | cpu_probe(&cpu_info_primary); | 1147 | cpu_probe(&cpu_info_primary); | |
1148 | 1148 | |||
1149 | uvm_lwp_setuarea(&lwp0, lwp0uarea); | 1149 | uvm_lwp_setuarea(&lwp0, lwp0uarea); | |
1150 | 1150 | |||
1151 | cpu_init_msrs(&cpu_info_primary, true); | 1151 | cpu_init_msrs(&cpu_info_primary, true); | |
1152 | 1152 | |||
1153 | #ifdef PAE | 1153 | #ifdef PAE | |
1154 | use_pae = 1; | 1154 | use_pae = 1; | |
1155 | #else | 1155 | #else | |
1156 | use_pae = 0; | 1156 | use_pae = 0; | |
1157 | #endif | 1157 | #endif | |
1158 | 1158 | |||
1159 | #ifdef XEN | 1159 | #ifdef XEN | |
1160 | struct pcb *pcb = lwp_getpcb(&lwp0); | 1160 | struct pcb *pcb = lwp_getpcb(&lwp0); | |
1161 | pcb->pcb_cr3 = PDPpaddr; | 1161 | pcb->pcb_cr3 = PDPpaddr; | |
1162 | __PRINTK(("pcb_cr3 0x%lx cr3 0x%lx\n", | 1162 | __PRINTK(("pcb_cr3 0x%lx cr3 0x%lx\n", | |
1163 | PDPpaddr, xpmap_ptom(PDPpaddr))); | 1163 | PDPpaddr, xpmap_ptom(PDPpaddr))); | |
1164 | XENPRINTK(("lwp0uarea %p first_avail %p\n", | 1164 | XENPRINTK(("lwp0uarea %p first_avail %p\n", | |
1165 | lwp0uarea, (void *)(long)first_avail)); | 1165 | lwp0uarea, (void *)(long)first_avail)); | |
1166 | XENPRINTK(("ptdpaddr %p atdevbase %p\n", (void *)PDPpaddr, | 1166 | XENPRINTK(("ptdpaddr %p atdevbase %p\n", (void *)PDPpaddr, | |
1167 | (void *)atdevbase)); | 1167 | (void *)atdevbase)); | |
1168 | #endif | 1168 | #endif | |
1169 | 1169 | |||
1170 | #if defined(PAE) && !defined(XEN) | 1170 | #if defined(PAE) && !defined(XEN) | |
1171 | /* | 1171 | /* | |
1172 | * Save VA and PA of L3 PD of boot processor (for Xen, this is done | 1172 | * Save VA and PA of L3 PD of boot processor (for Xen, this is done | |
1173 | * in xen_pmap_bootstrap()) | 1173 | * in xen_pmap_bootstrap()) | |
1174 | */ | 1174 | */ | |
1175 | cpu_info_primary.ci_pae_l3_pdirpa = rcr3(); | 1175 | cpu_info_primary.ci_pae_l3_pdirpa = rcr3(); | |
1176 | cpu_info_primary.ci_pae_l3_pdir = (pd_entry_t *)(rcr3() + KERNBASE); | 1176 | cpu_info_primary.ci_pae_l3_pdir = (pd_entry_t *)(rcr3() + KERNBASE); | |
1177 | #endif /* PAE && !XEN */ | 1177 | #endif /* PAE && !XEN */ | |
1178 | 1178 | |||
1179 | #ifdef XEN | 1179 | #ifdef XEN | |
1180 | xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL); | 1180 | xen_parse_cmdline(XEN_PARSE_BOOTFLAGS, NULL); | |
1181 | #endif | 1181 | #endif | |
1182 | 1182 | |||
1183 | /* | 1183 | /* | |
1184 | * Initialize PAGE_SIZE-dependent variables. | 1184 | * Initialize PAGE_SIZE-dependent variables. | |
1185 | */ | 1185 | */ | |
1186 | uvm_setpagesize(); | 1186 | uvm_setpagesize(); | |
1187 | 1187 | |||
1188 | /* | 1188 | /* | |
1189 | * Start with 2 color bins -- this is just a guess to get us | 1189 | * Start with 2 color bins -- this is just a guess to get us | |
1190 | * started. We'll recolor when we determine the largest cache | 1190 | * started. We'll recolor when we determine the largest cache | |
1191 | * sizes on the system. | 1191 | * sizes on the system. | |
1192 | */ | 1192 | */ | |
1193 | uvmexp.ncolors = 2; | 1193 | uvmexp.ncolors = 2; | |
1194 | 1194 | |||
1195 | #ifndef XEN | 1195 | #ifndef XEN | |
1196 | /* | 1196 | /* | |
1197 | * Low memory reservations: | 1197 | * Low memory reservations: | |
1198 | * Page 0: BIOS data | 1198 | * Page 0: BIOS data | |
1199 | * Page 1: BIOS callback | 1199 | * Page 1: BIOS callback | |
1200 | * Page 2: MP bootstrap | 1200 | * Page 2: MP bootstrap code (MP_TRAMPOLINE) | |
1201 | * Page 3: ACPI wakeup code | 1201 | * Page 3: ACPI wakeup code (ACPI_WAKEUP_ADDR) | |
1202 | * Page 4: Temporary page table for 0MB-4MB | 1202 | * Page 4: Temporary page table for 0MB-4MB | |
1203 | * Page 5: Temporary page directory | 1203 | * Page 5: Temporary page directory | |
1204 | */ | 1204 | */ | |
1205 | avail_start = 6 * PAGE_SIZE; | 1205 | avail_start = 6 * PAGE_SIZE; | |
1206 | #else /* !XEN */ | 1206 | #else /* !XEN */ | |
1207 | /* steal one page for gdt */ | 1207 | /* steal one page for gdt */ | |
1208 | gdt = (void *)((u_long)first_avail + KERNBASE); | 1208 | gdt = (void *)((u_long)first_avail + KERNBASE); | |
1209 | first_avail += PAGE_SIZE; | 1209 | first_avail += PAGE_SIZE; | |
1210 | /* Make sure the end of the space used by the kernel is rounded. */ | 1210 | /* Make sure the end of the space used by the kernel is rounded. */ | |
1211 | first_avail = round_page(first_avail); | 1211 | first_avail = round_page(first_avail); | |
1212 | avail_start = first_avail; | 1212 | avail_start = first_avail; | |
1213 | avail_end = ctob((paddr_t)xen_start_info.nr_pages); | 1213 | avail_end = ctob((paddr_t)xen_start_info.nr_pages); | |
1214 | pmap_pa_start = (KERNTEXTOFF - KERNBASE); | 1214 | pmap_pa_start = (KERNTEXTOFF - KERNBASE); | |
1215 | pmap_pa_end = pmap_pa_start + ctob((paddr_t)xen_start_info.nr_pages); | 1215 | pmap_pa_end = pmap_pa_start + ctob((paddr_t)xen_start_info.nr_pages); | |
1216 | mem_clusters[0].start = avail_start; | 1216 | mem_clusters[0].start = avail_start; | |
1217 | mem_clusters[0].size = avail_end - avail_start; | 1217 | mem_clusters[0].size = avail_end - avail_start; | |
1218 | mem_cluster_cnt++; | 1218 | mem_cluster_cnt++; | |
1219 | physmem += xen_start_info.nr_pages; | 1219 | physmem += xen_start_info.nr_pages; | |
1220 | uvmexp.wired += atop(avail_start); | 1220 | uvmexp.wired += atop(avail_start); | |
1221 | /* | 1221 | /* | |
1222 | * initgdt() has to be done before consinit(), so that %fs is properly | 1222 | * initgdt() has to be done before consinit(), so that %fs is properly | |
1223 | * initialised. initgdt() uses pmap_kenter_pa so it can't be called | 1223 | * initialised. initgdt() uses pmap_kenter_pa so it can't be called | |
1224 | * before the above variables are set. | 1224 | * before the above variables are set. | |
1225 | */ | 1225 | */ | |
1226 | 1226 | |||
1227 | initgdt(gdt); | 1227 | initgdt(gdt); | |
1228 | 1228 | |||
1229 | mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM); | 1229 | mutex_init(&pte_lock, MUTEX_DEFAULT, IPL_VM); | |
1230 | #endif /* XEN */ | 1230 | #endif /* XEN */ | |
1231 | 1231 | |||
1232 | #if NISA > 0 || NPCI > 0 | 1232 | #if NISA > 0 || NPCI > 0 | |
1233 | x86_bus_space_init(); | 1233 | x86_bus_space_init(); | |
1234 | #endif /* NISA > 0 || NPCI > 0 */ | 1234 | #endif /* NISA > 0 || NPCI > 0 */ | |
1235 | 1235 | |||
1236 | consinit(); /* XXX SHOULD NOT BE DONE HERE */ | 1236 | consinit(); /* XXX SHOULD NOT BE DONE HERE */ | |
1237 | 1237 | |||
1238 | #ifdef DEBUG_MEMLOAD | 1238 | #ifdef DEBUG_MEMLOAD | |
1239 | printf("mem_cluster_count: %d\n", mem_cluster_cnt); | 1239 | printf("mem_cluster_count: %d\n", mem_cluster_cnt); | |
1240 | #endif | 1240 | #endif | |
1241 | 1241 | |||
1242 | /* | 1242 | /* | |
1243 | * Call pmap initialization to make new kernel address space. | 1243 | * Call pmap initialization to make new kernel address space. | |
1244 | * We must do this before loading pages into the VM system. | 1244 | * We must do this before loading pages into the VM system. | |
1245 | */ | 1245 | */ | |
1246 | pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE); | 1246 | pmap_bootstrap((vaddr_t)atdevbase + IOM_SIZE); | |
1247 | 1247 | |||
1248 | #ifndef XEN | 1248 | #ifndef XEN | |
1249 | /* | 1249 | /* | |
1250 | * Check to see if we have a memory map from the BIOS (passed | 1250 | * Check to see if we have a memory map from the BIOS (passed | |
1251 | * to us by the boot program. | 1251 | * to us by the boot program. | |
1252 | */ | 1252 | */ | |
1253 | bim = lookup_bootinfo(BTINFO_MEMMAP); | 1253 | bim = lookup_bootinfo(BTINFO_MEMMAP); | |
1254 | if ((biosmem_implicit || (biosbasemem == 0 && biosextmem == 0)) && | 1254 | if ((biosmem_implicit || (biosbasemem == 0 && biosextmem == 0)) && | |
1255 | bim != NULL && bim->num > 0) | 1255 | bim != NULL && bim->num > 0) | |
1256 | initx86_parse_memmap(bim, iomem_ex); | 1256 | initx86_parse_memmap(bim, iomem_ex); | |
1257 | 1257 | |||
1258 | /* | 1258 | /* | |
1259 | * If the loop above didn't find any valid segment, fall back to | 1259 | * If the loop above didn't find any valid segment, fall back to | |
1260 | * former code. | 1260 | * former code. | |
1261 | */ | 1261 | */ | |
1262 | if (mem_cluster_cnt == 0) | 1262 | if (mem_cluster_cnt == 0) | |
1263 | initx86_fake_memmap(iomem_ex); | 1263 | initx86_fake_memmap(iomem_ex); | |
1264 | 1264 | |||
1265 | initx86_load_memmap(first_avail); | 1265 | initx86_load_memmap(first_avail); | |
1266 | 1266 | |||
1267 | #else /* !XEN */ | 1267 | #else /* !XEN */ | |
1268 | XENPRINTK(("load the memory cluster 0x%" PRIx64 " (%" PRId64 ") - " | 1268 | XENPRINTK(("load the memory cluster 0x%" PRIx64 " (%" PRId64 ") - " | |
1269 | "0x%" PRIx64 " (%" PRId64 ")\n", | 1269 | "0x%" PRIx64 " (%" PRId64 ")\n", | |
1270 | (uint64_t)avail_start, (uint64_t)atop(avail_start), | 1270 | (uint64_t)avail_start, (uint64_t)atop(avail_start), | |
1271 | (uint64_t)avail_end, (uint64_t)atop(avail_end))); | 1271 | (uint64_t)avail_end, (uint64_t)atop(avail_end))); | |
1272 | uvm_page_physload(atop(avail_start), atop(avail_end), | 1272 | uvm_page_physload(atop(avail_start), atop(avail_end), | |
1273 | atop(avail_start), atop(avail_end), | 1273 | atop(avail_start), atop(avail_end), | |
1274 | VM_FREELIST_DEFAULT); | 1274 | VM_FREELIST_DEFAULT); | |
1275 | 1275 | |||
1276 | /* Reclaim the boot gdt page - see locore.s */ | 1276 | /* Reclaim the boot gdt page - see locore.s */ | |
1277 | { | 1277 | { | |
1278 | pt_entry_t pte; | 1278 | pt_entry_t pte; | |
1279 | pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); | 1279 | pt_entry_t pg_nx = (cpu_feature[2] & CPUID_NOX ? PG_NX : 0); | |
1280 | 1280 | |||
1281 | pte = pmap_pa2pte((vaddr_t)tmpgdt - KERNBASE); | 1281 | pte = pmap_pa2pte((vaddr_t)tmpgdt - KERNBASE); | |
1282 | pte |= PG_k | PG_RW | pg_nx | PG_V; | 1282 | pte |= PG_k | PG_RW | pg_nx | PG_V; | |
1283 | 1283 | |||
1284 | if (HYPERVISOR_update_va_mapping((vaddr_t)tmpgdt, pte, UVMF_INVLPG) < 0) { | 1284 | if (HYPERVISOR_update_va_mapping((vaddr_t)tmpgdt, pte, UVMF_INVLPG) < 0) { | |
1285 | panic("tmpgdt page relaim RW update failed.\n"); | 1285 | panic("tmpgdt page relaim RW update failed.\n"); | |
1286 | } | 1286 | } | |
1287 | } | 1287 | } | |
1288 | 1288 | |||
1289 | #endif /* !XEN */ | 1289 | #endif /* !XEN */ | |
1290 | 1290 | |||
1291 | init386_msgbuf(); | 1291 | init386_msgbuf(); | |
1292 | 1292 | |||
1293 | #ifndef XEN | 1293 | #ifndef XEN | |
1294 | /* | 1294 | /* | |
1295 | * XXX Remove this | 1295 | * XXX Remove this | |
1296 | * | 1296 | * | |
1297 | * Setup a temporary Page Table Entry to allow identity mappings of | 1297 | * Setup a temporary Page Table Entry to allow identity mappings of | |
1298 | * the real mode address. This is required by: | 1298 | * the real mode address. This is required by: | |
1299 | * - bioscall | 1299 | * - bioscall | |
1300 | * - MP bootstrap | 1300 | * - MP bootstrap | |
1301 | * - ACPI wakecode | 1301 | * - ACPI wakecode | |
1302 | */ | 1302 | */ | |
1303 | init386_pte0(); | 1303 | init386_pte0(); | |
1304 | 1304 | |||
1305 | #if NBIOSCALL > 0 | 1305 | #if NBIOSCALL > 0 | |
1306 | KASSERT(biostramp_image_size <= PAGE_SIZE); | 1306 | KASSERT(biostramp_image_size <= PAGE_SIZE); | |
1307 | pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE, /* virtual */ | 1307 | pmap_kenter_pa((vaddr_t)BIOSTRAMP_BASE, /* virtual */ | |
1308 | (paddr_t)BIOSTRAMP_BASE, /* physical */ | 1308 | (paddr_t)BIOSTRAMP_BASE, /* physical */ | |
1309 | VM_PROT_ALL, 0); /* protection */ | 1309 | VM_PROT_ALL, 0); /* protection */ | |
1310 | pmap_update(pmap_kernel()); | 1310 | pmap_update(pmap_kernel()); | |
1311 | memcpy((void *)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size); | 1311 | memcpy((void *)BIOSTRAMP_BASE, biostramp_image, biostramp_image_size); | |
1312 | 1312 | |||
1313 | /* Needed early, for bioscall() */ | 1313 | /* Needed early, for bioscall() */ | |
1314 | cpu_info_primary.ci_pmap = pmap_kernel(); | 1314 | cpu_info_primary.ci_pmap = pmap_kernel(); | |
1315 | #endif | 1315 | #endif | |
1316 | #endif /* !XEN */ | 1316 | #endif /* !XEN */ | |
1317 | 1317 | |||
1318 | pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0); | 1318 | pmap_kenter_pa(idt_vaddr, idt_paddr, VM_PROT_READ|VM_PROT_WRITE, 0); | |
1319 | pmap_update(pmap_kernel()); | 1319 | pmap_update(pmap_kernel()); | |
1320 | memset((void *)idt_vaddr, 0, PAGE_SIZE); | 1320 | memset((void *)idt_vaddr, 0, PAGE_SIZE); | |
1321 | 1321 | |||
1322 | 1322 | |||
1323 | #ifndef XEN | 1323 | #ifndef XEN | |
1324 | idt_init(); | 1324 | idt_init(); | |
1325 | 1325 | |||
1326 | idt = (struct gate_descriptor *)idt_vaddr; | 1326 | idt = (struct gate_descriptor *)idt_vaddr; | |
1327 | pmap_kenter_pa(pentium_idt_vaddr, idt_paddr, VM_PROT_READ, 0); | 1327 | pmap_kenter_pa(pentium_idt_vaddr, idt_paddr, VM_PROT_READ, 0); | |
1328 | pmap_update(pmap_kernel()); | 1328 | pmap_update(pmap_kernel()); | |
1329 | pentium_idt = (union descriptor *)pentium_idt_vaddr; | 1329 | pentium_idt = (union descriptor *)pentium_idt_vaddr; | |
1330 | 1330 | |||
1331 | tgdt = gdt; | 1331 | tgdt = gdt; | |
1332 | gdt = (union descriptor *) | 1332 | gdt = (union descriptor *) | |
1333 | ((char *)idt + NIDT * sizeof (struct gate_descriptor)); | 1333 | ((char *)idt + NIDT * sizeof (struct gate_descriptor)); | |
1334 | ldt = gdt + NGDT; | 1334 | ldt = gdt + NGDT; | |
1335 | 1335 | |||
1336 | memcpy(gdt, tgdt, NGDT*sizeof(*gdt)); | 1336 | memcpy(gdt, tgdt, NGDT*sizeof(*gdt)); | |
1337 | 1337 | |||
1338 | setsegment(&gdt[GLDT_SEL].sd, ldt, NLDT * sizeof(ldt[0]) - 1, | 1338 | setsegment(&gdt[GLDT_SEL].sd, ldt, NLDT * sizeof(ldt[0]) - 1, | |
1339 | SDT_SYSLDT, SEL_KPL, 0, 0); | 1339 | SDT_SYSLDT, SEL_KPL, 0, 0); | |
1340 | #else | 1340 | #else | |
1341 | HYPERVISOR_set_callbacks( | 1341 | HYPERVISOR_set_callbacks( | |
1342 | GSEL(GCODE_SEL, SEL_KPL), (unsigned long)hypervisor_callback, | 1342 | GSEL(GCODE_SEL, SEL_KPL), (unsigned long)hypervisor_callback, | |
1343 | GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); | 1343 | GSEL(GCODE_SEL, SEL_KPL), (unsigned long)failsafe_callback); | |
1344 | 1344 | |||
1345 | ldt = (union descriptor *)idt_vaddr; | 1345 | ldt = (union descriptor *)idt_vaddr; | |
1346 | #endif /* XEN */ | 1346 | #endif /* XEN */ | |
1347 | 1347 | |||
1348 | /* make ldt gates and memory segments */ | 1348 | /* make ldt gates and memory segments */ | |
1349 | setgate(&ldt[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1, | 1349 | setgate(&ldt[LSYS5CALLS_SEL].gd, &IDTVEC(osyscall), 1, | |
1350 | SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); | 1350 | SDT_SYS386CGT, SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); | |
1351 | 1351 | |||
1352 | ldt[LUCODE_SEL] = gdt[GUCODE_SEL]; | 1352 | ldt[LUCODE_SEL] = gdt[GUCODE_SEL]; | |
1353 | ldt[LUCODEBIG_SEL] = gdt[GUCODEBIG_SEL]; | 1353 | ldt[LUCODEBIG_SEL] = gdt[GUCODEBIG_SEL]; | |
1354 | ldt[LUDATA_SEL] = gdt[GUDATA_SEL]; | 1354 | ldt[LUDATA_SEL] = gdt[GUDATA_SEL]; | |
1355 | ldt[LSOL26CALLS_SEL] = ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; | 1355 | ldt[LSOL26CALLS_SEL] = ldt[LBSDICALLS_SEL] = ldt[LSYS5CALLS_SEL]; | |
1356 | 1356 | |||
1357 | #ifndef XEN | 1357 | #ifndef XEN | |
1358 | /* exceptions */ | 1358 | /* exceptions */ | |
1359 | for (x = 0; x < 32; x++) { | 1359 | for (x = 0; x < 32; x++) { | |
1360 | idt_vec_reserve(x); | 1360 | idt_vec_reserve(x); | |
1361 | setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386IGT, | 1361 | setgate(&idt[x], IDTVEC(exceptions)[x], 0, SDT_SYS386IGT, | |
1362 | (x == 3 || x == 4) ? SEL_UPL : SEL_KPL, | 1362 | (x == 3 || x == 4) ? SEL_UPL : SEL_KPL, | |
1363 | GSEL(GCODE_SEL, SEL_KPL)); | 1363 | GSEL(GCODE_SEL, SEL_KPL)); | |
1364 | } | 1364 | } | |
1365 | 1365 | |||
1366 | /* new-style interrupt gate for syscalls */ | 1366 | /* new-style interrupt gate for syscalls */ | |
1367 | idt_vec_reserve(128); | 1367 | idt_vec_reserve(128); | |
1368 | setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL, | 1368 | setgate(&idt[128], &IDTVEC(syscall), 0, SDT_SYS386IGT, SEL_UPL, | |
1369 | GSEL(GCODE_SEL, SEL_KPL)); | 1369 | GSEL(GCODE_SEL, SEL_KPL)); | |
1370 | idt_vec_reserve(0xd2); | 1370 | idt_vec_reserve(0xd2); | |
1371 | setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386IGT, | 1371 | setgate(&idt[0xd2], &IDTVEC(svr4_fasttrap), 0, SDT_SYS386IGT, | |
1372 | SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); | 1372 | SEL_UPL, GSEL(GCODE_SEL, SEL_KPL)); | |
1373 | 1373 | |||
1374 | setregion(®ion, gdt, NGDT * sizeof(gdt[0]) - 1); | 1374 | setregion(®ion, gdt, NGDT * sizeof(gdt[0]) - 1); | |
1375 | lgdt(®ion); | 1375 | lgdt(®ion); | |
1376 | 1376 | |||
1377 | cpu_init_idt(); | 1377 | cpu_init_idt(); | |
1378 | #else /* !XEN */ | 1378 | #else /* !XEN */ | |
1379 | memset(xen_idt, 0, sizeof(trap_info_t) * MAX_XEN_IDT); | 1379 | memset(xen_idt, 0, sizeof(trap_info_t) * MAX_XEN_IDT); | |
1380 | xen_idt_idx = 0; | 1380 | xen_idt_idx = 0; | |
1381 | for (x = 0; x < 32; x++) { | 1381 | for (x = 0; x < 32; x++) { | |
1382 | KASSERT(xen_idt_idx < MAX_XEN_IDT); | 1382 | KASSERT(xen_idt_idx < MAX_XEN_IDT); | |
1383 | xen_idt[xen_idt_idx].vector = x; | 1383 | xen_idt[xen_idt_idx].vector = x; | |
1384 | 1384 | |||
1385 | switch (x) { | 1385 | switch (x) { | |
1386 | case 2: /* NMI */ | 1386 | case 2: /* NMI */ | |
1387 | case 18: /* MCA */ | 1387 | case 18: /* MCA */ | |
1388 | TI_SET_IF(&(xen_idt[xen_idt_idx]), 2); | 1388 | TI_SET_IF(&(xen_idt[xen_idt_idx]), 2); | |
1389 | break; | 1389 | break; | |
1390 | case 3: | 1390 | case 3: | |
1391 | case 4: | 1391 | case 4: | |
1392 | xen_idt[xen_idt_idx].flags = SEL_UPL; | 1392 | xen_idt[xen_idt_idx].flags = SEL_UPL; | |
1393 | break; | 1393 | break; | |
1394 | default: | 1394 | default: | |
1395 | xen_idt[xen_idt_idx].flags = SEL_XEN; | 1395 | xen_idt[xen_idt_idx].flags = SEL_XEN; | |
1396 | break; | 1396 | break; | |
1397 | } | 1397 | } | |
1398 | 1398 | |||
1399 | xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); | 1399 | xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); | |
1400 | xen_idt[xen_idt_idx].address = | 1400 | xen_idt[xen_idt_idx].address = | |
1401 | (uint32_t)IDTVEC(exceptions)[x]; | 1401 | (uint32_t)IDTVEC(exceptions)[x]; | |
1402 | xen_idt_idx++; | 1402 | xen_idt_idx++; | |
1403 | } | 1403 | } | |
1404 | KASSERT(xen_idt_idx < MAX_XEN_IDT); | 1404 | KASSERT(xen_idt_idx < MAX_XEN_IDT); | |
1405 | xen_idt[xen_idt_idx].vector = 128; | 1405 | xen_idt[xen_idt_idx].vector = 128; | |
1406 | xen_idt[xen_idt_idx].flags = SEL_UPL; | 1406 | xen_idt[xen_idt_idx].flags = SEL_UPL; | |
1407 | xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); | 1407 | xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); | |
1408 | xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(syscall); | 1408 | xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(syscall); | |
1409 | xen_idt_idx++; | 1409 | xen_idt_idx++; | |
1410 | KASSERT(xen_idt_idx < MAX_XEN_IDT); | 1410 | KASSERT(xen_idt_idx < MAX_XEN_IDT); | |
1411 | xen_idt[xen_idt_idx].vector = 0xd2; | 1411 | xen_idt[xen_idt_idx].vector = 0xd2; | |
1412 | xen_idt[xen_idt_idx].flags = SEL_UPL; | 1412 | xen_idt[xen_idt_idx].flags = SEL_UPL; | |
1413 | xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); | 1413 | xen_idt[xen_idt_idx].cs = GSEL(GCODE_SEL, SEL_KPL); | |
1414 | xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(svr4_fasttrap); | 1414 | xen_idt[xen_idt_idx].address = (uint32_t)&IDTVEC(svr4_fasttrap); | |
1415 | xen_idt_idx++; | 1415 | xen_idt_idx++; | |
1416 | lldt(GSEL(GLDT_SEL, SEL_KPL)); | 1416 | lldt(GSEL(GLDT_SEL, SEL_KPL)); | |
1417 | cpu_init_idt(); | 1417 | cpu_init_idt(); | |
1418 | #endif /* XEN */ | 1418 | #endif /* XEN */ | |
1419 | 1419 | |||
1420 | init386_ksyms(); | 1420 | init386_ksyms(); | |
1421 | 1421 | |||
1422 | #if NMCA > 0 | 1422 | #if NMCA > 0 | |
1423 | /* check for MCA bus, needed to be done before ISA stuff - if | 1423 | /* check for MCA bus, needed to be done before ISA stuff - if | |
1424 | * MCA is detected, ISA needs to use level triggered interrupts | 1424 | * MCA is detected, ISA needs to use level triggered interrupts | |
1425 | * by default */ | 1425 | * by default */ | |
1426 | mca_busprobe(); | 1426 | mca_busprobe(); | |
1427 | #endif | 1427 | #endif | |
1428 | 1428 | |||
1429 | #ifdef XEN | 1429 | #ifdef XEN | |
1430 | XENPRINTF(("events_default_setup\n")); | 1430 | XENPRINTF(("events_default_setup\n")); | |
1431 | events_default_setup(); | 1431 | events_default_setup(); | |
1432 | #else | 1432 | #else | |
1433 | intr_default_setup(); | 1433 | intr_default_setup(); | |
1434 | #endif | 1434 | #endif | |
1435 | 1435 | |||
1436 | splraise(IPL_HIGH); | 1436 | splraise(IPL_HIGH); | |
1437 | x86_enable_intr(); | 1437 | x86_enable_intr(); | |
1438 | 1438 | |||
1439 | #ifdef DDB | 1439 | #ifdef DDB | |
1440 | if (boothowto & RB_KDB) | 1440 | if (boothowto & RB_KDB) | |
1441 | Debugger(); | 1441 | Debugger(); | |
1442 | #endif | 1442 | #endif | |
1443 | #ifdef IPKDB | 1443 | #ifdef IPKDB | |
1444 | ipkdb_init(); | 1444 | ipkdb_init(); | |
1445 | if (boothowto & RB_KDB) | 1445 | if (boothowto & RB_KDB) | |
1446 | ipkdb_connect(0); | 1446 | ipkdb_connect(0); | |
1447 | #endif | 1447 | #endif | |
1448 | #ifdef KGDB | 1448 | #ifdef KGDB | |
1449 | kgdb_port_init(); | 1449 | kgdb_port_init(); | |
1450 | if (boothowto & RB_KDB) { | 1450 | if (boothowto & RB_KDB) { | |
1451 | kgdb_debug_init = 1; | 1451 | kgdb_debug_init = 1; | |
1452 | kgdb_connect(1); | 1452 | kgdb_connect(1); | |
1453 | } | 1453 | } | |
1454 | #endif | 1454 | #endif | |
1455 | 1455 | |||
1456 | if (physmem < btoc(2 * 1024 * 1024)) { | 1456 | if (physmem < btoc(2 * 1024 * 1024)) { | |
1457 | printf("warning: too little memory available; " | 1457 | printf("warning: too little memory available; " | |
1458 | "have %lu bytes, want %lu bytes\n" | 1458 | "have %lu bytes, want %lu bytes\n" | |
1459 | "running in degraded mode\n" | 1459 | "running in degraded mode\n" | |
1460 | "press a key to confirm\n\n", | 1460 | "press a key to confirm\n\n", | |
1461 | (unsigned long)ptoa(physmem), 2*1024*1024UL); | 1461 | (unsigned long)ptoa(physmem), 2*1024*1024UL); | |
1462 | cngetc(); | 1462 | cngetc(); | |
1463 | } | 1463 | } | |
1464 | 1464 | |||
1465 | rw_init(&svr4_fasttrap_lock); | 1465 | rw_init(&svr4_fasttrap_lock); | |
1466 | } | 1466 | } | |
1467 | 1467 | |||
1468 | #include <dev/ic/mc146818reg.h> /* for NVRAM POST */ | 1468 | #include <dev/ic/mc146818reg.h> /* for NVRAM POST */ | |
1469 | #include <i386/isa/nvram.h> /* for NVRAM POST */ | 1469 | #include <i386/isa/nvram.h> /* for NVRAM POST */ | |
1470 | 1470 | |||
1471 | void | 1471 | void | |
1472 | cpu_reset(void) | 1472 | cpu_reset(void) | |
1473 | { | 1473 | { | |
1474 | #ifdef XEN | 1474 | #ifdef XEN | |
1475 | HYPERVISOR_reboot(); | 1475 | HYPERVISOR_reboot(); | |
1476 | for (;;); | 1476 | for (;;); | |
1477 | #else /* XEN */ | 1477 | #else /* XEN */ | |
1478 | struct region_descriptor region; | 1478 | struct region_descriptor region; | |
1479 | 1479 | |||
1480 | x86_disable_intr(); | 1480 | x86_disable_intr(); | |
1481 | 1481 | |||
1482 | /* | 1482 | /* | |
1483 | * Ensure the NVRAM reset byte contains something vaguely sane. | 1483 | * Ensure the NVRAM reset byte contains something vaguely sane. | |
1484 | */ | 1484 | */ | |
1485 | 1485 | |||
1486 | outb(IO_RTC, NVRAM_RESET); | 1486 | outb(IO_RTC, NVRAM_RESET); | |
1487 | outb(IO_RTC+1, NVRAM_RESET_RST); | 1487 | outb(IO_RTC+1, NVRAM_RESET_RST); | |
1488 | 1488 | |||
1489 | /* | 1489 | /* | |
1490 | * Reset AMD Geode SC1100. | 1490 | * Reset AMD Geode SC1100. | |
1491 | * | 1491 | * | |
1492 | * 1) Write PCI Configuration Address Register (0xcf8) to | 1492 | * 1) Write PCI Configuration Address Register (0xcf8) to | |
1493 | * select Function 0, Register 0x44: Bridge Configuration, | 1493 | * select Function 0, Register 0x44: Bridge Configuration, | |
1494 | * GPIO and LPC Configuration Register Space, Reset | 1494 | * GPIO and LPC Configuration Register Space, Reset | |
1495 | * Control Register. | 1495 | * Control Register. | |
1496 | * | 1496 | * | |
1497 | * 2) Write 0xf to PCI Configuration Data Register (0xcfc) | 1497 | * 2) Write 0xf to PCI Configuration Data Register (0xcfc) | |
1498 | * to reset IDE controller, IDE bus, and PCI bus, and | 1498 | * to reset IDE controller, IDE bus, and PCI bus, and | |
1499 | * to trigger a system-wide reset. | 1499 | * to trigger a system-wide reset. | |
1500 | * | 1500 | * | |
1501 | * See AMD Geode SC1100 Processor Data Book, Revision 2.0, | 1501 | * See AMD Geode SC1100 Processor Data Book, Revision 2.0, | |
1502 | * sections 6.3.1, 6.3.2, and 6.4.1. | 1502 | * sections 6.3.1, 6.3.2, and 6.4.1. | |
1503 | */ | 1503 | */ | |
1504 | if (cpu_info_primary.ci_signature == 0x540) { | 1504 | if (cpu_info_primary.ci_signature == 0x540) { | |
1505 | outl(0xcf8, 0x80009044); | 1505 | outl(0xcf8, 0x80009044); | |
1506 | outl(0xcfc, 0xf); | 1506 | outl(0xcfc, 0xf); | |
1507 | } | 1507 | } | |
1508 | 1508 | |||
1509 | x86_reset(); | 1509 | x86_reset(); | |
1510 | 1510 | |||
1511 | /* | 1511 | /* | |
1512 | * Try to cause a triple fault and watchdog reset by making the IDT | 1512 | * Try to cause a triple fault and watchdog reset by making the IDT | |
1513 | * invalid and causing a fault. | 1513 | * invalid and causing a fault. | |
1514 | */ | 1514 | */ | |
1515 | memset((void *)idt, 0, NIDT * sizeof(idt[0])); | 1515 | memset((void *)idt, 0, NIDT * sizeof(idt[0])); | |
1516 | setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1); | 1516 | setregion(®ion, idt, NIDT * sizeof(idt[0]) - 1); | |
1517 | lidt(®ion); | 1517 | lidt(®ion); | |
1518 | breakpoint(); | 1518 | breakpoint(); | |
1519 | 1519 | |||
1520 | #if 0 | 1520 | #if 0 | |
1521 | /* | 1521 | /* | |
1522 | * Try to cause a triple fault and watchdog reset by unmapping the | 1522 | * Try to cause a triple fault and watchdog reset by unmapping the | |
1523 | * entire address space and doing a TLB flush. | 1523 | * entire address space and doing a TLB flush. | |
1524 | */ | 1524 | */ | |
1525 | memset((void *)PTD, 0, PAGE_SIZE); | 1525 | memset((void *)PTD, 0, PAGE_SIZE); | |
1526 | tlbflush(); | 1526 | tlbflush(); | |
1527 | #endif | 1527 | #endif | |
1528 | 1528 | |||
1529 | for (;;); | 1529 | for (;;); | |
1530 | #endif /* XEN */ | 1530 | #endif /* XEN */ | |
1531 | } | 1531 | } | |
1532 | 1532 | |||
1533 | void | 1533 | void | |
1534 | cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags) | 1534 | cpu_getmcontext(struct lwp *l, mcontext_t *mcp, unsigned int *flags) | |
1535 | { | 1535 | { | |
1536 | const struct trapframe *tf = l->l_md.md_regs; | 1536 | const struct trapframe *tf = l->l_md.md_regs; | |
1537 | __greg_t *gr = mcp->__gregs; | 1537 | __greg_t *gr = mcp->__gregs; | |
1538 | __greg_t ras_eip; | 1538 | __greg_t ras_eip; | |
1539 | 1539 | |||
1540 | /* Save register context. */ | 1540 | /* Save register context. */ | |
1541 | #ifdef VM86 | 1541 | #ifdef VM86 | |
1542 | if (tf->tf_eflags & PSL_VM) { | 1542 | if (tf->tf_eflags & PSL_VM) { | |
1543 | gr[_REG_GS] = tf->tf_vm86_gs; | 1543 | gr[_REG_GS] = tf->tf_vm86_gs; | |
1544 | gr[_REG_FS] = tf->tf_vm86_fs; | 1544 | gr[_REG_FS] = tf->tf_vm86_fs; | |
1545 | gr[_REG_ES] = tf->tf_vm86_es; | 1545 | gr[_REG_ES] = tf->tf_vm86_es; | |
1546 | gr[_REG_DS] = tf->tf_vm86_ds; | 1546 | gr[_REG_DS] = tf->tf_vm86_ds; | |
1547 | gr[_REG_EFL] = get_vflags(l); | 1547 | gr[_REG_EFL] = get_vflags(l); | |
1548 | } else | 1548 | } else | |
1549 | #endif | 1549 | #endif | |
1550 | { | 1550 | { | |
1551 | gr[_REG_GS] = tf->tf_gs; | 1551 | gr[_REG_GS] = tf->tf_gs; | |
1552 | gr[_REG_FS] = tf->tf_fs; | 1552 | gr[_REG_FS] = tf->tf_fs; | |
1553 | gr[_REG_ES] = tf->tf_es; | 1553 | gr[_REG_ES] = tf->tf_es; | |
1554 | gr[_REG_DS] = tf->tf_ds; | 1554 | gr[_REG_DS] = tf->tf_ds; | |
1555 | gr[_REG_EFL] = tf->tf_eflags; | 1555 | gr[_REG_EFL] = tf->tf_eflags; | |
1556 | } | 1556 | } | |
1557 | gr[_REG_EDI] = tf->tf_edi; | 1557 | gr[_REG_EDI] = tf->tf_edi; | |
1558 | gr[_REG_ESI] = tf->tf_esi; | 1558 | gr[_REG_ESI] = tf->tf_esi; | |
1559 | gr[_REG_EBP] = tf->tf_ebp; | 1559 | gr[_REG_EBP] = tf->tf_ebp; | |
1560 | gr[_REG_EBX] = tf->tf_ebx; | 1560 | gr[_REG_EBX] = tf->tf_ebx; | |
1561 | gr[_REG_EDX] = tf->tf_edx; | 1561 | gr[_REG_EDX] = tf->tf_edx; | |
1562 | gr[_REG_ECX] = tf->tf_ecx; | 1562 | gr[_REG_ECX] = tf->tf_ecx; | |
1563 | gr[_REG_EAX] = tf->tf_eax; | 1563 | gr[_REG_EAX] = tf->tf_eax; | |
1564 | gr[_REG_EIP] = tf->tf_eip; | 1564 | gr[_REG_EIP] = tf->tf_eip; | |
1565 | gr[_REG_CS] = tf->tf_cs; | 1565 | gr[_REG_CS] = tf->tf_cs; | |
1566 | gr[_REG_ESP] = tf->tf_esp; | 1566 | gr[_REG_ESP] = tf->tf_esp; | |
1567 | gr[_REG_UESP] = tf->tf_esp; | 1567 | gr[_REG_UESP] = tf->tf_esp; | |
1568 | gr[_REG_SS] = tf->tf_ss; | 1568 | gr[_REG_SS] = tf->tf_ss; | |
1569 | gr[_REG_TRAPNO] = tf->tf_trapno; | 1569 | gr[_REG_TRAPNO] = tf->tf_trapno; | |
1570 | gr[_REG_ERR] = tf->tf_err; | 1570 | gr[_REG_ERR] = tf->tf_err; | |
1571 | 1571 | |||
1572 | if ((ras_eip = (__greg_t)ras_lookup(l->l_proc, | 1572 | if ((ras_eip = (__greg_t)ras_lookup(l->l_proc, | |
1573 | (void *) gr[_REG_EIP])) != -1) | 1573 | (void *) gr[_REG_EIP])) != -1) | |
1574 | gr[_REG_EIP] = ras_eip; | 1574 | gr[_REG_EIP] = ras_eip; | |
1575 | 1575 | |||
1576 | *flags |= _UC_CPU; | 1576 | *flags |= _UC_CPU; | |
1577 | 1577 | |||
1578 | mcp->_mc_tlsbase = (uintptr_t)l->l_private; | 1578 | mcp->_mc_tlsbase = (uintptr_t)l->l_private; | |
1579 | *flags |= _UC_TLSBASE; | 1579 | *flags |= _UC_TLSBASE; | |
1580 | 1580 | |||
1581 | /* | 1581 | /* | |
1582 | * Save floating point register context. | 1582 | * Save floating point register context. | |
1583 | * | 1583 | * | |
1584 | * If the cpu doesn't support fxsave we must still write to | 1584 | * If the cpu doesn't support fxsave we must still write to | |
1585 | * the entire 512 byte area - otherwise we leak kernel memory | 1585 | * the entire 512 byte area - otherwise we leak kernel memory | |
1586 | * contents to userspace. | 1586 | * contents to userspace. | |
1587 | * It wouldn't matter if we were doing the copyout here. | 1587 | * It wouldn't matter if we were doing the copyout here. | |
1588 | * So we might as well convert to fxsave format. | 1588 | * So we might as well convert to fxsave format. | |
1589 | */ | 1589 | */ | |
1590 | __CTASSERT(sizeof (struct fxsave) == | 1590 | __CTASSERT(sizeof (struct fxsave) == | |
1591 | sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state); | 1591 | sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state); | |
1592 | process_read_fpregs_xmm(l, (struct fxsave *) | 1592 | process_read_fpregs_xmm(l, (struct fxsave *) | |
1593 | &mcp->__fpregs.__fp_reg_set.__fp_xmm_state); | 1593 | &mcp->__fpregs.__fp_reg_set.__fp_xmm_state); | |
1594 | memset(&mcp->__fpregs.__fp_pad, 0, sizeof mcp->__fpregs.__fp_pad); | 1594 | memset(&mcp->__fpregs.__fp_pad, 0, sizeof mcp->__fpregs.__fp_pad); | |
1595 | *flags |= _UC_FXSAVE | _UC_FPU; | 1595 | *flags |= _UC_FXSAVE | _UC_FPU; | |
1596 | } | 1596 | } | |
1597 | 1597 | |||
1598 | int | 1598 | int | |
1599 | cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp) | 1599 | cpu_mcontext_validate(struct lwp *l, const mcontext_t *mcp) | |
1600 | { | 1600 | { | |
1601 | const __greg_t *gr = mcp->__gregs; | 1601 | const __greg_t *gr = mcp->__gregs; | |
1602 | struct trapframe *tf = l->l_md.md_regs; | 1602 | struct trapframe *tf = l->l_md.md_regs; | |
1603 | 1603 | |||
1604 | /* | 1604 | /* | |
1605 | * Check for security violations. If we're returning | 1605 | * Check for security violations. If we're returning | |
1606 | * to protected mode, the CPU will validate the segment | 1606 | * to protected mode, the CPU will validate the segment | |
1607 | * registers automatically and generate a trap on | 1607 | * registers automatically and generate a trap on | |
1608 | * violations. We handle the trap, rather than doing | 1608 | * violations. We handle the trap, rather than doing | |
1609 | * all of the checking here. | 1609 | * all of the checking here. | |
1610 | */ | 1610 | */ | |
1611 | if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) || | 1611 | if (((gr[_REG_EFL] ^ tf->tf_eflags) & PSL_USERSTATIC) || | |
1612 | !USERMODE(gr[_REG_CS], gr[_REG_EFL])) | 1612 | !USERMODE(gr[_REG_CS], gr[_REG_EFL])) | |
1613 | return EINVAL; | 1613 | return EINVAL; | |
1614 | 1614 | |||
1615 | return 0; | 1615 | return 0; | |
1616 | } | 1616 | } | |
1617 | 1617 | |||
1618 | int | 1618 | int | |
1619 | cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags) | 1619 | cpu_setmcontext(struct lwp *l, const mcontext_t *mcp, unsigned int flags) | |
1620 | { | 1620 | { | |
1621 | struct trapframe *tf = l->l_md.md_regs; | 1621 | struct trapframe *tf = l->l_md.md_regs; | |
1622 | const __greg_t *gr = mcp->__gregs; | 1622 | const __greg_t *gr = mcp->__gregs; | |
1623 | struct proc *p = l->l_proc; | 1623 | struct proc *p = l->l_proc; | |
1624 | int error; | 1624 | int error; | |
1625 | 1625 | |||
1626 | /* Restore register context, if any. */ | 1626 | /* Restore register context, if any. */ | |
1627 | if ((flags & _UC_CPU) != 0) { | 1627 | if ((flags & _UC_CPU) != 0) { | |
1628 | #ifdef VM86 | 1628 | #ifdef VM86 | |
1629 | if (gr[_REG_EFL] & PSL_VM) { | 1629 | if (gr[_REG_EFL] & PSL_VM) { | |
1630 | tf->tf_vm86_gs = gr[_REG_GS]; | 1630 | tf->tf_vm86_gs = gr[_REG_GS]; | |
1631 | tf->tf_vm86_fs = gr[_REG_FS]; | 1631 | tf->tf_vm86_fs = gr[_REG_FS]; | |
1632 | tf->tf_vm86_es = gr[_REG_ES]; | 1632 | tf->tf_vm86_es = gr[_REG_ES]; | |
1633 | tf->tf_vm86_ds = gr[_REG_DS]; | 1633 | tf->tf_vm86_ds = gr[_REG_DS]; | |
1634 | set_vflags(l, gr[_REG_EFL]); | 1634 | set_vflags(l, gr[_REG_EFL]); | |
1635 | if (flags & _UC_VM) { | 1635 | if (flags & _UC_VM) { | |
1636 | void syscall_vm86(struct trapframe *); | 1636 | void syscall_vm86(struct trapframe *); | |
1637 | l->l_proc->p_md.md_syscall = syscall_vm86; | 1637 | l->l_proc->p_md.md_syscall = syscall_vm86; | |
1638 | } | 1638 | } | |
1639 | } else | 1639 | } else | |
1640 | #endif | 1640 | #endif | |
1641 | { | 1641 | { | |
1642 | error = cpu_mcontext_validate(l, mcp); | 1642 | error = cpu_mcontext_validate(l, mcp); | |
1643 | if (error) | 1643 | if (error) | |
1644 | return error; | 1644 | return error; | |
1645 | 1645 | |||
1646 | tf->tf_gs = gr[_REG_GS]; | 1646 | tf->tf_gs = gr[_REG_GS]; | |
1647 | tf->tf_fs = gr[_REG_FS]; | 1647 | tf->tf_fs = gr[_REG_FS]; | |
1648 | tf->tf_es = gr[_REG_ES]; | 1648 | tf->tf_es = gr[_REG_ES]; | |
1649 | tf->tf_ds = gr[_REG_DS]; | 1649 | tf->tf_ds = gr[_REG_DS]; | |
1650 | /* Only change the user-alterable part of eflags */ | 1650 | /* Only change the user-alterable part of eflags */ | |
1651 | tf->tf_eflags &= ~PSL_USER; | 1651 | tf->tf_eflags &= ~PSL_USER; | |
1652 | tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER); | 1652 | tf->tf_eflags |= (gr[_REG_EFL] & PSL_USER); | |
1653 | } | 1653 | } | |
1654 | tf->tf_edi = gr[_REG_EDI]; | 1654 | tf->tf_edi = gr[_REG_EDI]; | |
1655 | tf->tf_esi = gr[_REG_ESI]; | 1655 | tf->tf_esi = gr[_REG_ESI]; | |
1656 | tf->tf_ebp = gr[_REG_EBP]; | 1656 | tf->tf_ebp = gr[_REG_EBP]; | |
1657 | tf->tf_ebx = gr[_REG_EBX]; | 1657 | tf->tf_ebx = gr[_REG_EBX]; | |
1658 | tf->tf_edx = gr[_REG_EDX]; | 1658 | tf->tf_edx = gr[_REG_EDX]; | |
1659 | tf->tf_ecx = gr[_REG_ECX]; | 1659 | tf->tf_ecx = gr[_REG_ECX]; | |
1660 | tf->tf_eax = gr[_REG_EAX]; | 1660 | tf->tf_eax = gr[_REG_EAX]; | |
1661 | tf->tf_eip = gr[_REG_EIP]; | 1661 | tf->tf_eip = gr[_REG_EIP]; | |
1662 | tf->tf_cs = gr[_REG_CS]; | 1662 | tf->tf_cs = gr[_REG_CS]; | |
1663 | tf->tf_esp = gr[_REG_UESP]; | 1663 | tf->tf_esp = gr[_REG_UESP]; | |
1664 | tf->tf_ss = gr[_REG_SS]; | 1664 | tf->tf_ss = gr[_REG_SS]; | |
1665 | } | 1665 | } | |
1666 | 1666 | |||
1667 | if ((flags & _UC_TLSBASE) != 0) | 1667 | if ((flags & _UC_TLSBASE) != 0) | |
1668 | lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase); | 1668 | lwp_setprivate(l, (void *)(uintptr_t)mcp->_mc_tlsbase); | |
1669 | 1669 | |||
1670 | /* Restore floating point register context, if given. */ | 1670 | /* Restore floating point register context, if given. */ | |
1671 | if ((flags & _UC_FPU) != 0) { | 1671 | if ((flags & _UC_FPU) != 0) { | |
1672 | __CTASSERT(sizeof (struct fxsave) == | 1672 | __CTASSERT(sizeof (struct fxsave) == | |
1673 | sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state); | 1673 | sizeof mcp->__fpregs.__fp_reg_set.__fp_xmm_state); | |
1674 | __CTASSERT(sizeof (struct save87) == | 1674 | __CTASSERT(sizeof (struct save87) == | |
1675 | sizeof mcp->__fpregs.__fp_reg_set.__fpchip_state); | 1675 | sizeof mcp->__fpregs.__fp_reg_set.__fpchip_state); | |
1676 | 1676 | |||
1677 | if (flags & _UC_FXSAVE) { | 1677 | if (flags & _UC_FXSAVE) { | |
1678 | process_write_fpregs_xmm(l, (const struct fxsave *) | 1678 | process_write_fpregs_xmm(l, (const struct fxsave *) | |
1679 | &mcp->__fpregs.__fp_reg_set.__fp_xmm_state); | 1679 | &mcp->__fpregs.__fp_reg_set.__fp_xmm_state); | |
1680 | } else { | 1680 | } else { | |
1681 | process_write_fpregs_s87(l, (const struct save87 *) | 1681 | process_write_fpregs_s87(l, (const struct save87 *) | |
1682 | &mcp->__fpregs.__fp_reg_set.__fpchip_state); | 1682 | &mcp->__fpregs.__fp_reg_set.__fpchip_state); | |
1683 | } | 1683 | } | |
1684 | } | 1684 | } | |
1685 | 1685 | |||
1686 | mutex_enter(p->p_lock); | 1686 | mutex_enter(p->p_lock); | |
1687 | if (flags & _UC_SETSTACK) | 1687 | if (flags & _UC_SETSTACK) | |
1688 | l->l_sigstk.ss_flags |= SS_ONSTACK; | 1688 | l->l_sigstk.ss_flags |= SS_ONSTACK; | |
1689 | if (flags & _UC_CLRSTACK) | 1689 | if (flags & _UC_CLRSTACK) | |
1690 | l->l_sigstk.ss_flags &= ~SS_ONSTACK; | 1690 | l->l_sigstk.ss_flags &= ~SS_ONSTACK; | |
1691 | mutex_exit(p->p_lock); | 1691 | mutex_exit(p->p_lock); | |
1692 | return (0); | 1692 | return (0); | |
1693 | } | 1693 | } | |
1694 | 1694 | |||
1695 | void | 1695 | void | |
1696 | cpu_initclocks(void) | 1696 | cpu_initclocks(void) | |
1697 | { | 1697 | { | |
1698 | 1698 | |||
1699 | (*initclock_func)(); | 1699 | (*initclock_func)(); | |
1700 | } | 1700 | } | |
1701 | 1701 | |||
1702 | #define DEV_IO 14 /* iopl for compat_10 */ | 1702 | #define DEV_IO 14 /* iopl for compat_10 */ | |
1703 | 1703 | |||
1704 | int | 1704 | int | |
1705 | mm_md_open(dev_t dev, int flag, int mode, struct lwp *l) | 1705 | mm_md_open(dev_t dev, int flag, int mode, struct lwp *l) | |
1706 | { | 1706 | { | |
1707 | 1707 | |||
1708 | switch (minor(dev)) { | 1708 | switch (minor(dev)) { | |
1709 | case DEV_IO: | 1709 | case DEV_IO: | |
1710 | /* | 1710 | /* | |
1711 | * This is done by i386_iopl(3) now. | 1711 | * This is done by i386_iopl(3) now. | |
1712 | * | 1712 | * | |
1713 | * #if defined(COMPAT_10) || defined(COMPAT_FREEBSD) | 1713 | * #if defined(COMPAT_10) || defined(COMPAT_FREEBSD) | |
1714 | */ | 1714 | */ | |
1715 | if (flag & FWRITE) { | 1715 | if (flag & FWRITE) { | |
1716 | struct trapframe *fp; | 1716 | struct trapframe *fp; | |
1717 | int error; | 1717 | int error; | |
1718 | 1718 | |||
1719 | error = kauth_authorize_machdep(l->l_cred, | 1719 | error = kauth_authorize_machdep(l->l_cred, | |
1720 | KAUTH_MACHDEP_IOPL, NULL, NULL, NULL, NULL); | 1720 | KAUTH_MACHDEP_IOPL, NULL, NULL, NULL, NULL); | |
1721 | if (error) | 1721 | if (error) | |
1722 | return (error); | 1722 | return (error); | |
1723 | fp = curlwp->l_md.md_regs; | 1723 | fp = curlwp->l_md.md_regs; | |
1724 | fp->tf_eflags |= PSL_IOPL; | 1724 | fp->tf_eflags |= PSL_IOPL; | |
1725 | } | 1725 | } | |
1726 | break; | 1726 | break; | |
1727 | default: | 1727 | default: | |
1728 | break; | 1728 | break; | |
1729 | } | 1729 | } | |
1730 | return 0; | 1730 | return 0; | |
1731 | } | 1731 | } | |
1732 | 1732 | |||
1733 | #ifdef PAE | 1733 | #ifdef PAE | |
1734 | void | 1734 | void | |
1735 | cpu_alloc_l3_page(struct cpu_info *ci) | 1735 | cpu_alloc_l3_page(struct cpu_info *ci) | |
1736 | { | 1736 | { | |
1737 | int ret; | 1737 | int ret; | |
1738 | struct pglist pg; | 1738 | struct pglist pg; | |
1739 | struct vm_page *vmap; | 1739 | struct vm_page *vmap; | |
1740 | 1740 | |||
1741 | KASSERT(ci != NULL); | 1741 | KASSERT(ci != NULL); | |
1742 | /* | 1742 | /* | |
1743 | * Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, PA musts | 1743 | * Allocate a page for the per-CPU L3 PD. cr3 being 32 bits, PA musts | |
1744 | * resides below the 4GB boundary. | 1744 | * resides below the 4GB boundary. | |
1745 | */ | 1745 | */ | |
1746 | ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0); | 1746 | ret = uvm_pglistalloc(PAGE_SIZE, 0, 0x100000000ULL, 32, 0, &pg, 1, 0); | |
1747 | vmap = TAILQ_FIRST(&pg); | 1747 | vmap = TAILQ_FIRST(&pg); | |
1748 | 1748 | |||
1749 | if (ret != 0 || vmap == NULL) | 1749 | if (ret != 0 || vmap == NULL) | |
1750 | panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n", | 1750 | panic("%s: failed to allocate L3 pglist for CPU %d (ret %d)\n", | |
1751 | __func__, cpu_index(ci), ret); | 1751 | __func__, cpu_index(ci), ret); | |
1752 | 1752 | |||
1753 | ci->ci_pae_l3_pdirpa = vmap->phys_addr; | 1753 | ci->ci_pae_l3_pdirpa = vmap->phys_addr; | |
1754 | 1754 | |||
1755 | ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, | 1755 | ci->ci_pae_l3_pdir = (paddr_t *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, | |
1756 | UVM_KMF_VAONLY | UVM_KMF_NOWAIT); | 1756 | UVM_KMF_VAONLY | UVM_KMF_NOWAIT); | |
1757 | if (ci->ci_pae_l3_pdir == NULL) | 1757 | if (ci->ci_pae_l3_pdir == NULL) | |
1758 | panic("%s: failed to allocate L3 PD for CPU %d\n", | 1758 | panic("%s: failed to allocate L3 PD for CPU %d\n", | |
1759 | __func__, cpu_index(ci)); | 1759 | __func__, cpu_index(ci)); | |
1760 | 1760 | |||
1761 | pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa, | 1761 | pmap_kenter_pa((vaddr_t)ci->ci_pae_l3_pdir, ci->ci_pae_l3_pdirpa, | |
1762 | VM_PROT_READ | VM_PROT_WRITE, 0); | 1762 | VM_PROT_READ | VM_PROT_WRITE, 0); | |
1763 | 1763 | |||
1764 | pmap_update(pmap_kernel()); | 1764 | pmap_update(pmap_kernel()); | |
1765 | } | 1765 | } | |
1766 | #endif /* PAE */ | 1766 | #endif /* PAE */ |
--- src/sys/arch/x86/x86/pmap.c 2016/05/14 09:37:21 1.194
+++ src/sys/arch/x86/x86/pmap.c 2016/05/15 10:35:54 1.195
@@ -1,1173 +1,1173 @@ | @@ -1,1173 +1,1173 @@ | |||
1 | /* $NetBSD: pmap.c,v 1.194 2016/05/14 09:37:21 maxv Exp $ */ | 1 | /* $NetBSD: pmap.c,v 1.195 2016/05/15 10:35:54 maxv Exp $ */ | |
2 | 2 | |||
3 | /*- | 3 | /*- | |
4 | * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc. | 4 | * Copyright (c) 2008, 2010 The NetBSD Foundation, Inc. | |
5 | * All rights reserved. | 5 | * All rights reserved. | |
6 | * | 6 | * | |
7 | * This code is derived from software contributed to The NetBSD Foundation | 7 | * This code is derived from software contributed to The NetBSD Foundation | |
8 | * by Andrew Doran. | 8 | * by Andrew Doran. | |
9 | * | 9 | * | |
10 | * Redistribution and use in source and binary forms, with or without | 10 | * Redistribution and use in source and binary forms, with or without | |
11 | * modification, are permitted provided that the following conditions | 11 | * modification, are permitted provided that the following conditions | |
12 | * are met: | 12 | * are met: | |
13 | * 1. Redistributions of source code must retain the above copyright | 13 | * 1. Redistributions of source code must retain the above copyright | |
14 | * notice, this list of conditions and the following disclaimer. | 14 | * notice, this list of conditions and the following disclaimer. | |
15 | * 2. Redistributions in binary form must reproduce the above copyright | 15 | * 2. Redistributions in binary form must reproduce the above copyright | |
16 | * notice, this list of conditions and the following disclaimer in the | 16 | * notice, this list of conditions and the following disclaimer in the | |
17 | * documentation and/or other materials provided with the distribution. | 17 | * documentation and/or other materials provided with the distribution. | |
18 | * | 18 | * | |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
29 | * POSSIBILITY OF SUCH DAMAGE. | 29 | * POSSIBILITY OF SUCH DAMAGE. | |
30 | */ | 30 | */ | |
31 | 31 | |||
32 | /* | 32 | /* | |
33 | * Copyright (c) 2007 Manuel Bouyer. | 33 | * Copyright (c) 2007 Manuel Bouyer. | |
34 | * | 34 | * | |
35 | * Redistribution and use in source and binary forms, with or without | 35 | * Redistribution and use in source and binary forms, with or without | |
36 | * modification, are permitted provided that the following conditions | 36 | * modification, are permitted provided that the following conditions | |
37 | * are met: | 37 | * are met: | |
38 | * 1. Redistributions of source code must retain the above copyright | 38 | * 1. Redistributions of source code must retain the above copyright | |
39 | * notice, this list of conditions and the following disclaimer. | 39 | * notice, this list of conditions and the following disclaimer. | |
40 | * 2. Redistributions in binary form must reproduce the above copyright | 40 | * 2. Redistributions in binary form must reproduce the above copyright | |
41 | * notice, this list of conditions and the following disclaimer in the | 41 | * notice, this list of conditions and the following disclaimer in the | |
42 | * documentation and/or other materials provided with the distribution. | 42 | * documentation and/or other materials provided with the distribution. | |
43 | * | 43 | * | |
44 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | 44 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
45 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | 45 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
46 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | 46 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
47 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | 47 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
48 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | 48 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
49 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 49 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
50 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 50 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
51 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 51 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
52 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 52 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
53 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 53 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
54 | * | 54 | * | |
55 | */ | 55 | */ | |
56 | 56 | |||
57 | /* | 57 | /* | |
58 | * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> | 58 | * Copyright (c) 2006 Mathieu Ropert <mro@adviseo.fr> | |
59 | * | 59 | * | |
60 | * Permission to use, copy, modify, and distribute this software for any | 60 | * Permission to use, copy, modify, and distribute this software for any | |
61 | * purpose with or without fee is hereby granted, provided that the above | 61 | * purpose with or without fee is hereby granted, provided that the above | |
62 | * copyright notice and this permission notice appear in all copies. | 62 | * copyright notice and this permission notice appear in all copies. | |
63 | * | 63 | * | |
64 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | 64 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | |
65 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | 65 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | |
66 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | 66 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR | |
67 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 67 | * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | |
68 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | 68 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN | |
69 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | 69 | * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF | |
70 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | 70 | * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. | |
71 | */ | 71 | */ | |
72 | 72 | |||
73 | /* | 73 | /* | |
74 | * Copyright (c) 1997 Charles D. Cranor and Washington University. | 74 | * Copyright (c) 1997 Charles D. Cranor and Washington University. | |
75 | * All rights reserved. | 75 | * All rights reserved. | |
76 | * | 76 | * | |
77 | * Redistribution and use in source and binary forms, with or without | 77 | * Redistribution and use in source and binary forms, with or without | |
78 | * modification, are permitted provided that the following conditions | 78 | * modification, are permitted provided that the following conditions | |
79 | * are met: | 79 | * are met: | |
80 | * 1. Redistributions of source code must retain the above copyright | 80 | * 1. Redistributions of source code must retain the above copyright | |
81 | * notice, this list of conditions and the following disclaimer. | 81 | * notice, this list of conditions and the following disclaimer. | |
82 | * 2. Redistributions in binary form must reproduce the above copyright | 82 | * 2. Redistributions in binary form must reproduce the above copyright | |
83 | * notice, this list of conditions and the following disclaimer in the | 83 | * notice, this list of conditions and the following disclaimer in the | |
84 | * documentation and/or other materials provided with the distribution. | 84 | * documentation and/or other materials provided with the distribution. | |
85 | * | 85 | * | |
86 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | 86 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR | |
87 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | 87 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES | |
88 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | 88 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. | |
89 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | 89 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, | |
90 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | 90 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT | |
91 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | 91 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
92 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | 92 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
93 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | 93 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
94 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | 94 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF | |
95 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | 95 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
96 | */ | 96 | */ | |
97 | 97 | |||
98 | /* | 98 | /* | |
99 | * Copyright 2001 (c) Wasabi Systems, Inc. | 99 | * Copyright 2001 (c) Wasabi Systems, Inc. | |
100 | * All rights reserved. | 100 | * All rights reserved. | |
101 | * | 101 | * | |
102 | * Written by Frank van der Linden for Wasabi Systems, Inc. | 102 | * Written by Frank van der Linden for Wasabi Systems, Inc. | |
103 | * | 103 | * | |
104 | * Redistribution and use in source and binary forms, with or without | 104 | * Redistribution and use in source and binary forms, with or without | |
105 | * modification, are permitted provided that the following conditions | 105 | * modification, are permitted provided that the following conditions | |
106 | * are met: | 106 | * are met: | |
107 | * 1. Redistributions of source code must retain the above copyright | 107 | * 1. Redistributions of source code must retain the above copyright | |
108 | * notice, this list of conditions and the following disclaimer. | 108 | * notice, this list of conditions and the following disclaimer. | |
109 | * 2. Redistributions in binary form must reproduce the above copyright | 109 | * 2. Redistributions in binary form must reproduce the above copyright | |
110 | * notice, this list of conditions and the following disclaimer in the | 110 | * notice, this list of conditions and the following disclaimer in the | |
111 | * documentation and/or other materials provided with the distribution. | 111 | * documentation and/or other materials provided with the distribution. | |
112 | * 3. All advertising materials mentioning features or use of this software | 112 | * 3. All advertising materials mentioning features or use of this software | |
113 | * must display the following acknowledgement: | 113 | * must display the following acknowledgement: | |
114 | * This product includes software developed for the NetBSD Project by | 114 | * This product includes software developed for the NetBSD Project by | |
115 | * Wasabi Systems, Inc. | 115 | * Wasabi Systems, Inc. | |
116 | * 4. The name of Wasabi Systems, Inc. may not be used to endorse | 116 | * 4. The name of Wasabi Systems, Inc. may not be used to endorse | |
117 | * or promote products derived from this software without specific prior | 117 | * or promote products derived from this software without specific prior | |
118 | * written permission. | 118 | * written permission. | |
119 | * | 119 | * | |
120 | * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND | 120 | * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND | |
121 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | 121 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |
122 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | 122 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |
123 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC | 123 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC | |
124 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | 124 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
125 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | 125 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
126 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | 126 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
127 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | 127 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
128 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | 128 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
129 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | 129 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
130 | * POSSIBILITY OF SUCH DAMAGE. | 130 | * POSSIBILITY OF SUCH DAMAGE. | |
131 | */ | 131 | */ | |
132 | 132 | |||
133 | /* | 133 | /* | |
134 | * This is the i386 pmap modified and generalized to support x86-64 | 134 | * This is the i386 pmap modified and generalized to support x86-64 | |
135 | * as well. The idea is to hide the upper N levels of the page tables | 135 | * as well. The idea is to hide the upper N levels of the page tables | |
136 | * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest | 136 | * inside pmap_get_ptp, pmap_free_ptp and pmap_growkernel. The rest | |
137 | * is mostly untouched, except that it uses some more generalized | 137 | * is mostly untouched, except that it uses some more generalized | |
138 | * macros and interfaces. | 138 | * macros and interfaces. | |
139 | * | 139 | * | |
140 | * This pmap has been tested on the i386 as well, and it can be easily | 140 | * This pmap has been tested on the i386 as well, and it can be easily | |
141 | * adapted to PAE. | 141 | * adapted to PAE. | |
142 | * | 142 | * | |
143 | * fvdl@wasabisystems.com 18-Jun-2001 | 143 | * fvdl@wasabisystems.com 18-Jun-2001 | |
144 | */ | 144 | */ | |
145 | 145 | |||
146 | /* | 146 | /* | |
147 | * pmap.c: i386 pmap module rewrite | 147 | * pmap.c: i386 pmap module rewrite | |
148 | * Chuck Cranor <chuck@netbsd> | 148 | * Chuck Cranor <chuck@netbsd> | |
149 | * 11-Aug-97 | 149 | * 11-Aug-97 | |
150 | * | 150 | * | |
151 | * history of this pmap module: in addition to my own input, i used | 151 | * history of this pmap module: in addition to my own input, i used | |
152 | * the following references for this rewrite of the i386 pmap: | 152 | * the following references for this rewrite of the i386 pmap: | |
153 | * | 153 | * | |
154 | * [1] the NetBSD i386 pmap. this pmap appears to be based on the | 154 | * [1] the NetBSD i386 pmap. this pmap appears to be based on the | |
155 | * BSD hp300 pmap done by Mike Hibler at University of Utah. | 155 | * BSD hp300 pmap done by Mike Hibler at University of Utah. | |
156 | * it was then ported to the i386 by William Jolitz of UUNET | 156 | * it was then ported to the i386 by William Jolitz of UUNET | |
157 | * Technologies, Inc. Then Charles M. Hannum of the NetBSD | 157 | * Technologies, Inc. Then Charles M. Hannum of the NetBSD | |
158 | * project fixed some bugs and provided some speed ups. | 158 | * project fixed some bugs and provided some speed ups. | |
159 | * | 159 | * | |
160 | * [2] the FreeBSD i386 pmap. this pmap seems to be the | 160 | * [2] the FreeBSD i386 pmap. this pmap seems to be the | |
161 | * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson | 161 | * Hibler/Jolitz pmap, as modified for FreeBSD by John S. Dyson | |
162 | * and David Greenman. | 162 | * and David Greenman. | |
163 | * | 163 | * | |
164 | * [3] the Mach pmap. this pmap, from CMU, seems to have migrated | 164 | * [3] the Mach pmap. this pmap, from CMU, seems to have migrated | |
165 | * between several processors. the VAX version was done by | 165 | * between several processors. the VAX version was done by | |
166 | * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 | 166 | * Avadis Tevanian, Jr., and Michael Wayne Young. the i386 | |
167 | * version was done by Lance Berc, Mike Kupfer, Bob Baron, | 167 | * version was done by Lance Berc, Mike Kupfer, Bob Baron, | |
168 | * David Golub, and Richard Draves. the alpha version was | 168 | * David Golub, and Richard Draves. the alpha version was | |
169 | * done by Alessandro Forin (CMU/Mach) and Chris Demetriou | 169 | * done by Alessandro Forin (CMU/Mach) and Chris Demetriou | |
170 | * (NetBSD/alpha). | 170 | * (NetBSD/alpha). | |
171 | */ | 171 | */ | |
172 | 172 | |||
173 | #include <sys/cdefs.h> | 173 | #include <sys/cdefs.h> | |
174 | __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.194 2016/05/14 09:37:21 maxv Exp $"); | 174 | __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.195 2016/05/15 10:35:54 maxv Exp $"); | |
175 | 175 | |||
176 | #include "opt_user_ldt.h" | 176 | #include "opt_user_ldt.h" | |
177 | #include "opt_lockdebug.h" | 177 | #include "opt_lockdebug.h" | |
178 | #include "opt_multiprocessor.h" | 178 | #include "opt_multiprocessor.h" | |
179 | #include "opt_xen.h" | 179 | #include "opt_xen.h" | |
180 | #if !defined(__x86_64__) | 180 | #if !defined(__x86_64__) | |
181 | #include "opt_kstack_dr0.h" | 181 | #include "opt_kstack_dr0.h" | |
182 | #endif /* !defined(__x86_64__) */ | 182 | #endif /* !defined(__x86_64__) */ | |
183 | 183 | |||
184 | #include <sys/param.h> | 184 | #include <sys/param.h> | |
185 | #include <sys/systm.h> | 185 | #include <sys/systm.h> | |
186 | #include <sys/proc.h> | 186 | #include <sys/proc.h> | |
187 | #include <sys/pool.h> | 187 | #include <sys/pool.h> | |
188 | #include <sys/kernel.h> | 188 | #include <sys/kernel.h> | |
189 | #include <sys/atomic.h> | 189 | #include <sys/atomic.h> | |
190 | #include <sys/cpu.h> | 190 | #include <sys/cpu.h> | |
191 | #include <sys/intr.h> | 191 | #include <sys/intr.h> | |
192 | #include <sys/xcall.h> | 192 | #include <sys/xcall.h> | |
193 | #include <sys/kcore.h> | 193 | #include <sys/kcore.h> | |
194 | 194 | |||
195 | #include <uvm/uvm.h> | 195 | #include <uvm/uvm.h> | |
196 | #include <uvm/pmap/pmap_pvt.h> | 196 | #include <uvm/pmap/pmap_pvt.h> | |
197 | 197 | |||
198 | #include <dev/isa/isareg.h> | 198 | #include <dev/isa/isareg.h> | |
199 | 199 | |||
200 | #include <machine/specialreg.h> | 200 | #include <machine/specialreg.h> | |
201 | #include <machine/gdt.h> | 201 | #include <machine/gdt.h> | |
202 | #include <machine/isa_machdep.h> | 202 | #include <machine/isa_machdep.h> | |
203 | #include <machine/cpuvar.h> | 203 | #include <machine/cpuvar.h> | |
204 | #include <machine/cputypes.h> | 204 | #include <machine/cputypes.h> | |
205 | 205 | |||
206 | #include <x86/pmap.h> | 206 | #include <x86/pmap.h> | |
207 | #include <x86/pmap_pv.h> | 207 | #include <x86/pmap_pv.h> | |
208 | 208 | |||
209 | #include <x86/i82489reg.h> | 209 | #include <x86/i82489reg.h> | |
210 | #include <x86/i82489var.h> | 210 | #include <x86/i82489var.h> | |
211 | 211 | |||
212 | #ifdef XEN | 212 | #ifdef XEN | |
213 | #include <xen/xen-public/xen.h> | 213 | #include <xen/xen-public/xen.h> | |
214 | #include <xen/hypervisor.h> | 214 | #include <xen/hypervisor.h> | |
215 | #endif | 215 | #endif | |
216 | 216 | |||
217 | /* | 217 | /* | |
218 | * general info: | 218 | * general info: | |
219 | * | 219 | * | |
220 | * - for an explanation of how the i386 MMU hardware works see | 220 | * - for an explanation of how the i386 MMU hardware works see | |
221 | * the comments in <machine/pte.h>. | 221 | * the comments in <machine/pte.h>. | |
222 | * | 222 | * | |
223 | * - for an explanation of the general memory structure used by | 223 | * - for an explanation of the general memory structure used by | |
224 | * this pmap (including the recursive mapping), see the comments | 224 | * this pmap (including the recursive mapping), see the comments | |
225 | * in <machine/pmap.h>. | 225 | * in <machine/pmap.h>. | |
226 | * | 226 | * | |
227 | * this file contains the code for the "pmap module." the module's | 227 | * this file contains the code for the "pmap module." the module's | |
228 | * job is to manage the hardware's virtual to physical address mappings. | 228 | * job is to manage the hardware's virtual to physical address mappings. | |
229 | * note that there are two levels of mapping in the VM system: | 229 | * note that there are two levels of mapping in the VM system: | |
230 | * | 230 | * | |
231 | * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's | 231 | * [1] the upper layer of the VM system uses vm_map's and vm_map_entry's | |
232 | * to map ranges of virtual address space to objects/files. for | 232 | * to map ranges of virtual address space to objects/files. for | |
233 | * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only | 233 | * example, the vm_map may say: "map VA 0x1000 to 0x22000 read-only | |
234 | * to the file /bin/ls starting at offset zero." note that | 234 | * to the file /bin/ls starting at offset zero." note that | |
235 | * the upper layer mapping is not concerned with how individual | 235 | * the upper layer mapping is not concerned with how individual | |
236 | * vm_pages are mapped. | 236 | * vm_pages are mapped. | |
237 | * | 237 | * | |
238 | * [2] the lower layer of the VM system (the pmap) maintains the mappings | 238 | * [2] the lower layer of the VM system (the pmap) maintains the mappings | |
239 | * from virtual addresses. it is concerned with which vm_page is | 239 | * from virtual addresses. it is concerned with which vm_page is | |
240 | * mapped where. for example, when you run /bin/ls and start | 240 | * mapped where. for example, when you run /bin/ls and start | |
241 | * at page 0x1000 the fault routine may lookup the correct page | 241 | * at page 0x1000 the fault routine may lookup the correct page | |
242 | * of the /bin/ls file and then ask the pmap layer to establish | 242 | * of the /bin/ls file and then ask the pmap layer to establish | |
243 | * a mapping for it. | 243 | * a mapping for it. | |
244 | * | 244 | * | |
245 | * note that information in the lower layer of the VM system can be | 245 | * note that information in the lower layer of the VM system can be | |
246 | * thrown away since it can easily be reconstructed from the info | 246 | * thrown away since it can easily be reconstructed from the info | |
247 | * in the upper layer. | 247 | * in the upper layer. | |
248 | * | 248 | * | |
249 | * data structures we use include: | 249 | * data structures we use include: | |
250 | * | 250 | * | |
251 | * - struct pmap: describes the address space of one thread | 251 | * - struct pmap: describes the address space of one thread | |
252 | * - struct pmap_page: describes one pv-tracked page, without | 252 | * - struct pmap_page: describes one pv-tracked page, without | |
253 | * necessarily a corresponding vm_page | 253 | * necessarily a corresponding vm_page | |
254 | * - struct pv_entry: describes one <PMAP,VA> mapping of a PA | 254 | * - struct pv_entry: describes one <PMAP,VA> mapping of a PA | |
255 | * - struct pv_head: there is one pv_head per pv-tracked page of | 255 | * - struct pv_head: there is one pv_head per pv-tracked page of | |
256 | * physical memory. the pv_head points to a list of pv_entry | 256 | * physical memory. the pv_head points to a list of pv_entry | |
257 | * structures which describe all the <PMAP,VA> pairs that this | 257 | * structures which describe all the <PMAP,VA> pairs that this | |
258 | * page is mapped in. this is critical for page based operations | 258 | * page is mapped in. this is critical for page based operations | |
259 | * such as pmap_page_protect() [change protection on _all_ mappings | 259 | * such as pmap_page_protect() [change protection on _all_ mappings | |
260 | * of a page] | 260 | * of a page] | |
261 | */ | 261 | */ | |
262 | 262 | |||
263 | /* | 263 | /* | |
264 | * memory allocation | 264 | * memory allocation | |
265 | * | 265 | * | |
266 | * - there are three data structures that we must dynamically allocate: | 266 | * - there are three data structures that we must dynamically allocate: | |
267 | * | 267 | * | |
268 | * [A] new process' page directory page (PDP) | 268 | * [A] new process' page directory page (PDP) | |
269 | * - plan 1: done at pmap_create() we use | 269 | * - plan 1: done at pmap_create() we use | |
270 | * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this | 270 | * uvm_km_alloc(kernel_map, PAGE_SIZE) [fka kmem_alloc] to do this | |
271 | * allocation. | 271 | * allocation. | |
272 | * | 272 | * | |
273 | * if we are low in free physical memory then we sleep in | 273 | * if we are low in free physical memory then we sleep in | |
274 | * uvm_km_alloc -- in this case this is ok since we are creating | 274 | * uvm_km_alloc -- in this case this is ok since we are creating | |
275 | * a new pmap and should not be holding any locks. | 275 | * a new pmap and should not be holding any locks. | |
276 | * | 276 | * | |
277 | * if the kernel is totally out of virtual space | 277 | * if the kernel is totally out of virtual space | |
278 | * (i.e. uvm_km_alloc returns NULL), then we panic. | 278 | * (i.e. uvm_km_alloc returns NULL), then we panic. | |
279 | * | 279 | * | |
280 | * [B] new page tables pages (PTP) | 280 | * [B] new page tables pages (PTP) | |
281 | * - call uvm_pagealloc() | 281 | * - call uvm_pagealloc() | |
282 | * => success: zero page, add to pm_pdir | 282 | * => success: zero page, add to pm_pdir | |
283 | * => failure: we are out of free vm_pages, let pmap_enter() | 283 | * => failure: we are out of free vm_pages, let pmap_enter() | |
284 | * tell UVM about it. | 284 | * tell UVM about it. | |
285 | * | 285 | * | |
286 | * note: for kernel PTPs, we start with NKPTP of them. as we map | 286 | * note: for kernel PTPs, we start with NKPTP of them. as we map | |
287 | * kernel memory (at uvm_map time) we check to see if we've grown | 287 | * kernel memory (at uvm_map time) we check to see if we've grown | |
288 | * the kernel pmap. if so, we call the optional function | 288 | * the kernel pmap. if so, we call the optional function | |
289 | * pmap_growkernel() to grow the kernel PTPs in advance. | 289 | * pmap_growkernel() to grow the kernel PTPs in advance. | |
290 | * | 290 | * | |
291 | * [C] pv_entry structures | 291 | * [C] pv_entry structures | |
292 | */ | 292 | */ | |
293 | 293 | |||
294 | /* | 294 | /* | |
295 | * locking | 295 | * locking | |
296 | * | 296 | * | |
297 | * we have the following locks that we must contend with: | 297 | * we have the following locks that we must contend with: | |
298 | * | 298 | * | |
299 | * mutexes: | 299 | * mutexes: | |
300 | * | 300 | * | |
301 | * - pmap lock (per pmap, part of uvm_object) | 301 | * - pmap lock (per pmap, part of uvm_object) | |
302 | * this lock protects the fields in the pmap structure including | 302 | * this lock protects the fields in the pmap structure including | |
303 | * the non-kernel PDEs in the PDP, and the PTEs. it also locks | 303 | * the non-kernel PDEs in the PDP, and the PTEs. it also locks | |
304 | * in the alternate PTE space (since that is determined by the | 304 | * in the alternate PTE space (since that is determined by the | |
305 | * entry in the PDP). | 305 | * entry in the PDP). | |
306 | * | 306 | * | |
307 | * - pvh_lock (per pv_head) | 307 | * - pvh_lock (per pv_head) | |
308 | * this lock protects the pv_entry list which is chained off the | 308 | * this lock protects the pv_entry list which is chained off the | |
309 | * pv_head structure for a specific pv-tracked PA. it is locked | 309 | * pv_head structure for a specific pv-tracked PA. it is locked | |
310 | * when traversing the list (e.g. adding/removing mappings, | 310 | * when traversing the list (e.g. adding/removing mappings, | |
311 | * syncing R/M bits, etc.) | 311 | * syncing R/M bits, etc.) | |
312 | * | 312 | * | |
313 | * - pmaps_lock | 313 | * - pmaps_lock | |
314 | * this lock protects the list of active pmaps (headed by "pmaps"). | 314 | * this lock protects the list of active pmaps (headed by "pmaps"). | |
315 | * we lock it when adding or removing pmaps from this list. | 315 | * we lock it when adding or removing pmaps from this list. | |
316 | */ | 316 | */ | |
317 | 317 | |||
318 | const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; | 318 | const vaddr_t ptp_masks[] = PTP_MASK_INITIALIZER; | |
319 | const int ptp_shifts[] = PTP_SHIFT_INITIALIZER; | 319 | const int ptp_shifts[] = PTP_SHIFT_INITIALIZER; | |
320 | const long nkptpmax[] = NKPTPMAX_INITIALIZER; | 320 | const long nkptpmax[] = NKPTPMAX_INITIALIZER; | |
321 | const long nbpd[] = NBPD_INITIALIZER; | 321 | const long nbpd[] = NBPD_INITIALIZER; | |
322 | pd_entry_t * const normal_pdes[] = PDES_INITIALIZER; | 322 | pd_entry_t * const normal_pdes[] = PDES_INITIALIZER; | |
323 | 323 | |||
324 | long nkptp[] = NKPTP_INITIALIZER; | 324 | long nkptp[] = NKPTP_INITIALIZER; | |
325 | 325 | |||
326 | struct pmap_head pmaps; | 326 | struct pmap_head pmaps; | |
327 | kmutex_t pmaps_lock; | 327 | kmutex_t pmaps_lock; | |
328 | 328 | |||
329 | static vaddr_t pmap_maxkvaddr; | 329 | static vaddr_t pmap_maxkvaddr; | |
330 | 330 | |||
331 | /* | 331 | /* | |
332 | * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable. | 332 | * XXX kludge: dummy locking to make KASSERTs in uvm_page.c comfortable. | |
333 | * actual locking is done by pm_lock. | 333 | * actual locking is done by pm_lock. | |
334 | */ | 334 | */ | |
335 | #if defined(DIAGNOSTIC) | 335 | #if defined(DIAGNOSTIC) | |
336 | #define PMAP_SUBOBJ_LOCK(pm, idx) \ | 336 | #define PMAP_SUBOBJ_LOCK(pm, idx) \ | |
337 | KASSERT(mutex_owned((pm)->pm_lock)); \ | 337 | KASSERT(mutex_owned((pm)->pm_lock)); \ | |
338 | if ((idx) != 0) \ | 338 | if ((idx) != 0) \ | |
339 | mutex_enter((pm)->pm_obj[(idx)].vmobjlock) | 339 | mutex_enter((pm)->pm_obj[(idx)].vmobjlock) | |
340 | #define PMAP_SUBOBJ_UNLOCK(pm, idx) \ | 340 | #define PMAP_SUBOBJ_UNLOCK(pm, idx) \ | |
341 | KASSERT(mutex_owned((pm)->pm_lock)); \ | 341 | KASSERT(mutex_owned((pm)->pm_lock)); \ | |
342 | if ((idx) != 0) \ | 342 | if ((idx) != 0) \ | |
343 | mutex_exit((pm)->pm_obj[(idx)].vmobjlock) | 343 | mutex_exit((pm)->pm_obj[(idx)].vmobjlock) | |
344 | #else /* defined(DIAGNOSTIC) */ | 344 | #else /* defined(DIAGNOSTIC) */ | |
345 | #define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */ | 345 | #define PMAP_SUBOBJ_LOCK(pm, idx) /* nothing */ | |
346 | #define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */ | 346 | #define PMAP_SUBOBJ_UNLOCK(pm, idx) /* nothing */ | |
347 | #endif /* defined(DIAGNOSTIC) */ | 347 | #endif /* defined(DIAGNOSTIC) */ | |
348 | 348 | |||
349 | /* | 349 | /* | |
350 | * Misc. event counters. | 350 | * Misc. event counters. | |
351 | */ | 351 | */ | |
352 | struct evcnt pmap_iobmp_evcnt; | 352 | struct evcnt pmap_iobmp_evcnt; | |
353 | struct evcnt pmap_ldt_evcnt; | 353 | struct evcnt pmap_ldt_evcnt; | |
354 | 354 | |||
355 | /* | 355 | /* | |
356 | * PAT | 356 | * PAT | |
357 | */ | 357 | */ | |
/*
 * PAT (Page Attribute Table) memory-type encodings, as defined by the
 * x86 architecture for the IA32_PAT MSR.  PATENTRY() positions an 8-bit
 * memory-type field for PAT entry 'n'.  Both macro arguments are fully
 * parenthesized so that expression arguments expand safely.
 */
#define PATENTRY(n, type)	((type) << ((n) * 8))
#define PAT_UC		0x0ULL	/* Uncacheable */
#define PAT_WC		0x1ULL	/* Write Combining */
#define PAT_WT		0x4ULL	/* Write Through */
#define PAT_WP		0x5ULL	/* Write Protected */
#define PAT_WB		0x6ULL	/* Write Back */
#define PAT_UCMINUS	0x7ULL	/* Uncached, but can be overridden by MTRR */
365 | 365 | |||
366 | static bool cpu_pat_enabled __read_mostly = false; | 366 | static bool cpu_pat_enabled __read_mostly = false; | |
367 | 367 | |||
368 | /* | 368 | /* | |
369 | * global data structures | 369 | * global data structures | |
370 | */ | 370 | */ | |
371 | 371 | |||
372 | static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ | 372 | static struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ | |
373 | struct pmap *const kernel_pmap_ptr = &kernel_pmap_store; | 373 | struct pmap *const kernel_pmap_ptr = &kernel_pmap_store; | |
374 | 374 | |||
375 | /* | 375 | /* | |
376 | * pmap_pg_g: if our processor supports PG_G in the PTE then we | 376 | * pmap_pg_g: if our processor supports PG_G in the PTE then we | |
377 | * set pmap_pg_g to PG_G (otherwise it is zero). | 377 | * set pmap_pg_g to PG_G (otherwise it is zero). | |
378 | */ | 378 | */ | |
379 | 379 | |||
380 | int pmap_pg_g __read_mostly = 0; | 380 | int pmap_pg_g __read_mostly = 0; | |
381 | 381 | |||
382 | /* | 382 | /* | |
383 | * pmap_largepages: if our processor supports PG_PS and we are | 383 | * pmap_largepages: if our processor supports PG_PS and we are | |
384 | * using it, this is set to true. | 384 | * using it, this is set to true. | |
385 | */ | 385 | */ | |
386 | 386 | |||
387 | int pmap_largepages __read_mostly; | 387 | int pmap_largepages __read_mostly; | |
388 | 388 | |||
389 | /* | 389 | /* | |
390 | * i386 physical memory comes in a big contig chunk with a small | 390 | * i386 physical memory comes in a big contig chunk with a small | |
391 | * hole toward the front of it... the following two paddr_t's | 391 | * hole toward the front of it... the following two paddr_t's | |
392 | * (shared with machdep.c) describe the physical address space | 392 | * (shared with machdep.c) describe the physical address space | |
393 | * of this machine. | 393 | * of this machine. | |
394 | */ | 394 | */ | |
395 | paddr_t avail_start __read_mostly; /* PA of first available physical page */ | 395 | paddr_t avail_start __read_mostly; /* PA of first available physical page */ | |
396 | paddr_t avail_end __read_mostly; /* PA of last available physical page */ | 396 | paddr_t avail_end __read_mostly; /* PA of last available physical page */ | |
397 | 397 | |||
398 | #ifdef XEN | 398 | #ifdef XEN | |
399 | #ifdef __x86_64__ | 399 | #ifdef __x86_64__ | |
400 | /* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */ | 400 | /* Dummy PGD for user cr3, used between pmap_deactivate() and pmap_activate() */ | |
401 | static paddr_t xen_dummy_user_pgd; | 401 | static paddr_t xen_dummy_user_pgd; | |
402 | #endif /* __x86_64__ */ | 402 | #endif /* __x86_64__ */ | |
403 | paddr_t pmap_pa_start; /* PA of first physical page for this domain */ | 403 | paddr_t pmap_pa_start; /* PA of first physical page for this domain */ | |
404 | paddr_t pmap_pa_end; /* PA of last physical page for this domain */ | 404 | paddr_t pmap_pa_end; /* PA of last physical page for this domain */ | |
405 | #endif /* XEN */ | 405 | #endif /* XEN */ | |
406 | 406 | |||
407 | #define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp) | 407 | #define VM_PAGE_TO_PP(pg) (&(pg)->mdpage.mp_pp) | |
408 | 408 | |||
409 | #define PV_HASH_SIZE 32768 | 409 | #define PV_HASH_SIZE 32768 | |
410 | #define PV_HASH_LOCK_CNT 32 | 410 | #define PV_HASH_LOCK_CNT 32 | |
411 | 411 | |||
412 | struct pv_hash_lock { | 412 | struct pv_hash_lock { | |
413 | kmutex_t lock; | 413 | kmutex_t lock; | |
414 | } __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT] | 414 | } __aligned(CACHE_LINE_SIZE) pv_hash_locks[PV_HASH_LOCK_CNT] | |
415 | __aligned(CACHE_LINE_SIZE); | 415 | __aligned(CACHE_LINE_SIZE); | |
416 | 416 | |||
417 | struct pv_hash_head { | 417 | struct pv_hash_head { | |
418 | SLIST_HEAD(, pv_entry) hh_list; | 418 | SLIST_HEAD(, pv_entry) hh_list; | |
419 | } pv_hash_heads[PV_HASH_SIZE]; | 419 | } pv_hash_heads[PV_HASH_SIZE]; | |
420 | 420 | |||
421 | static u_int | 421 | static u_int | |
422 | pvhash_hash(struct vm_page *ptp, vaddr_t va) | 422 | pvhash_hash(struct vm_page *ptp, vaddr_t va) | |
423 | { | 423 | { | |
424 | 424 | |||
425 | return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT); | 425 | return (uintptr_t)ptp / sizeof(*ptp) + (va >> PAGE_SHIFT); | |
426 | } | 426 | } | |
427 | 427 | |||
428 | static struct pv_hash_head * | 428 | static struct pv_hash_head * | |
429 | pvhash_head(u_int hash) | 429 | pvhash_head(u_int hash) | |
430 | { | 430 | { | |
431 | 431 | |||
432 | return &pv_hash_heads[hash % PV_HASH_SIZE]; | 432 | return &pv_hash_heads[hash % PV_HASH_SIZE]; | |
433 | } | 433 | } | |
434 | 434 | |||
435 | static kmutex_t * | 435 | static kmutex_t * | |
436 | pvhash_lock(u_int hash) | 436 | pvhash_lock(u_int hash) | |
437 | { | 437 | { | |
438 | 438 | |||
439 | return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock; | 439 | return &pv_hash_locks[hash % PV_HASH_LOCK_CNT].lock; | |
440 | } | 440 | } | |
441 | 441 | |||
442 | static struct pv_entry * | 442 | static struct pv_entry * | |
443 | pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va) | 443 | pvhash_remove(struct pv_hash_head *hh, struct vm_page *ptp, vaddr_t va) | |
444 | { | 444 | { | |
445 | struct pv_entry *pve; | 445 | struct pv_entry *pve; | |
446 | struct pv_entry *prev; | 446 | struct pv_entry *prev; | |
447 | 447 | |||
448 | prev = NULL; | 448 | prev = NULL; | |
449 | SLIST_FOREACH(pve, &hh->hh_list, pve_hash) { | 449 | SLIST_FOREACH(pve, &hh->hh_list, pve_hash) { | |
450 | if (pve->pve_pte.pte_ptp == ptp && | 450 | if (pve->pve_pte.pte_ptp == ptp && | |
451 | pve->pve_pte.pte_va == va) { | 451 | pve->pve_pte.pte_va == va) { | |
452 | if (prev != NULL) { | 452 | if (prev != NULL) { | |
453 | SLIST_REMOVE_AFTER(prev, pve_hash); | 453 | SLIST_REMOVE_AFTER(prev, pve_hash); | |
454 | } else { | 454 | } else { | |
455 | SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash); | 455 | SLIST_REMOVE_HEAD(&hh->hh_list, pve_hash); | |
456 | } | 456 | } | |
457 | break; | 457 | break; | |
458 | } | 458 | } | |
459 | prev = pve; | 459 | prev = pve; | |
460 | } | 460 | } | |
461 | return pve; | 461 | return pve; | |
462 | } | 462 | } | |
463 | 463 | |||
464 | /* | 464 | /* | |
465 | * other data structures | 465 | * other data structures | |
466 | */ | 466 | */ | |
467 | 467 | |||
468 | static pt_entry_t protection_codes[8] __read_mostly; /* maps MI prot to i386 | 468 | static pt_entry_t protection_codes[8] __read_mostly; /* maps MI prot to i386 | |
469 | prot code */ | 469 | prot code */ | |
470 | static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */ | 470 | static bool pmap_initialized __read_mostly = false; /* pmap_init done yet? */ | |
471 | 471 | |||
472 | /* | 472 | /* | |
473 | * the following two vaddr_t's are used during system startup | 473 | * the following two vaddr_t's are used during system startup | |
474 | * to keep track of how much of the kernel's VM space we have used. | 474 | * to keep track of how much of the kernel's VM space we have used. | |
475 | * once the system is started, the management of the remaining kernel | 475 | * once the system is started, the management of the remaining kernel | |
476 | * VM space is turned over to the kernel_map vm_map. | 476 | * VM space is turned over to the kernel_map vm_map. | |
477 | */ | 477 | */ | |
478 | 478 | |||
479 | static vaddr_t virtual_avail __read_mostly; /* VA of first free KVA */ | 479 | static vaddr_t virtual_avail __read_mostly; /* VA of first free KVA */ | |
480 | static vaddr_t virtual_end __read_mostly; /* VA of last free KVA */ | 480 | static vaddr_t virtual_end __read_mostly; /* VA of last free KVA */ | |
481 | 481 | |||
482 | /* | 482 | /* | |
483 | * pool that pmap structures are allocated from | 483 | * pool that pmap structures are allocated from | |
484 | */ | 484 | */ | |
485 | 485 | |||
486 | static struct pool_cache pmap_cache; | 486 | static struct pool_cache pmap_cache; | |
487 | 487 | |||
488 | /* | 488 | /* | |
489 | * pv_entry cache | 489 | * pv_entry cache | |
490 | */ | 490 | */ | |
491 | 491 | |||
492 | static struct pool_cache pmap_pv_cache; | 492 | static struct pool_cache pmap_pv_cache; | |
493 | 493 | |||
494 | #ifdef __HAVE_DIRECT_MAP | 494 | #ifdef __HAVE_DIRECT_MAP | |
495 | 495 | |||
496 | extern phys_ram_seg_t mem_clusters[]; | 496 | extern phys_ram_seg_t mem_clusters[]; | |
497 | extern int mem_cluster_cnt; | 497 | extern int mem_cluster_cnt; | |
498 | 498 | |||
499 | #else | 499 | #else | |
500 | 500 | |||
501 | /* | 501 | /* | |
502 | * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a | 502 | * MULTIPROCESSOR: special VA's/ PTE's are actually allocated inside a | |
503 | * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing | 503 | * maxcpus*NPTECL array of PTE's, to avoid cache line thrashing | |
504 | * due to false sharing. | 504 | * due to false sharing. | |
505 | */ | 505 | */ | |
506 | 506 | |||
507 | #ifdef MULTIPROCESSOR | 507 | #ifdef MULTIPROCESSOR | |
508 | #define PTESLEW(pte, id) ((pte)+(id)*NPTECL) | 508 | #define PTESLEW(pte, id) ((pte)+(id)*NPTECL) | |
509 | #define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE) | 509 | #define VASLEW(va,id) ((va)+(id)*NPTECL*PAGE_SIZE) | |
510 | #else | 510 | #else | |
511 | #define PTESLEW(pte, id) ((void)id, pte) | 511 | #define PTESLEW(pte, id) ((void)id, pte) | |
512 | #define VASLEW(va,id) ((void)id, va) | 512 | #define VASLEW(va,id) ((void)id, va) | |
513 | #endif | 513 | #endif | |
514 | 514 | |||
515 | /* | 515 | /* | |
516 | * special VAs and the PTEs that map them | 516 | * special VAs and the PTEs that map them | |
517 | */ | 517 | */ | |
518 | static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte; | 518 | static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *early_zero_pte; | |
519 | static char *csrcp, *cdstp, *zerop, *ptpp; | 519 | static char *csrcp, *cdstp, *zerop, *ptpp; | |
520 | #ifdef XEN | 520 | #ifdef XEN | |
521 | char *early_zerop; /* also referenced from xen_pmap_bootstrap() */ | 521 | char *early_zerop; /* also referenced from xen_pmap_bootstrap() */ | |
522 | #else | 522 | #else | |
523 | static char *early_zerop; | 523 | static char *early_zerop; | |
524 | #endif | 524 | #endif | |
525 | 525 | |||
526 | #endif | 526 | #endif | |
527 | 527 | |||
528 | int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int); | 528 | int pmap_enter_default(pmap_t, vaddr_t, paddr_t, vm_prot_t, u_int); | |
529 | 529 | |||
530 | /* PDP pool_cache(9) and its callbacks */ | 530 | /* PDP pool_cache(9) and its callbacks */ | |
531 | struct pool_cache pmap_pdp_cache; | 531 | struct pool_cache pmap_pdp_cache; | |
532 | static int pmap_pdp_ctor(void *, void *, int); | 532 | static int pmap_pdp_ctor(void *, void *, int); | |
533 | static void pmap_pdp_dtor(void *, void *); | 533 | static void pmap_pdp_dtor(void *, void *); | |
534 | #ifdef PAE | 534 | #ifdef PAE | |
535 | /* need to allocate items of 4 pages */ | 535 | /* need to allocate items of 4 pages */ | |
536 | static void *pmap_pdp_alloc(struct pool *, int); | 536 | static void *pmap_pdp_alloc(struct pool *, int); | |
537 | static void pmap_pdp_free(struct pool *, void *); | 537 | static void pmap_pdp_free(struct pool *, void *); | |
538 | static struct pool_allocator pmap_pdp_allocator = { | 538 | static struct pool_allocator pmap_pdp_allocator = { | |
539 | .pa_alloc = pmap_pdp_alloc, | 539 | .pa_alloc = pmap_pdp_alloc, | |
540 | .pa_free = pmap_pdp_free, | 540 | .pa_free = pmap_pdp_free, | |
541 | .pa_pagesz = PAGE_SIZE * PDP_SIZE, | 541 | .pa_pagesz = PAGE_SIZE * PDP_SIZE, | |
542 | }; | 542 | }; | |
543 | #endif /* PAE */ | 543 | #endif /* PAE */ | |
544 | 544 | |||
545 | extern vaddr_t idt_vaddr; /* we allocate IDT early */ | 545 | extern vaddr_t idt_vaddr; /* we allocate IDT early */ | |
546 | extern paddr_t idt_paddr; | 546 | extern paddr_t idt_paddr; | |
547 | 547 | |||
548 | #ifdef _LP64 | 548 | #ifdef _LP64 | |
549 | extern vaddr_t lo32_vaddr; | 549 | extern vaddr_t lo32_vaddr; | |
550 | extern vaddr_t lo32_paddr; | 550 | extern vaddr_t lo32_paddr; | |
551 | #endif | 551 | #endif | |
552 | 552 | |||
553 | extern int end; | 553 | extern int end; | |
554 | 554 | |||
555 | #ifdef i386 | 555 | #ifdef i386 | |
556 | /* stuff to fix the pentium f00f bug */ | 556 | /* stuff to fix the pentium f00f bug */ | |
557 | extern vaddr_t pentium_idt_vaddr; | 557 | extern vaddr_t pentium_idt_vaddr; | |
558 | #endif | 558 | #endif | |
559 | 559 | |||
560 | 560 | |||
561 | /* | 561 | /* | |
562 | * local prototypes | 562 | * local prototypes | |
563 | */ | 563 | */ | |
564 | 564 | |||
565 | static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, | 565 | static struct vm_page *pmap_get_ptp(struct pmap *, vaddr_t, | |
566 | pd_entry_t * const *); | 566 | pd_entry_t * const *); | |
567 | static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); | 567 | static struct vm_page *pmap_find_ptp(struct pmap *, vaddr_t, paddr_t, int); | |
568 | static void pmap_freepage(struct pmap *, struct vm_page *, int); | 568 | static void pmap_freepage(struct pmap *, struct vm_page *, int); | |
569 | static void pmap_free_ptp(struct pmap *, struct vm_page *, | 569 | static void pmap_free_ptp(struct pmap *, struct vm_page *, | |
570 | vaddr_t, pt_entry_t *, | 570 | vaddr_t, pt_entry_t *, | |
571 | pd_entry_t * const *); | 571 | pd_entry_t * const *); | |
572 | static bool pmap_remove_pte(struct pmap *, struct vm_page *, | 572 | static bool pmap_remove_pte(struct pmap *, struct vm_page *, | |
573 | pt_entry_t *, vaddr_t, | 573 | pt_entry_t *, vaddr_t, | |
574 | struct pv_entry **); | 574 | struct pv_entry **); | |
575 | static void pmap_remove_ptes(struct pmap *, struct vm_page *, | 575 | static void pmap_remove_ptes(struct pmap *, struct vm_page *, | |
576 | vaddr_t, vaddr_t, vaddr_t, | 576 | vaddr_t, vaddr_t, vaddr_t, | |
577 | struct pv_entry **); | 577 | struct pv_entry **); | |
578 | 578 | |||
579 | static bool pmap_get_physpage(vaddr_t, int, paddr_t *); | 579 | static bool pmap_get_physpage(vaddr_t, int, paddr_t *); | |
580 | static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int, | 580 | static void pmap_alloc_level(pd_entry_t * const *, vaddr_t, int, | |
581 | long *); | 581 | long *); | |
582 | 582 | |||
583 | static bool pmap_reactivate(struct pmap *); | 583 | static bool pmap_reactivate(struct pmap *); | |
584 | 584 | |||
585 | /* | 585 | /* | |
586 | * p m a p h e l p e r f u n c t i o n s | 586 | * p m a p h e l p e r f u n c t i o n s | |
587 | */ | 587 | */ | |
588 | 588 | |||
589 | static inline void | 589 | static inline void | |
590 | pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff) | 590 | pmap_stats_update(struct pmap *pmap, int resid_diff, int wired_diff) | |
591 | { | 591 | { | |
592 | 592 | |||
593 | if (pmap == pmap_kernel()) { | 593 | if (pmap == pmap_kernel()) { | |
594 | atomic_add_long(&pmap->pm_stats.resident_count, resid_diff); | 594 | atomic_add_long(&pmap->pm_stats.resident_count, resid_diff); | |
595 | atomic_add_long(&pmap->pm_stats.wired_count, wired_diff); | 595 | atomic_add_long(&pmap->pm_stats.wired_count, wired_diff); | |
596 | } else { | 596 | } else { | |
597 | KASSERT(mutex_owned(pmap->pm_lock)); | 597 | KASSERT(mutex_owned(pmap->pm_lock)); | |
598 | pmap->pm_stats.resident_count += resid_diff; | 598 | pmap->pm_stats.resident_count += resid_diff; | |
599 | pmap->pm_stats.wired_count += wired_diff; | 599 | pmap->pm_stats.wired_count += wired_diff; | |
600 | } | 600 | } | |
601 | } | 601 | } | |
602 | 602 | |||
603 | static inline void | 603 | static inline void | |
604 | pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) | 604 | pmap_stats_update_bypte(struct pmap *pmap, pt_entry_t npte, pt_entry_t opte) | |
605 | { | 605 | { | |
606 | int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0); | 606 | int resid_diff = ((npte & PG_V) ? 1 : 0) - ((opte & PG_V) ? 1 : 0); | |
607 | int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0); | 607 | int wired_diff = ((npte & PG_W) ? 1 : 0) - ((opte & PG_W) ? 1 : 0); | |
608 | 608 | |||
609 | KASSERT((npte & (PG_V | PG_W)) != PG_W); | 609 | KASSERT((npte & (PG_V | PG_W)) != PG_W); | |
610 | KASSERT((opte & (PG_V | PG_W)) != PG_W); | 610 | KASSERT((opte & (PG_V | PG_W)) != PG_W); | |
611 | 611 | |||
612 | pmap_stats_update(pmap, resid_diff, wired_diff); | 612 | pmap_stats_update(pmap, resid_diff, wired_diff); | |
613 | } | 613 | } | |
614 | 614 | |||
615 | /* | 615 | /* | |
616 | * ptp_to_pmap: lookup pmap by ptp | 616 | * ptp_to_pmap: lookup pmap by ptp | |
617 | */ | 617 | */ | |
618 | 618 | |||
619 | static struct pmap * | 619 | static struct pmap * | |
620 | ptp_to_pmap(struct vm_page *ptp) | 620 | ptp_to_pmap(struct vm_page *ptp) | |
621 | { | 621 | { | |
622 | struct pmap *pmap; | 622 | struct pmap *pmap; | |
623 | 623 | |||
624 | if (ptp == NULL) { | 624 | if (ptp == NULL) { | |
625 | return pmap_kernel(); | 625 | return pmap_kernel(); | |
626 | } | 626 | } | |
627 | pmap = (struct pmap *)ptp->uobject; | 627 | pmap = (struct pmap *)ptp->uobject; | |
628 | KASSERT(pmap != NULL); | 628 | KASSERT(pmap != NULL); | |
629 | KASSERT(&pmap->pm_obj[0] == ptp->uobject); | 629 | KASSERT(&pmap->pm_obj[0] == ptp->uobject); | |
630 | return pmap; | 630 | return pmap; | |
631 | } | 631 | } | |
632 | 632 | |||
633 | static inline struct pv_pte * | 633 | static inline struct pv_pte * | |
634 | pve_to_pvpte(struct pv_entry *pve) | 634 | pve_to_pvpte(struct pv_entry *pve) | |
635 | { | 635 | { | |
636 | 636 | |||
637 | KASSERT((void *)&pve->pve_pte == (void *)pve); | 637 | KASSERT((void *)&pve->pve_pte == (void *)pve); | |
638 | return &pve->pve_pte; | 638 | return &pve->pve_pte; | |
639 | } | 639 | } | |
640 | 640 | |||
/*
 * pvpte_to_pve: inverse of pve_to_pvpte() — recover the enclosing
 * pv_entry from a pv_pte pointer.  Only valid for pv_ptes that are
 * actually embedded in a pv_entry (verified by the round-trip assert).
 */
static inline struct pv_entry *
pvpte_to_pve(struct pv_pte *pvpte)
{
	struct pv_entry *pve;

	pve = (void *)pvpte;
	KASSERT(pve_to_pvpte(pve) == pvpte);
	return pve;
}
649 | 649 | |||
650 | /* | 650 | /* | |
651 | * pv_pte_first, pv_pte_next: PV list iterator. | 651 | * pv_pte_first, pv_pte_next: PV list iterator. | |
652 | */ | 652 | */ | |
653 | 653 | |||
654 | static struct pv_pte * | 654 | static struct pv_pte * | |
655 | pv_pte_first(struct pmap_page *pp) | 655 | pv_pte_first(struct pmap_page *pp) | |
656 | { | 656 | { | |
657 | 657 | |||
658 | if ((pp->pp_flags & PP_EMBEDDED) != 0) { | 658 | if ((pp->pp_flags & PP_EMBEDDED) != 0) { | |
659 | return &pp->pp_pte; | 659 | return &pp->pp_pte; | |
660 | } | 660 | } | |
661 | return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list)); | 661 | return pve_to_pvpte(LIST_FIRST(&pp->pp_head.pvh_list)); | |
662 | } | 662 | } | |
663 | 663 | |||
664 | static struct pv_pte * | 664 | static struct pv_pte * | |
665 | pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte) | 665 | pv_pte_next(struct pmap_page *pp, struct pv_pte *pvpte) | |
666 | { | 666 | { | |
667 | 667 | |||
668 | KASSERT(pvpte != NULL); | 668 | KASSERT(pvpte != NULL); | |
669 | if (pvpte == &pp->pp_pte) { | 669 | if (pvpte == &pp->pp_pte) { | |
670 | KASSERT((pp->pp_flags & PP_EMBEDDED) != 0); | 670 | KASSERT((pp->pp_flags & PP_EMBEDDED) != 0); | |
671 | return NULL; | 671 | return NULL; | |
672 | } | 672 | } | |
673 | KASSERT((pp->pp_flags & PP_EMBEDDED) == 0); | 673 | KASSERT((pp->pp_flags & PP_EMBEDDED) == 0); | |
674 | return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list)); | 674 | return pve_to_pvpte(LIST_NEXT(pvpte_to_pve(pvpte), pve_list)); | |
675 | } | 675 | } | |
676 | 676 | |||
677 | /* | 677 | /* | |
678 | * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? | 678 | * pmap_is_curpmap: is this pmap the one currently loaded [in %cr3]? | |
679 | * of course the kernel is always loaded | 679 | * of course the kernel is always loaded | |
680 | */ | 680 | */ | |
681 | 681 | |||
682 | bool | 682 | bool | |
683 | pmap_is_curpmap(struct pmap *pmap) | 683 | pmap_is_curpmap(struct pmap *pmap) | |
684 | { | 684 | { | |
685 | return((pmap == pmap_kernel()) || | 685 | return((pmap == pmap_kernel()) || | |
686 | (pmap == curcpu()->ci_pmap)); | 686 | (pmap == curcpu()->ci_pmap)); | |
687 | } | 687 | } | |
688 | 688 | |||
689 | /* | 689 | /* | |
690 | * Add a reference to the specified pmap. | 690 | * Add a reference to the specified pmap. | |
691 | */ | 691 | */ | |
692 | 692 | |||
693 | void | 693 | void | |
694 | pmap_reference(struct pmap *pmap) | 694 | pmap_reference(struct pmap *pmap) | |
695 | { | 695 | { | |
696 | 696 | |||
697 | atomic_inc_uint(&pmap->pm_obj[0].uo_refs); | 697 | atomic_inc_uint(&pmap->pm_obj[0].uo_refs); | |
698 | } | 698 | } | |
699 | 699 | |||
700 | /* | 700 | /* | |
701 | * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in | 701 | * pmap_map_ptes: map a pmap's PTEs into KVM and lock them in | |
702 | * | 702 | * | |
703 | * there are several pmaps involved. some or all of them might be same. | 703 | * there are several pmaps involved. some or all of them might be same. | |
704 | * | 704 | * | |
705 | * - the pmap given by the first argument | 705 | * - the pmap given by the first argument | |
706 | * our caller wants to access this pmap's PTEs. | 706 | * our caller wants to access this pmap's PTEs. | |
707 | * | 707 | * | |
708 | * - pmap_kernel() | 708 | * - pmap_kernel() | |
709 | * the kernel pmap. note that it only contains the kernel part | 709 | * the kernel pmap. note that it only contains the kernel part | |
710 | * of the address space which is shared by any pmap. ie. any | 710 | * of the address space which is shared by any pmap. ie. any | |
711 | * pmap can be used instead of pmap_kernel() for our purpose. | 711 | * pmap can be used instead of pmap_kernel() for our purpose. | |
712 | * | 712 | * | |
713 | * - ci->ci_pmap | 713 | * - ci->ci_pmap | |
714 | * pmap currently loaded on the cpu. | 714 | * pmap currently loaded on the cpu. | |
715 | * | 715 | * | |
716 | * - vm_map_pmap(&curproc->p_vmspace->vm_map) | 716 | * - vm_map_pmap(&curproc->p_vmspace->vm_map) | |
717 | * current process' pmap. | 717 | * current process' pmap. | |
718 | * | 718 | * | |
719 | * => we lock enough pmaps to keep things locked in | 719 | * => we lock enough pmaps to keep things locked in | |
720 | * => must be undone with pmap_unmap_ptes before returning | 720 | * => must be undone with pmap_unmap_ptes before returning | |
721 | */ | 721 | */ | |
722 | 722 | |||
723 | void | 723 | void | |
724 | pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, | 724 | pmap_map_ptes(struct pmap *pmap, struct pmap **pmap2, | |
725 | pd_entry_t **ptepp, pd_entry_t * const **pdeppp) | 725 | pd_entry_t **ptepp, pd_entry_t * const **pdeppp) | |
726 | { | 726 | { | |
727 | struct pmap *curpmap; | 727 | struct pmap *curpmap; | |
728 | struct cpu_info *ci; | 728 | struct cpu_info *ci; | |
729 | lwp_t *l; | 729 | lwp_t *l; | |
730 | 730 | |||
731 | /* The kernel's pmap is always accessible. */ | 731 | /* The kernel's pmap is always accessible. */ | |
732 | if (pmap == pmap_kernel()) { | 732 | if (pmap == pmap_kernel()) { | |
733 | *pmap2 = NULL; | 733 | *pmap2 = NULL; | |
734 | *ptepp = PTE_BASE; | 734 | *ptepp = PTE_BASE; | |
735 | *pdeppp = normal_pdes; | 735 | *pdeppp = normal_pdes; | |
736 | return; | 736 | return; | |
737 | } | 737 | } | |
738 | KASSERT(kpreempt_disabled()); | 738 | KASSERT(kpreempt_disabled()); | |
739 | 739 | |||
740 | l = curlwp; | 740 | l = curlwp; | |
741 | retry: | 741 | retry: | |
742 | mutex_enter(pmap->pm_lock); | 742 | mutex_enter(pmap->pm_lock); | |
743 | ci = curcpu(); | 743 | ci = curcpu(); | |
744 | curpmap = ci->ci_pmap; | 744 | curpmap = ci->ci_pmap; | |
745 | if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) { | 745 | if (vm_map_pmap(&l->l_proc->p_vmspace->vm_map) == pmap) { | |
746 | /* Our own pmap so just load it: easy. */ | 746 | /* Our own pmap so just load it: easy. */ | |
747 | if (__predict_false(ci->ci_want_pmapload)) { | 747 | if (__predict_false(ci->ci_want_pmapload)) { | |
748 | mutex_exit(pmap->pm_lock); | 748 | mutex_exit(pmap->pm_lock); | |
749 | pmap_load(); | 749 | pmap_load(); | |
750 | goto retry; | 750 | goto retry; | |
751 | } | 751 | } | |
752 | KASSERT(pmap == curpmap); | 752 | KASSERT(pmap == curpmap); | |
753 | } else if (pmap == curpmap) { | 753 | } else if (pmap == curpmap) { | |
754 | /* | 754 | /* | |
755 | * Already on the CPU: make it valid. This is very | 755 | * Already on the CPU: make it valid. This is very | |
756 | * often the case during exit(), when we have switched | 756 | * often the case during exit(), when we have switched | |
757 | * to the kernel pmap in order to destroy a user pmap. | 757 | * to the kernel pmap in order to destroy a user pmap. | |
758 | */ | 758 | */ | |
759 | if (!pmap_reactivate(pmap)) { | 759 | if (!pmap_reactivate(pmap)) { | |
760 | u_int gen = uvm_emap_gen_return(); | 760 | u_int gen = uvm_emap_gen_return(); | |
761 | tlbflush(); | 761 | tlbflush(); | |
762 | uvm_emap_update(gen); | 762 | uvm_emap_update(gen); | |
763 | } | 763 | } | |
764 | } else { | 764 | } else { | |
765 | /* | 765 | /* | |
766 | * Toss current pmap from CPU, but keep a reference to it. | 766 | * Toss current pmap from CPU, but keep a reference to it. | |
767 | * The reference will be dropped by pmap_unmap_ptes(). | 767 | * The reference will be dropped by pmap_unmap_ptes(). | |
768 | * Can happen if we block during exit(). | 768 | * Can happen if we block during exit(). | |
769 | */ | 769 | */ | |
770 | const cpuid_t cid = cpu_index(ci); | 770 | const cpuid_t cid = cpu_index(ci); | |
771 | 771 | |||
772 | kcpuset_atomic_clear(curpmap->pm_cpus, cid); | 772 | kcpuset_atomic_clear(curpmap->pm_cpus, cid); | |
773 | kcpuset_atomic_clear(curpmap->pm_kernel_cpus, cid); | 773 | kcpuset_atomic_clear(curpmap->pm_kernel_cpus, cid); | |
774 | ci->ci_pmap = pmap; | 774 | ci->ci_pmap = pmap; | |
775 | ci->ci_tlbstate = TLBSTATE_VALID; | 775 | ci->ci_tlbstate = TLBSTATE_VALID; | |
776 | kcpuset_atomic_set(pmap->pm_cpus, cid); | 776 | kcpuset_atomic_set(pmap->pm_cpus, cid); | |
777 | kcpuset_atomic_set(pmap->pm_kernel_cpus, cid); | 777 | kcpuset_atomic_set(pmap->pm_kernel_cpus, cid); | |
778 | cpu_load_pmap(pmap, curpmap); | 778 | cpu_load_pmap(pmap, curpmap); | |
779 | } | 779 | } | |
780 | pmap->pm_ncsw = l->l_ncsw; | 780 | pmap->pm_ncsw = l->l_ncsw; | |
781 | *pmap2 = curpmap; | 781 | *pmap2 = curpmap; | |
782 | *ptepp = PTE_BASE; | 782 | *ptepp = PTE_BASE; | |
783 | #if defined(XEN) && defined(__x86_64__) | 783 | #if defined(XEN) && defined(__x86_64__) | |
784 | KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE); | 784 | KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] == L4_BASE); | |
785 | ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir; | 785 | ci->ci_normal_pdes[PTP_LEVELS - 2] = pmap->pm_pdir; | |
786 | *pdeppp = ci->ci_normal_pdes; | 786 | *pdeppp = ci->ci_normal_pdes; | |
787 | #else /* XEN && __x86_64__ */ | 787 | #else /* XEN && __x86_64__ */ | |
788 | *pdeppp = normal_pdes; | 788 | *pdeppp = normal_pdes; | |
789 | #endif /* XEN && __x86_64__ */ | 789 | #endif /* XEN && __x86_64__ */ | |
790 | } | 790 | } | |
791 | 791 | |||
792 | /* | 792 | /* | |
793 | * pmap_unmap_ptes: unlock the PTE mapping of "pmap" | 793 | * pmap_unmap_ptes: unlock the PTE mapping of "pmap" | |
794 | */ | 794 | */ | |
795 | 795 | |||
796 | void | 796 | void | |
797 | pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2) | 797 | pmap_unmap_ptes(struct pmap *pmap, struct pmap *pmap2) | |
798 | { | 798 | { | |
799 | struct cpu_info *ci; | 799 | struct cpu_info *ci; | |
800 | struct pmap *mypmap; | 800 | struct pmap *mypmap; | |
801 | 801 | |||
802 | KASSERT(kpreempt_disabled()); | 802 | KASSERT(kpreempt_disabled()); | |
803 | 803 | |||
804 | /* The kernel's pmap is always accessible. */ | 804 | /* The kernel's pmap is always accessible. */ | |
805 | if (pmap == pmap_kernel()) { | 805 | if (pmap == pmap_kernel()) { | |
806 | return; | 806 | return; | |
807 | } | 807 | } | |
808 | 808 | |||
809 | ci = curcpu(); | 809 | ci = curcpu(); | |
810 | #if defined(XEN) && defined(__x86_64__) | 810 | #if defined(XEN) && defined(__x86_64__) | |
811 | /* Reset per-cpu normal_pdes */ | 811 | /* Reset per-cpu normal_pdes */ | |
812 | KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE); | 812 | KASSERT(ci->ci_normal_pdes[PTP_LEVELS - 2] != L4_BASE); | |
813 | ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE; | 813 | ci->ci_normal_pdes[PTP_LEVELS - 2] = L4_BASE; | |
814 | #endif /* XEN && __x86_64__ */ | 814 | #endif /* XEN && __x86_64__ */ | |
815 | /* | 815 | /* | |
816 | * We cannot tolerate context switches while mapped in. | 816 | * We cannot tolerate context switches while mapped in. | |
817 | * If it is our own pmap all we have to do is unlock. | 817 | * If it is our own pmap all we have to do is unlock. | |
818 | */ | 818 | */ | |
819 | KASSERT(pmap->pm_ncsw == curlwp->l_ncsw); | 819 | KASSERT(pmap->pm_ncsw == curlwp->l_ncsw); | |
820 | mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map); | 820 | mypmap = vm_map_pmap(&curproc->p_vmspace->vm_map); | |
821 | if (pmap == mypmap) { | 821 | if (pmap == mypmap) { | |
822 | mutex_exit(pmap->pm_lock); | 822 | mutex_exit(pmap->pm_lock); | |
823 | return; | 823 | return; | |
824 | } | 824 | } | |
825 | 825 | |||
826 | /* | 826 | /* | |
827 | * Mark whatever's on the CPU now as lazy and unlock. | 827 | * Mark whatever's on the CPU now as lazy and unlock. | |
828 | * If the pmap was already installed, we are done. | 828 | * If the pmap was already installed, we are done. | |
829 | */ | 829 | */ | |
830 | ci->ci_tlbstate = TLBSTATE_LAZY; | 830 | ci->ci_tlbstate = TLBSTATE_LAZY; | |
831 | ci->ci_want_pmapload = (mypmap != pmap_kernel()); | 831 | ci->ci_want_pmapload = (mypmap != pmap_kernel()); | |
832 | mutex_exit(pmap->pm_lock); | 832 | mutex_exit(pmap->pm_lock); | |
833 | if (pmap == pmap2) { | 833 | if (pmap == pmap2) { | |
834 | return; | 834 | return; | |
835 | } | 835 | } | |
836 | 836 | |||
837 | /* | 837 | /* | |
838 | * We installed another pmap on the CPU. Grab a reference to | 838 | * We installed another pmap on the CPU. Grab a reference to | |
839 | * it and leave in place. Toss the evicted pmap (can block). | 839 | * it and leave in place. Toss the evicted pmap (can block). | |
840 | */ | 840 | */ | |
841 | pmap_reference(pmap); | 841 | pmap_reference(pmap); | |
842 | pmap_destroy(pmap2); | 842 | pmap_destroy(pmap2); | |
843 | } | 843 | } | |
844 | 844 | |||
845 | 845 | |||
846 | inline static void | 846 | inline static void | |
847 | pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte) | 847 | pmap_exec_account(struct pmap *pm, vaddr_t va, pt_entry_t opte, pt_entry_t npte) | |
848 | { | 848 | { | |
849 | 849 | |||
850 | #if !defined(__x86_64__) | 850 | #if !defined(__x86_64__) | |
851 | if (curproc == NULL || curproc->p_vmspace == NULL || | 851 | if (curproc == NULL || curproc->p_vmspace == NULL || | |
852 | pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) | 852 | pm != vm_map_pmap(&curproc->p_vmspace->vm_map)) | |
853 | return; | 853 | return; | |
854 | 854 | |||
855 | if ((opte ^ npte) & PG_X) | 855 | if ((opte ^ npte) & PG_X) | |
856 | pmap_update_pg(va); | 856 | pmap_update_pg(va); | |
857 | 857 | |||
858 | /* | 858 | /* | |
859 | * Executability was removed on the last executable change. | 859 | * Executability was removed on the last executable change. | |
860 | * Reset the code segment to something conservative and | 860 | * Reset the code segment to something conservative and | |
861 | * let the trap handler deal with setting the right limit. | 861 | * let the trap handler deal with setting the right limit. | |
862 | * We can't do that because of locking constraints on the vm map. | 862 | * We can't do that because of locking constraints on the vm map. | |
863 | */ | 863 | */ | |
864 | 864 | |||
865 | if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) { | 865 | if ((opte & PG_X) && (npte & PG_X) == 0 && va == pm->pm_hiexec) { | |
866 | struct trapframe *tf = curlwp->l_md.md_regs; | 866 | struct trapframe *tf = curlwp->l_md.md_regs; | |
867 | 867 | |||
868 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); | 868 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); | |
869 | pm->pm_hiexec = I386_MAX_EXE_ADDR; | 869 | pm->pm_hiexec = I386_MAX_EXE_ADDR; | |
870 | } | 870 | } | |
871 | #endif /* !defined(__x86_64__) */ | 871 | #endif /* !defined(__x86_64__) */ | |
872 | } | 872 | } | |
873 | 873 | |||
874 | #if !defined(__x86_64__) | 874 | #if !defined(__x86_64__) | |
875 | /* | 875 | /* | |
876 | * Fixup the code segment to cover all potential executable mappings. | 876 | * Fixup the code segment to cover all potential executable mappings. | |
877 | * returns 0 if no changes to the code segment were made. | 877 | * returns 0 if no changes to the code segment were made. | |
878 | */ | 878 | */ | |
879 | 879 | |||
880 | int | 880 | int | |
881 | pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) | 881 | pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) | |
882 | { | 882 | { | |
883 | struct vm_map_entry *ent; | 883 | struct vm_map_entry *ent; | |
884 | struct pmap *pm = vm_map_pmap(map); | 884 | struct pmap *pm = vm_map_pmap(map); | |
885 | vaddr_t va = 0; | 885 | vaddr_t va = 0; | |
886 | 886 | |||
887 | vm_map_lock_read(map); | 887 | vm_map_lock_read(map); | |
888 | for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { | 888 | for (ent = (&map->header)->next; ent != &map->header; ent = ent->next) { | |
889 | 889 | |||
890 | /* | 890 | /* | |
891 | * This entry has greater va than the entries before. | 891 | * This entry has greater va than the entries before. | |
892 | * We need to make it point to the last page, not past it. | 892 | * We need to make it point to the last page, not past it. | |
893 | */ | 893 | */ | |
894 | 894 | |||
895 | if (ent->protection & VM_PROT_EXECUTE) | 895 | if (ent->protection & VM_PROT_EXECUTE) | |
896 | va = trunc_page(ent->end) - PAGE_SIZE; | 896 | va = trunc_page(ent->end) - PAGE_SIZE; | |
897 | } | 897 | } | |
898 | vm_map_unlock_read(map); | 898 | vm_map_unlock_read(map); | |
899 | if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL)) | 899 | if (va == pm->pm_hiexec && tf->tf_cs == GSEL(GUCODEBIG_SEL, SEL_UPL)) | |
900 | return (0); | 900 | return (0); | |
901 | 901 | |||
902 | pm->pm_hiexec = va; | 902 | pm->pm_hiexec = va; | |
903 | if (pm->pm_hiexec > I386_MAX_EXE_ADDR) { | 903 | if (pm->pm_hiexec > I386_MAX_EXE_ADDR) { | |
904 | tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); | 904 | tf->tf_cs = GSEL(GUCODEBIG_SEL, SEL_UPL); | |
905 | } else { | 905 | } else { | |
906 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); | 906 | tf->tf_cs = GSEL(GUCODE_SEL, SEL_UPL); | |
907 | return (0); | 907 | return (0); | |
908 | } | 908 | } | |
909 | return (1); | 909 | return (1); | |
910 | } | 910 | } | |
911 | #endif /* !defined(__x86_64__) */ | 911 | #endif /* !defined(__x86_64__) */ | |
912 | 912 | |||
913 | void | 913 | void | |
914 | pat_init(struct cpu_info *ci) | 914 | pat_init(struct cpu_info *ci) | |
915 | { | 915 | { | |
916 | uint64_t pat; | 916 | uint64_t pat; | |
917 | 917 | |||
918 | if (!(ci->ci_feat_val[0] & CPUID_PAT)) | 918 | if (!(ci->ci_feat_val[0] & CPUID_PAT)) | |
919 | return; | 919 | return; | |
920 | 920 | |||
921 | /* We change WT to WC. Leave all other entries the default values. */ | 921 | /* We change WT to WC. Leave all other entries the default values. */ | |
922 | pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) | | 922 | pat = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) | | |
923 | PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) | | 923 | PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) | | |
924 | PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) | | 924 | PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) | | |
925 | PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC); | 925 | PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC); | |
926 | 926 | |||
927 | wrmsr(MSR_CR_PAT, pat); | 927 | wrmsr(MSR_CR_PAT, pat); | |
928 | cpu_pat_enabled = true; | 928 | cpu_pat_enabled = true; | |
929 | aprint_debug_dev(ci->ci_dev, "PAT enabled\n"); | 929 | aprint_debug_dev(ci->ci_dev, "PAT enabled\n"); | |
930 | } | 930 | } | |
931 | 931 | |||
932 | static pt_entry_t | 932 | static pt_entry_t | |
933 | pmap_pat_flags(u_int flags) | 933 | pmap_pat_flags(u_int flags) | |
934 | { | 934 | { | |
935 | u_int cacheflags = (flags & PMAP_CACHE_MASK); | 935 | u_int cacheflags = (flags & PMAP_CACHE_MASK); | |
936 | 936 | |||
937 | if (!cpu_pat_enabled) { | 937 | if (!cpu_pat_enabled) { | |
938 | switch (cacheflags) { | 938 | switch (cacheflags) { | |
939 | case PMAP_NOCACHE: | 939 | case PMAP_NOCACHE: | |
940 | case PMAP_NOCACHE_OVR: | 940 | case PMAP_NOCACHE_OVR: | |
941 | /* results in PGC_UCMINUS on cpus which have | 941 | /* results in PGC_UCMINUS on cpus which have | |
942 | * the cpuid PAT but PAT "disabled" | 942 | * the cpuid PAT but PAT "disabled" | |
943 | */ | 943 | */ | |
944 | return PG_N; | 944 | return PG_N; | |
945 | default: | 945 | default: | |
946 | return 0; | 946 | return 0; | |
947 | } | 947 | } | |
948 | } | 948 | } | |
949 | 949 | |||
950 | switch (cacheflags) { | 950 | switch (cacheflags) { | |
951 | case PMAP_NOCACHE: | 951 | case PMAP_NOCACHE: | |
952 | return PGC_UC; | 952 | return PGC_UC; | |
953 | case PMAP_WRITE_COMBINE: | 953 | case PMAP_WRITE_COMBINE: | |
954 | return PGC_WC; | 954 | return PGC_WC; | |
955 | case PMAP_WRITE_BACK: | 955 | case PMAP_WRITE_BACK: | |
956 | return PGC_WB; | 956 | return PGC_WB; | |
957 | case PMAP_NOCACHE_OVR: | 957 | case PMAP_NOCACHE_OVR: | |
958 | return PGC_UCMINUS; | 958 | return PGC_UCMINUS; | |
959 | } | 959 | } | |
960 | 960 | |||
961 | return 0; | 961 | return 0; | |
962 | } | 962 | } | |
963 | 963 | |||
964 | /* | 964 | /* | |
965 | * p m a p k e n t e r f u n c t i o n s | 965 | * p m a p k e n t e r f u n c t i o n s | |
966 | * | 966 | * | |
967 | * functions to quickly enter/remove pages from the kernel address | 967 | * functions to quickly enter/remove pages from the kernel address | |
968 | * space. pmap_kremove is exported to MI kernel. we make use of | 968 | * space. pmap_kremove is exported to MI kernel. we make use of | |
969 | * the recursive PTE mappings. | 969 | * the recursive PTE mappings. | |
970 | */ | 970 | */ | |
971 | 971 | |||
972 | /* | 972 | /* | |
973 | * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking | 973 | * pmap_kenter_pa: enter a kernel mapping without R/M (pv_entry) tracking | |
974 | * | 974 | * | |
975 | * => no need to lock anything, assume va is already allocated | 975 | * => no need to lock anything, assume va is already allocated | |
976 | * => should be faster than normal pmap enter function | 976 | * => should be faster than normal pmap enter function | |
977 | */ | 977 | */ | |
978 | 978 | |||
979 | void | 979 | void | |
980 | pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) | 980 | pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags) | |
981 | { | 981 | { | |
982 | pt_entry_t *pte, opte, npte; | 982 | pt_entry_t *pte, opte, npte; | |
983 | 983 | |||
984 | KASSERT(!(prot & ~VM_PROT_ALL)); | 984 | KASSERT(!(prot & ~VM_PROT_ALL)); | |
985 | 985 | |||
986 | if (va < VM_MIN_KERNEL_ADDRESS) | 986 | if (va < VM_MIN_KERNEL_ADDRESS) | |
987 | pte = vtopte(va); | 987 | pte = vtopte(va); | |
988 | else | 988 | else | |
989 | pte = kvtopte(va); | 989 | pte = kvtopte(va); | |
990 | #ifdef DOM0OPS | 990 | #ifdef DOM0OPS | |
991 | if (pa < pmap_pa_start || pa >= pmap_pa_end) { | 991 | if (pa < pmap_pa_start || pa >= pmap_pa_end) { | |
992 | #ifdef DEBUG | 992 | #ifdef DEBUG | |
993 | printf_nolog("%s: pa 0x%" PRIx64 " for va 0x%" PRIx64 | 993 | printf_nolog("%s: pa 0x%" PRIx64 " for va 0x%" PRIx64 | |
994 | " outside range\n", __func__, (int64_t)pa, (int64_t)va); | 994 | " outside range\n", __func__, (int64_t)pa, (int64_t)va); | |
995 | #endif /* DEBUG */ | 995 | #endif /* DEBUG */ | |
996 | npte = pa; | 996 | npte = pa; | |
997 | } else | 997 | } else | |
998 | #endif /* DOM0OPS */ | 998 | #endif /* DOM0OPS */ | |
999 | npte = pmap_pa2pte(pa); | 999 | npte = pmap_pa2pte(pa); | |
1000 | npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g; | 1000 | npte |= protection_codes[prot] | PG_k | PG_V | pmap_pg_g; | |
1001 | npte |= pmap_pat_flags(flags); | 1001 | npte |= pmap_pat_flags(flags); | |
1002 | opte = pmap_pte_testset(pte, npte); /* zap! */ | 1002 | opte = pmap_pte_testset(pte, npte); /* zap! */ | |
1003 | #if defined(DIAGNOSTIC) | 1003 | #if defined(DIAGNOSTIC) | |
1004 | /* XXX For now... */ | 1004 | /* XXX For now... */ | |
1005 | if (opte & PG_PS) | 1005 | if (opte & PG_PS) | |
1006 | panic("%s: PG_PS", __func__); | 1006 | panic("%s: PG_PS", __func__); | |
1007 | #endif | 1007 | #endif | |
1008 | if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { | 1008 | if ((opte & (PG_V | PG_U)) == (PG_V | PG_U)) { | |
1009 | /* This should not happen. */ | 1009 | /* This should not happen. */ | |
1010 | printf_nolog("%s: mapping already present\n", __func__); | 1010 | printf_nolog("%s: mapping already present\n", __func__); | |
1011 | kpreempt_disable(); | 1011 | kpreempt_disable(); | |
1012 | pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER); | 1012 | pmap_tlb_shootdown(pmap_kernel(), va, opte, TLBSHOOT_KENTER); | |
1013 | kpreempt_enable(); | 1013 | kpreempt_enable(); | |
1014 | } | 1014 | } | |
1015 | } | 1015 | } | |
1016 | 1016 | |||
1017 | void | 1017 | void | |
1018 | pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot) | 1018 | pmap_emap_enter(vaddr_t va, paddr_t pa, vm_prot_t prot) | |
1019 | { | 1019 | { | |
1020 | pt_entry_t *pte, npte; | 1020 | pt_entry_t *pte, npte; | |
1021 | 1021 | |||
1022 | KASSERT((prot & ~VM_PROT_ALL) == 0); | 1022 | KASSERT((prot & ~VM_PROT_ALL) == 0); | |
1023 | pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); | 1023 | pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); | |
1024 | 1024 | |||
1025 | #ifdef DOM0OPS | 1025 | #ifdef DOM0OPS | |
1026 | if (pa < pmap_pa_start || pa >= pmap_pa_end) { | 1026 | if (pa < pmap_pa_start || pa >= pmap_pa_end) { | |
1027 | npte = pa; | 1027 | npte = pa; | |
1028 | } else | 1028 | } else | |
1029 | #endif | 1029 | #endif | |
1030 | npte = pmap_pa2pte(pa); | 1030 | npte = pmap_pa2pte(pa); | |
1031 | 1031 | |||
1032 | npte = pmap_pa2pte(pa); | 1032 | npte = pmap_pa2pte(pa); | |
1033 | npte |= protection_codes[prot] | PG_k | PG_V; | 1033 | npte |= protection_codes[prot] | PG_k | PG_V; | |
1034 | pmap_pte_set(pte, npte); | 1034 | pmap_pte_set(pte, npte); | |
1035 | } | 1035 | } | |
1036 | 1036 | |||
1037 | /* | 1037 | /* | |
1038 | * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred. | 1038 | * pmap_emap_sync: perform TLB flush or pmap load, if it was deferred. | |
1039 | */ | 1039 | */ | |
1040 | void | 1040 | void | |
1041 | pmap_emap_sync(bool canload) | 1041 | pmap_emap_sync(bool canload) | |
1042 | { | 1042 | { | |
1043 | struct cpu_info *ci = curcpu(); | 1043 | struct cpu_info *ci = curcpu(); | |
1044 | struct pmap *pmap; | 1044 | struct pmap *pmap; | |
1045 | 1045 | |||
1046 | KASSERT(kpreempt_disabled()); | 1046 | KASSERT(kpreempt_disabled()); | |
1047 | if (__predict_true(ci->ci_want_pmapload && canload)) { | 1047 | if (__predict_true(ci->ci_want_pmapload && canload)) { | |
1048 | /* | 1048 | /* | |
1049 | * XXX: Hint for pmap_reactivate(), which might suggest to | 1049 | * XXX: Hint for pmap_reactivate(), which might suggest to | |
1050 | * not perform TLB flush, if state has not changed. | 1050 | * not perform TLB flush, if state has not changed. | |
1051 | */ | 1051 | */ | |
1052 | pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); | 1052 | pmap = vm_map_pmap(&curlwp->l_proc->p_vmspace->vm_map); | |
1053 | if (__predict_false(pmap == ci->ci_pmap)) { | 1053 | if (__predict_false(pmap == ci->ci_pmap)) { | |
1054 | kcpuset_atomic_clear(pmap->pm_cpus, cpu_index(ci)); | 1054 | kcpuset_atomic_clear(pmap->pm_cpus, cpu_index(ci)); | |
1055 | } | 1055 | } | |
1056 | pmap_load(); | 1056 | pmap_load(); | |
1057 | KASSERT(ci->ci_want_pmapload == 0); | 1057 | KASSERT(ci->ci_want_pmapload == 0); | |
1058 | } else { | 1058 | } else { | |
1059 | tlbflush(); | 1059 | tlbflush(); | |
1060 | } | 1060 | } | |
1061 | 1061 | |||
1062 | } | 1062 | } | |
1063 | 1063 | |||
1064 | void | 1064 | void | |
1065 | pmap_emap_remove(vaddr_t sva, vsize_t len) | 1065 | pmap_emap_remove(vaddr_t sva, vsize_t len) | |
1066 | { | 1066 | { | |
1067 | pt_entry_t *pte; | 1067 | pt_entry_t *pte; | |
1068 | vaddr_t va, eva = sva + len; | 1068 | vaddr_t va, eva = sva + len; | |
1069 | 1069 | |||
1070 | for (va = sva; va < eva; va += PAGE_SIZE) { | 1070 | for (va = sva; va < eva; va += PAGE_SIZE) { | |
1071 | pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); | 1071 | pte = (va < VM_MIN_KERNEL_ADDRESS) ? vtopte(va) : kvtopte(va); | |
1072 | pmap_pte_set(pte, 0); | 1072 | pmap_pte_set(pte, 0); | |
1073 | } | 1073 | } | |
1074 | } | 1074 | } | |
1075 | 1075 | |||
1076 | __strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa); | 1076 | __strict_weak_alias(pmap_kenter_ma, pmap_kenter_pa); | |
1077 | 1077 | |||
1078 | #if defined(__x86_64__) | 1078 | #if defined(__x86_64__) | |
1079 | /* | 1079 | /* | |
1080 | * Change protection for a virtual address. Local for a CPU only, don't | 1080 | * Change protection for a virtual address. Local for a CPU only, don't | |
1081 | * care about TLB shootdowns. | 1081 | * care about TLB shootdowns. | |
1082 | * | 1082 | * | |
1083 | * => must be called with preemption disabled | 1083 | * => must be called with preemption disabled | |
1084 | */ | 1084 | */ | |
1085 | void | 1085 | void | |
1086 | pmap_changeprot_local(vaddr_t va, vm_prot_t prot) | 1086 | pmap_changeprot_local(vaddr_t va, vm_prot_t prot) | |
1087 | { | 1087 | { | |
1088 | pt_entry_t *pte, opte, npte; | 1088 | pt_entry_t *pte, opte, npte; | |
1089 | 1089 | |||
1090 | KASSERT(kpreempt_disabled()); | 1090 | KASSERT(kpreempt_disabled()); | |
1091 | 1091 | |||
1092 | if (va < VM_MIN_KERNEL_ADDRESS) | 1092 | if (va < VM_MIN_KERNEL_ADDRESS) | |
1093 | pte = vtopte(va); | 1093 | pte = vtopte(va); | |
1094 | else | 1094 | else | |
1095 | pte = kvtopte(va); | 1095 | pte = kvtopte(va); | |
1096 | 1096 | |||
1097 | npte = opte = *pte; | 1097 | npte = opte = *pte; | |
1098 | 1098 | |||
1099 | if ((prot & VM_PROT_WRITE) != 0) | 1099 | if ((prot & VM_PROT_WRITE) != 0) | |
1100 | npte |= PG_RW; | 1100 | npte |= PG_RW; | |
1101 | else | 1101 | else | |
1102 | npte &= ~PG_RW; | 1102 | npte &= ~PG_RW; | |
1103 | 1103 | |||
1104 | if (opte != npte) { | 1104 | if (opte != npte) { | |
1105 | pmap_pte_set(pte, npte); | 1105 | pmap_pte_set(pte, npte); | |
1106 | pmap_pte_flush(); | 1106 | pmap_pte_flush(); | |
1107 | invlpg(va); | 1107 | invlpg(va); | |
1108 | } | 1108 | } | |
1109 | } | 1109 | } | |
1110 | #endif /* defined(__x86_64__) */ | 1110 | #endif /* defined(__x86_64__) */ | |
1111 | 1111 | |||
1112 | /* | 1112 | /* | |
1113 | * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking | 1113 | * pmap_kremove: remove a kernel mapping(s) without R/M (pv_entry) tracking | |
1114 | * | 1114 | * | |
1115 | * => no need to lock anything | 1115 | * => no need to lock anything | |
1116 | * => caller must dispose of any vm_page mapped in the va range | 1116 | * => caller must dispose of any vm_page mapped in the va range | |
1117 | * => note: not an inline function | 1117 | * => note: not an inline function | |
1118 | * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE | 1118 | * => we assume the va is page aligned and the len is a multiple of PAGE_SIZE | |
1119 | * => we assume kernel only unmaps valid addresses and thus don't bother | 1119 | * => we assume kernel only unmaps valid addresses and thus don't bother | |
1120 | * checking the valid bit before doing TLB flushing | 1120 | * checking the valid bit before doing TLB flushing | |
1121 | * => must be followed by call to pmap_update() before reuse of page | 1121 | * => must be followed by call to pmap_update() before reuse of page | |
1122 | */ | 1122 | */ | |
1123 | 1123 | |||
1124 | static inline void | 1124 | static inline void | |
1125 | pmap_kremove1(vaddr_t sva, vsize_t len, bool localonly) | 1125 | pmap_kremove1(vaddr_t sva, vsize_t len, bool localonly) | |
1126 | { | 1126 | { | |
1127 | pt_entry_t *pte, opte; | 1127 | pt_entry_t *pte, opte; | |
1128 | vaddr_t va, eva; | 1128 | vaddr_t va, eva; | |
1129 | 1129 | |||
1130 | eva = sva + len; | 1130 | eva = sva + len; | |
1131 | 1131 | |||
1132 | kpreempt_disable(); | 1132 | kpreempt_disable(); | |
1133 | for (va = sva; va < eva; va += PAGE_SIZE) { | 1133 | for (va = sva; va < eva; va += PAGE_SIZE) { | |
1134 | pte = kvtopte(va); | 1134 | pte = kvtopte(va); | |
1135 | opte = pmap_pte_testset(pte, 0); /* zap! */ | 1135 | opte = pmap_pte_testset(pte, 0); /* zap! */ | |
1136 | if ((opte & (PG_V | PG_U)) == (PG_V | PG_U) && !localonly) { | 1136 | if ((opte & (PG_V | PG_U)) == (PG_V | PG_U) && !localonly) { | |
1137 | pmap_tlb_shootdown(pmap_kernel(), va, opte, | 1137 | pmap_tlb_shootdown(pmap_kernel(), va, opte, | |
1138 | TLBSHOOT_KREMOVE); | 1138 | TLBSHOOT_KREMOVE); | |
1139 | } | 1139 | } | |
1140 | KASSERT((opte & PG_PS) == 0); | 1140 | KASSERT((opte & PG_PS) == 0); | |
1141 | KASSERT((opte & PG_PVLIST) == 0); | 1141 | KASSERT((opte & PG_PVLIST) == 0); | |
1142 | } | 1142 | } | |
1143 | if (localonly) { | 1143 | if (localonly) { | |
1144 | tlbflushg(); | 1144 | tlbflushg(); | |
1145 | } | 1145 | } | |
1146 | kpreempt_enable(); | 1146 | kpreempt_enable(); | |
1147 | } | 1147 | } | |
1148 | 1148 | |||
1149 | void | 1149 | void | |
1150 | pmap_kremove(vaddr_t sva, vsize_t len) | 1150 | pmap_kremove(vaddr_t sva, vsize_t len) | |
1151 | { | 1151 | { | |
1152 | 1152 | |||
1153 | pmap_kremove1(sva, len, false); | 1153 | pmap_kremove1(sva, len, false); | |
1154 | } | 1154 | } | |
1155 | 1155 | |||
1156 | /* | 1156 | /* | |
1157 | * pmap_kremove_local: like pmap_kremove(), but only worry about | 1157 | * pmap_kremove_local: like pmap_kremove(), but only worry about | |
1158 | * TLB invalidations on the current CPU. this is only intended | 1158 | * TLB invalidations on the current CPU. this is only intended | |
1159 | * for use while writing kernel crash dumps. | 1159 | * for use while writing kernel crash dumps. | |
1160 | */ | 1160 | */ | |
1161 | 1161 | |||
1162 | void | 1162 | void | |
1163 | pmap_kremove_local(vaddr_t sva, vsize_t len) | 1163 | pmap_kremove_local(vaddr_t sva, vsize_t len) | |
1164 | { | 1164 | { | |
1165 | 1165 | |||
1166 | KASSERT(panicstr != NULL); | 1166 | KASSERT(panicstr != NULL); | |
1167 | pmap_kremove1(sva, len, true); | 1167 | pmap_kremove1(sva, len, true); | |
1168 | } | 1168 | } | |
1169 | 1169 | |||
1170 | /* | 1170 | /* | |
1171 | * p m a p i n i t f u n c t i o n s | 1171 | * p m a p i n i t f u n c t i o n s | |
1172 | * | 1172 | * | |
1173 | * pmap_bootstrap and pmap_init are called during system startup | 1173 | * pmap_bootstrap and pmap_init are called during system startup | |
@@ -3535,1081 +3535,1081 @@ pmap_sync_pv(struct pv_pte *pvpte, pt_en | @@ -3535,1081 +3535,1081 @@ pmap_sync_pv(struct pv_pte *pvpte, pt_en | |||
3535 | 3535 | |||
3536 | /* | 3536 | /* | |
3537 | * check if there's anything to do on this pte. | 3537 | * check if there's anything to do on this pte. | |
3538 | */ | 3538 | */ | |
3539 | 3539 | |||
3540 | if ((opte & clearbits) == 0) { | 3540 | if ((opte & clearbits) == 0) { | |
3541 | need_shootdown = false; | 3541 | need_shootdown = false; | |
3542 | break; | 3542 | break; | |
3543 | } | 3543 | } | |
3544 | 3544 | |||
3545 | /* | 3545 | /* | |
3546 | * we need a shootdown if the pte is cached. (PG_U) | 3546 | * we need a shootdown if the pte is cached. (PG_U) | |
3547 | * | 3547 | * | |
3548 | * ...unless we are clearing only the PG_RW bit and | 3548 | * ...unless we are clearing only the PG_RW bit and | |
3549 | * it isn't cached as RW. (PG_M) | 3549 | * it isn't cached as RW. (PG_M) | |
3550 | */ | 3550 | */ | |
3551 | 3551 | |||
3552 | need_shootdown = (opte & PG_U) != 0 && | 3552 | need_shootdown = (opte & PG_U) != 0 && | |
3553 | !(clearbits == PG_RW && (opte & PG_M) == 0); | 3553 | !(clearbits == PG_RW && (opte & PG_M) == 0); | |
3554 | 3554 | |||
3555 | npte = opte & ~clearbits; | 3555 | npte = opte & ~clearbits; | |
3556 | 3556 | |||
3557 | /* | 3557 | /* | |
3558 | * if we need a shootdown anyway, clear PG_U and PG_M. | 3558 | * if we need a shootdown anyway, clear PG_U and PG_M. | |
3559 | */ | 3559 | */ | |
3560 | 3560 | |||
3561 | if (need_shootdown) { | 3561 | if (need_shootdown) { | |
3562 | npte &= ~(PG_U | PG_M); | 3562 | npte &= ~(PG_U | PG_M); | |
3563 | } | 3563 | } | |
3564 | KASSERT((npte & (PG_M | PG_U)) != PG_M); | 3564 | KASSERT((npte & (PG_M | PG_U)) != PG_M); | |
3565 | KASSERT((npte & (PG_U | PG_V)) != PG_U); | 3565 | KASSERT((npte & (PG_U | PG_V)) != PG_U); | |
3566 | KASSERT(npte == 0 || (opte & PG_V) != 0); | 3566 | KASSERT(npte == 0 || (opte & PG_V) != 0); | |
3567 | } while (pmap_pte_cas(ptep, opte, npte) != opte); | 3567 | } while (pmap_pte_cas(ptep, opte, npte) != opte); | |
3568 | 3568 | |||
3569 | if (need_shootdown) { | 3569 | if (need_shootdown) { | |
3570 | pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV2); | 3570 | pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_SYNC_PV2); | |
3571 | } | 3571 | } | |
3572 | pmap_unmap_pte(); | 3572 | pmap_unmap_pte(); | |
3573 | 3573 | |||
3574 | *optep = opte; | 3574 | *optep = opte; | |
3575 | return 0; | 3575 | return 0; | |
3576 | } | 3576 | } | |
3577 | 3577 | |||
3578 | static void | 3578 | static void | |
3579 | pmap_pp_remove(struct pmap_page *pp, paddr_t pa) | 3579 | pmap_pp_remove(struct pmap_page *pp, paddr_t pa) | |
3580 | { | 3580 | { | |
3581 | struct pv_pte *pvpte; | 3581 | struct pv_pte *pvpte; | |
3582 | struct pv_entry *killlist = NULL; | 3582 | struct pv_entry *killlist = NULL; | |
3583 | struct vm_page *ptp; | 3583 | struct vm_page *ptp; | |
3584 | pt_entry_t expect; | 3584 | pt_entry_t expect; | |
3585 | int count; | 3585 | int count; | |
3586 | 3586 | |||
3587 | expect = pmap_pa2pte(pa) | PG_V; | 3587 | expect = pmap_pa2pte(pa) | PG_V; | |
3588 | count = SPINLOCK_BACKOFF_MIN; | 3588 | count = SPINLOCK_BACKOFF_MIN; | |
3589 | kpreempt_disable(); | 3589 | kpreempt_disable(); | |
3590 | startover: | 3590 | startover: | |
3591 | while ((pvpte = pv_pte_first(pp)) != NULL) { | 3591 | while ((pvpte = pv_pte_first(pp)) != NULL) { | |
3592 | struct pmap *pmap; | 3592 | struct pmap *pmap; | |
3593 | struct pv_entry *pve; | 3593 | struct pv_entry *pve; | |
3594 | pt_entry_t opte; | 3594 | pt_entry_t opte; | |
3595 | vaddr_t va; | 3595 | vaddr_t va; | |
3596 | int error; | 3596 | int error; | |
3597 | 3597 | |||
3598 | /* | 3598 | /* | |
3599 | * add a reference to the pmap before clearing the pte. | 3599 | * add a reference to the pmap before clearing the pte. | |
3600 | * otherwise the pmap can disappear behind us. | 3600 | * otherwise the pmap can disappear behind us. | |
3601 | */ | 3601 | */ | |
3602 | 3602 | |||
3603 | ptp = pvpte->pte_ptp; | 3603 | ptp = pvpte->pte_ptp; | |
3604 | pmap = ptp_to_pmap(ptp); | 3604 | pmap = ptp_to_pmap(ptp); | |
3605 | if (ptp != NULL) { | 3605 | if (ptp != NULL) { | |
3606 | pmap_reference(pmap); | 3606 | pmap_reference(pmap); | |
3607 | } | 3607 | } | |
3608 | 3608 | |||
3609 | error = pmap_sync_pv(pvpte, expect, ~0, &opte); | 3609 | error = pmap_sync_pv(pvpte, expect, ~0, &opte); | |
3610 | if (error == EAGAIN) { | 3610 | if (error == EAGAIN) { | |
3611 | int hold_count; | 3611 | int hold_count; | |
3612 | KERNEL_UNLOCK_ALL(curlwp, &hold_count); | 3612 | KERNEL_UNLOCK_ALL(curlwp, &hold_count); | |
3613 | if (ptp != NULL) { | 3613 | if (ptp != NULL) { | |
3614 | pmap_destroy(pmap); | 3614 | pmap_destroy(pmap); | |
3615 | } | 3615 | } | |
3616 | SPINLOCK_BACKOFF(count); | 3616 | SPINLOCK_BACKOFF(count); | |
3617 | KERNEL_LOCK(hold_count, curlwp); | 3617 | KERNEL_LOCK(hold_count, curlwp); | |
3618 | goto startover; | 3618 | goto startover; | |
3619 | } | 3619 | } | |
3620 | 3620 | |||
3621 | pp->pp_attrs |= opte; | 3621 | pp->pp_attrs |= opte; | |
3622 | va = pvpte->pte_va; | 3622 | va = pvpte->pte_va; | |
3623 | pve = pmap_remove_pv(pp, ptp, va); | 3623 | pve = pmap_remove_pv(pp, ptp, va); | |
3624 | 3624 | |||
3625 | /* update the PTP reference count. free if last reference. */ | 3625 | /* update the PTP reference count. free if last reference. */ | |
3626 | if (ptp != NULL) { | 3626 | if (ptp != NULL) { | |
3627 | struct pmap *pmap2; | 3627 | struct pmap *pmap2; | |
3628 | pt_entry_t *ptes; | 3628 | pt_entry_t *ptes; | |
3629 | pd_entry_t * const *pdes; | 3629 | pd_entry_t * const *pdes; | |
3630 | 3630 | |||
3631 | KASSERT(pmap != pmap_kernel()); | 3631 | KASSERT(pmap != pmap_kernel()); | |
3632 | 3632 | |||
3633 | pmap_tlb_shootnow(); | 3633 | pmap_tlb_shootnow(); | |
3634 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | 3634 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | |
3635 | pmap_stats_update_bypte(pmap, 0, opte); | 3635 | pmap_stats_update_bypte(pmap, 0, opte); | |
3636 | ptp->wire_count--; | 3636 | ptp->wire_count--; | |
3637 | if (ptp->wire_count <= 1) { | 3637 | if (ptp->wire_count <= 1) { | |
3638 | pmap_free_ptp(pmap, ptp, va, ptes, pdes); | 3638 | pmap_free_ptp(pmap, ptp, va, ptes, pdes); | |
3639 | } | 3639 | } | |
3640 | pmap_unmap_ptes(pmap, pmap2); | 3640 | pmap_unmap_ptes(pmap, pmap2); | |
3641 | pmap_destroy(pmap); | 3641 | pmap_destroy(pmap); | |
3642 | } else { | 3642 | } else { | |
3643 | KASSERT(pmap == pmap_kernel()); | 3643 | KASSERT(pmap == pmap_kernel()); | |
3644 | pmap_stats_update_bypte(pmap, 0, opte); | 3644 | pmap_stats_update_bypte(pmap, 0, opte); | |
3645 | } | 3645 | } | |
3646 | 3646 | |||
3647 | if (pve != NULL) { | 3647 | if (pve != NULL) { | |
3648 | pve->pve_next = killlist; /* mark it for death */ | 3648 | pve->pve_next = killlist; /* mark it for death */ | |
3649 | killlist = pve; | 3649 | killlist = pve; | |
3650 | } | 3650 | } | |
3651 | } | 3651 | } | |
3652 | pmap_tlb_shootnow(); | 3652 | pmap_tlb_shootnow(); | |
3653 | kpreempt_enable(); | 3653 | kpreempt_enable(); | |
3654 | 3654 | |||
3655 | /* Now free unused pvs. */ | 3655 | /* Now free unused pvs. */ | |
3656 | pmap_free_pvs(killlist); | 3656 | pmap_free_pvs(killlist); | |
3657 | } | 3657 | } | |
3658 | 3658 | |||
3659 | /* | 3659 | /* | |
3660 | * pmap_page_remove: remove a managed vm_page from all pmaps that map it | 3660 | * pmap_page_remove: remove a managed vm_page from all pmaps that map it | |
3661 | * | 3661 | * | |
3662 | * => R/M bits are sync'd back to attrs | 3662 | * => R/M bits are sync'd back to attrs | |
3663 | */ | 3663 | */ | |
3664 | 3664 | |||
3665 | void | 3665 | void | |
3666 | pmap_page_remove(struct vm_page *pg) | 3666 | pmap_page_remove(struct vm_page *pg) | |
3667 | { | 3667 | { | |
3668 | struct pmap_page *pp; | 3668 | struct pmap_page *pp; | |
3669 | paddr_t pa; | 3669 | paddr_t pa; | |
3670 | 3670 | |||
3671 | KASSERT(uvm_page_locked_p(pg)); | 3671 | KASSERT(uvm_page_locked_p(pg)); | |
3672 | 3672 | |||
3673 | pp = VM_PAGE_TO_PP(pg); | 3673 | pp = VM_PAGE_TO_PP(pg); | |
3674 | pa = VM_PAGE_TO_PHYS(pg); | 3674 | pa = VM_PAGE_TO_PHYS(pg); | |
3675 | pmap_pp_remove(pp, pa); | 3675 | pmap_pp_remove(pp, pa); | |
3676 | } | 3676 | } | |
3677 | 3677 | |||
3678 | /* | 3678 | /* | |
3679 | * pmap_pv_remove: remove an unmanaged pv-tracked page from all pmaps | 3679 | * pmap_pv_remove: remove an unmanaged pv-tracked page from all pmaps | |
3680 | * that map it | 3680 | * that map it | |
3681 | */ | 3681 | */ | |
3682 | 3682 | |||
3683 | void | 3683 | void | |
3684 | pmap_pv_remove(paddr_t pa) | 3684 | pmap_pv_remove(paddr_t pa) | |
3685 | { | 3685 | { | |
3686 | struct pmap_page *pp; | 3686 | struct pmap_page *pp; | |
3687 | 3687 | |||
3688 | pp = pmap_pv_tracked(pa); | 3688 | pp = pmap_pv_tracked(pa); | |
3689 | if (pp == NULL) | 3689 | if (pp == NULL) | |
3690 | panic("pmap_pv_protect: page not pv-tracked: 0x%"PRIxPADDR, | 3690 | panic("pmap_pv_protect: page not pv-tracked: 0x%"PRIxPADDR, | |
3691 | pa); | 3691 | pa); | |
3692 | pmap_pp_remove(pp, pa); | 3692 | pmap_pp_remove(pp, pa); | |
3693 | } | 3693 | } | |
3694 | 3694 | |||
3695 | /* | 3695 | /* | |
3696 | * p m a p a t t r i b u t e f u n c t i o n s | 3696 | * p m a p a t t r i b u t e f u n c t i o n s | |
3697 | * functions that test/change managed page's attributes | 3697 | * functions that test/change managed page's attributes | |
3698 | * since a page can be mapped multiple times we must check each PTE that | 3698 | * since a page can be mapped multiple times we must check each PTE that | |
3699 | * maps it by going down the pv lists. | 3699 | * maps it by going down the pv lists. | |
3700 | */ | 3700 | */ | |
3701 | 3701 | |||
3702 | /* | 3702 | /* | |
3703 | * pmap_test_attrs: test a page's attributes | 3703 | * pmap_test_attrs: test a page's attributes | |
3704 | */ | 3704 | */ | |
3705 | 3705 | |||
3706 | bool | 3706 | bool | |
3707 | pmap_test_attrs(struct vm_page *pg, unsigned testbits) | 3707 | pmap_test_attrs(struct vm_page *pg, unsigned testbits) | |
3708 | { | 3708 | { | |
3709 | struct pmap_page *pp; | 3709 | struct pmap_page *pp; | |
3710 | struct pv_pte *pvpte; | 3710 | struct pv_pte *pvpte; | |
3711 | pt_entry_t expect; | 3711 | pt_entry_t expect; | |
3712 | u_int result; | 3712 | u_int result; | |
3713 | 3713 | |||
3714 | KASSERT(uvm_page_locked_p(pg)); | 3714 | KASSERT(uvm_page_locked_p(pg)); | |
3715 | 3715 | |||
3716 | pp = VM_PAGE_TO_PP(pg); | 3716 | pp = VM_PAGE_TO_PP(pg); | |
3717 | if ((pp->pp_attrs & testbits) != 0) { | 3717 | if ((pp->pp_attrs & testbits) != 0) { | |
3718 | return true; | 3718 | return true; | |
3719 | } | 3719 | } | |
3720 | expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V; | 3720 | expect = pmap_pa2pte(VM_PAGE_TO_PHYS(pg)) | PG_V; | |
3721 | kpreempt_disable(); | 3721 | kpreempt_disable(); | |
3722 | for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) { | 3722 | for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) { | |
3723 | pt_entry_t opte; | 3723 | pt_entry_t opte; | |
3724 | int error; | 3724 | int error; | |
3725 | 3725 | |||
3726 | if ((pp->pp_attrs & testbits) != 0) { | 3726 | if ((pp->pp_attrs & testbits) != 0) { | |
3727 | break; | 3727 | break; | |
3728 | } | 3728 | } | |
3729 | error = pmap_sync_pv(pvpte, expect, 0, &opte); | 3729 | error = pmap_sync_pv(pvpte, expect, 0, &opte); | |
3730 | if (error == 0) { | 3730 | if (error == 0) { | |
3731 | pp->pp_attrs |= opte; | 3731 | pp->pp_attrs |= opte; | |
3732 | } | 3732 | } | |
3733 | } | 3733 | } | |
3734 | result = pp->pp_attrs & testbits; | 3734 | result = pp->pp_attrs & testbits; | |
3735 | kpreempt_enable(); | 3735 | kpreempt_enable(); | |
3736 | 3736 | |||
3737 | /* | 3737 | /* | |
3738 | * note that we will exit the for loop with a non-null pve if | 3738 | * note that we will exit the for loop with a non-null pve if | |
3739 | * we have found the bits we are testing for. | 3739 | * we have found the bits we are testing for. | |
3740 | */ | 3740 | */ | |
3741 | 3741 | |||
3742 | return result != 0; | 3742 | return result != 0; | |
3743 | } | 3743 | } | |
3744 | 3744 | |||
3745 | static bool | 3745 | static bool | |
3746 | pmap_pp_clear_attrs(struct pmap_page *pp, paddr_t pa, unsigned clearbits) | 3746 | pmap_pp_clear_attrs(struct pmap_page *pp, paddr_t pa, unsigned clearbits) | |
3747 | { | 3747 | { | |
3748 | struct pv_pte *pvpte; | 3748 | struct pv_pte *pvpte; | |
3749 | u_int result; | 3749 | u_int result; | |
3750 | pt_entry_t expect; | 3750 | pt_entry_t expect; | |
3751 | int count; | 3751 | int count; | |
3752 | 3752 | |||
3753 | expect = pmap_pa2pte(pa) | PG_V; | 3753 | expect = pmap_pa2pte(pa) | PG_V; | |
3754 | count = SPINLOCK_BACKOFF_MIN; | 3754 | count = SPINLOCK_BACKOFF_MIN; | |
3755 | kpreempt_disable(); | 3755 | kpreempt_disable(); | |
3756 | startover: | 3756 | startover: | |
3757 | for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) { | 3757 | for (pvpte = pv_pte_first(pp); pvpte; pvpte = pv_pte_next(pp, pvpte)) { | |
3758 | pt_entry_t opte; | 3758 | pt_entry_t opte; | |
3759 | int error; | 3759 | int error; | |
3760 | 3760 | |||
3761 | error = pmap_sync_pv(pvpte, expect, clearbits, &opte); | 3761 | error = pmap_sync_pv(pvpte, expect, clearbits, &opte); | |
3762 | if (error == EAGAIN) { | 3762 | if (error == EAGAIN) { | |
3763 | int hold_count; | 3763 | int hold_count; | |
3764 | KERNEL_UNLOCK_ALL(curlwp, &hold_count); | 3764 | KERNEL_UNLOCK_ALL(curlwp, &hold_count); | |
3765 | SPINLOCK_BACKOFF(count); | 3765 | SPINLOCK_BACKOFF(count); | |
3766 | KERNEL_LOCK(hold_count, curlwp); | 3766 | KERNEL_LOCK(hold_count, curlwp); | |
3767 | goto startover; | 3767 | goto startover; | |
3768 | } | 3768 | } | |
3769 | pp->pp_attrs |= opte; | 3769 | pp->pp_attrs |= opte; | |
3770 | } | 3770 | } | |
3771 | result = pp->pp_attrs & clearbits; | 3771 | result = pp->pp_attrs & clearbits; | |
3772 | pp->pp_attrs &= ~clearbits; | 3772 | pp->pp_attrs &= ~clearbits; | |
3773 | pmap_tlb_shootnow(); | 3773 | pmap_tlb_shootnow(); | |
3774 | kpreempt_enable(); | 3774 | kpreempt_enable(); | |
3775 | 3775 | |||
3776 | return result != 0; | 3776 | return result != 0; | |
3777 | } | 3777 | } | |
3778 | 3778 | |||
3779 | /* | 3779 | /* | |
3780 | * pmap_clear_attrs: clear the specified attribute for a page. | 3780 | * pmap_clear_attrs: clear the specified attribute for a page. | |
3781 | * | 3781 | * | |
3782 | * => we return true if we cleared one of the bits we were asked to | 3782 | * => we return true if we cleared one of the bits we were asked to | |
3783 | */ | 3783 | */ | |
3784 | 3784 | |||
3785 | bool | 3785 | bool | |
3786 | pmap_clear_attrs(struct vm_page *pg, unsigned clearbits) | 3786 | pmap_clear_attrs(struct vm_page *pg, unsigned clearbits) | |
3787 | { | 3787 | { | |
3788 | struct pmap_page *pp; | 3788 | struct pmap_page *pp; | |
3789 | paddr_t pa; | 3789 | paddr_t pa; | |
3790 | 3790 | |||
3791 | KASSERT(uvm_page_locked_p(pg)); | 3791 | KASSERT(uvm_page_locked_p(pg)); | |
3792 | 3792 | |||
3793 | pp = VM_PAGE_TO_PP(pg); | 3793 | pp = VM_PAGE_TO_PP(pg); | |
3794 | pa = VM_PAGE_TO_PHYS(pg); | 3794 | pa = VM_PAGE_TO_PHYS(pg); | |
3795 | 3795 | |||
3796 | return pmap_pp_clear_attrs(pp, pa, clearbits); | 3796 | return pmap_pp_clear_attrs(pp, pa, clearbits); | |
3797 | } | 3797 | } | |
3798 | 3798 | |||
3799 | /* | 3799 | /* | |
3800 | * pmap_pv_clear_attrs: clear the specified attributes for an unmanaged | 3800 | * pmap_pv_clear_attrs: clear the specified attributes for an unmanaged | |
3801 | * pv-tracked page. | 3801 | * pv-tracked page. | |
3802 | */ | 3802 | */ | |
3803 | 3803 | |||
3804 | bool | 3804 | bool | |
3805 | pmap_pv_clear_attrs(paddr_t pa, unsigned clearbits) | 3805 | pmap_pv_clear_attrs(paddr_t pa, unsigned clearbits) | |
3806 | { | 3806 | { | |
3807 | struct pmap_page *pp; | 3807 | struct pmap_page *pp; | |
3808 | 3808 | |||
3809 | pp = pmap_pv_tracked(pa); | 3809 | pp = pmap_pv_tracked(pa); | |
3810 | if (pp == NULL) | 3810 | if (pp == NULL) | |
3811 | panic("pmap_pv_protect: page not pv-tracked: 0x%"PRIxPADDR, | 3811 | panic("pmap_pv_protect: page not pv-tracked: 0x%"PRIxPADDR, | |
3812 | pa); | 3812 | pa); | |
3813 | 3813 | |||
3814 | return pmap_pp_clear_attrs(pp, pa, clearbits); | 3814 | return pmap_pp_clear_attrs(pp, pa, clearbits); | |
3815 | } | 3815 | } | |
3816 | 3816 | |||
3817 | /* | 3817 | /* | |
3818 | * p m a p p r o t e c t i o n f u n c t i o n s | 3818 | * p m a p p r o t e c t i o n f u n c t i o n s | |
3819 | */ | 3819 | */ | |
3820 | 3820 | |||
3821 | /* | 3821 | /* | |
3822 | * pmap_page_protect: change the protection of all recorded mappings | 3822 | * pmap_page_protect: change the protection of all recorded mappings | |
3823 | * of a managed page | 3823 | * of a managed page | |
3824 | * | 3824 | * | |
3825 | * => NOTE: this is an inline function in pmap.h | 3825 | * => NOTE: this is an inline function in pmap.h | |
3826 | */ | 3826 | */ | |
3827 | 3827 | |||
3828 | /* see pmap.h */ | 3828 | /* see pmap.h */ | |
3829 | 3829 | |||
3830 | /* | 3830 | /* | |
3831 | * pmap_pv_protect: change the protection of all recorded mappings | 3831 | * pmap_pv_protect: change the protection of all recorded mappings | |
3832 | * of an unmanaged pv-tracked page | 3832 | * of an unmanaged pv-tracked page | |
3833 | * | 3833 | * | |
3834 | * => NOTE: this is an inline function in pmap.h | 3834 | * => NOTE: this is an inline function in pmap.h | |
3835 | */ | 3835 | */ | |
3836 | 3836 | |||
3837 | /* see pmap.h */ | 3837 | /* see pmap.h */ | |
3838 | 3838 | |||
3839 | /* | 3839 | /* | |
3840 | * pmap_protect: set the protection in of the pages in a pmap | 3840 | * pmap_protect: set the protection in of the pages in a pmap | |
3841 | * | 3841 | * | |
3842 | * => NOTE: this is an inline function in pmap.h | 3842 | * => NOTE: this is an inline function in pmap.h | |
3843 | */ | 3843 | */ | |
3844 | 3844 | |||
3845 | /* see pmap.h */ | 3845 | /* see pmap.h */ | |
3846 | 3846 | |||
3847 | /* | 3847 | /* | |
3848 | * pmap_write_protect: write-protect pages in a pmap. | 3848 | * pmap_write_protect: write-protect pages in a pmap. | |
3849 | */ | 3849 | */ | |
3850 | void | 3850 | void | |
3851 | pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) | 3851 | pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) | |
3852 | { | 3852 | { | |
3853 | pt_entry_t *ptes; | 3853 | pt_entry_t *ptes; | |
3854 | pt_entry_t * const *pdes; | 3854 | pt_entry_t * const *pdes; | |
3855 | struct pmap *pmap2; | 3855 | struct pmap *pmap2; | |
3856 | vaddr_t blockend, va; | 3856 | vaddr_t blockend, va; | |
3857 | 3857 | |||
3858 | KASSERT(curlwp->l_md.md_gc_pmap != pmap); | 3858 | KASSERT(curlwp->l_md.md_gc_pmap != pmap); | |
3859 | 3859 | |||
3860 | sva &= PG_FRAME; | 3860 | sva &= PG_FRAME; | |
3861 | eva &= PG_FRAME; | 3861 | eva &= PG_FRAME; | |
3862 | 3862 | |||
3863 | /* Acquire pmap. */ | 3863 | /* Acquire pmap. */ | |
3864 | kpreempt_disable(); | 3864 | kpreempt_disable(); | |
3865 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | 3865 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | |
3866 | 3866 | |||
3867 | for (va = sva ; va < eva ; va = blockend) { | 3867 | for (va = sva ; va < eva ; va = blockend) { | |
3868 | pt_entry_t *spte, *epte; | 3868 | pt_entry_t *spte, *epte; | |
3869 | int i; | 3869 | int i; | |
3870 | 3870 | |||
3871 | blockend = x86_round_pdr(va + 1); | 3871 | blockend = x86_round_pdr(va + 1); | |
3872 | if (blockend > eva) | 3872 | if (blockend > eva) | |
3873 | blockend = eva; | 3873 | blockend = eva; | |
3874 | 3874 | |||
3875 | /* | 3875 | /* | |
3876 | * XXXCDC: our PTE mappings should never be write-protected! | 3876 | * XXXCDC: our PTE mappings should never be write-protected! | |
3877 | * | 3877 | * | |
3878 | * long term solution is to move the PTEs out of user | 3878 | * long term solution is to move the PTEs out of user | |
3879 | * address space. and into kernel address space (up | 3879 | * address space. and into kernel address space (up | |
3880 | * with APTE). then we can set VM_MAXUSER_ADDRESS to | 3880 | * with APTE). then we can set VM_MAXUSER_ADDRESS to | |
3881 | * be VM_MAX_ADDRESS. | 3881 | * be VM_MAX_ADDRESS. | |
3882 | */ | 3882 | */ | |
3883 | 3883 | |||
3884 | /* XXXCDC: ugly hack to avoid freeing PDP here */ | 3884 | /* XXXCDC: ugly hack to avoid freeing PDP here */ | |
3885 | for (i = 0; i < PDP_SIZE; i++) { | 3885 | for (i = 0; i < PDP_SIZE; i++) { | |
3886 | if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE+i) | 3886 | if (pl_i(va, PTP_LEVELS) == PDIR_SLOT_PTE+i) | |
3887 | continue; | 3887 | continue; | |
3888 | } | 3888 | } | |
3889 | 3889 | |||
3890 | /* Is it a valid block? */ | 3890 | /* Is it a valid block? */ | |
3891 | if (!pmap_pdes_valid(va, pdes, NULL)) { | 3891 | if (!pmap_pdes_valid(va, pdes, NULL)) { | |
3892 | continue; | 3892 | continue; | |
3893 | } | 3893 | } | |
3894 | KASSERT(va < VM_MAXUSER_ADDRESS || va >= VM_MAX_ADDRESS); | 3894 | KASSERT(va < VM_MAXUSER_ADDRESS || va >= VM_MAX_ADDRESS); | |
3895 | 3895 | |||
3896 | spte = &ptes[pl1_i(va)]; | 3896 | spte = &ptes[pl1_i(va)]; | |
3897 | epte = &ptes[pl1_i(blockend)]; | 3897 | epte = &ptes[pl1_i(blockend)]; | |
3898 | 3898 | |||
3899 | for (/*null */; spte < epte ; spte++) { | 3899 | for (/*null */; spte < epte ; spte++) { | |
3900 | pt_entry_t opte, npte; | 3900 | pt_entry_t opte, npte; | |
3901 | 3901 | |||
3902 | do { | 3902 | do { | |
3903 | opte = *spte; | 3903 | opte = *spte; | |
3904 | if ((~opte & (PG_RW | PG_V)) != 0) { | 3904 | if ((~opte & (PG_RW | PG_V)) != 0) { | |
3905 | goto next; | 3905 | goto next; | |
3906 | } | 3906 | } | |
3907 | npte = opte & ~PG_RW; | 3907 | npte = opte & ~PG_RW; | |
3908 | } while (pmap_pte_cas(spte, opte, npte) != opte); | 3908 | } while (pmap_pte_cas(spte, opte, npte) != opte); | |
3909 | 3909 | |||
3910 | if ((opte & PG_M) != 0) { | 3910 | if ((opte & PG_M) != 0) { | |
3911 | vaddr_t tva = x86_ptob(spte - ptes); | 3911 | vaddr_t tva = x86_ptob(spte - ptes); | |
3912 | pmap_tlb_shootdown(pmap, tva, opte, | 3912 | pmap_tlb_shootdown(pmap, tva, opte, | |
3913 | TLBSHOOT_WRITE_PROTECT); | 3913 | TLBSHOOT_WRITE_PROTECT); | |
3914 | } | 3914 | } | |
3915 | next:; | 3915 | next:; | |
3916 | } | 3916 | } | |
3917 | } | 3917 | } | |
3918 | 3918 | |||
3919 | /* Release pmap. */ | 3919 | /* Release pmap. */ | |
3920 | pmap_unmap_ptes(pmap, pmap2); | 3920 | pmap_unmap_ptes(pmap, pmap2); | |
3921 | kpreempt_enable(); | 3921 | kpreempt_enable(); | |
3922 | } | 3922 | } | |
3923 | 3923 | |||
3924 | /* | 3924 | /* | |
3925 | * pmap_unwire: clear the wired bit in the PTE. | 3925 | * pmap_unwire: clear the wired bit in the PTE. | |
3926 | * | 3926 | * | |
3927 | * => Mapping should already be present. | 3927 | * => Mapping should already be present. | |
3928 | */ | 3928 | */ | |
3929 | void | 3929 | void | |
3930 | pmap_unwire(struct pmap *pmap, vaddr_t va) | 3930 | pmap_unwire(struct pmap *pmap, vaddr_t va) | |
3931 | { | 3931 | { | |
3932 | pt_entry_t *ptes, *ptep, opte; | 3932 | pt_entry_t *ptes, *ptep, opte; | |
3933 | pd_entry_t * const *pdes; | 3933 | pd_entry_t * const *pdes; | |
3934 | struct pmap *pmap2; | 3934 | struct pmap *pmap2; | |
3935 | 3935 | |||
3936 | /* Acquire pmap. */ | 3936 | /* Acquire pmap. */ | |
3937 | kpreempt_disable(); | 3937 | kpreempt_disable(); | |
3938 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | 3938 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); | |
3939 | 3939 | |||
3940 | if (!pmap_pdes_valid(va, pdes, NULL)) { | 3940 | if (!pmap_pdes_valid(va, pdes, NULL)) { | |
3941 | panic("pmap_unwire: invalid PDE"); | 3941 | panic("pmap_unwire: invalid PDE"); | |
3942 | } | 3942 | } | |
3943 | 3943 | |||
3944 | ptep = &ptes[pl1_i(va)]; | 3944 | ptep = &ptes[pl1_i(va)]; | |
3945 | opte = *ptep; | 3945 | opte = *ptep; | |
3946 | KASSERT(pmap_valid_entry(opte)); | 3946 | KASSERT(pmap_valid_entry(opte)); | |
3947 | 3947 | |||
3948 | if (opte & PG_W) { | 3948 | if (opte & PG_W) { | |
3949 | pt_entry_t npte = opte & ~PG_W; | 3949 | pt_entry_t npte = opte & ~PG_W; | |
3950 | 3950 | |||
3951 | opte = pmap_pte_testset(ptep, npte); | 3951 | opte = pmap_pte_testset(ptep, npte); | |
3952 | pmap_stats_update_bypte(pmap, npte, opte); | 3952 | pmap_stats_update_bypte(pmap, npte, opte); | |
3953 | } else { | 3953 | } else { | |
3954 | printf("pmap_unwire: wiring for pmap %p va 0x%lx " | 3954 | printf("pmap_unwire: wiring for pmap %p va 0x%lx " | |
3955 | "did not change!\n", pmap, va); | 3955 | "did not change!\n", pmap, va); | |
3956 | } | 3956 | } | |
3957 | 3957 | |||
3958 | /* Release pmap. */ | 3958 | /* Release pmap. */ | |
3959 | pmap_unmap_ptes(pmap, pmap2); | 3959 | pmap_unmap_ptes(pmap, pmap2); | |
3960 | kpreempt_enable(); | 3960 | kpreempt_enable(); | |
3961 | } | 3961 | } | |
3962 | 3962 | |||
3963 | /* | 3963 | /* | |
3964 | * pmap_copy: copy mappings from one pmap to another | 3964 | * pmap_copy: copy mappings from one pmap to another | |
3965 | * | 3965 | * | |
3966 | * => optional function | 3966 | * => optional function | |
3967 | * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) | 3967 | * void pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr) | |
3968 | */ | 3968 | */ | |
3969 | 3969 | |||
3970 | /* | 3970 | /* | |
3971 | * defined as macro in pmap.h | 3971 | * defined as macro in pmap.h | |
3972 | */ | 3972 | */ | |
3973 | 3973 | |||
3974 | __strict_weak_alias(pmap_enter, pmap_enter_default); | 3974 | __strict_weak_alias(pmap_enter, pmap_enter_default); | |
3975 | 3975 | |||
3976 | int | 3976 | int | |
3977 | pmap_enter_default(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, | 3977 | pmap_enter_default(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, | |
3978 | u_int flags) | 3978 | u_int flags) | |
3979 | { | 3979 | { | |
3980 | return pmap_enter_ma(pmap, va, pa, pa, prot, flags, 0); | 3980 | return pmap_enter_ma(pmap, va, pa, pa, prot, flags, 0); | |
3981 | } | 3981 | } | |
3982 | 3982 | |||
3983 | /* | 3983 | /* | |
3984 | * pmap_enter: enter a mapping into a pmap | 3984 | * pmap_enter: enter a mapping into a pmap | |
3985 | * | 3985 | * | |
3986 | * => must be done "now" ... no lazy-evaluation | 3986 | * => must be done "now" ... no lazy-evaluation | |
3987 | * => we set pmap => pv_head locking | 3987 | * => we set pmap => pv_head locking | |
3988 | */ | 3988 | */ | |
3989 | int | 3989 | int | |
3990 | pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa, | 3990 | pmap_enter_ma(struct pmap *pmap, vaddr_t va, paddr_t ma, paddr_t pa, | |
3991 | vm_prot_t prot, u_int flags, int domid) | 3991 | vm_prot_t prot, u_int flags, int domid) | |
3992 | { | 3992 | { | |
3993 | pt_entry_t *ptes, opte, npte; | 3993 | pt_entry_t *ptes, opte, npte; | |
3994 | pt_entry_t *ptep; | 3994 | pt_entry_t *ptep; | |
3995 | pd_entry_t * const *pdes; | 3995 | pd_entry_t * const *pdes; | |
3996 | struct vm_page *ptp; | 3996 | struct vm_page *ptp; | |
3997 | struct vm_page *new_pg, *old_pg; | 3997 | struct vm_page *new_pg, *old_pg; | |
3998 | struct pmap_page *new_pp, *old_pp; | 3998 | struct pmap_page *new_pp, *old_pp; | |
3999 | struct pv_entry *old_pve = NULL; | 3999 | struct pv_entry *old_pve = NULL; | |
4000 | struct pv_entry *new_pve; | 4000 | struct pv_entry *new_pve; | |
4001 | struct pv_entry *new_pve2; | 4001 | struct pv_entry *new_pve2; | |
4002 | int error; | 4002 | int error; | |
4003 | bool wired = (flags & PMAP_WIRED) != 0; | 4003 | bool wired = (flags & PMAP_WIRED) != 0; | |
4004 | struct pmap *pmap2; | 4004 | struct pmap *pmap2; | |
4005 | 4005 | |||
4006 | KASSERT(pmap_initialized); | 4006 | KASSERT(pmap_initialized); | |
4007 | KASSERT(curlwp->l_md.md_gc_pmap != pmap); | 4007 | KASSERT(curlwp->l_md.md_gc_pmap != pmap); | |
4008 | KASSERT(va < VM_MAX_KERNEL_ADDRESS); | 4008 | KASSERT(va < VM_MAX_KERNEL_ADDRESS); | |
4009 | KASSERTMSG(va != (vaddr_t)PDP_BASE, | 4009 | KASSERTMSG(va != (vaddr_t)PDP_BASE, | |
4010 | "pmap_enter: trying to map over PDP!"); | 4010 | "pmap_enter: trying to map over PDP!"); | |
4011 | KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS || | 4011 | KASSERTMSG(va < VM_MIN_KERNEL_ADDRESS || | |
4012 | pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]), | 4012 | pmap_valid_entry(pmap->pm_pdir[pl_i(va, PTP_LEVELS)]), | |
4013 | "pmap_enter: missing kernel PTP for VA %lx!", va); | 4013 | "pmap_enter: missing kernel PTP for VA %lx!", va); | |
4014 | 4014 | |||
4015 | #ifdef XEN | 4015 | #ifdef XEN | |
4016 | KASSERT(domid == DOMID_SELF || pa == 0); | 4016 | KASSERT(domid == DOMID_SELF || pa == 0); | |
4017 | #endif /* XEN */ | 4017 | #endif /* XEN */ | |
4018 | 4018 | |||
4019 | npte = ma | protection_codes[prot] | PG_V; | 4019 | npte = ma | protection_codes[prot] | PG_V; | |
4020 | npte |= pmap_pat_flags(flags); | 4020 | npte |= pmap_pat_flags(flags); | |
4021 | if (wired) | 4021 | if (wired) | |
4022 | npte |= PG_W; | 4022 | npte |= PG_W; | |
4023 | if (va < VM_MAXUSER_ADDRESS) | 4023 | if (va < VM_MAXUSER_ADDRESS) | |
4024 | npte |= PG_u; | 4024 | npte |= PG_u; | |
4025 | else if (va < VM_MAX_ADDRESS) | 4025 | else if (va < VM_MAX_ADDRESS) | |
4026 | npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ | 4026 | npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ | |
4027 | else | 4027 | else | |
4028 | npte |= PG_k; | 4028 | npte |= PG_k; | |
4029 | if (pmap == pmap_kernel()) | 4029 | if (pmap == pmap_kernel()) | |
4030 | npte |= pmap_pg_g; | 4030 | npte |= pmap_pg_g; | |
4031 | if (flags & VM_PROT_ALL) { | 4031 | if (flags & VM_PROT_ALL) { | |
4032 | npte |= PG_U; | 4032 | npte |= PG_U; | |
4033 | if (flags & VM_PROT_WRITE) { | 4033 | if (flags & VM_PROT_WRITE) { | |
4034 | KASSERT((npte & PG_RW) != 0); | 4034 | KASSERT((npte & PG_RW) != 0); | |
4035 | npte |= PG_M; | 4035 | npte |= PG_M; | |
4036 | } | 4036 | } | |
4037 | } | 4037 | } | |
4038 | 4038 | |||
4039 | #ifdef XEN | 4039 | #ifdef XEN | |
4040 | if (domid != DOMID_SELF) | 4040 | if (domid != DOMID_SELF) | |
4041 | new_pg = NULL; | 4041 | new_pg = NULL; | |
4042 | else | 4042 | else | |
4043 | #endif | 4043 | #endif | |
4044 | new_pg = PHYS_TO_VM_PAGE(pa); | 4044 | new_pg = PHYS_TO_VM_PAGE(pa); | |
4045 | if (new_pg != NULL) { | 4045 | if (new_pg != NULL) { | |
4046 | /* This is a managed page */ | 4046 | /* This is a managed page */ | |
4047 | npte |= PG_PVLIST; | 4047 | npte |= PG_PVLIST; | |
4048 | new_pp = VM_PAGE_TO_PP(new_pg); | 4048 | new_pp = VM_PAGE_TO_PP(new_pg); | |
4049 | } else if ((new_pp = pmap_pv_tracked(pa)) != NULL) { | 4049 | } else if ((new_pp = pmap_pv_tracked(pa)) != NULL) { | |
4050 | /* This is an unmanaged pv-tracked page */ | 4050 | /* This is an unmanaged pv-tracked page */ | |
4051 | npte |= PG_PVLIST; | 4051 | npte |= PG_PVLIST; | |
4052 | } else { | 4052 | } else { | |
4053 | new_pp = NULL; | 4053 | new_pp = NULL; | |
4054 | } | 4054 | } | |
4055 | 4055 | |||
4056 | /* get pves. */ | 4056 | /* get pves. */ | |
4057 | new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); | 4057 | new_pve = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); | |
4058 | new_pve2 = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); | 4058 | new_pve2 = pool_cache_get(&pmap_pv_cache, PR_NOWAIT); | |
4059 | if (new_pve == NULL || new_pve2 == NULL) { | 4059 | if (new_pve == NULL || new_pve2 == NULL) { | |
4060 | if (flags & PMAP_CANFAIL) { | 4060 | if (flags & PMAP_CANFAIL) { | |
4061 | error = ENOMEM; | 4061 | error = ENOMEM; | |
4062 | goto out2; | 4062 | goto out2; | |
4063 | } | 4063 | } | |
4064 | panic("pmap_enter: pve allocation failed"); | 4064 | panic("pmap_enter: pve allocation failed"); | |
4065 | } | 4065 | } | |
4066 | 4066 | |||
4067 | kpreempt_disable(); | 4067 | kpreempt_disable(); | |
4068 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ | 4068 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ | |
4069 | if (pmap == pmap_kernel()) { | 4069 | if (pmap == pmap_kernel()) { | |
4070 | ptp = NULL; | 4070 | ptp = NULL; | |
4071 | } else { | 4071 | } else { | |
4072 | ptp = pmap_get_ptp(pmap, va, pdes); | 4072 | ptp = pmap_get_ptp(pmap, va, pdes); | |
4073 | if (ptp == NULL) { | 4073 | if (ptp == NULL) { | |
4074 | pmap_unmap_ptes(pmap, pmap2); | 4074 | pmap_unmap_ptes(pmap, pmap2); | |
4075 | if (flags & PMAP_CANFAIL) { | 4075 | if (flags & PMAP_CANFAIL) { | |
4076 | error = ENOMEM; | 4076 | error = ENOMEM; | |
4077 | goto out; | 4077 | goto out; | |
4078 | } | 4078 | } | |
4079 | panic("pmap_enter: get ptp failed"); | 4079 | panic("pmap_enter: get ptp failed"); | |
4080 | } | 4080 | } | |
4081 | } | 4081 | } | |
4082 | 4082 | |||
4083 | /* | 4083 | /* | |
4084 | * update the pte. | 4084 | * update the pte. | |
4085 | */ | 4085 | */ | |
4086 | 4086 | |||
4087 | ptep = &ptes[pl1_i(va)]; | 4087 | ptep = &ptes[pl1_i(va)]; | |
4088 | do { | 4088 | do { | |
4089 | opte = *ptep; | 4089 | opte = *ptep; | |
4090 | 4090 | |||
4091 | /* | 4091 | /* | |
4092 | * if the same page, inherit PG_U and PG_M. | 4092 | * if the same page, inherit PG_U and PG_M. | |
4093 | */ | 4093 | */ | |
4094 | if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) { | 4094 | if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) { | |
4095 | npte |= opte & (PG_U | PG_M); | 4095 | npte |= opte & (PG_U | PG_M); | |
4096 | } | 4096 | } | |
4097 | #if defined(XEN) | 4097 | #if defined(XEN) | |
4098 | if (domid != DOMID_SELF) { | 4098 | if (domid != DOMID_SELF) { | |
4099 | /* pmap_pte_cas with error handling */ | 4099 | /* pmap_pte_cas with error handling */ | |
4100 | int s = splvm(); | 4100 | int s = splvm(); | |
4101 | if (opte != *ptep) { | 4101 | if (opte != *ptep) { | |
4102 | splx(s); | 4102 | splx(s); | |
4103 | continue; | 4103 | continue; | |
4104 | } | 4104 | } | |
4105 | error = xpq_update_foreign( | 4105 | error = xpq_update_foreign( | |
4106 | vtomach((vaddr_t)ptep), npte, domid); | 4106 | vtomach((vaddr_t)ptep), npte, domid); | |
4107 | splx(s); | 4107 | splx(s); | |
4108 | if (error) { | 4108 | if (error) { | |
4109 | if (ptp != NULL && ptp->wire_count <= 1) { | 4109 | if (ptp != NULL && ptp->wire_count <= 1) { | |
4110 | pmap_free_ptp(pmap, ptp, va, ptes, pdes); | 4110 | pmap_free_ptp(pmap, ptp, va, ptes, pdes); | |
4111 | } | 4111 | } | |
4112 | pmap_unmap_ptes(pmap, pmap2); | 4112 | pmap_unmap_ptes(pmap, pmap2); | |
4113 | goto out; | 4113 | goto out; | |
4114 | } | 4114 | } | |
4115 | break; | 4115 | break; | |
4116 | } | 4116 | } | |
4117 | #endif /* defined(XEN) */ | 4117 | #endif /* defined(XEN) */ | |
4118 | } while (pmap_pte_cas(ptep, opte, npte) != opte); | 4118 | } while (pmap_pte_cas(ptep, opte, npte) != opte); | |
4119 | 4119 | |||
4120 | /* | 4120 | /* | |
4121 | * update statistics and PTP's reference count. | 4121 | * update statistics and PTP's reference count. | |
4122 | */ | 4122 | */ | |
4123 | 4123 | |||
4124 | pmap_stats_update_bypte(pmap, npte, opte); | 4124 | pmap_stats_update_bypte(pmap, npte, opte); | |
4125 | if (ptp != NULL && !pmap_valid_entry(opte)) { | 4125 | if (ptp != NULL && !pmap_valid_entry(opte)) { | |
4126 | ptp->wire_count++; | 4126 | ptp->wire_count++; | |
4127 | } | 4127 | } | |
4128 | KASSERT(ptp == NULL || ptp->wire_count > 1); | 4128 | KASSERT(ptp == NULL || ptp->wire_count > 1); | |
4129 | 4129 | |||
4130 | /* | 4130 | /* | |
4131 | * if the same page, we can skip pv_entry handling. | 4131 | * if the same page, we can skip pv_entry handling. | |
4132 | */ | 4132 | */ | |
4133 | 4133 | |||
4134 | if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) { | 4134 | if (((opte ^ npte) & (PG_FRAME | PG_V)) == 0) { | |
4135 | KASSERT(((opte ^ npte) & PG_PVLIST) == 0); | 4135 | KASSERT(((opte ^ npte) & PG_PVLIST) == 0); | |
4136 | goto same_pa; | 4136 | goto same_pa; | |
4137 | } | 4137 | } | |
4138 | 4138 | |||
4139 | /* | 4139 | /* | |
4140 | * if old page is pv-tracked, remove pv_entry from its list. | 4140 | * if old page is pv-tracked, remove pv_entry from its list. | |
4141 | */ | 4141 | */ | |
4142 | 4142 | |||
4143 | if ((~opte & (PG_V | PG_PVLIST)) == 0) { | 4143 | if ((~opte & (PG_V | PG_PVLIST)) == 0) { | |
4144 | if ((old_pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte))) != NULL) { | 4144 | if ((old_pg = PHYS_TO_VM_PAGE(pmap_pte2pa(opte))) != NULL) { | |
4145 | KASSERT(uvm_page_locked_p(old_pg)); | 4145 | KASSERT(uvm_page_locked_p(old_pg)); | |
4146 | old_pp = VM_PAGE_TO_PP(old_pg); | 4146 | old_pp = VM_PAGE_TO_PP(old_pg); | |
4147 | } else if ((old_pp = pmap_pv_tracked(pmap_pte2pa(opte))) | 4147 | } else if ((old_pp = pmap_pv_tracked(pmap_pte2pa(opte))) | |
4148 | == NULL) { | 4148 | == NULL) { | |
4149 | pa = pmap_pte2pa(opte); | 4149 | pa = pmap_pte2pa(opte); | |
4150 | panic("pmap_enter: PG_PVLIST with pv-untracked page" | 4150 | panic("pmap_enter: PG_PVLIST with pv-untracked page" | |
4151 | " va = 0x%"PRIxVADDR | 4151 | " va = 0x%"PRIxVADDR | |
4152 | " pa = 0x%" PRIxPADDR " (0x%" PRIxPADDR ")", | 4152 | " pa = 0x%" PRIxPADDR " (0x%" PRIxPADDR ")", | |
4153 | va, pa, atop(pa)); | 4153 | va, pa, atop(pa)); | |
4154 | } | 4154 | } | |
4155 | 4155 | |||
4156 | old_pve = pmap_remove_pv(old_pp, ptp, va); | 4156 | old_pve = pmap_remove_pv(old_pp, ptp, va); | |
4157 | old_pp->pp_attrs |= opte; | 4157 | old_pp->pp_attrs |= opte; | |
4158 | } | 4158 | } | |
4159 | 4159 | |||
4160 | /* | 4160 | /* | |
4161 | * if new page is pv-tracked, insert pv_entry into its list. | 4161 | * if new page is pv-tracked, insert pv_entry into its list. | |
4162 | */ | 4162 | */ | |
4163 | 4163 | |||
4164 | if (new_pp) { | 4164 | if (new_pp) { | |
4165 | new_pve = pmap_enter_pv(new_pp, new_pve, &new_pve2, ptp, va); | 4165 | new_pve = pmap_enter_pv(new_pp, new_pve, &new_pve2, ptp, va); | |
4166 | } | 4166 | } | |
4167 | 4167 | |||
4168 | same_pa: | 4168 | same_pa: | |
4169 | pmap_unmap_ptes(pmap, pmap2); | 4169 | pmap_unmap_ptes(pmap, pmap2); | |
4170 | 4170 | |||
4171 | /* | 4171 | /* | |
4172 | * shootdown tlb if necessary. | 4172 | * shootdown tlb if necessary. | |
4173 | */ | 4173 | */ | |
4174 | 4174 | |||
4175 | if ((~opte & (PG_V | PG_U)) == 0 && | 4175 | if ((~opte & (PG_V | PG_U)) == 0 && | |
4176 | ((opte ^ npte) & (PG_FRAME | PG_RW)) != 0) { | 4176 | ((opte ^ npte) & (PG_FRAME | PG_RW)) != 0) { | |
4177 | pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER); | 4177 | pmap_tlb_shootdown(pmap, va, opte, TLBSHOOT_ENTER); | |
4178 | } | 4178 | } | |
4179 | 4179 | |||
4180 | error = 0; | 4180 | error = 0; | |
4181 | out: | 4181 | out: | |
4182 | kpreempt_enable(); | 4182 | kpreempt_enable(); | |
4183 | out2: | 4183 | out2: | |
4184 | if (old_pve != NULL) { | 4184 | if (old_pve != NULL) { | |
4185 | pool_cache_put(&pmap_pv_cache, old_pve); | 4185 | pool_cache_put(&pmap_pv_cache, old_pve); | |
4186 | } | 4186 | } | |
4187 | if (new_pve != NULL) { | 4187 | if (new_pve != NULL) { | |
4188 | pool_cache_put(&pmap_pv_cache, new_pve); | 4188 | pool_cache_put(&pmap_pv_cache, new_pve); | |
4189 | } | 4189 | } | |
4190 | if (new_pve2 != NULL) { | 4190 | if (new_pve2 != NULL) { | |
4191 | pool_cache_put(&pmap_pv_cache, new_pve2); | 4191 | pool_cache_put(&pmap_pv_cache, new_pve2); | |
4192 | } | 4192 | } | |
4193 | 4193 | |||
4194 | return error; | 4194 | return error; | |
4195 | } | 4195 | } | |
4196 | 4196 | |||
4197 | static bool | 4197 | static bool | |
4198 | pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp) | 4198 | pmap_get_physpage(vaddr_t va, int level, paddr_t *paddrp) | |
4199 | { | 4199 | { | |
4200 | struct vm_page *ptp; | 4200 | struct vm_page *ptp; | |
4201 | struct pmap *kpm = pmap_kernel(); | 4201 | struct pmap *kpm = pmap_kernel(); | |
4202 | 4202 | |||
4203 | if (!uvm.page_init_done) { | 4203 | if (!uvm.page_init_done) { | |
4204 | 4204 | |||
4205 | /* | 4205 | /* | |
4206 | * we're growing the kernel pmap early (from | 4206 | * we're growing the kernel pmap early (from | |
4207 | * uvm_pageboot_alloc()). this case must be | 4207 | * uvm_pageboot_alloc()). this case must be | |
4208 | * handled a little differently. | 4208 | * handled a little differently. | |
4209 | */ | 4209 | */ | |
4210 | 4210 | |||
4211 | if (!uvm_page_physget(paddrp)) | 4211 | if (!uvm_page_physget(paddrp)) | |
4212 | panic("pmap_get_physpage: out of memory"); | 4212 | panic("pmap_get_physpage: out of memory"); | |
4213 | #if defined(__HAVE_DIRECT_MAP) | 4213 | #if defined(__HAVE_DIRECT_MAP) | |
4214 | pagezero(PMAP_DIRECT_MAP(*paddrp)); | 4214 | pagezero(PMAP_DIRECT_MAP(*paddrp)); | |
4215 | #else | 4215 | #else | |
4216 | #if defined(XEN) | 4216 | #if defined(XEN) | |
4217 | if (XEN_VERSION_SUPPORTED(3, 4)) { | 4217 | if (XEN_VERSION_SUPPORTED(3, 4)) { | |
4218 | xen_pagezero(*paddrp); | 4218 | xen_pagezero(*paddrp); | |
4219 | return true; | 4219 | return true; | |
4220 | } | 4220 | } | |
4221 | #endif | 4221 | #endif | |
4222 | kpreempt_disable(); | 4222 | kpreempt_disable(); | |
4223 | pmap_pte_set(early_zero_pte, | 4223 | pmap_pte_set(early_zero_pte, | |
4224 | pmap_pa2pte(*paddrp) | PG_V | PG_RW | PG_k); | 4224 | pmap_pa2pte(*paddrp) | PG_V | PG_RW | PG_k); | |
4225 | pmap_pte_flush(); | 4225 | pmap_pte_flush(); | |
4226 | pmap_update_pg((vaddr_t)early_zerop); | 4226 | pmap_update_pg((vaddr_t)early_zerop); | |
4227 | memset(early_zerop, 0, PAGE_SIZE); | 4227 | memset(early_zerop, 0, PAGE_SIZE); | |
4228 | #if defined(DIAGNOSTIC) || defined(XEN) | 4228 | #if defined(DIAGNOSTIC) || defined(XEN) | |
4229 | pmap_pte_set(early_zero_pte, 0); | 4229 | pmap_pte_set(early_zero_pte, 0); | |
4230 | pmap_pte_flush(); | 4230 | pmap_pte_flush(); | |
4231 | #endif /* defined(DIAGNOSTIC) */ | 4231 | #endif /* defined(DIAGNOSTIC) */ | |
4232 | kpreempt_enable(); | 4232 | kpreempt_enable(); | |
4233 | #endif /* defined(__HAVE_DIRECT_MAP) */ | 4233 | #endif /* defined(__HAVE_DIRECT_MAP) */ | |
4234 | } else { | 4234 | } else { | |
4235 | /* XXX */ | 4235 | /* XXX */ | |
4236 | ptp = uvm_pagealloc(NULL, 0, NULL, | 4236 | ptp = uvm_pagealloc(NULL, 0, NULL, | |
4237 | UVM_PGA_USERESERVE|UVM_PGA_ZERO); | 4237 | UVM_PGA_USERESERVE|UVM_PGA_ZERO); | |
4238 | if (ptp == NULL) | 4238 | if (ptp == NULL) | |
4239 | panic("pmap_get_physpage: out of memory"); | 4239 | panic("pmap_get_physpage: out of memory"); | |
4240 | ptp->flags &= ~PG_BUSY; | 4240 | ptp->flags &= ~PG_BUSY; | |
4241 | ptp->wire_count = 1; | 4241 | ptp->wire_count = 1; | |
4242 | *paddrp = VM_PAGE_TO_PHYS(ptp); | 4242 | *paddrp = VM_PAGE_TO_PHYS(ptp); | |
4243 | } | 4243 | } | |
4244 | pmap_stats_update(kpm, 1, 0); | 4244 | pmap_stats_update(kpm, 1, 0); | |
4245 | return true; | 4245 | return true; | |
4246 | } | 4246 | } | |
4247 | 4247 | |||
4248 | /* | 4248 | /* | |
4249 | * Allocate the amount of specified ptps for a ptp level, and populate | 4249 | * Allocate the amount of specified ptps for a ptp level, and populate | |
4250 | * all levels below accordingly, mapping virtual addresses starting at | 4250 | * all levels below accordingly, mapping virtual addresses starting at | |
4251 | * kva. | 4251 | * kva. | |
4252 | * | 4252 | * | |
4253 | * Used by pmap_growkernel. | 4253 | * Used by pmap_growkernel. | |
4254 | */ | 4254 | */ | |
4255 | static void | 4255 | static void | |
4256 | pmap_alloc_level(pd_entry_t * const *pdes, vaddr_t kva, int lvl, | 4256 | pmap_alloc_level(pd_entry_t * const *pdes, vaddr_t kva, int lvl, | |
4257 | long *needed_ptps) | 4257 | long *needed_ptps) | |
4258 | { | 4258 | { | |
4259 | unsigned long i; | 4259 | unsigned long i; | |
4260 | vaddr_t va; | 4260 | vaddr_t va; | |
4261 | paddr_t pa; | 4261 | paddr_t pa; | |
4262 | unsigned long index, endindex; | 4262 | unsigned long index, endindex; | |
4263 | int level; | 4263 | int level; | |
4264 | pd_entry_t *pdep; | 4264 | pd_entry_t *pdep; | |
4265 | #ifdef XEN | 4265 | #ifdef XEN | |
4266 | int s = splvm(); /* protect xpq_* */ | 4266 | int s = splvm(); /* protect xpq_* */ | |
4267 | #endif | 4267 | #endif | |
4268 | 4268 | |||
4269 | for (level = lvl; level > 1; level--) { | 4269 | for (level = lvl; level > 1; level--) { | |
4270 | if (level == PTP_LEVELS) | 4270 | if (level == PTP_LEVELS) | |
4271 | pdep = pmap_kernel()->pm_pdir; | 4271 | pdep = pmap_kernel()->pm_pdir; | |
4272 | else | 4272 | else | |
4273 | pdep = pdes[level - 2]; | 4273 | pdep = pdes[level - 2]; | |
4274 | va = kva; | 4274 | va = kva; | |
4275 | index = pl_i_roundup(kva, level); | 4275 | index = pl_i_roundup(kva, level); | |
4276 | endindex = index + needed_ptps[level - 1] - 1; | 4276 | endindex = index + needed_ptps[level - 1] - 1; | |
4277 | 4277 | |||
4278 | 4278 | |||
4279 | for (i = index; i <= endindex; i++) { | 4279 | for (i = index; i <= endindex; i++) { | |
4280 | pt_entry_t pte; | 4280 | pt_entry_t pte; | |
4281 | 4281 | |||
4282 | KASSERT(!pmap_valid_entry(pdep[i])); | 4282 | KASSERT(!pmap_valid_entry(pdep[i])); | |
4283 | pmap_get_physpage(va, level - 1, &pa); | 4283 | pmap_get_physpage(va, level - 1, &pa); | |
4284 | pte = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW; | 4284 | pte = pmap_pa2pte(pa) | PG_k | PG_V | PG_RW; | |
4285 | #ifdef XEN | 4285 | #ifdef XEN | |
4286 | pmap_pte_set(&pdep[i], pte); | 4286 | pmap_pte_set(&pdep[i], pte); | |
4287 | #if defined(PAE) || defined(__x86_64__) | 4287 | #if defined(PAE) || defined(__x86_64__) | |
4288 | if (level == PTP_LEVELS && i >= PDIR_SLOT_KERN) { | 4288 | if (level == PTP_LEVELS && i >= PDIR_SLOT_KERN) { | |
4289 | if (__predict_true( | 4289 | if (__predict_true( | |
4290 | cpu_info_primary.ci_flags & CPUF_PRESENT)) { | 4290 | cpu_info_primary.ci_flags & CPUF_PRESENT)) { | |
4291 | /* update per-cpu PMDs on all cpus */ | 4291 | /* update per-cpu PMDs on all cpus */ | |
4292 | xen_kpm_sync(pmap_kernel(), i); | 4292 | xen_kpm_sync(pmap_kernel(), i); | |
4293 | } else { | 4293 | } else { | |
4294 | /* | 4294 | /* | |
4295 | * too early; update primary CPU | 4295 | * too early; update primary CPU | |
4296 | * PMD only (without locks) | 4296 | * PMD only (without locks) | |
4297 | */ | 4297 | */ | |
4298 | #ifdef PAE | 4298 | #ifdef PAE | |
4299 | pd_entry_t *cpu_pdep = | 4299 | pd_entry_t *cpu_pdep = | |
4300 | &cpu_info_primary.ci_kpm_pdir[l2tol2(i)]; | 4300 | &cpu_info_primary.ci_kpm_pdir[l2tol2(i)]; | |
4301 | #endif | 4301 | #endif | |
4302 | #ifdef __x86_64__ | 4302 | #ifdef __x86_64__ | |
4303 | pd_entry_t *cpu_pdep = | 4303 | pd_entry_t *cpu_pdep = | |
4304 | &cpu_info_primary.ci_kpm_pdir[i]; | 4304 | &cpu_info_primary.ci_kpm_pdir[i]; | |
4305 | #endif | 4305 | #endif | |
4306 | pmap_pte_set(cpu_pdep, pte); | 4306 | pmap_pte_set(cpu_pdep, pte); | |
4307 | } | 4307 | } | |
4308 | } | 4308 | } | |
4309 | #endif /* PAE || __x86_64__ */ | 4309 | #endif /* PAE || __x86_64__ */ | |
4310 | #else /* XEN */ | 4310 | #else /* XEN */ | |
4311 | pdep[i] = pte; | 4311 | pdep[i] = pte; | |
4312 | #endif /* XEN */ | 4312 | #endif /* XEN */ | |
4313 | KASSERT(level != PTP_LEVELS || nkptp[level - 1] + | 4313 | KASSERT(level != PTP_LEVELS || nkptp[level - 1] + | |
4314 | pl_i(VM_MIN_KERNEL_ADDRESS, level) == i); | 4314 | pl_i(VM_MIN_KERNEL_ADDRESS, level) == i); | |
4315 | nkptp[level - 1]++; | 4315 | nkptp[level - 1]++; | |
4316 | va += nbpd[level - 1]; | 4316 | va += nbpd[level - 1]; | |
4317 | } | 4317 | } | |
4318 | pmap_pte_flush(); | 4318 | pmap_pte_flush(); | |
4319 | } | 4319 | } | |
4320 | #ifdef XEN | 4320 | #ifdef XEN | |
4321 | splx(s); | 4321 | splx(s); | |
4322 | #endif | 4322 | #endif | |
4323 | } | 4323 | } | |
4324 | 4324 | |||
4325 | /* | 4325 | /* | |
4326 | * pmap_growkernel: increase usage of KVM space | 4326 | * pmap_growkernel: increase usage of KVM space | |
4327 | * | 4327 | * | |
4328 | * => we allocate new PTPs for the kernel and install them in all | 4328 | * => we allocate new PTPs for the kernel and install them in all | |
4329 | * the pmaps on the system. | 4329 | * the pmaps on the system. | |
4330 | */ | 4330 | */ | |
4331 | 4331 | |||
4332 | vaddr_t | 4332 | vaddr_t | |
4333 | pmap_growkernel(vaddr_t maxkvaddr) | 4333 | pmap_growkernel(vaddr_t maxkvaddr) | |
4334 | { | 4334 | { | |
4335 | struct pmap *kpm = pmap_kernel(); | 4335 | struct pmap *kpm = pmap_kernel(); | |
4336 | #if !defined(XEN) || !defined(__x86_64__) | 4336 | #if !defined(XEN) || !defined(__x86_64__) | |
4337 | struct pmap *pm; | 4337 | struct pmap *pm; | |
4338 | long old; | 4338 | long old; | |
4339 | #endif | 4339 | #endif | |
4340 | int s, i; | 4340 | int s, i; | |
4341 | long needed_kptp[PTP_LEVELS], target_nptp; | 4341 | long needed_kptp[PTP_LEVELS], target_nptp; | |
4342 | bool invalidate = false; | 4342 | bool invalidate = false; | |
4343 | 4343 | |||
4344 | s = splvm(); /* to be safe */ | 4344 | s = splvm(); /* to be safe */ | |
4345 | mutex_enter(kpm->pm_lock); | 4345 | mutex_enter(kpm->pm_lock); | |
4346 | 4346 | |||
4347 | if (maxkvaddr <= pmap_maxkvaddr) { | 4347 | if (maxkvaddr <= pmap_maxkvaddr) { | |
4348 | mutex_exit(kpm->pm_lock); | 4348 | mutex_exit(kpm->pm_lock); | |
4349 | splx(s); | 4349 | splx(s); | |
4350 | return pmap_maxkvaddr; | 4350 | return pmap_maxkvaddr; | |
4351 | } | 4351 | } | |
4352 | 4352 | |||
4353 | maxkvaddr = x86_round_pdr(maxkvaddr); | 4353 | maxkvaddr = x86_round_pdr(maxkvaddr); | |
4354 | #if !defined(XEN) || !defined(__x86_64__) | 4354 | #if !defined(XEN) || !defined(__x86_64__) | |
4355 | old = nkptp[PTP_LEVELS - 1]; | 4355 | old = nkptp[PTP_LEVELS - 1]; | |
4356 | #endif | 4356 | #endif | |
4357 | 4357 | |||
4358 | /* | 4358 | /* | |
4359 | * This loop could be optimized more, but pmap_growkernel() | 4359 | * This loop could be optimized more, but pmap_growkernel() | |
4360 | * is called infrequently. | 4360 | * is called infrequently. | |
4361 | */ | 4361 | */ | |
4362 | for (i = PTP_LEVELS - 1; i >= 1; i--) { | 4362 | for (i = PTP_LEVELS - 1; i >= 1; i--) { | |
4363 | target_nptp = pl_i_roundup(maxkvaddr, i + 1) - | 4363 | target_nptp = pl_i_roundup(maxkvaddr, i + 1) - | |
4364 | pl_i_roundup(VM_MIN_KERNEL_ADDRESS, i + 1); | 4364 | pl_i_roundup(VM_MIN_KERNEL_ADDRESS, i + 1); | |
4365 | /* | 4365 | /* | |
4366 | * XXX only need to check toplevel. | 4366 | * XXX only need to check toplevel. | |
4367 | */ | 4367 | */ | |
4368 | if (target_nptp > nkptpmax[i]) | 4368 | if (target_nptp > nkptpmax[i]) | |
4369 | panic("out of KVA space"); | 4369 | panic("out of KVA space"); | |
4370 | KASSERT(target_nptp >= nkptp[i]); | 4370 | KASSERT(target_nptp >= nkptp[i]); | |
4371 | needed_kptp[i] = target_nptp - nkptp[i]; | 4371 | needed_kptp[i] = target_nptp - nkptp[i]; | |
4372 | } | 4372 | } | |
4373 | 4373 | |||
4374 | pmap_alloc_level(normal_pdes, pmap_maxkvaddr, PTP_LEVELS, needed_kptp); | 4374 | pmap_alloc_level(normal_pdes, pmap_maxkvaddr, PTP_LEVELS, needed_kptp); | |
4375 | 4375 | |||
4376 | /* | 4376 | /* | |
4377 | * If the number of top level entries changed, update all | 4377 | * If the number of top level entries changed, update all | |
4378 | * pmaps. | 4378 | * pmaps. | |
4379 | */ | 4379 | */ | |
4380 | if (needed_kptp[PTP_LEVELS - 1] != 0) { | 4380 | if (needed_kptp[PTP_LEVELS - 1] != 0) { | |
4381 | #ifdef XEN | 4381 | #ifdef XEN | |
4382 | #ifdef __x86_64__ | 4382 | #ifdef __x86_64__ | |
4383 | /* nothing, kernel entries are never entered in user pmap */ | 4383 | /* nothing, kernel entries are never entered in user pmap */ | |
4384 | #else /* __x86_64__ */ | 4384 | #else /* __x86_64__ */ | |
4385 | mutex_enter(&pmaps_lock); | 4385 | mutex_enter(&pmaps_lock); | |
4386 | LIST_FOREACH(pm, &pmaps, pm_list) { | 4386 | LIST_FOREACH(pm, &pmaps, pm_list) { | |
4387 | int pdkidx; | 4387 | int pdkidx; | |
4388 | for (pdkidx = PDIR_SLOT_KERN + old; | 4388 | for (pdkidx = PDIR_SLOT_KERN + old; | |
4389 | pdkidx < PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1]; | 4389 | pdkidx < PDIR_SLOT_KERN + nkptp[PTP_LEVELS - 1]; | |
4390 | pdkidx++) { | 4390 | pdkidx++) { | |
4391 | pmap_pte_set(&pm->pm_pdir[pdkidx], | 4391 | pmap_pte_set(&pm->pm_pdir[pdkidx], | |
4392 | kpm->pm_pdir[pdkidx]); | 4392 | kpm->pm_pdir[pdkidx]); | |
4393 | } | 4393 | } | |
4394 | pmap_pte_flush(); | 4394 | pmap_pte_flush(); | |
4395 | } | 4395 | } | |
4396 | mutex_exit(&pmaps_lock); | 4396 | mutex_exit(&pmaps_lock); | |
4397 | #endif /* __x86_64__ */ | 4397 | #endif /* __x86_64__ */ | |
4398 | #else /* XEN */ | 4398 | #else /* XEN */ | |
4399 | unsigned newpdes; | 4399 | unsigned newpdes; | |
4400 | newpdes = nkptp[PTP_LEVELS - 1] - old; | 4400 | newpdes = nkptp[PTP_LEVELS - 1] - old; | |
4401 | mutex_enter(&pmaps_lock); | 4401 | mutex_enter(&pmaps_lock); | |
4402 | LIST_FOREACH(pm, &pmaps, pm_list) { | 4402 | LIST_FOREACH(pm, &pmaps, pm_list) { | |
4403 | memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old], | 4403 | memcpy(&pm->pm_pdir[PDIR_SLOT_KERN + old], | |
4404 | &kpm->pm_pdir[PDIR_SLOT_KERN + old], | 4404 | &kpm->pm_pdir[PDIR_SLOT_KERN + old], | |
4405 | newpdes * sizeof (pd_entry_t)); | 4405 | newpdes * sizeof (pd_entry_t)); | |
4406 | } | 4406 | } | |
4407 | mutex_exit(&pmaps_lock); | 4407 | mutex_exit(&pmaps_lock); | |
4408 | #endif | 4408 | #endif | |
4409 | invalidate = true; | 4409 | invalidate = true; | |
4410 | } | 4410 | } | |
4411 | pmap_maxkvaddr = maxkvaddr; | 4411 | pmap_maxkvaddr = maxkvaddr; | |
4412 | mutex_exit(kpm->pm_lock); | 4412 | mutex_exit(kpm->pm_lock); | |
4413 | splx(s); | 4413 | splx(s); | |
4414 | 4414 | |||
4415 | if (invalidate && pmap_initialized) { | 4415 | if (invalidate && pmap_initialized) { | |
4416 | /* Invalidate the PDP cache. */ | 4416 | /* Invalidate the PDP cache. */ | |
4417 | pool_cache_invalidate(&pmap_pdp_cache); | 4417 | pool_cache_invalidate(&pmap_pdp_cache); | |
4418 | } | 4418 | } | |
4419 | 4419 | |||
4420 | return maxkvaddr; | 4420 | return maxkvaddr; | |
4421 | } | 4421 | } | |
4422 | 4422 | |||
4423 | #ifdef DEBUG | 4423 | #ifdef DEBUG | |
4424 | void pmap_dump(struct pmap *, vaddr_t, vaddr_t); | 4424 | void pmap_dump(struct pmap *, vaddr_t, vaddr_t); | |
4425 | 4425 | |||
4426 | /* | 4426 | /* | |
4427 | * pmap_dump: dump all the mappings from a pmap | 4427 | * pmap_dump: dump all the mappings from a pmap | |
4428 | * | 4428 | * | |
4429 | * => caller should not be holding any pmap locks | 4429 | * => caller should not be holding any pmap locks | |
4430 | */ | 4430 | */ | |
4431 | 4431 | |||
4432 | void | 4432 | void | |
4433 | pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) | 4433 | pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) | |
4434 | { | 4434 | { | |
4435 | pt_entry_t *ptes, *pte; | 4435 | pt_entry_t *ptes, *pte; | |
4436 | pd_entry_t * const *pdes; | 4436 | pd_entry_t * const *pdes; | |
4437 | struct pmap *pmap2; | 4437 | struct pmap *pmap2; | |
4438 | vaddr_t blkendva; | 4438 | vaddr_t blkendva; | |
4439 | 4439 | |||
4440 | /* | 4440 | /* | |
4441 | * if end is out of range truncate. | 4441 | * if end is out of range truncate. | |
4442 | * if (end == start) update to max. | 4442 | * if (end == start) update to max. | |
4443 | */ | 4443 | */ | |
4444 | 4444 | |||
4445 | if (eva > VM_MAXUSER_ADDRESS || eva <= sva) | 4445 | if (eva > VM_MAXUSER_ADDRESS || eva <= sva) | |
4446 | eva = VM_MAXUSER_ADDRESS; | 4446 | eva = VM_MAXUSER_ADDRESS; | |
4447 | 4447 | |||
4448 | /* | 4448 | /* | |
4449 | * we lock in the pmap => pv_head direction | 4449 | * we lock in the pmap => pv_head direction | |
4450 | */ | 4450 | */ | |
4451 | 4451 | |||
4452 | kpreempt_disable(); | 4452 | kpreempt_disable(); | |
4453 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ | 4453 | pmap_map_ptes(pmap, &pmap2, &ptes, &pdes); /* locks pmap */ | |
4454 | 4454 | |||
4455 | /* | 4455 | /* | |
4456 | * dumping a range of pages: we dump in PTP sized blocks (4MB) | 4456 | * dumping a range of pages: we dump in PTP sized blocks (4MB) | |
4457 | */ | 4457 | */ | |
4458 | 4458 | |||
4459 | for (/* null */ ; sva < eva ; sva = blkendva) { | 4459 | for (/* null */ ; sva < eva ; sva = blkendva) { | |
4460 | 4460 | |||
4461 | /* determine range of block */ | 4461 | /* determine range of block */ | |
4462 | blkendva = x86_round_pdr(sva+1); | 4462 | blkendva = x86_round_pdr(sva+1); | |
4463 | if (blkendva > eva) | 4463 | if (blkendva > eva) | |
4464 | blkendva = eva; | 4464 | blkendva = eva; | |
4465 | 4465 | |||
4466 | /* valid block? */ | 4466 | /* valid block? */ | |
4467 | if (!pmap_pdes_valid(sva, pdes, NULL)) | 4467 | if (!pmap_pdes_valid(sva, pdes, NULL)) | |
4468 | continue; | 4468 | continue; | |
4469 | 4469 | |||
4470 | pte = &ptes[pl1_i(sva)]; | 4470 | pte = &ptes[pl1_i(sva)]; | |
4471 | for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) { | 4471 | for (/* null */; sva < blkendva ; sva += PAGE_SIZE, pte++) { | |
4472 | if (!pmap_valid_entry(*pte)) | 4472 | if (!pmap_valid_entry(*pte)) | |
4473 | continue; | 4473 | continue; | |
4474 | printf("va %#" PRIxVADDR " -> pa %#" PRIxPADDR | 4474 | printf("va %#" PRIxVADDR " -> pa %#" PRIxPADDR | |
4475 | " (pte=%#" PRIxPADDR ")\n", | 4475 | " (pte=%#" PRIxPADDR ")\n", | |
4476 | sva, (paddr_t)pmap_pte2pa(*pte), (paddr_t)*pte); | 4476 | sva, (paddr_t)pmap_pte2pa(*pte), (paddr_t)*pte); | |
4477 | } | 4477 | } | |
4478 | } | 4478 | } | |
4479 | pmap_unmap_ptes(pmap, pmap2); | 4479 | pmap_unmap_ptes(pmap, pmap2); | |
4480 | kpreempt_enable(); | 4480 | kpreempt_enable(); | |
4481 | } | 4481 | } | |
4482 | #endif | 4482 | #endif | |
4483 | 4483 | |||
4484 | /* | 4484 | /* | |
4485 | * pmap_update: process deferred invalidations and frees. | 4485 | * pmap_update: process deferred invalidations and frees. | |
4486 | */ | 4486 | */ | |
4487 | 4487 | |||
4488 | void | 4488 | void | |
4489 | pmap_update(struct pmap *pmap) | 4489 | pmap_update(struct pmap *pmap) | |
4490 | { | 4490 | { | |
4491 | struct vm_page *empty_ptps; | 4491 | struct vm_page *empty_ptps; | |
4492 | lwp_t *l = curlwp; | 4492 | lwp_t *l = curlwp; | |
4493 | 4493 | |||
4494 | /* | 4494 | /* | |
4495 | * If we have torn down this pmap, invalidate non-global TLB | 4495 | * If we have torn down this pmap, invalidate non-global TLB | |
4496 | * entries on any processors using it. | 4496 | * entries on any processors using it. | |
4497 | */ | 4497 | */ | |
4498 | kpreempt_disable(); | 4498 | kpreempt_disable(); | |
4499 | if (__predict_false(l->l_md.md_gc_pmap == pmap)) { | 4499 | if (__predict_false(l->l_md.md_gc_pmap == pmap)) { | |
4500 | l->l_md.md_gc_pmap = NULL; | 4500 | l->l_md.md_gc_pmap = NULL; | |
4501 | pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_UPDATE); | 4501 | pmap_tlb_shootdown(pmap, (vaddr_t)-1LL, 0, TLBSHOOT_UPDATE); | |
4502 | } | 4502 | } | |
4503 | /* | 4503 | /* | |
4504 | * Initiate any pending TLB shootdowns. Wait for them to | 4504 | * Initiate any pending TLB shootdowns. Wait for them to | |
4505 | * complete before returning control to the caller. | 4505 | * complete before returning control to the caller. | |
4506 | */ | 4506 | */ | |
4507 | pmap_tlb_shootnow(); | 4507 | pmap_tlb_shootnow(); | |
4508 | kpreempt_enable(); | 4508 | kpreempt_enable(); | |
4509 | 4509 | |||
4510 | /* | 4510 | /* | |
4511 | * Now that shootdowns are complete, process deferred frees, | 4511 | * Now that shootdowns are complete, process deferred frees, | |
4512 | * but not from interrupt context. | 4512 | * but not from interrupt context. | |
4513 | */ | 4513 | */ | |
4514 | if (l->l_md.md_gc_ptp != NULL) { | 4514 | if (l->l_md.md_gc_ptp != NULL) { | |
4515 | KASSERT((l->l_pflag & LP_INTR) == 0); | 4515 | KASSERT((l->l_pflag & LP_INTR) == 0); | |
4516 | if (cpu_intr_p()) { | 4516 | if (cpu_intr_p()) { | |
4517 | return; | 4517 | return; | |
4518 | } | 4518 | } | |
4519 | empty_ptps = l->l_md.md_gc_ptp; | 4519 | empty_ptps = l->l_md.md_gc_ptp; | |
4520 | l->l_md.md_gc_ptp = NULL; | 4520 | l->l_md.md_gc_ptp = NULL; | |
4521 | pmap_free_ptps(empty_ptps); | 4521 | pmap_free_ptps(empty_ptps); | |
4522 | } | 4522 | } | |
4523 | } | 4523 | } | |
4524 | 4524 | |||
4525 | #if PTP_LEVELS > 4 | 4525 | #if PTP_LEVELS > 4 | |
4526 | #error "Unsupported number of page table mappings" | 4526 | #error "Unsupported number of page table mappings" | |
4527 | #endif | 4527 | #endif | |
4528 | 4528 | |||
4529 | paddr_t | 4529 | paddr_t | |
4530 | pmap_init_tmp_pgtbl(paddr_t pg) | 4530 | pmap_init_tmp_pgtbl(paddr_t pg) | |
4531 | { | 4531 | { | |
4532 | static bool maps_loaded; | 4532 | static bool maps_loaded; | |
4533 | static const paddr_t x86_tmp_pml_paddr[] = { | 4533 | static const paddr_t x86_tmp_pml_paddr[] = { | |
4534 | 4 * PAGE_SIZE, | 4534 | 4 * PAGE_SIZE, /* L1 */ | |
4535 | 5 * PAGE_SIZE, | 4535 | 5 * PAGE_SIZE, /* L2 */ | |
4536 | 6 * PAGE_SIZE, | 4536 | 6 * PAGE_SIZE, /* L3 */ | |
4537 | 7 * PAGE_SIZE | 4537 | 7 * PAGE_SIZE /* L4 */ | |
4538 | }; | 4538 | }; | |
4539 | static vaddr_t x86_tmp_pml_vaddr[] = { 0, 0, 0, 0 }; | 4539 | static vaddr_t x86_tmp_pml_vaddr[] = { 0, 0, 0, 0 }; | |
4540 | 4540 | |||
4541 | pd_entry_t *tmp_pml, *kernel_pml; | 4541 | pd_entry_t *tmp_pml, *kernel_pml; | |
4542 | 4542 | |||
4543 | int level; | 4543 | int level; | |
4544 | 4544 | |||
4545 | if (!maps_loaded) { | 4545 | if (!maps_loaded) { | |
4546 | for (level = 0; level < PTP_LEVELS; ++level) { | 4546 | for (level = 0; level < PTP_LEVELS; ++level) { | |
4547 | x86_tmp_pml_vaddr[level] = | 4547 | x86_tmp_pml_vaddr[level] = | |
4548 | uvm_km_alloc(kernel_map, PAGE_SIZE, 0, | 4548 | uvm_km_alloc(kernel_map, PAGE_SIZE, 0, | |
4549 | UVM_KMF_VAONLY); | 4549 | UVM_KMF_VAONLY); | |
4550 | 4550 | |||
4551 | if (x86_tmp_pml_vaddr[level] == 0) | 4551 | if (x86_tmp_pml_vaddr[level] == 0) | |
4552 | panic("mapping of real mode PML failed\n"); | 4552 | panic("mapping of real mode PML failed\n"); | |
4553 | pmap_kenter_pa(x86_tmp_pml_vaddr[level], | 4553 | pmap_kenter_pa(x86_tmp_pml_vaddr[level], | |
4554 | x86_tmp_pml_paddr[level], | 4554 | x86_tmp_pml_paddr[level], | |
4555 | VM_PROT_READ | VM_PROT_WRITE, 0); | 4555 | VM_PROT_READ | VM_PROT_WRITE, 0); | |
4556 | pmap_update(pmap_kernel()); | 4556 | pmap_update(pmap_kernel()); | |
4557 | } | 4557 | } | |
4558 | maps_loaded = true; | 4558 | maps_loaded = true; | |
4559 | } | 4559 | } | |
4560 | 4560 | |||
4561 | /* Zero levels 1-3 */ | 4561 | /* Zero levels 1-3 */ | |
4562 | for (level = 0; level < PTP_LEVELS - 1; ++level) { | 4562 | for (level = 0; level < PTP_LEVELS - 1; ++level) { | |
4563 | tmp_pml = (void *)x86_tmp_pml_vaddr[level]; | 4563 | tmp_pml = (void *)x86_tmp_pml_vaddr[level]; | |
4564 | memset(tmp_pml, 0, PAGE_SIZE); | 4564 | memset(tmp_pml, 0, PAGE_SIZE); | |
4565 | } | 4565 | } | |
4566 | 4566 | |||
4567 | /* Copy PML4 */ | 4567 | /* Copy PML4 */ | |
4568 | kernel_pml = pmap_kernel()->pm_pdir; | 4568 | kernel_pml = pmap_kernel()->pm_pdir; | |
4569 | tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1]; | 4569 | tmp_pml = (void *)x86_tmp_pml_vaddr[PTP_LEVELS - 1]; | |
4570 | memcpy(tmp_pml, kernel_pml, PAGE_SIZE); | 4570 | memcpy(tmp_pml, kernel_pml, PAGE_SIZE); | |
4571 | 4571 | |||
4572 | #ifdef PAE | 4572 | #ifdef PAE | |
4573 | /* | 4573 | /* | |
4574 | * Use the last 4 entries of the L2 page as L3 PD entries. These | 4574 | * Use the last 4 entries of the L2 page as L3 PD entries. These | |
4575 | * last entries are unlikely to be used for temporary mappings. | 4575 | * last entries are unlikely to be used for temporary mappings. | |
4576 | * 508: maps 0->1GB (userland) | 4576 | * 508: maps 0->1GB (userland) | |
4577 | * 509: unused | 4577 | * 509: unused | |
4578 | * 510: unused | 4578 | * 510: unused | |
4579 | * 511: maps 3->4GB (kernel) | 4579 | * 511: maps 3->4GB (kernel) | |
4580 | */ | 4580 | */ | |
4581 | tmp_pml[508] = x86_tmp_pml_paddr[PTP_LEVELS - 1] | PG_V; | 4581 | tmp_pml[508] = x86_tmp_pml_paddr[PTP_LEVELS - 1] | PG_V; | |
4582 | tmp_pml[509] = 0; | 4582 | tmp_pml[509] = 0; | |
4583 | tmp_pml[510] = 0; | 4583 | tmp_pml[510] = 0; | |
4584 | tmp_pml[511] = pmap_pdirpa(pmap_kernel(), PDIR_SLOT_KERN) | PG_V; | 4584 | tmp_pml[511] = pmap_pdirpa(pmap_kernel(), PDIR_SLOT_KERN) | PG_V; | |
4585 | #endif | 4585 | #endif | |
4586 | 4586 | |||
4587 | for (level = PTP_LEVELS - 1; level > 0; --level) { | 4587 | for (level = PTP_LEVELS - 1; level > 0; --level) { | |
4588 | tmp_pml = (void *)x86_tmp_pml_vaddr[level]; | 4588 | tmp_pml = (void *)x86_tmp_pml_vaddr[level]; | |
4589 | 4589 | |||
4590 | tmp_pml[pl_i(pg, level + 1)] = | 4590 | tmp_pml[pl_i(pg, level + 1)] = | |
4591 | (x86_tmp_pml_paddr[level - 1] & PG_FRAME) | PG_RW | PG_V; | 4591 | (x86_tmp_pml_paddr[level - 1] & PG_FRAME) | PG_RW | PG_V; | |
4592 | } | 4592 | } | |
4593 | 4593 | |||
4594 | tmp_pml = (void *)x86_tmp_pml_vaddr[0]; | 4594 | tmp_pml = (void *)x86_tmp_pml_vaddr[0]; | |
4595 | tmp_pml[pl_i(pg, 1)] = (pg & PG_FRAME) | PG_RW | PG_V; | 4595 | tmp_pml[pl_i(pg, 1)] = (pg & PG_FRAME) | PG_RW | PG_V; | |
4596 | 4596 | |||
4597 | #ifdef PAE | 4597 | #ifdef PAE | |
4598 | /* Return the PA of the L3 page (entry 508 of the L2 page) */ | 4598 | /* Return the PA of the L3 page (entry 508 of the L2 page) */ | |
4599 | return x86_tmp_pml_paddr[PTP_LEVELS - 1] + 508 * sizeof(pd_entry_t); | 4599 | return x86_tmp_pml_paddr[PTP_LEVELS - 1] + 508 * sizeof(pd_entry_t); | |
4600 | #endif | 4600 | #endif | |
4601 | 4601 | |||
4602 | return x86_tmp_pml_paddr[PTP_LEVELS - 1]; | 4602 | return x86_tmp_pml_paddr[PTP_LEVELS - 1]; | |
4603 | } | 4603 | } | |
4604 | 4604 | |||
4605 | u_int | 4605 | u_int | |
4606 | x86_mmap_flags(paddr_t mdpgno) | 4606 | x86_mmap_flags(paddr_t mdpgno) | |
4607 | { | 4607 | { | |
4608 | u_int nflag = (mdpgno >> X86_MMAP_FLAG_SHIFT) & X86_MMAP_FLAG_MASK; | 4608 | u_int nflag = (mdpgno >> X86_MMAP_FLAG_SHIFT) & X86_MMAP_FLAG_MASK; | |
4609 | u_int pflag = 0; | 4609 | u_int pflag = 0; | |
4610 | 4610 | |||
4611 | if (nflag & X86_MMAP_FLAG_PREFETCH) | 4611 | if (nflag & X86_MMAP_FLAG_PREFETCH) | |
4612 | pflag |= PMAP_WRITE_COMBINE; | 4612 | pflag |= PMAP_WRITE_COMBINE; | |
4613 | 4613 | |||
4614 | return pflag; | 4614 | return pflag; | |
4615 | } | 4615 | } |