Fri Jul 29 21:21:43 2011 UTC ()
Don't shut down the bootstrap processor (BSP), because we may have to run
BIOS methods on it.  For example, ACPI requires that we execute the code
for changing sleep state on the BSP.

This may help the problem where folks' machines would hang instead of
powering off when they entered ACPI sleep state 5 (S5, soft off).

XXX If the BSP is already shut down, we should start it back up.
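
The cpu_shutdown() hunk itself falls past the end of the excerpt below, so
here is only a rough sketch of the idea (assuming the old hook simply
chained to cpu_suspend(), and using the CPUF_BSP flag that cpu_attach()
sets on the boot processor):

	static bool
	cpu_shutdown(device_t dv, int how)
	{
		struct cpu_softc *sc = device_private(dv);
		struct cpu_info *ci = sc->sc_info;

		/* Never stop the BSP: firmware/ACPI code may still run on it. */
		if ((ci->ci_flags & CPUF_BSP) != 0)
			return false;

		return cpu_suspend(dv, NULL);
	}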


(dyoung)
diff -r1.89 -r1.90 src/sys/arch/x86/x86/cpu.c

--- src/sys/arch/x86/x86/cpu.c 2011/06/22 09:28:08 1.89
+++ src/sys/arch/x86/x86/cpu.c 2011/07/29 21:21:43 1.90
@@ -1,1206 +1,1212 @@
-/*	$NetBSD: cpu.c,v 1.89 2011/06/22 09:28:08 jruoho Exp $	*/
+/*	$NetBSD: cpu.c,v 1.90 2011/07/29 21:21:43 dyoung Exp $	*/

/*-
 * Copyright (c) 2000, 2006, 2007, 2008 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Bill Sommerfeld of RedBack Networks Inc, and by Andrew Doran.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1999 Stefan Grefen
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the NetBSD
 *      Foundation, Inc. and its contributors.
 * 4. Neither the name of The NetBSD Foundation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.89 2011/06/22 09:28:08 jruoho Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.90 2011/07/29 21:21:43 dyoung Exp $");

#include "opt_ddb.h"
#include "opt_mpbios.h"		/* for MPDEBUG */
#include "opt_mtrr.h"

#include "lapic.h"
#include "ioapic.h"

#ifdef i386
#include "npx.h"
#endif

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <sys/device.h>
#include <sys/kmem.h>
#include <sys/cpu.h>
#include <sys/atomic.h>
#include <sys/reboot.h>

#include <uvm/uvm.h>

#include <machine/cpufunc.h>
#include <machine/cpuvar.h>
#include <machine/pmap.h>
#include <machine/vmparam.h>
#include <machine/mpbiosvar.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>
#include <machine/segments.h>
#include <machine/gdt.h>
#include <machine/mtrr.h>
#include <machine/pio.h>
#include <machine/cpu_counter.h>

#ifdef i386
#include <machine/tlog.h>
#endif

#include <machine/apicvar.h>
#include <machine/i82489reg.h>
#include <machine/i82489var.h>

#include <dev/ic/mc146818reg.h>
#include <i386/isa/nvram.h>
#include <dev/isa/isareg.h>

#include "tsc.h"

#if MAXCPUS > 32
#error cpu_info contains 32bit bitmasks
#endif

static int cpu_match(device_t, cfdata_t, void *);
static void cpu_attach(device_t, device_t, void *);
static void cpu_defer(device_t);
static int cpu_rescan(device_t, const char *, const int *);
static void cpu_childdetached(device_t, device_t);
static bool cpu_suspend(device_t, const pmf_qual_t *);
static bool cpu_resume(device_t, const pmf_qual_t *);
static bool cpu_shutdown(device_t, int);

struct cpu_softc {
	device_t sc_dev;		/* device tree glue */
	struct cpu_info *sc_info;	/* pointer to CPU info */
	bool sc_wasonline;
};

int mp_cpu_start(struct cpu_info *, paddr_t);
void mp_cpu_start_cleanup(struct cpu_info *);
const struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
					    mp_cpu_start_cleanup };


CFATTACH_DECL2_NEW(cpu, sizeof(struct cpu_softc),
    cpu_match, cpu_attach, NULL, NULL, cpu_rescan, cpu_childdetached);

/*
 * Statically-allocated CPU info for the primary CPU (or the only
 * CPU, on uniprocessors).  The CPU info list is initialized to
 * point at it.
 */
#ifdef TRAPLOG
struct tlog tlog_primary;
#endif
struct cpu_info cpu_info_primary __aligned(CACHE_LINE_SIZE) = {
	.ci_dev = 0,
	.ci_self = &cpu_info_primary,
	.ci_idepth = -1,
	.ci_curlwp = &lwp0,
	.ci_curldt = -1,
	.ci_cpumask = 1,
#ifdef TRAPLOG
	.ci_tlog_base = &tlog_primary,
#endif /* !TRAPLOG */
};

struct cpu_info *cpu_info_list = &cpu_info_primary;

static void cpu_set_tss_gates(struct cpu_info *);

#ifdef i386
static void tss_init(struct i386tss *, void *, void *);
#endif

static void cpu_init_idle_lwp(struct cpu_info *);

uint32_t cpus_attached = 0;
uint32_t cpus_running = 1;

uint32_t cpu_feature[5]; /* X86 CPUID feature bits
			  * [0] basic features %edx
			  * [1] basic features %ecx
			  * [2] extended features %edx
			  * [3] extended features %ecx
			  * [4] VIA padlock features
			  */

extern char x86_64_doubleflt_stack[];

bool x86_mp_online;
paddr_t mp_trampoline_paddr = MP_TRAMPOLINE;
static vaddr_t cmos_data_mapping;
struct cpu_info *cpu_starting;

void cpu_hatch(void *);
static void cpu_boot_secondary(struct cpu_info *ci);
static void cpu_start_secondary(struct cpu_info *ci);
static void cpu_copy_trampoline(void);

/*
 * Runs once per boot once multiprocessor goo has been detected and
 * the local APIC on the boot processor has been mapped.
 *
 * Called from lapic_boot_init() (from mpbios_scan()).
 */
void
cpu_init_first(void)
{

	cpu_info_primary.ci_cpuid = lapic_cpu_number();
	cpu_copy_trampoline();

	cmos_data_mapping = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_VAONLY);
	if (cmos_data_mapping == 0)
		panic("No KVA for page 0");
	pmap_kenter_pa(cmos_data_mapping, 0, VM_PROT_READ|VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());
}
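/*
 * The page-0 mapping established above is what mp_cpu_start() later
 * uses to plant the warm-reset vector: physical 0x467 (40:67) is
 * written through cmos_data_mapping + 0x467.
 */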

static int
cpu_match(device_t parent, cfdata_t match, void *aux)
{

	return 1;
}

static void
cpu_vm_init(struct cpu_info *ci)
{
	int ncolors = 2, i;

	for (i = CAI_ICACHE; i <= CAI_L2CACHE; i++) {
		struct x86_cache_info *cai;
		int tcolors;

		cai = &ci->ci_cinfo[i];

		tcolors = atop(cai->cai_totalsize);
		switch(cai->cai_associativity) {
		case 0xff:
			tcolors = 1; /* fully associative */
			break;
		case 0:
		case 1:
			break;
		default:
			tcolors /= cai->cai_associativity;
		}
		ncolors = max(ncolors, tcolors);
		/*
		 * If the desired number of colors is not a power of
		 * two, it won't be good.  Find the greatest power of
		 * two which is an even divisor of the number of colors,
		 * to preserve even coloring of pages.
		 */
		if (ncolors & (ncolors - 1) ) {
			int try, picked = 1;
			for (try = 1; try < ncolors; try *= 2) {
				if (ncolors % try == 0) picked = try;
			}
			if (picked == 1) {
				panic("desired number of cache colors %d is "
				    " > 1, but not even!", ncolors);
			}
			ncolors = picked;
		}
	}

	/*
	 * Knowing the size of the largest cache on this CPU, re-color
	 * our pages.
	 */
	if (ncolors <= uvmexp.ncolors)
		return;
	aprint_debug_dev(ci->ci_dev, "%d page colors\n", ncolors);
	uvm_page_recolor(ncolors);
}
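/*
 * Worked example of the coloring math above, for a hypothetical
 * 512 KB, 8-way L2 cache with 4 KB pages: tcolors = atop(512 KB) / 8
 * = 128 / 8 = 16, which is already a power of two, so the kernel
 * would recolor to 16 page colors.
 */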


static void
cpu_attach(device_t parent, device_t self, void *aux)
{
	struct cpu_softc *sc = device_private(self);
	struct cpu_attach_args *caa = aux;
	struct cpu_info *ci;
	uintptr_t ptr;
	int cpunum = caa->cpu_number;
	static bool again;

	sc->sc_dev = self;

	if (cpus_attached == ~0) {
		aprint_error(": increase MAXCPUS\n");
		return;
	}

	/*
	 * If we're an Application Processor, allocate a cpu_info
	 * structure, otherwise use the primary's.
	 */
	if (caa->cpu_role == CPU_ROLE_AP) {
		if ((boothowto & RB_MD1) != 0) {
			aprint_error(": multiprocessor boot disabled\n");
			if (!pmf_device_register(self, NULL, NULL))
				aprint_error_dev(self,
				    "couldn't establish power handler\n");
			return;
		}
		aprint_naive(": Application Processor\n");
		ptr = (uintptr_t)kmem_zalloc(sizeof(*ci) + CACHE_LINE_SIZE - 1,
		    KM_SLEEP);
		ci = (struct cpu_info *)roundup2(ptr, CACHE_LINE_SIZE);
		ci->ci_curldt = -1;
#ifdef TRAPLOG
		ci->ci_tlog_base = kmem_zalloc(sizeof(struct tlog), KM_SLEEP);
#endif
	} else {
		aprint_naive(": %s Processor\n",
		    caa->cpu_role == CPU_ROLE_SP ? "Single" : "Boot");
		ci = &cpu_info_primary;
		if (cpunum != lapic_cpu_number()) {
			/* XXX should be done earlier. */
			uint32_t reg;
			aprint_verbose("\n");
			aprint_verbose_dev(self, "running CPU at apic %d"
			    " instead of at expected %d", lapic_cpu_number(),
			    cpunum);
			reg = i82489_readreg(LAPIC_ID);
			i82489_writereg(LAPIC_ID, (reg & ~LAPIC_ID_MASK) |
			    (cpunum << LAPIC_ID_SHIFT));
		}
		if (cpunum != lapic_cpu_number()) {
			aprint_error_dev(self, "unable to reset apic id\n");
		}
	}

	ci->ci_self = ci;
	sc->sc_info = ci;
	ci->ci_dev = self;
	ci->ci_acpiid = caa->cpu_id;
	ci->ci_cpuid = caa->cpu_number;
	ci->ci_func = caa->cpu_func;

	/* Must be before mi_cpu_attach(). */
	cpu_vm_init(ci);

	if (caa->cpu_role == CPU_ROLE_AP) {
		int error;

		error = mi_cpu_attach(ci);
		if (error != 0) {
			aprint_normal("\n");
			aprint_error_dev(self,
			    "mi_cpu_attach failed with %d\n", error);
			return;
		}
		cpu_init_tss(ci);
	} else {
		KASSERT(ci->ci_data.cpu_idlelwp != NULL);
	}

	ci->ci_cpumask = (1 << cpu_index(ci));
	pmap_reference(pmap_kernel());
	ci->ci_pmap = pmap_kernel();
	ci->ci_tlbstate = TLBSTATE_STALE;

	/*
	 * Boot processor may not be attached first, but the below
	 * must be done to allow booting other processors.
	 */
	if (!again) {
		atomic_or_32(&ci->ci_flags, CPUF_PRESENT | CPUF_PRIMARY);
		/* Basic init. */
		cpu_intr_init(ci);
		cpu_get_tsc_freq(ci);
		cpu_init(ci);
		cpu_set_tss_gates(ci);
		pmap_cpu_init_late(ci);
		if (caa->cpu_role != CPU_ROLE_SP) {
			/* Enable lapic. */
			lapic_enable();
			lapic_set_lvt();
			lapic_calibrate_timer(ci);
		}
		/* Make sure DELAY() is initialized. */
		DELAY(1);
		again = true;
	}

	/* further PCB init done later. */

	switch (caa->cpu_role) {
	case CPU_ROLE_SP:
		atomic_or_32(&ci->ci_flags, CPUF_SP);
		cpu_identify(ci);
		x86_errata();
		x86_cpu_idle_init();
		break;

	case CPU_ROLE_BP:
		atomic_or_32(&ci->ci_flags, CPUF_BSP);
		cpu_identify(ci);
		x86_errata();
		x86_cpu_idle_init();
		break;

	case CPU_ROLE_AP:
		/*
		 * report on an AP
		 */
		cpu_intr_init(ci);
		gdt_alloc_cpu(ci);
		cpu_set_tss_gates(ci);
		pmap_cpu_init_late(ci);
		cpu_start_secondary(ci);
		if (ci->ci_flags & CPUF_PRESENT) {
			struct cpu_info *tmp;

			cpu_identify(ci);
			tmp = cpu_info_list;
			while (tmp->ci_next)
				tmp = tmp->ci_next;

			tmp->ci_next = ci;
		}
		break;

	default:
		aprint_normal("\n");
		panic("unknown processor type??\n");
	}

	pat_init(ci);
	atomic_or_32(&cpus_attached, ci->ci_cpumask);

	if (!pmf_device_register1(self, cpu_suspend, cpu_resume, cpu_shutdown))
		aprint_error_dev(self, "couldn't establish power handler\n");

	if (mp_verbose) {
		struct lwp *l = ci->ci_data.cpu_idlelwp;
		struct pcb *pcb = lwp_getpcb(l);

		aprint_verbose_dev(self,
		    "idle lwp at %p, idle sp at %p\n",
		    l,
#ifdef i386
		    (void *)pcb->pcb_esp
#else
		    (void *)pcb->pcb_rsp
#endif
		);
	}

	/*
	 * Postpone the "cpufeaturebus" scan.
	 * It is safe to scan the pseudo-bus
	 * only after all CPUs have attached.
	 */
	(void)config_defer(self, cpu_defer);
}

static void
cpu_defer(device_t self)
{
	cpu_rescan(self, NULL, NULL);
}

static int
cpu_rescan(device_t self, const char *ifattr, const int *locators)
{
	struct cpu_softc *sc = device_private(self);
	struct cpufeature_attach_args cfaa;
	struct cpu_info *ci = sc->sc_info;

	memset(&cfaa, 0, sizeof(cfaa));
	cfaa.ci = ci;

	if (ifattr_match(ifattr, "cpufeaturebus")) {

		if (ci->ci_frequency == NULL) {
			cfaa.name = "frequency";
			ci->ci_frequency = config_found_ia(self,
			    "cpufeaturebus", &cfaa, NULL);
		}

		if (ci->ci_padlock == NULL) {
			cfaa.name = "padlock";
			ci->ci_padlock = config_found_ia(self,
			    "cpufeaturebus", &cfaa, NULL);
		}

		if (ci->ci_temperature == NULL) {
			cfaa.name = "temperature";
			ci->ci_temperature = config_found_ia(self,
			    "cpufeaturebus", &cfaa, NULL);
		}
	}

	return 0;
}

static void
cpu_childdetached(device_t self, device_t child)
{
	struct cpu_softc *sc = device_private(self);
	struct cpu_info *ci = sc->sc_info;

	if (ci->ci_frequency == child)
		ci->ci_frequency = NULL;

	if (ci->ci_padlock == child)
		ci->ci_padlock = NULL;

	if (ci->ci_temperature == child)
		ci->ci_temperature = NULL;
}

/*
 * Initialize the processor appropriately.
 */

void
cpu_init(struct cpu_info *ci)
{

	lcr0(rcr0() | CR0_WP);

	/*
	 * On a P6 or above, enable global TLB caching if the
	 * hardware supports it.
	 */
	if (cpu_feature[0] & CPUID_PGE)
		lcr4(rcr4() | CR4_PGE);	/* enable global TLB caching */

	/*
	 * If we have FXSAVE/FXRESTOR, use them.
	 */
	if (cpu_feature[0] & CPUID_FXSR) {
		lcr4(rcr4() | CR4_OSFXSR);

		/*
		 * If we have SSE/SSE2, enable XMM exceptions.
		 */
		if (cpu_feature[0] & (CPUID_SSE|CPUID_SSE2))
			lcr4(rcr4() | CR4_OSXMMEXCPT);
	}

#ifdef MTRR
	/*
	 * On a P6 or above, initialize MTRR's if the hardware supports them.
	 */
	if (cpu_feature[0] & CPUID_MTRR) {
		if ((ci->ci_flags & CPUF_AP) == 0)
			i686_mtrr_init_first();
		mtrr_init_cpu(ci);
	}

#ifdef i386
	if (strcmp((char *)(ci->ci_vendor), "AuthenticAMD") == 0) {
		/*
		 * Must be a K6-2 Step >= 7 or a K6-III.
		 */
		if (CPUID2FAMILY(ci->ci_signature) == 5) {
			if (CPUID2MODEL(ci->ci_signature) > 8 ||
			    (CPUID2MODEL(ci->ci_signature) == 8 &&
			     CPUID2STEPPING(ci->ci_signature) >= 7)) {
				mtrr_funcs = &k6_mtrr_funcs;
				k6_mtrr_init_first();
				mtrr_init_cpu(ci);
			}
		}
	}
#endif	/* i386 */
#endif /* MTRR */

	atomic_or_32(&cpus_running, ci->ci_cpumask);

	if (ci != &cpu_info_primary) {
		/* Synchronize TSC again, and check for drift. */
		wbinvd();
		atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
		tsc_sync_ap(ci);
	} else {
		atomic_or_32(&ci->ci_flags, CPUF_RUNNING);
	}
}
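/*
 * Side effect of CR4_PGE above, noted here as a reminder: kernel
 * mappings marked global survive ordinary %cr3 reloads, which is why
 * cpu_hatch() below uses tlbflushg() after text patching instead of
 * relying on the %cr3 write alone.
 */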

void
cpu_boot_secondary_processors(void)
{
	struct cpu_info *ci;
	u_long i;

	/* Now that we know the number of CPUs, patch the text segment. */
	x86_patch(false);

	for (i=0; i < maxcpus; i++) {
		ci = cpu_lookup(i);
		if (ci == NULL)
			continue;
		if (ci->ci_data.cpu_idlelwp == NULL)
			continue;
		if ((ci->ci_flags & CPUF_PRESENT) == 0)
			continue;
		if (ci->ci_flags & (CPUF_BSP|CPUF_SP|CPUF_PRIMARY))
			continue;
		cpu_boot_secondary(ci);
	}

	x86_mp_online = true;

	/* Now that we know about the TSC, attach the timecounter. */
	tsc_tc_init();

	/* Enable zeroing of pages in the idle loop if we have SSE2. */
	vm_page_zero_enable = ((cpu_feature[0] & CPUID_SSE2) != 0);
}

static void
cpu_init_idle_lwp(struct cpu_info *ci)
{
	struct lwp *l = ci->ci_data.cpu_idlelwp;
	struct pcb *pcb = lwp_getpcb(l);

	pcb->pcb_cr0 = rcr0();
}

void
cpu_init_idle_lwps(void)
{
	struct cpu_info *ci;
	u_long i;

	for (i = 0; i < maxcpus; i++) {
		ci = cpu_lookup(i);
		if (ci == NULL)
			continue;
		if (ci->ci_data.cpu_idlelwp == NULL)
			continue;
		if ((ci->ci_flags & CPUF_PRESENT) == 0)
			continue;
		cpu_init_idle_lwp(ci);
	}
}

void
cpu_start_secondary(struct cpu_info *ci)
{
	extern paddr_t mp_pdirpa;
	u_long psl;
	int i;

	mp_pdirpa = pmap_init_tmp_pgtbl(mp_trampoline_paddr);
	atomic_or_32(&ci->ci_flags, CPUF_AP);
	ci->ci_curlwp = ci->ci_data.cpu_idlelwp;
	if (CPU_STARTUP(ci, mp_trampoline_paddr) != 0) {
		return;
	}

	/*
	 * Wait for it to become ready.  Setting cpu_starting opens the
	 * initial gate and allows the AP to start soft initialization.
	 */
	KASSERT(cpu_starting == NULL);
	cpu_starting = ci;
	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i > 0; i--) {
#ifdef MPDEBUG
		extern int cpu_trace[3];
		static int otrace[3];
		if (memcmp(otrace, cpu_trace, sizeof(otrace)) != 0) {
			aprint_debug_dev(ci->ci_dev, "trace %02x %02x %02x\n",
			    cpu_trace[0], cpu_trace[1], cpu_trace[2]);
			memcpy(otrace, cpu_trace, sizeof(otrace));
		}
#endif
		i8254_delay(10);
	}

	if ((ci->ci_flags & CPUF_PRESENT) == 0) {
		aprint_error_dev(ci->ci_dev, "failed to become ready\n");
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n");
		Debugger();
#endif
	} else {
		/*
		 * Synchronize time stamp counters.  Invalidate cache and do
		 * twice to try and minimize possible cache effects.  Disable
		 * interrupts to try and rule out any external interference.
		 */
		psl = x86_read_psl();
		x86_disable_intr();
		wbinvd();
		tsc_sync_bp(ci);
		x86_write_psl(psl);
	}

	CPU_START_CLEANUP(ci);
	cpu_starting = NULL;
}
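/*
 * Startup handshake, in summary: the BP (above) sets cpu_starting and
 * polls for CPUF_PRESENT; the AP (cpu_hatch() below) sets CPUF_PRESENT,
 * joins the TSC sync, then spins until cpu_boot_secondary() sets
 * CPUF_GO.
 */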

void
cpu_boot_secondary(struct cpu_info *ci)
{
	int64_t drift;
	u_long psl;
	int i;

	atomic_or_32(&ci->ci_flags, CPUF_GO);
	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i > 0; i--) {
		i8254_delay(10);
	}
	if ((ci->ci_flags & CPUF_RUNNING) == 0) {
		aprint_error_dev(ci->ci_dev, "failed to start\n");
#if defined(MPDEBUG) && defined(DDB)
		printf("dropping into debugger; continue from here to resume boot\n");
		Debugger();
#endif
	} else {
		/* Synchronize TSC again, check for drift. */
		drift = ci->ci_data.cpu_cc_skew;
		psl = x86_read_psl();
		x86_disable_intr();
		wbinvd();
		tsc_sync_bp(ci);
		x86_write_psl(psl);
		drift -= ci->ci_data.cpu_cc_skew;
		aprint_debug_dev(ci->ci_dev, "TSC skew=%lld drift=%lld\n",
		    (long long)ci->ci_data.cpu_cc_skew, (long long)drift);
		tsc_sync_drift(drift);
	}
}
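/*
 * Drift arithmetic above: with s1 the skew from the first sync and s2
 * the skew just measured, drift = s1 - s2, i.e. how far the skew moved
 * between the two measurements; a large value suggests the TSCs are
 * not advancing in lockstep.
 */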

/*
 * The CPU ends up here when its ready to run
 * This is called from code in mptramp.s; at this point, we are running
 * in the idle pcb/idle stack of the new CPU.  When this function returns,
 * this processor will enter the idle loop and start looking for work.
 */
void
cpu_hatch(void *v)
{
	struct cpu_info *ci = (struct cpu_info *)v;
	struct pcb *pcb;
	int s, i;

	cpu_init_msrs(ci, true);
	cpu_probe(ci);

	ci->ci_data.cpu_cc_freq = cpu_info_primary.ci_data.cpu_cc_freq;
	/* cpu_get_tsc_freq(ci); */

	KDASSERT((ci->ci_flags & CPUF_PRESENT) == 0);

	/*
	 * Synchronize time stamp counters.  Invalidate cache and do twice
	 * to try and minimize possible cache effects.  Note that interrupts
	 * are off at this point.
	 */
	wbinvd();
	atomic_or_32(&ci->ci_flags, CPUF_PRESENT);
	tsc_sync_ap(ci);

	/*
	 * Wait to be brought online.  Use 'monitor/mwait' if available,
	 * in order to make the TSC drift as much as possible. so that
	 * we can detect it later.  If not available, try 'pause'.
	 * We'd like to use 'hlt', but we have interrupts off.
	 */
	while ((ci->ci_flags & CPUF_GO) == 0) {
		if ((cpu_feature[1] & CPUID2_MONITOR) != 0) {
			x86_monitor(&ci->ci_flags, 0, 0);
			if ((ci->ci_flags & CPUF_GO) != 0) {
				continue;
			}
			x86_mwait(0, 0);
		} else {
			for (i = 10000; i != 0; i--) {
				x86_pause();
			}
		}
	}

	/* Because the text may have been patched in x86_patch(). */
	wbinvd();
	x86_flush();
	tlbflushg();

	KASSERT((ci->ci_flags & CPUF_RUNNING) == 0);

#ifdef PAE
	pd_entry_t * l3_pd = ci->ci_pae_l3_pdir;
	for (i = 0 ; i < PDP_SIZE; i++) {
		l3_pd[i] = pmap_kernel()->pm_pdirpa[i] | PG_V;
	}
	lcr3(ci->ci_pae_l3_pdirpa);
#else
	lcr3(pmap_pdirpa(pmap_kernel(), 0));
#endif

	pcb = lwp_getpcb(curlwp);
	pcb->pcb_cr3 = rcr3();
	pcb = lwp_getpcb(ci->ci_data.cpu_idlelwp);
	lcr0(pcb->pcb_cr0);

	cpu_init_idt();
	gdt_init_cpu(ci);
	lapic_enable();
	lapic_set_lvt();
	lapic_initclocks();

#ifdef i386
#if NNPX > 0
	npxinit(ci);
#endif
#else
	fpuinit(ci);
#endif
	lldt(GSYSSEL(GLDT_SEL, SEL_KPL));
	ltr(ci->ci_tss_sel);

	cpu_init(ci);
	cpu_get_tsc_freq(ci);

	s = splhigh();
#ifdef i386
	lapic_tpr = 0;
#else
	lcr8(0);
#endif
	x86_enable_intr();
	splx(s);
	x86_errata();

	aprint_debug_dev(ci->ci_dev, "running\n");
}

#if defined(DDB)

#include <ddb/db_output.h>
#include <machine/db_machdep.h>

/*
 * Dump CPU information from ddb.
 */
void
cpu_debug_dump(void)
{
	struct cpu_info *ci;
	CPU_INFO_ITERATOR cii;

	db_printf("addr dev id flags ipis curlwp fpcurlwp\n");
	for (CPU_INFO_FOREACH(cii, ci)) {
		db_printf("%p %s %ld %x %x %10p %10p\n",
		    ci,
		    ci->ci_dev == NULL ? "BOOT" : device_xname(ci->ci_dev),
		    (long)ci->ci_cpuid,
		    ci->ci_flags, ci->ci_ipis,
		    ci->ci_curlwp,
		    ci->ci_fpcurlwp);
	}
}
#endif

static void
cpu_copy_trampoline(void)
{
	/*
	 * Copy boot code.
	 */
	extern u_char cpu_spinup_trampoline[];
	extern u_char cpu_spinup_trampoline_end[];

	vaddr_t mp_trampoline_vaddr;

	mp_trampoline_vaddr = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
	    UVM_KMF_VAONLY);

	pmap_kenter_pa(mp_trampoline_vaddr, mp_trampoline_paddr,
	    VM_PROT_READ | VM_PROT_WRITE, 0);
	pmap_update(pmap_kernel());
	memcpy((void *)mp_trampoline_vaddr,
	    cpu_spinup_trampoline,
	    cpu_spinup_trampoline_end - cpu_spinup_trampoline);

	pmap_kremove(mp_trampoline_vaddr, PAGE_SIZE);
	pmap_update(pmap_kernel());
	uvm_km_free(kernel_map, mp_trampoline_vaddr, PAGE_SIZE, UVM_KMF_VAONLY);
}
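/*
 * The trampoline is copied to mp_trampoline_paddr (MP_TRAMPOLINE),
 * which must be page aligned and low enough for real mode:
 * mp_cpu_start() both points the warm-reset vector at it (target >> 4
 * as a real-mode segment) and encodes it as the page-number vector of
 * the STARTUP IPI (target / PAGE_SIZE).
 */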

#ifdef i386
static void
tss_init(struct i386tss *tss, void *stack, void *func)
{
	KASSERT(curcpu()->ci_pmap == pmap_kernel());

	memset(tss, 0, sizeof *tss);
	tss->tss_esp0 = tss->tss_esp = (int)((char *)stack + USPACE - 16);
	tss->tss_ss0 = GSEL(GDATA_SEL, SEL_KPL);
	tss->__tss_cs = GSEL(GCODE_SEL, SEL_KPL);
	tss->tss_fs = GSEL(GCPU_SEL, SEL_KPL);
	tss->tss_gs = tss->__tss_es = tss->__tss_ds =
	    tss->__tss_ss = GSEL(GDATA_SEL, SEL_KPL);
	/* %cr3 contains the value associated to pmap_kernel */
	tss->tss_cr3 = rcr3();
	tss->tss_esp = (int)((char *)stack + USPACE - 16);
	tss->tss_ldt = GSEL(GLDT_SEL, SEL_KPL);
	tss->__tss_eflags = PSL_MBO | PSL_NT;	/* XXX not needed? */
	tss->__tss_eip = (int)func;
}
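/*
 * tss_init() backs the task gates installed below: a hardware task
 * switch loads a fresh %esp/%ss from the TSS, so the double-fault and
 * DDB-IPI handlers get a known-good stack even when the interrupted
 * context's stack pointer is corrupt.
 */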
908 908
909/* XXX */ 909/* XXX */
910#define IDTVEC(name) __CONCAT(X, name) 910#define IDTVEC(name) __CONCAT(X, name)
911typedef void (vector)(void); 911typedef void (vector)(void);
912extern vector IDTVEC(tss_trap08); 912extern vector IDTVEC(tss_trap08);
913#ifdef DDB 913#ifdef DDB
914extern vector Xintrddbipi; 914extern vector Xintrddbipi;
915extern int ddb_vec; 915extern int ddb_vec;
916#endif 916#endif
917 917
918static void 918static void
919cpu_set_tss_gates(struct cpu_info *ci) 919cpu_set_tss_gates(struct cpu_info *ci)
920{ 920{
921 struct segment_descriptor sd; 921 struct segment_descriptor sd;
922 922
923 ci->ci_doubleflt_stack = (char *)uvm_km_alloc(kernel_map, USPACE, 0, 923 ci->ci_doubleflt_stack = (char *)uvm_km_alloc(kernel_map, USPACE, 0,
924 UVM_KMF_WIRED); 924 UVM_KMF_WIRED);
925 tss_init(&ci->ci_doubleflt_tss, ci->ci_doubleflt_stack, 925 tss_init(&ci->ci_doubleflt_tss, ci->ci_doubleflt_stack,
926 IDTVEC(tss_trap08)); 926 IDTVEC(tss_trap08));
927 setsegment(&sd, &ci->ci_doubleflt_tss, sizeof(struct i386tss) - 1, 927 setsegment(&sd, &ci->ci_doubleflt_tss, sizeof(struct i386tss) - 1,
928 SDT_SYS386TSS, SEL_KPL, 0, 0); 928 SDT_SYS386TSS, SEL_KPL, 0, 0);
929 ci->ci_gdt[GTRAPTSS_SEL].sd = sd; 929 ci->ci_gdt[GTRAPTSS_SEL].sd = sd;
930 setgate(&idt[8], NULL, 0, SDT_SYSTASKGT, SEL_KPL, 930 setgate(&idt[8], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
931 GSEL(GTRAPTSS_SEL, SEL_KPL)); 931 GSEL(GTRAPTSS_SEL, SEL_KPL));
932 932
933#if defined(DDB) 933#if defined(DDB)
934 /* 934 /*
935 * Set up separate handler for the DDB IPI, so that it doesn't 935 * Set up separate handler for the DDB IPI, so that it doesn't
936 * stomp on a possibly corrupted stack. 936 * stomp on a possibly corrupted stack.
937 * 937 *
938 * XXX overwriting the gate set in db_machine_init. 938 * XXX overwriting the gate set in db_machine_init.
939 * Should rearrange the code so that it's set only once. 939 * Should rearrange the code so that it's set only once.
940 */ 940 */
941 ci->ci_ddbipi_stack = (char *)uvm_km_alloc(kernel_map, USPACE, 0, 941 ci->ci_ddbipi_stack = (char *)uvm_km_alloc(kernel_map, USPACE, 0,
942 UVM_KMF_WIRED); 942 UVM_KMF_WIRED);
943 tss_init(&ci->ci_ddbipi_tss, ci->ci_ddbipi_stack, Xintrddbipi); 943 tss_init(&ci->ci_ddbipi_tss, ci->ci_ddbipi_stack, Xintrddbipi);
944 944
945 setsegment(&sd, &ci->ci_ddbipi_tss, sizeof(struct i386tss) - 1, 945 setsegment(&sd, &ci->ci_ddbipi_tss, sizeof(struct i386tss) - 1,
946 SDT_SYS386TSS, SEL_KPL, 0, 0); 946 SDT_SYS386TSS, SEL_KPL, 0, 0);
947 ci->ci_gdt[GIPITSS_SEL].sd = sd; 947 ci->ci_gdt[GIPITSS_SEL].sd = sd;
948 948
949 setgate(&idt[ddb_vec], NULL, 0, SDT_SYSTASKGT, SEL_KPL, 949 setgate(&idt[ddb_vec], NULL, 0, SDT_SYSTASKGT, SEL_KPL,
950 GSEL(GIPITSS_SEL, SEL_KPL)); 950 GSEL(GIPITSS_SEL, SEL_KPL));
951#endif 951#endif
952} 952}
953#else 953#else
954static void 954static void
cpu_set_tss_gates(struct cpu_info *ci)
{

}
#endif	/* i386 */

int
mp_cpu_start(struct cpu_info *ci, paddr_t target)
{
	unsigned short dwordptr[2];
	int error;

	/*
	 * Bootstrap code must be addressable in real mode
	 * and it must be page aligned.
	 */
	KASSERT(target < 0x10000 && target % PAGE_SIZE == 0);

	/*
	 * "The BSP must initialize CMOS shutdown code to 0Ah ..."
	 */

	outb(IO_RTC, NVRAM_RESET);
	outb(IO_RTC+1, NVRAM_RESET_JUMP);

	/*
	 * "and the warm reset vector (DWORD based at 40:67) to point
	 * to the AP startup code ..."
	 */

	dwordptr[0] = 0;
	dwordptr[1] = target >> 4;

	memcpy((uint8_t *)cmos_data_mapping + 0x467, dwordptr, 4);

	if ((cpu_feature[0] & CPUID_APIC) == 0) {
		aprint_error("mp_cpu_start: CPU does not have APIC\n");
		return ENODEV;
	}

	/*
	 * ... prior to executing the following sequence:".  We'll also add in
	 * local cache flush, in case the BIOS has left the AP with its cache
	 * disabled.  It may not be able to cope with MP coherency.
	 */
	wbinvd();

	if (ci->ci_flags & CPUF_AP) {
		error = x86_ipi_init(ci->ci_cpuid);
		if (error != 0) {
			aprint_error_dev(ci->ci_dev, "%s: IPI not taken (1)\n",
			    __func__);
			return error;
		}
		i8254_delay(10000);

		error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE);
		if (error != 0) {
			aprint_error_dev(ci->ci_dev, "%s: IPI not taken (2)\n",
			    __func__);
			return error;
		}
		i8254_delay(200);

		error = x86_ipi_startup(ci->ci_cpuid, target / PAGE_SIZE);
		if (error != 0) {
			aprint_error_dev(ci->ci_dev, "%s: IPI not taken (3)\n",
			    __func__);
			return error;
		}
		i8254_delay(200);
	}

	return 0;
}
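
/*
 * Reviewer's note (illustrative sketch, not part of this diff): the
 * above follows the Intel MP spec AP startup protocol.  The warm
 * reset vector is a real-mode far pointer at 0040:0067 (physical
 * 0x467); storing offset 0 and segment (target >> 4) points a warm
 * reset at the page-aligned trampoline, e.g. for target 0x3000:
 *
 *	dwordptr[0] = 0x0000;		offset
 *	dwordptr[1] = 0x3000 >> 4;	segment 0x0300, i.e. 0300:0000
 *
 * The IPI sequence is INIT, a ~10 ms wait, then two STARTUP IPIs
 * roughly 200 us apart.  The STARTUP vector is the trampoline's page
 * number (target / PAGE_SIZE); the AP shifts it left by 12 to form
 * its real-mode entry point, which is why the KASSERT above demands
 * a page-aligned target below 64 KB.
 */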

void
mp_cpu_start_cleanup(struct cpu_info *ci)
{
	/*
	 * Ensure the NVRAM reset byte contains something vaguely sane.
	 */

	outb(IO_RTC, NVRAM_RESET);
	outb(IO_RTC+1, NVRAM_RESET_RST);
}
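
/*
 * Reviewer's note (illustrative sketch, not part of this diff): this
 * undoes the mp_cpu_start() setup.  Writing NVRAM_RESET_RST back to
 * the RTC shutdown status byte means a later reset takes the normal
 * boot path instead of vectoring through the warm-reset pointer at
 * 40:67 that was aimed at the AP trampoline.
 */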

#ifdef __x86_64__
typedef void (vector)(void);
extern vector Xsyscall, Xsyscall32;
#endif

void
cpu_init_msrs(struct cpu_info *ci, bool full)
{
#ifdef __x86_64__
	wrmsr(MSR_STAR,
	    ((uint64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
	    ((uint64_t)LSEL(LSYSRETBASE_SEL, SEL_UPL) << 48));
	wrmsr(MSR_LSTAR, (uint64_t)Xsyscall);
	wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32);
	wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C);

	if (full) {
		wrmsr(MSR_FSBASE, 0);
		wrmsr(MSR_GSBASE, (uint64_t)ci);
		wrmsr(MSR_KERNELGSBASE, 0);
	}
#endif	/* __x86_64__ */

	if (cpu_feature[2] & CPUID_NOX)
		wrmsr(MSR_EFER, rdmsr(MSR_EFER) | EFER_NXE);
}
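
/*
 * Reviewer's note (illustrative sketch, not part of this diff):
 * MSR_STAR packs the segment selector bases for the SYSCALL/SYSRET
 * fast paths.  Bits 47:32 hold the kernel base that SYSCALL loads
 * into CS (with SS derived from it); bits 63:48 hold the base from
 * which SYSRET derives the user CS and SS.  MSR_LSTAR and MSR_CSTAR
 * are the 64-bit and compat-mode entry points, and every RFLAGS bit
 * set in MSR_SFMASK (here NT, TF, IF and CF) is cleared on kernel
 * entry, so the syscall path starts with interrupts and
 * single-stepping off.
 */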

void
cpu_offline_md(void)
{
	int s;

	s = splhigh();
#ifdef i386
#if NNPX > 0
	npxsave_cpu(true);
#endif
#else
	fpusave_cpu(true);
#endif
	splx(s);
}

/* XXX joerg restructure and restart CPUs individually */
static bool
cpu_suspend(device_t dv, const pmf_qual_t *qual)
{
	struct cpu_softc *sc = device_private(dv);
	struct cpu_info *ci = sc->sc_info;
	int err;

	if (ci->ci_flags & CPUF_PRIMARY)
		return true;
	if (ci->ci_data.cpu_idlelwp == NULL)
		return true;
	if ((ci->ci_flags & CPUF_PRESENT) == 0)
		return true;

	sc->sc_wasonline = !(ci->ci_schedstate.spc_flags & SPCF_OFFLINE);

	if (sc->sc_wasonline) {
		mutex_enter(&cpu_lock);
		err = cpu_setstate(ci, false);
		mutex_exit(&cpu_lock);

		if (err)
			return false;
	}

	return true;
}

static bool
cpu_resume(device_t dv, const pmf_qual_t *qual)
{
	struct cpu_softc *sc = device_private(dv);
	struct cpu_info *ci = sc->sc_info;
	int err = 0;

	if (ci->ci_flags & CPUF_PRIMARY)
		return true;
	if (ci->ci_data.cpu_idlelwp == NULL)
		return true;
	if ((ci->ci_flags & CPUF_PRESENT) == 0)
		return true;

	if (sc->sc_wasonline) {
		mutex_enter(&cpu_lock);
		err = cpu_setstate(ci, true);
		mutex_exit(&cpu_lock);
	}

	return err == 0;
}
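
/*
 * Reviewer's note (illustrative sketch, not part of this diff): the
 * pmf suspend/resume pair above is symmetric.  cpu_suspend() records
 * in sc_wasonline whether the CPU was online, then detaches it from
 * the scheduler with cpu_setstate(ci, false) under cpu_lock;
 * cpu_resume() replays the inverse, so only CPUs that were online
 * when the machine suspended are brought back online.  The primary
 * CPU and CPUs that never attached an idle lwp are left alone by
 * both paths.
 */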

static bool
cpu_shutdown(device_t dv, int how)
{
+	struct cpu_softc *sc = device_private(dv);
+	struct cpu_info *ci = sc->sc_info;
+
+	if (ci->ci_flags & CPUF_BSP)
+		return false;
+
	return cpu_suspend(dv, NULL);
}

void
cpu_get_tsc_freq(struct cpu_info *ci)
{
	uint64_t last_tsc;

	if (cpu_hascounter()) {
		last_tsc = cpu_counter_serializing();
		i8254_delay(100000);
		ci->ci_data.cpu_cc_freq =
		    (cpu_counter_serializing() - last_tsc) * 10;
	}
}
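
/*
 * Reviewer's note (illustrative sketch, not part of this diff): the
 * calibration above is simple arithmetic.  i8254_delay(100000)
 * busy-waits for 100,000 us = 1/10 s against the 8254 timer, so the
 * observed TSC delta times 10 is cycles per second.  For example, a
 * delta of 240,000,000 ticks over that window gives
 *
 *	cpu_cc_freq = 240000000 * 10;	2.4 GHz
 */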

void
x86_cpu_idle_mwait(void)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	x86_monitor(&ci->ci_want_resched, 0, 0);
	if (__predict_false(ci->ci_want_resched)) {
		return;
	}
	x86_mwait(0, 0);
}
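
/*
 * Reviewer's note (illustrative sketch, not part of this diff):
 * MONITOR arms address-range monitoring on &ci->ci_want_resched, so
 * a store to that cache line by another CPU wakes the subsequent
 * MWAIT.  Re-reading ci_want_resched between the two closes the race
 * where the wakeup store landed before the monitor was armed;
 * without that check, MWAIT could sleep through a pending reschedule
 * request.
 */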

void
x86_cpu_idle_halt(void)
{
	struct cpu_info *ci = curcpu();

	KASSERT(ci->ci_ilevel == IPL_NONE);

	x86_disable_intr();
	if (!__predict_false(ci->ci_want_resched)) {
		x86_stihlt();
	} else {
		x86_enable_intr();
	}
}
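
/*
 * Reviewer's note (illustrative sketch, not part of this diff): the
 * halt path relies on the x86 guarantee that STI takes effect only
 * after the following instruction.  x86_stihlt() issues "sti; hlt"
 * back to back, so no interrupt can slip in between re-enabling
 * interrupts and halting; checking ci_want_resched with interrupts
 * disabled therefore cannot lose a wakeup.
 */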

/*
 * Loads pmap for the current CPU.
 */
void
cpu_load_pmap(struct pmap *pmap)
{
#ifdef PAE
	int i, s;
	struct cpu_info *ci;

	s = splvm(); /* just to be safe */
	ci = curcpu();
	pd_entry_t *l3_pd = ci->ci_pae_l3_pdir;
	for (i = 0 ; i < PDP_SIZE; i++) {
		l3_pd[i] = pmap->pm_pdirpa[i] | PG_V;
	}
	splx(s);
	tlbflush();
#else /* PAE */
	lcr3(pmap_pdirpa(pmap, 0));
#endif /* PAE */
}
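
/*
 * Reviewer's note (illustrative sketch, not part of this diff):
 * under PAE, %cr3 points at a 4-entry page directory pointer table
 * rather than a single page directory, and this code keeps one such
 * table per CPU (ci_pae_l3_pdir).  Switching pmaps therefore means
 * rewriting the four PDPTEs with the new pmap's page directory
 * addresses (PG_V marks them valid) and flushing the TLB, instead of
 * simply reloading %cr3 as in the non-PAE case.
 */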