Wed May 27 02:19:50 2009 UTC ()
- use _MAXNCPU instead of 4
- convert xpmsg_lock from a simplelock to a kmutex
- don't wait for sparc_noop IPI calls
- remove xpmsg_func's "retval" parameter and usage
- remove the IPI at high IPL message
- rework cpu_attach() a bunch, refactoring calls to getcpuinfo() and setting
  of cpi, and split most of the non-boot CPU handling into a new function
- make CPU_INFO_FOREACH() work whether modular or not
- move the MP cpu_info pages earlier
- move a few things in cpu.c around to consolidate the MP code together
- remove useless if (cpus == NULL) tests -- cpus is an array now

with these changes, and an additional change to crazyintr() to not printf(),
i can get to single user shell on my SS20 again.  i can run a few commands
but some of them cause hangs.  "ps auxw" works, but "top -b" does not.

tested in UP LOCKDEBUG/DEBUG/DIAGNOSTIC kernel as well.
MP kernel with only cpu0 configured panics starting /sbin/init.
have not yet tested on a real UP machine.


(mrg)
diff -r1.214 -r1.215 src/sys/arch/sparc/sparc/cpu.c
diff -r1.77 -r1.78 src/sys/arch/sparc/sparc/cpuvar.h
diff -r1.103 -r1.104 src/sys/arch/sparc/sparc/intr.c
diff -r1.328 -r1.329 src/sys/arch/sparc/sparc/pmap.c

cvs diff -r1.214 -r1.215 src/sys/arch/sparc/sparc/cpu.c (expand / switch to context diff)
--- src/sys/arch/sparc/sparc/cpu.c 2009/05/18 01:36:11 1.214
+++ src/sys/arch/sparc/sparc/cpu.c 2009/05/27 02:19:49 1.215
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpu.c,v 1.214 2009/05/18 01:36:11 mrg Exp $ */
+/*	$NetBSD: cpu.c,v 1.215 2009/05/27 02:19:49 mrg Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -52,7 +52,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.214 2009/05/18 01:36:11 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: cpu.c,v 1.215 2009/05/27 02:19:49 mrg Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_lockdebug.h"
@@ -102,10 +102,7 @@
 extern char machine_model[];
 
 int	sparc_ncpus;			/* # of CPUs detected by PROM */
-#ifdef MULTIPROCESSOR
-struct cpu_info *cpus[4];		/* we only support 4 CPUs. */
-u_int	cpu_ready_mask;			/* the set of CPUs marked as READY */
-#endif
+struct cpu_info *cpus[_MAXNCPU];	/* we only support 4 CPUs. */
 
 /* The CPU configuration driver. */
 static void cpu_mainbus_attach(struct device *, struct device *, void *);
@@ -136,44 +133,7 @@
 #define SRMMU_VERS(mmusr)	(((mmusr) >> 24) & 0xf)
 
 int bootmid;		/* Module ID of boot CPU */
-#if defined(MULTIPROCESSOR)
-void cpu_spinup(struct cpu_info *);
-static void init_cpuinfo(struct cpu_info *, int);
 
-int go_smp_cpus = 0;	/* non-primary CPUs wait for this to go */
-
-/* lock this to send IPI's */
-struct simplelock xpmsg_lock = SIMPLELOCK_INITIALIZER;
-
-static void
-init_cpuinfo(struct cpu_info *cpi, int node)
-{
-	vaddr_t intstack, va;
-
-	/*
-	 * Finish initialising this cpu_info.
-	 */
-	getcpuinfo(cpi, node);
-
-	/*
-	 * Arrange interrupt stack.  This cpu will also abuse the bottom
-	 * half of the interrupt stack before it gets to run its idle LWP.
-	 */
-	intstack = uvm_km_alloc(kernel_map, INT_STACK_SIZE, 0, UVM_KMF_WIRED);
-	if (intstack == 0)
-		panic("%s: no uspace/intstack", __func__);
-	cpi->eintstack = (void*)(intstack + INT_STACK_SIZE);
-
-	/* Allocate virtual space for pmap page_copy/page_zero */
-	va = uvm_km_alloc(kernel_map, 2*PAGE_SIZE, 0, UVM_KMF_VAONLY);
-	if (va == 0)
-		panic("%s: no virtual space", __func__);
-
-	cpi->vpage[0] = (void *)(va + 0);
-	cpi->vpage[1] = (void *)(va + PAGE_SIZE);
-}
-#endif /* MULTIPROCESSOR */
-
 #ifdef notdef
 /*
  * IU implementations are parceled out to vendors (with some slight
@@ -199,7 +159,22 @@
 };
 #endif
 
+#if defined(MULTIPROCESSOR)
+u_int	cpu_ready_mask;			/* the set of CPUs marked as READY */
+void cpu_spinup(struct cpu_info *);
+static void cpu_attach_non_boot(struct cpu_softc *, struct cpu_info *, int);
+
+int go_smp_cpus = 0;	/* non-primary CPUs wait for this to go */
+
 /*
+ * This must be locked around all message transactions to ensure only
+ * one CPU is generating them.
+ */
+static kmutex_t xpmsg_mutex;
+
+#endif /* MULTIPROCESSOR */
+
+/*
  * 4/110 comment: the 4/110 chops off the top 4 bits of an OBIO address.
  *	this confuses autoconf.  for example, if you try and map
  *	0xfe000000 in obio space on a 4/110 it actually maps 0x0e000000.
@@ -326,6 +301,7 @@
 static void
 cpu_attach(struct cpu_softc *sc, int node, int mid)
 {
+	char buf[100];
 	struct cpu_info *cpi;
 	int idx;
 	static int cpu_attach_count = 0;
@@ -335,69 +311,26 @@
 	 * (see autoconf.c and cpuunit.c)
 	 */
 	idx = cpu_attach_count++;
-	if (cpu_attach_count == 1) {
-		getcpuinfo(&cpuinfo, node);
 
-#if defined(MULTIPROCESSOR)
-		cpi = sc->sc_cpuinfo = cpus[idx];
-#else
-		/* The `local' VA is global for uniprocessor. */
-		cpi = sc->sc_cpuinfo = (struct cpu_info *)CPUINFO_VA;
-#endif
-		cpi->master = 1;
-		cpi->eintstack = eintstack;
-		/* Note: `curpcb' is set to `proc0' in locore */
-
-		/*
-		 * If we haven't been able to determine the Id of the
-		 * boot CPU, set it now. In this case we can only boot
-		 * from CPU #0 (see also the CPU attach code in autoconf.c)
-		 */
-		if (bootmid == 0)
-			bootmid = mid;
-	} else {
-#if defined(MULTIPROCESSOR)
-		int error;
-
-		/*
-		 * Initialise this cpu's cpu_info.
-		 */
-		cpi = sc->sc_cpuinfo = cpus[idx];
-		init_cpuinfo(cpi, node);
-
-		/*
-		 * Call the MI attach which creates an idle LWP for us.
-		 */
-		error = mi_cpu_attach(cpi);
-		if (error != 0) {
-			aprint_normal("\n");
-			aprint_error("%s: mi_cpu_attach failed with %d\n",
-			    sc->sc_dev.dv_xname, error);
-			return;
-		}
-
-		/*
-		 * Note: `eintstack' is set in init_cpuinfo() above.
-		 * The %wim register will be initialized in cpu_hatch().
-		 */
-		cpi->ci_curlwp = cpi->ci_data.cpu_idlelwp;
-		cpi->curpcb = (struct pcb *)cpi->ci_curlwp->l_addr;
-		cpi->curpcb->pcb_wim = 1;
-
-#else
-		sc->sc_cpuinfo = NULL;
+#if !defined(MULTIPROCESSOR)
+	if (cpu_attach_count > 1) {
 		printf(": no SMP support in kernel\n");
 		return;
-#endif
 	}
-
-#ifdef DEBUG
-	cpi->redzone = (void *)((long)cpi->eintstack + REDSIZE);
 #endif
 
+	/*
+	 * Initialise this cpu's cpu_info.
+	 */
+	cpi = sc->sc_cpuinfo = cpus[idx];
+	getcpuinfo(cpi, node);
+
 	cpi->ci_cpuid = idx;
 	cpi->mid = mid;
 	cpi->node = node;
+#ifdef DEBUG
+	cpi->redzone = (void *)((long)cpi->eintstack + REDSIZE);
+#endif
 
 	if (sparc_ncpus > 1) {
 		printf(": mid %d", mid);
@@ -405,24 +338,102 @@
 			printf(" [WARNING: mid should not be 0]");
 	}
 
+#if defined(MULTIPROCESSOR)
+	if (cpu_attach_count > 1) {
+		cpu_attach_non_boot(sc, cpi, node);
+		return;
+	}
+#endif /* MULTIPROCESSOR */
 
-	if (cpi->master) {
-		char buf[100];
+	/* Stuff to only run on the boot CPU */
+	cpu_setup();
+	snprintf(buf, sizeof buf, "%s @ %s MHz, %s FPU",
+		cpi->cpu_name, clockfreq(cpi->hz), cpi->fpu_name);
+	snprintf(cpu_model, sizeof cpu_model, "%s (%s)",
+		machine_model, buf);
+	printf(": %s\n", buf);
+	cache_print(sc);
 
-		cpu_setup();
-		snprintf(buf, sizeof buf, "%s @ %s MHz, %s FPU",
-			cpi->cpu_name, clockfreq(cpi->hz), cpi->fpu_name);
-		snprintf(cpu_model, sizeof cpu_model, "%s (%s)",
-			machine_model, buf);
-		printf(": %s\n", buf);
-		cache_print(sc);
+	cpi->master = 1;
+	cpi->eintstack = eintstack;
+
+	/*
+	 * If we haven't been able to determine the Id of the
+	 * boot CPU, set it now. In this case we can only boot
+	 * from CPU #0 (see also the CPU attach code in autoconf.c)
+	 */
+	if (bootmid == 0)
+		bootmid = mid;
+}
+
+/*
+ * Finish CPU attach.
+ * Must be run by the CPU which is being attached.
+ */
+void
+cpu_setup(void)
+{
+ 	if (cpuinfo.hotfix)
+		(*cpuinfo.hotfix)(&cpuinfo);
+
+	/* Initialize FPU */
+	fpu_init(&cpuinfo);
+
+	/* Enable the cache */
+	cpuinfo.cache_enable();
+
+	cpuinfo.flags |= CPUFLG_HATCHED;
+}
+
+#if defined(MULTIPROCESSOR)
+/*
+ * Perform most of the tasks needed for a non-boot CPU.
+ */
+static void
+cpu_attach_non_boot(struct cpu_softc *sc, struct cpu_info *cpi, int node)
+{
+	vaddr_t intstack, va;
+	int error;
+
+	/*
+	 * Arrange interrupt stack.  This cpu will also abuse the bottom
+	 * half of the interrupt stack before it gets to run its idle LWP.
+	 */
+	intstack = uvm_km_alloc(kernel_map, INT_STACK_SIZE, 0, UVM_KMF_WIRED);
+	if (intstack == 0)
+		panic("%s: no uspace/intstack", __func__);
+	cpi->eintstack = (void*)(intstack + INT_STACK_SIZE);
+
+	/* Allocate virtual space for pmap page_copy/page_zero */
+	va = uvm_km_alloc(kernel_map, 2*PAGE_SIZE, 0, UVM_KMF_VAONLY);
+	if (va == 0)
+		panic("%s: no virtual space", __func__);
+
+	cpi->vpage[0] = (void *)(va + 0);
+	cpi->vpage[1] = (void *)(va + PAGE_SIZE);
+
+	/*
+	 * Call the MI attach which creates an idle LWP for us.
+	 */
+	error = mi_cpu_attach(cpi);
+	if (error != 0) {
+		aprint_normal("\n");
+		aprint_error("%s: mi_cpu_attach failed with %d\n",
+		    sc->sc_dev.dv_xname, error);
 		return;
 	}
 
-#if defined(MULTIPROCESSOR)
+	/*
+	 * Note: `eintstack' is set in init_cpuinfo() above.
+	 * The %wim register will be initialized in cpu_hatch().
+	 */
+	cpi->ci_curlwp = cpi->ci_data.cpu_idlelwp;
+	cpi->curpcb = (struct pcb *)cpi->ci_curlwp->l_addr;
+	cpi->curpcb->pcb_wim = 1;
+
 	/* for now use the fixed virtual addresses setup in autoconf.c */
 	cpi->intreg_4m = (struct icr_pi *)
-		(PI_INTR_VA + (_MAXNBPG * CPU_MID2CPUNO(mid)));
+		(PI_INTR_VA + (_MAXNBPG * CPU_MID2CPUNO(cpi->mid)));
 
 	/* Now start this CPU */
 	cpu_spinup(cpi);
@@ -431,7 +442,10 @@
 
 	cache_print(sc);
 
-	if (sparc_ncpus > 1 && idx == sparc_ncpus-1) {
+	/*
+	 * Now we're on the last CPU to be attaching.
+	 */
+	if (sparc_ncpus > 1 && cpi->ci_cpuid == sparc_ncpus - 1) {
 		CPU_INFO_ITERATOR n;
 		/*
 		 * Install MP cache flush functions, unless the
@@ -446,10 +460,9 @@
 			SET_CACHE_FUNC(vcache_flush_context);
 		}
 	}
-#endif /* MULTIPROCESSOR */
+#undef SET_CACHE_FUNC
 }
 
-#if defined(MULTIPROCESSOR)
 /*
  * Start secondary processors in motion.
  */
@@ -479,41 +492,30 @@
 
 	printf("\n");
 }
-#endif /* MULTIPROCESSOR */
 
 /*
- * Finish CPU attach.
- * Must be run by the CPU which is being attached.
+ * Early initialisation, before main().
  */
 void
-cpu_setup(void)
+cpu_init_system(void)
 {
- 	if (cpuinfo.hotfix)
-		(*cpuinfo.hotfix)(&cpuinfo);
 
-	/* Initialize FPU */
-	fpu_init(&cpuinfo);
-
-	/* Enable the cache */
-	cpuinfo.cache_enable();
-
-	cpuinfo.flags |= CPUFLG_HATCHED;
+	mutex_init(&xpmsg_mutex, MUTEX_SPIN, IPL_VM);
 }
 
-#if defined(MULTIPROCESSOR)
-
-extern void cpu_hatch(void); /* in locore.s */
-
 /*
  * Allocate per-CPU data, then start up this CPU using PROM.
  */
 void
 cpu_spinup(struct cpu_info *cpi)
 {
+	extern void cpu_hatch(void); /* in locore.s */
 	struct openprom_addr oa;
-	void *pc = (void *)cpu_hatch;
+	void *pc;
 	int n;
 
+	pc = (void *)cpu_hatch;
+
 	/* Setup CPU-specific MMU tables */
 	pmap_alloc_cpu(cpi);
 
@@ -556,42 +558,20 @@
       u_int cpuset)
 {
 	struct cpu_info *cpi;
-	int s, n, i, done, callself, mybit;
+	int n, i, done, callself, mybit;
 	volatile struct xpmsg_func *p;
 	int fasttrap;
+	int is_noop = func == (xcall_func_t)sparc_noop;
 
-	/* XXX - note p->retval is probably no longer useful */
-
 	mybit = (1 << cpuinfo.ci_cpuid);
 	callself = func && (cpuset & mybit) != 0;
 	cpuset &= ~mybit;
 
-	/*
-	 * If no cpus are configured yet, just call ourselves.
-	 */
-	if (cpus == NULL) {
-		p = &cpuinfo.msg.u.xpmsg_func;
-		if (callself)
-			p->retval = (*func)(arg0, arg1, arg2);
-		return;
-	}
-
 	/* Mask any CPUs that are not ready */
 	cpuset &= cpu_ready_mask;
 
 	/* prevent interrupts that grab the kernel lock */
-	s = splsched();
-#ifdef DEBUG
-	if (!cold) {
-		u_int pc, lvl = ((u_int)s & PSR_PIL) >> 8;
-		if (lvl > IPL_SCHED) {
-			__asm("mov %%i7, %0" : "=r" (pc) : );
-			printf_nolog("%d: xcall at lvl %u from 0x%x\n",
-				cpu_number(), lvl, pc);
-		}
-	}
-#endif
-	LOCK_XPMSG();
+	mutex_spin_enter(&xpmsg_mutex);
 
 	/*
 	 * Firstly, call each CPU.  We do this so that they might have
@@ -621,14 +601,14 @@
 	 */
 	p = &cpuinfo.msg.u.xpmsg_func;
 	if (callself)
-		p->retval = (*func)(arg0, arg1, arg2);
+		(*func)(arg0, arg1, arg2);
 
 	/*
 	 * Lastly, start looping, waiting for all CPUs to register that they
 	 * have completed (bailing if it takes "too long", being loud about
 	 * this in the process).
 	 */
-	done = 0;
+	done = is_noop;
 	i = 100000;	/* time-out, not too long, but still an _AGE_ */
 	while (!done) {
 		if (--i < 0) {
@@ -654,8 +634,7 @@
 	if (i < 0)
 		printf_nolog("\n");
 
-	UNLOCK_XPMSG();
-	splx(s);
+	mutex_spin_exit(&xpmsg_mutex);
 }
 
 /*
@@ -667,9 +646,6 @@
 	CPU_INFO_ITERATOR n;
 	struct cpu_info *cpi;
 
-	if (cpus == NULL)
-		return;
-
 	for (CPU_INFO_FOREACH(n, cpi)) {
 		if (cpuinfo.mid == cpi->mid ||
 		    (cpi->flags & CPUFLG_HATCHED) == 0)
@@ -694,9 +670,6 @@
 	CPU_INFO_ITERATOR n;
 	struct cpu_info *cpi;
 
-	if (cpus == NULL)
-		return;
-
 	for (CPU_INFO_FOREACH(n, cpi)) {
 		if (cpuinfo.mid == cpi->mid ||
 		    (cpi->flags & CPUFLG_HATCHED) == 0)
@@ -720,9 +693,6 @@
 	CPU_INFO_ITERATOR n;
 	struct cpu_info *cpi;
 
-	if (cpus == NULL)
-		return;
-
 	for (CPU_INFO_FOREACH(n, cpi)) {
 		int r;
 
@@ -747,9 +717,6 @@
 	CPU_INFO_ITERATOR n;
 	struct cpu_info *cpi;
 
-	if (cpus == NULL)
-		return;
-
 	for (CPU_INFO_FOREACH(n, cpi)) {
 		if (cpi == NULL || cpi->mid == cpuinfo.mid ||
 		    (cpi->flags & CPUFLG_HATCHED) == 0)
@@ -765,9 +732,6 @@
 {
 	CPU_INFO_ITERATOR n;
 	struct cpu_info *cpi;
-
-	if (cpus == NULL)
-		return;
 
 	for (CPU_INFO_FOREACH(n, cpi)) {
 		if (cpi == NULL || cpuinfo.mid == cpi->mid ||

cvs diff -r1.77 -r1.78 src/sys/arch/sparc/sparc/cpuvar.h (expand / switch to context diff)
--- src/sys/arch/sparc/sparc/cpuvar.h 2009/05/18 01:36:11 1.77
+++ src/sys/arch/sparc/sparc/cpuvar.h 2009/05/27 02:19:49 1.78
@@ -1,4 +1,4 @@
-/*	$NetBSD: cpuvar.h,v 1.77 2009/05/18 01:36:11 mrg Exp $ */
+/*	$NetBSD: cpuvar.h,v 1.78 2009/05/27 02:19:49 mrg Exp $ */
 
 /*
  *  Copyright (c) 1996 The NetBSD Foundation, Inc.
@@ -37,6 +37,7 @@
 #include "opt_lockdebug.h"
 #include "opt_ddb.h"
 #include "opt_sparc_arch.h"
+#include "opt_modular.h"
 #endif
 
 #include <sys/device.h>
@@ -104,12 +105,11 @@
 		 * the trap window (see locore.s).
 		 */
 		struct xpmsg_func {
-			int	(*func)(int, int, int);
+			void	(*func)(int, int, int);
 			void	(*trap)(int, int, int);
 			int	arg0;
 			int	arg1;
 			int	arg2;
-			int	retval;
 		} xpmsg_func;
 	} u;
 	volatile int	received;
@@ -117,15 +117,6 @@
 };
 
 /*
- * This must be locked around all message transactions to ensure only
- * one CPU is generating them.
- */
-extern struct simplelock xpmsg_lock;
-
-#define LOCK_XPMSG()	simple_lock(&xpmsg_lock);
-#define UNLOCK_XPMSG()	simple_unlock(&xpmsg_lock);
-
-/*
  * The cpuinfo structure. This structure maintains information about one
  * currently installed CPU (there may be several of these if the machine
  * supports multiple CPUs, as on some Sun4m architectures). The information
@@ -415,10 +406,16 @@
 
 
 #define CPU_INFO_ITERATOR		int
-#ifdef MULTIPROCESSOR
-#define CPU_INFO_FOREACH(cii, cp)	cii = 0; cp = cpus[cii], cii < sparc_ncpus; cii++
+/*
+ * Provide two forms of CPU_INFO_FOREACH.  One fast one for non-modular
+ * non-SMP kernels, and the other for everyone else.  Both work in the
+ * non-SMP case, just involving an extra indirection through cpus[0] for
+ * the portable version.
+ */
+#if defined(MULTIPROCESSOR) || defined(MODULAR) || defined(_MODULE)
+#define	CPU_INFO_FOREACH(cii, cp)	cii = 0; (cp = cpus[cii]) && cp->eintstack && cii < sparc_ncpus; cii++
 #else
-#define	CPU_INFO_FOREACH(cii, cp)	(void)cii, cp = curcpu(); cp != NULL; cp = NULL
+#define CPU_INFO_FOREACH(cii, cp)	(void)cii, cp = curcpu(); cp != NULL; cp = NULL
 #endif
 
 /*
@@ -437,7 +434,8 @@
 #define	CPUSET_ALL	0xffffffffU	/* xcall to all configured CPUs */
 
 #if defined(MULTIPROCESSOR)
-typedef int (*xcall_func_t)(int, int, int);
+void cpu_init_system(void);
+typedef void (*xcall_func_t)(int, int, int);
 typedef void (*xcall_trap_t)(int, int, int);
 void xcall(xcall_func_t, xcall_trap_t, int, int, int, u_int);
 /* Shorthand */
@@ -472,8 +470,8 @@
 extern int bootmid;			/* Module ID of boot CPU */
 #define CPU_MID2CPUNO(mid)		((mid) != 0 ? (mid) - 8 : 0)
 
-#ifdef MULTIPROCESSOR
 extern struct cpu_info *cpus[];
+#ifdef MULTIPROCESSOR
 extern u_int cpu_ready_mask;		/* the set of CPUs marked as READY */
 #endif
 

cvs diff -r1.103 -r1.104 src/sys/arch/sparc/sparc/intr.c (expand / switch to context diff)
--- src/sys/arch/sparc/sparc/intr.c 2009/05/18 00:25:15 1.103
+++ src/sys/arch/sparc/sparc/intr.c 2009/05/27 02:19:50 1.104
@@ -1,4 +1,4 @@
-/*	$NetBSD: intr.c,v 1.103 2009/05/18 00:25:15 mrg Exp $ */
+/*	$NetBSD: intr.c,v 1.104 2009/05/27 02:19:50 mrg Exp $ */
 
 /*
  * Copyright (c) 1992, 1993
@@ -41,7 +41,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.103 2009/05/18 00:25:15 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: intr.c,v 1.104 2009/05/27 02:19:50 mrg Exp $");
 
 #include "opt_multiprocessor.h"
 #include "opt_sparc_arch.h"
@@ -344,7 +344,7 @@
 		volatile struct xpmsg_func *p = &cpuinfo.msg.u.xpmsg_func;
 
 		if (p->func)
-			p->retval = (*p->func)(p->arg0, p->arg1, p->arg2);
+			(*p->func)(p->arg0, p->arg1, p->arg2);
 		break;
 	    }
 	}

cvs diff -r1.328 -r1.329 src/sys/arch/sparc/sparc/pmap.c (expand / switch to context diff)
--- src/sys/arch/sparc/sparc/pmap.c 2009/05/18 02:28:35 1.328
+++ src/sys/arch/sparc/sparc/pmap.c 2009/05/27 02:19:50 1.329
@@ -1,4 +1,4 @@
-/*	$NetBSD: pmap.c,v 1.328 2009/05/18 02:28:35 mrg Exp $ */
+/*	$NetBSD: pmap.c,v 1.329 2009/05/27 02:19:50 mrg Exp $ */
 
 /*
  * Copyright (c) 1996
@@ -56,7 +56,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.328 2009/05/18 02:28:35 mrg Exp $");
+__KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.329 2009/05/27 02:19:50 mrg Exp $");
 
 #include "opt_ddb.h"
 #include "opt_kgdb.h"
@@ -3502,7 +3502,24 @@
 	 */
 	p = (vaddr_t)top;
 
+#if defined(MULTIPROCESSOR)
 	/*
+	 * allocate the rest of the cpu_info{} area.  note we waste the
+	 * first one to get a VA space.
+	 */
+	cpuinfo_len = ((sizeof(struct cpu_info) + NBPG - 1) & ~PGOFSET);
+	if (sparc_ncpus > 1) {
+		p = (p + NBPG - 1) & ~PGOFSET;
+		cpuinfo_data = (uint8_t *)p;
+		p += (cpuinfo_len * sparc_ncpus);
+
+		/* XXX we waste the first one */
+		memset(cpuinfo_data + cpuinfo_len, 0, cpuinfo_len * (sparc_ncpus - 1));
+	} else
+		cpuinfo_data = (uint8_t *)CPUINFO_VA;
+#endif
+
+	/*
 	 * Intialize the kernel pmap.
 	 */
 	/* kernel_pmap_store.pm_ctxnum = 0; */
@@ -3539,23 +3556,7 @@
 	p += ncontext * sizeof *ci;
 	memset((void *)ci, 0, (u_int)p - (u_int)ci);
 
-#if defined(MULTIPROCESSOR)
 	/*
-	 * allocate the rest of the cpu_info{} area.  note we waste the
-	 * first one to get a VA space.
-	 */
-	p = (p + NBPG - 1) & ~PGOFSET;
-	cpuinfo_data = (uint8_t *)p;
-	cpuinfo_len = ((sizeof(struct cpu_info) + NBPG - 1) & ~PGOFSET);
-	p += (cpuinfo_len * sparc_ncpus);
-	prom_printf("extra cpus: %p, p: %p, gap start: %p, gap end: %p\n",
-	    cpuinfo_data, p, etext_gap_start, etext_gap_end);
-
-	/* XXX we waste the first one */
-	memset(cpuinfo_data + cpuinfo_len, 0, cpuinfo_len * (sparc_ncpus - 1));
-#endif
-
-	/*
 	 * Set up the `constants' for the call to vm_init()
 	 * in main().  All pages beginning at p (rounded up to
 	 * the next whole page) and continuing through the number
@@ -3799,16 +3800,23 @@
 
 #ifdef MULTIPROCESSOR
 	/*
+	 * Initialise any cpu-specific data now.
+	 */
+	cpu_init_system();
+
+	/*
 	 * Remap cpu0 from CPUINFO_VA to the new correct value, wasting the
-	 * backing pages we allocated above XXX.
+	 * backing page we allocated above XXX.
 	 */
 	for (off = 0, va = (vaddr_t)cpuinfo_data;
-	     off < sizeof(struct cpu_info);
+	     sparc_ncpus > 1 && off < sizeof(struct cpu_info);
 	     va += NBPG, off += NBPG) {
 		paddr_t pa = PMAP_BOOTSTRAP_VA2PA(CPUINFO_VA + off);
 		prom_printf("going to pmap_kenter_pa(va=%p, pa=%p)\n", va, pa);
 		pmap_kremove(va, NBPG);
 		pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE);
+		cache_flush_page(va, 0);
+		cache_flush_page(CPUINFO_VA, 0);
 	}
 
 	/*
@@ -3820,6 +3828,8 @@
 		cpus[i]->ci_self = cpus[i];
 		prom_printf("set cpu%d ci_self address: %p\n", i, cpus[i]);
 	}
+#else
+	cpus[0] = (struct cpu_info *)CPUINFO_VA;
 #endif
 
 	pmap_update(pmap_kernel());