Fri Feb 21 22:22:48 2014 UTC
Rework PIC method to be simpler.  Change to be more Cortex neutral.
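
The reworked scheme avoids PC-relative literal pools: a symbol's link-time
(virtual) address is built with movw/movt and then spliced into the physical
address of the currently executing code with bfi, keeping the top 4 bits of
the PC.  A minimal sketch of the pattern used throughout the diffs below (the
label and symbol names here are only illustrative):

	adr	r3, nearby_label		@ physical address of running code
	movw	r2, #:lower16:linked_symbol	@ low half of link-time address
	movt	r2, #:upper16:linked_symbol	@ high half of link-time address
	bfi	r3, r2, #0, #28			@ low 28 bits from the symbol,
						@ top 4 bits from the PC
	@ r3 now holds the symbol's pre-MMU (physical) address

This assumes, as the comments in the diffs note, that the linked and physical
addresses differ only in the top 4 bits of the address.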


(matt)
diff -r1.12 -r1.13 src/sys/arch/arm/cortex/a9_mpsubr.S
diff -r1.10 -r1.11 src/sys/arch/evbarm/bcm53xx/bcm53xx_start.S
diff -r1.5 -r1.6 src/sys/arch/evbarm/cubie/cubie_start.S

cvs diff -r1.12 -r1.13 src/sys/arch/arm/cortex/Attic/a9_mpsubr.S
--- src/sys/arch/arm/cortex/Attic/a9_mpsubr.S 2014/01/24 05:14:11 1.12
+++ src/sys/arch/arm/cortex/Attic/a9_mpsubr.S 2014/02/21 22:22:48 1.13
@@ -1,4 +1,4 @@
-/*	$NetBSD: a9_mpsubr.S,v 1.12 2014/01/24 05:14:11 matt Exp $	*/
+/*	$NetBSD: a9_mpsubr.S,v 1.13 2014/02/21 22:22:48 matt Exp $	*/
 /*-
  * Copyright (c) 2012 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -111,27 +111,34 @@
 
 #if defined(CPU_CORTEXA8)
 #undef CPU_CONTROL_SWP_ENABLE		// not present on A8
-#define	CPU_CONTROL_SWP_ENABLE		0
+#define CPU_CONTROL_SWP_ENABLE		0
 #endif
 #ifdef __ARMEL__
-#undef CPU_CONTROL_EX_BEND		// needs to clear on LE systems
-#define	CPU_CONTROL_EX_BEND		0
+#define CPU_CONTROL_EX_BEND_SET		0
+#else
+#define CPU_CONTROL_EX_BEND_SET		CPU_CONTROL_EX_BEND
 #endif
 #ifdef ARM32_DISABLE_ALIGNMENT_FAULTS
-#undef CPU_CONTROL_AFLT_ENABLE
-#define CPU_CONTROL_AFLT_ENABLE		0
+#define CPU_CONTROL_AFLT_ENABLE_CLR	CPU_CONTROL_AFLT_ENABLE
+#define CPU_CONTROL_AFLT_ENABLE_SET	0
+#else
+#define CPU_CONTROL_AFLT_ENABLE_CLR	0
+#define CPU_CONTROL_AFLT_ENABLE_SET	CPU_CONTROL_AFLT_ENABLE
 #endif
 
-#define	CPU_CONTROL_SET \
+#define CPU_CONTROL_SET \
 	(CPU_CONTROL_MMU_ENABLE		|	\
-	 CPU_CONTROL_AFLT_ENABLE	|	\
-	 CPU_CONTROL_EX_BEND		|	\
+	 CPU_CONTROL_AFLT_ENABLE_SET	|	\
 	 CPU_CONTROL_DC_ENABLE		|	\
 	 CPU_CONTROL_SWP_ENABLE		|	\
 	 CPU_CONTROL_BPRD_ENABLE	|	\
 	 CPU_CONTROL_IC_ENABLE		|	\
+	 CPU_CONTROL_EX_BEND_SET	|	\
 	 CPU_CONTROL_UNAL_ENABLE)
 
+#define CPU_CONTROL_CLR \
+	(CPU_CONTROL_AFLT_ENABLE_CLR)
+
 arm_cpuinit:
 	/*
 	 * In theory, because the MMU is off, we shouldn't need all of this,
@@ -140,41 +147,57 @@
 	 */
 	mov	ip, lr
 	mov	r10, r0
+	mov	r1, #0
 
-	mcr     p15, 0, r10, c7, c5, 0	/* invalidate I cache */
+	mcr     p15, 0, r1, c7, c5, 0	// invalidate I cache
 
-	mrc	p15, 0, r2, c1, c0, 0	/*  "       "   "     */
-	bic	r2, r2, #CPU_CONTROL_DC_ENABLE	@ clear data cache enable
-	bic	r2, r2, #CPU_CONTROL_IC_ENABLE	@ clear instruction cache enable
-	mcr	p15, 0, r2, c1, c0, 0	/*  "       "   "     */
+	mrc	p15, 0, r2, c1, c0, 0	// read SCTRL
+	movw	r1, #(CPU_CONTROL_DC_ENABLE|CPU_CONTROL_IC_ENABLE)
+	bic	r2, r2, r1		// clear I+D cache enable
 
+#ifdef __ARMEB__
+	/*
+	 * SCTRL.EE determines the endianness of translation table lookups.
+	 * So we need to make sure it's set before starting to use the new
+	 * translation tables (which are big endian).
+	 */
+	orr	r2, r2, #CPU_CONTROL_EX_BEND
+	bic	r2, r2, #CPU_CONTROL_MMU_ENABLE
+	pli	[pc, #32]		/* preload the next few cachelines */
+	pli	[pc, #64]
+	pli	[pc, #96]
+	pli	[pc, #128]
+#endif
+
+	mcr	p15, 0, r2, c1, c0, 0	/* write SCTRL */
+
 	XPUTC(#70)
-	mov	r1, #0
 	dsb				/* Drain the write buffers. */
-
+1:
 	XPUTC(#71)
-	mrc	p15, 0, r2, c0, c0, 5	/* get MPIDR */
-	cmp	r2, #0
+	mrc	p15, 0, r1, c0, c0, 5	/* get MPIDR */
+	cmp	r1, #0
 	orrlt	r10, r10, #0x5b		/* MP, cachable (Normal WB) */
 	orrge	r10, r10, #0x1b		/* Non-MP, cacheable, normal WB */
 	mcr	p15, 0, r10, c2, c0, 0	/* Set Translation Table Base */
 
-	XPUTC(#49)
+	XPUTC(#72)
+	mov	r1, #0
 	mcr	p15, 0, r1, c2, c0, 2	/* Set Translation Table Control */
 
-	XPUTC(#72)
+	XPUTC(#73)
 	mov	r1, #0
 	mcr	p15, 0, r1, c8, c7, 0	/* Invalidate TLBs */
 
 	/* Set the Domain Access register.  Very important! */
-	XPUTC(#73)
+	XPUTC(#74)
 	mov     r1, #((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL*2)) | DOMAIN_CLIENT)
 	mcr	p15, 0, r1, c3, c0, 0
 
 	/*
 	 * Enable the MMU, etc.
 	 */
-	XPUTC(#74)
+	XPUTC(#75)
 	mrc	p15, 0, r0, c1, c0, 0
 
 	movw	r3, #:lower16:CPU_CONTROL_SET
@@ -182,19 +205,22 @@
 	movt	r3, #:upper16:CPU_CONTROL_SET
 #endif
 	orr	r0, r0, r3
+#if defined(CPU_CONTROL_CLR) && (CPU_CONTROL_CLR != 0)
+	bic	r0, r0, #CPU_CONTROL_CLR
+#endif
+	pli	1f
 	
 	dsb
-	.align 5
 	@ turn mmu on!
-	mov	r0, r0
-	mcr	p15, 0, r0, c1, c0, 0
+	mov	r0, r0			/* fetch instruction cacheline */
+1:	mcr	p15, 0, r0, c1, c0, 0
 
 	/*
 	 * Ensure that the coprocessor has finished turning on the MMU.
 	 */
 	mrc	p15, 0, r0, c0, c0, 0	/* Read an arbitrary value. */
 	mov	r0, r0			/* Stall until read completes. */
-	XPUTC(#76)
+1:	XPUTC(#76)
 
 	bx	ip			/* return */
 
@@ -207,14 +233,17 @@
 	/* bits to set in the Control Register */
 
 #if defined(VERBOSE_INIT_ARM) && XPUTC_COM
-#define	TIMO	0x25000
+#define TIMO		0x25000
 #ifndef COM_MULT
-#define	COM_MULT	1
+#define COM_MULT	1
 #endif
 xputc:
 #ifdef MULTIPROCESSOR
+	adr	r3, xputc
+	movw	r2, #:lower16:comlock
+	movt	r2, #:upper16:comlock
+	bfi	r3, r2, #0, #28
 	mov	r2, #1
-	ldr	r3, .Lcomlock
 10:
 	ldrex	r1, [r3]
 	cmp	r1, #0
@@ -226,7 +255,13 @@
 #endif
 
 	mov	r2, #TIMO
-	ldr	r3, .Luart0
+#ifdef CONADDR
+	movw	r3, #:lower16:CONADDR
+	movt	r3, #:upper16:CONADDR
+#elif defined(CONSADDR)
+	movw	r3, #:lower16:CONSADDR
+	movt	r3, #:upper16:CONSADDR
+#endif
 1:
 #if COM_MULT == 1
 	ldrb	r1, [r3, #(COM_LSR*COM_MULT)]
@@ -278,42 +313,40 @@
 	bne	3b
 4:
 #ifdef MULTIPROCESSOR
-	ldr	r3, .Lcomlock
+	adr	r3, xputc
+	movw	r2, #:lower16:comlock
+	movt	r2, #:upper16:comlock
+	bfi	r3, r2, #0, #28
 	mov	r0, #0
 	str	r0, [r3]
 	dsb
 #endif
 	bx	lr
 
-.Luart0:
-#ifdef CONADDR
-	.word	CONADDR
-#elif defined(CONSADDR)
-	.word	CONSADDR
-#endif
-
 #ifdef MULTIPROCESSOR
-.Lcomlock:
-	.word	comlock
-
 	.pushsection .data
 comlock:
-	.p2align 2
+	.p2align 4
 	.word	0		@ not in bss
+	.p2align 4
 
 	.popsection
 #endif /* MULTIPROCESSOR */
 #endif /* VERBOSE_INIT_ARM */
 
-#ifdef CPU_CORTEXA9
-a9_start:
+cortex_init:
 	mov	r10, lr				@ save lr
 
 	cpsid	if, #PSR_SVC32_MODE
 
 	XPUTC(#64)
-	bl	_C_LABEL(armv7_icache_inv_all)	@ invalidate i-cache
+	adr	ip, cortex_init
+	movw	r0, #:lower16:_C_LABEL(armv7_icache_inv_all)
+	movt	r0, #:upper16:_C_LABEL(armv7_icache_inv_all)
+	bfi	ip, r0, #0, #28
+	blx	ip				@ toss i-cache
 
+#ifdef CPU_CORTEXA9
 	/*
 	 * Step 1a, invalidate the all cache tags in all ways on the SCU.
 	 */
@@ -327,14 +360,20 @@
 	str	r1, [r3, #SCU_INV_ALL_REG]	@ write scu invalidate all
 	dsb
 	isb
+#endif
 
 	/*
 	 * Step 1b, invalidate the data cache
 	 */
 	XPUTC(#66)
-	bl	_C_LABEL(armv7_dcache_wbinv_all)	@ writeback/invalidate d-cache
+	adr	ip, cortex_init
+	movw	r0, #:lower16:_C_LABEL(armv7_dcache_wbinv_all)
+	movt	r0, #:upper16:_C_LABEL(armv7_dcache_wbinv_all)
+	bfi	ip, r0, #0, #28
+	blx	ip				@ writeback & toss d-cache
 	XPUTC(#67)
 
+#ifdef CPU_CORTEXA9
 	/*
 	 * Step 2, disable the data cache
 	 */
@@ -362,35 +401,59 @@
 	mcr	p15, 0, r2, c1, c0, 0		@ reenable caches
 	isb
 	XPUTC(#51)
+#endif
 
 #ifdef MULTIPROCESSOR
 	/*
-	 * Step 4b, set ACTLR.SMP=1 (and ACTRL.FX=1)
+	 * Step 4b, set ACTLR.SMP=1 (and on A9, ACTRL.FX=1)
 	 */
 	mrc	p15, 0, r0, c1, c0, 1		@ read aux ctl
 	orr	r0, r0, #CORTEXA9_AUXCTL_SMP	@ enable SMP
 	mcr	p15, 0, r0, c1, c0, 1		@ write aux ctl
 	isb
+#ifdef CPU_CORTEXA9
 	orr	r0, r0, #CORTEXA9_AUXCTL_FW	@ enable cache/tlb/coherency
 	mcr	p15, 0, r0, c1, c0, 1		@ write aux ctl
 	isb
-	XPUTC(#52)
 #endif
+	XPUTC(#52)
+#endif /* MULTIPROCESSOR */
 
 	bx	r10
-ASEND(a9_start)
+ASEND(cortex_init)
 
 /*
  * Secondary processors come here after exiting the SKU ROM.
+ * Running native endian until we have SMP enabled.  Since no data
+ * is accessed, that shouldn't be a problem.
  */
-a9_mpstart:
-#ifdef MULTIPROCESSOR
+cortex_mpstart:
+	cpsid	if, #PSR_SVC32_MODE		@ make sure we are in SVC mode
+        mrs	r0, cpsr			@ fetch CPSR value
+        msr	spsr_sxc, r0			@ set SPSR[23:8] to known value
+
+#ifndef MULTIPROCESSOR
 	/*
+	 * If not MULTIPROCESSOR, drop CPU into power saving state.
+	 */
+3:	wfe
+	b	3b
+#else
+	/*
 	 * Step 1, invalidate the caches
 	 */
-	bl	_C_LABEL(armv7_icache_inv_all)	@ toss i-cache
-	bl	_C_LABEL(armv7_dcache_inv_all)	@ toss d-cache
+	adr	ip, cortex_mpstart
+	movw	r0, #:lower16:_C_LABEL(armv7_icache_inv_all)
+	movt	r0, #:upper16:_C_LABEL(armv7_icache_inv_all)
+	bfi	ip, r0, #0, #28
+	blx	ip				@ toss i-cache
+	adr	ip, cortex_mpstart
+	movw	r0, #:lower16:_C_LABEL(armv7_dcache_inv_all)
+	movt	r0, #:upper16:_C_LABEL(armv7_dcache_inv_all)
+	bfi	ip, r0, #0, #28
+	blx	ip				@ toss d-cache
 
+#if defined(CPU_CORTEXA9)
 	/*
 	 * Step 2, wait for the SCU to be enabled
 	 */
@@ -398,6 +461,7 @@
 1:	ldr	r0, [r3, #SCU_CTL]		@ read scu control
 	tst	r0, #SCU_CTL_SCU_ENA		@ enable bit set yet?
 	bne	1b				@ try again
+#endif
 
 	/*
 	 * Step 3, set ACTLR.SMP=1 (and ACTRL.FX=1)
@@ -406,9 +470,11 @@
 	orr	r0, #CORTEXA9_AUXCTL_SMP	@ enable SMP
 	mcr	p15, 0, r0, c1, c0, 1		@ write aux ctl
 	mov	r0, r0
+#if defined(CPU_CORTEXA9)
 	orr	r0, #CORTEXA9_AUXCTL_FW		@ enable cache/tlb/coherency
 	mcr	p15, 0, r0, c1, c0, 1		@ write aux ctl
 	mov	r0, r0
+#endif
 
 	/*
 	 * We should be in SMP mode now.
@@ -416,56 +482,86 @@
 	mrc	p15, 0, r4, c0, c0, 5		@ get MPIDR
 	and	r4, r4, #7			@ get our cpu numder
 
+#ifdef __ARMEB__
+	setend	be				@ switch to BE now
+#endif
+
 #if defined(VERBOSE_INIT_ARM)
 	add	r0, r4, #48
 	bl	xputc
 #endif
 
-	ldr	r0, .Lcpu_hatched		@ now show we've hatched
+	/*
+	 * To access things that are not in .start, we need to replace the
+	 * upper 4 bits of the address with where we are currently executing.
+	 */
+	adr	r10, cortex_mpstart
+	lsr	r10, r10, #28
+
+	movw	r0, #:lower16:_C_LABEL(arm_cpu_hatched)
+	movt	r0, #:upper16:_C_LABEL(arm_cpu_hatched)
+	bfi	r0, r10, #28, #4		// replace top 4 bits
+	add	r0, r0, r10
 	mov	r5, #1
 	lsl	r5, r5, r4
-	mov	r1, r5
-	bl	_C_LABEL(atomic_or_32)
+	/*
+	 * We inline the atomic_or_32 call since we might be in a different
+	 * area of memory.
+	 */
+2:	ldrex	r1, [r0]
+	orr	r1, r1, r5
+	strex	r2, r1, [r0]
+	cmp	r2, #0
+	bne	2b
 
 	XPUTC(#97)
-#endif
 
-	cpsid	if, #PSR_SVC32_MODE		@ make sure we are in SVC mode
-
 	/* Now we will wait for someone tell this cpu to start running */
-#ifdef MULTIPROCESSOR
-	ldr	r0, .Lcpu_mbox
-#else
-	cmp	r0, r0
-#endif
-2:
-#ifdef MULTIPROCESSOR
-	dmb
+	movw	r0, #:lower16:_C_LABEL(arm_cpu_mbox)
+	movt	r0, #:upper16:_C_LABEL(arm_cpu_mbox)
+	bfi	r0, r10, #28, #4
+	add	r0, r0, r10
+3:	dmb
 	ldr	r2, [r0]
 	tst	r2, r5
-#endif
-	@wfeeq
-	beq	2b
+	wfeeq
+	beq	3b
 
-#ifdef MULTIPROCESSOR
-3:	XPUTC(#98)
-	ldr	r0, .Lcpu_marker
+	XPUTC(#98)
+	movw	r0, #:lower16:_C_LABEL(arm_cpu_marker)
+	movt	r0, #:upper16:_C_LABEL(arm_cpu_marker)
+	bfi	r0, r10, #28, #4
 	str	pc, [r0]
 
-	ldr	r0, .Lkernel_l1pt		/* get address of l1pt pvaddr */
+	movw	r0, #:lower16:_C_LABEL(kernel_l1pt)
+	movt	r0, #:upper16:_C_LABEL(kernel_l1pt)
+	bfi	r0, r10, #28, #4		/* get address of l1pt pvaddr */
 	ldr	r0, [r0, #PV_PA]		/* Now get the phys addr */
-	bl	cpu_init
+	/*
+	 * After we turn on the MMU, we will no longer be in .start, so set
+	 * up a return to the rest of the MP startup code in .text.
+	 */
+	movw	lr, #:lower16:cortex_mpcontinuation
+	movt	lr, #:upper16:cortex_mpcontinuation
+	b	arm_cpuinit
+#endif /* MULTIPROCESSOR */
+ASEND(cortex_mpstart)
 
-	ldr	r0, .Lcpu_marker
-	str	pc, [r0]
-
+#ifdef MULTIPROCESSOR
+	.pushsection .text
+cortex_mpcontinuation:
 	/* MMU, L1, are now on. */
 
-	ldr	r0, .Lcpu_info			/* get pointer to cpu_infos */
+	movw	r0, #:lower16:_C_LABEL(arm_cpu_marker)
+	movt	r0, #:upper16:_C_LABEL(arm_cpu_marker)
+	str	pc, [r0]
+
+	movw	r0, #:lower16:cpu_info
+	movt	r0, #:upper16:cpu_info		/* get pointer to cpu_infos */
 	ldr	r5, [r0, r4, lsl #2]		/* load our cpu_info */
 	ldr	r6, [r5, #CI_IDLELWP]		/* get the idlelwp */
 	ldr	r7, [r6, #L_PCB]		/* now get its pcb */
-	ldr	sp, [r7, #PCB_SP]		/* finally, we can load our SP */
+	ldr	sp, [r7, #PCB_KSP]		/* finally, we can load our SP */
 #ifdef TPIDRPRW_IS_CURCPU
 	mcr	p15, 0, r5, c13, c0, 4		/* squirrel away curcpu() */
 #elif defined(TPIDRPRW_IS_CURLWP)
@@ -475,30 +571,15 @@
 #endif
 	str	r6, [r5, #CI_CURLWP]		/* and note we are running on it */
 
-	ldr	r0, .Lcpu_marker
-	str	pc, [r0]
+	str	pc, [r0]			// r0 still has arm_cpu_marker
 
-	mov	r0, r5				/* pass cpu_info */
-	mov	r1, r4				/* pass cpu_id */
-	ldr	r2, .Lbcm53xx_cpu_hatch		/* pass md_cpu_hatch */
+	mov	r0, r5				// pass cpu_info
+	mov	r1, r4				// pass cpu_id
+	movw	r2, #:lower16:MD_CPU_HATCH	// pass md_cpu_hatch
+	movt	r2, #:upper16:MD_CPU_HATCH	// pass md_cpu_hatch
 	bl	_C_LABEL(cpu_hatch)
 	b	_C_LABEL(idle_loop)
-ASEND(a9_mpstart)
+ASEND(cortex_mpcontinuation)
 	/* NOT REACHED */
-
-.Lkernel_l1pt:
-	.word	_C_LABEL(kernel_l1pt)
-.Lcpu_info:
-	.word	_C_LABEL(cpu_info)
-.Lcpu_max:
-	.word	_C_LABEL(arm_cpu_max)
-.Lcpu_hatched:
-	.word	_C_LABEL(arm_cpu_hatched)
-.Lcpu_mbox:
-	.word	_C_LABEL(arm_cpu_mbox)
-.Lcpu_marker:
-	.word	_C_LABEL(arm_cpu_marker)
-.Lbcm53xx_cpu_hatch:
-	.word	_C_LABEL(bcm53xx_cpu_hatch)
+	.popsection
 #endif /* MULTIPROCESSOR */
-#endif /* CPU_CORTEXA9 */

cvs diff -r1.10 -r1.11 src/sys/arch/evbarm/bcm53xx/Attic/bcm53xx_start.S
--- src/sys/arch/evbarm/bcm53xx/Attic/bcm53xx_start.S 2014/01/24 04:15:33 1.10
+++ src/sys/arch/evbarm/bcm53xx/Attic/bcm53xx_start.S 2014/02/21 22:22:48 1.11
@@ -1,4 +1,4 @@
-/*	$NetBSD: bcm53xx_start.S,v 1.10 2014/01/24 04:15:33 matt Exp $	*/
+/*	$NetBSD: bcm53xx_start.S,v 1.11 2014/02/21 22:22:48 matt Exp $	*/
 /*-
  * Copyright (c) 2012 The NetBSD Foundation, Inc.
  * All rights reserved.
@@ -45,7 +45,7 @@
 #define	CONADDR		0x18000300
 #endif
 
-RCSID("$NetBSD: bcm53xx_start.S,v 1.10 2014/01/24 04:15:33 matt Exp $")
+RCSID("$NetBSD: bcm53xx_start.S,v 1.11 2014/02/21 22:22:48 matt Exp $")
 
 #undef VERBOSE_INIT_ARM
 #define VERBOSE_INIT_ARM
@@ -74,10 +74,10 @@
 	/*
 	 * Save any arguments u-boot passed us.
 	 */
-	movw	r4, #:lower16:(uboot_args-.LPIC0)
-	movt	r4, #:upper16:(uboot_args-.LPIC0)
-	bic	r4, r4, #0xf0000000
-	add	r4, r4, pc
+	adr	r4, _C_LABEL(bcm53xx_start)
+	movw	r5, #:lower16:uboot_args
+	movt	r5, #:upper16:uboot_args
+	bfi	r4, r5, #0, #28
 	stmia	r4, {r0-r3}
 .LPIC0:
 
@@ -89,16 +89,18 @@
 	str	r1, [r0]
 
 	/*
-	 * Cal the initial start code for the a9
+	 * Call the initial start code for Cortex cores
 	 */
-	bl	a9_start
+	bl	cortex_init
 
 	/*
 	 * Set up a preliminary mapping in the MMU to allow us to run
 	 * at KERNEL_BASE with caches on.
 	 */
+	movw	r1, #:lower16:(mmu_init_table-.LPIC1)
+	add	r1, r1, pc
+.LPIC1:
 	ldr	r0, .Ltemp_l1_table	/* The L1PT address - entered into TTB later */
-	adr	r1, mmu_init_table
 	bl	arm_boot_l1pt_init
 
 	XPUTC(#68)
@@ -108,8 +110,9 @@
 	 * SKU ROM but setting the magic LUT address to our own mp_start
 	 * routine. 
 	 */
-	ldr	r1, .Lsku_rom_lut
-	adr	r2, a9_mpstart
+	movw	r1, #:lower16:0xffff0400
+	movt	r1, #:upper16:0xffff0400
+	adr	r2, cortex_mpstart
 	str	r2, [r1]
 	sev				/* wake up the others */
 
@@ -123,15 +126,13 @@
 
 	XPUTC(#89)
 
+	adr	r1, bcm53xx_start
 	movw	r0, #:lower16:uboot_args
 	movt	r0, #:upper16:uboot_args
+	bfi	r1, r0, #0, #28
 	ldr	r2, [r0]
-	movw	r1, #:lower16:(uboot_args-.LPIC1)
-	movt	r1, #:upper16:(uboot_args-.LPIC1)
-	add	r1, r1, pc
-	ldr	r1, [r1]
-.LPIC1:
-	cmp	r1, r2
+	ldr	r3, [r1]
+	cmp	r1, r3
 1:	bne	1b
 
 	XPUTC(#90)
@@ -158,9 +159,6 @@
 	nop
 
 	/* NOTREACHED */
-
-.Lsku_rom_lut:
-	.word	0xffff0400
 
 .Lcca_wdog:
 	.word	0x18000080

cvs diff -r1.5 -r1.6 src/sys/arch/evbarm/cubie/Attic/cubie_start.S
--- src/sys/arch/evbarm/cubie/Attic/cubie_start.S 2014/01/24 05:13:06 1.5
+++ src/sys/arch/evbarm/cubie/Attic/cubie_start.S 2014/02/21 22:22:48 1.6
@@ -40,7 +40,7 @@
 #include <arm/allwinner/awin_reg.h>
 #include <evbarm/cubie/platform.h>  
 
-RCSID("$NetBSD: cubie_start.S,v 1.5 2014/01/24 05:13:06 matt Exp $")
+RCSID("$NetBSD: cubie_start.S,v 1.6 2014/02/21 22:22:48 matt Exp $")
 
 #if defined(VERBOSE_INIT_ARM)
 #define	XPUTC(n)	mov r0, n; bl xputc
@@ -56,6 +56,8 @@
 #define INIT_MEMSIZE	128
 #define	TEMP_L1_TABLE	(AWIN_SDRAM_PBASE + INIT_MEMSIZE * 0x100000 - L1_TABLE_SIZE)
 
+#define	MD_CPU_HATCH	_C_LABEL(awin_cpu_hatch)
+
 /*
  * Kernel start routine for BEAGLEBOARD boards.
  * At this point, this code has been loaded into SDRAM
@@ -73,26 +75,38 @@
 	cpsid	if, #PSR_SVC32_MODE
 
 	/*
-	 * Save any arguments passed to us (do it PIC).
+	 * Save any arguments passed to us.  But since .start is at 0x40000000
+	 * and .text is at 0x8000000, we can't use the address that the linker
+	 * gave us directly.  We have to replace the upper 4 bits of the
+	 * address the linker gave us with the upper 4 bits of our PC (or,
+	 * equivalently, replace the lower 28 bits of our PC with the lower
+	 * 28 bits of what the linker gave us).
 	 */
-	movw	r4, #:lower16:uboot_args-.LPIC0
-	movt	r4, #:upper16:uboot_args-.LPIC0
+	adr	r4, _C_LABEL(cubie_start)
+	movw	r5, #:lower16:uboot_args
+	movt	r5, #:upper16:uboot_args
+	bfi	r4, r5, #0, #28
+
+	stmia	r4, {r0-r3}		// Save the arguments
 	/*
-	 * Since .start is at 0x40000000 and .text is at 0x8000000
-	 * we have to clear the upper bits of the address so it's relative
-	 * to the current PC, not .text.
+	 * Turn on the SMP bit
 	 */
-	bic	r4, r4, #0xf0000000
-	add	r4, r4, pc
-	stmia	r4, {r0-r3}
-.LPIC0:
-#ifdef CPU_CORTEXA9
-	/*
-	 * Turn on the SCU if we are on a Cortex-A9
-	 */
-	bl	a9_start
+	bl	cortex_init
 	XPUTC(#67)
-#endif
+
+#if defined(MULTIPROCESSOR) && 0
+	movw	r0, #:lower16:(AWIN_CORE_PBASE+AWIN_CPUCFG_OFFSET)
+	movt	r0, #:upper16:(AWIN_CORE_PBASE+AWIN_CPUCFG_OFFSET)
+
+	/* Set where the other CPU(s) are going to execute */
+	adr	r1, cortex_mpstart
+	str	r1, [r0, #AWIN_CPUCFG_PRIVATE_REG]
+
+	/* Bring CPU1 out of reset */
+	ldr	r1, [r0, #AWIN_CPUCFG_CPU1_RST_CTRL_REG]
+	orr	r1, r1, #(AWIN_CPUCFG_CPU_RST_CTRL_CORE_RESET|AWIN_CPUCFG_CPU_RST_CTRL_RESET)
+	str	r1, [r0, #AWIN_CPUCFG_CPU1_RST_CTRL_REG]
+#endif /* MULTIPROCESSOR */
 
 	/*
 	 * Set up a preliminary mapping in the MMU to allow us to run