Sun May 23 18:49:14 2010 UTC ()
Split copy related functions out of locore.s into copy.S, move functions
only needed on multiprocessors into mp_subr.S.


(martin)
diff -r1.68 -r1.69 src/sys/arch/sparc64/conf/Makefile.sparc64
diff -r1.3 -r1.4 src/sys/arch/sparc64/include/locore.h
diff -r0 -r1.1 src/sys/arch/sparc64/sparc64/copy.S
diff -r0 -r1.1 src/sys/arch/sparc64/sparc64/mp_subr.S
diff -r1.329 -r1.330 src/sys/arch/sparc64/sparc64/locore.s

cvs diff -r1.68 -r1.69 src/sys/arch/sparc64/conf/Makefile.sparc64 (expand / switch to unified diff)

--- src/sys/arch/sparc64/conf/Makefile.sparc64 2008/12/11 05:27:42 1.68
+++ src/sys/arch/sparc64/conf/Makefile.sparc64 2010/05/23 18:49:14 1.69
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1# $NetBSD: Makefile.sparc64,v 1.68 2008/12/11 05:27:42 alc Exp $ 1# $NetBSD: Makefile.sparc64,v 1.69 2010/05/23 18:49:14 martin Exp $
2 2
3#========================================================================= 3#=========================================================================
4# 4#
5# ***** WARNING ****** 5# ***** WARNING ******
6# 6#
7# ANYONE WHO CHANGES THIS MAKEFILE AND DOES NOT TEST BOTH A 32-BIT 7# ANYONE WHO CHANGES THIS MAKEFILE AND DOES NOT TEST BOTH A 32-BIT
8# AND 64-BIT KERNEL WILL BE SHOT. 8# AND 64-BIT KERNEL WILL BE SHOT.
9# 9#
10#========================================================================= 10#=========================================================================
11 11
12 12
13# Makefile for NetBSD 13# Makefile for NetBSD
14# 14#
@@ -75,33 +75,40 @@ AFLAGS+= -Wa,-Av8plusa @@ -75,33 +75,40 @@ AFLAGS+= -Wa,-Av8plusa
75CC_NOOPT= ${NORMAL_C:C/-O./-O0/} 75CC_NOOPT= ${NORMAL_C:C/-O./-O0/}
76.endif 76.endif
77CFLAGS+= -mno-fpu 77CFLAGS+= -mno-fpu
78AFLAGS+= -x assembler-with-cpp -traditional-cpp 78AFLAGS+= -x assembler-with-cpp -traditional-cpp
79 79
80## 80##
81## (3) libkern and compat 81## (3) libkern and compat
82## 82##
83# KERN_AS= obj # memcpy, memset, etc. are in locore.s 83# KERN_AS= obj # memcpy, memset, etc. are in locore.s
84 84
85## 85##
86## (4) local objects, compile rules, and dependencies 86## (4) local objects, compile rules, and dependencies
87## 87##
88MD_OBJS= locore.o 88MD_OBJS= locore.o copy.o mp_subr.o
89MD_CFILES= 89MD_CFILES=
90MD_SFILES= ${SPARC64}/sparc64/locore.s 90MD_SFILES= ${SPARC64}/sparc64/locore.s ${SPARC64}/sparc64/copy.S \
 91 ${SPARC64}/sparc64/mp_subr.S
91 92
92locore.o: ${SPARC64}/sparc64/locore.s assym.h 93locore.o: ${SPARC64}/sparc64/locore.s assym.h
93 ${NORMAL_S} 94 ${NORMAL_S}
94 95
 96copy.o: ${SPARC64}/sparc64/copy.S assym.h
 97 ${NORMAL_S}
 98
 99mp_subr.o: ${SPARC64}/sparc64/mp_subr.S assym.h
 100 ${NORMAL_S}
 101
95## 102##
96## (5) link settings 103## (5) link settings
97## 104##
98TEXTADDR?= 01000000 105TEXTADDR?= 01000000
99LINKFORMAT= -T ${SPARC64}/conf/${KERN_LDSCRIPT} 106LINKFORMAT= -T ${SPARC64}/conf/${KERN_LDSCRIPT}
100.if ${LP64} == "yes" 107.if ${LP64} == "yes"
101KERN_LDSCRIPT?= kern.ldscript 108KERN_LDSCRIPT?= kern.ldscript
102.else 109.else
103KERN_LDSCRIPT?= kern32.ldscript 110KERN_LDSCRIPT?= kern32.ldscript
104.endif 111.endif
105 112
106## 113##
107## (6) port specific target dependencies 114## (6) port specific target dependencies

cvs diff -r1.3 -r1.4 src/sys/arch/sparc64/include/locore.h (expand / switch to unified diff)

--- src/sys/arch/sparc64/include/locore.h 2010/03/07 01:52:44 1.3
+++ src/sys/arch/sparc64/include/locore.h 2010/05/23 18:49:14 1.4
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: locore.h,v 1.3 2010/03/07 01:52:44 mrg Exp $ */ 1/* $NetBSD: locore.h,v 1.4 2010/05/23 18:49:14 martin Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 1996-2002 Eduardo Horvath 4 * Copyright (c) 1996-2002 Eduardo Horvath
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 12 *
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
@@ -80,13 +80,166 @@ @@ -80,13 +80,166 @@
80#define STKB 0 80#define STKB 0
81#define CCCR %icc 81#define CCCR %icc
82#endif 82#endif
83 83
84/* Give this real authority: reset the machine */ 84/* Give this real authority: reset the machine */
85#define NOTREACHED sir 85#define NOTREACHED sir
86 86
87/* if < 32, copy by bytes, memcpy, kcopy, ... */ 87/* if < 32, copy by bytes, memcpy, kcopy, ... */
88#define BCOPY_SMALL 32 88#define BCOPY_SMALL 32
89 89
90/* use as needed to align things on longword boundaries */ 90/* use as needed to align things on longword boundaries */
91#define _ALIGN .align 8 91#define _ALIGN .align 8
92#define ICACHE_ALIGN .align 32 92#define ICACHE_ALIGN .align 32
 93
 94/* A few convenient abbreviations for trapframe fields. */
 95#define TF_G TF_GLOBAL
 96#define TF_O TF_OUT
 97#define TF_L TF_LOCAL
 98#define TF_I TF_IN
 99
 100/* Let us use same syntax as C code */
 101#define Debugger() ta 1; nop
 102
 103
 104/*
 105 * This macro will clear out a cache line before an explicit
 106 * access to that location. It's mostly used to make certain
 107 * loads bypassing the D$ do not get stale D$ data.
 108 *
 109 * It uses a register with the address to clear and a temporary
 110 * which is destroyed.
 111 */
 112#ifdef DCACHE_BUG
 113#define DLFLUSH(a,t) \
 114 andn a, 0x3f, t; \
 115 stxa %g0, [ t ] ASI_DCACHE_TAG; \
 116 membar #Sync
 117/* The following can be used if the pointer is 32-byte aligned */
 118#define DLFLUSH2(t) \
 119 stxa %g0, [ t ] ASI_DCACHE_TAG; \
 120 membar #Sync
 121#else
 122#define DLFLUSH(a,t)
 123#define DLFLUSH2(t)
 124#endif
 125
 126
 127/*
 128 * Combine 2 regs -- used to convert 64-bit ILP32
 129 * values to LP64.
 130 */
 131#define COMBINE(r1, r2, d) \
 132 sllx r1, 32, d; \
 133 or d, r2, d
 134
 135/*
 136 * Split 64-bit value in 1 reg into high and low halves.
 137 * Used for ILP32 return values.
 138 */
 139#define SPLIT(r0, r1) \
 140 srl r0, 0, r1; \
 141 srlx r0, 32, r0
 142
 143
 144/*
 145 * A handy macro for maintaining instrumentation counters.
 146 * Note that this clobbers %o0, %o1 and %o2. Normal usage is
 147 * something like:
 148 * foointr:
 149 * TRAP_SETUP(...) ! makes %o registers safe
 150 * INCR(_C_LABEL(cnt)+V_FOO) ! count a foo
 151 */
 152#define INCR(what) \
 153 sethi %hi(what), %o0; \
 154 or %o0, %lo(what), %o0; \
 15599: \
 156 lduw [%o0], %o1; \
 157 add %o1, 1, %o2; \
 158 casa [%o0] ASI_P, %o1, %o2; \
 159 cmp %o1, %o2; \
 160 bne,pn %icc, 99b; \
 161 nop
 162
 163/*
 164 * A couple of handy macros to save and restore globals to/from
 165 * locals. Since udivrem uses several globals, and it's called
 166 * from vsprintf, we need to do this before and after doing a printf.
 167 */
 168#define GLOBTOLOC \
 169 mov %g1, %l1; \
 170 mov %g2, %l2; \
 171 mov %g3, %l3; \
 172 mov %g4, %l4; \
 173 mov %g5, %l5; \
 174 mov %g6, %l6; \
 175 mov %g7, %l7
 176
 177#define LOCTOGLOB \
 178 mov %l1, %g1; \
 179 mov %l2, %g2; \
 180 mov %l3, %g3; \
 181 mov %l4, %g4; \
 182 mov %l5, %g5; \
 183 mov %l6, %g6; \
 184 mov %l7, %g7
 185
 186/* Load strings address into register; NOTE: hidden local label 99 */
 187#define LOAD_ASCIZ(reg, s) \
 188 set 99f, reg ; \
 189 .data ; \
 19099: .asciz s ; \
 191 _ALIGN ; \
 192 .text
 193
 194/*
 195 * Handy stack conversion macros.
 196 * They correctly switch to requested stack type
 197 * regardless of the current stack.
 198 */
 199
 200#define TO_STACK64(size) \
 201 save %sp, size, %sp; \
 202 add %sp, -BIAS, %o0; /* Convert to 64-bits */ \
 203 andcc %sp, 1, %g0; /* 64-bit stack? */ \
 204 movz %icc, %o0, %sp
 205
 206#define TO_STACK32(size) \
 207 save %sp, size, %sp; \
 208 add %sp, +BIAS, %o0; /* Convert to 32-bits */ \
 209 andcc %sp, 1, %g0; /* 64-bit stack? */ \
 210 movnz %icc, %o0, %sp
 211
 212#ifdef _LP64
 213#define STACKFRAME(size) TO_STACK64(size)
 214#else
 215#define STACKFRAME(size) TO_STACK32(size)
 216#endif
 217
 218/*
 219 * Primitives
 220 */
 221#ifdef ENTRY
 222#undef ENTRY
 223#endif
 224
 225#ifdef GPROF
 226 .globl _mcount
 227#define ENTRY(x) \
 228 .globl _C_LABEL(x); .proc 1; .type _C_LABEL(x),@function; \
 229_C_LABEL(x): ; \
 230 .data; \
 231 .align 8; \
 2320: .uaword 0; .uaword 0; \
 233 .text; \
 234 save %sp, -CC64FSZ, %sp; \
 235 sethi %hi(0b), %o0; \
 236 call _mcount; \
 237 or %o0, %lo(0b), %o0; \
 238 restore
 239#else
 240#define ENTRY(x) .globl _C_LABEL(x); .proc 1; \
 241 .type _C_LABEL(x),@function; _C_LABEL(x):
 242#endif
 243#define ALTENTRY(x) .globl _C_LABEL(x); _C_LABEL(x):
 244
 245

File Added: src/sys/arch/sparc64/sparc64/copy.S
/*	$NetBSD: copy.S,v 1.1 2010/05/23 18:49:14 martin Exp $	*/

/*
 * Copyright (c) 2006-2010 Matthew R. Green
 * Copyright (c) 1996-2002 Eduardo Horvath
 * Copyright (c) 1996 Paul Kranenburg
 * Copyright (c) 1996
 * 	The President and Fellows of Harvard College.
 *	All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.
 *	All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *	This product includes software developed by Harvard University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:

 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by Paul Kranenburg.
 * 4. Neither the name of the University nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 *	@(#)locore.s	8.4 (Berkeley) 12/10/93
 */


#include "opt_ddb.h"
#include "opt_kgdb.h"
#include "opt_multiprocessor.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_netbsd32.h"
#include "opt_lockdebug.h"

#include "assym.h"
#include <machine/param.h>
#include <machine/ctlreg.h>
#include <machine/asm.h>
#include <machine/locore.h>

#include "ksyms.h"

	.register	%g2,#scratch
	.register	%g3,#scratch

/*
 * copyinstr(fromaddr, toaddr, maxlength, &lencopied)
 *
 * Copy a null terminated string from the user address space into
 * the kernel address space.
 *
 * Returns 0 on success, ENAMETOOLONG if maxlength is exhausted (or
 * was <= 0 on entry).  On a fault the pcb_onfault handler (Lcsdone,
 * shared with copyoutstr) returns the fault error.  If lencopied is
 * non-NULL, *lencopied is set to the number of bytes written to
 * toaddr, including the terminating NUL.
 */
ENTRY(copyinstr)
	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	8f, %o0
	mov	%i0, %o1
	mov	%i1, %o2
	mov	%i2, %o3
	call	printf
	 mov	%i3, %o4
	restore
	.data
8:	.asciz	"copyinstr: from=%x to=%x max=%x &len=%x\n"
	_ALIGN
	.text
#endif
	brgz,pt	%o2, 1f					! Make sure len is valid
	 sethi	%hi(CPCB), %o4		! (first instr of copy)
	retl
	 mov	ENAMETOOLONG, %o0
1:
	LDPTR	[%o4 + %lo(CPCB)], %o4	! catch faults
	set	Lcsdone, %o5
	membar	#Sync
	STPTR	%o5, [%o4 + PCB_ONFAULT]

	mov	%o1, %o5		!	save = toaddr;
! XXX should do this in bigger chunks when possible
0:					! loop:
	ldsba	[%o0] ASI_AIUS, %g1	!	c = *fromaddr; (user-space load)
	stb	%g1, [%o1]		!	*toaddr++ = c;
	inc	%o1
	brz,a,pn	%g1, Lcsdone	!	if (c == NULL)
	 clr	%o0			!		{ error = 0; done; }
	deccc	%o2			!	if (--len > 0) {
	bg,pt	%icc, 0b		!		fromaddr++;
	 inc	%o0			!		goto loop;
	ba,pt	%xcc, Lcsdone		!	}
	 mov	ENAMETOOLONG, %o0	!	error = ENAMETOOLONG;
	NOTREACHED

/*
 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied)
 *
 * Copy a null terminated string from the kernel
 * address space to the user address space.
 *
 * Returns 0 on success, ENAMETOOLONG if maxlength is exhausted (or
 * was <= 0 on entry).  The Lcsdone epilogue below is shared with
 * copyinstr above; it stores the byte count through &lencopied (if
 * non-NULL) and clears cpcb->pcb_onfault.
 */
ENTRY(copyoutstr)
	! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	8f, %o0
	mov	%i0, %o1
	mov	%i1, %o2
	mov	%i2, %o3
	call	printf
	 mov	%i3, %o4
	restore
	.data
8:	.asciz	"copyoutstr: from=%x to=%x max=%x &len=%x\n"
	_ALIGN
	.text
#endif
	brgz,pt	%o2, 1f					! Make sure len is valid
	 sethi	%hi(CPCB), %o4		! (first instr of copy)
	retl
	 mov	ENAMETOOLONG, %o0
1:
	LDPTR	[%o4 + %lo(CPCB)], %o4	! catch faults
	set	Lcsdone, %o5
	membar	#Sync
	STPTR	%o5, [%o4 + PCB_ONFAULT]

	mov	%o1, %o5		!	save = toaddr;
! XXX should do this in bigger chunks when possible
0:					! loop:
	ldsb	[%o0], %g1		!	c = *fromaddr;
	stba	%g1, [%o1] ASI_AIUS	!	*toaddr++ = c; (user-space store)
	inc	%o1
	brz,a,pn	%g1, Lcsdone	!	if (c == NULL)
	 clr	%o0			!		{ error = 0; done; }
	deccc	%o2			!	if (--len > 0) {
	bg,pt	%icc, 0b		!		fromaddr++;
	 inc	%o0			!		goto loop;
					!	}
	mov	ENAMETOOLONG, %o0	!	error = ENAMETOOLONG;
Lcsdone:				! done: (also the fault handler for both copy*str)
	sub	%o1, %o5, %o1		!	len = to - save;
	brnz,a	%o3, 1f			!	if (lencopied)
	 STPTR	%o1, [%o3]		!		*lencopied = len;
1:
	retl				! cpcb->pcb_onfault = 0;
	 STPTR	%g0, [%o4 + PCB_ONFAULT]! return (error);


/*
 * copyin(src, dst, len)
 *
 * Copy specified amount of data from user space into the kernel.
 *
 * This is a modified version of memcpy that uses ASI_AIUS.  When
 * memcpy is optimized to use block copy ASIs, this should be also.
 *
 * Register usage throughout:
 *	%o0 = src (user), %o1 = dst (kernel), %o2 = remaining len,
 *	%o3 = scratch (cpcb / src^dst), %o4 = data being moved.
 * Faults are caught via cpcb->pcb_onfault = Lcopyfault (see below).
 * Returns 0 on success; Lcopyfault supplies the error on a fault.
 */

ENTRY(copyin)
!	flushw			! Make sure we don't have stack probs & lose hibits of %o
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	mov	%i0, %o1
	mov	%i1, %o2
	call	printf
	 mov	%i2, %o3
	restore
	.data
1:	.asciz	"copyin: src=%x dest=%x len=%x\n"
	_ALIGN
	.text
#endif
	sethi	%hi(CPCB), %o3
	wr	%g0, ASI_AIUS, %asi
	LDPTR	[%o3 + %lo(CPCB)], %o3
	set	Lcopyfault, %o4
!	mov	%o7, %g7		! save return address
	membar	#Sync
	STPTR	%o4, [%o3 + PCB_ONFAULT]
	cmp	%o2, BCOPY_SMALL
Lcopyin_start:
	bge,a	Lcopyin_fancy	! if >= this many, go be fancy.
	 btst	7, %o0		! (part of being fancy)

	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
0:
	 inc	%o0
	ldsba	[%o0 - 1] %asi, %o4	!	*dst++ = (++src)[-1];
	stb	%o4, [%o1]
	deccc	%o2
	bge	0b
	 inc	%o1
1:
	ba	Lcopyin_done
	 clr	%o0
	NOTREACHED

	/*
	 * Plenty of data to copy, so try to do it optimally.
	 */
Lcopyin_fancy:
	! check for common case first: everything lines up.
!	btst	7, %o0		! done already
	bne	1f
	 .empty
	btst	7, %o1
	be,a	Lcopyin_doubles
	 dec	8, %o2		! if all lined up, len -= 8, goto copyin_doubles

	! If the low bits match, we can make these line up.
1:
	xor	%o0, %o1, %o3	! t = src ^ dst;
	btst	1, %o3		! if (t & 1) {
	be,a	1f
	 btst	1, %o0		! [delay slot: if (src & 1)]

	! low bits do not match, must copy by bytes.
0:
	ldsba	[%o0] %asi, %o4	!	do {
	inc	%o0		!		(++dst)[-1] = *src++;
	inc	%o1
	deccc	%o2
	bnz	0b		!	} while (--len != 0);
	 stb	%o4, [%o1 - 1]
	ba	Lcopyin_done
	 clr	%o0
	NOTREACHED

	! lowest bit matches, so we can copy by words, if nothing else
1:
	be,a	1f		! if (src & 1) {
	 btst	2, %o3		! [delay slot: if (t & 2)]

	! although low bits match, both are 1: must copy 1 byte to align
	ldsba	[%o0] %asi, %o4	!	*dst++ = *src++;
	stb	%o4, [%o1]
	inc	%o0
	inc	%o1
	dec	%o2		!	len--;
	btst	2, %o3		! } [if (t & 2)]
1:
	be,a	1f		! if (t & 2) {
	 btst	2, %o0		! [delay slot: if (src & 2)]
	dec	2, %o2		!	len -= 2;
0:
	ldsha	[%o0] %asi, %o4	!	do {
	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
	inc	2, %o0		!		dst += 2, src += 2;
	deccc	2, %o2		!	} while ((len -= 2) >= 0);
	bge	0b
	 inc	2, %o1
	b	Lcopyin_mopb	!	goto mop_up_byte;
	 btst	1, %o2		! } [delay slot: if (len & 1)]
	NOTREACHED

	! low two bits match, so we can copy by longwords
1:
	be,a	1f		! if (src & 2) {
	 btst	4, %o3		! [delay slot: if (t & 4)]

	! although low 2 bits match, they are 10: must copy one short to align
	ldsha	[%o0] %asi, %o4	!	(*short *)dst = *(short *)src;
	sth	%o4, [%o1]
	inc	2, %o0		!	dst += 2;
	inc	2, %o1		!	src += 2;
	dec	2, %o2		!	len -= 2;
	btst	4, %o3		! } [if (t & 4)]
1:
	be,a	1f		! if (t & 4) {
	 btst	4, %o0		! [delay slot: if (src & 4)]
	dec	4, %o2		!	len -= 4;
0:
	lduwa	[%o0] %asi, %o4	!	do {
	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
	inc	4, %o0		!		dst += 4, src += 4;
	deccc	4, %o2		!	} while ((len -= 4) >= 0);
	bge	0b
	 inc	4, %o1
	b	Lcopyin_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! } [delay slot: if (len & 2)]
	NOTREACHED

	! low three bits match, so we can copy by doublewords
1:
	be	1f		! if (src & 4) {
	 dec	8, %o2		! [delay slot: len -= 8]
	lduwa	[%o0] %asi, %o4	!	*(int *)dst = *(int *)src;
	st	%o4, [%o1]
	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
	inc	4, %o1
	dec	4, %o2		! }
1:
Lcopyin_doubles:
	ldxa	[%o0] %asi, %g1	! do {
	stx	%g1, [%o1]	!	*(double *)dst = *(double *)src;
	inc	8, %o0		!	dst += 8, src += 8;
	deccc	8, %o2		! } while ((len -= 8) >= 0);
	bge	Lcopyin_doubles
	 inc	8, %o1

	! check for a usual case again (save work)
	btst	7, %o2		! if ((len & 7) == 0)
	be	Lcopyin_done	!	goto copyin_done;

	 btst	4, %o2		! if ((len & 4)) == 0)
	be,a	Lcopyin_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! [delay slot: if (len & 2)]
	lduwa	[%o0] %asi, %o4	!	*(int *)dst = *(int *)src;
	st	%o4, [%o1]
	inc	4, %o0		!	dst += 4;
	inc	4, %o1		!	src += 4;
	btst	2, %o2		! } [if (len & 2)]

1:
	! mop up trailing word (if present) and byte (if present).
Lcopyin_mopw:
	be	Lcopyin_mopb	! no word, go mop up byte
	 btst	1, %o2		! [delay slot: if (len & 1)]
	ldsha	[%o0] %asi, %o4	! *(short *)dst = *(short *)src;
	be	Lcopyin_done	! if ((len & 1) == 0) goto done;
	 sth	%o4, [%o1]
	ldsba	[%o0 + 2] %asi, %o4	! dst[2] = src[2];
	stb	%o4, [%o1 + 2]
	ba	Lcopyin_done
	 clr	%o0
	NOTREACHED

	! mop up trailing byte (if present).
Lcopyin_mopb:
	be,a	Lcopyin_done
	 nop
	ldsba	[%o0] %asi, %o4
	stb	%o4, [%o1]

Lcopyin_done:
	sethi	%hi(CPCB), %o3
!	stb	%o4,[%o1]	! Store last byte -- should not be needed
	LDPTR	[%o3 + %lo(CPCB)], %o3
	membar	#Sync
	STPTR	%g0, [%o3 + PCB_ONFAULT]	! clear fault handler
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
	retl
	 clr	%o0			! return 0

/*
 * copyout(src, dst, len)
 *
 * Copy specified amount of data from kernel to user space.
 * Just like copyin, except that the `dst' addresses are user space
 * rather than the `src' addresses.
 *
 * This is a modified version of memcpy that uses ASI_AIUS.  When
 * memcpy is optimized to use block copy ASIs, this should be also.
 *
 * Register usage mirrors copyin: %o0 = src (kernel), %o1 = dst (user),
 * %o2 = remaining len, %o3/%o4 = scratch.  Faults go to Lcopyfault.
 */
 /*
  * This needs to be reimplemented to really do the copy.
  * NOTE(review): stale comment carried over from locore.s -- the code
  * below does perform the copy; confirm and drop.
  */
ENTRY(copyout)
	/*
	 * ******NOTE****** this depends on memcpy() not using %g7
	 */
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	mov	%i0, %o1
	set	CTX_SECONDARY, %o4
	mov	%i1, %o2
	ldxa	[%o4] ASI_DMMU, %o4
	call	printf
	 mov	%i2, %o3
	restore
	.data
1:	.asciz	"copyout: src=%x dest=%x len=%x ctx=%d\n"
	_ALIGN
	.text
#endif
Ldocopy:
	sethi	%hi(CPCB), %o3
	wr	%g0, ASI_AIUS, %asi
	LDPTR	[%o3 + %lo(CPCB)], %o3
	set	Lcopyfault, %o4
!	mov	%o7, %g7		! save return address
	membar	#Sync
	STPTR	%o4, [%o3 + PCB_ONFAULT]
	cmp	%o2, BCOPY_SMALL
Lcopyout_start:
	membar	#StoreStore
	bge,a	Lcopyout_fancy	! if >= this many, go be fancy.
	 btst	7, %o0		! (part of being fancy)

	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
	 .empty
0:
	inc	%o0
	ldsb	[%o0 - 1], %o4	!	(++dst)[-1] = *src++;
	stba	%o4, [%o1] %asi
	deccc	%o2
	bge	0b
	 inc	%o1
1:
	ba	Lcopyout_done
	 clr	%o0
	NOTREACHED

	/*
	 * Plenty of data to copy, so try to do it optimally.
	 */
Lcopyout_fancy:
	! check for common case first: everything lines up.
!	btst	7, %o0		! done already
	bne	1f
	 .empty
	btst	7, %o1
	be,a	Lcopyout_doubles
	 dec	8, %o2		! if all lined up, len -= 8, goto copyout_doubles

	! If the low bits match, we can make these line up.
1:
	xor	%o0, %o1, %o3	! t = src ^ dst;
	btst	1, %o3		! if (t & 1) {
	be,a	1f
	 btst	1, %o0		! [delay slot: if (src & 1)]

	! low bits do not match, must copy by bytes.
0:
	ldsb	[%o0], %o4	!	do {
	inc	%o0		!		(++dst)[-1] = *src++;
	inc	%o1
	deccc	%o2
	bnz	0b		!	} while (--len != 0);
	 stba	%o4, [%o1 - 1] %asi
	ba	Lcopyout_done
	 clr	%o0
	NOTREACHED

	! lowest bit matches, so we can copy by words, if nothing else
1:
	be,a	1f		! if (src & 1) {
	 btst	2, %o3		! [delay slot: if (t & 2)]

	! although low bits match, both are 1: must copy 1 byte to align
	ldsb	[%o0], %o4	!	*dst++ = *src++;
	stba	%o4, [%o1] %asi
	inc	%o0
	inc	%o1
	dec	%o2		!	len--;
	btst	2, %o3		! } [if (t & 2)]
1:
	be,a	1f		! if (t & 2) {
	 btst	2, %o0		! [delay slot: if (src & 2)]
	dec	2, %o2		!	len -= 2;
0:
	ldsh	[%o0], %o4	!	do {
	stha	%o4, [%o1] %asi	!		*(short *)dst = *(short *)src;
	inc	2, %o0		!		dst += 2, src += 2;
	deccc	2, %o2		!	} while ((len -= 2) >= 0);
	bge	0b
	 inc	2, %o1
	b	Lcopyout_mopb	!	goto mop_up_byte;
	 btst	1, %o2		! } [delay slot: if (len & 1)]
	NOTREACHED

	! low two bits match, so we can copy by longwords
1:
	be,a	1f		! if (src & 2) {
	 btst	4, %o3		! [delay slot: if (t & 4)]

	! although low 2 bits match, they are 10: must copy one short to align
	ldsh	[%o0], %o4	!	(*short *)dst = *(short *)src;
	stha	%o4, [%o1] %asi
	inc	2, %o0		!	dst += 2;
	inc	2, %o1		!	src += 2;
	dec	2, %o2		!	len -= 2;
	btst	4, %o3		! } [if (t & 4)]
1:
	be,a	1f		! if (t & 4) {
	 btst	4, %o0		! [delay slot: if (src & 4)]
	dec	4, %o2		!	len -= 4;
0:
	lduw	[%o0], %o4	!	do {
	sta	%o4, [%o1] %asi	!		*(int *)dst = *(int *)src;
	inc	4, %o0		!		dst += 4, src += 4;
	deccc	4, %o2		!	} while ((len -= 4) >= 0);
	bge	0b
	 inc	4, %o1
	b	Lcopyout_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! } [delay slot: if (len & 2)]
	NOTREACHED

	! low three bits match, so we can copy by doublewords
1:
	be	1f		! if (src & 4) {
	 dec	8, %o2		! [delay slot: len -= 8]
	lduw	[%o0], %o4	!	*(int *)dst = *(int *)src;
	sta	%o4, [%o1] %asi
	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
	inc	4, %o1
	dec	4, %o2		! }
1:
Lcopyout_doubles:
	ldx	[%o0], %g1	! do {
	stxa	%g1, [%o1] %asi	!	*(double *)dst = *(double *)src;
	inc	8, %o0		!	dst += 8, src += 8;
	deccc	8, %o2		! } while ((len -= 8) >= 0);
	bge	Lcopyout_doubles
	 inc	8, %o1

	! check for a usual case again (save work)
	btst	7, %o2		! if ((len & 7) == 0)
	be	Lcopyout_done	!	goto copyout_done;

	 btst	4, %o2		! if ((len & 4)) == 0)
	be,a	Lcopyout_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! [delay slot: if (len & 2)]
	lduw	[%o0], %o4	!	*(int *)dst = *(int *)src;
	sta	%o4, [%o1] %asi
	inc	4, %o0		!	dst += 4;
	inc	4, %o1		!	src += 4;
	btst	2, %o2		! } [if (len & 2)]

1:
	! mop up trailing word (if present) and byte (if present).
Lcopyout_mopw:
	be	Lcopyout_mopb	! no word, go mop up byte
	 btst	1, %o2		! [delay slot: if (len & 1)]
	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
	be	Lcopyout_done	! if ((len & 1) == 0) goto done;
	 stha	%o4, [%o1] %asi
	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
	stba	%o4, [%o1 + 2] %asi
	ba	Lcopyout_done
	 clr	%o0
	NOTREACHED

	! mop up trailing byte (if present).
Lcopyout_mopb:
	be,a	Lcopyout_done
	 nop
	ldsb	[%o0], %o4
	stba	%o4, [%o1] %asi

Lcopyout_done:
	sethi	%hi(CPCB), %o3
	LDPTR	[%o3 + %lo(CPCB)], %o3
	membar	#Sync
	STPTR	%g0, [%o3 + PCB_ONFAULT]	! clear fault handler
!	jmp	%g7 + 8		! Original instr
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI
	membar	#StoreStore|#StoreLoad
	retl			! New instr
	 clr	%o0			! return 0

! Copyin or copyout fault.  Clear cpcb->pcb_onfault and return error.
! Note that although we were in memcpy, there is no state to clean up.
! (The old comment about returning to [g7 + 8] rather than [o7 + 8] is
! stale: the code below returns with retl, i.e. via %o7.  The error
! value in %o0 is supplied by the trap code that vectors here.)
Lcopyfault:
	sethi	%hi(CPCB), %o3
	LDPTR	[%o3 + %lo(CPCB)], %o3
	STPTR	%g0, [%o3 + PCB_ONFAULT]
	membar	#StoreStore|#StoreLoad
#ifdef NOTDEF_DEBUG
	save	%sp, -CC64FSZ, %sp
	set	1f, %o0
	call	printf
	 nop
	restore
	.data
1:	.asciz	"copyfault: fault occurred\n"
	_ALIGN
	.text
#endif
	retl
	 wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore ASI

/*
 * {fu,su}{,i}{byte,word}
 *
 * fuword(addr): fetch a pointer-sized word from user address `addr'
 * (via ASI_AIUS); returns the word, or -1 on fault or if addr is not
 * word-aligned.  fuiword is an alias (no separate I-space on sparc64).
 * Lfserr / Lfsbadaddr / Lfsbail below are the shared error exits for
 * the whole fu*/su* family; they require cpcb in %o2.
 */
ALTENTRY(fuiword)
ENTRY(fuword)
	btst	3, %o0			! has low bits set...
	bnz	Lfsbadaddr		!	go return -1
	 .empty
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	set	Lfserr, %o3
	LDPTR	[%o2 + %lo(CPCB)], %o2
	membar	#LoadStore
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	LDPTRA	[%o0] ASI_AIUS, %o0	! fetch the word
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
	retl				! phew, made it, return the word
	 membar	#StoreStore|#StoreLoad

Lfserr:
	STPTR	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
	membar	#StoreStore|#StoreLoad
Lfsbadaddr:
#ifndef _LP64
	mov	-1, %o1			! return -1 in both halves for ILP32
#endif
	retl				! and return error indicator
	 mov	-1, %o0

	/*
	 * This is just like Lfserr, but it's a global label that allows
	 * mem_access_fault() to check to see that we don't want to try to
	 * page in the fault.  It's used by fuswintr() etc.
	 */
	.globl	_C_LABEL(Lfsbail)
_C_LABEL(Lfsbail):
	STPTR	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
	membar	#StoreStore|#StoreLoad
	retl				! and return error indicator
	 mov	-1, %o0

	/*
	 * Like fusword but callable from interrupt context.
	 * Fails (returns -1 via Lfsbail, without paging) if the
	 * data isn't resident.
	 */
ENTRY(fuswintr)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = _Lfsbail;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	_C_LABEL(Lfsbail), %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	lduha	[%o0] ASI_AIUS, %o0	! fetch the halfword
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
	retl				! made it
	 membar	#StoreStore|#StoreLoad

/* fusword(addr): fetch an unsigned halfword from user space; -1 on fault. */
ENTRY(fusword)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	Lfserr, %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	lduha	[%o0] ASI_AIUS, %o0		! fetch the halfword
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
	retl				! made it
	 membar	#StoreStore|#StoreLoad

/* fubyte(addr): fetch an unsigned byte from user space; -1 on fault.
 * fuibyte is an alias. */
ALTENTRY(fuibyte)
ENTRY(fubyte)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	Lfserr, %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	lduba	[%o0] ASI_AIUS, %o0	! fetch the byte
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! but first clear onfault
	retl				! made it
	 membar	#StoreStore|#StoreLoad

/* suword(addr, val): store a pointer-sized word to user space; returns 0,
 * or -1 on fault / unaligned addr.  suiword is an alias. */
ALTENTRY(suiword)
ENTRY(suword)
	btst	3, %o0			! or has low bits set ...
	bnz	Lfsbadaddr		!	go return error
	 .empty
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	Lfserr, %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	STPTRA	%o1, [%o0] ASI_AIUS	! store the word
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	membar	#StoreStore|#StoreLoad
	retl				! and return 0
	 clr	%o0

/* suswintr(addr, val): like susword but callable from interrupt context;
 * fails via Lfsbail (no paging) if the page isn't resident. */
ENTRY(suswintr)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = _Lfsbail;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	_C_LABEL(Lfsbail), %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	stha	%o1, [%o0] ASI_AIUS	! store the halfword
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	membar	#StoreStore|#StoreLoad
	retl				! and return 0
	 clr	%o0

/* susword(addr, val): store a halfword to user space; 0 on success, -1 on fault. */
ENTRY(susword)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	Lfserr, %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	stha	%o1, [%o0] ASI_AIUS	! store the halfword
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	membar	#StoreStore|#StoreLoad
	retl				! and return 0
	 clr	%o0

/* subyte(addr, val): store a byte to user space; 0 on success, -1 on fault.
 * suibyte is an alias. */
ALTENTRY(suibyte)
ENTRY(subyte)
	sethi	%hi(CPCB), %o2		! cpcb->pcb_onfault = Lfserr;
	LDPTR	[%o2 + %lo(CPCB)], %o2
	set	Lfserr, %o3
	STPTR	%o3, [%o2 + PCB_ONFAULT]
	membar	#Sync
	stba	%o1, [%o0] ASI_AIUS	! store the byte
	membar	#Sync
	STPTR	%g0, [%o2 + PCB_ONFAULT]! made it, clear onfault
	membar	#StoreStore|#StoreLoad
	retl				! and return 0
	 clr	%o0

/* probeget and probeset are meant to be used during autoconfiguration */
/*
 * The following probably need to be changed, but to what I don't know.
 */

/*
 * uint64_t
 * probeget(addr, asi, size)
 *	paddr_t addr;
 *	int asi;
 *	int size;
 *
 * Read or write a (byte,word,longword) from the given address.
 * Like {fu,su}{byte,halfword,word} but our caller is supposed
 * to know what he is doing... the address can be anywhere.
 *
 * We optimize for space, rather than time, here.
 *
 * size is 1, 2, 4 or 8; faults vector to Lfsbail (LP64) or Lfsprobe
 * (ILP32, which must also restore %pstate), both returning -1.
 */
ENTRY(probeget)
#ifndef _LP64
	!! Shuffle the args around into LP64 format
	COMBINE(%o0, %o1, %o0)
	mov	%o2, %o1
	mov	%o3, %o2
#endif
	mov	%o2, %o4
	! %o0 = addr, %o1 = asi, %o4 = (1,2,4)
	sethi	%hi(CPCB), %o2
	LDPTR	[%o2 + %lo(CPCB)], %o2	! cpcb->pcb_onfault = Lfserr;
#ifdef _LP64
	set	_C_LABEL(Lfsbail), %o5
#else
	set	_C_LABEL(Lfsprobe), %o5
#endif
	STPTR	%o5, [%o2 + PCB_ONFAULT]
	or	%o0, 0x9, %o3		! if (PHYS_ASI(asi)) {
					! NOTE(review): tests %o0 (addr), but the
					! comment says asi, which is in %o1 --
					! verify against PHYS_ASI definition.
	sub	%o3, 0x1d, %o3
	brz,a	%o3, 0f
	 mov	%g0, %o5
	DLFLUSH(%o0,%o5)		!	flush cache line
					! }
0:
#ifndef _LP64
	rdpr	%pstate, %g1
	wrpr	%g1, PSTATE_AM, %pstate	! 32-bit address masking while probing
#endif
	btst	1, %o4
	wr	%o1, 0, %asi
	membar	#Sync
	bz	0f			! if (len & 1)
	 btst	2, %o4
	ba,pt	%icc, 1f
	 lduba	[%o0] %asi, %o0		!	value = *(char *)addr;
0:
	bz	0f			! if (len & 2)
	 btst	4, %o4
	ba,pt	%icc, 1f
	 lduha	[%o0] %asi, %o0		!	value = *(short *)addr;
0:
	bz	0f			! if (len & 4)
	 btst	8, %o4
	ba,pt	%icc, 1f
	 lda	[%o0] %asi, %o0		!	value = *(int *)addr;
0:
	ldxa	[%o0] %asi, %o0		!	value = *(long *)addr;
1:	
#ifndef _LP64
	SPLIT(%o0, %o1)			! return 64-bit value in %o0/%o1 pair
#endif
	membar	#Sync
#ifndef _LP64
	wrpr	%g1, 0, %pstate		! restore address masking
#endif
	brz	%o5, 1f			! if (cache flush addr != 0)
	 nop
	DLFLUSH2(%o5)			!	flush cache line again
1:
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI	
	STPTR	%g0, [%o2 + PCB_ONFAULT]
	retl				! made it, clear onfault and return
	 membar	#StoreStore|#StoreLoad

	/*
	 * Fault handler for probeget
	 */
_C_LABEL(Lfsprobe):
#ifndef _LP64
	wrpr	%g1, 0, %pstate		! undo PSTATE_AM before returning
#endif
	STPTR	%g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault
	mov	-1, %o1
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI	
	membar	#StoreStore|#StoreLoad
	retl				! and return error indicator
	 mov	-1, %o0

/*
 * probeset(addr, asi, size, val)
 *	paddr_t addr;
 *	int asi;
 *	int size;
 *	long val;
 *
 * Write a (byte, halfword, word, doubleword) to the given address
 * with the given ASI.  As probeget above, but stores instead of
 * loads.  Returns 0 on success; a fault or an invalid size returns
 * -1 via Lfsbail/Lfserr.
 *
 * Fix vs. original: the size-8 case stored with `sta' (32 bits),
 * truncating the 64-bit value; it now uses `stxa'.  A `nop' fills
 * the `bz Lfserr' delay slot so the following `ba' no longer forms
 * a (deprecated) DCTI couple.
 */
ENTRY(probeset)
#ifndef _LP64
	!! Shuffle the args around into LP64 format
	COMBINE(%o0, %o1, %o0)
	mov	%o2, %o1
	mov	%o3, %o2
	COMBINE(%o4, %o5, %o3)
#endif
	mov	%o2, %o4
	! %o0 = addr, %o1 = asi, %o4 = size (1,2,4,8), %o3 = val
	sethi	%hi(CPCB), %o2		! Lfserr requires CPCB in %o2
	LDPTR	[%o2 + %lo(CPCB)], %o2	! cpcb->pcb_onfault = Lfsbail;
	set	_C_LABEL(Lfsbail), %o5
	STPTR	%o5, [%o2 + PCB_ONFAULT]
	btst	1, %o4
	wr	%o1, 0, %asi
	membar	#Sync
	bz	0f			! if (len & 1)
	 btst	2, %o4
	ba,pt	%icc, 1f
	 stba	%o3, [%o0] %asi		!	*(char *)addr = value;
0:
	bz	0f			! if (len & 2)
	 btst	4, %o4
	ba,pt	%icc, 1f
	 stha	%o3, [%o0] %asi		!	*(short *)addr = value;
0:
	bz	0f			! if (len & 4)
	 btst	8, %o4
	ba,pt	%icc, 1f
	 sta	%o3, [%o0] %asi		!	*(int *)addr = value;
0:
	bz	Lfserr			! if (len & 8)
	 nop				! (delay slot; keep branch out of it)
	ba,pt	%icc, 1f
	 stxa	%o3, [%o0] %asi		!	*(long *)addr = value;
1:	membar	#Sync
	clr	%o0			! made it, clear onfault and return 0
	wr	%g0, ASI_PRIMARY_NOFAULT, %asi		! Restore default ASI	
	STPTR	%g0, [%o2 + PCB_ONFAULT]
	retl
	 membar	#StoreStore|#StoreLoad


/*
 * kcopy() is exactly like bcopy except that it set pcb_onfault such that
 * when a fault occurs, it is able to return -1 to indicate this to the
 * caller.
 */
ENTRY(kcopy)
	! Register roles: %o0 = src, %o1 = dst, %o2 = len, %o4 = data temp,
	! %o5 = cpcb, %g1 = previous pcb_onfault (restored on every exit).
#ifdef DEBUG
	set	pmapdebug, %o4
	ld	[%o4], %o4
	btst	0x80, %o4	! PDB_COPY
	bz,pt	%icc, 3f
	 nop
	save	%sp, -CC64FSZ, %sp
	mov	%i0, %o1
	set	2f, %o0
	mov	%i1, %o2
	call	printf
	 mov	%i2, %o3
!	ta	1; nop
	restore
	.data
2:	.asciz	"kcopy(%p->%p,%x)\n"
	_ALIGN
	.text
3:
#endif
	sethi	%hi(CPCB), %o5		! cpcb->pcb_onfault = Lkcerr;
	LDPTR	[%o5 + %lo(CPCB)], %o5
	set	Lkcerr, %o3
	LDPTR	[%o5 + PCB_ONFAULT], %g1! save current onfault handler
	membar	#LoadStore
	STPTR	%o3, [%o5 + PCB_ONFAULT]
	membar	#StoreStore|#StoreLoad

	cmp	%o2, BCOPY_SMALL
Lkcopy_start:
	bge,a	Lkcopy_fancy	! if >= this many, go be fancy.
	 btst	7, %o0		! (part of being fancy)

	/*
	 * Not much to copy, just do it a byte at a time.
	 */
	deccc	%o2		! while (--len >= 0)
	bl	1f
	 .empty
0:
	ldsb	[%o0], %o4	!	*dst++ = *src++;
	inc	%o0
	stb	%o4, [%o1]
	deccc	%o2
	bge	0b
	 inc	%o1
1:
	membar	#Sync		! Make sure all faults are processed
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl
	 clr	%o0		! success: return 0
	NOTREACHED

	/*
	 * Plenty of data to copy, so try to do it optimally:
	 * align src/dst as far as their low bits allow, then copy by
	 * the widest unit possible, then mop up the trailing bytes.
	 */
Lkcopy_fancy:
	! check for common case first: everything lines up.
!	btst	7, %o0		! done already
	bne	1f
	 .empty
	btst	7, %o1
	be,a	Lkcopy_doubles
	 dec	8, %o2		! if all lined up, len -= 8, goto kcopy_doubles

	! If the low bits match, we can make these line up.
1:
	xor	%o0, %o1, %o3	! t = src ^ dst;
	btst	1, %o3		! if (t & 1) {
	be,a	1f
	 btst	1, %o0		! [delay slot: if (src & 1)]

	! low bits do not match, must copy by bytes.
0:
	ldsb	[%o0], %o4	!	do {
	inc	%o0		!		*dst++ = *src++;
	stb	%o4, [%o1]
	deccc	%o2
	bnz	0b		!	} while (--len != 0);
	 inc	%o1
	membar	#Sync		! Make sure all traps are taken
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl
	 clr	%o0		! success: return 0
	NOTREACHED

	! lowest bit matches, so we can copy by words, if nothing else
1:
	be,a	1f		! if (src & 1) {
	 btst	2, %o3		! [delay slot: if (t & 2)]

	! although low bits match, both are 1: must copy 1 byte to align
	ldsb	[%o0], %o4	!	*dst++ = *src++;
	inc	%o0
	stb	%o4, [%o1]
	dec	%o2		!	len--;
	inc	%o1
	btst	2, %o3		! } [if (t & 2)]
1:
	be,a	1f		! if (t & 2) {
	 btst	2, %o0		! [delay slot: if (src & 2)]
	dec	2, %o2		!	len -= 2;
0:
	ldsh	[%o0], %o4	!	do {
	inc	2, %o0		!		dst += 2, src += 2;
	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src;
	deccc	2, %o2		!	} while ((len -= 2) >= 0);
	bge	0b
	 inc	2, %o1
	b	Lkcopy_mopb	!	goto mop_up_byte;
	 btst	1, %o2		! } [delay slot: if (len & 1)]
	NOTREACHED

	! low two bits match, so we can copy by longwords
1:
	be,a	1f		! if (src & 2) {
	 btst	4, %o3		! [delay slot: if (t & 4)]

	! although low 2 bits match, they are 10: must copy one short to align
	ldsh	[%o0], %o4	!	(*short *)dst = *(short *)src;
	inc	2, %o0		!	dst += 2;
	sth	%o4, [%o1]
	dec	2, %o2		!	len -= 2;
	inc	2, %o1		!	src += 2;
	btst	4, %o3		! } [if (t & 4)]
1:
	be,a	1f		! if (t & 4) {
	 btst	4, %o0		! [delay slot: if (src & 4)]
	dec	4, %o2		!	len -= 4;
0:
	ld	[%o0], %o4	!	do {
	inc	4, %o0		!		dst += 4, src += 4;
	st	%o4, [%o1]	!		*(int *)dst = *(int *)src;
	deccc	4, %o2		!	} while ((len -= 4) >= 0);
	bge	0b
	 inc	4, %o1
	b	Lkcopy_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! } [delay slot: if (len & 2)]
	NOTREACHED

	! low three bits match, so we can copy by doublewords
1:
	be	1f		! if (src & 4) {
	 dec	8, %o2		! [delay slot: len -= 8]
	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
	inc	4, %o0		!	dst += 4, src += 4, len -= 4;
	st	%o4, [%o1]
	dec	4, %o2		! }
	inc	4, %o1
1:
Lkcopy_doubles:
	ldx	[%o0], %g5	! do {
	inc	8, %o0		!	dst += 8, src += 8;
	stx	%g5, [%o1]	!	*(double *)dst = *(double *)src;
	deccc	8, %o2		! } while ((len -= 8) >= 0);
	bge	Lkcopy_doubles
	 inc	8, %o1

	! check for a usual case again (save work)
	btst	7, %o2		! if ((len & 7) == 0)
	be	Lkcopy_done	!	goto kcopy_done;

	 btst	4, %o2		! if ((len & 4)) == 0)
	be,a	Lkcopy_mopw	!	goto mop_up_word_and_byte;
	 btst	2, %o2		! [delay slot: if (len & 2)]
	ld	[%o0], %o4	!	*(int *)dst = *(int *)src;
	inc	4, %o0		!	dst += 4;
	st	%o4, [%o1]
	inc	4, %o1		!	src += 4;
	btst	2, %o2		! } [if (len & 2)]

1:
	! mop up trailing word (if present) and byte (if present).
Lkcopy_mopw:
	be	Lkcopy_mopb	! no word, go mop up byte
	 btst	1, %o2		! [delay slot: if (len & 1)]
	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src;
	be	Lkcopy_done	! if ((len & 1) == 0) goto done;
	 sth	%o4, [%o1]
	ldsb	[%o0 + 2], %o4	! dst[2] = src[2];
	stb	%o4, [%o1 + 2]
	membar	#Sync		! Make sure all traps are taken
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl
	 clr	%o0		! success: return 0
	NOTREACHED

	! mop up trailing byte (if present).
Lkcopy_mopb:
	bne,a	1f
	 ldsb	[%o0], %o4

Lkcopy_done:
	membar	#Sync		! Make sure all traps are taken
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl
	 clr	%o0		! success: return 0
	NOTREACHED

1:
	stb	%o4, [%o1]
	membar	#Sync		! Make sure all traps are taken
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	membar	#StoreStore|#StoreLoad
	retl
	 clr	%o0		! success: return 0
	NOTREACHED

	! Fault handler: entered via pcb_onfault when a copy access traps.
Lkcerr:
#ifdef DEBUG
	set	pmapdebug, %o4
	ld	[%o4], %o4
	btst	0x80, %o4	! PDB_COPY
	bz,pt	%icc, 3f
	 nop
	save	%sp, -CC64FSZ, %sp
	set	2f, %o0
	call	printf
	 nop
!	ta	1; nop
	restore
	.data
2:	.asciz	"kcopy error\n"
	_ALIGN
	.text
3:
#endif
	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler
	! NOTE(review): %o0 is not set here before returning; presumably
	! the onfault dispatch already placed the error code in %o0 --
	! confirm against the trap-handling code.
	retl				! and return error indicator
	 membar	#StoreStore|#StoreLoad
	NOTREACHED

/*
 * copystr(fromaddr, toaddr, maxlength, &lencopied)
 *
 * Copy a null terminated string from one point to another in
 * the kernel address space.  (This is a leaf procedure, but
 * it does not seem that way to the C compiler.)
 */
ENTRY(copystr)
	! %o0 = from, %o1 = to, %o2 = maxlength, %o3 = &lencopied (may be 0).
	! Returns 0 on success, ENAMETOOLONG if the NUL did not fit.
	brgz,pt	%o2, 0f	! Make sure len is valid (> 0)
	 mov	%o1, %o5		!	to0 = to;
	retl
	 mov	ENAMETOOLONG, %o0	! len <= 0: nothing can fit
0:					! loop:
	ldsb	[%o0], %o4		!	c = *from;
	tst	%o4
	stb	%o4, [%o1]		!	*to++ = c;
	be	1f			!	if (c == 0)
	 inc	%o1			!		goto ok;
	deccc	%o2			!	if (--len > 0) {
	bg,a	0b			!		from++;
	 inc	%o0			!		goto loop;
	b	2f			!	}
	 mov	ENAMETOOLONG, %o0	!	ret = ENAMETOOLONG; goto done;
1:					! ok:
	clr	%o0			!	ret = 0;
2:
	sub	%o1, %o5, %o1		!	len = to - to0;
	tst	%o3			!	if (lencopied)
	bnz,a	3f
	 STPTR	%o1, [%o3]		!		*lencopied = len;
3:
	retl
	 nop
#ifdef DIAGNOSTIC
	! NOTE(review): label 4 is not referenced from the code visible
	! above; presumably a leftover diagnostic path -- confirm.
4:
	sethi	%hi(5f), %o0
	call	_C_LABEL(panic)
	 or	%lo(5f), %o0, %o0
	.data
5:
	.asciz	"copystr"
	_ALIGN
	.text
#endif


File Added: src/sys/arch/sparc64/sparc64/mp_subr.S
/*	$NetBSD: mp_subr.S,v 1.1 2010/05/23 18:49:14 martin Exp $	*/

/*
 * Copyright (c) 2006-2010 Matthew R. Green
 * Copyright (c) 1996-2002 Eduardo Horvath
 * Copyright (c) 1996 Paul Kranenburg
 * Copyright (c) 1996
 * 	The President and Fellows of Harvard College.
 *	All rights reserved.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.
 *	All rights reserved.
 *
 * This software was developed by the Computer Systems Engineering group
 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
 * contributed to Berkeley.
 *
 * All advertising materials mentioning features or use of this software
 * must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Lawrence Berkeley Laboratory.
 *	This product includes software developed by Harvard University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the
 *    distribution.
 * 3. All advertising materials mentioning features or use of this
 *    software must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 *	This product includes software developed by Harvard University.
 *	This product includes software developed by Paul Kranenburg.
 * 4. Neither the name of the University nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
 * THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 *
 *	@(#)locore.s	8.4 (Berkeley) 12/10/93
 */

#include "opt_ddb.h"
#include "opt_kgdb.h"
#include "opt_multiprocessor.h"
#include "opt_compat_netbsd.h"
#include "opt_compat_netbsd32.h"
#include "opt_lockdebug.h"

#include "assym.h"
#include <machine/param.h>
#include <sparc64/sparc64/intreg.h>
#include <sparc64/sparc64/timerreg.h>
#include <machine/ctlreg.h>
#include <machine/psl.h>
#include <machine/signal.h>
#include <machine/trap.h>
#include <machine/frame.h>
#include <machine/pte.h>
#include <machine/pmap.h>
#include <machine/intr.h>
#include <machine/asm.h>
#include <machine/locore.h>
#include <sys/syscall.h>

#include "ksyms.h"

	.register	%g2,#scratch
	.register	%g3,#scratch


#if defined(MULTIPROCESSOR)
/*
 * IPI handler to do nothing, but causes rescheduling..
 * void sparc64_ipi_nop(void *);
 */
ENTRY(sparc64_ipi_nop)
	ba,a	ret_from_intr_vector	! ,a annuls: the nop below is filler
	 nop

/*
 * IPI handler to halt the CPU.  Just calls the C vector.
 * void sparc64_ipi_halt(void *);
 */
ENTRY(sparc64_ipi_halt)
	call	_C_LABEL(sparc64_ipi_halt_thiscpu)
	 clr	%g4			! delay slot: clear %g4 for C code
	sir				! software-initiated reset; no return

/*
 * IPI handler to pause the CPU.  We just trap to the debugger if it
 * is configured, otherwise just return.
 */
ENTRY(sparc64_ipi_pause)
#if defined(DDB)
	! Exported so DDB can recognize this PC as the pause trap site.
	.global sparc64_ipi_pause_trap_point
sparc64_ipi_pause_trap_point:
	ta	1			! trap into the debugger
	 nop
#endif
	ba,a	ret_from_intr_vector
	 nop

/*
 * Increment IPI event counter, defined in machine/{cpu,intr}.h.
 * n is the counter index; r1 and r2 are scratch registers (clobbered).
 * Load/inc/store is not atomic; presumably safe because each CPU
 * updates only its own CPUINFO_VA counters -- confirm.
 */
#define IPIEVC_INC(n,r1,r2)						\
	sethi	%hi(CPUINFO_VA+CI_IPIEVC+EVC_SIZE*n), r2;		\
	ldx	[r2 + %lo(CPUINFO_VA+CI_IPIEVC+EVC_SIZE*n)], r1;	\
	inc	r1;							\
	stx	r1, [r2 + %lo(CPUINFO_VA+CI_IPIEVC+EVC_SIZE*n)]

/*
 * void sparc64_ipi_flush_pte_us(void *);
 * void sparc64_ipi_flush_pte_usiii(void *);
 *
 * IPI handler to flush single pte.  We enter here with %tl already 1
 * and PSTATE_IE already disabled, so there's no need to do it again.
 *
 * On entry:
 *	%g2 = vaddr_t va
 *	%g3 = int ctx
 */
ENTRY(sparc64_ipi_flush_pte_us)
	! US-I/II variant: demap via the secondary context register.
	srlx	%g2, PG_SHIFT4U, %g2		! drop unused va bits
	mov	CTX_SECONDARY, %g5
	sllx	%g2, PG_SHIFT4U, %g2
	ldxa	[%g5] ASI_DMMU, %g6		! Save secondary context
	sethi	%hi(KERNBASE), %g7
	membar	#LoadStore
	stxa	%g3, [%g5] ASI_DMMU		! Insert context to demap
	membar	#Sync
	or	%g2, DEMAP_PAGE_SECONDARY, %g2	! Demap page from secondary context only
	stxa	%g2, [%g2] ASI_DMMU_DEMAP	! Do the demap
	stxa	%g2, [%g2] ASI_IMMU_DEMAP	! to both TLBs
#ifdef TLB_FLUSH_LOWVA
	srl	%g2, 0, %g2			! and make sure it's both 32- and 64-bit entries
	stxa	%g2, [%g2] ASI_DMMU_DEMAP	! Do the demap
	stxa	%g2, [%g2] ASI_IMMU_DEMAP	! Do the demap
#endif
	! NOTE(review): unlike the usiii variant below, there is no
	! membar #Sync between the demap stores and flush -- confirm.
	flush	%g7
	stxa	%g6, [%g5] ASI_DMMU		! Restore secondary context
	membar	#Sync
	IPIEVC_INC(IPI_EVCNT_TLB_PTE,%g2,%g3)

	ba,a	ret_from_intr_vector
	 nop

ENTRY(sparc64_ipi_flush_pte_usiii)
	! US-III variant: demap via the primary context register.
	andn	%g2, 0xfff, %g2			! drop unused va bits
	mov	CTX_PRIMARY, %g5
	ldxa	[%g5] ASI_DMMU, %g6		! Save primary context
	sethi	%hi(KERNBASE), %g7
	membar	#LoadStore
	stxa	%g3, [%g5] ASI_DMMU		! Insert context to demap
	membar	#Sync
	or	%g2, DEMAP_PAGE_PRIMARY, %g2
	stxa	%g2, [%g2] ASI_DMMU_DEMAP	! Do the demap
	stxa	%g2, [%g2] ASI_IMMU_DEMAP	! to both TLBs
#ifdef TLB_FLUSH_LOWVA
	srl	%g2, 0, %g2			! and make sure it's both 32- and 64-bit entries
	stxa	%g2, [%g2] ASI_DMMU_DEMAP	! Do the demap
	stxa	%g2, [%g2] ASI_IMMU_DEMAP	! Do the demap
#endif
	membar	#Sync
	flush	%g7
	stxa	%g6, [%g5] ASI_DMMU		! Restore primary context
	membar	#Sync
	flush	%g7
	IPIEVC_INC(IPI_EVCNT_TLB_PTE,%g2,%g3)

	ba,a	ret_from_intr_vector
	 nop


/*
 * Secondary CPU bootstrap code.
 *
 * This trampoline is position independent (all control transfers and
 * data references are PC-relative via label arithmetic) so it can be
 * copied elsewhere and executed by a freshly started CPU.  It locks
 * the TTEs listed after label 6 into the I- and D-TLBs, then jumps to
 * the function whose address is stored at label 5 with the cpu_info
 * pointer (from label 7) in %g2.  The three ULONG slots are zero here;
 * presumably patched by the MP startup code via the mp_tramp_*
 * offsets exported below -- confirm against cpu.c.
 */
	.text
	.align 32
1:	rd	%pc, %l0		! %l0 = runtime base of the copy
	LDULNG	[%l0 + (4f-1b)], %l1	! %l1 = number of TLB slots
	add	%l0, (6f-1b), %l2	! %l2 = TTE array following code
	clr	%l3			! %l3 = slot index
2:	cmp	%l3, %l1
	be	CCCR, 3f		! done with all slots?
	 nop
	ldx	[%l2 + TTE_VPN], %l4
	ldx	[%l2 + TTE_DATA], %l5
	wr	%g0, ASI_DMMU, %asi
	stxa	%l4, [%g0 + TLB_TAG_ACCESS] %asi
	stxa	%l5, [%g0] ASI_DMMU_DATA_IN	! enter mapping in D-TLB
	wr	%g0, ASI_IMMU, %asi
	stxa	%l4, [%g0 + TLB_TAG_ACCESS] %asi
	stxa	%l5, [%g0] ASI_IMMU_DATA_IN	! and in I-TLB
	membar	#Sync
	flush	%l4
	add	%l2, PTE_SIZE, %l2
	add	%l3, 1, %l3
	ba	%xcc, 2b		! next slot
	 nop
3:	LDULNG	[%l0 + (5f-1b)], %l1	! %l1 = function to enter
	LDULNG	[%l0 + (7f-1b)], %g2	! Load cpu_info address.
	jmpl	%l1, %g0
	 nop

	.align PTRSZ
4:	ULONG	0x0			! TLB slot count
5:	ULONG	0x0			! entry function address
7:	ULONG	0x0			! cpu_info address
	_ALIGN
6:					! TTE array starts here

/* Emit a pointer-aligned, exported data symbol. */
#define DATA(name) \
        .data ; \
        .align PTRSZ ; \
        .globl  name ; \
name:

DATA(mp_tramp_code)
	POINTER	1b			! address of the trampoline
DATA(mp_tramp_code_len)
	ULONG	6b-1b			! size of code + patch slots
DATA(mp_tramp_tlb_slots)
	ULONG	4b-1b			! offset of slot-count word
DATA(mp_tramp_func)
	ULONG	5b-1b			! offset of entry-function word
DATA(mp_tramp_ci)
	ULONG	7b-1b			! offset of cpu_info word

	.text
	.align 32


/*
 * IPI handler to store the current FPU state.
 * void sparc64_ipi_save_fpstate(void *);
 *
 * On entry:
 *	%g2 = lwp
 */
ENTRY(sparc64_ipi_save_fpstate)
	! Saves this CPU's FPU registers into fplwp's fpstate, then
	! clears fplwp.  Skips everything if fplwp no longer matches
	! the lwp (%g2) the sender asked about.
	sethi	%hi(FPLWP), %g1
	LDPTR	[%g1 + %lo(FPLWP)], %g3
	cmp	%g3, %g2
	bne,pn	CCCR, 7f		! skip if fplwp has changed

	 rdpr	%pstate, %g2		! enable FP before we begin
	rd	%fprs, %g5
	wr	%g0, FPRS_FEF, %fprs
	or	%g2, PSTATE_PEF, %g2
	wrpr	%g2, 0, %pstate

	LDPTR	[%g3 + L_FPSTATE], %g3
	stx	%fsr, [%g3 + FS_FSR]	! f->fs_fsr = getfsr();
	rd	%gsr, %g2		! Save %gsr
	st	%g2, [%g3 + FS_GSR]
#if FS_REGS > 0
	add	%g3, FS_REGS, %g3	! %g3 now points at the register area
#endif
#ifdef DIAGNOSTIC
	btst	BLOCK_ALIGN, %g3	! Needs to be re-executed
	bnz,pn	%icc, 6f		! Check alignment
#endif
	 st	%g0, [%g3 + FS_QSIZE - FS_REGS]	! f->fs_qsize = 0;
	btst	FPRS_DL|FPRS_DU, %g5	! Both FPU halves clean?
	bz,pt	%icc, 5f		! Then skip it

	 mov	CTX_PRIMARY, %g2
	ldxa	[%g2] ASI_DMMU, %g6
	membar	#LoadStore
	stxa	%g0, [%g2] ASI_DMMU	! Switch MMU to kernel primary context
	membar	#Sync

	btst	FPRS_DL, %g5		! Lower FPU clean?
	bz,a,pt	%icc, 1f		! Then skip it, but upper FPU not clean
	 add	%g3, 2*BLOCK_SIZE, %g3	! Skip a block

	stda	%f0, [%g3] ASI_BLK_P	! f->fs_f0 = etc;
	inc	BLOCK_SIZE, %g3
	stda	%f16, [%g3] ASI_BLK_P

	btst	FPRS_DU, %g5		! Upper FPU clean?
	bz,pt	%icc, 2f		! Then skip it
	 inc	BLOCK_SIZE, %g3
1:
	stda	%f32, [%g3] ASI_BLK_P	! save upper half (block stores)
	inc	BLOCK_SIZE, %g3
	stda	%f48, [%g3] ASI_BLK_P
2:
	membar	#Sync			! Finish operation so we can
	brz,pn	%g6, 5f			! Skip if context 0
	 nop
	stxa	%g6, [%g2] ASI_DMMU	! Restore primary context
	membar	#Sync
5:
	wr	%g0, FPRS_FEF, %fprs	! Mark FPU clean
	STPTR	%g0, [%g1 + %lo(FPLWP)]	! fplwp = NULL
7:
	IPIEVC_INC(IPI_EVCNT_FPU_SYNCH,%g2,%g3)
	ba,a	ret_from_intr_vector
	 nop

#ifdef DIAGNOSTIC
	!!
	!! Damn thing is *NOT* aligned on a 64-byte boundary
	!!
6:
	wr	%g0, FPRS_FEF, %fprs
	! XXX -- we should panic instead of silently entering debugger
	ta	1
	 nop
	ba,a	ret_from_intr_vector
	 nop
#endif

/*
 * IPI handler to drop the current FPU state.
 * void sparc64_ipi_drop_fpstate(void *);
 *
 * On entry:
 *	%g2 = lwp
 */
ENTRY(sparc64_ipi_drop_fpstate)
	! Discards FPU ownership: clears fplwp (only if it still equals
	! the requested lwp in %g2) without saving any registers.
	rdpr	%pstate, %g1
	wr	%g0, FPRS_FEF, %fprs	! mark FPU clean
	or	%g1, PSTATE_PEF, %g1	! enable FP access
	wrpr	%g1, 0, %pstate
	set	FPLWP, %g1
	CASPTR	[%g1] ASI_N, %g2, %g0	! fplwp = NULL if fplwp == %g2
	membar	#Sync			! Should not be needed due to retry
	IPIEVC_INC(IPI_EVCNT_FPU_FLUSH,%g2,%g3)
	ba,a	ret_from_intr_vector
	 nop

/*
 * IPI handler to drop the current FPU state.
 * void sparc64_ipi_dcache_flush_page_usiii(paddr_t pa, int line_size)
 * void sparc64_ipi_dcache_flush_page_us(paddr_t pa, int line_size)
 *
 * On entry:
 *	%g2 = pa
 *	%g3 = line_size
 */
ENTRY(sparc64_ipi_dcache_flush_page_usiii)
	! US-III variant: invalidate every D$ line covering the page at
	! pa (%g2), stepping by the CPU's line size (%g3).
	set	NBPG, %g1
	add	%g2, %g1, %g1	! end address

1:
	stxa	%g0, [%g2] ASI_DCACHE_INVALIDATE	! invalidate one line
	add	%g2, %g3, %g2
	cmp	%g2, %g1
	bl,pt	%xcc, 1b
	 nop

	sethi	%hi(KERNBASE), %g5
	flush	%g5
	membar	#Sync
	ba,a	ret_from_intr_vector
	 nop

ENTRY(sparc64_ipi_dcache_flush_page_us)
	! US-I/II variant: walk every D$ tag and clear the ones whose
	! tag matches the page at pa (%g2).  %g7 counts down over the
	! direct-mapped cache (2*NBPG bytes, 32-byte lines).
	mov	-1, %g1		! Generate mask for tag: bits [29..2]
	srlx	%g2, 13-2, %g5	! Tag is PA bits <40:13> in bits <29:2>
	clr	%g4		! %g4 = current tag-array index
	srl	%g1, 2, %g1	! Now we have bits <29:0> set
	set	(2*NBPG), %g7
	ba,pt	%icc, 1f
	 andn	%g1, 3, %g1	! Now we have bits <29:2> set

	.align 8
1:
	ldxa	[%g4] ASI_DCACHE_TAG, %g6
	mov	%g4, %g2	! remember this line's index
	deccc	32, %g7
	bl,pn	%icc, 2f	! whole cache walked?
	 inc	32, %g4

	xor	%g6, %g5, %g6	! compare tag against the page's tag
	andcc	%g6, %g1, %g0
	bne,pt	%xcc, 1b	! no match: next line
	 membar	#LoadStore

	stxa	%g0, [%g2] ASI_DCACHE_TAG	! match: clear the tag
	ba,pt	%icc, 1b
	 membar	#StoreLoad
2:

	sethi	%hi(KERNBASE), %g5
	flush	%g5
	membar	#Sync
	ba,a	ret_from_intr_vector
	 nop
#endif


cvs diff -r1.329 -r1.330 src/sys/arch/sparc64/sparc64/locore.s (expand / switch to unified diff)

--- src/sys/arch/sparc64/sparc64/locore.s 2010/05/17 11:51:10 1.329
+++ src/sys/arch/sparc64/sparc64/locore.s 2010/05/23 18:49:14 1.330
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: locore.s,v 1.329 2010/05/17 11:51:10 martin Exp $ */ 1/* $NetBSD: locore.s,v 1.330 2010/05/23 18:49:14 martin Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2006-2010 Matthew R. Green 4 * Copyright (c) 2006-2010 Matthew R. Green
5 * Copyright (c) 1996-2002 Eduardo Horvath 5 * Copyright (c) 1996-2002 Eduardo Horvath
6 * Copyright (c) 1996 Paul Kranenburg 6 * Copyright (c) 1996 Paul Kranenburg
7 * Copyright (c) 1996 7 * Copyright (c) 1996
8 * The President and Fellows of Harvard College. 8 * The President and Fellows of Harvard College.
9 * All rights reserved. 9 * All rights reserved.
10 * Copyright (c) 1992, 1993 10 * Copyright (c) 1992, 1993
11 * The Regents of the University of California. 11 * The Regents of the University of California.
12 * All rights reserved. 12 * All rights reserved.
13 * 13 *
14 * This software was developed by the Computer Systems Engineering group 14 * This software was developed by the Computer Systems Engineering group
@@ -81,166 +81,44 @@ @@ -81,166 +81,44 @@
81#include <machine/psl.h> 81#include <machine/psl.h>
82#include <machine/signal.h> 82#include <machine/signal.h>
83#include <machine/trap.h> 83#include <machine/trap.h>
84#include <machine/frame.h> 84#include <machine/frame.h>
85#include <machine/pte.h> 85#include <machine/pte.h>
86#include <machine/pmap.h> 86#include <machine/pmap.h>
87#include <machine/intr.h> 87#include <machine/intr.h>
88#include <machine/asm.h> 88#include <machine/asm.h>
89#include <machine/locore.h> 89#include <machine/locore.h>
90#include <sys/syscall.h> 90#include <sys/syscall.h>
91 91
92#include "ksyms.h" 92#include "ksyms.h"
93 93
94/* A few convenient abbreviations for trapframe fields. */ 
95#define TF_G TF_GLOBAL 
96#define TF_O TF_OUT 
97#define TF_L TF_LOCAL 
98#define TF_I TF_IN 
99 
100/* Let us use same syntax as C code */ 
101#define Debugger() ta 1; nop 
102 
103#if 1 94#if 1
104/* 95/*
105 * Try to issue an elf note to ask the Solaris 96 * Try to issue an elf note to ask the Solaris
106 * bootloader to align the kernel properly. 97 * bootloader to align the kernel properly.
107 */ 98 */
108 .section .note 99 .section .note
109 .word 0x0d 100 .word 0x0d
110 .word 4 ! Dunno why 101 .word 4 ! Dunno why
111 .word 1 102 .word 1
1120: .asciz "SUNW Solaris" 1030: .asciz "SUNW Solaris"
1131: 1041:
114 .align 4 105 .align 4
115 .word 0x0400000 106 .word 0x0400000
116#endif 107#endif
117 108
118 .register %g2,#scratch 109 .register %g2,#scratch
119 .register %g3,#scratch 110 .register %g3,#scratch
120 111
121/* 
122 * This macro will clear out a cache line before an explicit 
123 * access to that location. It's mostly used to make certain 
124 * loads bypassing the D$ do not get stale D$ data. 
125 * 
126 * It uses a register with the address to clear and a temporary 
127 * which is destroyed. 
128 */ 
129#ifdef DCACHE_BUG 
130#define DLFLUSH(a,t) \ 
131 andn a, 0x3f, t; \ 
132 stxa %g0, [ t ] ASI_DCACHE_TAG; \ 
133 membar #Sync 
134/* The following can be used if the pointer is 32-byte aligned */ 
135#define DLFLUSH2(t) \ 
136 stxa %g0, [ t ] ASI_DCACHE_TAG; \ 
137 membar #Sync 
138#else 
139#define DLFLUSH(a,t) 
140#define DLFLUSH2(t) 
141#endif 
142 
143 
144/* 
145 * Combine 2 regs -- used to convert 64-bit ILP32 
146 * values to LP64. 
147 */ 
148#define COMBINE(r1, r2, d) \ 
149 sllx r1, 32, d; \ 
150 or d, r2, d 
151 
152/* 
153 * Split 64-bit value in 1 reg into high and low halves. 
154 * Used for ILP32 return values. 
155 */ 
156#define SPLIT(r0, r1) \ 
157 srl r0, 0, r1; \ 
158 srlx r0, 32, r0 
159 
160 
161/* 
162 * A handy macro for maintaining instrumentation counters. 
163 * Note that this clobbers %o0, %o1 and %o2. Normal usage is 
164 * something like: 
165 * foointr: 
166 * TRAP_SETUP(...) ! makes %o registers safe 
167 * INCR(_C_LABEL(cnt)+V_FOO) ! count a foo 
168 */ 
169#define INCR(what) \ 
170 sethi %hi(what), %o0; \ 
171 or %o0, %lo(what), %o0; \ 
17299: \ 
173 lduw [%o0], %o1; \ 
174 add %o1, 1, %o2; \ 
175 casa [%o0] ASI_P, %o1, %o2; \ 
176 cmp %o1, %o2; \ 
177 bne,pn %icc, 99b; \ 
178 nop 
179 
180/* 
181 * A couple of handy macros to save and restore globals to/from 
182 * locals. Since udivrem uses several globals, and it's called 
183 * from vsprintf, we need to do this before and after doing a printf. 
184 */ 
185#define GLOBTOLOC \ 
186 mov %g1, %l1; \ 
187 mov %g2, %l2; \ 
188 mov %g3, %l3; \ 
189 mov %g4, %l4; \ 
190 mov %g5, %l5; \ 
191 mov %g6, %l6; \ 
192 mov %g7, %l7 
193 
194#define LOCTOGLOB \ 
195 mov %l1, %g1; \ 
196 mov %l2, %g2; \ 
197 mov %l3, %g3; \ 
198 mov %l4, %g4; \ 
199 mov %l5, %g5; \ 
200 mov %l6, %g6; \ 
201 mov %l7, %g7 
202 
203/* Load strings address into register; NOTE: hidden local label 99 */ 
204#define LOAD_ASCIZ(reg, s) \ 
205 set 99f, reg ; \ 
206 .data ; \ 
20799: .asciz s ; \ 
208 _ALIGN ; \ 
209 .text 
210 
211/* 
212 * Handy stack conversion macros. 
213 * They correctly switch to requested stack type 
214 * regardless of the current stack. 
215 */ 
216 
217#define TO_STACK64(size) \ 
218 save %sp, size, %sp; \ 
219 add %sp, -BIAS, %o0; /* Convert to 64-bits */ \ 
220 andcc %sp, 1, %g0; /* 64-bit stack? */ \ 
221 movz %icc, %o0, %sp 
222 
223#define TO_STACK32(size) \ 
224 save %sp, size, %sp; \ 
225 add %sp, +BIAS, %o0; /* Convert to 32-bits */ \ 
226 andcc %sp, 1, %g0; /* 64-bit stack? */ \ 
227 movnz %icc, %o0, %sp 
228 
229#ifdef _LP64 
230#define STACKFRAME(size) TO_STACK64(size) 
231#else 
232#define STACKFRAME(size) TO_STACK32(size) 
233#endif 
234 112
235 .data 113 .data
236 .globl _C_LABEL(data_start) 114 .globl _C_LABEL(data_start)
237_C_LABEL(data_start): ! Start of data segment 115_C_LABEL(data_start): ! Start of data segment
238 116
239#ifdef KGDB 117#ifdef KGDB
240/* 118/*
241 * Another item that must be aligned, easiest to put it here. 119 * Another item that must be aligned, easiest to put it here.
242 */ 120 */
243KGDB_STACK_SIZE = 2048 121KGDB_STACK_SIZE = 2048
244 .globl _C_LABEL(kgdb_stack) 122 .globl _C_LABEL(kgdb_stack)
245_C_LABEL(kgdb_stack): 123_C_LABEL(kgdb_stack):
246 .space KGDB_STACK_SIZE ! hope this is enough 124 .space KGDB_STACK_SIZE ! hope this is enough
@@ -3272,26 +3150,27 @@ setup_sparcintr: @@ -3272,26 +3150,27 @@ setup_sparcintr:
3272 mov %g1, %o4 3150 mov %g1, %o4
3273 GLOBTOLOC 3151 GLOBTOLOC
3274 clr %g4 3152 clr %g4
3275 call prom_printf 3153 call prom_printf
3276 mov %g6, %o2 3154 mov %g6, %o2
3277 LOCTOGLOB 3155 LOCTOGLOB
3278 restore 3156 restore
327997: 315797:
3280#endif 3158#endif
3281 mov 1, %g7 3159 mov 1, %g7
3282 sll %g7, %g6, %g6 3160 sll %g7, %g6, %g6
3283 wr %g6, 0, SET_SOFTINT ! Invoke a softint 3161 wr %g6, 0, SET_SOFTINT ! Invoke a softint
3284 3162
 3163 .global ret_from_intr_vector
3285ret_from_intr_vector: 3164ret_from_intr_vector:
3286 retry 3165 retry
3287 NOTREACHED 3166 NOTREACHED
3288 3167
32893: 31683:
3290#ifdef NOT_DEBUG /* always do this */ 3169#ifdef NOT_DEBUG /* always do this */
3291 set _C_LABEL(intrdebug), %g6 3170 set _C_LABEL(intrdebug), %g6
3292 ld [%g6], %g6 3171 ld [%g6], %g6
3293 btst INTRDEBUG_SPUR, %g6 3172 btst INTRDEBUG_SPUR, %g6
3294 bz,pt %icc, 97f 3173 bz,pt %icc, 97f
3295 nop 3174 nop
3296#endif 3175#endif
3297#if 1 3176#if 1
@@ -3299,188 +3178,26 @@ ret_from_intr_vector: @@ -3299,188 +3178,26 @@ ret_from_intr_vector:
3299 LOAD_ASCIZ(%o0, "interrupt_vector: spurious vector %lx at pil %d\r\n") 3178 LOAD_ASCIZ(%o0, "interrupt_vector: spurious vector %lx at pil %d\r\n")
3300 mov %g7, %o1 3179 mov %g7, %o1
3301 GLOBTOLOC 3180 GLOBTOLOC
3302 clr %g4 3181 clr %g4
3303 call prom_printf 3182 call prom_printf
3304 rdpr %pil, %o2 3183 rdpr %pil, %o2
3305 LOCTOGLOB 3184 LOCTOGLOB
3306 restore 3185 restore
330797: 318697:
3308#endif 3187#endif
3309 ba,a ret_from_intr_vector 3188 ba,a ret_from_intr_vector
3310 nop ! XXX spitfire bug? 3189 nop ! XXX spitfire bug?
3311 3190
3312#if defined(MULTIPROCESSOR) 
3313/* 
3314 * IPI handler to do nothing, but causes rescheduling.. 
3315 * void sparc64_ipi_nop(void *); 
3316 */ 
3317ENTRY(sparc64_ipi_nop) 
3318 ba,a ret_from_intr_vector 
3319 nop 
3320 
3321/* 
3322 * IPI handler to halt the CPU. Just calls the C vector. 
3323 * void sparc64_ipi_halt(void *); 
3324 */ 
3325ENTRY(sparc64_ipi_halt) 
3326 call _C_LABEL(sparc64_ipi_halt_thiscpu) 
3327 clr %g4 
3328 sir 
3329 
3330/* 
3331 * IPI handler to pause the CPU. We just trap to the debugger if it 
3332 * is configured, otherwise just return. 
3333 */ 
3334ENTRY(sparc64_ipi_pause) 
3335#if defined(DDB) 
3336sparc64_ipi_pause_trap_point: 
3337 ta 1 
3338 nop 
3339#endif 
3340 ba,a ret_from_intr_vector 
3341 nop 
3342 
3343/* 
3344 * Increment IPI event counter, defined in machine/{cpu,intr}.h. 
3345 */ 
3346#define IPIEVC_INC(n,r1,r2) \ 
3347 sethi %hi(CPUINFO_VA+CI_IPIEVC+EVC_SIZE*n), r2; \ 
3348 ldx [r2 + %lo(CPUINFO_VA+CI_IPIEVC+EVC_SIZE*n)], r1; \ 
3349 inc r1; \ 
3350 stx r1, [r2 + %lo(CPUINFO_VA+CI_IPIEVC+EVC_SIZE*n)] 
3351 
3352/* 
3353 * void sparc64_ipi_flush_pte_us(void *); 
3354 * void sparc64_ipi_flush_pte_usiii(void *); 
3355 * 
3356 * IPI handler to flush single pte. We enter here with %tl already 1 
3357 * and PSTATE_IE already disabled, so there's no need to do it again. 
3358 * 
3359 * On entry: 
3360 * %g2 = vaddr_t va 
3361 * %g3 = int ctx 
3362 */ 
3363ENTRY(sparc64_ipi_flush_pte_us) 
3364 srlx %g2, PG_SHIFT4U, %g2 ! drop unused va bits 
3365 mov CTX_SECONDARY, %g5 
3366 sllx %g2, PG_SHIFT4U, %g2 
3367 ldxa [%g5] ASI_DMMU, %g6 ! Save secondary context 
3368 sethi %hi(KERNBASE), %g7 
3369 membar #LoadStore 
3370 stxa %g3, [%g5] ASI_DMMU ! Insert context to demap 
3371 membar #Sync 
3372 or %g2, DEMAP_PAGE_SECONDARY, %g2 ! Demap page from secondary context only 
3373 stxa %g2, [%g2] ASI_DMMU_DEMAP ! Do the demap 
3374 stxa %g2, [%g2] ASI_IMMU_DEMAP ! to both TLBs 
3375#ifdef TLB_FLUSH_LOWVA 
3376 srl %g2, 0, %g2 ! and make sure it's both 32- and 64-bit entries 
3377 stxa %g2, [%g2] ASI_DMMU_DEMAP ! Do the demap 
3378 stxa %g2, [%g2] ASI_IMMU_DEMAP ! Do the demap 
3379#endif 
3380 flush %g7 
3381 stxa %g6, [%g5] ASI_DMMU ! Restore secondary context 
3382 membar #Sync 
3383 IPIEVC_INC(IPI_EVCNT_TLB_PTE,%g2,%g3) 
3384  
3385 ba,a ret_from_intr_vector 
3386 nop 
3387 
3388ENTRY(sparc64_ipi_flush_pte_usiii) 
3389 andn %g2, 0xfff, %g2 ! drop unused va bits 
3390 mov CTX_PRIMARY, %g5 
3391 ldxa [%g5] ASI_DMMU, %g6 ! Save primary context 
3392 sethi %hi(KERNBASE), %g7 
3393 membar #LoadStore 
3394 stxa %g3, [%g5] ASI_DMMU ! Insert context to demap 
3395 membar #Sync 
3396 or %g2, DEMAP_PAGE_PRIMARY, %g2 
3397 stxa %g2, [%g2] ASI_DMMU_DEMAP ! Do the demap 
3398 stxa %g2, [%g2] ASI_IMMU_DEMAP ! to both TLBs 
3399#ifdef TLB_FLUSH_LOWVA 
3400 srl %g2, 0, %g2 ! and make sure it's both 32- and 64-bit entries 
3401 stxa %g2, [%g2] ASI_DMMU_DEMAP ! Do the demap 
3402 stxa %g2, [%g2] ASI_IMMU_DEMAP ! Do the demap 
3403#endif 
3404 membar #Sync 
3405 flush %g7 
3406 stxa %g6, [%g5] ASI_DMMU ! Restore primary context 
3407 membar #Sync 
3408 flush %g7 
3409 IPIEVC_INC(IPI_EVCNT_TLB_PTE,%g2,%g3) 
3410 
3411 ba,a ret_from_intr_vector 
3412 nop 
3413 
3414 
3415/* 
3416 * Secondary CPU bootstrap code. 
3417 */ 
3418 .text 
3419 .align 32 
34201: rd %pc, %l0 
3421 LDULNG [%l0 + (4f-1b)], %l1 
3422 add %l0, (6f-1b), %l2 
3423 clr %l3 
34242: cmp %l3, %l1 
3425 be CCCR, 3f 
3426 nop 
3427 ldx [%l2 + TTE_VPN], %l4 
3428 ldx [%l2 + TTE_DATA], %l5 
3429 wr %g0, ASI_DMMU, %asi 
3430 stxa %l4, [%g0 + TLB_TAG_ACCESS] %asi 
3431 stxa %l5, [%g0] ASI_DMMU_DATA_IN 
3432 wr %g0, ASI_IMMU, %asi 
3433 stxa %l4, [%g0 + TLB_TAG_ACCESS] %asi 
3434 stxa %l5, [%g0] ASI_IMMU_DATA_IN 
3435 membar #Sync 
3436 flush %l4 
3437 add %l2, PTE_SIZE, %l2 
3438 add %l3, 1, %l3 
3439 ba %xcc, 2b 
3440 nop 
34413: LDULNG [%l0 + (5f-1b)], %l1 
3442 LDULNG [%l0 + (7f-1b)], %g2 ! Load cpu_info address. 
3443 jmpl %l1, %g0 
3444 nop 
3445 
3446 .align PTRSZ 
34474: ULONG 0x0 
34485: ULONG 0x0 
34497: ULONG 0x0 
3450 _ALIGN 
34516: 
3452 
3453#define DATA(name) \ 
3454 .data ; \ 
3455 .align PTRSZ ; \ 
3456 .globl name ; \ 
3457name: 
3458 
3459DATA(mp_tramp_code) 
3460 POINTER 1b 
3461DATA(mp_tramp_code_len) 
3462 ULONG 6b-1b 
3463DATA(mp_tramp_tlb_slots) 
3464 ULONG 4b-1b 
3465DATA(mp_tramp_func) 
3466 ULONG 5b-1b 
3467DATA(mp_tramp_ci) 
3468 ULONG 7b-1b 
3469 
3470 .text 
3471 .align 32 
3472#endif /* MULTIPROCESSOR */ 
3473 
3474/* 3191/*
3475 * Ultra1 and Ultra2 CPUs use soft interrupts for everything. What we do 3192 * Ultra1 and Ultra2 CPUs use soft interrupts for everything. What we do
3476 * on a soft interrupt, is we should check which bits in ASR_SOFTINT(0x16) 3193 * on a soft interrupt, is we should check which bits in ASR_SOFTINT(0x16)
3477 * are set, handle those interrupts, then clear them by setting the 3194 * are set, handle those interrupts, then clear them by setting the
3478 * appropriate bits in ASR_CLEAR_SOFTINT(0x15). 3195 * appropriate bits in ASR_CLEAR_SOFTINT(0x15).
3479 * 3196 *
3480 * We have an array of 8 interrupt vector slots for each of 15 interrupt 3197 * We have an array of 8 interrupt vector slots for each of 15 interrupt
3481 * levels. If a vectored interrupt can be dispatched, the dispatch 3198 * levels. If a vectored interrupt can be dispatched, the dispatch
3482 * routine will place a pointer to an intrhand structure in one of 3199 * routine will place a pointer to an intrhand structure in one of
3483 * the slots. The interrupt handler will go through the list to look 3200 * the slots. The interrupt handler will go through the list to look
3484 * for an interrupt to dispatch. If it finds one it will pull it off 3201 * for an interrupt to dispatch. If it finds one it will pull it off
3485 * the list, free the entry, and call the handler. The code is like 3202 * the list, free the entry, and call the handler. The code is like
3486 * this: 3203 * this:
@@ -4423,27 +4140,32 @@ ENTRY_NOPROFILE(cpu_initialize) /* for c @@ -4423,27 +4140,32 @@ ENTRY_NOPROFILE(cpu_initialize) /* for c
4423 4140
4424 call %o1 ! Call routine 4141 call %o1 ! Call routine
4425 clr %o0 ! our frame arg is ignored 4142 clr %o0 ! our frame arg is ignored
4426 4143
4427 set 1f, %o0 ! Main should never come back here 4144 set 1f, %o0 ! Main should never come back here
4428 call _C_LABEL(panic) 4145 call _C_LABEL(panic)
4429 nop 4146 nop
4430 .data 4147 .data
44311: 41481:
4432 .asciz "main() returned\r\n" 4149 .asciz "main() returned\r\n"
4433 _ALIGN 4150 _ALIGN
4434 .text 4151 .text
4435 4152
4436#if defined(MULTIPROCESSOR) 4153 .align 8
 4154ENTRY(get_romtba)
 4155 retl
 4156 rdpr %tba, %o0
 4157
 4158#ifdef MULTIPROCESSOR
4437 /* 4159 /*
4438 * cpu_mp_startup is called with: 4160 * cpu_mp_startup is called with:
4439 * 4161 *
4440 * %g2 = cpu_args 4162 * %g2 = cpu_args
4441 */ 4163 */
4442ENTRY(cpu_mp_startup) 4164ENTRY(cpu_mp_startup)
4443 mov 1, %o0 4165 mov 1, %o0
4444 sllx %o0, 63, %o0 4166 sllx %o0, 63, %o0
4445 wr %o0, TICK_CMPR ! XXXXXXX clear and disable %tick_cmpr for now 4167 wr %o0, TICK_CMPR ! XXXXXXX clear and disable %tick_cmpr for now
4446 wrpr %g0, 0, %cleanwin 4168 wrpr %g0, 0, %cleanwin
4447 wrpr %g0, 0, %tl ! Make sure we're not in NUCLEUS mode 4169 wrpr %g0, 0, %tl ! Make sure we're not in NUCLEUS mode
4448 wrpr %g0, WSTATE_KERN, %wstate 4170 wrpr %g0, WSTATE_KERN, %wstate
4449 wrpr %g0, PSTATE_KERN, %pstate 4171 wrpr %g0, PSTATE_KERN, %pstate
@@ -4543,32 +4265,27 @@ ENTRY(cpu_mp_startup) @@ -4543,32 +4265,27 @@ ENTRY(cpu_mp_startup)
4543 wrpr %g0, PSTATE_INTR|PSTATE_PEF, %pstate 4265 wrpr %g0, PSTATE_INTR|PSTATE_PEF, %pstate
4544 wr %g0, FPRS_FEF, %fprs ! Turn on FPU 4266 wr %g0, FPRS_FEF, %fprs ! Turn on FPU
4545 4267
4546 call _C_LABEL(cpu_hatch) 4268 call _C_LABEL(cpu_hatch)
4547 clr %g4 4269 clr %g4
4548 4270
4549 b _C_LABEL(idle_loop) 4271 b _C_LABEL(idle_loop)
4550 clr %o0 4272 clr %o0
4551 4273
4552 NOTREACHED 4274 NOTREACHED
4553 4275
4554 .globl cpu_mp_startup_end 4276 .globl cpu_mp_startup_end
4555cpu_mp_startup_end: 4277cpu_mp_startup_end:
4556#endif /* MULTIPROCESSOR */ 4278#endif
4557 
4558 .align 8 
4559ENTRY(get_romtba) 
4560 retl 
4561 rdpr %tba, %o0 
4562 4279
4563/* 4280/*
4564 * openfirmware(cell* param); 4281 * openfirmware(cell* param);
4565 * 4282 *
4566 * OpenFirmware entry point 4283 * OpenFirmware entry point
4567 * 4284 *
4568 * If we're running in 32-bit mode we need to convert to a 64-bit stack 4285 * If we're running in 32-bit mode we need to convert to a 64-bit stack
4569 * and 64-bit cells. The cells we'll allocate off the stack for simplicity. 4286 * and 64-bit cells. The cells we'll allocate off the stack for simplicity.
4570 */ 4287 */
4571 .align 8 4288 .align 8
4572ENTRY(openfirmware) 4289ENTRY(openfirmware)
4573 sethi %hi(romp), %o4 4290 sethi %hi(romp), %o4
4574 andcc %sp, 1, %g0 4291 andcc %sp, 1, %g0
@@ -5327,700 +5044,95 @@ _C_LABEL(esigcode): @@ -5327,700 +5044,95 @@ _C_LABEL(esigcode):
5327#if !defined(_LP64) 5044#if !defined(_LP64)
5328 5045
5329#define SIGCODE_NAME sigcode 5046#define SIGCODE_NAME sigcode
5330#define ESIGCODE_NAME esigcode 5047#define ESIGCODE_NAME esigcode
5331#define SIGRETURN_NAME SYS_compat_16___sigreturn14 5048#define SIGRETURN_NAME SYS_compat_16___sigreturn14
5332#define EXIT_NAME SYS_exit 5049#define EXIT_NAME SYS_exit
5333 5050
5334#include "sigcode32.s" 5051#include "sigcode32.s"
5335 5052
5336#endif 5053#endif
5337#endif 5054#endif
5338 5055
5339/* 5056/*
5340 * Primitives 
5341 */ 
5342#ifdef ENTRY 
5343#undef ENTRY 
5344#endif 
5345 
5346#ifdef GPROF 
5347 .globl _mcount 
5348#define ENTRY(x) \ 
5349 .globl _C_LABEL(x); .proc 1; .type _C_LABEL(x),@function; \ 
5350_C_LABEL(x): ; \ 
5351 .data; \ 
5352 .align 8; \ 
53530: .uaword 0; .uaword 0; \ 
5354 .text; \ 
5355 save %sp, -CC64FSZ, %sp; \ 
5356 sethi %hi(0b), %o0; \ 
5357 call _mcount; \ 
5358 or %o0, %lo(0b), %o0; \ 
5359 restore 
5360#else 
5361#define ENTRY(x) .globl _C_LABEL(x); .proc 1; \ 
5362 .type _C_LABEL(x),@function; _C_LABEL(x): 
5363#endif 
5364#define ALTENTRY(x) .globl _C_LABEL(x); _C_LABEL(x): 
5365 
5366/* 
5367 * getfp() - get stack frame pointer 5057 * getfp() - get stack frame pointer
5368 */ 5058 */
5369ENTRY(getfp) 5059ENTRY(getfp)
5370 retl 5060 retl
5371 mov %fp, %o0 5061 mov %fp, %o0
5372 5062
5373/* 5063/*
5374 * copyinstr(fromaddr, toaddr, maxlength, &lencopied) 5064 * nothing MD to do in the idle loop
5375 * 
5376 * Copy a null terminated string from the user address space into 
5377 * the kernel address space. 
5378 */ 
5379ENTRY(copyinstr) 
5380 ! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied 
5381#ifdef NOTDEF_DEBUG 
5382 save %sp, -CC64FSZ, %sp 
5383 set 8f, %o0 
5384 mov %i0, %o1 
5385 mov %i1, %o2 
5386 mov %i2, %o3 
5387 call printf 
5388 mov %i3, %o4 
5389 restore 
5390 .data 
53918: .asciz "copyinstr: from=%x to=%x max=%x &len=%x\n" 
5392 _ALIGN 
5393 .text 
5394#endif 
5395 brgz,pt %o2, 1f ! Make sure len is valid 
5396 sethi %hi(CPCB), %o4 ! (first instr of copy) 
5397 retl 
5398 mov ENAMETOOLONG, %o0 
53991: 
5400 LDPTR [%o4 + %lo(CPCB)], %o4 ! catch faults 
5401 set Lcsdone, %o5 
5402 membar #Sync 
5403 STPTR %o5, [%o4 + PCB_ONFAULT] 
5404 
5405 mov %o1, %o5 ! save = toaddr; 
5406! XXX should do this in bigger chunks when possible 
54070: ! loop: 
5408 ldsba [%o0] ASI_AIUS, %g1 ! c = *fromaddr; 
5409 stb %g1, [%o1] ! *toaddr++ = c; 
5410 inc %o1 
5411 brz,a,pn %g1, Lcsdone ! if (c == NULL) 
5412 clr %o0 ! { error = 0; done; } 
5413 deccc %o2 ! if (--len > 0) { 
5414 bg,pt %icc, 0b ! fromaddr++; 
5415 inc %o0 ! goto loop; 
5416 ba,pt %xcc, Lcsdone ! } 
5417 mov ENAMETOOLONG, %o0 ! error = ENAMETOOLONG; 
5418 NOTREACHED 
5419 
5420/* 
5421 * copyoutstr(fromaddr, toaddr, maxlength, &lencopied) 
5422 * 
5423 * Copy a null terminated string from the kernel 
5424 * address space to the user address space. 
5425 */ 5065 */
5426ENTRY(copyoutstr) 5066ENTRY(cpu_idle)
5427 ! %o0 = fromaddr, %o1 = toaddr, %o2 = maxlen, %o3 = &lencopied 
5428#ifdef NOTDEF_DEBUG 
5429 save %sp, -CC64FSZ, %sp 
5430 set 8f, %o0 
5431 mov %i0, %o1 
5432 mov %i1, %o2 
5433 mov %i2, %o3 
5434 call printf 
5435 mov %i3, %o4 
5436 restore 
5437 .data 
54388: .asciz "copyoutstr: from=%x to=%x max=%x &len=%x\n" 
5439 _ALIGN 
5440 .text 
5441#endif 
5442 brgz,pt %o2, 1f ! Make sure len is valid 
5443 sethi %hi(CPCB), %o4 ! (first instr of copy) 
5444 retl 
5445 mov ENAMETOOLONG, %o0 
54461: 
5447 LDPTR [%o4 + %lo(CPCB)], %o4 ! catch faults 
5448 set Lcsdone, %o5 
5449 membar #Sync 
5450 STPTR %o5, [%o4 + PCB_ONFAULT] 
5451 
5452 mov %o1, %o5 ! save = toaddr; 
5453! XXX should do this in bigger chunks when possible 
54540: ! loop: 
5455 ldsb [%o0], %g1 ! c = *fromaddr; 
5456 stba %g1, [%o1] ASI_AIUS ! *toaddr++ = c; 
5457 inc %o1 
5458 brz,a,pn %g1, Lcsdone ! if (c == NULL) 
5459 clr %o0 ! { error = 0; done; } 
5460 deccc %o2 ! if (--len > 0) { 
5461 bg,pt %icc, 0b ! fromaddr++; 
5462 inc %o0 ! goto loop; 
5463 ! } 
5464 mov ENAMETOOLONG, %o0 ! error = ENAMETOOLONG; 
5465Lcsdone: ! done: 
5466 sub %o1, %o5, %o1 ! len = to - save; 
5467 brnz,a %o3, 1f ! if (lencopied) 
5468 STPTR %o1, [%o3] ! *lencopied = len; 
54691: 
5470 retl ! cpcb->pcb_onfault = 0; 
5471 STPTR %g0, [%o4 + PCB_ONFAULT]! return (error); 
5472 
5473/* 
5474 * copystr(fromaddr, toaddr, maxlength, &lencopied) 
5475 * 
5476 * Copy a null terminated string from one point to another in 
5477 * the kernel address space. (This is a leaf procedure, but 
5478 * it does not seem that way to the C compiler.) 
5479 */ 
5480ENTRY(copystr) 
5481 brgz,pt %o2, 0f ! Make sure len is valid 
5482 mov %o1, %o5 ! to0 = to; 
5483 retl 
5484 mov ENAMETOOLONG, %o0 
54850: ! loop: 
5486 ldsb [%o0], %o4 ! c = *from; 
5487 tst %o4 
5488 stb %o4, [%o1] ! *to++ = c; 
5489 be 1f ! if (c == 0) 
5490 inc %o1 ! goto ok; 
5491 deccc %o2 ! if (--len > 0) { 
5492 bg,a 0b ! from++; 
5493 inc %o0 ! goto loop; 
5494 b 2f ! } 
5495 mov ENAMETOOLONG, %o0 ! ret = ENAMETOOLONG; goto done; 
54961: ! ok: 
5497 clr %o0 ! ret = 0; 
54982: 
5499 sub %o1, %o5, %o1 ! len = to - to0; 
5500 tst %o3 ! if (lencopied) 
5501 bnz,a 3f 
5502 STPTR %o1, [%o3] ! *lencopied = len; 
55033: 
5504 retl 5067 retl
5505 nop 5068 nop
5506#ifdef DIAGNOSTIC 
55074: 
5508 sethi %hi(5f), %o0 
5509 call _C_LABEL(panic) 
5510 or %lo(5f), %o0, %o0 
5511 .data 
55125: 
5513 .asciz "copystr" 
5514 _ALIGN 
5515 .text 
5516#endif 
5517 5069
5518/* 5070/*
5519 * copyin(src, dst, len) 5071 * cpu_switchto() switches to an lwp to run and runs it, saving the
5520 * 5072 * current one away.
5521 * Copy specified amount of data from user space into the kernel. 
5522 * 5073 *
5523 * This is a modified version of memcpy that uses ASI_AIUS. When 5074 * stuct lwp * cpu_switchto(struct lwp *current, struct lwp *next)
5524 * memcpy is optimized to use block copy ASIs, this should be also. 5075 * Switch to the specified next LWP
 5076 * Arguments:
 5077 * i0 'struct lwp *' of the current LWP
 5078 * i1 'struct lwp *' of the LWP to switch to
 5079 * Returns:
 5080 * the old lwp switched away from
5525 */ 5081 */
5526 5082ENTRY(cpu_switchto)
5527ENTRY(copyin) 
5528! flushw ! Make sure we don't have stack probs & lose hibits of %o 
5529#ifdef NOTDEF_DEBUG 
5530 save %sp, -CC64FSZ, %sp 5083 save %sp, -CC64FSZ, %sp
5531 set 1f, %o0 
5532 mov %i0, %o1 
5533 mov %i1, %o2 
5534 call printf 
5535 mov %i2, %o3 
5536 restore 
5537 .data 
55381: .asciz "copyin: src=%x dest=%x len=%x\n" 
5539 _ALIGN 
5540 .text 
5541#endif 
5542 sethi %hi(CPCB), %o3 
5543 wr %g0, ASI_AIUS, %asi 
5544 LDPTR [%o3 + %lo(CPCB)], %o3 
5545 set Lcopyfault, %o4 
5546! mov %o7, %g7 ! save return address 
5547 membar #Sync 
5548 STPTR %o4, [%o3 + PCB_ONFAULT] 
5549 cmp %o2, BCOPY_SMALL 
5550Lcopyin_start: 
5551 bge,a Lcopyin_fancy ! if >= this many, go be fancy. 
5552 btst 7, %o0 ! (part of being fancy) 
5553 
5554 /* 5084 /*
5555 * Not much to copy, just do it a byte at a time. 5085 * REGISTER USAGE AT THIS POINT:
5556 */ 5086 * %l1 = newpcb
5557 deccc %o2 ! while (--len >= 0) 5087 * %l3 = new trapframe
5558 bl 1f 5088 * %l4 = new l->l_proc
55590: 5089 * %l5 = pcb of oldlwp
5560 inc %o0 5090 * %l6 = %hi(CPCB)
5561 ldsba [%o0 - 1] %asi, %o4! *dst++ = (++src)[-1]; 5091 * %l7 = %hi(CURLWP)
5562 stb %o4, [%o1] 5092 * %i0 = oldlwp
5563 deccc %o2 5093 * %i1 = lwp
5564 bge 0b 5094 * %o0 = tmp 1
5565 inc %o1 5095 * %o1 = tmp 2
55661: 5096 * %o2 = tmp 3
5567 ba Lcopyin_done 5097 * %o3 = tmp 4
5568 clr %o0 
5569 NOTREACHED 
5570 
5571 /* 
5572 * Plenty of data to copy, so try to do it optimally. 
5573 */ 5098 */
5574Lcopyin_fancy: 
5575 ! check for common case first: everything lines up. 
5576! btst 7, %o0 ! done already 
5577 bne 1f 
5578 .empty 
5579 btst 7, %o1 
5580 be,a Lcopyin_doubles 
5581 dec 8, %o2 ! if all lined up, len -= 8, goto copyin_doubes 
5582 
5583 ! If the low bits match, we can make these line up. 
55841: 
5585 xor %o0, %o1, %o3 ! t = src ^ dst; 
5586 btst 1, %o3 ! if (t & 1) { 
5587 be,a 1f 
5588 btst 1, %o0 ! [delay slot: if (src & 1)] 
5589 5099
5590 ! low bits do not match, must copy by bytes. 5100 flushw ! save all register windows except this one
55910: 5101 wrpr %g0, PSTATE_KERN, %pstate ! make sure we're on normal globals
5592 ldsba [%o0] %asi, %o4 ! do { 5102 ! with traps turned off
5593 inc %o0 ! (++dst)[-1] = *src++; 
5594 inc %o1 
5595 deccc %o2 
5596 bnz 0b ! } while (--len != 0); 
5597 stb %o4, [%o1 - 1] 
5598 ba Lcopyin_done 
5599 clr %o0 
5600 NOTREACHED 
5601 5103
5602 ! lowest bit matches, so we can copy by words, if nothing else 5104 brz,pn %i0, 1f
56031: 5105 sethi %hi(CPCB), %l6
5604 be,a 1f ! if (src & 1) { 
5605 btst 2, %o3 ! [delay slot: if (t & 2)] 
5606 5106
5607 ! although low bits match, both are 1: must copy 1 byte to align 5107 rdpr %pstate, %o1 ! oldpstate = %pstate;
5608 ldsba [%o0] %asi, %o4 ! *dst++ = *src++; 5108 LDPTR [%i0 + L_PCB], %l5
5609 stb %o4, [%o1] 
5610 inc %o0 
5611 inc %o1 
5612 dec %o2 ! len--; 
5613 btst 2, %o3 ! } [if (t & 2)] 
56141: 
5615 be,a 1f ! if (t & 2) { 
5616 btst 2, %o0 ! [delay slot: if (src & 2)] 
5617 dec 2, %o2 ! len -= 2; 
56180: 
5619 ldsha [%o0] %asi, %o4 ! do { 
5620 sth %o4, [%o1] ! *(short *)dst = *(short *)src; 
5621 inc 2, %o0 ! dst += 2, src += 2; 
5622 deccc 2, %o2 ! } while ((len -= 2) >= 0); 
5623 bge 0b 
5624 inc 2, %o1 
5625 b Lcopyin_mopb ! goto mop_up_byte; 
5626 btst 1, %o2 ! } [delay slot: if (len & 1)] 
5627 NOTREACHED 
5628 5109
5629 ! low two bits match, so we can copy by longwords 5110 stx %i7, [%l5 + PCB_PC]
56301: 5111 stx %i6, [%l5 + PCB_SP]
5631 be,a 1f ! if (src & 2) { 5112 sth %o1, [%l5 + PCB_PSTATE]
5632 btst 4, %o3 ! [delay slot: if (t & 4)] 
5633 5113
5634 ! although low 2 bits match, they are 10: must copy one short to align 5114 rdpr %cwp, %o2 ! Useless
5635 ldsha [%o0] %asi, %o4 ! (*short *)dst = *(short *)src; 5115 stb %o2, [%l5 + PCB_CWP]
5636 sth %o4, [%o1] 
5637 inc 2, %o0 ! dst += 2; 
5638 inc 2, %o1 ! src += 2; 
5639 dec 2, %o2 ! len -= 2; 
5640 btst 4, %o3 ! } [if (t & 4)] 
56411: 
5642 be,a 1f ! if (t & 4) { 
5643 btst 4, %o0 ! [delay slot: if (src & 4)] 
5644 dec 4, %o2 ! len -= 4; 
56450: 
5646 lduwa [%o0] %asi, %o4 ! do { 
5647 st %o4, [%o1] ! *(int *)dst = *(int *)src; 
5648 inc 4, %o0 ! dst += 4, src += 4; 
5649 deccc 4, %o2 ! } while ((len -= 4) >= 0); 
5650 bge 0b 
5651 inc 4, %o1 
5652 b Lcopyin_mopw ! goto mop_up_word_and_byte; 
5653 btst 2, %o2 ! } [delay slot: if (len & 2)] 
5654 NOTREACHED 
5655 5116
5656 ! low three bits match, so we can copy by doublewords 
56571: 51171:
5658 be 1f ! if (src & 4) { 5118 sethi %hi(CURLWP), %l7
5659 dec 8, %o2 ! [delay slot: len -= 8] 
5660 lduwa [%o0] %asi, %o4 ! *(int *)dst = *(int *)src; 
5661 st %o4, [%o1] 
5662 inc 4, %o0 ! dst += 4, src += 4, len -= 4; 
5663 inc 4, %o1 
5664 dec 4, %o2 ! } 
56651: 
5666Lcopyin_doubles: 
5667 ldxa [%o0] %asi, %g1 ! do { 
5668 stx %g1, [%o1] ! *(double *)dst = *(double *)src; 
5669 inc 8, %o0 ! dst += 8, src += 8; 
5670 deccc 8, %o2 ! } while ((len -= 8) >= 0); 
5671 bge Lcopyin_doubles 
5672 inc 8, %o1 
5673 
5674 ! check for a usual case again (save work) 
5675 btst 7, %o2 ! if ((len & 7) == 0) 
5676 be Lcopyin_done ! goto copyin_done; 
5677 
5678 btst 4, %o2 ! if ((len & 4)) == 0) 
5679 be,a Lcopyin_mopw ! goto mop_up_word_and_byte; 
5680 btst 2, %o2 ! [delay slot: if (len & 2)] 
5681 lduwa [%o0] %asi, %o4 ! *(int *)dst = *(int *)src; 
5682 st %o4, [%o1] 
5683 inc 4, %o0 ! dst += 4; 
5684 inc 4, %o1 ! src += 4; 
5685 btst 2, %o2 ! } [if (len & 2)] 
5686 
56871: 
5688 ! mop up trailing word (if present) and byte (if present). 
5689Lcopyin_mopw: 
5690 be Lcopyin_mopb ! no word, go mop up byte 
5691 btst 1, %o2 ! [delay slot: if (len & 1)] 
5692 ldsha [%o0] %asi, %o4 ! *(short *)dst = *(short *)src; 
5693 be Lcopyin_done ! if ((len & 1) == 0) goto done; 
5694 sth %o4, [%o1] 
5695 ldsba [%o0 + 2] %asi, %o4 ! dst[2] = src[2]; 
5696 stb %o4, [%o1 + 2] 
5697 ba Lcopyin_done 
5698 clr %o0 
5699 NOTREACHED 
5700 5119
5701 ! mop up trailing byte (if present). 5120 LDPTR [%i1 + L_PCB], %l1 ! newpcb = l->l_pcb;
5702Lcopyin_mopb: 
5703 be,a Lcopyin_done 
5704 nop 
5705 ldsba [%o0] %asi, %o4 
5706 stb %o4, [%o1] 
5707 
5708Lcopyin_done: 
5709 sethi %hi(CPCB), %o3 
5710! stb %o4,[%o1] ! Store last byte -- should not be needed 
5711 LDPTR [%o3 + %lo(CPCB)], %o3 
5712 membar #Sync 
5713 STPTR %g0, [%o3 + PCB_ONFAULT] 
5714 wr %g0, ASI_PRIMARY_NOFAULT, %asi ! Restore ASI 
5715 retl 
5716 clr %o0 ! return 0 
5717 5121
5718/* 
5719 * copyout(src, dst, len) 
5720 * 
5721 * Copy specified amount of data from kernel to user space. 
5722 * Just like copyin, except that the `dst' addresses are user space 
5723 * rather than the `src' addresses. 
5724 * 
5725 * This is a modified version of memcpy that uses ASI_AIUS. When 
5726 * memcpy is optimized to use block copy ASIs, this should be also. 
5727 */ 
5728 /* 
5729 * This needs to be reimplemented to really do the copy. 
5730 */ 
5731ENTRY(copyout) 
5732 /* 5122 /*
5733 * ******NOTE****** this depends on memcpy() not using %g7 5123 * Load the new lwp. To load, we must change stacks and
5734 */ 5124 * alter cpcb and the window control registers, hence we must
5735#ifdef NOTDEF_DEBUG 5125 * keep interrupts disabled.
5736 save %sp, -CC64FSZ, %sp 
5737 set 1f, %o0 
5738 mov %i0, %o1 
5739 set CTX_SECONDARY, %o4 
5740 mov %i1, %o2 
5741 ldxa [%o4] ASI_DMMU, %o4 
5742 call printf 
5743 mov %i2, %o3 
5744 restore 
5745 .data 
57461: .asciz "copyout: src=%x dest=%x len=%x ctx=%d\n" 
5747 _ALIGN 
5748 .text 
5749#endif 
5750Ldocopy: 
5751 sethi %hi(CPCB), %o3 
5752 wr %g0, ASI_AIUS, %asi 
5753 LDPTR [%o3 + %lo(CPCB)], %o3 
5754 set Lcopyfault, %o4 
5755! mov %o7, %g7 ! save return address 
5756 membar #Sync 
5757 STPTR %o4, [%o3 + PCB_ONFAULT] 
5758 cmp %o2, BCOPY_SMALL 
5759Lcopyout_start: 
5760 membar #StoreStore 
5761 bge,a Lcopyout_fancy ! if >= this many, go be fancy. 
5762 btst 7, %o0 ! (part of being fancy) 
5763 
5764 /* 
5765 * Not much to copy, just do it a byte at a time. 
5766 */ 
5767 deccc %o2 ! while (--len >= 0) 
5768 bl 1f 
5769 .empty 
57700: 
5771 inc %o0 
5772 ldsb [%o0 - 1], %o4! (++dst)[-1] = *src++; 
5773 stba %o4, [%o1] %asi 
5774 deccc %o2 
5775 bge 0b 
5776 inc %o1 
57771: 
5778 ba Lcopyout_done 
5779 clr %o0 
5780 NOTREACHED 
5781 
5782 /* 
5783 * Plenty of data to copy, so try to do it optimally. 
5784 */ 
5785Lcopyout_fancy: 
5786 ! check for common case first: everything lines up. 
5787! btst 7, %o0 ! done already 
5788 bne 1f 
5789 .empty 
5790 btst 7, %o1 
5791 be,a Lcopyout_doubles 
5792 dec 8, %o2 ! if all lined up, len -= 8, goto copyout_doubes 
5793 
5794 ! If the low bits match, we can make these line up. 
57951: 
5796 xor %o0, %o1, %o3 ! t = src ^ dst; 
5797 btst 1, %o3 ! if (t & 1) { 
5798 be,a 1f 
5799 btst 1, %o0 ! [delay slot: if (src & 1)] 
5800 
5801 ! low bits do not match, must copy by bytes. 
58020: 
5803 ldsb [%o0], %o4 ! do { 
5804 inc %o0 ! (++dst)[-1] = *src++; 
5805 inc %o1 
5806 deccc %o2 
5807 bnz 0b ! } while (--len != 0); 
5808 stba %o4, [%o1 - 1] %asi 
5809 ba Lcopyout_done 
5810 clr %o0 
5811 NOTREACHED 
5812 
5813 ! lowest bit matches, so we can copy by words, if nothing else 
58141: 
5815 be,a 1f ! if (src & 1) { 
5816 btst 2, %o3 ! [delay slot: if (t & 2)] 
5817 
5818 ! although low bits match, both are 1: must copy 1 byte to align 
5819 ldsb [%o0], %o4 ! *dst++ = *src++; 
5820 stba %o4, [%o1] %asi 
5821 inc %o0 
5822 inc %o1 
5823 dec %o2 ! len--; 
5824 btst 2, %o3 ! } [if (t & 2)] 
58251: 
5826 be,a 1f ! if (t & 2) { 
5827 btst 2, %o0 ! [delay slot: if (src & 2)] 
5828 dec 2, %o2 ! len -= 2; 
58290: 
5830 ldsh [%o0], %o4 ! do { 
5831 stha %o4, [%o1] %asi ! *(short *)dst = *(short *)src; 
5832 inc 2, %o0 ! dst += 2, src += 2; 
5833 deccc 2, %o2 ! } while ((len -= 2) >= 0); 
5834 bge 0b 
5835 inc 2, %o1 
5836 b Lcopyout_mopb ! goto mop_up_byte; 
5837 btst 1, %o2 ! } [delay slot: if (len & 1)] 
5838 NOTREACHED 
5839 
5840 ! low two bits match, so we can copy by longwords 
58411: 
5842 be,a 1f ! if (src & 2) { 
5843 btst 4, %o3 ! [delay slot: if (t & 4)] 
5844 
5845 ! although low 2 bits match, they are 10: must copy one short to align 
5846 ldsh [%o0], %o4 ! (*short *)dst = *(short *)src; 
5847 stha %o4, [%o1] %asi 
5848 inc 2, %o0 ! dst += 2; 
5849 inc 2, %o1 ! src += 2; 
5850 dec 2, %o2 ! len -= 2; 
5851 btst 4, %o3 ! } [if (t & 4)] 
58521: 
5853 be,a 1f ! if (t & 4) { 
5854 btst 4, %o0 ! [delay slot: if (src & 4)] 
5855 dec 4, %o2 ! len -= 4; 
58560: 
5857 lduw [%o0], %o4 ! do { 
5858 sta %o4, [%o1] %asi ! *(int *)dst = *(int *)src; 
5859 inc 4, %o0 ! dst += 4, src += 4; 
5860 deccc 4, %o2 ! } while ((len -= 4) >= 0); 
5861 bge 0b 
5862 inc 4, %o1 
5863 b Lcopyout_mopw ! goto mop_up_word_and_byte; 
5864 btst 2, %o2 ! } [delay slot: if (len & 2)] 
5865 NOTREACHED 
5866 
5867 ! low three bits match, so we can copy by doublewords 
58681: 
5869 be 1f ! if (src & 4) { 
5870 dec 8, %o2 ! [delay slot: len -= 8] 
5871 lduw [%o0], %o4 ! *(int *)dst = *(int *)src; 
5872 sta %o4, [%o1] %asi 
5873 inc 4, %o0 ! dst += 4, src += 4, len -= 4; 
5874 inc 4, %o1 
5875 dec 4, %o2 ! } 
58761: 
5877Lcopyout_doubles: 
5878 ldx [%o0], %g1 ! do { 
5879 stxa %g1, [%o1] %asi ! *(double *)dst = *(double *)src; 
5880 inc 8, %o0 ! dst += 8, src += 8; 
5881 deccc 8, %o2 ! } while ((len -= 8) >= 0); 
5882 bge Lcopyout_doubles 
5883 inc 8, %o1 
5884 
5885 ! check for a usual case again (save work) 
5886 btst 7, %o2 ! if ((len & 7) == 0) 
5887 be Lcopyout_done ! goto copyout_done; 
5888 
5889 btst 4, %o2 ! if ((len & 4)) == 0) 
5890 be,a Lcopyout_mopw ! goto mop_up_word_and_byte; 
5891 btst 2, %o2 ! [delay slot: if (len & 2)] 
5892 lduw [%o0], %o4 ! *(int *)dst = *(int *)src; 
5893 sta %o4, [%o1] %asi 
5894 inc 4, %o0 ! dst += 4; 
5895 inc 4, %o1 ! src += 4; 
5896 btst 2, %o2 ! } [if (len & 2)] 
5897 
58981: 
5899 ! mop up trailing word (if present) and byte (if present). 
5900Lcopyout_mopw: 
5901 be Lcopyout_mopb ! no word, go mop up byte 
5902 btst 1, %o2 ! [delay slot: if (len & 1)] 
5903 ldsh [%o0], %o4 ! *(short *)dst = *(short *)src; 
5904 be Lcopyout_done ! if ((len & 1) == 0) goto done; 
5905 stha %o4, [%o1] %asi 
5906 ldsb [%o0 + 2], %o4 ! dst[2] = src[2]; 
5907 stba %o4, [%o1 + 2] %asi 
5908 ba Lcopyout_done 
5909 clr %o0 
5910 NOTREACHED 
5911 
5912 ! mop up trailing byte (if present). 
5913Lcopyout_mopb: 
5914 be,a Lcopyout_done 
5915 nop 
5916 ldsb [%o0], %o4 
5917 stba %o4, [%o1] %asi 
5918 
5919Lcopyout_done: 
5920 sethi %hi(CPCB), %o3 
5921 LDPTR [%o3 + %lo(CPCB)], %o3 
5922 membar #Sync 
5923 STPTR %g0, [%o3 + PCB_ONFAULT] 
5924! jmp %g7 + 8 ! Original instr 
5925 wr %g0, ASI_PRIMARY_NOFAULT, %asi ! Restore ASI 
5926 membar #StoreStore|#StoreLoad 
5927 retl ! New instr 
5928 clr %o0 ! return 0 
5929 
5930! Copyin or copyout fault. Clear cpcb->pcb_onfault and return error. 
5931! Note that although we were in memcpy, there is no state to clean up; 
5932! the only special thing is that we have to return to [g7 + 8] rather than 
5933! [o7 + 8]. 
5934Lcopyfault: 
5935 sethi %hi(CPCB), %o3 
5936 LDPTR [%o3 + %lo(CPCB)], %o3 
5937 STPTR %g0, [%o3 + PCB_ONFAULT] 
5938 membar #StoreStore|#StoreLoad 
5939#ifdef NOTDEF_DEBUG 
5940 save %sp, -CC64FSZ, %sp 
5941 set 1f, %o0 
5942 call printf 
5943 nop 
5944 restore 
5945 .data 
59461: .asciz "copyfault: fault occurred\n" 
5947 _ALIGN 
5948 .text 
5949#endif 
5950 retl 
5951 wr %g0, ASI_PRIMARY_NOFAULT, %asi ! Restore ASI 
5952 
5953ENTRY(cpu_idle) 
5954 retl 
5955 nop 
5956 
5957 
5958/* 
5959 * cpu_switchto() switches to an lwp to run and runs it, saving the 
5960 * current one away. 
5961 * 
5962 * stuct lwp * cpu_switchto(struct lwp *current, struct lwp *next) 
5963 * Switch to the specified next LWP 
5964 * Arguments: 
5965 * i0 'struct lwp *' of the current LWP 
5966 * i1 'struct lwp *' of the LWP to switch to 
5967 * Returns: 
5968 * the old lwp switched away from 
5969 */ 
5970ENTRY(cpu_switchto) 
5971 save %sp, -CC64FSZ, %sp 
5972 /* 
5973 * REGISTER USAGE AT THIS POINT: 
5974 * %l1 = newpcb 
5975 * %l3 = new trapframe 
5976 * %l4 = new l->l_proc 
5977 * %l5 = pcb of oldlwp 
5978 * %l6 = %hi(CPCB) 
5979 * %l7 = %hi(CURLWP) 
5980 * %i0 = oldlwp 
5981 * %i1 = lwp 
5982 * %o0 = tmp 1 
5983 * %o1 = tmp 2 
5984 * %o2 = tmp 3 
5985 * %o3 = tmp 4 
5986 */ 
5987 
5988 flushw ! save all register windows except this one 
5989 wrpr %g0, PSTATE_KERN, %pstate ! make sure we're on normal globals 
5990 ! with traps turned off 
5991 
5992 brz,pn %i0, 1f 
5993 sethi %hi(CPCB), %l6 
5994 
5995 rdpr %pstate, %o1 ! oldpstate = %pstate; 
5996 LDPTR [%i0 + L_PCB], %l5 
5997 
5998 stx %i7, [%l5 + PCB_PC] 
5999 stx %i6, [%l5 + PCB_SP] 
6000 sth %o1, [%l5 + PCB_PSTATE] 
6001 
6002 rdpr %cwp, %o2 ! Useless 
6003 stb %o2, [%l5 + PCB_CWP] 
6004 
60051: 
6006 sethi %hi(CURLWP), %l7 
6007 
6008 LDPTR [%i1 + L_PCB], %l1 ! newpcb = l->l_pcb; 
6009 
6010 /* 
6011 * Load the new lwp. To load, we must change stacks and 
6012 * alter cpcb and the window control registers, hence we must 
6013 * keep interrupts disabled. 
6014 */ 5126 */
6015 5127
6016 STPTR %i1, [%l7 + %lo(CURLWP)] ! curlwp = l; 5128 STPTR %i1, [%l7 + %lo(CURLWP)] ! curlwp = l;
6017 STPTR %l1, [%l6 + %lo(CPCB)] ! cpcb = newpcb; 5129 STPTR %l1, [%l6 + %lo(CPCB)] ! cpcb = newpcb;
6018 5130
6019 ldx [%l1 + PCB_SP], %i6 5131 ldx [%l1 + PCB_SP], %i6
6020 ldx [%l1 + PCB_PC], %i7 5132 ldx [%l1 + PCB_PC], %i7
6021 5133
6022 wrpr %g0, 0, %otherwin ! These two insns should be redundant 5134 wrpr %g0, 0, %otherwin ! These two insns should be redundant
6023 wrpr %g0, 0, %canrestore 5135 wrpr %g0, 0, %canrestore
6024 rdpr %ver, %o3 5136 rdpr %ver, %o3
6025 and %o3, CWP, %o3 5137 and %o3, CWP, %o3
6026 wrpr %g0, %o3, %cleanwin 5138 wrpr %g0, %o3, %cleanwin
@@ -6128,313 +5240,26 @@ ENTRY(lwp_trampoline) @@ -6128,313 +5240,26 @@ ENTRY(lwp_trampoline)
6128 nop 5240 nop
6129 5241
6130 /* 5242 /*
6131 * Like lwp_trampoline, but for cpu_setfunc(), i.e. without newlwp 5243 * Like lwp_trampoline, but for cpu_setfunc(), i.e. without newlwp
6132 * arguement and will not call lwp_startup. 5244 * arguement and will not call lwp_startup.
6133 */ 5245 */
6134ENTRY(setfunc_trampoline) 5246ENTRY(setfunc_trampoline)
6135 call %l0 ! re-use current frame 5247 call %l0 ! re-use current frame
6136 mov %l1, %o0 5248 mov %l1, %o0
6137 ba,a,pt %icc, return_from_trap 5249 ba,a,pt %icc, return_from_trap
6138 nop 5250 nop
6139 5251
6140/* 5252/*
6141 * {fu,su}{,i}{byte,word} 
6142 */ 
6143ALTENTRY(fuiword) 
6144ENTRY(fuword) 
6145 btst 3, %o0 ! has low bits set... 
6146 bnz Lfsbadaddr ! go return -1 
6147 .empty 
6148 sethi %hi(CPCB), %o2 ! cpcb->pcb_onfault = Lfserr; 
6149 set Lfserr, %o3 
6150 LDPTR [%o2 + %lo(CPCB)], %o2 
6151 membar #LoadStore 
6152 STPTR %o3, [%o2 + PCB_ONFAULT] 
6153 membar #Sync 
6154 LDPTRA [%o0] ASI_AIUS, %o0 ! fetch the word 
6155 membar #Sync 
6156 STPTR %g0, [%o2 + PCB_ONFAULT]! but first clear onfault 
6157 retl ! phew, made it, return the word 
6158 membar #StoreStore|#StoreLoad 
6159 
6160Lfserr: 
6161 STPTR %g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault 
6162 membar #StoreStore|#StoreLoad 
6163Lfsbadaddr: 
6164#ifndef _LP64 
6165 mov -1, %o1 
6166#endif 
6167 retl ! and return error indicator 
6168 mov -1, %o0 
6169 
6170 /* 
6171 * This is just like Lfserr, but it's a global label that allows 
6172 * mem_access_fault() to check to see that we don't want to try to 
6173 * page in the fault. It's used by fuswintr() etc. 
6174 */ 
6175 .globl _C_LABEL(Lfsbail) 
6176_C_LABEL(Lfsbail): 
6177 STPTR %g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault 
6178 membar #StoreStore|#StoreLoad 
6179 retl ! and return error indicator 
6180 mov -1, %o0 
6181 
6182 /* 
6183 * Like fusword but callable from interrupt context. 
6184 * Fails if data isn't resident. 
6185 */ 
6186ENTRY(fuswintr) 
6187 sethi %hi(CPCB), %o2 ! cpcb->pcb_onfault = _Lfsbail; 
6188 LDPTR [%o2 + %lo(CPCB)], %o2 
6189 set _C_LABEL(Lfsbail), %o3 
6190 STPTR %o3, [%o2 + PCB_ONFAULT] 
6191 membar #Sync 
6192 lduha [%o0] ASI_AIUS, %o0 ! fetch the halfword 
6193 membar #Sync 
6194 STPTR %g0, [%o2 + PCB_ONFAULT]! but first clear onfault 
6195 retl ! made it 
6196 membar #StoreStore|#StoreLoad 
6197 
6198ENTRY(fusword) 
6199 sethi %hi(CPCB), %o2 ! cpcb->pcb_onfault = Lfserr; 
6200 LDPTR [%o2 + %lo(CPCB)], %o2 
6201 set Lfserr, %o3 
6202 STPTR %o3, [%o2 + PCB_ONFAULT] 
6203 membar #Sync 
6204 lduha [%o0] ASI_AIUS, %o0 ! fetch the halfword 
6205 membar #Sync 
6206 STPTR %g0, [%o2 + PCB_ONFAULT]! but first clear onfault 
6207 retl ! made it 
6208 membar #StoreStore|#StoreLoad 
6209 
6210ALTENTRY(fuibyte) 
6211ENTRY(fubyte) 
6212 sethi %hi(CPCB), %o2 ! cpcb->pcb_onfault = Lfserr; 
6213 LDPTR [%o2 + %lo(CPCB)], %o2 
6214 set Lfserr, %o3 
6215 STPTR %o3, [%o2 + PCB_ONFAULT] 
6216 membar #Sync 
6217 lduba [%o0] ASI_AIUS, %o0 ! fetch the byte 
6218 membar #Sync 
6219 STPTR %g0, [%o2 + PCB_ONFAULT]! but first clear onfault 
6220 retl ! made it 
6221 membar #StoreStore|#StoreLoad 
6222 
6223ALTENTRY(suiword) 
6224ENTRY(suword) 
6225 btst 3, %o0 ! or has low bits set ... 
6226 bnz Lfsbadaddr ! go return error 
6227 .empty 
6228 sethi %hi(CPCB), %o2 ! cpcb->pcb_onfault = Lfserr; 
6229 LDPTR [%o2 + %lo(CPCB)], %o2 
6230 set Lfserr, %o3 
6231 STPTR %o3, [%o2 + PCB_ONFAULT] 
6232 membar #Sync 
6233 STPTRA %o1, [%o0] ASI_AIUS ! store the word 
6234 membar #Sync 
6235 STPTR %g0, [%o2 + PCB_ONFAULT]! made it, clear onfault 
6236 membar #StoreStore|#StoreLoad 
6237 retl ! and return 0 
6238 clr %o0 
6239 
6240ENTRY(suswintr) 
6241 sethi %hi(CPCB), %o2 ! cpcb->pcb_onfault = _Lfsbail; 
6242 LDPTR [%o2 + %lo(CPCB)], %o2 
6243 set _C_LABEL(Lfsbail), %o3 
6244 STPTR %o3, [%o2 + PCB_ONFAULT] 
6245 membar #Sync 
6246 stha %o1, [%o0] ASI_AIUS ! store the halfword 
6247 membar #Sync 
6248 STPTR %g0, [%o2 + PCB_ONFAULT]! made it, clear onfault 
6249 membar #StoreStore|#StoreLoad 
6250 retl ! and return 0 
6251 clr %o0 
6252 
6253ENTRY(susword) 
6254 sethi %hi(CPCB), %o2 ! cpcb->pcb_onfault = Lfserr; 
6255 LDPTR [%o2 + %lo(CPCB)], %o2 
6256 set Lfserr, %o3 
6257 STPTR %o3, [%o2 + PCB_ONFAULT] 
6258 membar #Sync 
6259 stha %o1, [%o0] ASI_AIUS ! store the halfword 
6260 membar #Sync 
6261 STPTR %g0, [%o2 + PCB_ONFAULT]! made it, clear onfault 
6262 membar #StoreStore|#StoreLoad 
6263 retl ! and return 0 
6264 clr %o0 
6265 
6266ALTENTRY(suibyte) 
6267ENTRY(subyte) 
6268 sethi %hi(CPCB), %o2 ! cpcb->pcb_onfault = Lfserr; 
6269 LDPTR [%o2 + %lo(CPCB)], %o2 
6270 set Lfserr, %o3 
6271 STPTR %o3, [%o2 + PCB_ONFAULT] 
6272 membar #Sync 
6273 stba %o1, [%o0] ASI_AIUS ! store the byte 
6274 membar #Sync 
6275 STPTR %g0, [%o2 + PCB_ONFAULT]! made it, clear onfault 
6276 membar #StoreStore|#StoreLoad 
6277 retl ! and return 0 
6278 clr %o0 
6279 
6280/* probeget and probeset are meant to be used during autoconfiguration */ 
6281/* 
6282 * The following probably need to be changed, but to what I don't know. 
6283 */ 
6284 
6285/* 
6286 * uint64_t 
6287 * probeget(addr, asi, size) 
6288 * paddr_t addr; 
6289 * int asi; 
6290 * int size; 
6291 * 
6292 * Read or write a (byte,word,longword) from the given address. 
6293 * Like {fu,su}{byte,halfword,word} but our caller is supposed 
6294 * to know what he is doing... the address can be anywhere. 
6295 * 
6296 * We optimize for space, rather than time, here. 
6297 */ 
6298ENTRY(probeget) 
6299#ifndef _LP64 
6300 !! Shuffle the args around into LP64 format 
6301 COMBINE(%o0, %o1, %o0) 
6302 mov %o2, %o1 
6303 mov %o3, %o2 
6304#endif 
6305 mov %o2, %o4 
6306 ! %o0 = addr, %o1 = asi, %o4 = (1,2,4) 
6307 sethi %hi(CPCB), %o2 
6308 LDPTR [%o2 + %lo(CPCB)], %o2 ! cpcb->pcb_onfault = Lfserr; 
6309#ifdef _LP64 
6310 set _C_LABEL(Lfsbail), %o5 
6311#else 
6312 set _C_LABEL(Lfsprobe), %o5 
6313#endif 
6314 STPTR %o5, [%o2 + PCB_ONFAULT] 
6315 or %o0, 0x9, %o3 ! if (PHYS_ASI(asi)) { 
6316 sub %o3, 0x1d, %o3 
6317 brz,a %o3, 0f 
6318 mov %g0, %o5 
6319 DLFLUSH(%o0,%o5) ! flush cache line 
6320 ! } 
63210: 
6322#ifndef _LP64 
6323 rdpr %pstate, %g1 
6324 wrpr %g1, PSTATE_AM, %pstate 
6325#endif 
6326 btst 1, %o4 
6327 wr %o1, 0, %asi 
6328 membar #Sync 
6329 bz 0f ! if (len & 1) 
6330 btst 2, %o4 
6331 ba,pt %icc, 1f 
6332 lduba [%o0] %asi, %o0 ! value = *(char *)addr; 
63330: 
6334 bz 0f ! if (len & 2) 
6335 btst 4, %o4 
6336 ba,pt %icc, 1f 
6337 lduha [%o0] %asi, %o0 ! value = *(short *)addr; 
63380: 
6339 bz 0f ! if (len & 4) 
6340 btst 8, %o4 
6341 ba,pt %icc, 1f 
6342 lda [%o0] %asi, %o0 ! value = *(int *)addr; 
63430: 
6344 ldxa [%o0] %asi, %o0 ! value = *(long *)addr; 
63451:  
6346#ifndef _LP64 
6347 SPLIT(%o0, %o1) 
6348#endif 
6349 membar #Sync 
6350#ifndef _LP64 
6351 wrpr %g1, 0, %pstate 
6352#endif 
6353 brz %o5, 1f ! if (cache flush addr != 0) 
6354 nop 
6355 DLFLUSH2(%o5) ! flush cache line again 
63561: 
6357 wr %g0, ASI_PRIMARY_NOFAULT, %asi ! Restore default ASI  
6358 STPTR %g0, [%o2 + PCB_ONFAULT] 
6359 retl ! made it, clear onfault and return 
6360 membar #StoreStore|#StoreLoad 
6361 
6362 /* 
6363 * Fault handler for probeget 
6364 */ 
6365_C_LABEL(Lfsprobe): 
6366#ifndef _LP64 
6367 wrpr %g1, 0, %pstate 
6368#endif 
6369 STPTR %g0, [%o2 + PCB_ONFAULT]! error in r/w, clear pcb_onfault 
6370 mov -1, %o1 
6371 wr %g0, ASI_PRIMARY_NOFAULT, %asi ! Restore default ASI  
6372 membar #StoreStore|#StoreLoad 
6373 retl ! and return error indicator 
6374 mov -1, %o0 
6375 
6376/* 
6377 * probeset(addr, asi, size, val) 
6378 * paddr_t addr; 
6379 * int asi; 
6380 * int size; 
6381 * long val; 
6382 * 
6383 * As above, but we return 0 on success. 
6384 */ 
6385ENTRY(probeset) 
6386#ifndef _LP64 
6387 !! Shuffle the args around into LP64 format 
6388 COMBINE(%o0, %o1, %o0) 
6389 mov %o2, %o1 
6390 mov %o3, %o2 
6391 COMBINE(%o4, %o5, %o3) 
6392#endif 
6393 mov %o2, %o4 
6394 ! %o0 = addr, %o1 = asi, %o4 = (1,2,4), %o3 = val 
6395 sethi %hi(CPCB), %o2 ! Lfserr requires CPCB in %o2 
6396 LDPTR [%o2 + %lo(CPCB)], %o2 ! cpcb->pcb_onfault = Lfserr; 
6397 set _C_LABEL(Lfsbail), %o5 
6398 STPTR %o5, [%o2 + PCB_ONFAULT] 
6399 btst 1, %o4 
6400 wr %o1, 0, %asi 
6401 membar #Sync 
6402 bz 0f ! if (len & 1) 
6403 btst 2, %o4 
6404 ba,pt %icc, 1f 
6405 stba %o3, [%o0] %asi ! *(char *)addr = value; 
64060: 
6407 bz 0f ! if (len & 2) 
6408 btst 4, %o4 
6409 ba,pt %icc, 1f 
6410 stha %o3, [%o0] %asi ! *(short *)addr = value; 
64110: 
6412 bz 0f ! if (len & 4) 
6413 btst 8, %o4 
6414 ba,pt %icc, 1f 
6415 sta %o3, [%o0] %asi ! *(int *)addr = value; 
64160: 
6417 bz Lfserr ! if (len & 8) 
6418 ba,pt %icc, 1f 
6419 sta %o3, [%o0] %asi ! *(int *)addr = value; 
64201: membar #Sync 
6421 clr %o0 ! made it, clear onfault and return 0 
6422 wr %g0, ASI_PRIMARY_NOFAULT, %asi ! Restore default ASI  
6423 STPTR %g0, [%o2 + PCB_ONFAULT] 
6424 retl 
6425 membar #StoreStore|#StoreLoad 
6426 
6427/* 
6428 * pmap_zero_page_phys(pa) 5253 * pmap_zero_page_phys(pa)
6429 * 5254 *
6430 * Zero one page physically addressed 5255 * Zero one page physically addressed
6431 * 5256 *
6432 * Block load/store ASIs do not exist for physical addresses, 5257 * Block load/store ASIs do not exist for physical addresses,
6433 * so we won't use them. 5258 * so we won't use them.
6434 * 5259 *
6435 * We will execute a flush at the end to sync the I$. 5260 * We will execute a flush at the end to sync the I$.
6436 * 5261 *
6437 * This version expects to have the dcache_flush_page_all(pa) 5262 * This version expects to have the dcache_flush_page_all(pa)
6438 * to have been called before calling into here. 5263 * to have been called before calling into here.
6439 */ 5264 */
6440ENTRY(pmap_zero_page_phys) 5265ENTRY(pmap_zero_page_phys)
@@ -6761,429 +5586,26 @@ ENTRY(pseg_set_real) @@ -6761,429 +5586,26 @@ ENTRY(pseg_set_real)
6761 btst %g5, %o2 ! don't waste delay slot, check if new one is wired 5586 btst %g5, %o2 ! don't waste delay slot, check if new one is wired
6762 LDPTR [%o0 + PM_WIRED], %o1 ! gonna update wired count 5587 LDPTR [%o0 + PM_WIRED], %o1 ! gonna update wired count
6763 bnz,pt %xcc, 0f ! if wired changes, we predict it increments 5588 bnz,pt %xcc, 0f ! if wired changes, we predict it increments
6764 mov 1, %o4 5589 mov 1, %o4
6765 neg %o4 ! new is not wired -> decrement 5590 neg %o4 ! new is not wired -> decrement
67660: add %o1, %o4, %o1 55910: add %o1, %o4, %o1
6767 STPTR %o1, [%o0 + PM_WIRED] 5592 STPTR %o1, [%o0 + PM_WIRED]
67688: retl 55938: retl
6769 mov %g1, %o0 ! return %g1 5594 mov %g1, %o0 ! return %g1
6770 5595
67719: retl 55969: retl
6772 or %g1, 1, %o0 ! spare needed, return flags + 1 5597 or %g1, 1, %o0 ! spare needed, return flags + 1
6773 5598
6774/* 
6775 * kcopy() is exactly like bcopy except that it sets pcb_onfault such that 
6776 * when a fault occurs, it is able to return -1 to indicate this to the 
6777 * caller. 
6778 * 
6779 * In: %o0 = src, %o1 = dst, %o2 = len (note: %o0 is the SOURCE here). 
6780 * NOTE(review): on a fault the trap code transfers to Lkcerr; the error 
6781 * value returned in %o0 appears to be supplied by the trap handler before 
6782 * the jump — confirm against the trap code. 
6783 */ 
6779ENTRY(kcopy) 
6780#ifdef DEBUG 
6781	set	pmapdebug, %o4 
6782	ld	[%o4], %o4 
6783	btst	0x80, %o4	! PDB_COPY 
6784	bz,pt	%icc, 3f 
6785	 nop 
6786	save	%sp, -CC64FSZ, %sp 
6787	mov	%i0, %o1 
6788	set	2f, %o0 
6789	mov	%i1, %o2 
6790	call	printf 
6791	 mov	%i2, %o3 
6792!	ta	1; nop 
6793	restore 
6794	.data 
67952:	.asciz	"kcopy(%p->%p,%x)\n" 
6796	_ALIGN 
6797	.text 
67983: 
6799#endif 
6800	sethi	%hi(CPCB), %o5		! cpcb->pcb_onfault = Lkcerr; 
6801	LDPTR	[%o5 + %lo(CPCB)], %o5 
6802	set	Lkcerr, %o3 
6803	LDPTR	[%o5 + PCB_ONFAULT], %g1! save current onfault handler 
6804	membar	#LoadStore 
6805	STPTR	%o3, [%o5 + PCB_ONFAULT] 
6806	membar	#StoreStore|#StoreLoad 
6807 
6808	cmp	%o2, BCOPY_SMALL 
6809Lkcopy_start: 
6810	bge,a	Lkcopy_fancy	! if >= this many, go be fancy. 
6811	 btst	7, %o0		! (part of being fancy) 
6812 
6813	/* 
6814	 * Not much to copy, just do it a byte at a time. 
6815	 */ 
6816	deccc	%o2		! while (--len >= 0) 
6817	bl	1f 
6818	 .empty 
68190: 
6820	ldsb	[%o0], %o4	!	*dst++ = *src++; 
6821	inc	%o0 
6822	stb	%o4, [%o1] 
6823	deccc	%o2 
6824	bge	0b 
6825	 inc	%o1 
68261: 
6827	membar	#Sync		! Make sure all faults are processed 
6828	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler 
6829	membar	#StoreStore|#StoreLoad 
6830	retl 
6831	 clr	%o0 
6832	NOTREACHED 
6833 
6834	/* 
6835	 * Plenty of data to copy, so try to do it optimally. 
6836	 */ 
6837Lkcopy_fancy: 
6838	! check for common case first: everything lines up. 
6839!	btst	7, %o0		! done already 
6840	bne	1f 
6841	 .empty 
6842	btst	7, %o1 
6843	be,a	Lkcopy_doubles 
6844	 dec	8, %o2		! if all lined up, len -= 8, goto kcopy_doubles 
6845 
6846	! If the low bits match, we can make these line up. 
68471: 
6848	xor	%o0, %o1, %o3	! t = src ^ dst; 
6849	btst	1, %o3		! if (t & 1) { 
6850	be,a	1f 
6851	 btst	1, %o0		! [delay slot: if (src & 1)] 
6852 
6853	! low bits do not match, must copy by bytes. 
68540: 
6855	ldsb	[%o0], %o4	!	do { 
6856	inc	%o0		!		*dst++ = *src++; 
6857	stb	%o4, [%o1] 
6858	deccc	%o2 
6859	bnz	0b		!	} while (--len != 0); 
6860	 inc	%o1 
6861	membar	#Sync		! Make sure all traps are taken 
6862	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler 
6863	membar	#StoreStore|#StoreLoad 
6864	retl 
6865	 clr	%o0 
6866	NOTREACHED 
6867 
6868	! lowest bit matches, so we can copy by words, if nothing else 
68691: 
6870	be,a	1f		! if (src & 1) { 
6871	 btst	2, %o3		! [delay slot: if (t & 2)] 
6872 
6873	! although low bits match, both are 1: must copy 1 byte to align 
6874	ldsb	[%o0], %o4	!	*dst++ = *src++; 
6875	inc	%o0 
6876	stb	%o4, [%o1] 
6877	dec	%o2		!	len--; 
6878	inc	%o1 
6879	btst	2, %o3		! } [if (t & 2)] 
68801: 
6881	be,a	1f		! if (t & 2) { 
6882	 btst	2, %o0		! [delay slot: if (src & 2)] 
6883	dec	2, %o2		!	len -= 2; 
68840: 
6885	ldsh	[%o0], %o4	!	do { 
6886	inc	2, %o0		!		src += 2, dst += 2; 
6887	sth	%o4, [%o1]	!		*(short *)dst = *(short *)src; 
6888	deccc	2, %o2		!	} while ((len -= 2) >= 0); 
6889	bge	0b 
6890	 inc	2, %o1 
6891	b	Lkcopy_mopb	!	goto mop_up_byte; 
6892	 btst	1, %o2		! } [delay slot: if (len & 1)] 
6893	NOTREACHED 
6894 
6895	! low two bits match, so we can copy by longwords 
68961: 
6897	be,a	1f		! if (src & 2) { 
6898	 btst	4, %o3		! [delay slot: if (t & 4)] 
6899 
6900	! although low 2 bits match, they are 10: must copy one short to align 
6901	ldsh	[%o0], %o4	!	*(short *)dst = *(short *)src; 
6902	inc	2, %o0		!	src += 2; 
6903	sth	%o4, [%o1] 
6904	dec	2, %o2		!	len -= 2; 
6905	inc	2, %o1		!	dst += 2; 
6906	btst	4, %o3		! } [if (t & 4)] 
69071: 
6908	be,a	1f		! if (t & 4) { 
6909	 btst	4, %o0		! [delay slot: if (src & 4)] 
6910	dec	4, %o2		!	len -= 4; 
69110: 
6912	ld	[%o0], %o4	!	do { 
6913	inc	4, %o0		!		src += 4, dst += 4; 
6914	st	%o4, [%o1]	!		*(int *)dst = *(int *)src; 
6915	deccc	4, %o2		!	} while ((len -= 4) >= 0); 
6916	bge	0b 
6917	 inc	4, %o1 
6918	b	Lkcopy_mopw	!	goto mop_up_word_and_byte; 
6919	 btst	2, %o2		! } [delay slot: if (len & 2)] 
6920	NOTREACHED 
6921 
6922	! low three bits match, so we can copy by doublewords 
69231: 
6924	be	1f		! if (src & 4) { 
6925	 dec	8, %o2		! [delay slot: len -= 8] 
6926	ld	[%o0], %o4	!	*(int *)dst = *(int *)src; 
6927	inc	4, %o0		!	src += 4, dst += 4, len -= 4; 
6928	st	%o4, [%o1] 
6929	dec	4, %o2		! } 
6930	inc	4, %o1 
69311: 
6932Lkcopy_doubles: 
6933	ldx	[%o0], %g5	! do { 
6934	inc	8, %o0		!	src += 8, dst += 8; 
6935	stx	%g5, [%o1]	!	*(double *)dst = *(double *)src; 
6936	deccc	8, %o2		! } while ((len -= 8) >= 0); 
6937	bge	Lkcopy_doubles 
6938	 inc	8, %o1 
6939 
6940	! check for a usual case again (save work) 
6941	btst	7, %o2		! if ((len & 7) == 0) 
6942	be	Lkcopy_done	!	goto kcopy_done; 
6943 
6944	 btst	4, %o2		! if ((len & 4) == 0)	[btst is the delay slot above] 
6945	be,a	Lkcopy_mopw	!	goto mop_up_word_and_byte; 
6946	 btst	2, %o2		! [delay slot: if (len & 2)] 
6947	ld	[%o0], %o4	!	*(int *)dst = *(int *)src; 
6948	inc	4, %o0		!	src += 4; 
6949	st	%o4, [%o1] 
6950	inc	4, %o1		!	dst += 4; 
6951	btst	2, %o2		! } [if (len & 2)] 
6952 
69531: 
6954	! mop up trailing word (if present) and byte (if present). 
6955Lkcopy_mopw: 
6956	be	Lkcopy_mopb	! no word, go mop up byte 
6957	 btst	1, %o2		! [delay slot: if (len & 1)] 
6958	ldsh	[%o0], %o4	! *(short *)dst = *(short *)src; 
6959	be	Lkcopy_done	! if ((len & 1) == 0) goto done; 
6960	 sth	%o4, [%o1] 
6961	ldsb	[%o0 + 2], %o4	! dst[2] = src[2]; 
6962	stb	%o4, [%o1 + 2] 
6963	membar	#Sync		! Make sure all traps are taken 
6964	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler 
6965	membar	#StoreStore|#StoreLoad 
6966	retl 
6967	 clr	%o0 
6968	NOTREACHED 
6969 
6970	! mop up trailing byte (if present). 
6971Lkcopy_mopb: 
6972	bne,a	1f 
6973	 ldsb	[%o0], %o4 
6974 
6975Lkcopy_done: 
6976	membar	#Sync		! Make sure all traps are taken 
6977	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler 
6978	membar	#StoreStore|#StoreLoad 
6979	retl 
6980	 clr	%o0 
6981	NOTREACHED 
6982 
69831: 
6984	stb	%o4, [%o1] 
6985	membar	#Sync		! Make sure all traps are taken 
6986	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler 
6987	membar	#StoreStore|#StoreLoad 
6988	retl 
6989	 clr	%o0 
6990	NOTREACHED 
6991 
6992Lkcerr:				! fault recovery entry (installed in pcb_onfault) 
6993#ifdef DEBUG 
6994	set	pmapdebug, %o4 
6995	ld	[%o4], %o4 
6996	btst	0x80, %o4	! PDB_COPY 
6997	bz,pt	%icc, 3f 
6998	 nop 
6999	save	%sp, -CC64FSZ, %sp 
7000	set	2f, %o0 
7001	call	printf 
7002	 nop 
7003!	ta	1; nop 
7004	restore 
7005	.data 
70062:	.asciz	"kcopy error\n" 
7007	_ALIGN 
7008	.text 
70093: 
7010#endif 
7011	STPTR	%g1, [%o5 + PCB_ONFAULT]! restore fault handler 
7012	retl			! and return error indicator 
7013	 membar	#StoreStore|#StoreLoad 
7014	NOTREACHED 
7015 
7016#ifdef MULTIPROCESSOR 
7017/* 
7018 * IPI handler to store the current FPU state. 
7019 * void sparc64_ipi_save_fpstate(void *); 
7020 * 
7021 * On entry: 
7022 *	%g2 = lwp 
7023 * 
7024 * Saves %fsr/%gsr and whichever FPU register halves are dirty (per FPRS 
7025 * DL/DU) into the lwp's fpstate, then clears fplwp.  Runs entirely on 
7026 * the %g registers of the interrupt vector; exits via ret_from_intr_vector. 
7027 */ 
7024ENTRY(sparc64_ipi_save_fpstate) 
7025	sethi	%hi(FPLWP), %g1 
7026	LDPTR	[%g1 + %lo(FPLWP)], %g3 
7027	cmp	%g3, %g2 
7028	bne,pn	CCCR, 7f	! skip if fplwp has changed 
7029				! NOTE(review): the rdpr below is this branch's 
7030				! delay slot and also executes when branching to 
7031				! 7f; %g2/%g3 are only scratch there — confirm 
7030	 rdpr	%pstate, %g2	! enable FP before we begin 
7031	rd	%fprs, %g5 
7032	wr	%g0, FPRS_FEF, %fprs 
7033	or	%g2, PSTATE_PEF, %g2 
7034	wrpr	%g2, 0, %pstate 
7035 
7036	LDPTR	[%g3 + L_FPSTATE], %g3 
7037	stx	%fsr, [%g3 + FS_FSR]	! f->fs_fsr = getfsr(); 
7038	rd	%gsr, %g2		! Save %gsr 
7039	st	%g2, [%g3 + FS_GSR] 
7040#if FS_REGS > 0 
7041	add	%g3, FS_REGS, %g3 
7042#endif 
7043#ifdef DIAGNOSTIC 
7044	btst	BLOCK_ALIGN, %g3	! Needs to be re-executed 
7045	bnz,pn	%icc, 6f		! Check alignment 
7046#endif 
7047	 st	%g0, [%g3 + FS_QSIZE - FS_REGS]	! f->fs_qsize = 0; 
7048	btst	FPRS_DL|FPRS_DU, %g5	! Both FPU halves clean? 
7049	bz,pt	%icc, 5f		! Then skip it 
7050 
7051	 mov	CTX_PRIMARY, %g2	! (delay slot of the bz above) 
7052	ldxa	[%g2] ASI_DMMU, %g6 
7053	membar	#LoadStore 
7054	stxa	%g0, [%g2] ASI_DMMU	! Switch MMU to kernel primary context 
7055	membar	#Sync 
7056 
7057	btst	FPRS_DL, %g5		! Lower FPU clean? 
7058	bz,a,pt	%icc, 1f		! Then skip it, but upper FPU not clean 
7059	 add	%g3, 2*BLOCK_SIZE, %g3	! Skip a block 
7060 
7061	stda	%f0, [%g3] ASI_BLK_P	! f->fs_f0 = etc; 
7062	inc	BLOCK_SIZE, %g3 
7063	stda	%f16, [%g3] ASI_BLK_P 
7064 
7065	btst	FPRS_DU, %g5		! Upper FPU clean? 
7066	bz,pt	%icc, 2f		! Then skip it 
7067	 inc	BLOCK_SIZE, %g3 
70681: 
7069	stda	%f32, [%g3] ASI_BLK_P 
7070	inc	BLOCK_SIZE, %g3 
7071	stda	%f48, [%g3] ASI_BLK_P 
70722: 
7073	membar	#Sync			! Finish operation so we can 
7074	brz,pn	%g6, 5f			! Skip if context 0 
7075	 nop 
7076	stxa	%g6, [%g2] ASI_DMMU	! Restore primary context 
7077	membar	#Sync 
70785: 
7079	wr	%g0, FPRS_FEF, %fprs	! Mark FPU clean 
7080	STPTR	%g0, [%g1 + %lo(FPLWP)]	! fplwp = NULL 
70817: 
7082	IPIEVC_INC(IPI_EVCNT_FPU_SYNCH,%g2,%g3) 
7083	ba,a	ret_from_intr_vector 
7084	 nop 
7085 
7086#ifdef DIAGNOSTIC 
7087	!! 
7088	!! Damn thing is *NOT* aligned on a 64-byte boundary 
7089	!!  
70906: 
7091	wr	%g0, FPRS_FEF, %fprs 
7092	! XXX -- we should panic instead of silently entering debugger 
7093	ta	1 
7094	nop 
7095	ba,a	ret_from_intr_vector 
7096	 nop 
7097#endif 
7098 
7099/* 
7100 * IPI handler to drop the current FPU state. 
7101 * void sparc64_ipi_drop_fpstate(void *); 
7102 * 
7103 * On entry: 
7104 *	%g2 = lwp 
7105 * 
7106 * Does not save any FPU registers: it atomically clears fplwp (only if 
7107 * it still points at %g2) so the state is simply abandoned. 
7108 */ 
7106ENTRY(sparc64_ipi_drop_fpstate) 
7107	rdpr	%pstate, %g1 
7108	wr	%g0, FPRS_FEF, %fprs	! Mark FPU clean (clears DL/DU) 
7109	or	%g1, PSTATE_PEF, %g1 
7110	wrpr	%g1, 0, %pstate		! enable FP access 
7111	set	FPLWP, %g1 
7112	CASPTR	[%g1] ASI_N, %g2, %g0	! fplwp = NULL if fplwp == %g2 
7113	membar	#Sync			! Should not be needed due to retry 
7114	IPIEVC_INC(IPI_EVCNT_FPU_FLUSH,%g2,%g3) 
7115	ba,a	ret_from_intr_vector 
7116	 nop 
7117 
7118/* 
7119 * IPI handlers to flush one page from the D-cache. 
7120 * void sparc64_ipi_dcache_flush_page_usiii(paddr_t pa, int line_size) 
7121 * void sparc64_ipi_dcache_flush_page_us(paddr_t pa, int line_size) 
7122 * 
7123 * On entry: 
7124 *	%g2 = pa 
7125 *	%g3 = line_size 
7126 */ 
7127ENTRY(sparc64_ipi_dcache_flush_page_usiii) 
7128	set	NBPG, %g1 
7129	add	%g2, %g1, %g1	! end address 
7130 
71311:				! invalidate one cache line per iteration 
7132	stxa	%g0, [%g2] ASI_DCACHE_INVALIDATE 
7133	add	%g2, %g3, %g2	! advance by line_size 
7134	cmp	%g2, %g1 
7135	bl,pt	%xcc, 1b 
7136	 nop 
7137 
7138	sethi	%hi(KERNBASE), %g5 
7139	flush	%g5		! flush instruction pipe / sync I$ 
7140	membar	#Sync 
7141	ba,a	ret_from_intr_vector 
7142	 nop 
7143 
7144ENTRY(sparc64_ipi_dcache_flush_page_us) 
7145	! US-I/II variant: no per-address invalidate ASI, so walk every 
7146	! D-cache tag and clear the ones whose tag matches this page's PA. 
7147	mov	-1, %g1		! Generate mask for tag: bits [29..2] 
7146	srlx	%g2, 13-2, %g5	! Tag is PA bits <40:13> in bits <29:2> 
7147	clr	%g4		! %g4 = current tag offset in the cache 
7148	srl	%g1, 2, %g1	! Now we have bits <29:0> set 
7149	set	(2*NBPG), %g7	! total tag space to scan 
7150	ba,pt	%icc, 1f 
7151	 andn	%g1, 3, %g1	! Now we have bits <29:2> set 
7152 
7153	.align 8 
71541: 
7155	ldxa	[%g4] ASI_DCACHE_TAG, %g6	! read next tag 
7156	mov	%g4, %g2	! remember offset of the tag just read 
7157	deccc	32, %g7 
7158	bl,pn	%icc, 2f	! done when the whole tag space is scanned 
7159	 inc	32, %g4 
7160 
7161	xor	%g6, %g5, %g6	! compare tag against this page's PA bits 
7162	andcc	%g6, %g1, %g0 
7163	bne,pt	%xcc, 1b	! no match: next tag 
7164	 membar	#LoadStore 
7165 
7166	stxa	%g0, [%g2] ASI_DCACHE_TAG	! match: clear the tag 
7167	ba,pt	%icc, 1b 
7168	 membar	#StoreLoad 
71692: 
7170 
7171	sethi	%hi(KERNBASE), %g5 
7172	flush	%g5		! sync I$ 
7173	membar	#Sync 
7174	ba,a	ret_from_intr_vector 
7175	 nop 
7176#endif 
7177 5599
7178/* 5600/* 
7179 * clearfpstate() 5601 * clearfpstate() 
7180 * 5602 * 
7181 * Drops the current fpu state, without saving it. 5603 * Drops the current fpu state, without saving it. 
7182 */ 5604 */ 
7183ENTRY(clearfpstate) 5605ENTRY(clearfpstate) 
7184	rdpr	%pstate, %o1		! enable FPU 5606	rdpr	%pstate, %o1		! enable FPU 
7185	wr	%g0, FPRS_FEF, %fprs	! FEF only: marks both FPU halves clean 5607	wr	%g0, FPRS_FEF, %fprs 
7186	or	%o1, PSTATE_PEF, %o1 5608	or	%o1, PSTATE_PEF, %o1 
7187	retl 5609	retl 
7188	 wrpr	%o1, 0, %pstate		! PEF write happens in the retl delay slot 5610	 wrpr	%o1, 0, %pstate 
7189 5611 