Sat Jan 22 10:58:44 2011 UTC ()
Fix MULTIPROCESSOR curlwp/curcpu handling... missed in previous commit.


(skrll)
diff -r1.11 -r1.12 src/sys/lib/libkern/arch/hppa/bcopy.S

cvs diff -r1.11 -r1.12 src/sys/lib/libkern/arch/hppa/bcopy.S

--- src/sys/lib/libkern/arch/hppa/bcopy.S 2010/03/20 23:31:30 1.11
+++ src/sys/lib/libkern/arch/hppa/bcopy.S 2011/01/22 10:58:44 1.12
@@ -1,609 +1,612 @@ @@ -1,609 +1,612 @@
1/* $NetBSD: bcopy.S,v 1.11 2010/03/20 23:31:30 chs Exp $ */ 1/* $NetBSD: bcopy.S,v 1.12 2011/01/22 10:58:44 skrll Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2002 The NetBSD Foundation, Inc. 4 * Copyright (c) 2002 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Matthew Fredette. 8 * by Matthew Fredette.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright 15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the 16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution. 17 * documentation and/or other materials provided with the distribution.
18 * 18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE. 29 * POSSIBILITY OF SUCH DAMAGE.
30 */ 30 */
31 31
32/* 32/*
33 * Copy routines for NetBSD/hppa. 33 * Copy routines for NetBSD/hppa.
34 */ 34 */
35 35
 36#include "opt_multiprocessor.h"
 37
36#undef _LOCORE 38#undef _LOCORE
37#define _LOCORE /* XXX fredette - unfortunate */ 39#define _LOCORE /* XXX fredette - unfortunate */
38 40
 41#include <machine/cpu.h>
39#include <machine/asm.h> 42#include <machine/asm.h>
40#include <machine/frame.h> 43#include <machine/frame.h>
41#include <machine/reg.h> 44#include <machine/reg.h>
42 45
43#if defined(LIBC_SCCS) && !defined(lint) 46#if defined(LIBC_SCCS) && !defined(lint)
44RCSID("$NetBSD: bcopy.S,v 1.11 2010/03/20 23:31:30 chs Exp $") 47RCSID("$NetBSD: bcopy.S,v 1.12 2011/01/22 10:58:44 skrll Exp $")
45#endif /* LIBC_SCCS and not lint */ 48#endif /* LIBC_SCCS and not lint */
46 49
47/* 50/*
48 * The stbys instruction is a little asymmetric. When (%r2 & 3) 51 * The stbys instruction is a little asymmetric. When (%r2 & 3)
49 * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma. You 52 * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma. You
50 * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2) 53 * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2)
51 * worked like stws,mb. But it doesn't. 54 * worked like stws,mb. But it doesn't.
52 * 55 *
53 * This macro works around this problem. It requires that %t2 56 * This macro works around this problem. It requires that %t2
54 * hold the number of bytes that will be written by this store 57 * hold the number of bytes that will be written by this store
55 * (meaning that it ranges from one to four). 58 * (meaning that it ranges from one to four).
56 * 59 *
57 * Watch the delay-slot trickery here. The comib is used to set 60 * Watch the delay-slot trickery here. The comib is used to set
58 * up which instruction, either the stws or the stbys, is run 61 * up which instruction, either the stws or the stbys, is run
59 * in the delay slot of the b instruction. 62 * in the delay slot of the b instruction.
60 */ 63 */
61#define _STBYS_E_M(r, dst_spc, dst_off) \ 64#define _STBYS_E_M(r, dst_spc, dst_off) \
62 comib,<> 4, %t2, 4 ! \ 65 comib,<> 4, %t2, 4 ! \
63 b 4 ! \ 66 b 4 ! \
64 stws,mb r, -4(dst_spc, dst_off) ! \ 67 stws,mb r, -4(dst_spc, dst_off) ! \
65 stbys,e,m r, 0(dst_spc, dst_off) 68 stbys,e,m r, 0(dst_spc, dst_off)
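
    The choice the macro makes is easier to see outside assembly. Below is a
    minimal C sketch (not part of bcopy.S) of the dispatch: when the store must
    write all four bytes, stbys,e,m at an aligned address would store nothing,
    so a full-word pre-decrementing store is used instead. Memory is modelled
    as a toy byte array, and the helper names are invented; they only
    approximate the effect of stws,mb and stbys,e,m.

    #include <stdint.h>

    static uint8_t ram[64];                         /* toy big-endian memory */

    static void
    store_word_be(unsigned long off, uint32_t r)    /* approximates stws */
    {
            for (int i = 0; i < 4; i++)
                    ram[off + i] = (uint8_t)(r >> (24 - 8 * i));
    }

    static void
    store_bytes_short_end(unsigned long off, uint32_t r, unsigned t2)
    {
            /* Approximates stbys,e: the t2 bytes just below off, taken
             * from the most significant end of r. */
            for (unsigned i = 0; i < t2; i++)
                    ram[off - t2 + i] = (uint8_t)(r >> (24 - 8 * i));
    }

    static void
    stbys_e_m_model(unsigned t2, unsigned long *dst_off, uint32_t r)
    {
            if (t2 == 4) {                  /* the comib,<> 4, %t2 case */
                    *dst_off -= 4;          /* stws,mb r, -4(dst_off) */
                    store_word_be(*dst_off, r);
            } else {
                    store_bytes_short_end(*dst_off, r, t2);
            }
    }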
66 69
67/* 70/*
68 * This macro does a bulk copy with no shifting. cmplt and m are 71 * This macro does a bulk copy with no shifting. cmplt and m are
69 * the completer and displacement multiplier, respectively, for 72 * the completer and displacement multiplier, respectively, for
70 * the load and store instructions. 73 * the load and store instructions.
71 */ 74 */
72#define _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \ 75#define _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
73 ! \ 76 ! \
74 /* ! \ 77 /* ! \
75 * Loop storing 16 bytes at a time. Since count ! \ 78 * Loop storing 16 bytes at a time. Since count ! \
76 * may be > INT_MAX, we have to be careful and ! \ 79 * may be > INT_MAX, we have to be careful and ! \
77 * avoid comparisons that treat it as a signed ! \ 80 * avoid comparisons that treat it as a signed ! \
78 * quantity, until after this loop, when count ! \ 81 * quantity, until after this loop, when count ! \
79 * is guaranteed to be less than 16. ! \ 82 * is guaranteed to be less than 16. ! \
80 */ ! \ 83 */ ! \
81 comib,>>=,n 15, count, _LABEL(_skip16) ! \ 84 comib,>>=,n 15, count, _LABEL(_skip16) ! \
82.label _LABEL(_loop16) ! \ 85.label _LABEL(_loop16) ! \
83 addi -16, count, count ! \ 86 addi -16, count, count ! \
84 ldws,cmplt m*4(src_spc, src_off), %t1 ! \ 87 ldws,cmplt m*4(src_spc, src_off), %t1 ! \
85 ldws,cmplt m*4(src_spc, src_off), %t2 ! \ 88 ldws,cmplt m*4(src_spc, src_off), %t2 ! \
86 ldws,cmplt m*4(src_spc, src_off), %t3 ! \ 89 ldws,cmplt m*4(src_spc, src_off), %t3 ! \
87 ldws,cmplt m*4(src_spc, src_off), %t4 ! \ 90 ldws,cmplt m*4(src_spc, src_off), %t4 ! \
88 stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ 91 stws,cmplt %t1, m*4(dst_spc, dst_off) ! \
89 stws,cmplt %t2, m*4(dst_spc, dst_off) ! \ 92 stws,cmplt %t2, m*4(dst_spc, dst_off) ! \
90 stws,cmplt %t3, m*4(dst_spc, dst_off) ! \ 93 stws,cmplt %t3, m*4(dst_spc, dst_off) ! \
91 comib,<< 15, count, _LABEL(_loop16) ! \ 94 comib,<< 15, count, _LABEL(_loop16) ! \
92 stws,cmplt %t4, m*4(dst_spc, dst_off) ! \ 95 stws,cmplt %t4, m*4(dst_spc, dst_off) ! \
93.label _LABEL(_skip16) ! \ 96.label _LABEL(_skip16) ! \
94 ! \ 97 ! \
95 /* Loop storing 4 bytes at a time. */ ! \ 98 /* Loop storing 4 bytes at a time. */ ! \
96 addib,<,n -4, count, _LABEL(_skip4) ! \ 99 addib,<,n -4, count, _LABEL(_skip4) ! \
97.label _LABEL(_loop4) ! \ 100.label _LABEL(_loop4) ! \
98 ldws,cmplt m*4(src_spc, src_off), %t1 ! \ 101 ldws,cmplt m*4(src_spc, src_off), %t1 ! \
99 addib,>= -4, count, _LABEL(_loop4) ! \ 102 addib,>= -4, count, _LABEL(_loop4) ! \
100 stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ 103 stws,cmplt %t1, m*4(dst_spc, dst_off) ! \
101.label _LABEL(_skip4) ! \ 104.label _LABEL(_skip4) ! \
102 /* Restore the correct count. */ ! \ 105 /* Restore the correct count. */ ! \
103 addi 4, count, count ! \ 106 addi 4, count, count ! \
104 ! \ 107 ! \
105.label _LABEL(_do1) ! \ 108.label _LABEL(_do1) ! \
106 ! \ 109 ! \
107 /* Loop storing 1 byte at a time. */ ! \ 110 /* Loop storing 1 byte at a time. */ ! \
108 addib,<,n -1, count, _LABEL(_skip1) ! \ 111 addib,<,n -1, count, _LABEL(_skip1) ! \
109.label _LABEL(_loop1) ! \ 112.label _LABEL(_loop1) ! \
110 ldbs,cmplt m*1(src_spc, src_off), %t1 ! \ 113 ldbs,cmplt m*1(src_spc, src_off), %t1 ! \
111 addib,>= -1, count, _LABEL(_loop1) ! \ 114 addib,>= -1, count, _LABEL(_loop1) ! \
112 stbs,cmplt %t1, m*1(dst_spc, dst_off) ! \ 115 stbs,cmplt %t1, m*1(dst_spc, dst_off) ! \
113.label _LABEL(_skip1) ! \ 116.label _LABEL(_skip1) ! \
114 /* Restore the correct count. */ ! \ 117 /* Restore the correct count. */ ! \
115 b _LABEL(_done) ! \ 118 b _LABEL(_done) ! \
116 addi 1, count, count 119 addi 1, count, count
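
    For readers who do not think in PA-RISC, the shape of _COPY is the usual
    three-tier copy loop. A minimal C restatement (not from the source; it
    assumes both pointers are already word-aligned and only models the forward
    direction) shows why count is kept unsigned: the 16-byte test mirrors the
    unsigned comib,>>= so values above INT_MAX behave correctly.

    #include <stddef.h>
    #include <stdint.h>

    static void
    copy_aligned_forward(uint32_t *dst, const uint32_t *src, size_t count)
    {
            /* 16 bytes (four words) per iteration. */
            while (count > 15) {
                    dst[0] = src[0];
                    dst[1] = src[1];
                    dst[2] = src[2];
                    dst[3] = src[3];
                    src += 4;
                    dst += 4;
                    count -= 16;
            }
            /* One word per iteration. */
            while (count >= 4) {
                    *dst++ = *src++;
                    count -= 4;
            }
            /* Byte-at-a-time tail. */
            {
                    unsigned char *db = (unsigned char *)dst;
                    const unsigned char *sb = (const unsigned char *)src;

                    while (count-- > 0)
                            *db++ = *sb++;
            }
    }

    The assembly additionally parameterises the direction and addressing mode
    through cmplt and m, which the sketch does not attempt to model.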
117 120
118/* 121/*
119 * This macro is definitely strange. It exists purely to 122 * This macro is definitely strange. It exists purely to
120 * allow the _COPYS macro to be reused, but because it 123 * allow the _COPYS macro to be reused, but because it
121 * requires this long attempt to explain it, I'm starting 124 * requires this long attempt to explain it, I'm starting
122 * to doubt the value of that. 125 * to doubt the value of that.
123 * 126 *
124 * Part of the expansion of the _COPYS macro below are loops 127 * Part of the expansion of the _COPYS macro below are loops
125 * that copy four words or one word at a time, performing shifts 128 * that copy four words or one word at a time, performing shifts
126 * to get data to line up correctly in the destination buffer. 129 * to get data to line up correctly in the destination buffer.
127 * 130 *
128 * The _COPYS macro is used when copying backwards, as well 131 * The _COPYS macro is used when copying backwards, as well
129 * as forwards. The 4-word loop always loads into %t1, %t2, %t3, 132 * as forwards. The 4-word loop always loads into %t1, %t2, %t3,
130 * and %t4 in that order. This means that when copying forward, 133 * and %t4 in that order. This means that when copying forward,
131 * %t1 will have the word from the lowest address, and %t4 will 134 * %t1 will have the word from the lowest address, and %t4 will
132 * have the word from the highest address. When copying 135 * have the word from the highest address. When copying
133 * backwards, the opposite is true. 136 * backwards, the opposite is true.
134 * 137 *
135 * The shift instructions need pairs of registers with adjacent 138 * The shift instructions need pairs of registers with adjacent
136 * words, with the register containing the word from the lowest 139 * words, with the register containing the word from the lowest
 137 * address *always* coming first. It is this asymmetry that 140 * address *always* coming first. It is this asymmetry that
138 * gives rise to this macro - depending on which direction 141 * gives rise to this macro - depending on which direction
139 * we're copying in, these ordered pairs are different. 142 * we're copying in, these ordered pairs are different.
140 * 143 *
141 * Fortunately, we can compute those register numbers at compile 144 * Fortunately, we can compute those register numbers at compile
142 * time, and assemble them manually into a shift instruction. 145 * time, and assemble them manually into a shift instruction.
143 * That's what this macro does. 146 * That's what this macro does.
144 * 147 *
145 * This macro takes two arguments. n ranges from 0 to 3 and 148 * This macro takes two arguments. n ranges from 0 to 3 and
146 * is the "shift number", i.e., n = 0 means we're doing the 149 * is the "shift number", i.e., n = 0 means we're doing the
147 * shift for what will be the first store. 150 * shift for what will be the first store.
148 * 151 *
149 * m is the displacement multiplier from the _COPYS macro call. 152 * m is the displacement multiplier from the _COPYS macro call.
150 * This is 1 for a forward copy and -1 for a backwards copy. 153 * This is 1 for a forward copy and -1 for a backwards copy.
151 * So, the ((m + 1) / 2) term yields 0 for a backwards copy and 154 * So, the ((m + 1) / 2) term yields 0 for a backwards copy and
152 * 1 for a forward copy, and the ((m - 1) / 2) term yields 155 * 1 for a forward copy, and the ((m - 1) / 2) term yields
153 * 0 for a forward copy, and -1 for a backwards copy. 156 * 0 for a forward copy, and -1 for a backwards copy.
154 * These terms are used to discriminate the register computations 157 * These terms are used to discriminate the register computations
155 * below. 158 * below.
156 * 159 *
157 * When copying forward, then, the first register used with 160 * When copying forward, then, the first register used with
158 * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or %t4, 161 * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or %t4,
159 * which matches _COPYS' requirement that the word last loaded 162 * which matches _COPYS' requirement that the word last loaded
160 * be in %t4. The first register used for the second vshd 163 * be in %t4. The first register used for the second vshd
161 * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or %t1. 164 * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or %t1.
162 * And so on to %t2 and %t3. 165 * And so on to %t2 and %t3.
163 * 166 *
164 * When copying forward, the second register used with the first 167 * When copying forward, the second register used with the first
165 * vshd will be (19 + (3 - ((n + 0) & 3)), or %t1. It will 168 * vshd will be (19 + (3 - ((n + 0) & 3)), or %t1. It will
166 * continue to be %t2, then %t3, and finally %t4. 169 * continue to be %t2, then %t3, and finally %t4.
167 * 170 *
168 * When copying backwards, the values for the first and second 171 * When copying backwards, the values for the first and second
169 * register for each vshd are reversed from the forwards case. 172 * register for each vshd are reversed from the forwards case.
170 * (Symmetry reclaimed!) Proving this is "left as an exercise 173 * (Symmetry reclaimed!) Proving this is "left as an exercise
171 * for the reader" (remember the different discriminating values!) 174 * for the reader" (remember the different discriminating values!)
172 */ 175 */
173#define _VSHD(n, m, t) \ 176#define _VSHD(n, m, t) \
174 .word (0xd0000000 | \ 177 .word (0xd0000000 | \
175 ((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16) | \ 178 ((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16) | \
176 ((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21) | \ 179 ((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21) | \
177 (t)) 180 (t))
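
    The register arithmetic can be checked mechanically. The small, standalone
    C program below (not part of the source) repeats _VSHD's computation at run
    time and reads the two register fields back out of the positions the macro
    put them in. For a forward copy (m = 1) with n = 0 it prints registers 19
    and 22, i.e. %t4 and %t1, matching the "vshd %t4, %t1, %r1" annotation in
    _COPYS below.

    #include <stdint.h>
    #include <stdio.h>

    /* Same arithmetic as _VSHD, done at run time so it can be inspected. */
    static uint32_t
    vshd_word(int n, int m, int t)
    {
            int first  = 19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3));
            int second = 19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3));

            return 0xd0000000u | ((uint32_t)first << 16) |
                ((uint32_t)second << 21) | (uint32_t)t;
    }

    int
    main(void)
    {
            uint32_t w = vshd_word(0, 1, 1);        /* forward copy, n = 0 */

            printf("first=%%r%u second=%%r%u target=%%r%u\n",
                (unsigned)((w >> 16) & 0x1f),
                (unsigned)((w >> 21) & 0x1f),
                (unsigned)(w & 0x1f));
            return 0;
    }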
178 181
179/* 182/*
180 * This macro does a bulk copy with shifting. cmplt and m are 183 * This macro does a bulk copy with shifting. cmplt and m are
181 * the completer and displacement multiplier, respectively, for 184 * the completer and displacement multiplier, respectively, for
182 * the load and store instructions. It is assumed that the 185 * the load and store instructions. It is assumed that the
183 * word last loaded is already in %t4. 186 * word last loaded is already in %t4.
184 */ 187 */
185#define _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \ 188#define _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \
186 ! \ 189 ! \
187 /* ! \ 190 /* ! \
188 * Loop storing 16 bytes at a time. Since count ! \ 191 * Loop storing 16 bytes at a time. Since count ! \
189 * may be > INT_MAX, we have to be careful and ! \ 192 * may be > INT_MAX, we have to be careful and ! \
190 * avoid comparisons that treat it as a signed ! \ 193 * avoid comparisons that treat it as a signed ! \
191 * quantity, until after this loop, when count ! \ 194 * quantity, until after this loop, when count ! \
192 * is guaranteed to be less than 16. ! \ 195 * is guaranteed to be less than 16. ! \
193 */ ! \ 196 */ ! \
194 comib,>>=,n 15, count, _LABEL(S_skip16) ! \ 197 comib,>>=,n 15, count, _LABEL(S_skip16) ! \
195.label _LABEL(S_loop16) ! \ 198.label _LABEL(S_loop16) ! \
196 addi -16, count, count ! \ 199 addi -16, count, count ! \
197 ldws,cmplt m*4(src_spc, src_off), %t1 ! \ 200 ldws,cmplt m*4(src_spc, src_off), %t1 ! \
198 ldws,cmplt m*4(src_spc, src_off), %t2 ! \ 201 ldws,cmplt m*4(src_spc, src_off), %t2 ! \
199 ldws,cmplt m*4(src_spc, src_off), %t3 ! \ 202 ldws,cmplt m*4(src_spc, src_off), %t3 ! \
200 _VSHD(0, m, 1) /* vshd %t4, %t1, %r1 */ ! \ 203 _VSHD(0, m, 1) /* vshd %t4, %t1, %r1 */ ! \
201 ldws,cmplt m*4(src_spc, src_off), %t4 ! \ 204 ldws,cmplt m*4(src_spc, src_off), %t4 ! \
202 _VSHD(1, m, 22) /* vshd %t1, %t2, %t1 */ ! \ 205 _VSHD(1, m, 22) /* vshd %t1, %t2, %t1 */ ! \
203 _VSHD(2, m, 21) /* vshd %t2, %t3, %t2 */ ! \ 206 _VSHD(2, m, 21) /* vshd %t2, %t3, %t2 */ ! \
204 _VSHD(3, m, 20) /* vshd %t3, %t4, %t3 */ ! \ 207 _VSHD(3, m, 20) /* vshd %t3, %t4, %t3 */ ! \
205 stws,cmplt %r1, m*4(dst_spc, dst_off) ! \ 208 stws,cmplt %r1, m*4(dst_spc, dst_off) ! \
206 stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ 209 stws,cmplt %t1, m*4(dst_spc, dst_off) ! \
207 stws,cmplt %t2, m*4(dst_spc, dst_off) ! \ 210 stws,cmplt %t2, m*4(dst_spc, dst_off) ! \
208 comib,<< 15, count, _LABEL(S_loop16) ! \ 211 comib,<< 15, count, _LABEL(S_loop16) ! \
209 stws,cmplt %t3, m*4(dst_spc, dst_off) ! \ 212 stws,cmplt %t3, m*4(dst_spc, dst_off) ! \
210.label _LABEL(S_skip16) ! \ 213.label _LABEL(S_skip16) ! \
211 ! \ 214 ! \
212 /* Loop storing 4 bytes at a time. */ ! \ 215 /* Loop storing 4 bytes at a time. */ ! \
213 addib,<,n -4, count, _LABEL(S_skip4) ! \ 216 addib,<,n -4, count, _LABEL(S_skip4) ! \
214.label _LABEL(S_loop4) ! \ 217.label _LABEL(S_loop4) ! \
215 ldws,cmplt m*4(src_spc, src_off), %t1 ! \ 218 ldws,cmplt m*4(src_spc, src_off), %t1 ! \
216 _VSHD(0, m, 1) /* into %r1 (1) */ ! \ 219 _VSHD(0, m, 1) /* into %r1 (1) */ ! \
217 copy %t1, %t4 ! \ 220 copy %t1, %t4 ! \
218 addib,>= -4, count, _LABEL(S_loop4) ! \ 221 addib,>= -4, count, _LABEL(S_loop4) ! \
219 stws,cmplt %r1, m*4(dst_spc, dst_off) ! \ 222 stws,cmplt %r1, m*4(dst_spc, dst_off) ! \
220.label _LABEL(S_skip4) ! \ 223.label _LABEL(S_skip4) ! \
221 ! \ 224 ! \
222 /* ! \ 225 /* ! \
223 * We now need to "back up" src_off by the ! \ 226 * We now need to "back up" src_off by the ! \
224 * number of bytes remaining in the FIFO ! \ 227 * number of bytes remaining in the FIFO ! \
225 * (i.e., the number of bytes remaining in %t4), ! \ 228 * (i.e., the number of bytes remaining in %t4), ! \
226 * because (the correct) count still includes ! \ 229 * because (the correct) count still includes ! \
 227	 * these bytes, and we intend to keep it that	! \ 230	 * these bytes, and we intend to keep it that	! \
228 * way, and finish with the single-byte copier. ! \ 231 * way, and finish with the single-byte copier. ! \
229 * ! \ 232 * ! \
230 * The number of bytes remaining in the FIFO is ! \ 233 * The number of bytes remaining in the FIFO is ! \
231 * related to the shift count, so recover it, ! \ 234 * related to the shift count, so recover it, ! \
232 * restoring the correct count at the same time. ! \ 235 * restoring the correct count at the same time. ! \
233 */ ! \ 236 */ ! \
234 mfctl %cr11, %t1 ! \ 237 mfctl %cr11, %t1 ! \
235 addi 4, count, count ! \ 238 addi 4, count, count ! \
236 shd %r0, %t1, 3, %t1 ! \ 239 shd %r0, %t1, 3, %t1 ! \
237 ! \ 240 ! \
238 /* ! \ 241 /* ! \
239 * If we're copying forward, the shift count ! \ 242 * If we're copying forward, the shift count ! \
240 * is the number of bytes remaining in the ! \ 243 * is the number of bytes remaining in the ! \
241 * FIFO, and we want to subtract it from src_off. ! \ 244 * FIFO, and we want to subtract it from src_off. ! \
242 * If we're copying backwards, (4 - shift count) ! \ 245 * If we're copying backwards, (4 - shift count) ! \
243 * is the number of bytes remaining in the FIFO, ! \ 246 * is the number of bytes remaining in the FIFO, ! \
244 * and we want to add it to src_off. ! \ 247 * and we want to add it to src_off. ! \
245 * ! \ 248 * ! \
246 * We observe that x + (4 - y) = x - (y - 4), ! \ 249 * We observe that x + (4 - y) = x - (y - 4), ! \
247 * and introduce this instruction to add -4 when ! \ 250 * and introduce this instruction to add -4 when ! \
248 * m is -1, although this does mean one extra ! \ 251 * m is -1, although this does mean one extra ! \
249 * instruction in the forward case. ! \ 252 * instruction in the forward case. ! \
250 */ ! \ 253 */ ! \
251 addi 4*((m - 1) / 2), %t1, %t1 ! \ 254 addi 4*((m - 1) / 2), %t1, %t1 ! \
252 ! \ 255 ! \
253 /* Now branch to the byte-at-a-time loop. */ ! \ 256 /* Now branch to the byte-at-a-time loop. */ ! \
254 b _LABEL(_do1) ! \ 257 b _LABEL(_do1) ! \
255 sub src_off, %t1, src_off 258 sub src_off, %t1, src_off
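
    The "back up src_off" arithmetic at the end of _COPYS folds the forward and
    reverse cases into one subtraction. Restated in C (names invented; sar_bits
    stands for the value read from %cr11):

    /* Illustrative only: by how much src_off moves back after the word loops. */
    static long
    backed_up_src_off(long src_off, unsigned sar_bits, int m)
    {
            int bytes = (int)(sar_bits >> 3);       /* shd %r0, %t1, 3, %t1 */

            bytes += 4 * ((m - 1) / 2);             /* adds -4 only when m == -1 */
            return src_off - bytes;                 /* sub src_off, %t1, src_off */
    }

    For m = 1 this subtracts the shift-count bytes still sitting in the FIFO;
    for m = -1 it subtracts (shift - 4), i.e. adds (4 - shift), which is exactly
    the x + (4 - y) = x - (y - 4) identity the comment relies on.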
256 259
257/* 260/*
258 * This macro copies a region in the forward direction. 261 * This macro copies a region in the forward direction.
259 */ 262 */
260#define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \ 263#define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \
261 ! \ 264 ! \
262 /* ! \ 265 /* ! \
263 * Since in the shifting-left case we will ! \ 266 * Since in the shifting-left case we will ! \
264 * load 8 bytes before checking count, to ! \ 267 * load 8 bytes before checking count, to ! \
265 * keep things simple, branch to the byte ! \ 268 * keep things simple, branch to the byte ! \
266 * copier unless we're copying at least 8. ! \ 269 * copier unless we're copying at least 8. ! \
267 */ ! \ 270 */ ! \
268 comib,>>,n 8, count, _LABEL(_do1) ! \ 271 comib,>>,n 8, count, _LABEL(_do1) ! \
269 ! \ 272 ! \
270 /* ! \ 273 /* ! \
271 * Once we 4-byte align the source offset, ! \ 274 * Once we 4-byte align the source offset, ! \
272 * figure out how many bytes from the region ! \ 275 * figure out how many bytes from the region ! \
273 * will be in the first 4-byte word we read. ! \ 276 * will be in the first 4-byte word we read. ! \
274 * Ditto for writing the destination offset. ! \ 277 * Ditto for writing the destination offset. ! \
275 */ ! \ 278 */ ! \
276 extru src_off, 31, 2, %t1 ! \ 279 extru src_off, 31, 2, %t1 ! \
277 extru dst_off, 31, 2, %t2 ! \ 280 extru dst_off, 31, 2, %t2 ! \
278 subi 4, %t1, %t1 ! \ 281 subi 4, %t1, %t1 ! \
279 subi 4, %t2, %t2 ! \ 282 subi 4, %t2, %t2 ! \
280 ! \ 283 ! \
281 /* ! \ 284 /* ! \
282 * Calculate the byte shift required. A ! \ 285 * Calculate the byte shift required. A ! \
283 * positive value means a source 4-byte word ! \ 286 * positive value means a source 4-byte word ! \
284 * has to be shifted to the right to line up ! \ 287 * has to be shifted to the right to line up ! \
285 * as a destination 4-byte word. ! \ 288 * as a destination 4-byte word. ! \
286 */ ! \ 289 */ ! \
287 sub %t1, %t2, %t1 ! \ 290 sub %t1, %t2, %t1 ! \
288 ! \ 291 ! \
289 /* 4-byte align src_off. */ ! \ 292 /* 4-byte align src_off. */ ! \
290 depi 0, 31, 2, src_off ! \ 293 depi 0, 31, 2, src_off ! \
291 ! \ 294 ! \
292 /* ! \ 295 /* ! \
293 * It's somewhat important to note that this ! \ 296 * It's somewhat important to note that this ! \
294 * code thinks of count as "the number of bytes ! \ 297 * code thinks of count as "the number of bytes ! \
295 * that haven't been stored yet", as opposed to ! \ 298 * that haven't been stored yet", as opposed to ! \
296 * "the number of bytes that haven't been copied ! \ 299 * "the number of bytes that haven't been copied ! \
297 * yet". The distinction is subtle, but becomes ! \ 300 * yet". The distinction is subtle, but becomes ! \
298 * apparent at the end of the shifting code, where ! \ 301 * apparent at the end of the shifting code, where ! \
299 * we "back up" src_off to correspond to count, ! \ 302 * we "back up" src_off to correspond to count, ! \
300 * as opposed to flushing the FIFO. ! \ 303 * as opposed to flushing the FIFO. ! \
301 * ! \ 304 * ! \
302 * We calculated above how many bytes our first ! \ 305 * We calculated above how many bytes our first ! \
303 * store will store, so update count now. ! \ 306 * store will store, so update count now. ! \
304 * ! \ 307 * ! \
305 * If the shift is zero, strictly as an optimization ! \ 308 * If the shift is zero, strictly as an optimization ! \
306 * we use a copy loop that does no shifting. ! \ 309 * we use a copy loop that does no shifting. ! \
307 */ ! \ 310 */ ! \
308 comb,<> %r0, %t1, _LABEL(_shifting) ! \ 311 comb,<> %r0, %t1, _LABEL(_shifting) ! \
309 sub count, %t2, count ! \ 312 sub count, %t2, count ! \
310 ! \ 313 ! \
311 /* Load and store the first word. */ ! \ 314 /* Load and store the first word. */ ! \
312 ldws,ma 4(src_spc, src_off), %t4 ! \ 315 ldws,ma 4(src_spc, src_off), %t4 ! \
313 stbys,b,m %t4, 4(dst_spc, dst_off) ! \ 316 stbys,b,m %t4, 4(dst_spc, dst_off) ! \
314 ! \ 317 ! \
315 /* Do the rest of the copy. */ ! \ 318 /* Do the rest of the copy. */ ! \
316 _COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1) ! \ 319 _COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1) ! \
317 ! \ 320 ! \
318.label _LABEL(_shifting) ! \ 321.label _LABEL(_shifting) ! \
319 ! \ 322 ! \
320 /* ! \ 323 /* ! \
321 * If shift < 0, we need to shift words to the ! \ 324 * If shift < 0, we need to shift words to the ! \
322 * left. Since we can't do this directly, we ! \ 325 * left. Since we can't do this directly, we ! \
323 * adjust the shift so it's a shift to the right ! \ 326 * adjust the shift so it's a shift to the right ! \
324 * and load the first word into the high word of ! \ 327 * and load the first word into the high word of ! \
325 * the FIFO. Otherwise, we load a zero into the ! \ 328 * the FIFO. Otherwise, we load a zero into the ! \
326 * high word of the FIFO. ! \ 329 * high word of the FIFO. ! \
327 */ ! \ 330 */ ! \
328 comb,<= %r0, %t1, _LABEL(_shiftingrt) ! \ 331 comb,<= %r0, %t1, _LABEL(_shiftingrt) ! \
329 copy %r0, %t3 ! \ 332 copy %r0, %t3 ! \
330 addi 4, %t1, %t1 ! \ 333 addi 4, %t1, %t1 ! \
331 ldws,ma 4(src_spc, src_off), %t3 ! \ 334 ldws,ma 4(src_spc, src_off), %t3 ! \
332.label _LABEL(_shiftingrt) ! \ 335.label _LABEL(_shiftingrt) ! \
333 ! \ 336 ! \
334 /* ! \ 337 /* ! \
335 * Turn the shift byte count into a bit count, ! \ 338 * Turn the shift byte count into a bit count, ! \
336 * load the next word, set the Shift Amount ! \ 339 * load the next word, set the Shift Amount ! \
337 * Register, and form and store the first word. ! \ 340 * Register, and form and store the first word. ! \
338 */ ! \ 341 */ ! \
339 sh3add %t1, %r0, %t1 ! \ 342 sh3add %t1, %r0, %t1 ! \
340 ldws,ma 4(src_spc, src_off), %t4 ! \ 343 ldws,ma 4(src_spc, src_off), %t4 ! \
341 mtctl %t1, %cr11 ! \ 344 mtctl %t1, %cr11 ! \
342 vshd %t3, %t4, %r1 ! \ 345 vshd %t3, %t4, %r1 ! \
343 stbys,b,m %r1, 4(dst_spc, dst_off) ! \ 346 stbys,b,m %r1, 4(dst_spc, dst_off) ! \
344 ! \ 347 ! \
345 /* Do the rest of the copy. */ ! \ 348 /* Do the rest of the copy. */ ! \
346 _COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1) 349 _COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1)
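
    The alignment preamble of _COPY_FORWARD boils down to a little integer
    arithmetic. A C sketch of just that setup (variable names invented; it does
    not perform the copy itself):

    static void
    copy_forward_setup(unsigned long src_off, unsigned long dst_off,
        unsigned long count)
    {
            int t1 = 4 - (int)(src_off & 3); /* region bytes in the first src word */
            int t2 = 4 - (int)(dst_off & 3); /* bytes the first store will write */
            int shift = t1 - t2;             /* > 0: shift source words right */
            unsigned long remaining = count - t2;
            unsigned sar_bits;

            if (shift != 0) {
                    if (shift < 0) {
                            /* Convert to a right shift of 1..3 bytes; the first
                             * source word is preloaded into the FIFO instead of 0. */
                            shift += 4;
                    }
                    /* sh3add %t1, %r0, %t1: the SAR gets the shift in bits. */
                    sar_bits = (unsigned)shift * 8;
            } else {
                    /* shift == 0: the non-shifting _COPY loop is used instead. */
                    sar_bits = 0;
            }

            (void)remaining; (void)sar_bits;
    }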
347 350
348/* This macro copies a region in the reverse direction. */ 351/* This macro copies a region in the reverse direction. */
349#define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \ 352#define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \
350 ! \ 353 ! \
351 /* Immediately add count to both offsets. */ ! \ 354 /* Immediately add count to both offsets. */ ! \
352 add src_off, count, src_off ! \ 355 add src_off, count, src_off ! \
353 add dst_off, count, dst_off ! \ 356 add dst_off, count, dst_off ! \
354 ! \ 357 ! \
355 /* ! \ 358 /* ! \
356 * Since in the shifting-right case we ! \ 359 * Since in the shifting-right case we ! \
357 * will load 8 bytes before checking ! \ 360 * will load 8 bytes before checking ! \
358 * count, to keep things simple, branch ! \ 361 * count, to keep things simple, branch ! \
359 * to the byte copier unless we're ! \ 362 * to the byte copier unless we're ! \
360 * copying at least 8 bytes. ! \ 363 * copying at least 8 bytes. ! \
361 */ ! \ 364 */ ! \
362 comib,>>,n 8, count, _LABEL(_do1) ! \ 365 comib,>>,n 8, count, _LABEL(_do1) ! \
363 ! \ 366 ! \
364 /* ! \ 367 /* ! \
365 * Once we 4-byte align the source offset, ! \ 368 * Once we 4-byte align the source offset, ! \
366 * figure out how many bytes from the region ! \ 369 * figure out how many bytes from the region ! \
367 * will be in the first 4-byte word we read. ! \ 370 * will be in the first 4-byte word we read. ! \
368 * Ditto for writing the destination offset. ! \ 371 * Ditto for writing the destination offset. ! \
369 */ ! \ 372 */ ! \
370 extru,<> src_off, 31, 2, %t1 ! \ 373 extru,<> src_off, 31, 2, %t1 ! \
371 ldi 4, %t1 ! \ 374 ldi 4, %t1 ! \
372 extru,<> dst_off, 31, 2, %t2 ! \ 375 extru,<> dst_off, 31, 2, %t2 ! \
373 ldi 4, %t2 ! \ 376 ldi 4, %t2 ! \
374 ! \ 377 ! \
375 /* ! \ 378 /* ! \
376 * Calculate the byte shift required. A ! \ 379 * Calculate the byte shift required. A ! \
377 * positive value means a source 4-byte ! \ 380 * positive value means a source 4-byte ! \
378 * word has to be shifted to the right to ! \ 381 * word has to be shifted to the right to ! \
379 * line up as a destination 4-byte word. ! \ 382 * line up as a destination 4-byte word. ! \
380 */ ! \ 383 */ ! \
381 sub %t2, %t1, %t1 ! \ 384 sub %t2, %t1, %t1 ! \
382 ! \ 385 ! \
383 /* ! \ 386 /* ! \
384 * 4-byte align src_off, leaving it pointing ! \ 387 * 4-byte align src_off, leaving it pointing ! \
385 * to the 4-byte word *after* the next word ! \ 388 * to the 4-byte word *after* the next word ! \
386 * we intend to load. ! \ 389 * we intend to load. ! \
387 * ! \ 390 * ! \
388 * It's somewhat important to note that this ! \ 391 * It's somewhat important to note that this ! \
389 * code thinks of count as "the number of bytes ! \ 392 * code thinks of count as "the number of bytes ! \
390 * that haven't been stored yet", as opposed to ! \ 393 * that haven't been stored yet", as opposed to ! \
391 * "the number of bytes that haven't been copied ! \ 394 * "the number of bytes that haven't been copied ! \
392 * yet". The distinction is subtle, but becomes ! \ 395 * yet". The distinction is subtle, but becomes ! \
393 * apparent at the end of the shifting code, where ! \ 396 * apparent at the end of the shifting code, where ! \
394 * we "back up" src_off to correspond to count, ! \ 397 * we "back up" src_off to correspond to count, ! \
395 * as opposed to flushing the FIFO. ! \ 398 * as opposed to flushing the FIFO. ! \
396 * ! \ 399 * ! \
397 * We calculated above how many bytes our first ! \ 400 * We calculated above how many bytes our first ! \
398 * store will store, so update count now. ! \ 401 * store will store, so update count now. ! \
399 * ! \ 402 * ! \
400 * If the shift is zero, we use a copy loop that ! \ 403 * If the shift is zero, we use a copy loop that ! \
401 * does no shifting. NB: unlike the forward case, ! \ 404 * does no shifting. NB: unlike the forward case, ! \
402 * this is NOT strictly an optimization. If the ! \ 405 * this is NOT strictly an optimization. If the ! \
403 * SAR is zero the vshds do NOT do the right thing. ! \ 406 * SAR is zero the vshds do NOT do the right thing. ! \
 404	 * This is another asymmetry more or less the "fault"	! \ 407	 * This is another asymmetry more or less the "fault"	! \
405 * of vshd. ! \ 408 * of vshd. ! \
406 */ ! \ 409 */ ! \
407 addi 3, src_off, src_off ! \ 410 addi 3, src_off, src_off ! \
408 sub count, %t2, count ! \ 411 sub count, %t2, count ! \
409 comb,<> %r0, %t1, _LABEL(_shifting) ! \ 412 comb,<> %r0, %t1, _LABEL(_shifting) ! \
410 depi 0, 31, 2, src_off ! \ 413 depi 0, 31, 2, src_off ! \
411 ! \ 414 ! \
412 /* Load and store the first word. */ ! \ 415 /* Load and store the first word. */ ! \
413 ldws,mb -4(src_spc, src_off), %t4 ! \ 416 ldws,mb -4(src_spc, src_off), %t4 ! \
414 _STBYS_E_M(%t4, dst_spc, dst_off) ! \ 417 _STBYS_E_M(%t4, dst_spc, dst_off) ! \
415 ! \ 418 ! \
416 /* Do the rest of the copy. */ ! \ 419 /* Do the rest of the copy. */ ! \
417 _COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1) ! \ 420 _COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1) ! \
418 ! \ 421 ! \
419.label _LABEL(_shifting) ! \ 422.label _LABEL(_shifting) ! \
420 ! \ 423 ! \
421 /* ! \ 424 /* ! \
422 * If shift < 0, we need to shift words to the ! \ 425 * If shift < 0, we need to shift words to the ! \
423 * left. Since we can't do this directly, we ! \ 426 * left. Since we can't do this directly, we ! \
424 * adjust the shift so it's a shift to the right ! \ 427 * adjust the shift so it's a shift to the right ! \
 425	 * and load a zero into the low word of the FIFO.	! \ 428	 * and load a zero into the low word of the FIFO.	! \
426 * Otherwise, we load the first word into the ! \ 429 * Otherwise, we load the first word into the ! \
427 * low word of the FIFO. ! \ 430 * low word of the FIFO. ! \
428 * ! \ 431 * ! \
429 * Note the nullification trickery here. We ! \ 432 * Note the nullification trickery here. We ! \
430 * assume that we're shifting to the left, and ! \ 433 * assume that we're shifting to the left, and ! \
431 * load zero into the low word of the FIFO. Then ! \ 434 * load zero into the low word of the FIFO. Then ! \
432 * we nullify the addi if we're shifting to the ! \ 435 * we nullify the addi if we're shifting to the ! \
433 * right. If the addi is not nullified, we are ! \ 436 * right. If the addi is not nullified, we are ! \
434 * shifting to the left, so we nullify the load. ! \ 437 * shifting to the left, so we nullify the load. ! \
 435	 * we branch if we're shifting to the right.	! \ 438	 * we branch if we're shifting to the right.
436 */ ! \ 439 */ ! \
437 copy %r0, %t3 ! \ 440 copy %r0, %t3 ! \
438 comb,<=,n %r0, %t1, 0 ! \ 441 comb,<=,n %r0, %t1, 0 ! \
439 addi,tr 4, %t1, %t1 ! \ 442 addi,tr 4, %t1, %t1 ! \
440 ldws,mb -4(src_spc, src_off), %t3 ! \ 443 ldws,mb -4(src_spc, src_off), %t3 ! \
441 ! \ 444 ! \
442 /* ! \ 445 /* ! \
443 * Turn the shift byte count into a bit count, ! \ 446 * Turn the shift byte count into a bit count, ! \
444 * load the next word, set the Shift Amount ! \ 447 * load the next word, set the Shift Amount ! \
445 * Register, and form and store the first word. ! \ 448 * Register, and form and store the first word. ! \
446 */ ! \ 449 */ ! \
447 sh3add %t1, %r0, %t1 ! \ 450 sh3add %t1, %r0, %t1 ! \
448 ldws,mb -4(src_spc, src_off), %t4 ! \ 451 ldws,mb -4(src_spc, src_off), %t4 ! \
449 mtctl %t1, %cr11 ! \ 452 mtctl %t1, %cr11 ! \
450 vshd %t4, %t3, %r1 ! \ 453 vshd %t4, %t3, %r1 ! \
451 _STBYS_E_M(%r1, dst_spc, dst_off) ! \ 454 _STBYS_E_M(%r1, dst_spc, dst_off) ! \
452 ! \ 455 ! \
453 /* Do the rest of the copy. */ ! \ 456 /* Do the rest of the copy. */ ! \
454 _COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1) 457 _COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1)
455 458
456/* 459/*
457 * For paranoia, when things aren't going well, enable this 460 * For paranoia, when things aren't going well, enable this
458 * code to assemble byte-at-a-time-only copying. 461 * code to assemble byte-at-a-time-only copying.
459 */ 462 */
460#if 1 463#if 1
461#undef _COPY_FORWARD 464#undef _COPY_FORWARD
462#define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \ 465#define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \
463 comb,=,n %r0, count, _LABEL(_done) ! \ 466 comb,=,n %r0, count, _LABEL(_done) ! \
464 ldbs,ma 1(src_spc, src_off), %r1 ! \ 467 ldbs,ma 1(src_spc, src_off), %r1 ! \
465 addib,<> -1, count, -12 ! \ 468 addib,<> -1, count, -12 ! \
466 stbs,ma %r1, 1(dst_spc, dst_off) ! \ 469 stbs,ma %r1, 1(dst_spc, dst_off) ! \
467 b,n _LABEL(_done) 470 b,n _LABEL(_done)
468#undef _COPY_REVERSE 471#undef _COPY_REVERSE
469#define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \ 472#define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \
470 comb,= %r0, count, _LABEL(_done) ! \ 473 comb,= %r0, count, _LABEL(_done) ! \
471 add src_off, count, src_off ! \ 474 add src_off, count, src_off ! \
472 add dst_off, count, dst_off ! \ 475 add dst_off, count, dst_off ! \
473 ldbs,mb -1(src_spc, src_off), %r1 ! \ 476 ldbs,mb -1(src_spc, src_off), %r1 ! \
474 addib,<> -1, count, -12 ! \ 477 addib,<> -1, count, -12 ! \
475 stbs,mb %r1, -1(dst_spc, dst_off) ! \ 478 stbs,mb %r1, -1(dst_spc, dst_off) ! \
476 b,n _LABEL(_done) 479 b,n _LABEL(_done)
477#endif 480#endif
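
    Note that the "#if 1" above is live, so as committed these byte-at-a-time
    definitions replace the optimized _COPY_FORWARD/_COPY_REVERSE macros. Their
    C equivalents are short (sketch only, not from the source):

    #include <stddef.h>

    static void
    copy_forward_bytes(unsigned char *dst, const unsigned char *src, size_t n)
    {
            while (n-- > 0)
                    *dst++ = *src++;
    }

    static void
    copy_reverse_bytes(unsigned char *dst, const unsigned char *src, size_t n)
    {
            dst += n;               /* add count to both offsets first */
            src += n;
            while (n-- > 0)
                    *--dst = *--src;
    }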
478 481
479/* 482/*
480 * If none of the following are defined, define BCOPY. 483 * If none of the following are defined, define BCOPY.
481 */ 484 */
482#if !(defined(SPCOPY) || defined(MEMCPY) || defined(MEMMOVE)) 485#if !(defined(SPCOPY) || defined(MEMCPY) || defined(MEMMOVE))
483#define BCOPY 486#define BCOPY
484#endif 487#endif
485 488
486#if defined(SPCOPY) && !defined(_STANDALONE) 489#if defined(SPCOPY) && !defined(_STANDALONE)
487#include <sys/errno.h> 490#include <sys/errno.h>
488#include "assym.h" 491#include "assym.h"
489 492
490/* 493/*
491 * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst, 494 * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst,
492 * size_t len) 495 * size_t len)
493 * 496 *
494 * We assume that the regions do not overlap. 497 * We assume that the regions do not overlap.
495 */ 498 */
496LEAF_ENTRY(spcopy) 499LEAF_ENTRY(spcopy)
497 500
498 /* 501 /*
499 * Setup the fault handler, which will fill in %ret0 if triggered. 502 * Setup the fault handler, which will fill in %ret0 if triggered.
500 */ 503 */
501 mfctl CR_CURLWP, %r31 504 GET_CURLWP(%r31)
502#ifdef DIAGNOSTIC 505#ifdef DIAGNOSTIC
503 comb,<>,n %r0, %r31, Lspcopy_curlwp_ok 506 comb,<>,n %r0, %r31, Lspcopy_curlwp_ok
504 ldil L%panic, %r1 507 ldil L%panic, %r1
505 ldil L%Lspcopy_curlwp_bad, %arg0 508 ldil L%Lspcopy_curlwp_bad, %arg0
506 ldo R%panic(%r1), %r1 509 ldo R%panic(%r1), %r1
507 ldo R%Lspcopy_curlwp_bad(%arg0), %arg0 510 ldo R%Lspcopy_curlwp_bad(%arg0), %arg0
508 .call 511 .call
509 bv,n %r0(%r1) 512 bv,n %r0(%r1)
510 nop 513 nop
511Lspcopy_curlwp_bad: 514Lspcopy_curlwp_bad:
512 .asciz "spcopy: curlwp == NULL\n" 515 .asciz "spcopy: curlwp == NULL\n"
513 .align 8 516 .align 8
514Lspcopy_curlwp_ok: 517Lspcopy_curlwp_ok:
515#endif /* DIAGNOSTIC */ 518#endif /* DIAGNOSTIC */
516 ldil L%spcopy_fault, %r1 519 ldil L%spcopy_fault, %r1
517 ldw L_PCB(%r31), %r31 520 ldw L_PCB(%r31), %r31
518 ldo R%spcopy_fault(%r1), %r1 521 ldo R%spcopy_fault(%r1), %r1
519 stw %r1, PCB_ONFAULT(%r31) 522 stw %r1, PCB_ONFAULT(%r31)
520 523
521 /* Setup the space registers. */ 524 /* Setup the space registers. */
522 mfsp %sr2, %ret1 525 mfsp %sr2, %ret1
523 mtsp %arg0, %sr1 526 mtsp %arg0, %sr1
524 mtsp %arg2, %sr2 527 mtsp %arg2, %sr2
525 528
526 /* Get the len argument and do the copy. */ 529 /* Get the len argument and do the copy. */
527 ldw HPPA_FRAME_ARG(4)(%sp), %arg0 530 ldw HPPA_FRAME_ARG(4)(%sp), %arg0
528#define _LABEL(l) __CONCAT(spcopy,l) 531#define _LABEL(l) __CONCAT(spcopy,l)
529 _COPY_FORWARD(%sr1,%arg1,%sr2,%arg3,%arg0) 532 _COPY_FORWARD(%sr1,%arg1,%sr2,%arg3,%arg0)
530_LABEL(_done): 533_LABEL(_done):
531 534
532 /* Return. */ 535 /* Return. */
533 copy %r0, %ret0 536 copy %r0, %ret0
534ALTENTRY(spcopy_fault) 537ALTENTRY(spcopy_fault)
535 stw %r0, PCB_ONFAULT(%r31) 538 stw %r0, PCB_ONFAULT(%r31)
536 bv %r0(%rp) 539 bv %r0(%rp)
537 mtsp %ret1, %sr2 540 mtsp %ret1, %sr2
538EXIT(spcopy) 541EXIT(spcopy)
539#endif /* SPCOPY && !_STANDALONE */ 542#endif /* SPCOPY && !_STANDALONE */
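
    spcopy follows the usual onfault protocol: store a recovery address in the
    pcb before touching the foreign space, let the trap handler redirect
    execution there (filling in %ret0) if the copy faults, and clear the slot
    again on both paths. A conceptual C sketch using setjmp in place of the
    trap-handler redirection (the names and the setjmp mechanism are
    illustrative only, not how the kernel actually implements it):

    #include <setjmp.h>
    #include <stddef.h>
    #include <string.h>

    static jmp_buf onfault;         /* plays the role of PCB_ONFAULT */

    static int
    spcopy_model(void *dst, const void *src, size_t len)
    {
            if (setjmp(onfault) != 0)
                    return 1;       /* "spcopy_fault": a fault landed here */

            memcpy(dst, src, len);  /* the copy proper; a fault would longjmp */
            return 0;               /* success: %ret0 = 0 */
    }

    In the real routine the redirection is done by the trap handler through the
    address stored at PCB_ONFAULT, and the fault path also restores %sr2, which
    is what the spcopy_fault tail above does.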
540 543
541#ifdef MEMCPY 544#ifdef MEMCPY
542/* 545/*
543 * void *memcpy(void *restrict dst, const void *restrict src, size_t len); 546 * void *memcpy(void *restrict dst, const void *restrict src, size_t len);
544 * 547 *
545 * memcpy is specifically restricted to working on 548 * memcpy is specifically restricted to working on
546 * non-overlapping regions, so we can just copy forward. 549 * non-overlapping regions, so we can just copy forward.
547 */ 550 */
548LEAF_ENTRY(memcpy) 551LEAF_ENTRY(memcpy)
549 copy %arg0, %ret0 552 copy %arg0, %ret0
550#define _LABEL(l) __CONCAT(memcpy,l) 553#define _LABEL(l) __CONCAT(memcpy,l)
551 _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2) 554 _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
552_LABEL(_done): 555_LABEL(_done):
553 bv,n %r0(%rp) 556 bv,n %r0(%rp)
554 nop 557 nop
555EXIT(memcpy) 558EXIT(memcpy)
556#endif /* MEMCPY */ 559#endif /* MEMCPY */
557 560
558#ifdef BCOPY 561#ifdef BCOPY
559/* 562/*
560 * void bcopy(const void *src, void *dst, size_t len); 563 * void bcopy(const void *src, void *dst, size_t len);
561 */ 564 */
562LEAF_ENTRY(bcopy) 565LEAF_ENTRY(bcopy)
563 copy %arg0, %r1 566 copy %arg0, %r1
564 copy %arg1, %arg0 567 copy %arg1, %arg0
565 copy %r1, %arg1 568 copy %r1, %arg1
566 /* FALLTHROUGH */ 569 /* FALLTHROUGH */
567#define _LABEL_F(l) __CONCAT(bcopy_F,l) 570#define _LABEL_F(l) __CONCAT(bcopy_F,l)
568#define _LABEL_R(l) __CONCAT(bcopy_R,l) 571#define _LABEL_R(l) __CONCAT(bcopy_R,l)
569#endif 572#endif
570 573
571#ifdef MEMMOVE 574#ifdef MEMMOVE
572/* 575/*
573 * void *memmove(void *dst, const void *src, size_t len); 576 * void *memmove(void *dst, const void *src, size_t len);
574 */ 577 */
575LEAF_ENTRY(memmove) 578LEAF_ENTRY(memmove)
576#define _LABEL_F(l) __CONCAT(memmove_F,l) 579#define _LABEL_F(l) __CONCAT(memmove_F,l)
577#define _LABEL_R(l) __CONCAT(memmove_R,l) 580#define _LABEL_R(l) __CONCAT(memmove_R,l)
578 copy %arg0, %ret0 581 copy %arg0, %ret0
579#endif /* MEMMOVE */ 582#endif /* MEMMOVE */
580 583
581#if defined(BCOPY) || defined(MEMMOVE) 584#if defined(BCOPY) || defined(MEMMOVE)
582 585
583 /* 586 /*
584 * If src >= dst or src + len <= dst, we copy 587 * If src >= dst or src + len <= dst, we copy
585 * forward, else we copy in reverse. 588 * forward, else we copy in reverse.
586 */ 589 */
587 add %arg1, %arg2, %r1 590 add %arg1, %arg2, %r1
588 comb,>>=,n %arg1, %arg0, 0 591 comb,>>=,n %arg1, %arg0, 0
589 comb,>>,n %r1, %arg0, _LABEL_R(_go) 592 comb,>>,n %r1, %arg0, _LABEL_R(_go)
590 593
591#define _LABEL _LABEL_F 594#define _LABEL _LABEL_F
592 _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2) 595 _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2)
593#undef _LABEL 596#undef _LABEL
594 597
595_LABEL_R(_go): 598_LABEL_R(_go):
596#define _LABEL _LABEL_R 599#define _LABEL _LABEL_R
597 _COPY_REVERSE(%sr0,%arg1,%sr0,%arg0,%arg2) 600 _COPY_REVERSE(%sr0,%arg1,%sr0,%arg0,%arg2)
598#undef _LABEL 601#undef _LABEL
599 602
600_LABEL_F(_done): 603_LABEL_F(_done):
601_LABEL_R(_done): 604_LABEL_R(_done):
602 bv,n %r0(%rp) 605 bv,n %r0(%rp)
603 nop 606 nop
604#ifdef BCOPY 607#ifdef BCOPY
605EXIT(bcopy) 608EXIT(bcopy)
606#else 609#else
607EXIT(memmove) 610EXIT(memmove)
608#endif 611#endif
609#endif /* BCOPY || MEMMOVE */ 612#endif /* BCOPY || MEMMOVE */
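
    Finally, the direction test shared by bcopy and memmove (copy forward when
    src >= dst or src + len <= dst, otherwise copy in reverse) reads naturally
    in C. A sketch, not the kernel's implementation; the comparisons are
    unsigned, matching comb,>>= and comb,>>:

    #include <stddef.h>
    #include <stdint.h>

    static void *
    memmove_sketch(void *dst, const void *src, size_t len)
    {
            uintptr_t d = (uintptr_t)dst, s = (uintptr_t)src;
            unsigned char *dp;
            const unsigned char *sp;

            if (s >= d || s + len <= d) {
                    /* No harmful overlap: copy forward. */
                    dp = dst;
                    sp = src;
                    while (len-- > 0)
                            *dp++ = *sp++;
            } else {
                    /* dst lies inside [src, src + len): copy in reverse. */
                    dp = (unsigned char *)dst + len;
                    sp = (const unsigned char *)src + len;
                    while (len-- > 0)
                            *--dp = *--sp;
            }
            return dst;
    }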