| @@ -1,609 +1,612 @@ | | | @@ -1,609 +1,612 @@ |
1 | /* $NetBSD: bcopy.S,v 1.11 2010/03/20 23:31:30 chs Exp $ */ | | 1 | /* $NetBSD: bcopy.S,v 1.12 2011/01/22 10:58:44 skrll Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /* |
4 | * Copyright (c) 2002 The NetBSD Foundation, Inc. | | 4 | * Copyright (c) 2002 The NetBSD Foundation, Inc. |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * This code is derived from software contributed to The NetBSD Foundation | | 7 | * This code is derived from software contributed to The NetBSD Foundation |
8 | * by Matthew Fredette. | | 8 | * by Matthew Fredette. |
9 | * | | 9 | * |
10 | * Redistribution and use in source and binary forms, with or without | | 10 | * Redistribution and use in source and binary forms, with or without |
11 | * modification, are permitted provided that the following conditions | | 11 | * modification, are permitted provided that the following conditions |
12 | * are met: | | 12 | * are met: |
13 | * 1. Redistributions of source code must retain the above copyright | | 13 | * 1. Redistributions of source code must retain the above copyright |
14 | * notice, this list of conditions and the following disclaimer. | | 14 | * notice, this list of conditions and the following disclaimer. |
15 | * 2. Redistributions in binary form must reproduce the above copyright | | 15 | * 2. Redistributions in binary form must reproduce the above copyright |
16 | * notice, this list of conditions and the following disclaimer in the | | 16 | * notice, this list of conditions and the following disclaimer in the |
17 | * documentation and/or other materials provided with the distribution. | | 17 | * documentation and/or other materials provided with the distribution. |
18 | * | | 18 | * |
19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 19 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 20 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 21 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 22 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 26 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 27 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 28 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
29 | * POSSIBILITY OF SUCH DAMAGE. | | 29 | * POSSIBILITY OF SUCH DAMAGE. |
30 | */ | | 30 | */ |
31 | | | 31 | |
32 | /* | | 32 | /* |
33 | * Copy routines for NetBSD/hppa. | | 33 | * Copy routines for NetBSD/hppa. |
34 | */ | | 34 | */ |
35 | | | 35 | |
| | | 36 | #include "opt_multiprocessor.h" |
| | | 37 | |
36 | #undef _LOCORE | | 38 | #undef _LOCORE |
37 | #define _LOCORE /* XXX fredette - unfortunate */ | | 39 | #define _LOCORE /* XXX fredette - unfortunate */ |
38 | | | 40 | |
| | | 41 | #include <machine/cpu.h> |
39 | #include <machine/asm.h> | | 42 | #include <machine/asm.h> |
40 | #include <machine/frame.h> | | 43 | #include <machine/frame.h> |
41 | #include <machine/reg.h> | | 44 | #include <machine/reg.h> |
42 | | | 45 | |
43 | #if defined(LIBC_SCCS) && !defined(lint) | | 46 | #if defined(LIBC_SCCS) && !defined(lint) |
44 | RCSID("$NetBSD: bcopy.S,v 1.11 2010/03/20 23:31:30 chs Exp $") | | 47 | RCSID("$NetBSD: bcopy.S,v 1.12 2011/01/22 10:58:44 skrll Exp $") |
45 | #endif /* LIBC_SCCS and not lint */ | | 48 | #endif /* LIBC_SCCS and not lint */ |
46 | | | 49 | |
47 | /* | | 50 | /* |
48 | * The stbys instruction is a little asymmetric. When (%r2 & 3) | | 51 | * The stbys instruction is a little asymmetric. When (%r2 & 3) |
49 | * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma. You | | 52 | * is zero, stbys,b,m %r1, 4(%r2) works like stws,ma. You |
50 | * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2) | | 53 | * might then wish that when (%r2 & 3) == 0, stbys,e,m %r1, -4(%r2) |
51 | * worked like stws,mb. But it doesn't. | | 54 | * worked like stws,mb. But it doesn't. |
52 | * | | 55 | * |
53 | * This macro works around this problem. It requires that %t2 | | 56 | * This macro works around this problem. It requires that %t2 |
54 | * hold the number of bytes that will be written by this store | | 57 | * hold the number of bytes that will be written by this store |
55 | * (meaning that it ranges from one to four). | | 58 | * (meaning that it ranges from one to four). |
56 | * | | 59 | * |
57 | * Watch the delay-slot trickery here. The comib is used to set | | 60 | * Watch the delay-slot trickery here. The comib is used to set |
58 | * up which instruction, either the stws or the stbys, is run | | 61 | * up which instruction, either the stws or the stbys, is run |
59 | * in the delay slot of the b instruction. | | 62 | * in the delay slot of the b instruction. |
60 | */ | | 63 | */ |
61 | #define _STBYS_E_M(r, dst_spc, dst_off) \ | | 64 | #define _STBYS_E_M(r, dst_spc, dst_off) \ |
62 | comib,<> 4, %t2, 4 ! \ | | 65 | comib,<> 4, %t2, 4 ! \ |
63 | b 4 ! \ | | 66 | b 4 ! \ |
64 | stws,mb r, -4(dst_spc, dst_off) ! \ | | 67 | stws,mb r, -4(dst_spc, dst_off) ! \ |
65 | stbys,e,m r, 0(dst_spc, dst_off) | | 68 | stbys,e,m r, 0(dst_spc, dst_off) |
66 | | | 69 | |
67 | /* | | 70 | /* |
68 | * This macro does a bulk copy with no shifting. cmplt and m are | | 71 | * This macro does a bulk copy with no shifting. cmplt and m are |
69 | * the completer and displacement multiplier, respectively, for | | 72 | * the completer and displacement multiplier, respectively, for |
70 | * the load and store instructions. | | 73 | * the load and store instructions. |
71 | */ | | 74 | */ |
72 | #define _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \ | | 75 | #define _COPY(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \ |
73 | ! \ | | 76 | ! \ |
74 | /* ! \ | | 77 | /* ! \ |
75 | * Loop storing 16 bytes at a time. Since count ! \ | | 78 | * Loop storing 16 bytes at a time. Since count ! \ |
76 | * may be > INT_MAX, we have to be careful and ! \ | | 79 | * may be > INT_MAX, we have to be careful and ! \ |
77 | * avoid comparisons that treat it as a signed ! \ | | 80 | * avoid comparisons that treat it as a signed ! \ |
78 | * quantity, until after this loop, when count ! \ | | 81 | * quantity, until after this loop, when count ! \ |
79 | * is guaranteed to be less than 16. ! \ | | 82 | * is guaranteed to be less than 16. ! \ |
80 | */ ! \ | | 83 | */ ! \ |
81 | comib,>>=,n 15, count, _LABEL(_skip16) ! \ | | 84 | comib,>>=,n 15, count, _LABEL(_skip16) ! \ |
82 | .label _LABEL(_loop16) ! \ | | 85 | .label _LABEL(_loop16) ! \ |
83 | addi -16, count, count ! \ | | 86 | addi -16, count, count ! \ |
84 | ldws,cmplt m*4(src_spc, src_off), %t1 ! \ | | 87 | ldws,cmplt m*4(src_spc, src_off), %t1 ! \ |
85 | ldws,cmplt m*4(src_spc, src_off), %t2 ! \ | | 88 | ldws,cmplt m*4(src_spc, src_off), %t2 ! \ |
86 | ldws,cmplt m*4(src_spc, src_off), %t3 ! \ | | 89 | ldws,cmplt m*4(src_spc, src_off), %t3 ! \ |
87 | ldws,cmplt m*4(src_spc, src_off), %t4 ! \ | | 90 | ldws,cmplt m*4(src_spc, src_off), %t4 ! \ |
88 | stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ | | 91 | stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ |
89 | stws,cmplt %t2, m*4(dst_spc, dst_off) ! \ | | 92 | stws,cmplt %t2, m*4(dst_spc, dst_off) ! \ |
90 | stws,cmplt %t3, m*4(dst_spc, dst_off) ! \ | | 93 | stws,cmplt %t3, m*4(dst_spc, dst_off) ! \ |
91 | comib,<< 15, count, _LABEL(_loop16) ! \ | | 94 | comib,<< 15, count, _LABEL(_loop16) ! \ |
92 | stws,cmplt %t4, m*4(dst_spc, dst_off) ! \ | | 95 | stws,cmplt %t4, m*4(dst_spc, dst_off) ! \ |
93 | .label _LABEL(_skip16) ! \ | | 96 | .label _LABEL(_skip16) ! \ |
94 | ! \ | | 97 | ! \ |
95 | /* Loop storing 4 bytes at a time. */ ! \ | | 98 | /* Loop storing 4 bytes at a time. */ ! \ |
96 | addib,<,n -4, count, _LABEL(_skip4) ! \ | | 99 | addib,<,n -4, count, _LABEL(_skip4) ! \ |
97 | .label _LABEL(_loop4) ! \ | | 100 | .label _LABEL(_loop4) ! \ |
98 | ldws,cmplt m*4(src_spc, src_off), %t1 ! \ | | 101 | ldws,cmplt m*4(src_spc, src_off), %t1 ! \ |
99 | addib,>= -4, count, _LABEL(_loop4) ! \ | | 102 | addib,>= -4, count, _LABEL(_loop4) ! \ |
100 | stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ | | 103 | stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ |
101 | .label _LABEL(_skip4) ! \ | | 104 | .label _LABEL(_skip4) ! \ |
102 | /* Restore the correct count. */ ! \ | | 105 | /* Restore the correct count. */ ! \ |
103 | addi 4, count, count ! \ | | 106 | addi 4, count, count ! \ |
104 | ! \ | | 107 | ! \ |
105 | .label _LABEL(_do1) ! \ | | 108 | .label _LABEL(_do1) ! \ |
106 | ! \ | | 109 | ! \ |
107 | /* Loop storing 1 byte at a time. */ ! \ | | 110 | /* Loop storing 1 byte at a time. */ ! \ |
108 | addib,<,n -1, count, _LABEL(_skip1) ! \ | | 111 | addib,<,n -1, count, _LABEL(_skip1) ! \ |
109 | .label _LABEL(_loop1) ! \ | | 112 | .label _LABEL(_loop1) ! \ |
110 | ldbs,cmplt m*1(src_spc, src_off), %t1 ! \ | | 113 | ldbs,cmplt m*1(src_spc, src_off), %t1 ! \ |
111 | addib,>= -1, count, _LABEL(_loop1) ! \ | | 114 | addib,>= -1, count, _LABEL(_loop1) ! \ |
112 | stbs,cmplt %t1, m*1(dst_spc, dst_off) ! \ | | 115 | stbs,cmplt %t1, m*1(dst_spc, dst_off) ! \ |
113 | .label _LABEL(_skip1) ! \ | | 116 | .label _LABEL(_skip1) ! \ |
114 | /* Restore the correct count. */ ! \ | | 117 | /* Restore the correct count. */ ! \ |
115 | b _LABEL(_done) ! \ | | 118 | b _LABEL(_done) ! \ |
116 | addi 1, count, count | | 119 | addi 1, count, count |
117 | | | 120 | |
118 | /* | | 121 | /* |
119 | * This macro is definitely strange. It exists purely to | | 122 | * This macro is definitely strange. It exists purely to |
120 | * allow the _COPYS macro to be reused, but because it | | 123 | * allow the _COPYS macro to be reused, but because it |
121 | * requires this long attempt to explain it, I'm starting | | 124 | * requires this long attempt to explain it, I'm starting |
122 | * to doubt the value of that. | | 125 | * to doubt the value of that. |
123 | * | | 126 | * |
124 | * Part of the expansion of the _COPYS macro below are loops | | 127 | * Part of the expansion of the _COPYS macro below are loops |
125 | * that copy four words or one word at a time, performing shifts | | 128 | * that copy four words or one word at a time, performing shifts |
126 | * to get data to line up correctly in the destination buffer. | | 129 | * to get data to line up correctly in the destination buffer. |
127 | * | | 130 | * |
128 | * The _COPYS macro is used when copying backwards, as well | | 131 | * The _COPYS macro is used when copying backwards, as well |
129 | * as forwards. The 4-word loop always loads into %t1, %t2, %t3, | | 132 | * as forwards. The 4-word loop always loads into %t1, %t2, %t3, |
130 | * and %t4 in that order. This means that when copying forward, | | 133 | * and %t4 in that order. This means that when copying forward, |
131 | * %t1 will have the word from the lowest address, and %t4 will | | 134 | * %t1 will have the word from the lowest address, and %t4 will |
132 | * have the word from the highest address. When copying | | 135 | * have the word from the highest address. When copying |
133 | * backwards, the opposite is true. | | 136 | * backwards, the opposite is true. |
134 | * | | 137 | * |
135 | * The shift instructions need pairs of registers with adjacent | | 138 | * The shift instructions need pairs of registers with adjacent |
136 | * words, with the register containing the word from the lowest | | 139 | * words, with the register containing the word from the lowest |
137 | * address *always* coming first. It is this assymetry that | | 140 | * address *always* coming first. It is this assymetry that |
138 | * gives rise to this macro - depending on which direction | | 141 | * gives rise to this macro - depending on which direction |
139 | * we're copying in, these ordered pairs are different. | | 142 | * we're copying in, these ordered pairs are different. |
140 | * | | 143 | * |
141 | * Fortunately, we can compute those register numbers at compile | | 144 | * Fortunately, we can compute those register numbers at compile |
142 | * time, and assemble them manually into a shift instruction. | | 145 | * time, and assemble them manually into a shift instruction. |
143 | * That's what this macro does. | | 146 | * That's what this macro does. |
144 | * | | 147 | * |
145 | * This macro takes two arguments. n ranges from 0 to 3 and | | 148 | * This macro takes two arguments. n ranges from 0 to 3 and |
146 | * is the "shift number", i.e., n = 0 means we're doing the | | 149 | * is the "shift number", i.e., n = 0 means we're doing the |
147 | * shift for what will be the first store. | | 150 | * shift for what will be the first store. |
148 | * | | 151 | * |
149 | * m is the displacement multiplier from the _COPYS macro call. | | 152 | * m is the displacement multiplier from the _COPYS macro call. |
150 | * This is 1 for a forward copy and -1 for a backwards copy. | | 153 | * This is 1 for a forward copy and -1 for a backwards copy. |
151 | * So, the ((m + 1) / 2) term yields 0 for a backwards copy and | | 154 | * So, the ((m + 1) / 2) term yields 0 for a backwards copy and |
152 | * 1 for a forward copy, and the ((m - 1) / 2) term yields | | 155 | * 1 for a forward copy, and the ((m - 1) / 2) term yields |
153 | * 0 for a forward copy, and -1 for a backwards copy. | | 156 | * 0 for a forward copy, and -1 for a backwards copy. |
154 | * These terms are used to discriminate the register computations | | 157 | * These terms are used to discriminate the register computations |
155 | * below. | | 158 | * below. |
156 | * | | 159 | * |
157 | * When copying forward, then, the first register used with | | 160 | * When copying forward, then, the first register used with |
158 | * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or %t4, | | 161 | * the first vshd will be 19 + (3 - ((0 - 1) & 3)), or %t4, |
159 | * which matches _COPYS' requirement that the word last loaded | | 162 | * which matches _COPYS' requirement that the word last loaded |
160 | * be in %t4. The first register used for the second vshd | | 163 | * be in %t4. The first register used for the second vshd |
161 | * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or %t1. | | 164 | * will then "wrap" around to 19 + (3 - ((1 - 1) & 3)), or %t1. |
162 | * And so on to %t2 and %t3. | | 165 | * And so on to %t2 and %t3. |
163 | * | | 166 | * |
164 | * When copying forward, the second register used with the first | | 167 | * When copying forward, the second register used with the first |
165 | * vshd will be (19 + (3 - ((n + 0) & 3)), or %t1. It will | | 168 | * vshd will be (19 + (3 - ((n + 0) & 3)), or %t1. It will |
166 | * continue to be %t2, then %t3, and finally %t4. | | 169 | * continue to be %t2, then %t3, and finally %t4. |
167 | * | | 170 | * |
168 | * When copying backwards, the values for the first and second | | 171 | * When copying backwards, the values for the first and second |
169 | * register for each vshd are reversed from the forwards case. | | 172 | * register for each vshd are reversed from the forwards case. |
170 | * (Symmetry reclaimed!) Proving this is "left as an exercise | | 173 | * (Symmetry reclaimed!) Proving this is "left as an exercise |
171 | * for the reader" (remember the different discriminating values!) | | 174 | * for the reader" (remember the different discriminating values!) |
172 | */ | | 175 | */ |
173 | #define _VSHD(n, m, t) \ | | 176 | #define _VSHD(n, m, t) \ |
174 | .word (0xd0000000 | \ | | 177 | .word (0xd0000000 | \ |
175 | ((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16) | \ | | 178 | ((19 + (3 - ((n - 1 * ((m + 1) / 2)) & 3))) << 16) | \ |
176 | ((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21) | \ | | 179 | ((19 + (3 - ((n + 1 * ((m - 1) / 2)) & 3))) << 21) | \ |
177 | (t)) | | 180 | (t)) |
178 | | | 181 | |
179 | /* | | 182 | /* |
180 | * This macro does a bulk copy with shifting. cmplt and m are | | 183 | * This macro does a bulk copy with shifting. cmplt and m are |
181 | * the completer and displacement multiplier, respectively, for | | 184 | * the completer and displacement multiplier, respectively, for |
182 | * the load and store instructions. It is assumed that the | | 185 | * the load and store instructions. It is assumed that the |
183 | * word last loaded is already in %t4. | | 186 | * word last loaded is already in %t4. |
184 | */ | | 187 | */ |
185 | #define _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \ | | 188 | #define _COPYS(src_spc, src_off, dst_spc, dst_off, count, cmplt, m) \ |
186 | ! \ | | 189 | ! \ |
187 | /* ! \ | | 190 | /* ! \ |
188 | * Loop storing 16 bytes at a time. Since count ! \ | | 191 | * Loop storing 16 bytes at a time. Since count ! \ |
189 | * may be > INT_MAX, we have to be careful and ! \ | | 192 | * may be > INT_MAX, we have to be careful and ! \ |
190 | * avoid comparisons that treat it as a signed ! \ | | 193 | * avoid comparisons that treat it as a signed ! \ |
191 | * quantity, until after this loop, when count ! \ | | 194 | * quantity, until after this loop, when count ! \ |
192 | * is guaranteed to be less than 16. ! \ | | 195 | * is guaranteed to be less than 16. ! \ |
193 | */ ! \ | | 196 | */ ! \ |
194 | comib,>>=,n 15, count, _LABEL(S_skip16) ! \ | | 197 | comib,>>=,n 15, count, _LABEL(S_skip16) ! \ |
195 | .label _LABEL(S_loop16) ! \ | | 198 | .label _LABEL(S_loop16) ! \ |
196 | addi -16, count, count ! \ | | 199 | addi -16, count, count ! \ |
197 | ldws,cmplt m*4(src_spc, src_off), %t1 ! \ | | 200 | ldws,cmplt m*4(src_spc, src_off), %t1 ! \ |
198 | ldws,cmplt m*4(src_spc, src_off), %t2 ! \ | | 201 | ldws,cmplt m*4(src_spc, src_off), %t2 ! \ |
199 | ldws,cmplt m*4(src_spc, src_off), %t3 ! \ | | 202 | ldws,cmplt m*4(src_spc, src_off), %t3 ! \ |
200 | _VSHD(0, m, 1) /* vshd %t4, %t1, %r1 */ ! \ | | 203 | _VSHD(0, m, 1) /* vshd %t4, %t1, %r1 */ ! \ |
201 | ldws,cmplt m*4(src_spc, src_off), %t4 ! \ | | 204 | ldws,cmplt m*4(src_spc, src_off), %t4 ! \ |
202 | _VSHD(1, m, 22) /* vshd %t1, %t2, %t1 */ ! \ | | 205 | _VSHD(1, m, 22) /* vshd %t1, %t2, %t1 */ ! \ |
203 | _VSHD(2, m, 21) /* vshd %t2, %t3, %t2 */ ! \ | | 206 | _VSHD(2, m, 21) /* vshd %t2, %t3, %t2 */ ! \ |
204 | _VSHD(3, m, 20) /* vshd %t3, %t4, %t3 */ ! \ | | 207 | _VSHD(3, m, 20) /* vshd %t3, %t4, %t3 */ ! \ |
205 | stws,cmplt %r1, m*4(dst_spc, dst_off) ! \ | | 208 | stws,cmplt %r1, m*4(dst_spc, dst_off) ! \ |
206 | stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ | | 209 | stws,cmplt %t1, m*4(dst_spc, dst_off) ! \ |
207 | stws,cmplt %t2, m*4(dst_spc, dst_off) ! \ | | 210 | stws,cmplt %t2, m*4(dst_spc, dst_off) ! \ |
208 | comib,<< 15, count, _LABEL(S_loop16) ! \ | | 211 | comib,<< 15, count, _LABEL(S_loop16) ! \ |
209 | stws,cmplt %t3, m*4(dst_spc, dst_off) ! \ | | 212 | stws,cmplt %t3, m*4(dst_spc, dst_off) ! \ |
210 | .label _LABEL(S_skip16) ! \ | | 213 | .label _LABEL(S_skip16) ! \ |
211 | ! \ | | 214 | ! \ |
212 | /* Loop storing 4 bytes at a time. */ ! \ | | 215 | /* Loop storing 4 bytes at a time. */ ! \ |
213 | addib,<,n -4, count, _LABEL(S_skip4) ! \ | | 216 | addib,<,n -4, count, _LABEL(S_skip4) ! \ |
214 | .label _LABEL(S_loop4) ! \ | | 217 | .label _LABEL(S_loop4) ! \ |
215 | ldws,cmplt m*4(src_spc, src_off), %t1 ! \ | | 218 | ldws,cmplt m*4(src_spc, src_off), %t1 ! \ |
216 | _VSHD(0, m, 1) /* into %r1 (1) */ ! \ | | 219 | _VSHD(0, m, 1) /* into %r1 (1) */ ! \ |
217 | copy %t1, %t4 ! \ | | 220 | copy %t1, %t4 ! \ |
218 | addib,>= -4, count, _LABEL(S_loop4) ! \ | | 221 | addib,>= -4, count, _LABEL(S_loop4) ! \ |
219 | stws,cmplt %r1, m*4(dst_spc, dst_off) ! \ | | 222 | stws,cmplt %r1, m*4(dst_spc, dst_off) ! \ |
220 | .label _LABEL(S_skip4) ! \ | | 223 | .label _LABEL(S_skip4) ! \ |
221 | ! \ | | 224 | ! \ |
222 | /* ! \ | | 225 | /* ! \ |
223 | * We now need to "back up" src_off by the ! \ | | 226 | * We now need to "back up" src_off by the ! \ |
224 | * number of bytes remaining in the FIFO ! \ | | 227 | * number of bytes remaining in the FIFO ! \ |
225 | * (i.e., the number of bytes remaining in %t4), ! \ | | 228 | * (i.e., the number of bytes remaining in %t4), ! \ |
226 | * because (the correct) count still includes ! \ | | 229 | * because (the correct) count still includes ! \ |
227 | * these bytes, and we intent to keep it that ! \ | | 230 | * these bytes, and we intent to keep it that ! \ |
228 | * way, and finish with the single-byte copier. ! \ | | 231 | * way, and finish with the single-byte copier. ! \ |
229 | * ! \ | | 232 | * ! \ |
230 | * The number of bytes remaining in the FIFO is ! \ | | 233 | * The number of bytes remaining in the FIFO is ! \ |
231 | * related to the shift count, so recover it, ! \ | | 234 | * related to the shift count, so recover it, ! \ |
232 | * restoring the correct count at the same time. ! \ | | 235 | * restoring the correct count at the same time. ! \ |
233 | */ ! \ | | 236 | */ ! \ |
234 | mfctl %cr11, %t1 ! \ | | 237 | mfctl %cr11, %t1 ! \ |
235 | addi 4, count, count ! \ | | 238 | addi 4, count, count ! \ |
236 | shd %r0, %t1, 3, %t1 ! \ | | 239 | shd %r0, %t1, 3, %t1 ! \ |
237 | ! \ | | 240 | ! \ |
238 | /* ! \ | | 241 | /* ! \ |
239 | * If we're copying forward, the shift count ! \ | | 242 | * If we're copying forward, the shift count ! \ |
240 | * is the number of bytes remaining in the ! \ | | 243 | * is the number of bytes remaining in the ! \ |
241 | * FIFO, and we want to subtract it from src_off. ! \ | | 244 | * FIFO, and we want to subtract it from src_off. ! \ |
242 | * If we're copying backwards, (4 - shift count) ! \ | | 245 | * If we're copying backwards, (4 - shift count) ! \ |
243 | * is the number of bytes remaining in the FIFO, ! \ | | 246 | * is the number of bytes remaining in the FIFO, ! \ |
244 | * and we want to add it to src_off. ! \ | | 247 | * and we want to add it to src_off. ! \ |
245 | * ! \ | | 248 | * ! \ |
246 | * We observe that x + (4 - y) = x - (y - 4), ! \ | | 249 | * We observe that x + (4 - y) = x - (y - 4), ! \ |
247 | * and introduce this instruction to add -4 when ! \ | | 250 | * and introduce this instruction to add -4 when ! \ |
248 | * m is -1, although this does mean one extra ! \ | | 251 | * m is -1, although this does mean one extra ! \ |
249 | * instruction in the forward case. ! \ | | 252 | * instruction in the forward case. ! \ |
250 | */ ! \ | | 253 | */ ! \ |
251 | addi 4*((m - 1) / 2), %t1, %t1 ! \ | | 254 | addi 4*((m - 1) / 2), %t1, %t1 ! \ |
252 | ! \ | | 255 | ! \ |
253 | /* Now branch to the byte-at-a-time loop. */ ! \ | | 256 | /* Now branch to the byte-at-a-time loop. */ ! \ |
254 | b _LABEL(_do1) ! \ | | 257 | b _LABEL(_do1) ! \ |
255 | sub src_off, %t1, src_off | | 258 | sub src_off, %t1, src_off |
256 | | | 259 | |
257 | /* | | 260 | /* |
258 | * This macro copies a region in the forward direction. | | 261 | * This macro copies a region in the forward direction. |
259 | */ | | 262 | */ |
260 | #define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \ | | 263 | #define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \ |
261 | ! \ | | 264 | ! \ |
262 | /* ! \ | | 265 | /* ! \ |
263 | * Since in the shifting-left case we will ! \ | | 266 | * Since in the shifting-left case we will ! \ |
264 | * load 8 bytes before checking count, to ! \ | | 267 | * load 8 bytes before checking count, to ! \ |
265 | * keep things simple, branch to the byte ! \ | | 268 | * keep things simple, branch to the byte ! \ |
266 | * copier unless we're copying at least 8. ! \ | | 269 | * copier unless we're copying at least 8. ! \ |
267 | */ ! \ | | 270 | */ ! \ |
268 | comib,>>,n 8, count, _LABEL(_do1) ! \ | | 271 | comib,>>,n 8, count, _LABEL(_do1) ! \ |
269 | ! \ | | 272 | ! \ |
270 | /* ! \ | | 273 | /* ! \ |
271 | * Once we 4-byte align the source offset, ! \ | | 274 | * Once we 4-byte align the source offset, ! \ |
272 | * figure out how many bytes from the region ! \ | | 275 | * figure out how many bytes from the region ! \ |
273 | * will be in the first 4-byte word we read. ! \ | | 276 | * will be in the first 4-byte word we read. ! \ |
274 | * Ditto for writing the destination offset. ! \ | | 277 | * Ditto for writing the destination offset. ! \ |
275 | */ ! \ | | 278 | */ ! \ |
276 | extru src_off, 31, 2, %t1 ! \ | | 279 | extru src_off, 31, 2, %t1 ! \ |
277 | extru dst_off, 31, 2, %t2 ! \ | | 280 | extru dst_off, 31, 2, %t2 ! \ |
278 | subi 4, %t1, %t1 ! \ | | 281 | subi 4, %t1, %t1 ! \ |
279 | subi 4, %t2, %t2 ! \ | | 282 | subi 4, %t2, %t2 ! \ |
280 | ! \ | | 283 | ! \ |
281 | /* ! \ | | 284 | /* ! \ |
282 | * Calculate the byte shift required. A ! \ | | 285 | * Calculate the byte shift required. A ! \ |
283 | * positive value means a source 4-byte word ! \ | | 286 | * positive value means a source 4-byte word ! \ |
284 | * has to be shifted to the right to line up ! \ | | 287 | * has to be shifted to the right to line up ! \ |
285 | * as a destination 4-byte word. ! \ | | 288 | * as a destination 4-byte word. ! \ |
286 | */ ! \ | | 289 | */ ! \ |
287 | sub %t1, %t2, %t1 ! \ | | 290 | sub %t1, %t2, %t1 ! \ |
288 | ! \ | | 291 | ! \ |
289 | /* 4-byte align src_off. */ ! \ | | 292 | /* 4-byte align src_off. */ ! \ |
290 | depi 0, 31, 2, src_off ! \ | | 293 | depi 0, 31, 2, src_off ! \ |
291 | ! \ | | 294 | ! \ |
292 | /* ! \ | | 295 | /* ! \ |
293 | * It's somewhat important to note that this ! \ | | 296 | * It's somewhat important to note that this ! \ |
294 | * code thinks of count as "the number of bytes ! \ | | 297 | * code thinks of count as "the number of bytes ! \ |
295 | * that haven't been stored yet", as opposed to ! \ | | 298 | * that haven't been stored yet", as opposed to ! \ |
296 | * "the number of bytes that haven't been copied ! \ | | 299 | * "the number of bytes that haven't been copied ! \ |
297 | * yet". The distinction is subtle, but becomes ! \ | | 300 | * yet". The distinction is subtle, but becomes ! \ |
298 | * apparent at the end of the shifting code, where ! \ | | 301 | * apparent at the end of the shifting code, where ! \ |
299 | * we "back up" src_off to correspond to count, ! \ | | 302 | * we "back up" src_off to correspond to count, ! \ |
300 | * as opposed to flushing the FIFO. ! \ | | 303 | * as opposed to flushing the FIFO. ! \ |
301 | * ! \ | | 304 | * ! \ |
302 | * We calculated above how many bytes our first ! \ | | 305 | * We calculated above how many bytes our first ! \ |
303 | * store will store, so update count now. ! \ | | 306 | * store will store, so update count now. ! \ |
304 | * ! \ | | 307 | * ! \ |
305 | * If the shift is zero, strictly as an optimization ! \ | | 308 | * If the shift is zero, strictly as an optimization ! \ |
306 | * we use a copy loop that does no shifting. ! \ | | 309 | * we use a copy loop that does no shifting. ! \ |
307 | */ ! \ | | 310 | */ ! \ |
308 | comb,<> %r0, %t1, _LABEL(_shifting) ! \ | | 311 | comb,<> %r0, %t1, _LABEL(_shifting) ! \ |
309 | sub count, %t2, count ! \ | | 312 | sub count, %t2, count ! \ |
310 | ! \ | | 313 | ! \ |
311 | /* Load and store the first word. */ ! \ | | 314 | /* Load and store the first word. */ ! \ |
312 | ldws,ma 4(src_spc, src_off), %t4 ! \ | | 315 | ldws,ma 4(src_spc, src_off), %t4 ! \ |
313 | stbys,b,m %t4, 4(dst_spc, dst_off) ! \ | | 316 | stbys,b,m %t4, 4(dst_spc, dst_off) ! \ |
314 | ! \ | | 317 | ! \ |
315 | /* Do the rest of the copy. */ ! \ | | 318 | /* Do the rest of the copy. */ ! \ |
316 | _COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1) ! \ | | 319 | _COPY(src_spc,src_off,dst_spc,dst_off,count,ma,1) ! \ |
317 | ! \ | | 320 | ! \ |
318 | .label _LABEL(_shifting) ! \ | | 321 | .label _LABEL(_shifting) ! \ |
319 | ! \ | | 322 | ! \ |
320 | /* ! \ | | 323 | /* ! \ |
321 | * If shift < 0, we need to shift words to the ! \ | | 324 | * If shift < 0, we need to shift words to the ! \ |
322 | * left. Since we can't do this directly, we ! \ | | 325 | * left. Since we can't do this directly, we ! \ |
323 | * adjust the shift so it's a shift to the right ! \ | | 326 | * adjust the shift so it's a shift to the right ! \ |
324 | * and load the first word into the high word of ! \ | | 327 | * and load the first word into the high word of ! \ |
325 | * the FIFO. Otherwise, we load a zero into the ! \ | | 328 | * the FIFO. Otherwise, we load a zero into the ! \ |
326 | * high word of the FIFO. ! \ | | 329 | * high word of the FIFO. ! \ |
327 | */ ! \ | | 330 | */ ! \ |
328 | comb,<= %r0, %t1, _LABEL(_shiftingrt) ! \ | | 331 | comb,<= %r0, %t1, _LABEL(_shiftingrt) ! \ |
329 | copy %r0, %t3 ! \ | | 332 | copy %r0, %t3 ! \ |
330 | addi 4, %t1, %t1 ! \ | | 333 | addi 4, %t1, %t1 ! \ |
331 | ldws,ma 4(src_spc, src_off), %t3 ! \ | | 334 | ldws,ma 4(src_spc, src_off), %t3 ! \ |
332 | .label _LABEL(_shiftingrt) ! \ | | 335 | .label _LABEL(_shiftingrt) ! \ |
333 | ! \ | | 336 | ! \ |
334 | /* ! \ | | 337 | /* ! \ |
335 | * Turn the shift byte count into a bit count, ! \ | | 338 | * Turn the shift byte count into a bit count, ! \ |
336 | * load the next word, set the Shift Amount ! \ | | 339 | * load the next word, set the Shift Amount ! \ |
337 | * Register, and form and store the first word. ! \ | | 340 | * Register, and form and store the first word. ! \ |
338 | */ ! \ | | 341 | */ ! \ |
339 | sh3add %t1, %r0, %t1 ! \ | | 342 | sh3add %t1, %r0, %t1 ! \ |
340 | ldws,ma 4(src_spc, src_off), %t4 ! \ | | 343 | ldws,ma 4(src_spc, src_off), %t4 ! \ |
341 | mtctl %t1, %cr11 ! \ | | 344 | mtctl %t1, %cr11 ! \ |
342 | vshd %t3, %t4, %r1 ! \ | | 345 | vshd %t3, %t4, %r1 ! \ |
343 | stbys,b,m %r1, 4(dst_spc, dst_off) ! \ | | 346 | stbys,b,m %r1, 4(dst_spc, dst_off) ! \ |
344 | ! \ | | 347 | ! \ |
345 | /* Do the rest of the copy. */ ! \ | | 348 | /* Do the rest of the copy. */ ! \ |
346 | _COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1) | | 349 | _COPYS(src_spc,src_off,dst_spc,dst_off,count,ma,1) |
347 | | | 350 | |
348 | /* This macro copies a region in the reverse direction. */ | | 351 | /* This macro copies a region in the reverse direction. */ |
349 | #define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \ | | 352 | #define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \ |
350 | ! \ | | 353 | ! \ |
351 | /* Immediately add count to both offsets. */ ! \ | | 354 | /* Immediately add count to both offsets. */ ! \ |
352 | add src_off, count, src_off ! \ | | 355 | add src_off, count, src_off ! \ |
353 | add dst_off, count, dst_off ! \ | | 356 | add dst_off, count, dst_off ! \ |
354 | ! \ | | 357 | ! \ |
355 | /* ! \ | | 358 | /* ! \ |
356 | * Since in the shifting-right case we ! \ | | 359 | * Since in the shifting-right case we ! \ |
357 | * will load 8 bytes before checking ! \ | | 360 | * will load 8 bytes before checking ! \ |
358 | * count, to keep things simple, branch ! \ | | 361 | * count, to keep things simple, branch ! \ |
359 | * to the byte copier unless we're ! \ | | 362 | * to the byte copier unless we're ! \ |
360 | * copying at least 8 bytes. ! \ | | 363 | * copying at least 8 bytes. ! \ |
361 | */ ! \ | | 364 | */ ! \ |
362 | comib,>>,n 8, count, _LABEL(_do1) ! \ | | 365 | comib,>>,n 8, count, _LABEL(_do1) ! \ |
363 | ! \ | | 366 | ! \ |
364 | /* ! \ | | 367 | /* ! \ |
365 | * Once we 4-byte align the source offset, ! \ | | 368 | * Once we 4-byte align the source offset, ! \ |
366 | * figure out how many bytes from the region ! \ | | 369 | * figure out how many bytes from the region ! \ |
367 | * will be in the first 4-byte word we read. ! \ | | 370 | * will be in the first 4-byte word we read. ! \ |
368 | * Ditto for writing the destination offset. ! \ | | 371 | * Ditto for writing the destination offset. ! \ |
369 | */ ! \ | | 372 | */ ! \ |
370 | extru,<> src_off, 31, 2, %t1 ! \ | | 373 | extru,<> src_off, 31, 2, %t1 ! \ |
371 | ldi 4, %t1 ! \ | | 374 | ldi 4, %t1 ! \ |
372 | extru,<> dst_off, 31, 2, %t2 ! \ | | 375 | extru,<> dst_off, 31, 2, %t2 ! \ |
373 | ldi 4, %t2 ! \ | | 376 | ldi 4, %t2 ! \ |
374 | ! \ | | 377 | ! \ |
375 | /* ! \ | | 378 | /* ! \ |
376 | * Calculate the byte shift required. A ! \ | | 379 | * Calculate the byte shift required. A ! \ |
377 | * positive value means a source 4-byte ! \ | | 380 | * positive value means a source 4-byte ! \ |
378 | * word has to be shifted to the right to ! \ | | 381 | * word has to be shifted to the right to ! \ |
379 | * line up as a destination 4-byte word. ! \ | | 382 | * line up as a destination 4-byte word. ! \ |
380 | */ ! \ | | 383 | */ ! \ |
381 | sub %t2, %t1, %t1 ! \ | | 384 | sub %t2, %t1, %t1 ! \ |
382 | ! \ | | 385 | ! \ |
383 | /* ! \ | | 386 | /* ! \ |
384 | * 4-byte align src_off, leaving it pointing ! \ | | 387 | * 4-byte align src_off, leaving it pointing ! \ |
385 | * to the 4-byte word *after* the next word ! \ | | 388 | * to the 4-byte word *after* the next word ! \ |
386 | * we intend to load. ! \ | | 389 | * we intend to load. ! \ |
387 | * ! \ | | 390 | * ! \ |
388 | * It's somewhat important to note that this ! \ | | 391 | * It's somewhat important to note that this ! \ |
389 | * code thinks of count as "the number of bytes ! \ | | 392 | * code thinks of count as "the number of bytes ! \ |
390 | * that haven't been stored yet", as opposed to ! \ | | 393 | * that haven't been stored yet", as opposed to ! \ |
391 | * "the number of bytes that haven't been copied ! \ | | 394 | * "the number of bytes that haven't been copied ! \ |
392 | * yet". The distinction is subtle, but becomes ! \ | | 395 | * yet". The distinction is subtle, but becomes ! \ |
393 | * apparent at the end of the shifting code, where ! \ | | 396 | * apparent at the end of the shifting code, where ! \ |
394 | * we "back up" src_off to correspond to count, ! \ | | 397 | * we "back up" src_off to correspond to count, ! \ |
395 | * as opposed to flushing the FIFO. ! \ | | 398 | * as opposed to flushing the FIFO. ! \ |
396 | * ! \ | | 399 | * ! \ |
397 | * We calculated above how many bytes our first ! \ | | 400 | * We calculated above how many bytes our first ! \ |
398 | * store will store, so update count now. ! \ | | 401 | * store will store, so update count now. ! \ |
399 | * ! \ | | 402 | * ! \ |
400 | * If the shift is zero, we use a copy loop that ! \ | | 403 | * If the shift is zero, we use a copy loop that ! \ |
401 | * does no shifting. NB: unlike the forward case, ! \ | | 404 | * does no shifting. NB: unlike the forward case, ! \ |
402 | * this is NOT strictly an optimization. If the ! \ | | 405 | * this is NOT strictly an optimization. If the ! \ |
403 | * SAR is zero the vshds do NOT do the right thing. ! \ | | 406 | * SAR is zero the vshds do NOT do the right thing. ! \ |
404 | * This is another asymmetry more or less the "fault" ! \ | | 407 | * This is another asymmetry more or less the "fault" ! \
405 | * of vshd. ! \ | | 408 | * of vshd. ! \ |
406 | */ ! \ | | 409 | */ ! \ |
407 | addi 3, src_off, src_off ! \ | | 410 | addi 3, src_off, src_off ! \ |
408 | sub count, %t2, count ! \ | | 411 | sub count, %t2, count ! \ |
409 | comb,<> %r0, %t1, _LABEL(_shifting) ! \ | | 412 | comb,<> %r0, %t1, _LABEL(_shifting) ! \ |
410 | depi 0, 31, 2, src_off ! \ | | 413 | depi 0, 31, 2, src_off ! \ |
411 | ! \ | | 414 | ! \ |
412 | /* Load and store the first word. */ ! \ | | 415 | /* Load and store the first word. */ ! \ |
413 | ldws,mb -4(src_spc, src_off), %t4 ! \ | | 416 | ldws,mb -4(src_spc, src_off), %t4 ! \ |
414 | _STBYS_E_M(%t4, dst_spc, dst_off) ! \ | | 417 | _STBYS_E_M(%t4, dst_spc, dst_off) ! \ |
415 | ! \ | | 418 | ! \ |
416 | /* Do the rest of the copy. */ ! \ | | 419 | /* Do the rest of the copy. */ ! \ |
417 | _COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1) ! \ | | 420 | _COPY(src_spc,src_off,dst_spc,dst_off,count,mb,-1) ! \ |
418 | ! \ | | 421 | ! \ |
419 | .label _LABEL(_shifting) ! \ | | 422 | .label _LABEL(_shifting) ! \ |
420 | ! \ | | 423 | ! \ |
421 | /* ! \ | | 424 | /* ! \ |
422 | * If shift < 0, we need to shift words to the ! \ | | 425 | * If shift < 0, we need to shift words to the ! \ |
423 | * left. Since we can't do this directly, we ! \ | | 426 | * left. Since we can't do this directly, we ! \ |
424 | * adjust the shift so it's a shift to the right ! \ | | 427 | * adjust the shift so it's a shift to the right ! \ |
425 | * and load a zero into the low word of the FIFO. ! \ | | 428 | * and load a zero into the low word of the FIFO. ! \
426 | * Otherwise, we load the first word into the ! \ | | 429 | * Otherwise, we load the first word into the ! \ |
427 | * low word of the FIFO. ! \ | | 430 | * low word of the FIFO. ! \ |
428 | * ! \ | | 431 | * ! \ |
429 | * Note the nullification trickery here. We ! \ | | 432 | * Note the nullification trickery here. We ! \ |
430 | * assume that we're shifting to the left, and ! \ | | 433 | * assume that we're shifting to the left, and ! \ |
431 | * load zero into the low word of the FIFO. Then ! \ | | 434 | * load zero into the low word of the FIFO. Then ! \ |
432 | * we nullify the addi if we're shifting to the ! \ | | 435 | * we nullify the addi if we're shifting to the ! \ |
433 | * right. If the addi is not nullified, we are ! \ | | 436 | * right. If the addi is not nullified, we are ! \ |
434 | * shifting to the left, so we nullify the load. ! \ | | 437 | * shifting to the left, so we nullify the load. ! \ |
435 | * we branch if we're shifting to the right. ! \ | | 438 | * we branch if we're shifting to the right. ! \
436 | */ ! \ | | 439 | */ ! \ |
437 | copy %r0, %t3 ! \ | | 440 | copy %r0, %t3 ! \ |
438 | comb,<=,n %r0, %t1, 0 ! \ | | 441 | comb,<=,n %r0, %t1, 0 ! \ |
439 | addi,tr 4, %t1, %t1 ! \ | | 442 | addi,tr 4, %t1, %t1 ! \ |
440 | ldws,mb -4(src_spc, src_off), %t3 ! \ | | 443 | ldws,mb -4(src_spc, src_off), %t3 ! \ |
441 | ! \ | | 444 | ! \ |
442 | /* ! \ | | 445 | /* ! \ |
443 | * Turn the shift byte count into a bit count, ! \ | | 446 | * Turn the shift byte count into a bit count, ! \ |
444 | * load the next word, set the Shift Amount ! \ | | 447 | * load the next word, set the Shift Amount ! \ |
445 | * Register, and form and store the first word. ! \ | | 448 | * Register, and form and store the first word. ! \ |
446 | */ ! \ | | 449 | */ ! \ |
447 | sh3add %t1, %r0, %t1 ! \ | | 450 | sh3add %t1, %r0, %t1 ! \ |
448 | ldws,mb -4(src_spc, src_off), %t4 ! \ | | 451 | ldws,mb -4(src_spc, src_off), %t4 ! \ |
449 | mtctl %t1, %cr11 ! \ | | 452 | mtctl %t1, %cr11 ! \ |
450 | vshd %t4, %t3, %r1 ! \ | | 453 | vshd %t4, %t3, %r1 ! \ |
451 | _STBYS_E_M(%r1, dst_spc, dst_off) ! \ | | 454 | _STBYS_E_M(%r1, dst_spc, dst_off) ! \ |
452 | ! \ | | 455 | ! \ |
453 | /* Do the rest of the copy. */ ! \ | | 456 | /* Do the rest of the copy. */ ! \ |
454 | _COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1) | | 457 | _COPYS(src_spc,src_off,dst_spc,dst_off,count,mb,-1) |
455 | | | 458 | |
456 | /* | | 459 | /* |
457 | * For paranoia, when things aren't going well, enable this | | 460 | * For paranoia, when things aren't going well, enable this |
458 | * code to assemble byte-at-a-time-only copying. | | 461 | * code to assemble byte-at-a-time-only copying. |
459 | */ | | 462 | */ |
460 | #if 1 | | 463 | #if 1 |
461 | #undef _COPY_FORWARD | | 464 | #undef _COPY_FORWARD |
462 | #define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \ | | 465 | #define _COPY_FORWARD(src_spc, src_off, dst_spc, dst_off, count) \ |
463 | comb,=,n %r0, count, _LABEL(_done) ! \ | | 466 | comb,=,n %r0, count, _LABEL(_done) ! \ |
464 | ldbs,ma 1(src_spc, src_off), %r1 ! \ | | 467 | ldbs,ma 1(src_spc, src_off), %r1 ! \ |
465 | addib,<> -1, count, -12 ! \ | | 468 | addib,<> -1, count, -12 ! \ |
466 | stbs,ma %r1, 1(dst_spc, dst_off) ! \ | | 469 | stbs,ma %r1, 1(dst_spc, dst_off) ! \ |
467 | b,n _LABEL(_done) | | 470 | b,n _LABEL(_done) |
468 | #undef _COPY_REVERSE | | 471 | #undef _COPY_REVERSE |
469 | #define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \ | | 472 | #define _COPY_REVERSE(src_spc, src_off, dst_spc, dst_off, count) \ |
470 | comb,= %r0, count, _LABEL(_done) ! \ | | 473 | comb,= %r0, count, _LABEL(_done) ! \ |
471 | add src_off, count, src_off ! \ | | 474 | add src_off, count, src_off ! \ |
472 | add dst_off, count, dst_off ! \ | | 475 | add dst_off, count, dst_off ! \ |
473 | ldbs,mb -1(src_spc, src_off), %r1 ! \ | | 476 | ldbs,mb -1(src_spc, src_off), %r1 ! \ |
474 | addib,<> -1, count, -12 ! \ | | 477 | addib,<> -1, count, -12 ! \ |
475 | stbs,mb %r1, -1(dst_spc, dst_off) ! \ | | 478 | stbs,mb %r1, -1(dst_spc, dst_off) ! \ |
476 | b,n _LABEL(_done) | | 479 | b,n _LABEL(_done) |
477 | #endif | | 480 | #endif |
478 | | | 481 | |
479 | /* | | 482 | /* |
480 | * If none of the following are defined, define BCOPY. | | 483 | * If none of the following are defined, define BCOPY. |
481 | */ | | 484 | */ |
482 | #if !(defined(SPCOPY) || defined(MEMCPY) || defined(MEMMOVE)) | | 485 | #if !(defined(SPCOPY) || defined(MEMCPY) || defined(MEMMOVE)) |
483 | #define BCOPY | | 486 | #define BCOPY |
484 | #endif | | 487 | #endif |
485 | | | 488 | |
486 | #if defined(SPCOPY) && !defined(_STANDALONE) | | 489 | #if defined(SPCOPY) && !defined(_STANDALONE) |
487 | #include <sys/errno.h> | | 490 | #include <sys/errno.h> |
488 | #include "assym.h" | | 491 | #include "assym.h" |
489 | | | 492 | |
490 | /* | | 493 | /* |
491 | * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst, | | 494 | * int spcopy(pa_space_t ssp, const void *src, pa_space_t dsp, void *dst, |
492 | * size_t len) | | 495 | * size_t len) |
493 | * | | 496 | * |
494 | * We assume that the regions do not overlap. | | 497 | * We assume that the regions do not overlap. |
495 | */ | | 498 | */ |
496 | LEAF_ENTRY(spcopy) | | 499 | LEAF_ENTRY(spcopy) |
497 | | | 500 | |
498 | /* | | 501 | /* |
499 | * Setup the fault handler, which will fill in %ret0 if triggered. | | 502 | * Setup the fault handler, which will fill in %ret0 if triggered. |
500 | */ | | 503 | */ |
501 | mfctl CR_CURLWP, %r31 | | 504 | GET_CURLWP(%r31) |
502 | #ifdef DIAGNOSTIC | | 505 | #ifdef DIAGNOSTIC |
503 | comb,<>,n %r0, %r31, Lspcopy_curlwp_ok | | 506 | comb,<>,n %r0, %r31, Lspcopy_curlwp_ok |
504 | ldil L%panic, %r1 | | 507 | ldil L%panic, %r1 |
505 | ldil L%Lspcopy_curlwp_bad, %arg0 | | 508 | ldil L%Lspcopy_curlwp_bad, %arg0 |
506 | ldo R%panic(%r1), %r1 | | 509 | ldo R%panic(%r1), %r1 |
507 | ldo R%Lspcopy_curlwp_bad(%arg0), %arg0 | | 510 | ldo R%Lspcopy_curlwp_bad(%arg0), %arg0 |
508 | .call | | 511 | .call |
509 | bv,n %r0(%r1) | | 512 | bv,n %r0(%r1) |
510 | nop | | 513 | nop |
511 | Lspcopy_curlwp_bad: | | 514 | Lspcopy_curlwp_bad: |
512 | .asciz "spcopy: curlwp == NULL\n" | | 515 | .asciz "spcopy: curlwp == NULL\n" |
513 | .align 8 | | 516 | .align 8 |
514 | Lspcopy_curlwp_ok: | | 517 | Lspcopy_curlwp_ok: |
515 | #endif /* DIAGNOSTIC */ | | 518 | #endif /* DIAGNOSTIC */ |
516 | ldil L%spcopy_fault, %r1 | | 519 | ldil L%spcopy_fault, %r1 |
517 | ldw L_PCB(%r31), %r31 | | 520 | ldw L_PCB(%r31), %r31 |
518 | ldo R%spcopy_fault(%r1), %r1 | | 521 | ldo R%spcopy_fault(%r1), %r1 |
519 | stw %r1, PCB_ONFAULT(%r31) | | 522 | stw %r1, PCB_ONFAULT(%r31) |
520 | | | 523 | |
521 | /* Setup the space registers. */ | | 524 | /* Setup the space registers. */ |
522 | mfsp %sr2, %ret1 | | 525 | mfsp %sr2, %ret1 |
523 | mtsp %arg0, %sr1 | | 526 | mtsp %arg0, %sr1 |
524 | mtsp %arg2, %sr2 | | 527 | mtsp %arg2, %sr2 |
525 | | | 528 | |
526 | /* Get the len argument and do the copy. */ | | 529 | /* Get the len argument and do the copy. */ |
527 | ldw HPPA_FRAME_ARG(4)(%sp), %arg0 | | 530 | ldw HPPA_FRAME_ARG(4)(%sp), %arg0 |
528 | #define _LABEL(l) __CONCAT(spcopy,l) | | 531 | #define _LABEL(l) __CONCAT(spcopy,l) |
529 | _COPY_FORWARD(%sr1,%arg1,%sr2,%arg3,%arg0) | | 532 | _COPY_FORWARD(%sr1,%arg1,%sr2,%arg3,%arg0) |
530 | _LABEL(_done): | | 533 | _LABEL(_done): |
531 | | | 534 | |
532 | /* Return. */ | | 535 | /* Return. */ |
533 | copy %r0, %ret0 | | 536 | copy %r0, %ret0 |
534 | ALTENTRY(spcopy_fault) | | 537 | ALTENTRY(spcopy_fault) |
535 | stw %r0, PCB_ONFAULT(%r31) | | 538 | stw %r0, PCB_ONFAULT(%r31) |
536 | bv %r0(%rp) | | 539 | bv %r0(%rp) |
537 | mtsp %ret1, %sr2 | | 540 | mtsp %ret1, %sr2 |
538 | EXIT(spcopy) | | 541 | EXIT(spcopy) |
539 | #endif /* SPCOPY && !_STANDALONE */ | | 542 | #endif /* SPCOPY && !_STANDALONE */ |
540 | | | 543 | |
541 | #ifdef MEMCPY | | 544 | #ifdef MEMCPY |
542 | /* | | 545 | /* |
543 | * void *memcpy(void *restrict dst, const void *restrict src, size_t len); | | 546 | * void *memcpy(void *restrict dst, const void *restrict src, size_t len); |
544 | * | | 547 | * |
545 | * memcpy is specifically restricted to working on | | 548 | * memcpy is specifically restricted to working on |
546 | * non-overlapping regions, so we can just copy forward. | | 549 | * non-overlapping regions, so we can just copy forward. |
547 | */ | | 550 | */ |
548 | LEAF_ENTRY(memcpy) | | 551 | LEAF_ENTRY(memcpy) |
549 | copy %arg0, %ret0 | | 552 | copy %arg0, %ret0 |
550 | #define _LABEL(l) __CONCAT(memcpy,l) | | 553 | #define _LABEL(l) __CONCAT(memcpy,l) |
551 | _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2) | | 554 | _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2) |
552 | _LABEL(_done): | | 555 | _LABEL(_done): |
553 | bv,n %r0(%rp) | | 556 | bv,n %r0(%rp) |
554 | nop | | 557 | nop |
555 | EXIT(memcpy) | | 558 | EXIT(memcpy) |
556 | #endif /* MEMCPY */ | | 559 | #endif /* MEMCPY */ |
557 | | | 560 | |
558 | #ifdef BCOPY | | 561 | #ifdef BCOPY |
559 | /* | | 562 | /* |
560 | * void bcopy(const void *src, void *dst, size_t len); | | 563 | * void bcopy(const void *src, void *dst, size_t len); |
561 | */ | | 564 | */ |
562 | LEAF_ENTRY(bcopy) | | 565 | LEAF_ENTRY(bcopy) |
563 | copy %arg0, %r1 | | 566 | copy %arg0, %r1 |
564 | copy %arg1, %arg0 | | 567 | copy %arg1, %arg0 |
565 | copy %r1, %arg1 | | 568 | copy %r1, %arg1 |
566 | /* FALLTHROUGH */ | | 569 | /* FALLTHROUGH */ |
567 | #define _LABEL_F(l) __CONCAT(bcopy_F,l) | | 570 | #define _LABEL_F(l) __CONCAT(bcopy_F,l) |
568 | #define _LABEL_R(l) __CONCAT(bcopy_R,l) | | 571 | #define _LABEL_R(l) __CONCAT(bcopy_R,l) |
569 | #endif | | 572 | #endif |
570 | | | 573 | |
571 | #ifdef MEMMOVE | | 574 | #ifdef MEMMOVE |
572 | /* | | 575 | /* |
573 | * void *memmove(void *dst, const void *src, size_t len); | | 576 | * void *memmove(void *dst, const void *src, size_t len); |
574 | */ | | 577 | */ |
575 | LEAF_ENTRY(memmove) | | 578 | LEAF_ENTRY(memmove) |
576 | #define _LABEL_F(l) __CONCAT(memmove_F,l) | | 579 | #define _LABEL_F(l) __CONCAT(memmove_F,l) |
577 | #define _LABEL_R(l) __CONCAT(memmove_R,l) | | 580 | #define _LABEL_R(l) __CONCAT(memmove_R,l) |
578 | copy %arg0, %ret0 | | 581 | copy %arg0, %ret0 |
579 | #endif /* MEMMOVE */ | | 582 | #endif /* MEMMOVE */ |
580 | | | 583 | |
581 | #if defined(BCOPY) || defined(MEMMOVE) | | 584 | #if defined(BCOPY) || defined(MEMMOVE) |
582 | | | 585 | |
583 | /* | | 586 | /* |
584 | * If src >= dst or src + len <= dst, we copy | | 587 | * If src >= dst or src + len <= dst, we copy |
585 | * forward, else we copy in reverse. | | 588 | * forward, else we copy in reverse. |
586 | */ | | 589 | */ |
587 | add %arg1, %arg2, %r1 | | 590 | add %arg1, %arg2, %r1 |
588 | comb,>>=,n %arg1, %arg0, 0 | | 591 | comb,>>=,n %arg1, %arg0, 0 |
589 | comb,>>,n %r1, %arg0, _LABEL_R(_go) | | 592 | comb,>>,n %r1, %arg0, _LABEL_R(_go) |
590 | | | 593 | |
591 | #define _LABEL _LABEL_F | | 594 | #define _LABEL _LABEL_F |
592 | _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2) | | 595 | _COPY_FORWARD(%sr0,%arg1,%sr0,%arg0,%arg2) |
593 | #undef _LABEL | | 596 | #undef _LABEL |
594 | | | 597 | |
595 | _LABEL_R(_go): | | 598 | _LABEL_R(_go): |
596 | #define _LABEL _LABEL_R | | 599 | #define _LABEL _LABEL_R |
597 | _COPY_REVERSE(%sr0,%arg1,%sr0,%arg0,%arg2) | | 600 | _COPY_REVERSE(%sr0,%arg1,%sr0,%arg0,%arg2) |
598 | #undef _LABEL | | 601 | #undef _LABEL |
599 | | | 602 | |
600 | _LABEL_F(_done): | | 603 | _LABEL_F(_done): |
601 | _LABEL_R(_done): | | 604 | _LABEL_R(_done): |
602 | bv,n %r0(%rp) | | 605 | bv,n %r0(%rp) |
603 | nop | | 606 | nop |
604 | #ifdef BCOPY | | 607 | #ifdef BCOPY |
605 | EXIT(bcopy) | | 608 | EXIT(bcopy) |
606 | #else | | 609 | #else |
607 | EXIT(memmove) | | 610 | EXIT(memmove) |
608 | #endif | | 611 | #endif |
609 | #endif /* BCOPY || MEMMOVE */ | | 612 | #endif /* BCOPY || MEMMOVE */ |