| @@ -1,235 +1,284 @@ | | | @@ -1,235 +1,284 @@ |
1 | $NetBSD: patch-ac,v 1.2 2005/11/03 21:38:04 rillig Exp $ | | 1 | $NetBSD: patch-ac,v 1.3 2013/04/13 19:08:27 dholland Exp $ |
2 | | | 2 | |
3 | --- src/ddrawsdl.cpp.orig 2002-03-26 16:02:25.000000000 +0100 | | 3 | - Fix C syntax (string constants continued onto the next line must |
| | | 4 | have a backslash) |
| | | 5 | |
| | | 6 | - Use the asm blocks only on platforms where they're understood. |
| | | 7 | Disable the more elaborate blitter modes on other platforms |
| | | 8 | until/unless someone wants to write non-asm equivalents. |
| | | 9 | |
| | | 10 | --- src/ddrawsdl.cpp.orig 2002-03-26 15:02:25.000000000 +0000 |
4 | +++ src/ddrawsdl.cpp | | 11 | +++ src/ddrawsdl.cpp |
5 | @@ -174,66 +174,66 @@ void UpdateFrame_h() | | 12 | @@ -12,6 +12,7 @@ |
| | | 13 | */ |
| | | 14 | |
| | | 15 | #include <SDL.h> |
| | | 16 | +#include <assert.h> |
| | | 17 | #include "globals.h" |
| | | 18 | #include "gfx.h" |
| | | 19 | |
| | | 20 | @@ -174,69 +175,83 @@ void UpdateFrame_h() |
6 | { | | 21 | { |
7 | switch(fBlitterMode) { | | 22 | switch(fBlitterMode) { |
8 | case 1: | | 23 | case 1: |
9 | - __asm__ __volatile__(".align 32 | | 24 | - __asm__ __volatile__(".align 32 |
10 | - cld | | 25 | - cld |
11 | - rep | | 26 | - rep |
| | | 27 | +#if defined(__i386__) || defined(__x86_64__) |
12 | + __asm__ __volatile__(".align 32\n\ | | 28 | + __asm__ __volatile__(".align 32\n\ |
13 | + cld\n\ | | 29 | + cld\n\ |
14 | + rep\n\ | | 30 | + rep\n\ |
15 | movsw" | | 31 | movsw" |
16 | : | | 32 | : |
17 | :"S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"((g_pDDSPrimary->h * g_pDDSPrimary->pitch)>>1)); | | 33 | :"S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"((g_pDDSPrimary->h * g_pDDSPrimary->pitch)>>1)); |
| | | 34 | +#else |
| | | 35 | + memcpy(g_pDDSPrimary->pixels, dx_buffer, g_pDDSPrimary->h * g_pDDSPrimary->pitch); |
| | | 36 | +#endif |
18 | break; | | 37 | break; |
19 | case 2: | | 38 | case 2: |
20 | - __asm__ __volatile__("pusha | | 39 | - __asm__ __volatile__("pusha |
21 | - xorl %%eax, %%eax | | 40 | - xorl %%eax, %%eax |
22 | - movl $448, %%ebx | | 41 | - movl $448, %%ebx |
23 | - emms | | 42 | - emms |
24 | - sum: | | 43 | - sum: |
25 | - movq (%0, %%eax, 1), %%mm0 | | 44 | - movq (%0, %%eax, 1), %%mm0 |
26 | - movq %%mm0, %%mm1 | | 45 | - movq %%mm0, %%mm1 |
27 | - punpcklwd %%mm0, %%mm0 | | 46 | - punpcklwd %%mm0, %%mm0 |
28 | - addl $8, %%eax | | 47 | - addl $8, %%eax |
29 | - punpckhwd %%mm1,%%mm1 | | 48 | - punpckhwd %%mm1,%%mm1 |
30 | - movq %%mm0, -16(%1, %%eax, 2) | | 49 | - movq %%mm0, -16(%1, %%eax, 2) |
31 | - cmpl %%ebx, %%eax | | 50 | - cmpl %%ebx, %%eax |
32 | - | | 51 | - |
33 | - movq %%mm1, -8(%1, %%eax, 2) | | 52 | - movq %%mm1, -8(%1, %%eax, 2) |
34 | - jnz sum | | 53 | - jnz sum |
35 | - xorl %%eax, %%eax | | 54 | - xorl %%eax, %%eax |
36 | - | | 55 | - |
37 | - addl %2, %0 | | 56 | - addl %2, %0 |
38 | - decl %%edx | | 57 | - decl %%edx |
39 | - | | 58 | - |
40 | - leal (%1, %2, 4), %1 | | 59 | - leal (%1, %2, 4), %1 |
41 | - jnz sum | | 60 | - jnz sum |
42 | - emms | | 61 | - emms |
| | | 62 | +#if defined(__i386__) |
43 | + __asm__ __volatile__("pusha\n\ | | 63 | + __asm__ __volatile__("pusha\n\ |
44 | + xorl %%eax, %%eax\n\ | | 64 | + xorl %%eax, %%eax\n\ |
45 | + movl $448, %%ebx \n\ | | 65 | + movl $448, %%ebx \n\ |
46 | + emms\n\ | | 66 | + emms\n\ |
47 | + sum:\n\ | | 67 | + sum:\n\ |
48 | + movq (%0, %%eax, 1), %%mm0\n\ | | 68 | + movq (%0, %%eax, 1), %%mm0\n\ |
49 | + movq %%mm0, %%mm1\n\ | | 69 | + movq %%mm0, %%mm1\n\ |
50 | + punpcklwd %%mm0, %%mm0\n\ | | 70 | + punpcklwd %%mm0, %%mm0\n\ |
51 | + addl $8, %%eax\n\ | | 71 | + addl $8, %%eax\n\ |
52 | + punpckhwd %%mm1,%%mm1\n\ | | 72 | + punpckhwd %%mm1,%%mm1\n\ |
53 | + movq %%mm0, -16(%1, %%eax, 2)\n\ | | 73 | + movq %%mm0, -16(%1, %%eax, 2)\n\ |
54 | + cmpl %%ebx, %%eax\n\ | | 74 | + cmpl %%ebx, %%eax\n\ |
55 | + \n\ | | 75 | + \n\ |
56 | + movq %%mm1, -8(%1, %%eax, 2)\n\ | | 76 | + movq %%mm1, -8(%1, %%eax, 2)\n\ |
57 | + jnz sum\n\ | | 77 | + jnz sum\n\ |
58 | + xorl %%eax, %%eax\n\ | | 78 | + xorl %%eax, %%eax\n\ |
59 | + \n\ | | 79 | + \n\ |
60 | + addl %2, %0\n\ | | 80 | + addl %2, %0\n\ |
61 | + decl %%edx\n\ | | 81 | + decl %%edx\n\ |
62 | + \n\ | | 82 | + \n\ |
63 | + leal (%1, %2, 4), %1\n\ | | 83 | + leal (%1, %2, 4), %1\n\ |
64 | + jnz sum\n\ | | 84 | + jnz sum\n\ |
65 | + emms\n\ | | 85 | + emms\n\ |
66 | popa" | | 86 | popa" |
67 | : | | 87 | : |
68 | : "S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"(g_pDDSPrimary->pitch>>1), "d"(144)); | | 88 | : "S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"(g_pDDSPrimary->pitch>>1), "d"(144)); |
| | | 89 | +#else |
| | | 90 | + /* If you put real code here, enable the corresponding option in main() */ |
| | | 91 | + assert(0); |
| | | 92 | +#endif |
69 | break; | | 93 | break; |
70 | | | 94 | |
71 | case 3: | | 95 | case 3: |
72 | - __asm__ __volatile__("pusha | | 96 | - __asm__ __volatile__("pusha |
73 | - xorl %%eax, %%eax | | 97 | - xorl %%eax, %%eax |
74 | - movl $896, %%ebx | | 98 | - movl $896, %%ebx |
75 | - emms | | 99 | - emms |
76 | - sum3: | | 100 | - sum3: |
77 | - movq (%0, %%eax, 1), %%mm0 | | 101 | - movq (%0, %%eax, 1), %%mm0 |
78 | - movq %%mm0, %%mm1 | | 102 | - movq %%mm0, %%mm1 |
79 | - punpcklwd %%mm0, %%mm0 | | 103 | - punpcklwd %%mm0, %%mm0 |
80 | - addl $8, %%eax | | 104 | - addl $8, %%eax |
81 | - punpckhwd %%mm1,%%mm1 | | 105 | - punpckhwd %%mm1,%%mm1 |
82 | - movq %%mm0, -16(%1, %%eax, 2) | | 106 | - movq %%mm0, -16(%1, %%eax, 2) |
83 | - cmpl %%ebx, %%eax | | 107 | - cmpl %%ebx, %%eax |
84 | - | | 108 | - |
85 | - movq %%mm1, -8(%1, %%eax, 2) | | 109 | - movq %%mm1, -8(%1, %%eax, 2) |
86 | - jnz sum3 | | 110 | - jnz sum3 |
87 | - xorl %%eax, %%eax | | 111 | - xorl %%eax, %%eax |
88 | - | | 112 | - |
89 | - addl %2, %0 | | 113 | - addl %2, %0 |
90 | - decl %%edx | | 114 | - decl %%edx |
91 | - | | 115 | - |
92 | - leal (%1, %2, 4), %1 | | 116 | - leal (%1, %2, 4), %1 |
93 | - jnz sum3 | | 117 | - jnz sum3 |
94 | - emms | | 118 | - emms |
| | | 119 | +#if defined(__i386__) |
95 | + __asm__ __volatile__("pusha\n\ | | 120 | + __asm__ __volatile__("pusha\n\ |
96 | + xorl %%eax, %%eax\n\ | | 121 | + xorl %%eax, %%eax\n\ |
97 | + movl $896, %%ebx \n\ | | 122 | + movl $896, %%ebx \n\ |
98 | + emms\n\ | | 123 | + emms\n\ |
99 | + sum3:\n\ | | 124 | + sum3:\n\ |
100 | + movq (%0, %%eax, 1), %%mm0\n\ | | 125 | + movq (%0, %%eax, 1), %%mm0\n\ |
101 | + movq %%mm0, %%mm1\n\ | | 126 | + movq %%mm0, %%mm1\n\ |
102 | + punpcklwd %%mm0, %%mm0\n\ | | 127 | + punpcklwd %%mm0, %%mm0\n\ |
103 | + addl $8, %%eax\n\ | | 128 | + addl $8, %%eax\n\ |
104 | + punpckhwd %%mm1,%%mm1\n\ | | 129 | + punpckhwd %%mm1,%%mm1\n\ |
105 | + movq %%mm0, -16(%1, %%eax, 2)\n\ | | 130 | + movq %%mm0, -16(%1, %%eax, 2)\n\ |
106 | + cmpl %%ebx, %%eax\n\ | | 131 | + cmpl %%ebx, %%eax\n\ |
107 | + \n\ | | 132 | + \n\ |
108 | + movq %%mm1, -8(%1, %%eax, 2)\n\ | | 133 | + movq %%mm1, -8(%1, %%eax, 2)\n\ |
109 | + jnz sum3\n\ | | 134 | + jnz sum3\n\ |
110 | + xorl %%eax, %%eax\n\ | | 135 | + xorl %%eax, %%eax\n\ |
111 | + \n\ | | 136 | + \n\ |
112 | + addl %2, %0\n\ | | 137 | + addl %2, %0\n\ |
113 | + decl %%edx\n\ | | 138 | + decl %%edx\n\ |
114 | + \n\ | | 139 | + \n\ |
115 | + leal (%1, %2, 4), %1\n\ | | 140 | + leal (%1, %2, 4), %1\n\ |
116 | + jnz sum3\n\ | | 141 | + jnz sum3\n\ |
117 | + emms\n\ | | 142 | + emms\n\ |
118 | popa" | | 143 | popa" |
119 | : | | 144 | : |
120 | : "S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"(g_pDDSPrimary->pitch>>1), "d"(144)); | | 145 | : "S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"(g_pDDSPrimary->pitch>>1), "d"(144)); |
121 | @@ -418,65 +418,65 @@ void UpdateFrame_v() | | 146 | +#else |
| | | 147 | + /* If you put real code here, enable the corresponding option in main() */ |
| | | 148 | + assert(0); |
| | | 149 | +#endif |
| | | 150 | break; |
| | | 151 | } |
| | | 152 | //Niels(adjust to suit) use the asm below, or use standard memcpy/fast_memcpy etc, updateframe_v is not implemented |
| | | 153 | @@ -418,68 +433,82 @@ void UpdateFrame_v() |
122 | { | | 154 | { |
123 | switch(fBlitterMode) { | | 155 | switch(fBlitterMode) { |
124 | case 1: | | 156 | case 1: |
125 | - __asm__ __volatile__(".align 32 | | 157 | - __asm__ __volatile__(".align 32 |
126 | - cld | | 158 | - cld |
127 | - rep | | 159 | - rep |
| | | 160 | +#if defined(__i386__) || defined(__x86_64__) |
128 | + __asm__ __volatile__(".align 32\n\ | | 161 | + __asm__ __volatile__(".align 32\n\ |
129 | + cld\n\ | | 162 | + cld\n\ |
130 | + rep\n\ | | 163 | + rep\n\ |
131 | movsw" | | 164 | movsw" |
132 | : | | 165 | : |
133 | :"S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"((g_pDDSPrimary->h * g_pDDSPrimary->pitch)>>1)); | | 166 | :"S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"((g_pDDSPrimary->h * g_pDDSPrimary->pitch)>>1)); |
| | | 167 | +#else |
| | | 168 | + memcpy(g_pDDSPrimary->pixels, dx_buffer, g_pDDSPrimary->h * g_pDDSPrimary->pitch); |
| | | 169 | +#endif |
134 | break; | | 170 | break; |
135 | case 2: // double scanlines | | 171 | case 2: // double scanlines |
136 | - __asm__ __volatile__("pusha | | 172 | - __asm__ __volatile__("pusha |
137 | - xorl %%eax, %%eax | | 173 | - xorl %%eax, %%eax |
138 | - movl $288, %%ebx | | 174 | - movl $288, %%ebx |
139 | - emms | | 175 | - emms |
140 | - sumb: | | 176 | - sumb: |
141 | - movq (%0, %%eax, 1), %%mm0 | | 177 | - movq (%0, %%eax, 1), %%mm0 |
142 | - movq %%mm0, %%mm1 | | 178 | - movq %%mm0, %%mm1 |
143 | - punpcklwd %%mm0, %%mm0 | | 179 | - punpcklwd %%mm0, %%mm0 |
144 | - addl $8, %%eax | | 180 | - addl $8, %%eax |
145 | - punpckhwd %%mm1,%%mm1 | | 181 | - punpckhwd %%mm1,%%mm1 |
146 | - movq %%mm0, -16(%1, %%eax, 2) | | 182 | - movq %%mm0, -16(%1, %%eax, 2) |
147 | - cmpl %%ebx, %%eax | | 183 | - cmpl %%ebx, %%eax |
148 | - | | 184 | - |
149 | - movq %%mm1, -8(%1, %%eax, 2) | | 185 | - movq %%mm1, -8(%1, %%eax, 2) |
150 | - jnz sumb | | 186 | - jnz sumb |
151 | - xorl %%eax, %%eax | | 187 | - xorl %%eax, %%eax |
152 | - | | 188 | - |
153 | - addl %2, %0 | | 189 | - addl %2, %0 |
154 | - decl %%edx | | 190 | - decl %%edx |
155 | - | | 191 | - |
156 | - leal (%1, %2, 4), %1 | | 192 | - leal (%1, %2, 4), %1 |
157 | - jnz sumb | | 193 | - jnz sumb |
158 | - emms | | 194 | - emms |
| | | 195 | +#if defined(__i386__) |
159 | + __asm__ __volatile__("pusha\n\ | | 196 | + __asm__ __volatile__("pusha\n\ |
160 | + xorl %%eax, %%eax\n\ | | 197 | + xorl %%eax, %%eax\n\ |
161 | + movl $288, %%ebx \n\ | | 198 | + movl $288, %%ebx \n\ |
162 | + emms\n\ | | 199 | + emms\n\ |
163 | + sumb:\n\ | | 200 | + sumb:\n\ |
164 | + movq (%0, %%eax, 1), %%mm0\n\ | | 201 | + movq (%0, %%eax, 1), %%mm0\n\ |
165 | + movq %%mm0, %%mm1\n\ | | 202 | + movq %%mm0, %%mm1\n\ |
166 | + punpcklwd %%mm0, %%mm0\n\ | | 203 | + punpcklwd %%mm0, %%mm0\n\ |
167 | + addl $8, %%eax\n\ | | 204 | + addl $8, %%eax\n\ |
168 | + punpckhwd %%mm1,%%mm1\n\ | | 205 | + punpckhwd %%mm1,%%mm1\n\ |
169 | + movq %%mm0, -16(%1, %%eax, 2)\n\ | | 206 | + movq %%mm0, -16(%1, %%eax, 2)\n\ |
170 | + cmpl %%ebx, %%eax\n\ | | 207 | + cmpl %%ebx, %%eax\n\ |
171 | + \n\ | | 208 | + \n\ |
172 | + movq %%mm1, -8(%1, %%eax, 2)\n\ | | 209 | + movq %%mm1, -8(%1, %%eax, 2)\n\ |
173 | + jnz sumb\n\ | | 210 | + jnz sumb\n\ |
174 | + xorl %%eax, %%eax\n\ | | 211 | + xorl %%eax, %%eax\n\ |
175 | + \n\ | | 212 | + \n\ |
176 | + addl %2, %0\n\ | | 213 | + addl %2, %0\n\ |
177 | + decl %%edx\n\ | | 214 | + decl %%edx\n\ |
178 | + \n\ | | 215 | + \n\ |
179 | + leal (%1, %2, 4), %1\n\ | | 216 | + leal (%1, %2, 4), %1\n\ |
180 | + jnz sumb\n\ | | 217 | + jnz sumb\n\ |
181 | + emms\n\ | | 218 | + emms\n\ |
182 | popa" | | 219 | popa" |
183 | : | | 220 | : |
184 | : "S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"(g_pDDSPrimary->pitch>>1), "d"(224)); | | 221 | : "S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"(g_pDDSPrimary->pitch>>1), "d"(224)); |
| | | 222 | +#else |
| | | 223 | + /* If you put real code here, enable the corresponding option in main() */ |
| | | 224 | + assert(0); |
| | | 225 | +#endif |
185 | break; | | 226 | break; |
186 | case 3: // double stretch mode | | 227 | case 3: // double stretch mode |
187 | - __asm__ __volatile__("pusha | | 228 | - __asm__ __volatile__("pusha |
188 | - xorl %%eax, %%eax | | 229 | - xorl %%eax, %%eax |
189 | - movl $576, %%ebx | | 230 | - movl $576, %%ebx |
190 | - emms | | 231 | - emms |
191 | - sumc: | | 232 | - sumc: |
192 | - movq (%0, %%eax, 1), %%mm0 | | 233 | - movq (%0, %%eax, 1), %%mm0 |
193 | - movq %%mm0, %%mm1 | | 234 | - movq %%mm0, %%mm1 |
194 | - punpcklwd %%mm0, %%mm0 | | 235 | - punpcklwd %%mm0, %%mm0 |
195 | - addl $8, %%eax | | 236 | - addl $8, %%eax |
196 | - punpckhwd %%mm1,%%mm1 | | 237 | - punpckhwd %%mm1,%%mm1 |
197 | - movq %%mm0, -16(%1, %%eax, 2) | | 238 | - movq %%mm0, -16(%1, %%eax, 2) |
198 | - cmpl %%ebx, %%eax | | 239 | - cmpl %%ebx, %%eax |
199 | - | | 240 | - |
200 | - movq %%mm1, -8(%1, %%eax, 2) | | 241 | - movq %%mm1, -8(%1, %%eax, 2) |
201 | - jnz sumc | | 242 | - jnz sumc |
202 | - xorl %%eax, %%eax | | 243 | - xorl %%eax, %%eax |
203 | - | | 244 | - |
204 | - addl %2, %0 | | 245 | - addl %2, %0 |
205 | - decl %%edx | | 246 | - decl %%edx |
206 | - | | 247 | - |
207 | - leal (%1, %2, 4), %1 | | 248 | - leal (%1, %2, 4), %1 |
208 | - jnz sumc | | 249 | - jnz sumc |
209 | - emms | | 250 | - emms |
| | | 251 | +#if defined(__i386__) |
210 | + __asm__ __volatile__("pusha\n\ | | 252 | + __asm__ __volatile__("pusha\n\ |
211 | + xorl %%eax, %%eax\n\ | | 253 | + xorl %%eax, %%eax\n\ |
212 | + movl $576, %%ebx \n\ | | 254 | + movl $576, %%ebx \n\ |
213 | + emms\n\ | | 255 | + emms\n\ |
214 | + sumc:\n\ | | 256 | + sumc:\n\ |
215 | + movq (%0, %%eax, 1), %%mm0\n\ | | 257 | + movq (%0, %%eax, 1), %%mm0\n\ |
216 | + movq %%mm0, %%mm1\n\ | | 258 | + movq %%mm0, %%mm1\n\ |
217 | + punpcklwd %%mm0, %%mm0\n\ | | 259 | + punpcklwd %%mm0, %%mm0\n\ |
218 | + addl $8, %%eax\n\ | | 260 | + addl $8, %%eax\n\ |
219 | + punpckhwd %%mm1,%%mm1\n\ | | 261 | + punpckhwd %%mm1,%%mm1\n\ |
220 | + movq %%mm0, -16(%1, %%eax, 2)\n\ | | 262 | + movq %%mm0, -16(%1, %%eax, 2)\n\ |
221 | + cmpl %%ebx, %%eax\n\ | | 263 | + cmpl %%ebx, %%eax\n\ |
222 | + \n\ | | 264 | + \n\ |
223 | + movq %%mm1, -8(%1, %%eax, 2)\n\ | | 265 | + movq %%mm1, -8(%1, %%eax, 2)\n\ |
224 | + jnz sumc\n\ | | 266 | + jnz sumc\n\ |
225 | + xorl %%eax, %%eax\n\ | | 267 | + xorl %%eax, %%eax\n\ |
226 | + \n\ | | 268 | + \n\ |
227 | + addl %2, %0\n\ | | 269 | + addl %2, %0\n\ |
228 | + decl %%edx\n\ | | 270 | + decl %%edx\n\ |
229 | + \n\ | | 271 | + \n\ |
230 | + leal (%1, %2, 4), %1\n\ | | 272 | + leal (%1, %2, 4), %1\n\ |
231 | + jnz sumc\n\ | | 273 | + jnz sumc\n\ |
232 | + emms\n\ | | 274 | + emms\n\ |
233 | popa" | | 275 | popa" |
234 | : | | 276 | : |
235 | : "S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"(g_pDDSPrimary->pitch>>1), "d"(224)); | | 277 | : "S"(dx_buffer), "D"(g_pDDSPrimary->pixels), "c"(g_pDDSPrimary->pitch>>1), "d"(224)); |
| | | 278 | +#else |
| | | 279 | + /* If you put real code here, enable the corresponding option in main() */ |
| | | 280 | + assert(0); |
| | | 281 | +#endif |
| | | 282 | break; |
| | | 283 | } |
| | | 284 | SDL_Flip(g_pDDSPrimary); |