| @@ -1,117 +1,115 @@ | | | @@ -1,117 +1,115 @@ |
1 | /* $NetBSD: softfloat-macros.h,v 1.1 2001/04/26 03:10:47 ross Exp $ */ | | 1 | /* $NetBSD: softfloat-macros.h,v 1.2 2020/09/01 15:36:53 thorpej Exp $ */ |
2 | | | 2 | |
3 | /* | | 3 | /*============================================================================ |
4 | =============================================================================== | | | |
5 | | | 4 | |
6 | This C source fragment is part of the SoftFloat IEC/IEEE Floating-point | | 5 | This C source fragment is part of the SoftFloat IEC/IEEE Floating-point |
7 | Arithmetic Package, Release 2a. | | 6 | Arithmetic Package, Release 2b. |
8 | | | 7 | |
9 | Written by John R. Hauser. This work was made possible in part by the | | 8 | Written by John R. Hauser. This work was made possible in part by the |
10 | International Computer Science Institute, located at Suite 600, 1947 Center | | 9 | International Computer Science Institute, located at Suite 600, 1947 Center |
11 | Street, Berkeley, California 94704. Funding was partially provided by the | | 10 | Street, Berkeley, California 94704. Funding was partially provided by the |
12 | National Science Foundation under grant MIP-9311980. The original version | | 11 | National Science Foundation under grant MIP-9311980. The original version |
13 | of this code was written as part of a project to build a fixed-point vector | | 12 | of this code was written as part of a project to build a fixed-point vector |
14 | processor in collaboration with the University of California at Berkeley, | | 13 | processor in collaboration with the University of California at Berkeley, |
15 | overseen by Profs. Nelson Morgan and John Wawrzynek. More information | | 14 | overseen by Profs. Nelson Morgan and John Wawrzynek. More information |
16 | is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ | | 15 | is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ |
17 | arithmetic/SoftFloat.html'. | | 16 | arithmetic/SoftFloat.html'. |
18 | | | 17 | |
19 | THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort | | 18 | THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has |
20 | has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT | | 19 | been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES |
21 | TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO | | 20 | RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS |
22 | PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY | | 21 | AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, |
23 | AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. | | 22 | COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE |
| | | 23 | EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE |
| | | 24 | INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR |
| | | 25 | OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. |
24 | | | 26 | |
25 | Derivative works are acceptable, even for commercial purposes, so long as | | 27 | Derivative works are acceptable, even for commercial purposes, so long as |
26 | (1) they include prominent notice that the work is derivative, and (2) they | | 28 | (1) the source code for the derivative work includes prominent notice that |
27 | include prominent notice akin to these four paragraphs for those parts of | | 29 | the work is derivative, and (2) the source code includes prominent notice with |
28 | this code that are retained. | | 30 | these four paragraphs for those parts of this code that are retained. |
29 | | | 31 | |
30 | =============================================================================== | | 32 | =============================================================================*/ |
31 | */ | | 33 | |
32 | | | 34 | /*---------------------------------------------------------------------------- |
33 | /* | | 35 | | Shifts `a' right by the number of bits given in `count'. If any nonzero |
34 | ------------------------------------------------------------------------------- | | 36 | | bits are shifted off, they are ``jammed'' into the least significant bit of |
35 | Shifts `a' right by the number of bits given in `count'. If any nonzero | | 37 | | the result by setting the least significant bit to 1. The value of `count' |
36 | bits are shifted off, they are ``jammed'' into the least significant bit of | | 38 | | can be arbitrarily large; in particular, if `count' is greater than 32, the |
37 | the result by setting the least significant bit to 1. The value of `count' | | 39 | | result will be either 0 or 1, depending on whether `a' is zero or nonzero. |
38 | can be arbitrarily large; in particular, if `count' is greater than 32, the | | 40 | | The result is stored in the location pointed to by `zPtr'. |
39 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. | | 41 | *----------------------------------------------------------------------------*/ |
40 | The result is stored in the location pointed to by `zPtr'. | | 42 | |
41 | ------------------------------------------------------------------------------- | | | |
42 | */ | | | |
43 | INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) | | 43 | INLINE void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) |
44 | { | | 44 | { |
45 | bits32 z; | | 45 | bits32 z; |
46 | | | 46 | |
47 | if ( count == 0 ) { | | 47 | if ( count == 0 ) { |
48 | z = a; | | 48 | z = a; |
49 | } | | 49 | } |
50 | else if ( count < 32 ) { | | 50 | else if ( count < 32 ) { |
51 | z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); | | 51 | z = ( a>>count ) | ( ( a<<( ( - count ) & 31 ) ) != 0 ); |
52 | } | | 52 | } |
53 | else { | | 53 | else { |
54 | z = ( a != 0 ); | | 54 | z = ( a != 0 ); |
55 | } | | 55 | } |
56 | *zPtr = z; | | 56 | *zPtr = z; |
57 | | | 57 | |
58 | } | | 58 | } |
59 | | | 59 | |
60 | /* | | 60 | /*---------------------------------------------------------------------------- |
61 | ------------------------------------------------------------------------------- | | 61 | | Shifts `a' right by the number of bits given in `count'. If any nonzero |
62 | Shifts `a' right by the number of bits given in `count'. If any nonzero | | 62 | | bits are shifted off, they are ``jammed'' into the least significant bit of |
63 | bits are shifted off, they are ``jammed'' into the least significant bit of | | 63 | | the result by setting the least significant bit to 1. The value of `count' |
64 | the result by setting the least significant bit to 1. The value of `count' | | 64 | | can be arbitrarily large; in particular, if `count' is greater than 64, the |
65 | can be arbitrarily large; in particular, if `count' is greater than 64, the | | 65 | | result will be either 0 or 1, depending on whether `a' is zero or nonzero. |
66 | result will be either 0 or 1, depending on whether `a' is zero or nonzero. | | 66 | | The result is stored in the location pointed to by `zPtr'. |
67 | The result is stored in the location pointed to by `zPtr'. | | 67 | *----------------------------------------------------------------------------*/ |
68 | ------------------------------------------------------------------------------- | | 68 | |
69 | */ | | | |
70 | INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) | | 69 | INLINE void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) |
71 | { | | 70 | { |
72 | bits64 z; | | 71 | bits64 z; |
73 | | | 72 | |
74 | if ( count == 0 ) { | | 73 | if ( count == 0 ) { |
75 | z = a; | | 74 | z = a; |
76 | } | | 75 | } |
77 | else if ( count < 64 ) { | | 76 | else if ( count < 64 ) { |
78 | z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); | | 77 | z = ( a>>count ) | ( ( a<<( ( - count ) & 63 ) ) != 0 ); |
79 | } | | 78 | } |
80 | else { | | 79 | else { |
81 | z = ( a != 0 ); | | 80 | z = ( a != 0 ); |
82 | } | | 81 | } |
83 | *zPtr = z; | | 82 | *zPtr = z; |
84 | | | 83 | |
85 | } | | 84 | } |
86 | | | 85 | |
87 | /* | | 86 | /*---------------------------------------------------------------------------- |
88 | ------------------------------------------------------------------------------- | | 87 | | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 |
89 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64 | | 88 | | _plus_ the number of bits given in `count'. The shifted result is at most |
90 | _plus_ the number of bits given in `count'. The shifted result is at most | | 89 | | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The |
91 | 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'. The | | 90 | | bits shifted off form a second 64-bit result as follows: The _last_ bit |
92 | bits shifted off form a second 64-bit result as follows: The _last_ bit | | 91 | | shifted off is the most-significant bit of the extra result, and the other |
93 | shifted off is the most-significant bit of the extra result, and the other | | 92 | | 63 bits of the extra result are all zero if and only if _all_but_the_last_ |
94 | 63 bits of the extra result are all zero if and only if _all_but_the_last_ | | 93 | | bits shifted off were all zero. This extra result is stored in the location |
95 | bits shifted off were all zero. This extra result is stored in the location | | 94 | | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. |
96 | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. | | 95 | | (This routine makes more sense if `a0' and `a1' are considered to form |
97 | (This routine makes more sense if `a0' and `a1' are considered to form a | | 96 | | a fixed-point value with binary point between `a0' and `a1'. This fixed- |
98 | fixed-point value with binary point between `a0' and `a1'. This fixed-point | | 97 | | point value is shifted right by the number of bits given in `count', and |
99 | value is shifted right by the number of bits given in `count', and the | | 98 | | the integer part of the result is returned at the location pointed to by |
100 | integer part of the result is returned at the location pointed to by | | 99 | | `z0Ptr'. The fractional part of the result may be slightly corrupted as |
101 | `z0Ptr'. The fractional part of the result may be slightly corrupted as | | 100 | | described above, and is returned at the location pointed to by `z1Ptr'.) |
102 | described above, and is returned at the location pointed to by `z1Ptr'.) | | 101 | *----------------------------------------------------------------------------*/ |
103 | ------------------------------------------------------------------------------- | | 102 | |
104 | */ | | | |
105 | INLINE void | | 103 | INLINE void |
106 | shift64ExtraRightJamming( | | 104 | shift64ExtraRightJamming( |
107 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) | | 105 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) |
108 | { | | 106 | { |
109 | bits64 z0, z1; | | 107 | bits64 z0, z1; |
110 | int8 negCount = ( - count ) & 63; | | 108 | int8 negCount = ( - count ) & 63; |
111 | | | 109 | |
112 | if ( count == 0 ) { | | 110 | if ( count == 0 ) { |
113 | z1 = a1; | | 111 | z1 = a1; |
114 | z0 = a0; | | 112 | z0 = a0; |
115 | } | | 113 | } |
116 | else if ( count < 64 ) { | | 114 | else if ( count < 64 ) { |
117 | z1 = ( a0<<negCount ) | ( a1 != 0 ); | | 115 | z1 = ( a0<<negCount ) | ( a1 != 0 ); |
| @@ -121,71 +119,69 @@ INLINE void | | | @@ -121,71 +119,69 @@ INLINE void |
121 | if ( count == 64 ) { | | 119 | if ( count == 64 ) { |
122 | z1 = a0 | ( a1 != 0 ); | | 120 | z1 = a0 | ( a1 != 0 ); |
123 | } | | 121 | } |
124 | else { | | 122 | else { |
125 | z1 = ( ( a0 | a1 ) != 0 ); | | 123 | z1 = ( ( a0 | a1 ) != 0 ); |
126 | } | | 124 | } |
127 | z0 = 0; | | 125 | z0 = 0; |
128 | } | | 126 | } |
129 | *z1Ptr = z1; | | 127 | *z1Ptr = z1; |
130 | *z0Ptr = z0; | | 128 | *z0Ptr = z0; |
131 | | | 129 | |
132 | } | | 130 | } |
133 | | | 131 | |
134 | /* | | 132 | /*---------------------------------------------------------------------------- |
135 | ------------------------------------------------------------------------------- | | 133 | | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the |
136 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the | | 134 | | number of bits given in `count'. Any bits shifted off are lost. The value |
137 | number of bits given in `count'. Any bits shifted off are lost. The value | | 135 | | of `count' can be arbitrarily large; in particular, if `count' is greater |
138 | of `count' can be arbitrarily large; in particular, if `count' is greater | | 136 | | than 128, the result will be 0. The result is broken into two 64-bit pieces |
139 | than 128, the result will be 0. The result is broken into two 64-bit pieces | | 137 | | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. |
140 | which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. | | 138 | *----------------------------------------------------------------------------*/ |
141 | ------------------------------------------------------------------------------- | | 139 | |
142 | */ | | | |
143 | INLINE void | | 140 | INLINE void |
144 | shift128Right( | | 141 | shift128Right( |
145 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) | | 142 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) |
146 | { | | 143 | { |
147 | bits64 z0, z1; | | 144 | bits64 z0, z1; |
148 | int8 negCount = ( - count ) & 63; | | 145 | int8 negCount = ( - count ) & 63; |
149 | | | 146 | |
150 | if ( count == 0 ) { | | 147 | if ( count == 0 ) { |
151 | z1 = a1; | | 148 | z1 = a1; |
152 | z0 = a0; | | 149 | z0 = a0; |
153 | } | | 150 | } |
154 | else if ( count < 64 ) { | | 151 | else if ( count < 64 ) { |
155 | z1 = ( a0<<negCount ) | ( a1>>count ); | | 152 | z1 = ( a0<<negCount ) | ( a1>>count ); |
156 | z0 = a0>>count; | | 153 | z0 = a0>>count; |
157 | } | | 154 | } |
158 | else { | | 155 | else { |
159 | z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; | | 156 | z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; |
160 | z0 = 0; | | 157 | z0 = 0; |
161 | } | | 158 | } |
162 | *z1Ptr = z1; | | 159 | *z1Ptr = z1; |
163 | *z0Ptr = z0; | | 160 | *z0Ptr = z0; |
164 | | | 161 | |
165 | } | | 162 | } |
166 | | | 163 | |
167 | /* | | 164 | /*---------------------------------------------------------------------------- |
168 | ------------------------------------------------------------------------------- | | 165 | | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the |
169 | Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the | | 166 | | number of bits given in `count'. If any nonzero bits are shifted off, they |
170 | number of bits given in `count'. If any nonzero bits are shifted off, they | | 167 | | are ``jammed'' into the least significant bit of the result by setting the |
171 | are ``jammed'' into the least significant bit of the result by setting the | | 168 | | least significant bit to 1. The value of `count' can be arbitrarily large; |
172 | least significant bit to 1. The value of `count' can be arbitrarily large; | | 169 | | in particular, if `count' is greater than 128, the result will be either |
173 | in particular, if `count' is greater than 128, the result will be either | | 170 | | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or |
174 | 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or | | 171 | | nonzero. The result is broken into two 64-bit pieces which are stored at |
175 | nonzero. The result is broken into two 64-bit pieces which are stored at | | 172 | | the locations pointed to by `z0Ptr' and `z1Ptr'. |
176 | the locations pointed to by `z0Ptr' and `z1Ptr'. | | 173 | *----------------------------------------------------------------------------*/ |
177 | ------------------------------------------------------------------------------- | | 174 | |
178 | */ | | | |
179 | INLINE void | | 175 | INLINE void |
180 | shift128RightJamming( | | 176 | shift128RightJamming( |
181 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) | | 177 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) |
182 | { | | 178 | { |
183 | bits64 z0, z1; | | 179 | bits64 z0, z1; |
184 | int8 negCount = ( - count ) & 63; | | 180 | int8 negCount = ( - count ) & 63; |
185 | | | 181 | |
186 | if ( count == 0 ) { | | 182 | if ( count == 0 ) { |
187 | z1 = a1; | | 183 | z1 = a1; |
188 | z0 = a0; | | 184 | z0 = a0; |
189 | } | | 185 | } |
190 | else if ( count < 64 ) { | | 186 | else if ( count < 64 ) { |
191 | z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); | | 187 | z1 = ( a0<<negCount ) | ( a1>>count ) | ( ( a1<<negCount ) != 0 ); |
| @@ -198,46 +194,45 @@ INLINE void | | | @@ -198,46 +194,45 @@ INLINE void |
198 | else if ( count < 128 ) { | | 194 | else if ( count < 128 ) { |
199 | z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); | | 195 | z1 = ( a0>>( count & 63 ) ) | ( ( ( a0<<negCount ) | a1 ) != 0 ); |
200 | } | | 196 | } |
201 | else { | | 197 | else { |
202 | z1 = ( ( a0 | a1 ) != 0 ); | | 198 | z1 = ( ( a0 | a1 ) != 0 ); |
203 | } | | 199 | } |
204 | z0 = 0; | | 200 | z0 = 0; |
205 | } | | 201 | } |
206 | *z1Ptr = z1; | | 202 | *z1Ptr = z1; |
207 | *z0Ptr = z0; | | 203 | *z0Ptr = z0; |
208 | | | 204 | |
209 | } | | 205 | } |
210 | | | 206 | |
211 | /* | | 207 | /*---------------------------------------------------------------------------- |
212 | ------------------------------------------------------------------------------- | | 208 | | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right |
213 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right | | 209 | | by 64 _plus_ the number of bits given in `count'. The shifted result is |
214 | by 64 _plus_ the number of bits given in `count'. The shifted result is | | 210 | | at most 128 nonzero bits; these are broken into two 64-bit pieces which are |
215 | at most 128 nonzero bits; these are broken into two 64-bit pieces which are | | 211 | | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted |
216 | stored at the locations pointed to by `z0Ptr' and `z1Ptr'. The bits shifted | | 212 | | off form a third 64-bit result as follows: The _last_ bit shifted off is |
217 | off form a third 64-bit result as follows: The _last_ bit shifted off is | | 213 | | the most-significant bit of the extra result, and the other 63 bits of the |
218 | the most-significant bit of the extra result, and the other 63 bits of the | | 214 | | extra result are all zero if and only if _all_but_the_last_ bits shifted off |
219 | extra result are all zero if and only if _all_but_the_last_ bits shifted off | | 215 | | were all zero. This extra result is stored in the location pointed to by |
220 | were all zero. This extra result is stored in the location pointed to by | | 216 | | `z2Ptr'. The value of `count' can be arbitrarily large. |
221 | `z2Ptr'. The value of `count' can be arbitrarily large. | | 217 | | (This routine makes more sense if `a0', `a1', and `a2' are considered |
222 | (This routine makes more sense if `a0', `a1', and `a2' are considered | | 218 | | to form a fixed-point value with binary point between `a1' and `a2'. This |
223 | to form a fixed-point value with binary point between `a1' and `a2'. This | | 219 | | fixed-point value is shifted right by the number of bits given in `count', |
224 | fixed-point value is shifted right by the number of bits given in `count', | | 220 | | and the integer part of the result is returned at the locations pointed to |
225 | and the integer part of the result is returned at the locations pointed to | | 221 | | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly |
226 | by `z0Ptr' and `z1Ptr'. The fractional part of the result may be slightly | | 222 | | corrupted as described above, and is returned at the location pointed to by |
227 | corrupted as described above, and is returned at the location pointed to by | | 223 | | `z2Ptr'.) |
228 | `z2Ptr'.) | | 224 | *----------------------------------------------------------------------------*/ |
229 | ------------------------------------------------------------------------------- | | 225 | |
230 | */ | | | |
231 | INLINE void | | 226 | INLINE void |
232 | shift128ExtraRightJamming( | | 227 | shift128ExtraRightJamming( |
233 | bits64 a0, | | 228 | bits64 a0, |
234 | bits64 a1, | | 229 | bits64 a1, |
235 | bits64 a2, | | 230 | bits64 a2, |
236 | int16 count, | | 231 | int16 count, |
237 | bits64 *z0Ptr, | | 232 | bits64 *z0Ptr, |
238 | bits64 *z1Ptr, | | 233 | bits64 *z1Ptr, |
239 | bits64 *z2Ptr | | 234 | bits64 *z2Ptr |
240 | ) | | 235 | ) |
241 | { | | 236 | { |
242 | bits64 z0, z1, z2; | | 237 | bits64 z0, z1, z2; |
243 | int8 negCount = ( - count ) & 63; | | 238 | int8 negCount = ( - count ) & 63; |
| @@ -269,54 +264,52 @@ INLINE void | | | @@ -269,54 +264,52 @@ INLINE void |
269 | z1 = 0; | | 264 | z1 = 0; |
270 | } | | 265 | } |
271 | } | | 266 | } |
272 | z0 = 0; | | 267 | z0 = 0; |
273 | } | | 268 | } |
274 | z2 |= ( a2 != 0 ); | | 269 | z2 |= ( a2 != 0 ); |
275 | } | | 270 | } |
276 | *z2Ptr = z2; | | 271 | *z2Ptr = z2; |
277 | *z1Ptr = z1; | | 272 | *z1Ptr = z1; |
278 | *z0Ptr = z0; | | 273 | *z0Ptr = z0; |
279 | | | 274 | |
280 | } | | 275 | } |
281 | | | 276 | |
282 | /* | | 277 | /*---------------------------------------------------------------------------- |
283 | ------------------------------------------------------------------------------- | | 278 | | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the |
284 | Shifts the 128-bit value formed by concatenating `a0' and `a1' left by the | | 279 | | number of bits given in `count'. Any bits shifted off are lost. The value |
285 | number of bits given in `count'. Any bits shifted off are lost. The value | | 280 | | of `count' must be less than 64. The result is broken into two 64-bit |
286 | of `count' must be less than 64. The result is broken into two 64-bit | | 281 | | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. |
287 | pieces which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. | | 282 | *----------------------------------------------------------------------------*/ |
288 | ------------------------------------------------------------------------------- | | 283 | |
289 | */ | | | |
290 | INLINE void | | 284 | INLINE void |
291 | shortShift128Left( | | 285 | shortShift128Left( |
292 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) | | 286 | bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) |
293 | { | | 287 | { |
294 | | | 288 | |
295 | *z1Ptr = a1<<count; | | 289 | *z1Ptr = a1<<count; |
296 | *z0Ptr = | | 290 | *z0Ptr = |
297 | ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); | | 291 | ( count == 0 ) ? a0 : ( a0<<count ) | ( a1>>( ( - count ) & 63 ) ); |
298 | | | 292 | |
299 | } | | 293 | } |
300 | | | 294 | |
301 | /* | | 295 | /*---------------------------------------------------------------------------- |
302 | ------------------------------------------------------------------------------- | | 296 | | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left |
303 | Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' left | | 297 | | by the number of bits given in `count'. Any bits shifted off are lost. |
304 | by the number of bits given in `count'. Any bits shifted off are lost. | | 298 | | The value of `count' must be less than 64. The result is broken into three |
305 | The value of `count' must be less than 64. The result is broken into three | | 299 | | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', |
306 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', | | 300 | | `z1Ptr', and `z2Ptr'. |
307 | `z1Ptr', and `z2Ptr'. | | 301 | *----------------------------------------------------------------------------*/ |
308 | ------------------------------------------------------------------------------- | | 302 | |
309 | */ | | | |
310 | INLINE void | | 303 | INLINE void |
311 | shortShift192Left( | | 304 | shortShift192Left( |
312 | bits64 a0, | | 305 | bits64 a0, |
313 | bits64 a1, | | 306 | bits64 a1, |
314 | bits64 a2, | | 307 | bits64 a2, |
315 | int16 count, | | 308 | int16 count, |
316 | bits64 *z0Ptr, | | 309 | bits64 *z0Ptr, |
317 | bits64 *z1Ptr, | | 310 | bits64 *z1Ptr, |
318 | bits64 *z2Ptr | | 311 | bits64 *z2Ptr |
319 | ) | | 312 | ) |
320 | { | | 313 | { |
321 | bits64 z0, z1, z2; | | 314 | bits64 z0, z1, z2; |
322 | int8 negCount; | | 315 | int8 negCount; |
| @@ -325,55 +318,53 @@ INLINE void | | | @@ -325,55 +318,53 @@ INLINE void |
325 | z1 = a1<<count; | | 318 | z1 = a1<<count; |
326 | z0 = a0<<count; | | 319 | z0 = a0<<count; |
327 | if ( 0 < count ) { | | 320 | if ( 0 < count ) { |
328 | negCount = ( ( - count ) & 63 ); | | 321 | negCount = ( ( - count ) & 63 ); |
329 | z1 |= a2>>negCount; | | 322 | z1 |= a2>>negCount; |
330 | z0 |= a1>>negCount; | | 323 | z0 |= a1>>negCount; |
331 | } | | 324 | } |
332 | *z2Ptr = z2; | | 325 | *z2Ptr = z2; |
333 | *z1Ptr = z1; | | 326 | *z1Ptr = z1; |
334 | *z0Ptr = z0; | | 327 | *z0Ptr = z0; |
335 | | | 328 | |
336 | } | | 329 | } |
337 | | | 330 | |
338 | /* | | 331 | /*---------------------------------------------------------------------------- |
339 | ------------------------------------------------------------------------------- | | 332 | | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit |
340 | Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit | | 333 | | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so |
341 | value formed by concatenating `b0' and `b1'. Addition is modulo 2^128, so | | 334 | | any carry out is lost. The result is broken into two 64-bit pieces which |
342 | any carry out is lost. The result is broken into two 64-bit pieces which | | 335 | | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. |
343 | are stored at the locations pointed to by `z0Ptr' and `z1Ptr'. | | 336 | *----------------------------------------------------------------------------*/ |
344 | ------------------------------------------------------------------------------- | | 337 | |
345 | */ | | | |
346 | INLINE void | | 338 | INLINE void |
347 | add128( | | 339 | add128( |
348 | bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) | | 340 | bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) |
349 | { | | 341 | { |
350 | bits64 z1; | | 342 | bits64 z1; |
351 | | | 343 | |
352 | z1 = a1 + b1; | | 344 | z1 = a1 + b1; |
353 | *z1Ptr = z1; | | 345 | *z1Ptr = z1; |
354 | *z0Ptr = a0 + b0 + ( z1 < a1 ); | | 346 | *z0Ptr = a0 + b0 + ( z1 < a1 ); |
355 | | | 347 | |
356 | } | | 348 | } |
357 | | | 349 | |
358 | /* | | 350 | /*---------------------------------------------------------------------------- |
359 | ------------------------------------------------------------------------------- | | 351 | | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the |
360 | Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the | | 352 | | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is |
361 | 192-bit value formed by concatenating `b0', `b1', and `b2'. Addition is | | 353 | | modulo 2^192, so any carry out is lost. The result is broken into three |
362 | modulo 2^192, so any carry out is lost. The result is broken into three | | 354 | | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', |
363 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr', | | 355 | | `z1Ptr', and `z2Ptr'. |
364 | `z1Ptr', and `z2Ptr'. | | 356 | *----------------------------------------------------------------------------*/ |
365 | ------------------------------------------------------------------------------- | | 357 | |
366 | */ | | | |
367 | INLINE void | | 358 | INLINE void |
368 | add192( | | 359 | add192( |
369 | bits64 a0, | | 360 | bits64 a0, |
370 | bits64 a1, | | 361 | bits64 a1, |
371 | bits64 a2, | | 362 | bits64 a2, |
372 | bits64 b0, | | 363 | bits64 b0, |
373 | bits64 b1, | | 364 | bits64 b1, |
374 | bits64 b2, | | 365 | bits64 b2, |
375 | bits64 *z0Ptr, | | 366 | bits64 *z0Ptr, |
376 | bits64 *z1Ptr, | | 367 | bits64 *z1Ptr, |
377 | bits64 *z2Ptr | | 368 | bits64 *z2Ptr |
378 | ) | | 369 | ) |
379 | { | | 370 | { |
| @@ -384,54 +375,52 @@ INLINE void | | | @@ -384,54 +375,52 @@ INLINE void |
384 | carry1 = ( z2 < a2 ); | | 375 | carry1 = ( z2 < a2 ); |
385 | z1 = a1 + b1; | | 376 | z1 = a1 + b1; |
386 | carry0 = ( z1 < a1 ); | | 377 | carry0 = ( z1 < a1 ); |
387 | z0 = a0 + b0; | | 378 | z0 = a0 + b0; |
388 | z1 += carry1; | | 379 | z1 += carry1; |
389 | z0 += ( z1 < carry1 ); | | 380 | z0 += ( z1 < carry1 ); |
390 | z0 += carry0; | | 381 | z0 += carry0; |
391 | *z2Ptr = z2; | | 382 | *z2Ptr = z2; |
392 | *z1Ptr = z1; | | 383 | *z1Ptr = z1; |
393 | *z0Ptr = z0; | | 384 | *z0Ptr = z0; |
394 | | | 385 | |
395 | } | | 386 | } |
396 | | | 387 | |
397 | /* | | 388 | /*---------------------------------------------------------------------------- |
398 | ------------------------------------------------------------------------------- | | 389 | | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the |
399 | Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the | | 390 | | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo |
400 | 128-bit value formed by concatenating `a0' and `a1'. Subtraction is modulo | | 391 | | 2^128, so any borrow out (carry out) is lost. The result is broken into two |
401 | 2^128, so any borrow out (carry out) is lost. The result is broken into two | | 392 | | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and |
402 | 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and | | 393 | | `z1Ptr'. |
403 | `z1Ptr'. | | 394 | *----------------------------------------------------------------------------*/ |
404 | ------------------------------------------------------------------------------- | | 395 | |
405 | */ | | | |
406 | INLINE void | | 396 | INLINE void |
407 | sub128( | | 397 | sub128( |
408 | bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) | | 398 | bits64 a0, bits64 a1, bits64 b0, bits64 b1, bits64 *z0Ptr, bits64 *z1Ptr ) |
409 | { | | 399 | { |
410 | | | 400 | |
411 | *z1Ptr = a1 - b1; | | 401 | *z1Ptr = a1 - b1; |
412 | *z0Ptr = a0 - b0 - ( a1 < b1 ); | | 402 | *z0Ptr = a0 - b0 - ( a1 < b1 ); |
413 | | | 403 | |
414 | } | | 404 | } |
415 | | | 405 | |
416 | /* | | 406 | /*---------------------------------------------------------------------------- |
417 | ------------------------------------------------------------------------------- | | 407 | | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' |
418 | Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2' | | 408 | | from the 192-bit value formed by concatenating `a0', `a1', and `a2'. |
419 | from the 192-bit value formed by concatenating `a0', `a1', and `a2'. | | 409 | | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The |
420 | Subtraction is modulo 2^192, so any borrow out (carry out) is lost. The | | 410 | | result is broken into three 64-bit pieces which are stored at the locations |
421 | result is broken into three 64-bit pieces which are stored at the locations | | 411 | | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. |
422 | pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'. | | 412 | *----------------------------------------------------------------------------*/ |
423 | ------------------------------------------------------------------------------- | | 413 | |
424 | */ | | | |
425 | INLINE void | | 414 | INLINE void |
426 | sub192( | | 415 | sub192( |
427 | bits64 a0, | | 416 | bits64 a0, |
428 | bits64 a1, | | 417 | bits64 a1, |
429 | bits64 a2, | | 418 | bits64 a2, |
430 | bits64 b0, | | 419 | bits64 b0, |
431 | bits64 b1, | | 420 | bits64 b1, |
432 | bits64 b2, | | 421 | bits64 b2, |
433 | bits64 *z0Ptr, | | 422 | bits64 *z0Ptr, |
434 | bits64 *z1Ptr, | | 423 | bits64 *z1Ptr, |
435 | bits64 *z2Ptr | | 424 | bits64 *z2Ptr |
436 | ) | | 425 | ) |
437 | { | | 426 | { |
| @@ -442,93 +431,90 @@ INLINE void | | | @@ -442,93 +431,90 @@ INLINE void |
442 | borrow1 = ( a2 < b2 ); | | 431 | borrow1 = ( a2 < b2 ); |
443 | z1 = a1 - b1; | | 432 | z1 = a1 - b1; |
444 | borrow0 = ( a1 < b1 ); | | 433 | borrow0 = ( a1 < b1 ); |
445 | z0 = a0 - b0; | | 434 | z0 = a0 - b0; |
446 | z0 -= ( z1 < borrow1 ); | | 435 | z0 -= ( z1 < borrow1 ); |
447 | z1 -= borrow1; | | 436 | z1 -= borrow1; |
448 | z0 -= borrow0; | | 437 | z0 -= borrow0; |
449 | *z2Ptr = z2; | | 438 | *z2Ptr = z2; |
450 | *z1Ptr = z1; | | 439 | *z1Ptr = z1; |
451 | *z0Ptr = z0; | | 440 | *z0Ptr = z0; |
452 | | | 441 | |
453 | } | | 442 | } |
454 | | | 443 | |
455 | /* | | 444 | /*---------------------------------------------------------------------------- |
456 | ------------------------------------------------------------------------------- | | 445 | | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken |
457 | Multiplies `a' by `b' to obtain a 128-bit product. The product is broken | | 446 | | into two 64-bit pieces which are stored at the locations pointed to by |
458 | into two 64-bit pieces which are stored at the locations pointed to by | | 447 | | `z0Ptr' and `z1Ptr'. |
459 | `z0Ptr' and `z1Ptr'. | | 448 | *----------------------------------------------------------------------------*/ |
460 | ------------------------------------------------------------------------------- | | 449 | |
461 | */ | | | |
462 | INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) | | 450 | INLINE void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr ) |
463 | { | | 451 | { |
464 | bits32 aHigh, aLow, bHigh, bLow; | | 452 | bits32 aHigh, aLow, bHigh, bLow; |
465 | bits64 z0, zMiddleA, zMiddleB, z1; | | 453 | bits64 z0, zMiddleA, zMiddleB, z1; |
466 | | | 454 | |
467 | aLow = a; | | 455 | aLow = a; |
468 | aHigh = a>>32; | | 456 | aHigh = a>>32; |
469 | bLow = b; | | 457 | bLow = b; |
470 | bHigh = b>>32; | | 458 | bHigh = b>>32; |
471 | z1 = ( (bits64) aLow ) * bLow; | | 459 | z1 = ( (bits64) aLow ) * bLow; |
472 | zMiddleA = ( (bits64) aLow ) * bHigh; | | 460 | zMiddleA = ( (bits64) aLow ) * bHigh; |
473 | zMiddleB = ( (bits64) aHigh ) * bLow; | | 461 | zMiddleB = ( (bits64) aHigh ) * bLow; |
474 | z0 = ( (bits64) aHigh ) * bHigh; | | 462 | z0 = ( (bits64) aHigh ) * bHigh; |
475 | zMiddleA += zMiddleB; | | 463 | zMiddleA += zMiddleB; |
476 | z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); | | 464 | z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); |
477 | zMiddleA <<= 32; | | 465 | zMiddleA <<= 32; |
478 | z1 += zMiddleA; | | 466 | z1 += zMiddleA; |
479 | z0 += ( z1 < zMiddleA ); | | 467 | z0 += ( z1 < zMiddleA ); |
480 | *z1Ptr = z1; | | 468 | *z1Ptr = z1; |
481 | *z0Ptr = z0; | | 469 | *z0Ptr = z0; |
482 | | | 470 | |
483 | } | | 471 | } |
484 | | | 472 | |
485 | /* | | 473 | /*---------------------------------------------------------------------------- |
486 | ------------------------------------------------------------------------------- | | 474 | | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by |
487 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' by | | 475 | | `b' to obtain a 192-bit product. The product is broken into three 64-bit |
488 | `b' to obtain a 192-bit product. The product is broken into three 64-bit | | 476 | | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and |
489 | pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and | | 477 | | `z2Ptr'. |
490 | `z2Ptr'. | | 478 | *----------------------------------------------------------------------------*/ |
491 | ------------------------------------------------------------------------------- | | 479 | |
492 | */ | | | |
493 | INLINE void | | 480 | INLINE void |
494 | mul128By64To192( | | 481 | mul128By64To192( |
495 | bits64 a0, | | 482 | bits64 a0, |
496 | bits64 a1, | | 483 | bits64 a1, |
497 | bits64 b, | | 484 | bits64 b, |
498 | bits64 *z0Ptr, | | 485 | bits64 *z0Ptr, |
499 | bits64 *z1Ptr, | | 486 | bits64 *z1Ptr, |
500 | bits64 *z2Ptr | | 487 | bits64 *z2Ptr |
501 | ) | | 488 | ) |
502 | { | | 489 | { |
503 | bits64 z0, z1, z2, more1; | | 490 | bits64 z0, z1, z2, more1; |
504 | | | 491 | |
505 | mul64To128( a1, b, &z1, &z2 ); | | 492 | mul64To128( a1, b, &z1, &z2 ); |
506 | mul64To128( a0, b, &z0, &more1 ); | | 493 | mul64To128( a0, b, &z0, &more1 ); |
507 | add128( z0, more1, 0, z1, &z0, &z1 ); | | 494 | add128( z0, more1, 0, z1, &z0, &z1 ); |
508 | *z2Ptr = z2; | | 495 | *z2Ptr = z2; |
509 | *z1Ptr = z1; | | 496 | *z1Ptr = z1; |
510 | *z0Ptr = z0; | | 497 | *z0Ptr = z0; |
511 | | | 498 | |
512 | } | | 499 | } |
513 | | | 500 | |
514 | /* | | 501 | /*---------------------------------------------------------------------------- |
515 | ------------------------------------------------------------------------------- | | 502 | | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the |
516 | Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the | | 503 | | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit |
517 | 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit | | 504 | | product. The product is broken into four 64-bit pieces which are stored at |
518 | product. The product is broken into four 64-bit pieces which are stored at | | 505 | | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. |
519 | the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'. | | 506 | *----------------------------------------------------------------------------*/ |
520 | ------------------------------------------------------------------------------- | | 507 | |
521 | */ | | | |
522 | INLINE void | | 508 | INLINE void |
523 | mul128To256( | | 509 | mul128To256( |
524 | bits64 a0, | | 510 | bits64 a0, |
525 | bits64 a1, | | 511 | bits64 a1, |
526 | bits64 b0, | | 512 | bits64 b0, |
527 | bits64 b1, | | 513 | bits64 b1, |
528 | bits64 *z0Ptr, | | 514 | bits64 *z0Ptr, |
529 | bits64 *z1Ptr, | | 515 | bits64 *z1Ptr, |
530 | bits64 *z2Ptr, | | 516 | bits64 *z2Ptr, |
531 | bits64 *z3Ptr | | 517 | bits64 *z3Ptr |
532 | ) | | 518 | ) |
533 | { | | 519 | { |
534 | bits64 z0, z1, z2, z3; | | 520 | bits64 z0, z1, z2, z3; |
| @@ -539,70 +525,68 @@ INLINE void | | | @@ -539,70 +525,68 @@ INLINE void |
539 | add128( z1, more2, 0, z2, &z1, &z2 ); | | 525 | add128( z1, more2, 0, z2, &z1, &z2 ); |
540 | mul64To128( a0, b0, &z0, &more1 ); | | 526 | mul64To128( a0, b0, &z0, &more1 ); |
541 | add128( z0, more1, 0, z1, &z0, &z1 ); | | 527 | add128( z0, more1, 0, z1, &z0, &z1 ); |
542 | mul64To128( a0, b1, &more1, &more2 ); | | 528 | mul64To128( a0, b1, &more1, &more2 ); |
543 | add128( more1, more2, 0, z2, &more1, &z2 ); | | 529 | add128( more1, more2, 0, z2, &more1, &z2 ); |
544 | add128( z0, z1, 0, more1, &z0, &z1 ); | | 530 | add128( z0, z1, 0, more1, &z0, &z1 ); |
545 | *z3Ptr = z3; | | 531 | *z3Ptr = z3; |
546 | *z2Ptr = z2; | | 532 | *z2Ptr = z2; |
547 | *z1Ptr = z1; | | 533 | *z1Ptr = z1; |
548 | *z0Ptr = z0; | | 534 | *z0Ptr = z0; |
549 | | | 535 | |
550 | } | | 536 | } |
551 | | | 537 | |
552 | /* | | 538 | /*---------------------------------------------------------------------------- |
553 | ------------------------------------------------------------------------------- | | 539 | | Returns an approximation to the 64-bit integer quotient obtained by dividing |
554 | Returns an approximation to the 64-bit integer quotient obtained by dividing | | 540 | | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The |
555 | `b' into the 128-bit value formed by concatenating `a0' and `a1'. The | | 541 | | divisor `b' must be at least 2^63. If q is the exact quotient truncated |
556 | divisor `b' must be at least 2^63. If q is the exact quotient truncated | | 542 | | toward zero, the approximation returned lies between q and q + 2 inclusive. |
557 | toward zero, the approximation returned lies between q and q + 2 inclusive. | | 543 | | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit |
558 | If the exact quotient q is larger than 64 bits, the maximum positive 64-bit | | 544 | | unsigned integer is returned. |
559 | unsigned integer is returned. | | 545 | *----------------------------------------------------------------------------*/ |
560 | ------------------------------------------------------------------------------- | | 546 | |
561 | */ | | | |
562 | static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) | | 547 | static bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) |
563 | { | | 548 | { |
564 | bits64 b0, b1; | | 549 | bits64 b0, b1; |
565 | bits64 rem0, rem1, term0, term1; | | 550 | bits64 rem0, rem1, term0, term1; |
566 | bits64 z; | | 551 | bits64 z; |
567 | | | 552 | |
568 | if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); | | 553 | if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); |
569 | b0 = b>>32; | | 554 | b0 = b>>32; |
570 | z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; | | 555 | z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; |
571 | mul64To128( b, z, &term0, &term1 ); | | 556 | mul64To128( b, z, &term0, &term1 ); |
572 | sub128( a0, a1, term0, term1, &rem0, &rem1 ); | | 557 | sub128( a0, a1, term0, term1, &rem0, &rem1 ); |
573 | while ( ( (sbits64) rem0 ) < 0 ) { | | 558 | while ( ( (sbits64) rem0 ) < 0 ) { |
574 | z -= LIT64( 0x100000000 ); | | 559 | z -= LIT64( 0x100000000 ); |
575 | b1 = b<<32; | | 560 | b1 = b<<32; |
576 | add128( rem0, rem1, b0, b1, &rem0, &rem1 ); | | 561 | add128( rem0, rem1, b0, b1, &rem0, &rem1 ); |
577 | } | | 562 | } |
578 | rem0 = ( rem0<<32 ) | ( rem1>>32 ); | | 563 | rem0 = ( rem0<<32 ) | ( rem1>>32 ); |
579 | z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; | | 564 | z |= ( b0<<32 <= rem0 ) ? 0xFFFFFFFF : rem0 / b0; |
580 | return z; | | 565 | return z; |
581 | | | 566 | |
582 | } | | 567 | } |
583 | | | 568 | |
584 | #ifndef SOFTFLOAT_FOR_GCC /* Not used */ | | 569 | #ifndef SOFTFLOAT_FOR_GCC /* Not used */ |
585 | /* | | 570 | /*---------------------------------------------------------------------------- |
586 | ------------------------------------------------------------------------------- | | 571 | | Returns an approximation to the square root of the 32-bit significand given |
587 | Returns an approximation to the square root of the 32-bit significand given | | 572 | | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of |
588 | by `a'. Considered as an integer, `a' must be at least 2^31. If bit 0 of | | 573 | | `aExp' (the least significant bit) is 1, the integer returned approximates |
589 | `aExp' (the least significant bit) is 1, the integer returned approximates | | 574 | | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' |
590 | 2^31*sqrt(`a'/2^31), where `a' is considered an integer. If bit 0 of `aExp' | | 575 | | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either |
591 | is 0, the integer returned approximates 2^31*sqrt(`a'/2^30). In either | | 576 | | case, the approximation returned lies strictly within +/-2 of the exact |
592 | case, the approximation returned lies strictly within +/-2 of the exact | | 577 | | value. |
593 | value. | | 578 | *----------------------------------------------------------------------------*/ |
594 | ------------------------------------------------------------------------------- | | 579 | |
595 | */ | | | |
596 | static bits32 estimateSqrt32( int16 aExp, bits32 a ) | | 580 | static bits32 estimateSqrt32( int16 aExp, bits32 a ) |
597 | { | | 581 | { |
598 | static const bits16 sqrtOddAdjustments[] = { | | 582 | static const bits16 sqrtOddAdjustments[] = { |
599 | 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, | | 583 | 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, |
600 | 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 | | 584 | 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 |
601 | }; | | 585 | }; |
602 | static const bits16 sqrtEvenAdjustments[] = { | | 586 | static const bits16 sqrtEvenAdjustments[] = { |
603 | 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, | | 587 | 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, |
604 | 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 | | 588 | 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 |
605 | }; | | 589 | }; |
606 | int8 index; | | 590 | int8 index; |
607 | bits32 z; | | 591 | bits32 z; |
608 | | | 592 | |
| @@ -613,32 +597,31 @@ static bits32 estimateSqrt32( int16 aExp | | | @@ -613,32 +597,31 @@ static bits32 estimateSqrt32( int16 aExp |
613 | a >>= 1; | | 597 | a >>= 1; |
614 | } | | 598 | } |
615 | else { | | 599 | else { |
616 | z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; | | 600 | z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; |
617 | z = a / z + z; | | 601 | z = a / z + z; |
618 | z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); | | 602 | z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); |
619 | if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); | | 603 | if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); |
620 | } | | 604 | } |
621 | return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); | | 605 | return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); |
622 | | | 606 | |
623 | } | | 607 | } |
624 | #endif | | 608 | #endif |
625 | | | 609 | |
626 | /* | | 610 | /*---------------------------------------------------------------------------- |
627 | ------------------------------------------------------------------------------- | | 611 | | Returns the number of leading 0 bits before the most-significant 1 bit of |
628 | Returns the number of leading 0 bits before the most-significant 1 bit of | | 612 | | `a'. If `a' is zero, 32 is returned. |
629 | `a'. If `a' is zero, 32 is returned. | | 613 | *----------------------------------------------------------------------------*/ |
630 | ------------------------------------------------------------------------------- | | 614 | |
631 | */ | | | |
632 | static int8 countLeadingZeros32( bits32 a ) | | 615 | static int8 countLeadingZeros32( bits32 a ) |
633 | { | | 616 | { |
634 | static const int8 countLeadingZerosHigh[] = { | | 617 | static const int8 countLeadingZerosHigh[] = { |
635 | 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, | | 618 | 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, |
636 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, | | 619 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, |
637 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | | 620 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
638 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, | | 621 | 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, |
639 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | | 622 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
640 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | | 623 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
641 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | | 624 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
642 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, | | 625 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, |
643 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | | 626 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
644 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | | 627 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| @@ -655,91 +638,86 @@ static int8 countLeadingZeros32( bits32 | | | @@ -655,91 +638,86 @@ static int8 countLeadingZeros32( bits32 |
655 | if ( a < 0x10000 ) { | | 638 | if ( a < 0x10000 ) { |
656 | shiftCount += 16; | | 639 | shiftCount += 16; |
657 | a <<= 16; | | 640 | a <<= 16; |
658 | } | | 641 | } |
659 | if ( a < 0x1000000 ) { | | 642 | if ( a < 0x1000000 ) { |
660 | shiftCount += 8; | | 643 | shiftCount += 8; |
661 | a <<= 8; | | 644 | a <<= 8; |
662 | } | | 645 | } |
663 | shiftCount += countLeadingZerosHigh[ a>>24 ]; | | 646 | shiftCount += countLeadingZerosHigh[ a>>24 ]; |
664 | return shiftCount; | | 647 | return shiftCount; |
665 | | | 648 | |
666 | } | | 649 | } |
667 | | | 650 | |
668 | /* | | 651 | /*---------------------------------------------------------------------------- |
669 | ------------------------------------------------------------------------------- | | 652 | | Returns the number of leading 0 bits before the most-significant 1 bit of |
670 | Returns the number of leading 0 bits before the most-significant 1 bit of | | 653 | | `a'. If `a' is zero, 64 is returned. |
671 | `a'. If `a' is zero, 64 is returned. | | 654 | *----------------------------------------------------------------------------*/ |
672 | ------------------------------------------------------------------------------- | | 655 | |
673 | */ | | | |
674 | static int8 countLeadingZeros64( bits64 a ) | | 656 | static int8 countLeadingZeros64( bits64 a ) |
675 | { | | 657 | { |
676 | int8 shiftCount; | | 658 | int8 shiftCount; |
677 | | | 659 | |
678 | shiftCount = 0; | | 660 | shiftCount = 0; |
679 | if ( a < ( (bits64) 1 )<<32 ) { | | 661 | if ( a < ( (bits64) 1 )<<32 ) { |
680 | shiftCount += 32; | | 662 | shiftCount += 32; |
681 | } | | 663 | } |
682 | else { | | 664 | else { |
683 | a >>= 32; | | 665 | a >>= 32; |
684 | } | | 666 | } |
685 | shiftCount += countLeadingZeros32( a ); | | 667 | shiftCount += countLeadingZeros32( a ); |
686 | return shiftCount; | | 668 | return shiftCount; |
687 | | | 669 | |
688 | } | | 670 | } |
689 | | | 671 | |
690 | /* | | 672 | /*---------------------------------------------------------------------------- |
691 | ------------------------------------------------------------------------------- | | 673 | | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' |
692 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' | | 674 | | is equal to the 128-bit value formed by concatenating `b0' and `b1'. |
693 | is equal to the 128-bit value formed by concatenating `b0' and `b1'. | | 675 | | Otherwise, returns 0. |
694 | Otherwise, returns 0. | | 676 | *----------------------------------------------------------------------------*/ |
695 | ------------------------------------------------------------------------------- | | 677 | |
696 | */ | | | |
697 | INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | | 678 | INLINE flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) |
698 | { | | 679 | { |
699 | | | 680 | |
700 | return ( a0 == b0 ) && ( a1 == b1 ); | | 681 | return ( a0 == b0 ) && ( a1 == b1 ); |
701 | | | 682 | |
702 | } | | 683 | } |
703 | | | 684 | |
704 | /* | | 685 | /*---------------------------------------------------------------------------- |
705 | ------------------------------------------------------------------------------- | | 686 | | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less |
706 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less | | 687 | | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. |
707 | than or equal to the 128-bit value formed by concatenating `b0' and `b1'. | | 688 | | Otherwise, returns 0. |
708 | Otherwise, returns 0. | | 689 | *----------------------------------------------------------------------------*/ |
709 | ------------------------------------------------------------------------------- | | 690 | |
710 | */ | | | |
711 | INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | | 691 | INLINE flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) |
712 | { | | 692 | { |
713 | | | 693 | |
714 | return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); | | 694 | return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); |
715 | | | 695 | |
716 | } | | 696 | } |
717 | | | 697 | |
718 | /* | | 698 | /*---------------------------------------------------------------------------- |
719 | ------------------------------------------------------------------------------- | | 699 | | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less |
720 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less | | 700 | | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, |
721 | than the 128-bit value formed by concatenating `b0' and `b1'. Otherwise, | | 701 | | returns 0. |
722 | returns 0. | | 702 | *----------------------------------------------------------------------------*/ |
723 | ------------------------------------------------------------------------------- | | 703 | |
724 | */ | | | |
725 | INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | | 704 | INLINE flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) |
726 | { | | 705 | { |
727 | | | 706 | |
728 | return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); | | 707 | return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); |
729 | | | 708 | |
730 | } | | 709 | } |
731 | | | 710 | |
732 | /* | | 711 | /*---------------------------------------------------------------------------- |
733 | ------------------------------------------------------------------------------- | | 712 | | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is |
734 | Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is | | 713 | | not equal to the 128-bit value formed by concatenating `b0' and `b1'. |
735 | not equal to the 128-bit value formed by concatenating `b0' and `b1'. | | 714 | | Otherwise, returns 0. |
736 | Otherwise, returns 0. | | 715 | *----------------------------------------------------------------------------*/ |
737 | ------------------------------------------------------------------------------- | | 716 | |
738 | */ | | | |
739 | INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | | 717 | INLINE flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) |
740 | { | | 718 | { |
741 | | | 719 | |
742 | return ( a0 != b0 ) || ( a1 != b1 ); | | 720 | return ( a0 != b0 ) || ( a1 != b1 ); |
743 | | | 721 | |
744 | } | | 722 | } |
745 | | | 723 | |