Tue Mar 29 18:42:29 2016 UTC ()
Avoid warnings (signed/unsigned comparision and unused variable)


(martin)
diff -r1.13 -r1.14 src/lib/libc/softfloat/bits64/softfloat.c

cvs diff -r1.13 -r1.14 src/lib/libc/softfloat/bits64/softfloat.c (switch to unified diff)

--- src/lib/libc/softfloat/bits64/softfloat.c 2013/11/22 17:04:24 1.13
+++ src/lib/libc/softfloat/bits64/softfloat.c 2016/03/29 18:42:29 1.14
@@ -1,1663 +1,1663 @@ @@ -1,1663 +1,1663 @@
1/* $NetBSD: softfloat.c,v 1.13 2013/11/22 17:04:24 martin Exp $ */ 1/* $NetBSD: softfloat.c,v 1.14 2016/03/29 18:42:29 martin Exp $ */
2 2
3/* 3/*
4 * This version hacked for use with gcc -msoft-float by bjh21. 4 * This version hacked for use with gcc -msoft-float by bjh21.
5 * (Mostly a case of #ifdefing out things GCC doesn't need or provides 5 * (Mostly a case of #ifdefing out things GCC doesn't need or provides
6 * itself). 6 * itself).
7 */ 7 */
8 8
9/* 9/*
10 * Things you may want to define: 10 * Things you may want to define:
11 * 11 *
12 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with 12 * SOFTFLOAT_FOR_GCC - build only those functions necessary for GCC (with
13 * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them 13 * -msoft-float) to work. Include "softfloat-for-gcc.h" to get them
14 * properly renamed. 14 * properly renamed.
15 */ 15 */
16 16
17/* 17/*
18=============================================================================== 18===============================================================================
19 19
20This C source file is part of the SoftFloat IEC/IEEE Floating-point 20This C source file is part of the SoftFloat IEC/IEEE Floating-point
21Arithmetic Package, Release 2a. 21Arithmetic Package, Release 2a.
22 22
23Written by John R. Hauser. This work was made possible in part by the 23Written by John R. Hauser. This work was made possible in part by the
24International Computer Science Institute, located at Suite 600, 1947 Center 24International Computer Science Institute, located at Suite 600, 1947 Center
25Street, Berkeley, California 94704. Funding was partially provided by the 25Street, Berkeley, California 94704. Funding was partially provided by the
26National Science Foundation under grant MIP-9311980. The original version 26National Science Foundation under grant MIP-9311980. The original version
27of this code was written as part of a project to build a fixed-point vector 27of this code was written as part of a project to build a fixed-point vector
28processor in collaboration with the University of California at Berkeley, 28processor in collaboration with the University of California at Berkeley,
29overseen by Profs. Nelson Morgan and John Wawrzynek. More information 29overseen by Profs. Nelson Morgan and John Wawrzynek. More information
30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ 30is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
31arithmetic/SoftFloat.html'. 31arithmetic/SoftFloat.html'.
32 32
33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort 33THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT 34has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO 35TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY 36PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. 37AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
38 38
39Derivative works are acceptable, even for commercial purposes, so long as 39Derivative works are acceptable, even for commercial purposes, so long as
40(1) they include prominent notice that the work is derivative, and (2) they 40(1) they include prominent notice that the work is derivative, and (2) they
41include prominent notice akin to these four paragraphs for those parts of 41include prominent notice akin to these four paragraphs for those parts of
42this code that are retained. 42this code that are retained.
43 43
44=============================================================================== 44===============================================================================
45*/ 45*/
46 46
47#include <sys/cdefs.h> 47#include <sys/cdefs.h>
48#if defined(LIBC_SCCS) && !defined(lint) 48#if defined(LIBC_SCCS) && !defined(lint)
49__RCSID("$NetBSD: softfloat.c,v 1.13 2013/11/22 17:04:24 martin Exp $"); 49__RCSID("$NetBSD: softfloat.c,v 1.14 2016/03/29 18:42:29 martin Exp $");
50#endif /* LIBC_SCCS and not lint */ 50#endif /* LIBC_SCCS and not lint */
51 51
52#ifdef SOFTFLOAT_FOR_GCC 52#ifdef SOFTFLOAT_FOR_GCC
53#include "softfloat-for-gcc.h" 53#include "softfloat-for-gcc.h"
54#endif 54#endif
55 55
56#include "milieu.h" 56#include "milieu.h"
57#include "softfloat.h" 57#include "softfloat.h"
58 58
59/* 59/*
60 * Conversions between floats as stored in memory and floats as 60 * Conversions between floats as stored in memory and floats as
61 * SoftFloat uses them 61 * SoftFloat uses them
62 */ 62 */
63#ifndef FLOAT64_DEMANGLE 63#ifndef FLOAT64_DEMANGLE
64#define FLOAT64_DEMANGLE(a) (a) 64#define FLOAT64_DEMANGLE(a) (a)
65#endif 65#endif
66#ifndef FLOAT64_MANGLE 66#ifndef FLOAT64_MANGLE
67#define FLOAT64_MANGLE(a) (a) 67#define FLOAT64_MANGLE(a) (a)
68#endif 68#endif
69 69
70/* 70/*
71------------------------------------------------------------------------------- 71-------------------------------------------------------------------------------
72Floating-point rounding mode, extended double-precision rounding precision, 72Floating-point rounding mode, extended double-precision rounding precision,
73and exception flags. 73and exception flags.
74------------------------------------------------------------------------------- 74-------------------------------------------------------------------------------
75*/ 75*/
76#ifndef set_float_rounding_mode 76#ifndef set_float_rounding_mode
77fp_rnd float_rounding_mode = float_round_nearest_even; 77fp_rnd float_rounding_mode = float_round_nearest_even;
78fp_except float_exception_flags = 0; 78fp_except float_exception_flags = 0;
79#endif 79#endif
80#ifndef set_float_exception_inexact_flag 80#ifndef set_float_exception_inexact_flag
81#define set_float_exception_inexact_flag() \ 81#define set_float_exception_inexact_flag() \
82 ((void)(float_exception_flags |= float_flag_inexact)) 82 ((void)(float_exception_flags |= float_flag_inexact))
83#endif 83#endif
84#ifdef FLOATX80 84#ifdef FLOATX80
85int8 floatx80_rounding_precision = 80; 85int8 floatx80_rounding_precision = 80;
86#endif 86#endif
87 87
88/* 88/*
89------------------------------------------------------------------------------- 89-------------------------------------------------------------------------------
90Primitive arithmetic functions, including multi-word arithmetic, and 90Primitive arithmetic functions, including multi-word arithmetic, and
91division and square root approximations. (Can be specialized to target if 91division and square root approximations. (Can be specialized to target if
92desired.) 92desired.)
93------------------------------------------------------------------------------- 93-------------------------------------------------------------------------------
94*/ 94*/
95#include "softfloat-macros" 95#include "softfloat-macros"
96 96
97/* 97/*
98------------------------------------------------------------------------------- 98-------------------------------------------------------------------------------
99Functions and definitions to determine: (1) whether tininess for underflow 99Functions and definitions to determine: (1) whether tininess for underflow
100is detected before or after rounding by default, (2) what (if anything) 100is detected before or after rounding by default, (2) what (if anything)
101happens when exceptions are raised, (3) how signaling NaNs are distinguished 101happens when exceptions are raised, (3) how signaling NaNs are distinguished
102from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs 102from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
103are propagated from function inputs to output. These details are target- 103are propagated from function inputs to output. These details are target-
104specific. 104specific.
105------------------------------------------------------------------------------- 105-------------------------------------------------------------------------------
106*/ 106*/
107#include "softfloat-specialize" 107#include "softfloat-specialize"
108 108
109#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128) 109#if !defined(SOFTFLOAT_FOR_GCC) || defined(FLOATX80) || defined(FLOAT128)
110/* 110/*
111------------------------------------------------------------------------------- 111-------------------------------------------------------------------------------
112Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 112Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
113and 7, and returns the properly rounded 32-bit integer corresponding to the 113and 7, and returns the properly rounded 32-bit integer corresponding to the
114input. If `zSign' is 1, the input is negated before being converted to an 114input. If `zSign' is 1, the input is negated before being converted to an
115integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input 115integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
116is simply rounded to an integer, with the inexact exception raised if the 116is simply rounded to an integer, with the inexact exception raised if the
117input cannot be represented exactly as an integer. However, if the fixed- 117input cannot be represented exactly as an integer. However, if the fixed-
118point input is too large, the invalid exception is raised and the largest 118point input is too large, the invalid exception is raised and the largest
119positive or negative integer is returned. 119positive or negative integer is returned.
120------------------------------------------------------------------------------- 120-------------------------------------------------------------------------------
121*/ 121*/
122static int32 roundAndPackInt32( flag zSign, bits64 absZ ) 122static int32 roundAndPackInt32( flag zSign, bits64 absZ )
123{ 123{
124 int8 roundingMode; 124 int8 roundingMode;
125 flag roundNearestEven; 125 flag roundNearestEven;
126 int8 roundIncrement, roundBits; 126 int8 roundIncrement, roundBits;
127 int32 z; 127 int32 z;
128 128
129 roundingMode = float_rounding_mode; 129 roundingMode = float_rounding_mode;
130 roundNearestEven = ( roundingMode == float_round_nearest_even ); 130 roundNearestEven = ( roundingMode == float_round_nearest_even );
131 roundIncrement = 0x40; 131 roundIncrement = 0x40;
132 if ( ! roundNearestEven ) { 132 if ( ! roundNearestEven ) {
133 if ( roundingMode == float_round_to_zero ) { 133 if ( roundingMode == float_round_to_zero ) {
134 roundIncrement = 0; 134 roundIncrement = 0;
135 } 135 }
136 else { 136 else {
137 roundIncrement = 0x7F; 137 roundIncrement = 0x7F;
138 if ( zSign ) { 138 if ( zSign ) {
139 if ( roundingMode == float_round_up ) roundIncrement = 0; 139 if ( roundingMode == float_round_up ) roundIncrement = 0;
140 } 140 }
141 else { 141 else {
142 if ( roundingMode == float_round_down ) roundIncrement = 0; 142 if ( roundingMode == float_round_down ) roundIncrement = 0;
143 } 143 }
144 } 144 }
145 } 145 }
146 roundBits = (int8)(absZ & 0x7F); 146 roundBits = (int8)(absZ & 0x7F);
147 absZ = ( absZ + roundIncrement )>>7; 147 absZ = ( absZ + roundIncrement )>>7;
148 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 148 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
149 z = (int32)absZ; 149 z = (int32)absZ;
150 if ( zSign ) z = - z; 150 if ( zSign ) z = - z;
151 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { 151 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
152 float_raise( float_flag_invalid ); 152 float_raise( float_flag_invalid );
153 return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 153 return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
154 } 154 }
155 if ( roundBits ) set_float_exception_inexact_flag(); 155 if ( roundBits ) set_float_exception_inexact_flag();
156 return z; 156 return z;
157 157
158} 158}
159 159
160/* 160/*
161------------------------------------------------------------------------------- 161-------------------------------------------------------------------------------
162Takes the 128-bit fixed-point value formed by concatenating `absZ0' and 162Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
163`absZ1', with binary point between bits 63 and 64 (between the input words), 163`absZ1', with binary point between bits 63 and 64 (between the input words),
164and returns the properly rounded 64-bit integer corresponding to the input. 164and returns the properly rounded 64-bit integer corresponding to the input.
165If `zSign' is 1, the input is negated before being converted to an integer. 165If `zSign' is 1, the input is negated before being converted to an integer.
166Ordinarily, the fixed-point input is simply rounded to an integer, with 166Ordinarily, the fixed-point input is simply rounded to an integer, with
167the inexact exception raised if the input cannot be represented exactly as 167the inexact exception raised if the input cannot be represented exactly as
168an integer. However, if the fixed-point input is too large, the invalid 168an integer. However, if the fixed-point input is too large, the invalid
169exception is raised and the largest positive or negative integer is 169exception is raised and the largest positive or negative integer is
170returned. 170returned.
171------------------------------------------------------------------------------- 171-------------------------------------------------------------------------------
172*/ 172*/
173static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 ) 173static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 )
174{ 174{
175 int8 roundingMode; 175 int8 roundingMode;
176 flag roundNearestEven, increment; 176 flag roundNearestEven, increment;
177 int64 z; 177 int64 z;
178 178
179 roundingMode = float_rounding_mode; 179 roundingMode = float_rounding_mode;
180 roundNearestEven = ( roundingMode == float_round_nearest_even ); 180 roundNearestEven = ( roundingMode == float_round_nearest_even );
181 increment = ( (sbits64) absZ1 < 0 ); 181 increment = ( (sbits64) absZ1 < 0 );
182 if ( ! roundNearestEven ) { 182 if ( ! roundNearestEven ) {
183 if ( roundingMode == float_round_to_zero ) { 183 if ( roundingMode == float_round_to_zero ) {
184 increment = 0; 184 increment = 0;
185 } 185 }
186 else { 186 else {
187 if ( zSign ) { 187 if ( zSign ) {
188 increment = ( roundingMode == float_round_down ) && absZ1; 188 increment = ( roundingMode == float_round_down ) && absZ1;
189 } 189 }
190 else { 190 else {
191 increment = ( roundingMode == float_round_up ) && absZ1; 191 increment = ( roundingMode == float_round_up ) && absZ1;
192 } 192 }
193 } 193 }
194 } 194 }
195 if ( increment ) { 195 if ( increment ) {
196 ++absZ0; 196 ++absZ0;
197 if ( absZ0 == 0 ) goto overflow; 197 if ( absZ0 == 0 ) goto overflow;
198 absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven ); 198 absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven );
199 } 199 }
200 z = absZ0; 200 z = absZ0;
201 if ( zSign ) z = - z; 201 if ( zSign ) z = - z;
202 if ( z && ( ( z < 0 ) ^ zSign ) ) { 202 if ( z && ( ( z < 0 ) ^ zSign ) ) {
203 overflow: 203 overflow:
204 float_raise( float_flag_invalid ); 204 float_raise( float_flag_invalid );
205 return 205 return
206 zSign ? (sbits64) LIT64( 0x8000000000000000 ) 206 zSign ? (sbits64) LIT64( 0x8000000000000000 )
207 : LIT64( 0x7FFFFFFFFFFFFFFF ); 207 : LIT64( 0x7FFFFFFFFFFFFFFF );
208 } 208 }
209 if ( absZ1 ) set_float_exception_inexact_flag(); 209 if ( absZ1 ) set_float_exception_inexact_flag();
210 return z; 210 return z;
211 211
212} 212}
213#endif 213#endif
214 214
215/* 215/*
216------------------------------------------------------------------------------- 216-------------------------------------------------------------------------------
217Returns the fraction bits of the single-precision floating-point value `a'. 217Returns the fraction bits of the single-precision floating-point value `a'.
218------------------------------------------------------------------------------- 218-------------------------------------------------------------------------------
219*/ 219*/
220INLINE bits32 extractFloat32Frac( float32 a ) 220INLINE bits32 extractFloat32Frac( float32 a )
221{ 221{
222 222
223 return a & 0x007FFFFF; 223 return a & 0x007FFFFF;
224 224
225} 225}
226 226
227/* 227/*
228------------------------------------------------------------------------------- 228-------------------------------------------------------------------------------
229Returns the exponent bits of the single-precision floating-point value `a'. 229Returns the exponent bits of the single-precision floating-point value `a'.
230------------------------------------------------------------------------------- 230-------------------------------------------------------------------------------
231*/ 231*/
232INLINE int16 extractFloat32Exp( float32 a ) 232INLINE int16 extractFloat32Exp( float32 a )
233{ 233{
234 234
235 return ( a>>23 ) & 0xFF; 235 return ( a>>23 ) & 0xFF;
236 236
237} 237}
238 238
239/* 239/*
240------------------------------------------------------------------------------- 240-------------------------------------------------------------------------------
241Returns the sign bit of the single-precision floating-point value `a'. 241Returns the sign bit of the single-precision floating-point value `a'.
242------------------------------------------------------------------------------- 242-------------------------------------------------------------------------------
243*/ 243*/
244INLINE flag extractFloat32Sign( float32 a ) 244INLINE flag extractFloat32Sign( float32 a )
245{ 245{
246 246
247 return a>>31; 247 return a>>31;
248 248
249} 249}
250 250
251/* 251/*
252------------------------------------------------------------------------------- 252-------------------------------------------------------------------------------
253Normalizes the subnormal single-precision floating-point value represented 253Normalizes the subnormal single-precision floating-point value represented
254by the denormalized significand `aSig'. The normalized exponent and 254by the denormalized significand `aSig'. The normalized exponent and
255significand are stored at the locations pointed to by `zExpPtr' and 255significand are stored at the locations pointed to by `zExpPtr' and
256`zSigPtr', respectively. 256`zSigPtr', respectively.
257------------------------------------------------------------------------------- 257-------------------------------------------------------------------------------
258*/ 258*/
259static void 259static void
260 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) 260 normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr )
261{ 261{
262 int8 shiftCount; 262 int8 shiftCount;
263 263
264 shiftCount = countLeadingZeros32( aSig ) - 8; 264 shiftCount = countLeadingZeros32( aSig ) - 8;
265 *zSigPtr = aSig<<shiftCount; 265 *zSigPtr = aSig<<shiftCount;
266 *zExpPtr = 1 - shiftCount; 266 *zExpPtr = 1 - shiftCount;
267 267
268} 268}
269 269
270/* 270/*
271------------------------------------------------------------------------------- 271-------------------------------------------------------------------------------
272Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 272Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
273single-precision floating-point value, returning the result. After being 273single-precision floating-point value, returning the result. After being
274shifted into the proper positions, the three fields are simply added 274shifted into the proper positions, the three fields are simply added
275together to form the result. This means that any integer portion of `zSig' 275together to form the result. This means that any integer portion of `zSig'
276will be added into the exponent. Since a properly normalized significand 276will be added into the exponent. Since a properly normalized significand
277will have an integer portion equal to 1, the `zExp' input should be 1 less 277will have an integer portion equal to 1, the `zExp' input should be 1 less
278than the desired result exponent whenever `zSig' is a complete, normalized 278than the desired result exponent whenever `zSig' is a complete, normalized
279significand. 279significand.
280------------------------------------------------------------------------------- 280-------------------------------------------------------------------------------
281*/ 281*/
282INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig ) 282INLINE float32 packFloat32( flag zSign, int16 zExp, bits32 zSig )
283{ 283{
284 284
285 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig; 285 return ( ( (bits32) zSign )<<31 ) + ( ( (bits32) zExp )<<23 ) + zSig;
286 286
287} 287}
288 288
289/* 289/*
290------------------------------------------------------------------------------- 290-------------------------------------------------------------------------------
291Takes an abstract floating-point value having sign `zSign', exponent `zExp', 291Takes an abstract floating-point value having sign `zSign', exponent `zExp',
292and significand `zSig', and returns the proper single-precision floating- 292and significand `zSig', and returns the proper single-precision floating-
293point value corresponding to the abstract input. Ordinarily, the abstract 293point value corresponding to the abstract input. Ordinarily, the abstract
294value is simply rounded and packed into the single-precision format, with 294value is simply rounded and packed into the single-precision format, with
295the inexact exception raised if the abstract input cannot be represented 295the inexact exception raised if the abstract input cannot be represented
296exactly. However, if the abstract value is too large, the overflow and 296exactly. However, if the abstract value is too large, the overflow and
297inexact exceptions are raised and an infinity or maximal finite value is 297inexact exceptions are raised and an infinity or maximal finite value is
298returned. If the abstract value is too small, the input value is rounded to 298returned. If the abstract value is too small, the input value is rounded to
299a subnormal number, and the underflow and inexact exceptions are raised if 299a subnormal number, and the underflow and inexact exceptions are raised if
300the abstract input cannot be represented exactly as a subnormal single- 300the abstract input cannot be represented exactly as a subnormal single-
301precision floating-point number. 301precision floating-point number.
302 The input significand `zSig' has its binary point between bits 30 302 The input significand `zSig' has its binary point between bits 30
303and 29, which is 7 bits to the left of the usual location. This shifted 303and 29, which is 7 bits to the left of the usual location. This shifted
304significand must be normalized or smaller. If `zSig' is not normalized, 304significand must be normalized or smaller. If `zSig' is not normalized,
305`zExp' must be 0; in that case, the result returned is a subnormal number, 305`zExp' must be 0; in that case, the result returned is a subnormal number,
306and it must not require rounding. In the usual case that `zSig' is 306and it must not require rounding. In the usual case that `zSig' is
307normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 307normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
308The handling of underflow and overflow follows the IEC/IEEE Standard for 308The handling of underflow and overflow follows the IEC/IEEE Standard for
309Binary Floating-Point Arithmetic. 309Binary Floating-Point Arithmetic.
310------------------------------------------------------------------------------- 310-------------------------------------------------------------------------------
311*/ 311*/
312static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 312static float32 roundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
313{ 313{
314 int8 roundingMode; 314 int8 roundingMode;
315 flag roundNearestEven; 315 flag roundNearestEven;
316 int8 roundIncrement, roundBits; 316 int8 roundIncrement, roundBits;
317 flag isTiny; 317 flag isTiny;
318 318
319 roundingMode = float_rounding_mode; 319 roundingMode = float_rounding_mode;
320 roundNearestEven = ( roundingMode == float_round_nearest_even ); 320 roundNearestEven = ( roundingMode == float_round_nearest_even );
321 roundIncrement = 0x40; 321 roundIncrement = 0x40;
322 if ( ! roundNearestEven ) { 322 if ( ! roundNearestEven ) {
323 if ( roundingMode == float_round_to_zero ) { 323 if ( roundingMode == float_round_to_zero ) {
324 roundIncrement = 0; 324 roundIncrement = 0;
325 } 325 }
326 else { 326 else {
327 roundIncrement = 0x7F; 327 roundIncrement = 0x7F;
328 if ( zSign ) { 328 if ( zSign ) {
329 if ( roundingMode == float_round_up ) roundIncrement = 0; 329 if ( roundingMode == float_round_up ) roundIncrement = 0;
330 } 330 }
331 else { 331 else {
332 if ( roundingMode == float_round_down ) roundIncrement = 0; 332 if ( roundingMode == float_round_down ) roundIncrement = 0;
333 } 333 }
334 } 334 }
335 } 335 }
336 roundBits = zSig & 0x7F; 336 roundBits = zSig & 0x7F;
337 if ( 0xFD <= (bits16) zExp ) { 337 if ( 0xFD <= (bits16) zExp ) {
338 if ( ( 0xFD < zExp ) 338 if ( ( 0xFD < zExp )
339 || ( ( zExp == 0xFD ) 339 || ( ( zExp == 0xFD )
340 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) ) 340 && ( (sbits32) ( zSig + roundIncrement ) < 0 ) )
341 ) { 341 ) {
342 float_raise( float_flag_overflow | float_flag_inexact ); 342 float_raise( float_flag_overflow | float_flag_inexact );
343 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 ); 343 return packFloat32( zSign, 0xFF, 0 ) - ( roundIncrement == 0 );
344 } 344 }
345 if ( zExp < 0 ) { 345 if ( zExp < 0 ) {
346 isTiny = 346 isTiny =
347 ( float_detect_tininess == float_tininess_before_rounding ) 347 ( float_detect_tininess == float_tininess_before_rounding )
348 || ( zExp < -1 ) 348 || ( zExp < -1 )
349 || ( zSig + roundIncrement < 0x80000000U ); 349 || ( zSig + roundIncrement < 0x80000000U );
350 shift32RightJamming( zSig, - zExp, &zSig ); 350 shift32RightJamming( zSig, - zExp, &zSig );
351 zExp = 0; 351 zExp = 0;
352 roundBits = zSig & 0x7F; 352 roundBits = zSig & 0x7F;
353 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 353 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
354 } 354 }
355 } 355 }
356 if ( roundBits ) set_float_exception_inexact_flag(); 356 if ( roundBits ) set_float_exception_inexact_flag();
357 zSig = ( zSig + roundIncrement )>>7; 357 zSig = ( zSig + roundIncrement )>>7;
358 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); 358 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
359 if ( zSig == 0 ) zExp = 0; 359 if ( zSig == 0 ) zExp = 0;
360 return packFloat32( zSign, zExp, zSig ); 360 return packFloat32( zSign, zExp, zSig );
361 361
362} 362}
363 363
364/* 364/*
365------------------------------------------------------------------------------- 365-------------------------------------------------------------------------------
366Takes an abstract floating-point value having sign `zSign', exponent `zExp', 366Takes an abstract floating-point value having sign `zSign', exponent `zExp',
367and significand `zSig', and returns the proper single-precision floating- 367and significand `zSig', and returns the proper single-precision floating-
368point value corresponding to the abstract input. This routine is just like 368point value corresponding to the abstract input. This routine is just like
369`roundAndPackFloat32' except that `zSig' does not have to be normalized. 369`roundAndPackFloat32' except that `zSig' does not have to be normalized.
370Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 370Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
371floating-point exponent. 371floating-point exponent.
372------------------------------------------------------------------------------- 372-------------------------------------------------------------------------------
373*/ 373*/
374static float32 374static float32
375 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) 375 normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig )
376{ 376{
377 int8 shiftCount; 377 int8 shiftCount;
378 378
379 shiftCount = countLeadingZeros32( zSig ) - 1; 379 shiftCount = countLeadingZeros32( zSig ) - 1;
380 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount ); 380 return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<<shiftCount );
381 381
382} 382}
383 383
384/* 384/*
385------------------------------------------------------------------------------- 385-------------------------------------------------------------------------------
386Returns the fraction bits of the double-precision floating-point value `a'. 386Returns the fraction bits of the double-precision floating-point value `a'.
387------------------------------------------------------------------------------- 387-------------------------------------------------------------------------------
388*/ 388*/
389INLINE bits64 extractFloat64Frac( float64 a ) 389INLINE bits64 extractFloat64Frac( float64 a )
390{ 390{
391 391
392 return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF ); 392 return FLOAT64_DEMANGLE(a) & LIT64( 0x000FFFFFFFFFFFFF );
393 393
394} 394}
395 395
396/* 396/*
397------------------------------------------------------------------------------- 397-------------------------------------------------------------------------------
398Returns the exponent bits of the double-precision floating-point value `a'. 398Returns the exponent bits of the double-precision floating-point value `a'.
399------------------------------------------------------------------------------- 399-------------------------------------------------------------------------------
400*/ 400*/
401INLINE int16 extractFloat64Exp( float64 a ) 401INLINE int16 extractFloat64Exp( float64 a )
402{ 402{
403 403
404 return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF); 404 return (int16)((FLOAT64_DEMANGLE(a) >> 52) & 0x7FF);
405 405
406} 406}
407 407
408/* 408/*
409------------------------------------------------------------------------------- 409-------------------------------------------------------------------------------
410Returns the sign bit of the double-precision floating-point value `a'. 410Returns the sign bit of the double-precision floating-point value `a'.
411------------------------------------------------------------------------------- 411-------------------------------------------------------------------------------
412*/ 412*/
413INLINE flag extractFloat64Sign( float64 a ) 413INLINE flag extractFloat64Sign( float64 a )
414{ 414{
415 415
416 return (flag)(FLOAT64_DEMANGLE(a) >> 63); 416 return (flag)(FLOAT64_DEMANGLE(a) >> 63);
417 417
418} 418}
419 419
420/* 420/*
421------------------------------------------------------------------------------- 421-------------------------------------------------------------------------------
422Normalizes the subnormal double-precision floating-point value represented 422Normalizes the subnormal double-precision floating-point value represented
423by the denormalized significand `aSig'. The normalized exponent and 423by the denormalized significand `aSig'. The normalized exponent and
424significand are stored at the locations pointed to by `zExpPtr' and 424significand are stored at the locations pointed to by `zExpPtr' and
425`zSigPtr', respectively. 425`zSigPtr', respectively.
426------------------------------------------------------------------------------- 426-------------------------------------------------------------------------------
427*/ 427*/
428static void 428static void
429 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) 429 normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr )
430{ 430{
431 int8 shiftCount; 431 int8 shiftCount;
432 432
433 shiftCount = countLeadingZeros64( aSig ) - 11; 433 shiftCount = countLeadingZeros64( aSig ) - 11;
434 *zSigPtr = aSig<<shiftCount; 434 *zSigPtr = aSig<<shiftCount;
435 *zExpPtr = 1 - shiftCount; 435 *zExpPtr = 1 - shiftCount;
436 436
437} 437}
438 438
439/* 439/*
440------------------------------------------------------------------------------- 440-------------------------------------------------------------------------------
441Packs the sign `zSign', exponent `zExp', and significand `zSig' into a 441Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
442double-precision floating-point value, returning the result. After being 442double-precision floating-point value, returning the result. After being
443shifted into the proper positions, the three fields are simply added 443shifted into the proper positions, the three fields are simply added
444together to form the result. This means that any integer portion of `zSig' 444together to form the result. This means that any integer portion of `zSig'
445will be added into the exponent. Since a properly normalized significand 445will be added into the exponent. Since a properly normalized significand
446will have an integer portion equal to 1, the `zExp' input should be 1 less 446will have an integer portion equal to 1, the `zExp' input should be 1 less
447than the desired result exponent whenever `zSig' is a complete, normalized 447than the desired result exponent whenever `zSig' is a complete, normalized
448significand. 448significand.
449------------------------------------------------------------------------------- 449-------------------------------------------------------------------------------
450*/ 450*/
451INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig ) 451INLINE float64 packFloat64( flag zSign, int16 zExp, bits64 zSig )
452{ 452{
453 453
454 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) + 454 return FLOAT64_MANGLE( ( ( (bits64) zSign )<<63 ) +
455 ( ( (bits64) zExp )<<52 ) + zSig ); 455 ( ( (bits64) zExp )<<52 ) + zSig );
456 456
457} 457}
458 458
459/* 459/*
460------------------------------------------------------------------------------- 460-------------------------------------------------------------------------------
461Takes an abstract floating-point value having sign `zSign', exponent `zExp', 461Takes an abstract floating-point value having sign `zSign', exponent `zExp',
462and significand `zSig', and returns the proper double-precision floating- 462and significand `zSig', and returns the proper double-precision floating-
463point value corresponding to the abstract input. Ordinarily, the abstract 463point value corresponding to the abstract input. Ordinarily, the abstract
464value is simply rounded and packed into the double-precision format, with 464value is simply rounded and packed into the double-precision format, with
465the inexact exception raised if the abstract input cannot be represented 465the inexact exception raised if the abstract input cannot be represented
466exactly. However, if the abstract value is too large, the overflow and 466exactly. However, if the abstract value is too large, the overflow and
467inexact exceptions are raised and an infinity or maximal finite value is 467inexact exceptions are raised and an infinity or maximal finite value is
468returned. If the abstract value is too small, the input value is rounded to 468returned. If the abstract value is too small, the input value is rounded to
469a subnormal number, and the underflow and inexact exceptions are raised if 469a subnormal number, and the underflow and inexact exceptions are raised if
470the abstract input cannot be represented exactly as a subnormal double- 470the abstract input cannot be represented exactly as a subnormal double-
471precision floating-point number. 471precision floating-point number.
472 The input significand `zSig' has its binary point between bits 62 472 The input significand `zSig' has its binary point between bits 62
473and 61, which is 10 bits to the left of the usual location. This shifted 473and 61, which is 10 bits to the left of the usual location. This shifted
474significand must be normalized or smaller. If `zSig' is not normalized, 474significand must be normalized or smaller. If `zSig' is not normalized,
475`zExp' must be 0; in that case, the result returned is a subnormal number, 475`zExp' must be 0; in that case, the result returned is a subnormal number,
476and it must not require rounding. In the usual case that `zSig' is 476and it must not require rounding. In the usual case that `zSig' is
477normalized, `zExp' must be 1 less than the ``true'' floating-point exponent. 477normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
478The handling of underflow and overflow follows the IEC/IEEE Standard for 478The handling of underflow and overflow follows the IEC/IEEE Standard for
479Binary Floating-Point Arithmetic. 479Binary Floating-Point Arithmetic.
480------------------------------------------------------------------------------- 480-------------------------------------------------------------------------------
481*/ 481*/
482static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) 482static float64 roundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
483{ 483{
484 int8 roundingMode; 484 int8 roundingMode;
485 flag roundNearestEven; 485 flag roundNearestEven;
486 int16 roundIncrement, roundBits; 486 int16 roundIncrement, roundBits;
487 flag isTiny; 487 flag isTiny;
488 488
489 roundingMode = float_rounding_mode; 489 roundingMode = float_rounding_mode;
490 roundNearestEven = ( roundingMode == float_round_nearest_even ); 490 roundNearestEven = ( roundingMode == float_round_nearest_even );
491 roundIncrement = 0x200; 491 roundIncrement = 0x200;
492 if ( ! roundNearestEven ) { 492 if ( ! roundNearestEven ) {
493 if ( roundingMode == float_round_to_zero ) { 493 if ( roundingMode == float_round_to_zero ) {
494 roundIncrement = 0; 494 roundIncrement = 0;
495 } 495 }
496 else { 496 else {
497 roundIncrement = 0x3FF; 497 roundIncrement = 0x3FF;
498 if ( zSign ) { 498 if ( zSign ) {
499 if ( roundingMode == float_round_up ) roundIncrement = 0; 499 if ( roundingMode == float_round_up ) roundIncrement = 0;
500 } 500 }
501 else { 501 else {
502 if ( roundingMode == float_round_down ) roundIncrement = 0; 502 if ( roundingMode == float_round_down ) roundIncrement = 0;
503 } 503 }
504 } 504 }
505 } 505 }
506 roundBits = (int16)(zSig & 0x3FF); 506 roundBits = (int16)(zSig & 0x3FF);
507 if ( 0x7FD <= (bits16) zExp ) { 507 if ( 0x7FD <= (bits16) zExp ) {
508 if ( ( 0x7FD < zExp ) 508 if ( ( 0x7FD < zExp )
509 || ( ( zExp == 0x7FD ) 509 || ( ( zExp == 0x7FD )
510 && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) 510 && ( (sbits64) ( zSig + roundIncrement ) < 0 ) )
511 ) { 511 ) {
512 float_raise( float_flag_overflow | float_flag_inexact ); 512 float_raise( float_flag_overflow | float_flag_inexact );
513 return FLOAT64_MANGLE( 513 return FLOAT64_MANGLE(
514 FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) - 514 FLOAT64_DEMANGLE(packFloat64( zSign, 0x7FF, 0 )) -
515 ( roundIncrement == 0 )); 515 ( roundIncrement == 0 ));
516 } 516 }
517 if ( zExp < 0 ) { 517 if ( zExp < 0 ) {
518 isTiny = 518 isTiny =
519 ( float_detect_tininess == float_tininess_before_rounding ) 519 ( float_detect_tininess == float_tininess_before_rounding )
520 || ( zExp < -1 ) 520 || ( zExp < -1 )
521 || ( zSig + roundIncrement < (bits64)LIT64( 0x8000000000000000 ) ); 521 || ( zSig + roundIncrement < (bits64)LIT64( 0x8000000000000000 ) );
522 shift64RightJamming( zSig, - zExp, &zSig ); 522 shift64RightJamming( zSig, - zExp, &zSig );
523 zExp = 0; 523 zExp = 0;
524 roundBits = (int16)(zSig & 0x3FF); 524 roundBits = (int16)(zSig & 0x3FF);
525 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 525 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
526 } 526 }
527 } 527 }
528 if ( roundBits ) set_float_exception_inexact_flag(); 528 if ( roundBits ) set_float_exception_inexact_flag();
529 zSig = ( zSig + roundIncrement )>>10; 529 zSig = ( zSig + roundIncrement )>>10;
530 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); 530 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
531 if ( zSig == 0 ) zExp = 0; 531 if ( zSig == 0 ) zExp = 0;
532 return packFloat64( zSign, zExp, zSig ); 532 return packFloat64( zSign, zExp, zSig );
533 533
534} 534}
535 535
536/* 536/*
537------------------------------------------------------------------------------- 537-------------------------------------------------------------------------------
538Takes an abstract floating-point value having sign `zSign', exponent `zExp', 538Takes an abstract floating-point value having sign `zSign', exponent `zExp',
539and significand `zSig', and returns the proper double-precision floating- 539and significand `zSig', and returns the proper double-precision floating-
540point value corresponding to the abstract input. This routine is just like 540point value corresponding to the abstract input. This routine is just like
541`roundAndPackFloat64' except that `zSig' does not have to be normalized. 541`roundAndPackFloat64' except that `zSig' does not have to be normalized.
542Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' 542Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
543floating-point exponent. 543floating-point exponent.
544------------------------------------------------------------------------------- 544-------------------------------------------------------------------------------
545*/ 545*/
546static float64 546static float64
547 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) 547 normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig )
548{ 548{
549 int8 shiftCount; 549 int8 shiftCount;
550 550
551 shiftCount = countLeadingZeros64( zSig ) - 1; 551 shiftCount = countLeadingZeros64( zSig ) - 1;
552 return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount ); 552 return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<<shiftCount );
553 553
554} 554}
555 555
556#ifdef FLOATX80 556#ifdef FLOATX80
557 557
558/* 558/*
559------------------------------------------------------------------------------- 559-------------------------------------------------------------------------------
560Returns the fraction bits of the extended double-precision floating-point 560Returns the fraction bits of the extended double-precision floating-point
561value `a'. 561value `a'.
562------------------------------------------------------------------------------- 562-------------------------------------------------------------------------------
563*/ 563*/
564INLINE bits64 extractFloatx80Frac( floatx80 a ) 564INLINE bits64 extractFloatx80Frac( floatx80 a )
565{ 565{
566 566
567 return a.low; 567 return a.low;
568 568
569} 569}
570 570
571/* 571/*
572------------------------------------------------------------------------------- 572-------------------------------------------------------------------------------
573Returns the exponent bits of the extended double-precision floating-point 573Returns the exponent bits of the extended double-precision floating-point
574value `a'. 574value `a'.
575------------------------------------------------------------------------------- 575-------------------------------------------------------------------------------
576*/ 576*/
577INLINE int32 extractFloatx80Exp( floatx80 a ) 577INLINE int32 extractFloatx80Exp( floatx80 a )
578{ 578{
579 579
580 return a.high & 0x7FFF; 580 return a.high & 0x7FFF;
581 581
582} 582}
583 583
584/* 584/*
585------------------------------------------------------------------------------- 585-------------------------------------------------------------------------------
586Returns the sign bit of the extended double-precision floating-point value 586Returns the sign bit of the extended double-precision floating-point value
587`a'. 587`a'.
588------------------------------------------------------------------------------- 588-------------------------------------------------------------------------------
589*/ 589*/
590INLINE flag extractFloatx80Sign( floatx80 a ) 590INLINE flag extractFloatx80Sign( floatx80 a )
591{ 591{
592 592
593 return a.high>>15; 593 return a.high>>15;
594 594
595} 595}
596 596
597/* 597/*
598------------------------------------------------------------------------------- 598-------------------------------------------------------------------------------
599Normalizes the subnormal extended double-precision floating-point value 599Normalizes the subnormal extended double-precision floating-point value
600represented by the denormalized significand `aSig'. The normalized exponent 600represented by the denormalized significand `aSig'. The normalized exponent
601and significand are stored at the locations pointed to by `zExpPtr' and 601and significand are stored at the locations pointed to by `zExpPtr' and
602`zSigPtr', respectively. 602`zSigPtr', respectively.
603------------------------------------------------------------------------------- 603-------------------------------------------------------------------------------
604*/ 604*/
605static void 605static void
606 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) 606 normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr )
607{ 607{
608 int8 shiftCount; 608 int8 shiftCount;
609 609
610 shiftCount = countLeadingZeros64( aSig ); 610 shiftCount = countLeadingZeros64( aSig );
611 *zSigPtr = aSig<<shiftCount; 611 *zSigPtr = aSig<<shiftCount;
612 *zExpPtr = 1 - shiftCount; 612 *zExpPtr = 1 - shiftCount;
613 613
614} 614}
615 615
616/* 616/*
617------------------------------------------------------------------------------- 617-------------------------------------------------------------------------------
618Packs the sign `zSign', exponent `zExp', and significand `zSig' into an 618Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
619extended double-precision floating-point value, returning the result. 619extended double-precision floating-point value, returning the result.
620------------------------------------------------------------------------------- 620-------------------------------------------------------------------------------
621*/ 621*/
622INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) 622INLINE floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig )
623{ 623{
624 floatx80 z; 624 floatx80 z;
625 625
626 z.low = zSig; 626 z.low = zSig;
627 z.high = ( ( (bits16) zSign )<<15 ) + zExp; 627 z.high = ( ( (bits16) zSign )<<15 ) + zExp;
628 return z; 628 return z;
629 629
630} 630}
631 631
632/* 632/*
633------------------------------------------------------------------------------- 633-------------------------------------------------------------------------------
634Takes an abstract floating-point value having sign `zSign', exponent `zExp', 634Takes an abstract floating-point value having sign `zSign', exponent `zExp',
635and extended significand formed by the concatenation of `zSig0' and `zSig1', 635and extended significand formed by the concatenation of `zSig0' and `zSig1',
636and returns the proper extended double-precision floating-point value 636and returns the proper extended double-precision floating-point value
637corresponding to the abstract input. Ordinarily, the abstract value is 637corresponding to the abstract input. Ordinarily, the abstract value is
638rounded and packed into the extended double-precision format, with the 638rounded and packed into the extended double-precision format, with the
639inexact exception raised if the abstract input cannot be represented 639inexact exception raised if the abstract input cannot be represented
640exactly. However, if the abstract value is too large, the overflow and 640exactly. However, if the abstract value is too large, the overflow and
641inexact exceptions are raised and an infinity or maximal finite value is 641inexact exceptions are raised and an infinity or maximal finite value is
642returned. If the abstract value is too small, the input value is rounded to 642returned. If the abstract value is too small, the input value is rounded to
643a subnormal number, and the underflow and inexact exceptions are raised if 643a subnormal number, and the underflow and inexact exceptions are raised if
644the abstract input cannot be represented exactly as a subnormal extended 644the abstract input cannot be represented exactly as a subnormal extended
645double-precision floating-point number. 645double-precision floating-point number.
646 If `roundingPrecision' is 32 or 64, the result is rounded to the same 646 If `roundingPrecision' is 32 or 64, the result is rounded to the same
647number of bits as single or double precision, respectively. Otherwise, the 647number of bits as single or double precision, respectively. Otherwise, the
648result is rounded to the full precision of the extended double-precision 648result is rounded to the full precision of the extended double-precision
649format. 649format.
650 The input significand must be normalized or smaller. If the input 650 The input significand must be normalized or smaller. If the input
651significand is not normalized, `zExp' must be 0; in that case, the result 651significand is not normalized, `zExp' must be 0; in that case, the result
652returned is a subnormal number, and it must not require rounding. The 652returned is a subnormal number, and it must not require rounding. The
653handling of underflow and overflow follows the IEC/IEEE Standard for Binary 653handling of underflow and overflow follows the IEC/IEEE Standard for Binary
654Floating-Point Arithmetic. 654Floating-Point Arithmetic.
655------------------------------------------------------------------------------- 655-------------------------------------------------------------------------------
656*/ 656*/
657static floatx80 657static floatx80
658 roundAndPackFloatx80( 658 roundAndPackFloatx80(
659 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 659 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
660 ) 660 )
661{ 661{
662 int8 roundingMode; 662 int8 roundingMode;
663 flag roundNearestEven, increment, isTiny; 663 flag roundNearestEven, increment, isTiny;
664 int64 roundIncrement, roundMask, roundBits; 664 uint64 roundIncrement, roundMask, roundBits;
665 665
666 roundingMode = float_rounding_mode; 666 roundingMode = float_rounding_mode;
667 roundNearestEven = ( roundingMode == float_round_nearest_even ); 667 roundNearestEven = ( roundingMode == float_round_nearest_even );
668 if ( roundingPrecision == 80 ) goto precision80; 668 if ( roundingPrecision == 80 ) goto precision80;
669 if ( roundingPrecision == 64 ) { 669 if ( roundingPrecision == 64 ) {
670 roundIncrement = LIT64( 0x0000000000000400 ); 670 roundIncrement = LIT64( 0x0000000000000400 );
671 roundMask = LIT64( 0x00000000000007FF ); 671 roundMask = LIT64( 0x00000000000007FF );
672 } 672 }
673 else if ( roundingPrecision == 32 ) { 673 else if ( roundingPrecision == 32 ) {
674 roundIncrement = LIT64( 0x0000008000000000 ); 674 roundIncrement = LIT64( 0x0000008000000000 );
675 roundMask = LIT64( 0x000000FFFFFFFFFF ); 675 roundMask = LIT64( 0x000000FFFFFFFFFF );
676 } 676 }
677 else { 677 else {
678 goto precision80; 678 goto precision80;
679 } 679 }
680 zSig0 |= ( zSig1 != 0 ); 680 zSig0 |= ( zSig1 != 0 );
681 if ( ! roundNearestEven ) { 681 if ( ! roundNearestEven ) {
682 if ( roundingMode == float_round_to_zero ) { 682 if ( roundingMode == float_round_to_zero ) {
683 roundIncrement = 0; 683 roundIncrement = 0;
684 } 684 }
685 else { 685 else {
686 roundIncrement = roundMask; 686 roundIncrement = roundMask;
687 if ( zSign ) { 687 if ( zSign ) {
688 if ( roundingMode == float_round_up ) roundIncrement = 0; 688 if ( roundingMode == float_round_up ) roundIncrement = 0;
689 } 689 }
690 else { 690 else {
691 if ( roundingMode == float_round_down ) roundIncrement = 0; 691 if ( roundingMode == float_round_down ) roundIncrement = 0;
692 } 692 }
693 } 693 }
694 } 694 }
695 roundBits = zSig0 & roundMask; 695 roundBits = zSig0 & roundMask;
696 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { 696 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
697 if ( ( 0x7FFE < zExp ) 697 if ( ( 0x7FFE < zExp )
698 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) 698 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
699 ) { 699 ) {
700 goto overflow; 700 goto overflow;
701 } 701 }
702 if ( zExp <= 0 ) { 702 if ( zExp <= 0 ) {
703 isTiny = 703 isTiny =
704 ( float_detect_tininess == float_tininess_before_rounding ) 704 ( float_detect_tininess == float_tininess_before_rounding )
705 || ( zExp < 0 ) 705 || ( zExp < 0 )
706 || ( zSig0 <= zSig0 + roundIncrement ); 706 || ( zSig0 <= zSig0 + roundIncrement );
707 shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); 707 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
708 zExp = 0; 708 zExp = 0;
709 roundBits = zSig0 & roundMask; 709 roundBits = zSig0 & roundMask;
710 if ( isTiny && roundBits ) float_raise( float_flag_underflow ); 710 if ( isTiny && roundBits ) float_raise( float_flag_underflow );
711 if ( roundBits ) set_float_exception_inexact_flag(); 711 if ( roundBits ) set_float_exception_inexact_flag();
712 zSig0 += roundIncrement; 712 zSig0 += roundIncrement;
713 if ( (sbits64) zSig0 < 0 ) zExp = 1; 713 if ( (sbits64) zSig0 < 0 ) zExp = 1;
714 roundIncrement = roundMask + 1; 714 roundIncrement = roundMask + 1;
715 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 715 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
716 roundMask |= roundIncrement; 716 roundMask |= roundIncrement;
717 } 717 }
718 zSig0 &= ~ roundMask; 718 zSig0 &= ~ roundMask;
719 return packFloatx80( zSign, zExp, zSig0 ); 719 return packFloatx80( zSign, zExp, zSig0 );
720 } 720 }
721 } 721 }
722 if ( roundBits ) set_float_exception_inexact_flag(); 722 if ( roundBits ) set_float_exception_inexact_flag();
723 zSig0 += roundIncrement; 723 zSig0 += roundIncrement;
724 if ( zSig0 < roundIncrement ) { 724 if ( zSig0 < roundIncrement ) {
725 ++zExp; 725 ++zExp;
726 zSig0 = LIT64( 0x8000000000000000 ); 726 zSig0 = LIT64( 0x8000000000000000 );
727 } 727 }
728 roundIncrement = roundMask + 1; 728 roundIncrement = roundMask + 1;
729 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { 729 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
730 roundMask |= roundIncrement; 730 roundMask |= roundIncrement;
731 } 731 }
732 zSig0 &= ~ roundMask; 732 zSig0 &= ~ roundMask;
733 if ( zSig0 == 0 ) zExp = 0; 733 if ( zSig0 == 0 ) zExp = 0;
734 return packFloatx80( zSign, zExp, zSig0 ); 734 return packFloatx80( zSign, zExp, zSig0 );
735 precision80: 735 precision80:
736 increment = ( (sbits64) zSig1 < 0 ); 736 increment = ( (sbits64) zSig1 < 0 );
737 if ( ! roundNearestEven ) { 737 if ( ! roundNearestEven ) {
738 if ( roundingMode == float_round_to_zero ) { 738 if ( roundingMode == float_round_to_zero ) {
739 increment = 0; 739 increment = 0;
740 } 740 }
741 else { 741 else {
742 if ( zSign ) { 742 if ( zSign ) {
743 increment = ( roundingMode == float_round_down ) && zSig1; 743 increment = ( roundingMode == float_round_down ) && zSig1;
744 } 744 }
745 else { 745 else {
746 increment = ( roundingMode == float_round_up ) && zSig1; 746 increment = ( roundingMode == float_round_up ) && zSig1;
747 } 747 }
748 } 748 }
749 } 749 }
750 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { 750 if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) {
751 if ( ( 0x7FFE < zExp ) 751 if ( ( 0x7FFE < zExp )
752 || ( ( zExp == 0x7FFE ) 752 || ( ( zExp == 0x7FFE )
753 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) 753 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
754 && increment 754 && increment
755 ) 755 )
756 ) { 756 ) {
757 roundMask = 0; 757 roundMask = 0;
758 overflow: 758 overflow:
759 float_raise( float_flag_overflow | float_flag_inexact ); 759 float_raise( float_flag_overflow | float_flag_inexact );
760 if ( ( roundingMode == float_round_to_zero ) 760 if ( ( roundingMode == float_round_to_zero )
761 || ( zSign && ( roundingMode == float_round_up ) ) 761 || ( zSign && ( roundingMode == float_round_up ) )
762 || ( ! zSign && ( roundingMode == float_round_down ) ) 762 || ( ! zSign && ( roundingMode == float_round_down ) )
763 ) { 763 ) {
764 return packFloatx80( zSign, 0x7FFE, ~ roundMask ); 764 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
765 } 765 }
766 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 766 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
767 } 767 }
768 if ( zExp <= 0 ) { 768 if ( zExp <= 0 ) {
769 isTiny = 769 isTiny =
770 ( float_detect_tininess == float_tininess_before_rounding ) 770 ( float_detect_tininess == float_tininess_before_rounding )
771 || ( zExp < 0 ) 771 || ( zExp < 0 )
772 || ! increment 772 || ! increment
773 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); 773 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
774 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); 774 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
775 zExp = 0; 775 zExp = 0;
776 if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); 776 if ( isTiny && zSig1 ) float_raise( float_flag_underflow );
777 if ( zSig1 ) set_float_exception_inexact_flag(); 777 if ( zSig1 ) set_float_exception_inexact_flag();
778 if ( roundNearestEven ) { 778 if ( roundNearestEven ) {
779 increment = ( (sbits64) zSig1 < 0 ); 779 increment = ( (sbits64) zSig1 < 0 );
780 } 780 }
781 else { 781 else {
782 if ( zSign ) { 782 if ( zSign ) {
783 increment = ( roundingMode == float_round_down ) && zSig1; 783 increment = ( roundingMode == float_round_down ) && zSig1;
784 } 784 }
785 else { 785 else {
786 increment = ( roundingMode == float_round_up ) && zSig1; 786 increment = ( roundingMode == float_round_up ) && zSig1;
787 } 787 }
788 } 788 }
789 if ( increment ) { 789 if ( increment ) {
790 ++zSig0; 790 ++zSig0;
791 zSig0 &= 791 zSig0 &=
792 ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); 792 ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
793 if ( (sbits64) zSig0 < 0 ) zExp = 1; 793 if ( (sbits64) zSig0 < 0 ) zExp = 1;
794 } 794 }
795 return packFloatx80( zSign, zExp, zSig0 ); 795 return packFloatx80( zSign, zExp, zSig0 );
796 } 796 }
797 } 797 }
798 if ( zSig1 ) set_float_exception_inexact_flag(); 798 if ( zSig1 ) set_float_exception_inexact_flag();
799 if ( increment ) { 799 if ( increment ) {
800 ++zSig0; 800 ++zSig0;
801 if ( zSig0 == 0 ) { 801 if ( zSig0 == 0 ) {
802 ++zExp; 802 ++zExp;
803 zSig0 = LIT64( 0x8000000000000000 ); 803 zSig0 = LIT64( 0x8000000000000000 );
804 } 804 }
805 else { 805 else {
806 zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); 806 zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven );
807 } 807 }
808 } 808 }
809 else { 809 else {
810 if ( zSig0 == 0 ) zExp = 0; 810 if ( zSig0 == 0 ) zExp = 0;
811 } 811 }
812 return packFloatx80( zSign, zExp, zSig0 ); 812 return packFloatx80( zSign, zExp, zSig0 );
813 813
814} 814}
815 815
816/* 816/*
817------------------------------------------------------------------------------- 817-------------------------------------------------------------------------------
818Takes an abstract floating-point value having sign `zSign', exponent 818Takes an abstract floating-point value having sign `zSign', exponent
819`zExp', and significand formed by the concatenation of `zSig0' and `zSig1', 819`zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
820and returns the proper extended double-precision floating-point value 820and returns the proper extended double-precision floating-point value
821corresponding to the abstract input. This routine is just like 821corresponding to the abstract input. This routine is just like
822`roundAndPackFloatx80' except that the input significand does not have to be 822`roundAndPackFloatx80' except that the input significand does not have to be
823normalized. 823normalized.
824------------------------------------------------------------------------------- 824-------------------------------------------------------------------------------
825*/ 825*/
826static floatx80 826static floatx80
827 normalizeRoundAndPackFloatx80( 827 normalizeRoundAndPackFloatx80(
828 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 828 int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1
829 ) 829 )
830{ 830{
831 int8 shiftCount; 831 int8 shiftCount;
832 832
833 if ( zSig0 == 0 ) { 833 if ( zSig0 == 0 ) {
834 zSig0 = zSig1; 834 zSig0 = zSig1;
835 zSig1 = 0; 835 zSig1 = 0;
836 zExp -= 64; 836 zExp -= 64;
837 } 837 }
838 shiftCount = countLeadingZeros64( zSig0 ); 838 shiftCount = countLeadingZeros64( zSig0 );
839 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 839 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
840 zExp -= shiftCount; 840 zExp -= shiftCount;
841 return 841 return
842 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); 842 roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 );
843 843
844} 844}
845 845
846#endif 846#endif
847 847
848#ifdef FLOAT128 848#ifdef FLOAT128
849 849
850/* 850/*
851------------------------------------------------------------------------------- 851-------------------------------------------------------------------------------
852Returns the least-significant 64 fraction bits of the quadruple-precision 852Returns the least-significant 64 fraction bits of the quadruple-precision
853floating-point value `a'. 853floating-point value `a'.
854------------------------------------------------------------------------------- 854-------------------------------------------------------------------------------
855*/ 855*/
856INLINE bits64 extractFloat128Frac1( float128 a ) 856INLINE bits64 extractFloat128Frac1( float128 a )
857{ 857{
858 858
859 return a.low; 859 return a.low;
860 860
861} 861}
862 862
863/* 863/*
864------------------------------------------------------------------------------- 864-------------------------------------------------------------------------------
865Returns the most-significant 48 fraction bits of the quadruple-precision 865Returns the most-significant 48 fraction bits of the quadruple-precision
866floating-point value `a'. 866floating-point value `a'.
867------------------------------------------------------------------------------- 867-------------------------------------------------------------------------------
868*/ 868*/
869INLINE bits64 extractFloat128Frac0( float128 a ) 869INLINE bits64 extractFloat128Frac0( float128 a )
870{ 870{
871 871
872 return a.high & LIT64( 0x0000FFFFFFFFFFFF ); 872 return a.high & LIT64( 0x0000FFFFFFFFFFFF );
873 873
874} 874}
875 875
876/* 876/*
877------------------------------------------------------------------------------- 877-------------------------------------------------------------------------------
878Returns the exponent bits of the quadruple-precision floating-point value 878Returns the exponent bits of the quadruple-precision floating-point value
879`a'. 879`a'.
880------------------------------------------------------------------------------- 880-------------------------------------------------------------------------------
881*/ 881*/
882INLINE int32 extractFloat128Exp( float128 a ) 882INLINE int32 extractFloat128Exp( float128 a )
883{ 883{
884 884
885 return (int32)((a.high >> 48) & 0x7FFF); 885 return (int32)((a.high >> 48) & 0x7FFF);
886 886
887} 887}
888 888
889/* 889/*
890------------------------------------------------------------------------------- 890-------------------------------------------------------------------------------
891Returns the sign bit of the quadruple-precision floating-point value `a'. 891Returns the sign bit of the quadruple-precision floating-point value `a'.
892------------------------------------------------------------------------------- 892-------------------------------------------------------------------------------
893*/ 893*/
894INLINE flag extractFloat128Sign( float128 a ) 894INLINE flag extractFloat128Sign( float128 a )
895{ 895{
896 896
897 return (flag)(a.high >> 63); 897 return (flag)(a.high >> 63);
898 898
899} 899}
900 900
901/* 901/*
902------------------------------------------------------------------------------- 902-------------------------------------------------------------------------------
903Normalizes the subnormal quadruple-precision floating-point value 903Normalizes the subnormal quadruple-precision floating-point value
904represented by the denormalized significand formed by the concatenation of 904represented by the denormalized significand formed by the concatenation of
905`aSig0' and `aSig1'. The normalized exponent is stored at the location 905`aSig0' and `aSig1'. The normalized exponent is stored at the location
906pointed to by `zExpPtr'. The most significant 49 bits of the normalized 906pointed to by `zExpPtr'. The most significant 49 bits of the normalized
907significand are stored at the location pointed to by `zSig0Ptr', and the 907significand are stored at the location pointed to by `zSig0Ptr', and the
908least significant 64 bits of the normalized significand are stored at the 908least significant 64 bits of the normalized significand are stored at the
909location pointed to by `zSig1Ptr'. 909location pointed to by `zSig1Ptr'.
910------------------------------------------------------------------------------- 910-------------------------------------------------------------------------------
911*/ 911*/
912static void 912static void
913 normalizeFloat128Subnormal( 913 normalizeFloat128Subnormal(
914 bits64 aSig0, 914 bits64 aSig0,
915 bits64 aSig1, 915 bits64 aSig1,
916 int32 *zExpPtr, 916 int32 *zExpPtr,
917 bits64 *zSig0Ptr, 917 bits64 *zSig0Ptr,
918 bits64 *zSig1Ptr 918 bits64 *zSig1Ptr
919 ) 919 )
920{ 920{
921 int8 shiftCount; 921 int8 shiftCount;
922 922
923 if ( aSig0 == 0 ) { 923 if ( aSig0 == 0 ) {
924 shiftCount = countLeadingZeros64( aSig1 ) - 15; 924 shiftCount = countLeadingZeros64( aSig1 ) - 15;
925 if ( shiftCount < 0 ) { 925 if ( shiftCount < 0 ) {
926 *zSig0Ptr = aSig1>>( - shiftCount ); 926 *zSig0Ptr = aSig1>>( - shiftCount );
927 *zSig1Ptr = aSig1<<( shiftCount & 63 ); 927 *zSig1Ptr = aSig1<<( shiftCount & 63 );
928 } 928 }
929 else { 929 else {
930 *zSig0Ptr = aSig1<<shiftCount; 930 *zSig0Ptr = aSig1<<shiftCount;
931 *zSig1Ptr = 0; 931 *zSig1Ptr = 0;
932 } 932 }
933 *zExpPtr = - shiftCount - 63; 933 *zExpPtr = - shiftCount - 63;
934 } 934 }
935 else { 935 else {
936 shiftCount = countLeadingZeros64( aSig0 ) - 15; 936 shiftCount = countLeadingZeros64( aSig0 ) - 15;
937 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr ); 937 shortShift128Left( aSig0, aSig1, shiftCount, zSig0Ptr, zSig1Ptr );
938 *zExpPtr = 1 - shiftCount; 938 *zExpPtr = 1 - shiftCount;
939 } 939 }
940 940
941} 941}
942 942
943/* 943/*
944------------------------------------------------------------------------------- 944-------------------------------------------------------------------------------
945Packs the sign `zSign', the exponent `zExp', and the significand formed 945Packs the sign `zSign', the exponent `zExp', and the significand formed
946by the concatenation of `zSig0' and `zSig1' into a quadruple-precision 946by the concatenation of `zSig0' and `zSig1' into a quadruple-precision
947floating-point value, returning the result. After being shifted into the 947floating-point value, returning the result. After being shifted into the
948proper positions, the three fields `zSign', `zExp', and `zSig0' are simply 948proper positions, the three fields `zSign', `zExp', and `zSig0' are simply
949added together to form the most significant 32 bits of the result. This 949added together to form the most significant 32 bits of the result. This
950means that any integer portion of `zSig0' will be added into the exponent. 950means that any integer portion of `zSig0' will be added into the exponent.
951Since a properly normalized significand will have an integer portion equal 951Since a properly normalized significand will have an integer portion equal
952to 1, the `zExp' input should be 1 less than the desired result exponent 952to 1, the `zExp' input should be 1 less than the desired result exponent
953whenever `zSig0' and `zSig1' concatenated form a complete, normalized 953whenever `zSig0' and `zSig1' concatenated form a complete, normalized
954significand. 954significand.
955------------------------------------------------------------------------------- 955-------------------------------------------------------------------------------
956*/ 956*/
957INLINE float128 957INLINE float128
958 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) 958 packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
959{ 959{
960 float128 z; 960 float128 z;
961 961
962 z.low = zSig1; 962 z.low = zSig1;
963 z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; 963 z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0;
964 return z; 964 return z;
965 965
966} 966}
967 967
968/* 968/*
969------------------------------------------------------------------------------- 969-------------------------------------------------------------------------------
970Takes an abstract floating-point value having sign `zSign', exponent `zExp', 970Takes an abstract floating-point value having sign `zSign', exponent `zExp',
971and extended significand formed by the concatenation of `zSig0', `zSig1', 971and extended significand formed by the concatenation of `zSig0', `zSig1',
972and `zSig2', and returns the proper quadruple-precision floating-point value 972and `zSig2', and returns the proper quadruple-precision floating-point value
973corresponding to the abstract input. Ordinarily, the abstract value is 973corresponding to the abstract input. Ordinarily, the abstract value is
974simply rounded and packed into the quadruple-precision format, with the 974simply rounded and packed into the quadruple-precision format, with the
975inexact exception raised if the abstract input cannot be represented 975inexact exception raised if the abstract input cannot be represented
976exactly. However, if the abstract value is too large, the overflow and 976exactly. However, if the abstract value is too large, the overflow and
977inexact exceptions are raised and an infinity or maximal finite value is 977inexact exceptions are raised and an infinity or maximal finite value is
978returned. If the abstract value is too small, the input value is rounded to 978returned. If the abstract value is too small, the input value is rounded to
979a subnormal number, and the underflow and inexact exceptions are raised if 979a subnormal number, and the underflow and inexact exceptions are raised if
980the abstract input cannot be represented exactly as a subnormal quadruple- 980the abstract input cannot be represented exactly as a subnormal quadruple-
981precision floating-point number. 981precision floating-point number.
982 The input significand must be normalized or smaller. If the input 982 The input significand must be normalized or smaller. If the input
983significand is not normalized, `zExp' must be 0; in that case, the result 983significand is not normalized, `zExp' must be 0; in that case, the result
984returned is a subnormal number, and it must not require rounding. In the 984returned is a subnormal number, and it must not require rounding. In the
985usual case that the input significand is normalized, `zExp' must be 1 less 985usual case that the input significand is normalized, `zExp' must be 1 less
986than the ``true'' floating-point exponent. The handling of underflow and 986than the ``true'' floating-point exponent. The handling of underflow and
987overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 987overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
988------------------------------------------------------------------------------- 988-------------------------------------------------------------------------------
989*/ 989*/
990static float128 990static float128
991 roundAndPackFloat128( 991 roundAndPackFloat128(
992 flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 ) 992 flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 )
993{ 993{
994 int8 roundingMode; 994 int8 roundingMode;
995 flag roundNearestEven, increment, isTiny; 995 flag roundNearestEven, increment, isTiny;
996 996
997 roundingMode = float_rounding_mode; 997 roundingMode = float_rounding_mode;
998 roundNearestEven = ( roundingMode == float_round_nearest_even ); 998 roundNearestEven = ( roundingMode == float_round_nearest_even );
999 increment = ( (sbits64) zSig2 < 0 ); 999 increment = ( (sbits64) zSig2 < 0 );
1000 if ( ! roundNearestEven ) { 1000 if ( ! roundNearestEven ) {
1001 if ( roundingMode == float_round_to_zero ) { 1001 if ( roundingMode == float_round_to_zero ) {
1002 increment = 0; 1002 increment = 0;
1003 } 1003 }
1004 else { 1004 else {
1005 if ( zSign ) { 1005 if ( zSign ) {
1006 increment = ( roundingMode == float_round_down ) && zSig2; 1006 increment = ( roundingMode == float_round_down ) && zSig2;
1007 } 1007 }
1008 else { 1008 else {
1009 increment = ( roundingMode == float_round_up ) && zSig2; 1009 increment = ( roundingMode == float_round_up ) && zSig2;
1010 } 1010 }
1011 } 1011 }
1012 } 1012 }
1013 if ( 0x7FFD <= (bits32) zExp ) { 1013 if ( 0x7FFD <= (bits32) zExp ) {
1014 if ( ( 0x7FFD < zExp ) 1014 if ( ( 0x7FFD < zExp )
1015 || ( ( zExp == 0x7FFD ) 1015 || ( ( zExp == 0x7FFD )
1016 && eq128( 1016 && eq128(
1017 LIT64( 0x0001FFFFFFFFFFFF ), 1017 LIT64( 0x0001FFFFFFFFFFFF ),
1018 LIT64( 0xFFFFFFFFFFFFFFFF ), 1018 LIT64( 0xFFFFFFFFFFFFFFFF ),
1019 zSig0, 1019 zSig0,
1020 zSig1 1020 zSig1
1021 ) 1021 )
1022 && increment 1022 && increment
1023 ) 1023 )
1024 ) { 1024 ) {
1025 float_raise( float_flag_overflow | float_flag_inexact ); 1025 float_raise( float_flag_overflow | float_flag_inexact );
1026 if ( ( roundingMode == float_round_to_zero ) 1026 if ( ( roundingMode == float_round_to_zero )
1027 || ( zSign && ( roundingMode == float_round_up ) ) 1027 || ( zSign && ( roundingMode == float_round_up ) )
1028 || ( ! zSign && ( roundingMode == float_round_down ) ) 1028 || ( ! zSign && ( roundingMode == float_round_down ) )
1029 ) { 1029 ) {
1030 return 1030 return
1031 packFloat128( 1031 packFloat128(
1032 zSign, 1032 zSign,
1033 0x7FFE, 1033 0x7FFE,
1034 LIT64( 0x0000FFFFFFFFFFFF ), 1034 LIT64( 0x0000FFFFFFFFFFFF ),
1035 LIT64( 0xFFFFFFFFFFFFFFFF ) 1035 LIT64( 0xFFFFFFFFFFFFFFFF )
1036 ); 1036 );
1037 } 1037 }
1038 return packFloat128( zSign, 0x7FFF, 0, 0 ); 1038 return packFloat128( zSign, 0x7FFF, 0, 0 );
1039 } 1039 }
1040 if ( zExp < 0 ) { 1040 if ( zExp < 0 ) {
1041 isTiny = 1041 isTiny =
1042 ( float_detect_tininess == float_tininess_before_rounding ) 1042 ( float_detect_tininess == float_tininess_before_rounding )
1043 || ( zExp < -1 ) 1043 || ( zExp < -1 )
1044 || ! increment 1044 || ! increment
1045 || lt128( 1045 || lt128(
1046 zSig0, 1046 zSig0,
1047 zSig1, 1047 zSig1,
1048 LIT64( 0x0001FFFFFFFFFFFF ), 1048 LIT64( 0x0001FFFFFFFFFFFF ),
1049 LIT64( 0xFFFFFFFFFFFFFFFF ) 1049 LIT64( 0xFFFFFFFFFFFFFFFF )
1050 ); 1050 );
1051 shift128ExtraRightJamming( 1051 shift128ExtraRightJamming(
1052 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); 1052 zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 );
1053 zExp = 0; 1053 zExp = 0;
1054 if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); 1054 if ( isTiny && zSig2 ) float_raise( float_flag_underflow );
1055 if ( roundNearestEven ) { 1055 if ( roundNearestEven ) {
1056 increment = ( (sbits64) zSig2 < 0 ); 1056 increment = ( (sbits64) zSig2 < 0 );
1057 } 1057 }
1058 else { 1058 else {
1059 if ( zSign ) { 1059 if ( zSign ) {
1060 increment = ( roundingMode == float_round_down ) && zSig2; 1060 increment = ( roundingMode == float_round_down ) && zSig2;
1061 } 1061 }
1062 else { 1062 else {
1063 increment = ( roundingMode == float_round_up ) && zSig2; 1063 increment = ( roundingMode == float_round_up ) && zSig2;
1064 } 1064 }
1065 } 1065 }
1066 } 1066 }
1067 } 1067 }
1068 if ( zSig2 ) set_float_exception_inexact_flag(); 1068 if ( zSig2 ) set_float_exception_inexact_flag();
1069 if ( increment ) { 1069 if ( increment ) {
1070 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); 1070 add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 );
1071 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); 1071 zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven );
1072 } 1072 }
1073 else { 1073 else {
1074 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; 1074 if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0;
1075 } 1075 }
1076 return packFloat128( zSign, zExp, zSig0, zSig1 ); 1076 return packFloat128( zSign, zExp, zSig0, zSig1 );
1077 1077
1078} 1078}
1079 1079
1080/* 1080/*
1081------------------------------------------------------------------------------- 1081-------------------------------------------------------------------------------
1082Takes an abstract floating-point value having sign `zSign', exponent `zExp', 1082Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1083and significand formed by the concatenation of `zSig0' and `zSig1', and 1083and significand formed by the concatenation of `zSig0' and `zSig1', and
1084returns the proper quadruple-precision floating-point value corresponding 1084returns the proper quadruple-precision floating-point value corresponding
1085to the abstract input. This routine is just like `roundAndPackFloat128' 1085to the abstract input. This routine is just like `roundAndPackFloat128'
1086except that the input significand has fewer bits and does not have to be 1086except that the input significand has fewer bits and does not have to be
1087normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- 1087normalized. In all cases, `zExp' must be 1 less than the ``true'' floating-
1088point exponent. 1088point exponent.
1089------------------------------------------------------------------------------- 1089-------------------------------------------------------------------------------
1090*/ 1090*/
1091static float128 1091static float128
1092 normalizeRoundAndPackFloat128( 1092 normalizeRoundAndPackFloat128(
1093 flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) 1093 flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 )
1094{ 1094{
1095 int8 shiftCount; 1095 int8 shiftCount;
1096 bits64 zSig2; 1096 bits64 zSig2;
1097 1097
1098 if ( zSig0 == 0 ) { 1098 if ( zSig0 == 0 ) {
1099 zSig0 = zSig1; 1099 zSig0 = zSig1;
1100 zSig1 = 0; 1100 zSig1 = 0;
1101 zExp -= 64; 1101 zExp -= 64;
1102 } 1102 }
1103 shiftCount = countLeadingZeros64( zSig0 ) - 15; 1103 shiftCount = countLeadingZeros64( zSig0 ) - 15;
1104 if ( 0 <= shiftCount ) { 1104 if ( 0 <= shiftCount ) {
1105 zSig2 = 0; 1105 zSig2 = 0;
1106 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 1106 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1107 } 1107 }
1108 else { 1108 else {
1109 shift128ExtraRightJamming( 1109 shift128ExtraRightJamming(
1110 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); 1110 zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 );
1111 } 1111 }
1112 zExp -= shiftCount; 1112 zExp -= shiftCount;
1113 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 1113 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
1114 1114
1115} 1115}
1116 1116
1117#endif 1117#endif
1118 1118
1119/* 1119/*
1120------------------------------------------------------------------------------- 1120-------------------------------------------------------------------------------
1121Returns the result of converting the 32-bit two's complement integer `a' 1121Returns the result of converting the 32-bit two's complement integer `a'
1122to the single-precision floating-point format. The conversion is performed 1122to the single-precision floating-point format. The conversion is performed
1123according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1123according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1124------------------------------------------------------------------------------- 1124-------------------------------------------------------------------------------
1125*/ 1125*/
1126float32 int32_to_float32( int32 a ) 1126float32 int32_to_float32( int32 a )
1127{ 1127{
1128 flag zSign; 1128 flag zSign;
1129 1129
1130 if ( a == 0 ) return 0; 1130 if ( a == 0 ) return 0;
1131 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); 1131 if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 );
1132 zSign = ( a < 0 ); 1132 zSign = ( a < 0 );
1133 return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a)); 1133 return normalizeRoundAndPackFloat32(zSign, 0x9C, (uint32)(zSign ? - a : a));
1134 1134
1135} 1135}
1136 1136
1137float32 uint32_to_float32( uint32 a ) 1137float32 uint32_to_float32( uint32 a )
1138{ 1138{
1139 if ( a == 0 ) return 0; 1139 if ( a == 0 ) return 0;
1140 if ( a & (bits32) 0x80000000 ) 1140 if ( a & (bits32) 0x80000000 )
1141 return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 ); 1141 return normalizeRoundAndPackFloat32( 0, 0x9D, a >> 1 );
1142 return normalizeRoundAndPackFloat32( 0, 0x9C, a ); 1142 return normalizeRoundAndPackFloat32( 0, 0x9C, a );
1143} 1143}
1144 1144
1145 1145
1146/* 1146/*
1147------------------------------------------------------------------------------- 1147-------------------------------------------------------------------------------
1148Returns the result of converting the 32-bit two's complement integer `a' 1148Returns the result of converting the 32-bit two's complement integer `a'
1149to the double-precision floating-point format. The conversion is performed 1149to the double-precision floating-point format. The conversion is performed
1150according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1150according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1151------------------------------------------------------------------------------- 1151-------------------------------------------------------------------------------
1152*/ 1152*/
1153float64 int32_to_float64( int32 a ) 1153float64 int32_to_float64( int32 a )
1154{ 1154{
1155 flag zSign; 1155 flag zSign;
1156 uint32 absA; 1156 uint32 absA;
1157 int8 shiftCount; 1157 int8 shiftCount;
1158 bits64 zSig; 1158 bits64 zSig;
1159 1159
1160 if ( a == 0 ) return 0; 1160 if ( a == 0 ) return 0;
1161 zSign = ( a < 0 ); 1161 zSign = ( a < 0 );
1162 absA = zSign ? - a : a; 1162 absA = zSign ? - a : a;
1163 shiftCount = countLeadingZeros32( absA ) + 21; 1163 shiftCount = countLeadingZeros32( absA ) + 21;
1164 zSig = absA; 1164 zSig = absA;
1165 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount ); 1165 return packFloat64( zSign, 0x432 - shiftCount, zSig<<shiftCount );
1166 1166
1167} 1167}
1168 1168
1169float64 uint32_to_float64( uint32 a ) 1169float64 uint32_to_float64( uint32 a )
1170{ 1170{
1171 int8 shiftCount; 1171 int8 shiftCount;
1172 bits64 zSig = a; 1172 bits64 zSig = a;
1173 1173
1174 if ( a == 0 ) return 0; 1174 if ( a == 0 ) return 0;
1175 shiftCount = countLeadingZeros32( a ) + 21; 1175 shiftCount = countLeadingZeros32( a ) + 21;
1176 return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount ); 1176 return packFloat64( 0, 0x432 - shiftCount, zSig<<shiftCount );
1177 1177
1178} 1178}
1179 1179
1180#ifdef FLOATX80 1180#ifdef FLOATX80
1181 1181
1182/* 1182/*
1183------------------------------------------------------------------------------- 1183-------------------------------------------------------------------------------
1184Returns the result of converting the 32-bit two's complement integer `a' 1184Returns the result of converting the 32-bit two's complement integer `a'
1185to the extended double-precision floating-point format. The conversion 1185to the extended double-precision floating-point format. The conversion
1186is performed according to the IEC/IEEE Standard for Binary Floating-Point 1186is performed according to the IEC/IEEE Standard for Binary Floating-Point
1187Arithmetic. 1187Arithmetic.
1188------------------------------------------------------------------------------- 1188-------------------------------------------------------------------------------
1189*/ 1189*/
1190floatx80 int32_to_floatx80( int32 a ) 1190floatx80 int32_to_floatx80( int32 a )
1191{ 1191{
1192 flag zSign; 1192 flag zSign;
1193 uint32 absA; 1193 uint32 absA;
1194 int8 shiftCount; 1194 int8 shiftCount;
1195 bits64 zSig; 1195 bits64 zSig;
1196 1196
1197 if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1197 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1198 zSign = ( a < 0 ); 1198 zSign = ( a < 0 );
1199 absA = zSign ? - a : a; 1199 absA = zSign ? - a : a;
1200 shiftCount = countLeadingZeros32( absA ) + 32; 1200 shiftCount = countLeadingZeros32( absA ) + 32;
1201 zSig = absA; 1201 zSig = absA;
1202 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount ); 1202 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1203 1203
1204} 1204}
1205 1205
1206floatx80 uint32_to_floatx80( uint32 a ) 1206floatx80 uint32_to_floatx80( uint32 a )
1207{ 1207{
1208 int8 shiftCount; 1208 int8 shiftCount;
1209 bits64 zSig = a; 1209 bits64 zSig = a;
1210 1210
1211 if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1211 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1212 shiftCount = countLeadingZeros32( a ) + 32; 1212 shiftCount = countLeadingZeros32( a ) + 32;
1213 return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount ); 1213 return packFloatx80( 0, 0x403E - shiftCount, zSig<<shiftCount );
1214 1214
1215} 1215}
1216 1216
1217#endif 1217#endif
1218 1218
1219#ifdef FLOAT128 1219#ifdef FLOAT128
1220 1220
1221/* 1221/*
1222------------------------------------------------------------------------------- 1222-------------------------------------------------------------------------------
1223Returns the result of converting the 32-bit two's complement integer `a' to 1223Returns the result of converting the 32-bit two's complement integer `a' to
1224the quadruple-precision floating-point format. The conversion is performed 1224the quadruple-precision floating-point format. The conversion is performed
1225according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1225according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1226------------------------------------------------------------------------------- 1226-------------------------------------------------------------------------------
1227*/ 1227*/
1228float128 int32_to_float128( int32 a ) 1228float128 int32_to_float128( int32 a )
1229{ 1229{
1230 flag zSign; 1230 flag zSign;
1231 uint32 absA; 1231 uint32 absA;
1232 int8 shiftCount; 1232 int8 shiftCount;
1233 bits64 zSig0; 1233 bits64 zSig0;
1234 1234
1235 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1235 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1236 zSign = ( a < 0 ); 1236 zSign = ( a < 0 );
1237 absA = zSign ? - a : a; 1237 absA = zSign ? - a : a;
1238 shiftCount = countLeadingZeros32( absA ) + 17; 1238 shiftCount = countLeadingZeros32( absA ) + 17;
1239 zSig0 = absA; 1239 zSig0 = absA;
1240 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 ); 1240 return packFloat128( zSign, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1241 1241
1242} 1242}
1243 1243
1244float128 uint32_to_float128( uint32 a ) 1244float128 uint32_to_float128( uint32 a )
1245{ 1245{
1246 int8 shiftCount; 1246 int8 shiftCount;
1247 bits64 zSig0 = a; 1247 bits64 zSig0 = a;
1248 1248
1249 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1249 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1250 shiftCount = countLeadingZeros32( a ) + 17; 1250 shiftCount = countLeadingZeros32( a ) + 17;
1251 return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 ); 1251 return packFloat128( 0, 0x402E - shiftCount, zSig0<<shiftCount, 0 );
1252 1252
1253} 1253}
1254 1254
1255#endif 1255#endif
1256 1256
1257#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */ 1257#ifndef SOFTFLOAT_FOR_GCC /* __floatdi?f is in libgcc2.c */
1258/* 1258/*
1259------------------------------------------------------------------------------- 1259-------------------------------------------------------------------------------
1260Returns the result of converting the 64-bit two's complement integer `a' 1260Returns the result of converting the 64-bit two's complement integer `a'
1261to the single-precision floating-point format. The conversion is performed 1261to the single-precision floating-point format. The conversion is performed
1262according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1262according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1263------------------------------------------------------------------------------- 1263-------------------------------------------------------------------------------
1264*/ 1264*/
1265float32 int64_to_float32( int64 a ) 1265float32 int64_to_float32( int64 a )
1266{ 1266{
1267 flag zSign; 1267 flag zSign;
1268 uint64 absA; 1268 uint64 absA;
1269 int8 shiftCount; 1269 int8 shiftCount;
1270 1270
1271 if ( a == 0 ) return 0; 1271 if ( a == 0 ) return 0;
1272 zSign = ( a < 0 ); 1272 zSign = ( a < 0 );
1273 absA = zSign ? - a : a; 1273 absA = zSign ? - a : a;
1274 shiftCount = countLeadingZeros64( absA ) - 40; 1274 shiftCount = countLeadingZeros64( absA ) - 40;
1275 if ( 0 <= shiftCount ) { 1275 if ( 0 <= shiftCount ) {
1276 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount ); 1276 return packFloat32( zSign, 0x95 - shiftCount, absA<<shiftCount );
1277 } 1277 }
1278 else { 1278 else {
1279 shiftCount += 7; 1279 shiftCount += 7;
1280 if ( shiftCount < 0 ) { 1280 if ( shiftCount < 0 ) {
1281 shift64RightJamming( absA, - shiftCount, &absA ); 1281 shift64RightJamming( absA, - shiftCount, &absA );
1282 } 1282 }
1283 else { 1283 else {
1284 absA <<= shiftCount; 1284 absA <<= shiftCount;
1285 } 1285 }
1286 return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA ); 1286 return roundAndPackFloat32( zSign, 0x9C - shiftCount, absA );
1287 } 1287 }
1288 1288
1289} 1289}
1290 1290
1291/* 1291/*
1292------------------------------------------------------------------------------- 1292-------------------------------------------------------------------------------
1293Returns the result of converting the 64-bit two's complement integer `a' 1293Returns the result of converting the 64-bit two's complement integer `a'
1294to the double-precision floating-point format. The conversion is performed 1294to the double-precision floating-point format. The conversion is performed
1295according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1295according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1296------------------------------------------------------------------------------- 1296-------------------------------------------------------------------------------
1297*/ 1297*/
1298float64 int64_to_float64( int64 a ) 1298float64 int64_to_float64( int64 a )
1299{ 1299{
1300 flag zSign; 1300 flag zSign;
1301 1301
1302 if ( a == 0 ) return 0; 1302 if ( a == 0 ) return 0;
1303 if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) { 1303 if ( a == (sbits64) LIT64( 0x8000000000000000 ) ) {
1304 return packFloat64( 1, 0x43E, 0 ); 1304 return packFloat64( 1, 0x43E, 0 );
1305 } 1305 }
1306 zSign = ( a < 0 ); 1306 zSign = ( a < 0 );
1307 return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a ); 1307 return normalizeRoundAndPackFloat64( zSign, 0x43C, zSign ? - a : a );
1308 1308
1309} 1309}
1310 1310
1311#ifdef FLOATX80 1311#ifdef FLOATX80
1312 1312
1313/* 1313/*
1314------------------------------------------------------------------------------- 1314-------------------------------------------------------------------------------
1315Returns the result of converting the 64-bit two's complement integer `a' 1315Returns the result of converting the 64-bit two's complement integer `a'
1316to the extended double-precision floating-point format. The conversion 1316to the extended double-precision floating-point format. The conversion
1317is performed according to the IEC/IEEE Standard for Binary Floating-Point 1317is performed according to the IEC/IEEE Standard for Binary Floating-Point
1318Arithmetic. 1318Arithmetic.
1319------------------------------------------------------------------------------- 1319-------------------------------------------------------------------------------
1320*/ 1320*/
1321floatx80 int64_to_floatx80( int64 a ) 1321floatx80 int64_to_floatx80( int64 a )
1322{ 1322{
1323 flag zSign; 1323 flag zSign;
1324 uint64 absA; 1324 uint64 absA;
1325 int8 shiftCount; 1325 int8 shiftCount;
1326 1326
1327 if ( a == 0 ) return packFloatx80( 0, 0, 0 ); 1327 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1328 zSign = ( a < 0 ); 1328 zSign = ( a < 0 );
1329 absA = zSign ? - a : a; 1329 absA = zSign ? - a : a;
1330 shiftCount = countLeadingZeros64( absA ); 1330 shiftCount = countLeadingZeros64( absA );
1331 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount ); 1331 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );
1332 1332
1333} 1333}
1334 1334
1335#endif 1335#endif
1336 1336
1337#endif /* !SOFTFLOAT_FOR_GCC */ 1337#endif /* !SOFTFLOAT_FOR_GCC */
1338 1338
1339#ifdef FLOAT128 1339#ifdef FLOAT128
1340 1340
1341/* 1341/*
1342------------------------------------------------------------------------------- 1342-------------------------------------------------------------------------------
1343Returns the result of converting the 64-bit two's complement integer `a' to 1343Returns the result of converting the 64-bit two's complement integer `a' to
1344the quadruple-precision floating-point format. The conversion is performed 1344the quadruple-precision floating-point format. The conversion is performed
1345according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 1345according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
1346------------------------------------------------------------------------------- 1346-------------------------------------------------------------------------------
1347*/ 1347*/
1348float128 int64_to_float128( int64 a ) 1348float128 int64_to_float128( int64 a )
1349{ 1349{
1350 flag zSign; 1350 flag zSign;
1351 uint64 absA; 1351 uint64 absA;
1352 int8 shiftCount; 1352 int8 shiftCount;
1353 int32 zExp; 1353 int32 zExp;
1354 bits64 zSig0, zSig1; 1354 bits64 zSig0, zSig1;
1355 1355
1356 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 ); 1356 if ( a == 0 ) return packFloat128( 0, 0, 0, 0 );
1357 zSign = ( a < 0 ); 1357 zSign = ( a < 0 );
1358 absA = zSign ? - a : a; 1358 absA = zSign ? - a : a;
1359 shiftCount = countLeadingZeros64( absA ) + 49; 1359 shiftCount = countLeadingZeros64( absA ) + 49;
1360 zExp = 0x406E - shiftCount; 1360 zExp = 0x406E - shiftCount;
1361 if ( 64 <= shiftCount ) { 1361 if ( 64 <= shiftCount ) {
1362 zSig1 = 0; 1362 zSig1 = 0;
1363 zSig0 = absA; 1363 zSig0 = absA;
1364 shiftCount -= 64; 1364 shiftCount -= 64;
1365 } 1365 }
1366 else { 1366 else {
1367 zSig1 = absA; 1367 zSig1 = absA;
1368 zSig0 = 0; 1368 zSig0 = 0;
1369 } 1369 }
1370 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); 1370 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1371 return packFloat128( zSign, zExp, zSig0, zSig1 ); 1371 return packFloat128( zSign, zExp, zSig0, zSig1 );
1372 1372
1373} 1373}
1374 1374
1375#endif 1375#endif
1376 1376
1377#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1377#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1378/* 1378/*
1379------------------------------------------------------------------------------- 1379-------------------------------------------------------------------------------
1380Returns the result of converting the single-precision floating-point value 1380Returns the result of converting the single-precision floating-point value
1381`a' to the 32-bit two's complement integer format. The conversion is 1381`a' to the 32-bit two's complement integer format. The conversion is
1382performed according to the IEC/IEEE Standard for Binary Floating-Point 1382performed according to the IEC/IEEE Standard for Binary Floating-Point
1383Arithmetic---which means in particular that the conversion is rounded 1383Arithmetic---which means in particular that the conversion is rounded
1384according to the current rounding mode. If `a' is a NaN, the largest 1384according to the current rounding mode. If `a' is a NaN, the largest
1385positive integer is returned. Otherwise, if the conversion overflows, the 1385positive integer is returned. Otherwise, if the conversion overflows, the
1386largest integer with the same sign as `a' is returned. 1386largest integer with the same sign as `a' is returned.
1387------------------------------------------------------------------------------- 1387-------------------------------------------------------------------------------
1388*/ 1388*/
1389int32 float32_to_int32( float32 a ) 1389int32 float32_to_int32( float32 a )
1390{ 1390{
1391 flag aSign; 1391 flag aSign;
1392 int16 aExp, shiftCount; 1392 int16 aExp, shiftCount;
1393 bits32 aSig; 1393 bits32 aSig;
1394 bits64 aSig64; 1394 bits64 aSig64;
1395 1395
1396 aSig = extractFloat32Frac( a ); 1396 aSig = extractFloat32Frac( a );
1397 aExp = extractFloat32Exp( a ); 1397 aExp = extractFloat32Exp( a );
1398 aSign = extractFloat32Sign( a ); 1398 aSign = extractFloat32Sign( a );
1399 if ( ( aExp == 0xFF ) && aSig ) aSign = 0; 1399 if ( ( aExp == 0xFF ) && aSig ) aSign = 0;
1400 if ( aExp ) aSig |= 0x00800000; 1400 if ( aExp ) aSig |= 0x00800000;
1401 shiftCount = 0xAF - aExp; 1401 shiftCount = 0xAF - aExp;
1402 aSig64 = aSig; 1402 aSig64 = aSig;
1403 aSig64 <<= 32; 1403 aSig64 <<= 32;
1404 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 ); 1404 if ( 0 < shiftCount ) shift64RightJamming( aSig64, shiftCount, &aSig64 );
1405 return roundAndPackInt32( aSign, aSig64 ); 1405 return roundAndPackInt32( aSign, aSig64 );
1406 1406
1407} 1407}
1408#endif /* !SOFTFLOAT_FOR_GCC */ 1408#endif /* !SOFTFLOAT_FOR_GCC */
1409 1409
1410/* 1410/*
1411------------------------------------------------------------------------------- 1411-------------------------------------------------------------------------------
1412Returns the result of converting the single-precision floating-point value 1412Returns the result of converting the single-precision floating-point value
1413`a' to the 32-bit two's complement integer format. The conversion is 1413`a' to the 32-bit two's complement integer format. The conversion is
1414performed according to the IEC/IEEE Standard for Binary Floating-Point 1414performed according to the IEC/IEEE Standard for Binary Floating-Point
1415Arithmetic, except that the conversion is always rounded toward zero. 1415Arithmetic, except that the conversion is always rounded toward zero.
1416If `a' is a NaN, the largest positive integer is returned. Otherwise, if 1416If `a' is a NaN, the largest positive integer is returned. Otherwise, if
1417the conversion overflows, the largest integer with the same sign as `a' is 1417the conversion overflows, the largest integer with the same sign as `a' is
1418returned. 1418returned.
1419------------------------------------------------------------------------------- 1419-------------------------------------------------------------------------------
1420*/ 1420*/
1421int32 float32_to_int32_round_to_zero( float32 a ) 1421int32 float32_to_int32_round_to_zero( float32 a )
1422{ 1422{
1423 flag aSign; 1423 flag aSign;
1424 int16 aExp, shiftCount; 1424 int16 aExp, shiftCount;
1425 bits32 aSig; 1425 bits32 aSig;
1426 int32 z; 1426 int32 z;
1427 1427
1428 aSig = extractFloat32Frac( a ); 1428 aSig = extractFloat32Frac( a );
1429 aExp = extractFloat32Exp( a ); 1429 aExp = extractFloat32Exp( a );
1430 aSign = extractFloat32Sign( a ); 1430 aSign = extractFloat32Sign( a );
1431 shiftCount = aExp - 0x9E; 1431 shiftCount = aExp - 0x9E;
1432 if ( 0 <= shiftCount ) { 1432 if ( 0 <= shiftCount ) {
1433 if ( a != 0xCF000000 ) { 1433 if ( a != 0xCF000000 ) {
1434 float_raise( float_flag_invalid ); 1434 float_raise( float_flag_invalid );
1435 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF; 1435 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) return 0x7FFFFFFF;
1436 } 1436 }
1437 return (sbits32) 0x80000000; 1437 return (sbits32) 0x80000000;
1438 } 1438 }
1439 else if ( aExp <= 0x7E ) { 1439 else if ( aExp <= 0x7E ) {
1440 if ( aExp | aSig ) set_float_exception_inexact_flag(); 1440 if ( aExp | aSig ) set_float_exception_inexact_flag();
1441 return 0; 1441 return 0;
1442 } 1442 }
1443 aSig = ( aSig | 0x00800000 )<<8; 1443 aSig = ( aSig | 0x00800000 )<<8;
1444 z = aSig>>( - shiftCount ); 1444 z = aSig>>( - shiftCount );
1445 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { 1445 if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) {
1446 set_float_exception_inexact_flag(); 1446 set_float_exception_inexact_flag();
1447 } 1447 }
1448 if ( aSign ) z = - z; 1448 if ( aSign ) z = - z;
1449 return z; 1449 return z;
1450 1450
1451} 1451}
1452 1452
1453#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */ 1453#ifndef SOFTFLOAT_FOR_GCC /* __fix?fdi provided by libgcc2.c */
1454/* 1454/*
1455------------------------------------------------------------------------------- 1455-------------------------------------------------------------------------------
1456Returns the result of converting the single-precision floating-point value 1456Returns the result of converting the single-precision floating-point value
1457`a' to the 64-bit two's complement integer format. The conversion is 1457`a' to the 64-bit two's complement integer format. The conversion is
1458performed according to the IEC/IEEE Standard for Binary Floating-Point 1458performed according to the IEC/IEEE Standard for Binary Floating-Point
1459Arithmetic---which means in particular that the conversion is rounded 1459Arithmetic---which means in particular that the conversion is rounded
1460according to the current rounding mode. If `a' is a NaN, the largest 1460according to the current rounding mode. If `a' is a NaN, the largest
1461positive integer is returned. Otherwise, if the conversion overflows, the 1461positive integer is returned. Otherwise, if the conversion overflows, the
1462largest integer with the same sign as `a' is returned. 1462largest integer with the same sign as `a' is returned.
1463------------------------------------------------------------------------------- 1463-------------------------------------------------------------------------------
1464*/ 1464*/
1465int64 float32_to_int64( float32 a ) 1465int64 float32_to_int64( float32 a )
1466{ 1466{
1467 flag aSign; 1467 flag aSign;
1468 int16 aExp, shiftCount; 1468 int16 aExp, shiftCount;
1469 bits32 aSig; 1469 bits32 aSig;
1470 bits64 aSig64, aSigExtra; 1470 bits64 aSig64, aSigExtra;
1471 1471
1472 aSig = extractFloat32Frac( a ); 1472 aSig = extractFloat32Frac( a );
1473 aExp = extractFloat32Exp( a ); 1473 aExp = extractFloat32Exp( a );
1474 aSign = extractFloat32Sign( a ); 1474 aSign = extractFloat32Sign( a );
1475 shiftCount = 0xBE - aExp; 1475 shiftCount = 0xBE - aExp;
1476 if ( shiftCount < 0 ) { 1476 if ( shiftCount < 0 ) {
1477 float_raise( float_flag_invalid ); 1477 float_raise( float_flag_invalid );
1478 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 1478 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1479 return LIT64( 0x7FFFFFFFFFFFFFFF ); 1479 return LIT64( 0x7FFFFFFFFFFFFFFF );
1480 } 1480 }
1481 return (sbits64) LIT64( 0x8000000000000000 ); 1481 return (sbits64) LIT64( 0x8000000000000000 );
1482 } 1482 }
1483 if ( aExp ) aSig |= 0x00800000; 1483 if ( aExp ) aSig |= 0x00800000;
1484 aSig64 = aSig; 1484 aSig64 = aSig;
1485 aSig64 <<= 40; 1485 aSig64 <<= 40;
1486 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); 1486 shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra );
1487 return roundAndPackInt64( aSign, aSig64, aSigExtra ); 1487 return roundAndPackInt64( aSign, aSig64, aSigExtra );
1488 1488
1489} 1489}
1490 1490
1491/* 1491/*
1492------------------------------------------------------------------------------- 1492-------------------------------------------------------------------------------
1493Returns the result of converting the single-precision floating-point value 1493Returns the result of converting the single-precision floating-point value
1494`a' to the 64-bit two's complement integer format. The conversion is 1494`a' to the 64-bit two's complement integer format. The conversion is
1495performed according to the IEC/IEEE Standard for Binary Floating-Point 1495performed according to the IEC/IEEE Standard for Binary Floating-Point
1496Arithmetic, except that the conversion is always rounded toward zero. If 1496Arithmetic, except that the conversion is always rounded toward zero. If
1497`a' is a NaN, the largest positive integer is returned. Otherwise, if the 1497`a' is a NaN, the largest positive integer is returned. Otherwise, if the
1498conversion overflows, the largest integer with the same sign as `a' is 1498conversion overflows, the largest integer with the same sign as `a' is
1499returned. 1499returned.
1500------------------------------------------------------------------------------- 1500-------------------------------------------------------------------------------
1501*/ 1501*/
1502int64 float32_to_int64_round_to_zero( float32 a ) 1502int64 float32_to_int64_round_to_zero( float32 a )
1503{ 1503{
1504 flag aSign; 1504 flag aSign;
1505 int16 aExp, shiftCount; 1505 int16 aExp, shiftCount;
1506 bits32 aSig; 1506 bits32 aSig;
1507 bits64 aSig64; 1507 bits64 aSig64;
1508 int64 z; 1508 int64 z;
1509 1509
1510 aSig = extractFloat32Frac( a ); 1510 aSig = extractFloat32Frac( a );
1511 aExp = extractFloat32Exp( a ); 1511 aExp = extractFloat32Exp( a );
1512 aSign = extractFloat32Sign( a ); 1512 aSign = extractFloat32Sign( a );
1513 shiftCount = aExp - 0xBE; 1513 shiftCount = aExp - 0xBE;
1514 if ( 0 <= shiftCount ) { 1514 if ( 0 <= shiftCount ) {
1515 if ( a != 0xDF000000 ) { 1515 if ( a != 0xDF000000 ) {
1516 float_raise( float_flag_invalid ); 1516 float_raise( float_flag_invalid );
1517 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { 1517 if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) {
1518 return LIT64( 0x7FFFFFFFFFFFFFFF ); 1518 return LIT64( 0x7FFFFFFFFFFFFFFF );
1519 } 1519 }
1520 } 1520 }
1521 return (sbits64) LIT64( 0x8000000000000000 ); 1521 return (sbits64) LIT64( 0x8000000000000000 );
1522 } 1522 }
1523 else if ( aExp <= 0x7E ) { 1523 else if ( aExp <= 0x7E ) {
1524 if ( aExp | aSig ) set_float_exception_inexact_flag(); 1524 if ( aExp | aSig ) set_float_exception_inexact_flag();
1525 return 0; 1525 return 0;
1526 } 1526 }
1527 aSig64 = aSig | 0x00800000; 1527 aSig64 = aSig | 0x00800000;
1528 aSig64 <<= 40; 1528 aSig64 <<= 40;
1529 z = aSig64>>( - shiftCount ); 1529 z = aSig64>>( - shiftCount );
1530 if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) { 1530 if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) {
1531 set_float_exception_inexact_flag(); 1531 set_float_exception_inexact_flag();
1532 } 1532 }
1533 if ( aSign ) z = - z; 1533 if ( aSign ) z = - z;
1534 return z; 1534 return z;
1535 1535
1536} 1536}
1537#endif /* !SOFTFLOAT_FOR_GCC */ 1537#endif /* !SOFTFLOAT_FOR_GCC */
1538 1538
1539/* 1539/*
1540------------------------------------------------------------------------------- 1540-------------------------------------------------------------------------------
1541Returns the result of converting the single-precision floating-point value 1541Returns the result of converting the single-precision floating-point value
1542`a' to the double-precision floating-point format. The conversion is 1542`a' to the double-precision floating-point format. The conversion is
1543performed according to the IEC/IEEE Standard for Binary Floating-Point 1543performed according to the IEC/IEEE Standard for Binary Floating-Point
1544Arithmetic. 1544Arithmetic.
1545------------------------------------------------------------------------------- 1545-------------------------------------------------------------------------------
1546*/ 1546*/
1547float64 float32_to_float64( float32 a ) 1547float64 float32_to_float64( float32 a )
1548{ 1548{
1549 flag aSign; 1549 flag aSign;
1550 int16 aExp; 1550 int16 aExp;
1551 bits32 aSig; 1551 bits32 aSig;
1552 1552
1553 aSig = extractFloat32Frac( a ); 1553 aSig = extractFloat32Frac( a );
1554 aExp = extractFloat32Exp( a ); 1554 aExp = extractFloat32Exp( a );
1555 aSign = extractFloat32Sign( a ); 1555 aSign = extractFloat32Sign( a );
1556 if ( aExp == 0xFF ) { 1556 if ( aExp == 0xFF ) {
1557 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); 1557 if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) );
1558 return packFloat64( aSign, 0x7FF, 0 ); 1558 return packFloat64( aSign, 0x7FF, 0 );
1559 } 1559 }
1560 if ( aExp == 0 ) { 1560 if ( aExp == 0 ) {
1561 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); 1561 if ( aSig == 0 ) return packFloat64( aSign, 0, 0 );
1562 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1562 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1563 --aExp; 1563 --aExp;
1564 } 1564 }
1565 return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); 1565 return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 );
1566 1566
1567} 1567}
1568 1568
1569#ifdef FLOATX80 1569#ifdef FLOATX80
1570 1570
1571/* 1571/*
1572------------------------------------------------------------------------------- 1572-------------------------------------------------------------------------------
1573Returns the result of converting the single-precision floating-point value 1573Returns the result of converting the single-precision floating-point value
1574`a' to the extended double-precision floating-point format. The conversion 1574`a' to the extended double-precision floating-point format. The conversion
1575is performed according to the IEC/IEEE Standard for Binary Floating-Point 1575is performed according to the IEC/IEEE Standard for Binary Floating-Point
1576Arithmetic. 1576Arithmetic.
1577------------------------------------------------------------------------------- 1577-------------------------------------------------------------------------------
1578*/ 1578*/
1579floatx80 float32_to_floatx80( float32 a ) 1579floatx80 float32_to_floatx80( float32 a )
1580{ 1580{
1581 flag aSign; 1581 flag aSign;
1582 int16 aExp; 1582 int16 aExp;
1583 bits32 aSig; 1583 bits32 aSig;
1584 1584
1585 aSig = extractFloat32Frac( a ); 1585 aSig = extractFloat32Frac( a );
1586 aExp = extractFloat32Exp( a ); 1586 aExp = extractFloat32Exp( a );
1587 aSign = extractFloat32Sign( a ); 1587 aSign = extractFloat32Sign( a );
1588 if ( aExp == 0xFF ) { 1588 if ( aExp == 0xFF ) {
1589 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); 1589 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) );
1590 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 1590 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
1591 } 1591 }
1592 if ( aExp == 0 ) { 1592 if ( aExp == 0 ) {
1593 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); 1593 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1594 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1594 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1595 } 1595 }
1596 aSig |= 0x00800000; 1596 aSig |= 0x00800000;
1597 return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); 1597 return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 );
1598 1598
1599} 1599}
1600 1600
1601#endif 1601#endif
1602 1602
1603#ifdef FLOAT128 1603#ifdef FLOAT128
1604 1604
1605/* 1605/*
1606------------------------------------------------------------------------------- 1606-------------------------------------------------------------------------------
1607Returns the result of converting the single-precision floating-point value 1607Returns the result of converting the single-precision floating-point value
1608`a' to the double-precision floating-point format. The conversion is 1608`a' to the double-precision floating-point format. The conversion is
1609performed according to the IEC/IEEE Standard for Binary Floating-Point 1609performed according to the IEC/IEEE Standard for Binary Floating-Point
1610Arithmetic. 1610Arithmetic.
1611------------------------------------------------------------------------------- 1611-------------------------------------------------------------------------------
1612*/ 1612*/
1613float128 float32_to_float128( float32 a ) 1613float128 float32_to_float128( float32 a )
1614{ 1614{
1615 flag aSign; 1615 flag aSign;
1616 int16 aExp; 1616 int16 aExp;
1617 bits32 aSig; 1617 bits32 aSig;
1618 1618
1619 aSig = extractFloat32Frac( a ); 1619 aSig = extractFloat32Frac( a );
1620 aExp = extractFloat32Exp( a ); 1620 aExp = extractFloat32Exp( a );
1621 aSign = extractFloat32Sign( a ); 1621 aSign = extractFloat32Sign( a );
1622 if ( aExp == 0xFF ) { 1622 if ( aExp == 0xFF ) {
1623 if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) ); 1623 if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) );
1624 return packFloat128( aSign, 0x7FFF, 0, 0 ); 1624 return packFloat128( aSign, 0x7FFF, 0, 0 );
1625 } 1625 }
1626 if ( aExp == 0 ) { 1626 if ( aExp == 0 ) {
1627 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); 1627 if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 );
1628 normalizeFloat32Subnormal( aSig, &aExp, &aSig ); 1628 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1629 --aExp; 1629 --aExp;
1630 } 1630 }
1631 return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 ); 1631 return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 );
1632 1632
1633} 1633}
1634 1634
1635#endif 1635#endif
1636 1636
1637#ifndef SOFTFLOAT_FOR_GCC /* Not needed */ 1637#ifndef SOFTFLOAT_FOR_GCC /* Not needed */
1638/* 1638/*
1639------------------------------------------------------------------------------- 1639-------------------------------------------------------------------------------
1640Rounds the single-precision floating-point value `a' to an integer, and 1640Rounds the single-precision floating-point value `a' to an integer, and
1641returns the result as a single-precision floating-point value. The 1641returns the result as a single-precision floating-point value. The
1642operation is performed according to the IEC/IEEE Standard for Binary 1642operation is performed according to the IEC/IEEE Standard for Binary
1643Floating-Point Arithmetic. 1643Floating-Point Arithmetic.
1644------------------------------------------------------------------------------- 1644-------------------------------------------------------------------------------
1645*/ 1645*/
1646float32 float32_round_to_int( float32 a ) 1646float32 float32_round_to_int( float32 a )
1647{ 1647{
1648 flag aSign; 1648 flag aSign;
1649 int16 aExp; 1649 int16 aExp;
1650 bits32 lastBitMask, roundBitsMask; 1650 bits32 lastBitMask, roundBitsMask;
1651 int8 roundingMode; 1651 int8 roundingMode;
1652 float32 z; 1652 float32 z;
1653 1653
1654 aExp = extractFloat32Exp( a ); 1654 aExp = extractFloat32Exp( a );
1655 if ( 0x96 <= aExp ) { 1655 if ( 0x96 <= aExp ) {
1656 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { 1656 if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) {
1657 return propagateFloat32NaN( a, a ); 1657 return propagateFloat32NaN( a, a );
1658 } 1658 }
1659 return a; 1659 return a;
1660 } 1660 }
1661 if ( aExp <= 0x7E ) { 1661 if ( aExp <= 0x7E ) {
1662 if ( (bits32) ( a<<1 ) == 0 ) return a; 1662 if ( (bits32) ( a<<1 ) == 0 ) return a;
1663 set_float_exception_inexact_flag(); 1663 set_float_exception_inexact_flag();
@@ -2942,2010 +2942,2010 @@ float64 float64_div( float64 a, float64  @@ -2942,2010 +2942,2010 @@ float64 float64_div( float64 a, float64
2942 if ( bExp == 0x7FF ) { 2942 if ( bExp == 0x7FF ) {
2943 if ( bSig ) return propagateFloat64NaN( a, b ); 2943 if ( bSig ) return propagateFloat64NaN( a, b );
2944 float_raise( float_flag_invalid ); 2944 float_raise( float_flag_invalid );
2945 return float64_default_nan; 2945 return float64_default_nan;
2946 } 2946 }
2947 return packFloat64( zSign, 0x7FF, 0 ); 2947 return packFloat64( zSign, 0x7FF, 0 );
2948 } 2948 }
2949 if ( bExp == 0x7FF ) { 2949 if ( bExp == 0x7FF ) {
2950 if ( bSig ) return propagateFloat64NaN( a, b ); 2950 if ( bSig ) return propagateFloat64NaN( a, b );
2951 return packFloat64( zSign, 0, 0 ); 2951 return packFloat64( zSign, 0, 0 );
2952 } 2952 }
2953 if ( bExp == 0 ) { 2953 if ( bExp == 0 ) {
2954 if ( bSig == 0 ) { 2954 if ( bSig == 0 ) {
2955 if ( ( aExp | aSig ) == 0 ) { 2955 if ( ( aExp | aSig ) == 0 ) {
2956 float_raise( float_flag_invalid ); 2956 float_raise( float_flag_invalid );
2957 return float64_default_nan; 2957 return float64_default_nan;
2958 } 2958 }
2959 float_raise( float_flag_divbyzero ); 2959 float_raise( float_flag_divbyzero );
2960 return packFloat64( zSign, 0x7FF, 0 ); 2960 return packFloat64( zSign, 0x7FF, 0 );
2961 } 2961 }
2962 normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 2962 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
2963 } 2963 }
2964 if ( aExp == 0 ) { 2964 if ( aExp == 0 ) {
2965 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); 2965 if ( aSig == 0 ) return packFloat64( zSign, 0, 0 );
2966 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 2966 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
2967 } 2967 }
2968 zExp = aExp - bExp + 0x3FD; 2968 zExp = aExp - bExp + 0x3FD;
2969 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; 2969 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10;
2970 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 2970 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
2971 if ( bSig <= ( aSig + aSig ) ) { 2971 if ( bSig <= ( aSig + aSig ) ) {
2972 aSig >>= 1; 2972 aSig >>= 1;
2973 ++zExp; 2973 ++zExp;
2974 } 2974 }
2975 zSig = estimateDiv128To64( aSig, 0, bSig ); 2975 zSig = estimateDiv128To64( aSig, 0, bSig );
2976 if ( ( zSig & 0x1FF ) <= 2 ) { 2976 if ( ( zSig & 0x1FF ) <= 2 ) {
2977 mul64To128( bSig, zSig, &term0, &term1 ); 2977 mul64To128( bSig, zSig, &term0, &term1 );
2978 sub128( aSig, 0, term0, term1, &rem0, &rem1 ); 2978 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
2979 while ( (sbits64) rem0 < 0 ) { 2979 while ( (sbits64) rem0 < 0 ) {
2980 --zSig; 2980 --zSig;
2981 add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 2981 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2982 } 2982 }
2983 zSig |= ( rem1 != 0 ); 2983 zSig |= ( rem1 != 0 );
2984 } 2984 }
2985 return roundAndPackFloat64( zSign, zExp, zSig ); 2985 return roundAndPackFloat64( zSign, zExp, zSig );
2986 2986
2987} 2987}
2988 2988
2989#ifndef SOFTFLOAT_FOR_GCC 2989#ifndef SOFTFLOAT_FOR_GCC
2990/* 2990/*
2991------------------------------------------------------------------------------- 2991-------------------------------------------------------------------------------
2992Returns the remainder of the double-precision floating-point value `a' 2992Returns the remainder of the double-precision floating-point value `a'
2993with respect to the corresponding value `b'. The operation is performed 2993with respect to the corresponding value `b'. The operation is performed
2994according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 2994according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2995------------------------------------------------------------------------------- 2995-------------------------------------------------------------------------------
2996*/ 2996*/
2997float64 float64_rem( float64 a, float64 b ) 2997float64 float64_rem( float64 a, float64 b )
2998{ 2998{
2999 flag aSign, bSign, zSign; 2999 flag aSign, bSign, zSign;
3000 int16 aExp, bExp, expDiff; 3000 int16 aExp, bExp, expDiff;
3001 bits64 aSig, bSig; 3001 bits64 aSig, bSig;
3002 bits64 q, alternateASig; 3002 bits64 q, alternateASig;
3003 sbits64 sigMean; 3003 sbits64 sigMean;
3004 3004
3005 aSig = extractFloat64Frac( a ); 3005 aSig = extractFloat64Frac( a );
3006 aExp = extractFloat64Exp( a ); 3006 aExp = extractFloat64Exp( a );
3007 aSign = extractFloat64Sign( a ); 3007 aSign = extractFloat64Sign( a );
3008 bSig = extractFloat64Frac( b ); 3008 bSig = extractFloat64Frac( b );
3009 bExp = extractFloat64Exp( b ); 3009 bExp = extractFloat64Exp( b );
3010 bSign = extractFloat64Sign( b ); 3010 bSign = extractFloat64Sign( b );
3011 if ( aExp == 0x7FF ) { 3011 if ( aExp == 0x7FF ) {
3012 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { 3012 if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) {
3013 return propagateFloat64NaN( a, b ); 3013 return propagateFloat64NaN( a, b );
3014 } 3014 }
3015 float_raise( float_flag_invalid ); 3015 float_raise( float_flag_invalid );
3016 return float64_default_nan; 3016 return float64_default_nan;
3017 } 3017 }
3018 if ( bExp == 0x7FF ) { 3018 if ( bExp == 0x7FF ) {
3019 if ( bSig ) return propagateFloat64NaN( a, b ); 3019 if ( bSig ) return propagateFloat64NaN( a, b );
3020 return a; 3020 return a;
3021 } 3021 }
3022 if ( bExp == 0 ) { 3022 if ( bExp == 0 ) {
3023 if ( bSig == 0 ) { 3023 if ( bSig == 0 ) {
3024 float_raise( float_flag_invalid ); 3024 float_raise( float_flag_invalid );
3025 return float64_default_nan; 3025 return float64_default_nan;
3026 } 3026 }
3027 normalizeFloat64Subnormal( bSig, &bExp, &bSig ); 3027 normalizeFloat64Subnormal( bSig, &bExp, &bSig );
3028 } 3028 }
3029 if ( aExp == 0 ) { 3029 if ( aExp == 0 ) {
3030 if ( aSig == 0 ) return a; 3030 if ( aSig == 0 ) return a;
3031 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 3031 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3032 } 3032 }
3033 expDiff = aExp - bExp; 3033 expDiff = aExp - bExp;
3034 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; 3034 aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11;
3035 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; 3035 bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11;
3036 if ( expDiff < 0 ) { 3036 if ( expDiff < 0 ) {
3037 if ( expDiff < -1 ) return a; 3037 if ( expDiff < -1 ) return a;
3038 aSig >>= 1; 3038 aSig >>= 1;
3039 } 3039 }
3040 q = ( bSig <= aSig ); 3040 q = ( bSig <= aSig );
3041 if ( q ) aSig -= bSig; 3041 if ( q ) aSig -= bSig;
3042 expDiff -= 64; 3042 expDiff -= 64;
3043 while ( 0 < expDiff ) { 3043 while ( 0 < expDiff ) {
3044 q = estimateDiv128To64( aSig, 0, bSig ); 3044 q = estimateDiv128To64( aSig, 0, bSig );
3045 q = ( 2 < q ) ? q - 2 : 0; 3045 q = ( 2 < q ) ? q - 2 : 0;
3046 aSig = - ( ( bSig>>2 ) * q ); 3046 aSig = - ( ( bSig>>2 ) * q );
3047 expDiff -= 62; 3047 expDiff -= 62;
3048 } 3048 }
3049 expDiff += 64; 3049 expDiff += 64;
3050 if ( 0 < expDiff ) { 3050 if ( 0 < expDiff ) {
3051 q = estimateDiv128To64( aSig, 0, bSig ); 3051 q = estimateDiv128To64( aSig, 0, bSig );
3052 q = ( 2 < q ) ? q - 2 : 0; 3052 q = ( 2 < q ) ? q - 2 : 0;
3053 q >>= 64 - expDiff; 3053 q >>= 64 - expDiff;
3054 bSig >>= 2; 3054 bSig >>= 2;
3055 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; 3055 aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q;
3056 } 3056 }
3057 else { 3057 else {
3058 aSig >>= 2; 3058 aSig >>= 2;
3059 bSig >>= 2; 3059 bSig >>= 2;
3060 } 3060 }
3061 do { 3061 do {
3062 alternateASig = aSig; 3062 alternateASig = aSig;
3063 ++q; 3063 ++q;
3064 aSig -= bSig; 3064 aSig -= bSig;
3065 } while ( 0 <= (sbits64) aSig ); 3065 } while ( 0 <= (sbits64) aSig );
3066 sigMean = aSig + alternateASig; 3066 sigMean = aSig + alternateASig;
3067 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { 3067 if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) {
3068 aSig = alternateASig; 3068 aSig = alternateASig;
3069 } 3069 }
3070 zSign = ( (sbits64) aSig < 0 ); 3070 zSign = ( (sbits64) aSig < 0 );
3071 if ( zSign ) aSig = - aSig; 3071 if ( zSign ) aSig = - aSig;
3072 return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); 3072 return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig );
3073 3073
3074} 3074}
3075 3075
3076/* 3076/*
3077------------------------------------------------------------------------------- 3077-------------------------------------------------------------------------------
3078Returns the square root of the double-precision floating-point value `a'. 3078Returns the square root of the double-precision floating-point value `a'.
3079The operation is performed according to the IEC/IEEE Standard for Binary 3079The operation is performed according to the IEC/IEEE Standard for Binary
3080Floating-Point Arithmetic. 3080Floating-Point Arithmetic.
3081------------------------------------------------------------------------------- 3081-------------------------------------------------------------------------------
3082*/ 3082*/
3083float64 float64_sqrt( float64 a ) 3083float64 float64_sqrt( float64 a )
3084{ 3084{
3085 flag aSign; 3085 flag aSign;
3086 int16 aExp, zExp; 3086 int16 aExp, zExp;
3087 bits64 aSig, zSig, doubleZSig; 3087 bits64 aSig, zSig, doubleZSig;
3088 bits64 rem0, rem1, term0, term1; 3088 bits64 rem0, rem1, term0, term1;
3089 3089
3090 aSig = extractFloat64Frac( a ); 3090 aSig = extractFloat64Frac( a );
3091 aExp = extractFloat64Exp( a ); 3091 aExp = extractFloat64Exp( a );
3092 aSign = extractFloat64Sign( a ); 3092 aSign = extractFloat64Sign( a );
3093 if ( aExp == 0x7FF ) { 3093 if ( aExp == 0x7FF ) {
3094 if ( aSig ) return propagateFloat64NaN( a, a ); 3094 if ( aSig ) return propagateFloat64NaN( a, a );
3095 if ( ! aSign ) return a; 3095 if ( ! aSign ) return a;
3096 float_raise( float_flag_invalid ); 3096 float_raise( float_flag_invalid );
3097 return float64_default_nan; 3097 return float64_default_nan;
3098 } 3098 }
3099 if ( aSign ) { 3099 if ( aSign ) {
3100 if ( ( aExp | aSig ) == 0 ) return a; 3100 if ( ( aExp | aSig ) == 0 ) return a;
3101 float_raise( float_flag_invalid ); 3101 float_raise( float_flag_invalid );
3102 return float64_default_nan; 3102 return float64_default_nan;
3103 } 3103 }
3104 if ( aExp == 0 ) { 3104 if ( aExp == 0 ) {
3105 if ( aSig == 0 ) return 0; 3105 if ( aSig == 0 ) return 0;
3106 normalizeFloat64Subnormal( aSig, &aExp, &aSig ); 3106 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
3107 } 3107 }
3108 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; 3108 zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE;
3109 aSig |= LIT64( 0x0010000000000000 ); 3109 aSig |= LIT64( 0x0010000000000000 );
3110 zSig = estimateSqrt32( aExp, aSig>>21 ); 3110 zSig = estimateSqrt32( aExp, aSig>>21 );
3111 aSig <<= 9 - ( aExp & 1 ); 3111 aSig <<= 9 - ( aExp & 1 );
3112 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); 3112 zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 );
3113 if ( ( zSig & 0x1FF ) <= 5 ) { 3113 if ( ( zSig & 0x1FF ) <= 5 ) {
3114 doubleZSig = zSig<<1; 3114 doubleZSig = zSig<<1;
3115 mul64To128( zSig, zSig, &term0, &term1 ); 3115 mul64To128( zSig, zSig, &term0, &term1 );
3116 sub128( aSig, 0, term0, term1, &rem0, &rem1 ); 3116 sub128( aSig, 0, term0, term1, &rem0, &rem1 );
3117 while ( (sbits64) rem0 < 0 ) { 3117 while ( (sbits64) rem0 < 0 ) {
3118 --zSig; 3118 --zSig;
3119 doubleZSig -= 2; 3119 doubleZSig -= 2;
3120 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); 3120 add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 );
3121 } 3121 }
3122 zSig |= ( ( rem0 | rem1 ) != 0 ); 3122 zSig |= ( ( rem0 | rem1 ) != 0 );
3123 } 3123 }
3124 return roundAndPackFloat64( 0, zExp, zSig ); 3124 return roundAndPackFloat64( 0, zExp, zSig );
3125 3125
3126} 3126}
3127#endif 3127#endif
3128 3128
3129/* 3129/*
3130------------------------------------------------------------------------------- 3130-------------------------------------------------------------------------------
3131Returns 1 if the double-precision floating-point value `a' is equal to the 3131Returns 1 if the double-precision floating-point value `a' is equal to the
3132corresponding value `b', and 0 otherwise. The comparison is performed 3132corresponding value `b', and 0 otherwise. The comparison is performed
3133according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3133according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3134------------------------------------------------------------------------------- 3134-------------------------------------------------------------------------------
3135*/ 3135*/
3136flag float64_eq( float64 a, float64 b ) 3136flag float64_eq( float64 a, float64 b )
3137{ 3137{
3138 3138
3139 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3139 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3140 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3140 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3141 ) { 3141 ) {
3142 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3142 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3143 float_raise( float_flag_invalid ); 3143 float_raise( float_flag_invalid );
3144 } 3144 }
3145 return 0; 3145 return 0;
3146 } 3146 }
3147 return ( a == b ) || 3147 return ( a == b ) ||
3148 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 ); 3148 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 0 );
3149 3149
3150} 3150}
3151 3151
3152/* 3152/*
3153------------------------------------------------------------------------------- 3153-------------------------------------------------------------------------------
3154Returns 1 if the double-precision floating-point value `a' is less than or 3154Returns 1 if the double-precision floating-point value `a' is less than or
3155equal to the corresponding value `b', and 0 otherwise. The comparison is 3155equal to the corresponding value `b', and 0 otherwise. The comparison is
3156performed according to the IEC/IEEE Standard for Binary Floating-Point 3156performed according to the IEC/IEEE Standard for Binary Floating-Point
3157Arithmetic. 3157Arithmetic.
3158------------------------------------------------------------------------------- 3158-------------------------------------------------------------------------------
3159*/ 3159*/
3160flag float64_le( float64 a, float64 b ) 3160flag float64_le( float64 a, float64 b )
3161{ 3161{
3162 flag aSign, bSign; 3162 flag aSign, bSign;
3163 3163
3164 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3164 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3165 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3165 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3166 ) { 3166 ) {
3167 float_raise( float_flag_invalid ); 3167 float_raise( float_flag_invalid );
3168 return 0; 3168 return 0;
3169 } 3169 }
3170 aSign = extractFloat64Sign( a ); 3170 aSign = extractFloat64Sign( a );
3171 bSign = extractFloat64Sign( b ); 3171 bSign = extractFloat64Sign( b );
3172 if ( aSign != bSign ) 3172 if ( aSign != bSign )
3173 return aSign || 3173 return aSign ||
3174 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) == 3174 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) ==
3175 0 ); 3175 0 );
3176 return ( a == b ) || 3176 return ( a == b ) ||
3177 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 3177 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3178 3178
3179} 3179}
3180 3180
3181/* 3181/*
3182------------------------------------------------------------------------------- 3182-------------------------------------------------------------------------------
3183Returns 1 if the double-precision floating-point value `a' is less than 3183Returns 1 if the double-precision floating-point value `a' is less than
3184the corresponding value `b', and 0 otherwise. The comparison is performed 3184the corresponding value `b', and 0 otherwise. The comparison is performed
3185according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3185according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3186------------------------------------------------------------------------------- 3186-------------------------------------------------------------------------------
3187*/ 3187*/
3188flag float64_lt( float64 a, float64 b ) 3188flag float64_lt( float64 a, float64 b )
3189{ 3189{
3190 flag aSign, bSign; 3190 flag aSign, bSign;
3191 3191
3192 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3192 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3193 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3193 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3194 ) { 3194 ) {
3195 float_raise( float_flag_invalid ); 3195 float_raise( float_flag_invalid );
3196 return 0; 3196 return 0;
3197 } 3197 }
3198 aSign = extractFloat64Sign( a ); 3198 aSign = extractFloat64Sign( a );
3199 bSign = extractFloat64Sign( b ); 3199 bSign = extractFloat64Sign( b );
3200 if ( aSign != bSign ) 3200 if ( aSign != bSign )
3201 return aSign && 3201 return aSign &&
3202 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) != 3202 ( (bits64) ( ( FLOAT64_DEMANGLE(a) | FLOAT64_DEMANGLE(b) )<<1 ) !=
3203 0 ); 3203 0 );
3204 return ( a != b ) && 3204 return ( a != b ) &&
3205 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) ); 3205 ( aSign ^ ( FLOAT64_DEMANGLE(a) < FLOAT64_DEMANGLE(b) ) );
3206 3206
3207} 3207}
3208 3208
3209#ifndef SOFTFLOAT_FOR_GCC 3209#ifndef SOFTFLOAT_FOR_GCC
3210/* 3210/*
3211------------------------------------------------------------------------------- 3211-------------------------------------------------------------------------------
3212Returns 1 if the double-precision floating-point value `a' is equal to the 3212Returns 1 if the double-precision floating-point value `a' is equal to the
3213corresponding value `b', and 0 otherwise. The invalid exception is raised 3213corresponding value `b', and 0 otherwise. The invalid exception is raised
3214if either operand is a NaN. Otherwise, the comparison is performed 3214if either operand is a NaN. Otherwise, the comparison is performed
3215according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3215according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3216------------------------------------------------------------------------------- 3216-------------------------------------------------------------------------------
3217*/ 3217*/
3218flag float64_eq_signaling( float64 a, float64 b ) 3218flag float64_eq_signaling( float64 a, float64 b )
3219{ 3219{
3220 3220
3221 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3221 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3222 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3222 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3223 ) { 3223 ) {
3224 float_raise( float_flag_invalid ); 3224 float_raise( float_flag_invalid );
3225 return 0; 3225 return 0;
3226 } 3226 }
3227 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); 3227 return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 );
3228 3228
3229} 3229}
3230 3230
3231/* 3231/*
3232------------------------------------------------------------------------------- 3232-------------------------------------------------------------------------------
3233Returns 1 if the double-precision floating-point value `a' is less than or 3233Returns 1 if the double-precision floating-point value `a' is less than or
3234equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not 3234equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not
3235cause an exception. Otherwise, the comparison is performed according to the 3235cause an exception. Otherwise, the comparison is performed according to the
3236IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3236IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3237------------------------------------------------------------------------------- 3237-------------------------------------------------------------------------------
3238*/ 3238*/
3239flag float64_le_quiet( float64 a, float64 b ) 3239flag float64_le_quiet( float64 a, float64 b )
3240{ 3240{
3241 flag aSign, bSign; 3241 flag aSign, bSign;
3242 3242
3243 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3243 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3244 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3244 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3245 ) { 3245 ) {
3246 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3246 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3247 float_raise( float_flag_invalid ); 3247 float_raise( float_flag_invalid );
3248 } 3248 }
3249 return 0; 3249 return 0;
3250 } 3250 }
3251 aSign = extractFloat64Sign( a ); 3251 aSign = extractFloat64Sign( a );
3252 bSign = extractFloat64Sign( b ); 3252 bSign = extractFloat64Sign( b );
3253 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); 3253 if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 );
3254 return ( a == b ) || ( aSign ^ ( a < b ) ); 3254 return ( a == b ) || ( aSign ^ ( a < b ) );
3255 3255
3256} 3256}
3257 3257
3258/* 3258/*
3259------------------------------------------------------------------------------- 3259-------------------------------------------------------------------------------
3260Returns 1 if the double-precision floating-point value `a' is less than 3260Returns 1 if the double-precision floating-point value `a' is less than
3261the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an 3261the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an
3262exception. Otherwise, the comparison is performed according to the IEC/IEEE 3262exception. Otherwise, the comparison is performed according to the IEC/IEEE
3263Standard for Binary Floating-Point Arithmetic. 3263Standard for Binary Floating-Point Arithmetic.
3264------------------------------------------------------------------------------- 3264-------------------------------------------------------------------------------
3265*/ 3265*/
3266flag float64_lt_quiet( float64 a, float64 b ) 3266flag float64_lt_quiet( float64 a, float64 b )
3267{ 3267{
3268 flag aSign, bSign; 3268 flag aSign, bSign;
3269 3269
3270 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) 3270 if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) )
3271 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) 3271 || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) )
3272 ) { 3272 ) {
3273 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { 3273 if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) {
3274 float_raise( float_flag_invalid ); 3274 float_raise( float_flag_invalid );
3275 } 3275 }
3276 return 0; 3276 return 0;
3277 } 3277 }
3278 aSign = extractFloat64Sign( a ); 3278 aSign = extractFloat64Sign( a );
3279 bSign = extractFloat64Sign( b ); 3279 bSign = extractFloat64Sign( b );
3280 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); 3280 if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 );
3281 return ( a != b ) && ( aSign ^ ( a < b ) ); 3281 return ( a != b ) && ( aSign ^ ( a < b ) );
3282 3282
3283} 3283}
3284#endif 3284#endif
3285 3285
3286#ifdef FLOATX80 3286#ifdef FLOATX80
3287 3287
3288/* 3288/*
3289------------------------------------------------------------------------------- 3289-------------------------------------------------------------------------------
3290Returns the result of converting the extended double-precision floating- 3290Returns the result of converting the extended double-precision floating-
3291point value `a' to the 32-bit two's complement integer format. The 3291point value `a' to the 32-bit two's complement integer format. The
3292conversion is performed according to the IEC/IEEE Standard for Binary 3292conversion is performed according to the IEC/IEEE Standard for Binary
3293Floating-Point Arithmetic---which means in particular that the conversion 3293Floating-Point Arithmetic---which means in particular that the conversion
3294is rounded according to the current rounding mode. If `a' is a NaN, the 3294is rounded according to the current rounding mode. If `a' is a NaN, the
3295largest positive integer is returned. Otherwise, if the conversion 3295largest positive integer is returned. Otherwise, if the conversion
3296overflows, the largest integer with the same sign as `a' is returned. 3296overflows, the largest integer with the same sign as `a' is returned.
3297------------------------------------------------------------------------------- 3297-------------------------------------------------------------------------------
3298*/ 3298*/
3299int32 floatx80_to_int32( floatx80 a ) 3299int32 floatx80_to_int32( floatx80 a )
3300{ 3300{
3301 flag aSign; 3301 flag aSign;
3302 int32 aExp, shiftCount; 3302 int32 aExp, shiftCount;
3303 bits64 aSig; 3303 bits64 aSig;
3304 3304
3305 aSig = extractFloatx80Frac( a ); 3305 aSig = extractFloatx80Frac( a );
3306 aExp = extractFloatx80Exp( a ); 3306 aExp = extractFloatx80Exp( a );
3307 aSign = extractFloatx80Sign( a ); 3307 aSign = extractFloatx80Sign( a );
3308 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; 3308 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3309 shiftCount = 0x4037 - aExp; 3309 shiftCount = 0x4037 - aExp;
3310 if ( shiftCount <= 0 ) shiftCount = 1; 3310 if ( shiftCount <= 0 ) shiftCount = 1;
3311 shift64RightJamming( aSig, shiftCount, &aSig ); 3311 shift64RightJamming( aSig, shiftCount, &aSig );
3312 return roundAndPackInt32( aSign, aSig ); 3312 return roundAndPackInt32( aSign, aSig );
3313 3313
3314} 3314}
3315 3315
3316/* 3316/*
3317------------------------------------------------------------------------------- 3317-------------------------------------------------------------------------------
3318Returns the result of converting the extended double-precision floating- 3318Returns the result of converting the extended double-precision floating-
3319point value `a' to the 32-bit two's complement integer format. The 3319point value `a' to the 32-bit two's complement integer format. The
3320conversion is performed according to the IEC/IEEE Standard for Binary 3320conversion is performed according to the IEC/IEEE Standard for Binary
3321Floating-Point Arithmetic, except that the conversion is always rounded 3321Floating-Point Arithmetic, except that the conversion is always rounded
3322toward zero. If `a' is a NaN, the largest positive integer is returned. 3322toward zero. If `a' is a NaN, the largest positive integer is returned.
3323Otherwise, if the conversion overflows, the largest integer with the same 3323Otherwise, if the conversion overflows, the largest integer with the same
3324sign as `a' is returned. 3324sign as `a' is returned.
3325------------------------------------------------------------------------------- 3325-------------------------------------------------------------------------------
3326*/ 3326*/
3327int32 floatx80_to_int32_round_to_zero( floatx80 a ) 3327int32 floatx80_to_int32_round_to_zero( floatx80 a )
3328{ 3328{
3329 flag aSign; 3329 flag aSign;
3330 int32 aExp, shiftCount; 3330 int32 aExp, shiftCount;
3331 bits64 aSig, savedASig; 3331 bits64 aSig, savedASig;
3332 int32 z; 3332 int32 z;
3333 3333
3334 aSig = extractFloatx80Frac( a ); 3334 aSig = extractFloatx80Frac( a );
3335 aExp = extractFloatx80Exp( a ); 3335 aExp = extractFloatx80Exp( a );
3336 aSign = extractFloatx80Sign( a ); 3336 aSign = extractFloatx80Sign( a );
3337 if ( 0x401E < aExp ) { 3337 if ( 0x401E < aExp ) {
3338 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; 3338 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
3339 goto invalid; 3339 goto invalid;
3340 } 3340 }
3341 else if ( aExp < 0x3FFF ) { 3341 else if ( aExp < 0x3FFF ) {
3342 if ( aExp || aSig ) set_float_exception_inexact_flag(); 3342 if ( aExp || aSig ) set_float_exception_inexact_flag();
3343 return 0; 3343 return 0;
3344 } 3344 }
3345 shiftCount = 0x403E - aExp; 3345 shiftCount = 0x403E - aExp;
3346 savedASig = aSig; 3346 savedASig = aSig;
3347 aSig >>= shiftCount; 3347 aSig >>= shiftCount;
3348 z = aSig; 3348 z = aSig;
3349 if ( aSign ) z = - z; 3349 if ( aSign ) z = - z;
3350 if ( ( z < 0 ) ^ aSign ) { 3350 if ( ( z < 0 ) ^ aSign ) {
3351 invalid: 3351 invalid:
3352 float_raise( float_flag_invalid ); 3352 float_raise( float_flag_invalid );
3353 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 3353 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
3354 } 3354 }
3355 if ( ( aSig<<shiftCount ) != savedASig ) { 3355 if ( ( aSig<<shiftCount ) != savedASig ) {
3356 set_float_exception_inexact_flag(); 3356 set_float_exception_inexact_flag();
3357 } 3357 }
3358 return z; 3358 return z;
3359 3359
3360} 3360}
3361 3361
3362/* 3362/*
3363------------------------------------------------------------------------------- 3363-------------------------------------------------------------------------------
3364Returns the result of converting the extended double-precision floating- 3364Returns the result of converting the extended double-precision floating-
3365point value `a' to the 64-bit two's complement integer format. The 3365point value `a' to the 64-bit two's complement integer format. The
3366conversion is performed according to the IEC/IEEE Standard for Binary 3366conversion is performed according to the IEC/IEEE Standard for Binary
3367Floating-Point Arithmetic---which means in particular that the conversion 3367Floating-Point Arithmetic---which means in particular that the conversion
3368is rounded according to the current rounding mode. If `a' is a NaN, 3368is rounded according to the current rounding mode. If `a' is a NaN,
3369the largest positive integer is returned. Otherwise, if the conversion 3369the largest positive integer is returned. Otherwise, if the conversion
3370overflows, the largest integer with the same sign as `a' is returned. 3370overflows, the largest integer with the same sign as `a' is returned.
3371------------------------------------------------------------------------------- 3371-------------------------------------------------------------------------------
3372*/ 3372*/
3373int64 floatx80_to_int64( floatx80 a ) 3373int64 floatx80_to_int64( floatx80 a )
3374{ 3374{
3375 flag aSign; 3375 flag aSign;
3376 int32 aExp, shiftCount; 3376 int32 aExp, shiftCount;
3377 bits64 aSig, aSigExtra; 3377 bits64 aSig, aSigExtra;
3378 3378
3379 aSig = extractFloatx80Frac( a ); 3379 aSig = extractFloatx80Frac( a );
3380 aExp = extractFloatx80Exp( a ); 3380 aExp = extractFloatx80Exp( a );
3381 aSign = extractFloatx80Sign( a ); 3381 aSign = extractFloatx80Sign( a );
3382 shiftCount = 0x403E - aExp; 3382 shiftCount = 0x403E - aExp;
3383 if ( shiftCount <= 0 ) { 3383 if ( shiftCount <= 0 ) {
3384 if ( shiftCount ) { 3384 if ( shiftCount ) {
3385 float_raise( float_flag_invalid ); 3385 float_raise( float_flag_invalid );
3386 if ( ! aSign 3386 if ( ! aSign
3387 || ( ( aExp == 0x7FFF ) 3387 || ( ( aExp == 0x7FFF )
3388 && ( aSig != LIT64( 0x8000000000000000 ) ) ) 3388 && ( aSig != LIT64( 0x8000000000000000 ) ) )
3389 ) { 3389 ) {
3390 return LIT64( 0x7FFFFFFFFFFFFFFF ); 3390 return LIT64( 0x7FFFFFFFFFFFFFFF );
3391 } 3391 }
3392 return (sbits64) LIT64( 0x8000000000000000 ); 3392 return (sbits64) LIT64( 0x8000000000000000 );
3393 } 3393 }
3394 aSigExtra = 0; 3394 aSigExtra = 0;
3395 } 3395 }
3396 else { 3396 else {
3397 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); 3397 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
3398 } 3398 }
3399 return roundAndPackInt64( aSign, aSig, aSigExtra ); 3399 return roundAndPackInt64( aSign, aSig, aSigExtra );
3400 3400
3401} 3401}
3402 3402
3403/* 3403/*
3404------------------------------------------------------------------------------- 3404-------------------------------------------------------------------------------
3405Returns the result of converting the extended double-precision floating- 3405Returns the result of converting the extended double-precision floating-
3406point value `a' to the 64-bit two's complement integer format. The 3406point value `a' to the 64-bit two's complement integer format. The
3407conversion is performed according to the IEC/IEEE Standard for Binary 3407conversion is performed according to the IEC/IEEE Standard for Binary
3408Floating-Point Arithmetic, except that the conversion is always rounded 3408Floating-Point Arithmetic, except that the conversion is always rounded
3409toward zero. If `a' is a NaN, the largest positive integer is returned. 3409toward zero. If `a' is a NaN, the largest positive integer is returned.
3410Otherwise, if the conversion overflows, the largest integer with the same 3410Otherwise, if the conversion overflows, the largest integer with the same
3411sign as `a' is returned. 3411sign as `a' is returned.
3412------------------------------------------------------------------------------- 3412-------------------------------------------------------------------------------
3413*/ 3413*/
3414int64 floatx80_to_int64_round_to_zero( floatx80 a ) 3414int64 floatx80_to_int64_round_to_zero( floatx80 a )
3415{ 3415{
3416 flag aSign; 3416 flag aSign;
3417 int32 aExp, shiftCount; 3417 int32 aExp, shiftCount;
3418 bits64 aSig; 3418 bits64 aSig;
3419 int64 z; 3419 int64 z;
3420 3420
3421 aSig = extractFloatx80Frac( a ); 3421 aSig = extractFloatx80Frac( a );
3422 aExp = extractFloatx80Exp( a ); 3422 aExp = extractFloatx80Exp( a );
3423 aSign = extractFloatx80Sign( a ); 3423 aSign = extractFloatx80Sign( a );
3424 shiftCount = aExp - 0x403E; 3424 shiftCount = aExp - 0x403E;
3425 if ( 0 <= shiftCount ) { 3425 if ( 0 <= shiftCount ) {
3426 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF ); 3426 aSig &= LIT64( 0x7FFFFFFFFFFFFFFF );
3427 if ( ( a.high != 0xC03E ) || aSig ) { 3427 if ( ( a.high != 0xC03E ) || aSig ) {
3428 float_raise( float_flag_invalid ); 3428 float_raise( float_flag_invalid );
3429 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) { 3429 if ( ! aSign || ( ( aExp == 0x7FFF ) && aSig ) ) {
3430 return LIT64( 0x7FFFFFFFFFFFFFFF ); 3430 return LIT64( 0x7FFFFFFFFFFFFFFF );
3431 } 3431 }
3432 } 3432 }
3433 return (sbits64) LIT64( 0x8000000000000000 ); 3433 return (sbits64) LIT64( 0x8000000000000000 );
3434 } 3434 }
3435 else if ( aExp < 0x3FFF ) { 3435 else if ( aExp < 0x3FFF ) {
3436 if ( aExp | aSig ) set_float_exception_inexact_flag(); 3436 if ( aExp | aSig ) set_float_exception_inexact_flag();
3437 return 0; 3437 return 0;
3438 } 3438 }
3439 z = aSig>>( - shiftCount ); 3439 z = aSig>>( - shiftCount );
3440 if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { 3440 if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) {
3441 set_float_exception_inexact_flag(); 3441 set_float_exception_inexact_flag();
3442 } 3442 }
3443 if ( aSign ) z = - z; 3443 if ( aSign ) z = - z;
3444 return z; 3444 return z;
3445 3445
3446} 3446}
3447 3447
3448/* 3448/*
3449------------------------------------------------------------------------------- 3449-------------------------------------------------------------------------------
3450Returns the result of converting the extended double-precision floating- 3450Returns the result of converting the extended double-precision floating-
3451point value `a' to the single-precision floating-point format. The 3451point value `a' to the single-precision floating-point format. The
3452conversion is performed according to the IEC/IEEE Standard for Binary 3452conversion is performed according to the IEC/IEEE Standard for Binary
3453Floating-Point Arithmetic. 3453Floating-Point Arithmetic.
3454------------------------------------------------------------------------------- 3454-------------------------------------------------------------------------------
3455*/ 3455*/
3456float32 floatx80_to_float32( floatx80 a ) 3456float32 floatx80_to_float32( floatx80 a )
3457{ 3457{
3458 flag aSign; 3458 flag aSign;
3459 int32 aExp; 3459 int32 aExp;
3460 bits64 aSig; 3460 bits64 aSig;
3461 3461
3462 aSig = extractFloatx80Frac( a ); 3462 aSig = extractFloatx80Frac( a );
3463 aExp = extractFloatx80Exp( a ); 3463 aExp = extractFloatx80Exp( a );
3464 aSign = extractFloatx80Sign( a ); 3464 aSign = extractFloatx80Sign( a );
3465 if ( aExp == 0x7FFF ) { 3465 if ( aExp == 0x7FFF ) {
3466 if ( (bits64) ( aSig<<1 ) ) { 3466 if ( (bits64) ( aSig<<1 ) ) {
3467 return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); 3467 return commonNaNToFloat32( floatx80ToCommonNaN( a ) );
3468 } 3468 }
3469 return packFloat32( aSign, 0xFF, 0 ); 3469 return packFloat32( aSign, 0xFF, 0 );
3470 } 3470 }
3471 shift64RightJamming( aSig, 33, &aSig ); 3471 shift64RightJamming( aSig, 33, &aSig );
3472 if ( aExp || aSig ) aExp -= 0x3F81; 3472 if ( aExp || aSig ) aExp -= 0x3F81;
3473 return roundAndPackFloat32( aSign, aExp, aSig ); 3473 return roundAndPackFloat32( aSign, aExp, aSig );
3474 3474
3475} 3475}
3476 3476
3477/* 3477/*
3478------------------------------------------------------------------------------- 3478-------------------------------------------------------------------------------
3479Returns the result of converting the extended double-precision floating- 3479Returns the result of converting the extended double-precision floating-
3480point value `a' to the double-precision floating-point format. The 3480point value `a' to the double-precision floating-point format. The
3481conversion is performed according to the IEC/IEEE Standard for Binary 3481conversion is performed according to the IEC/IEEE Standard for Binary
3482Floating-Point Arithmetic. 3482Floating-Point Arithmetic.
3483------------------------------------------------------------------------------- 3483-------------------------------------------------------------------------------
3484*/ 3484*/
3485float64 floatx80_to_float64( floatx80 a ) 3485float64 floatx80_to_float64( floatx80 a )
3486{ 3486{
3487 flag aSign; 3487 flag aSign;
3488 int32 aExp; 3488 int32 aExp;
3489 bits64 aSig, zSig; 3489 bits64 aSig, zSig;
3490 3490
3491 aSig = extractFloatx80Frac( a ); 3491 aSig = extractFloatx80Frac( a );
3492 aExp = extractFloatx80Exp( a ); 3492 aExp = extractFloatx80Exp( a );
3493 aSign = extractFloatx80Sign( a ); 3493 aSign = extractFloatx80Sign( a );
3494 if ( aExp == 0x7FFF ) { 3494 if ( aExp == 0x7FFF ) {
3495 if ( (bits64) ( aSig<<1 ) ) { 3495 if ( (bits64) ( aSig<<1 ) ) {
3496 return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); 3496 return commonNaNToFloat64( floatx80ToCommonNaN( a ) );
3497 } 3497 }
3498 return packFloat64( aSign, 0x7FF, 0 ); 3498 return packFloat64( aSign, 0x7FF, 0 );
3499 } 3499 }
3500 shift64RightJamming( aSig, 1, &zSig ); 3500 shift64RightJamming( aSig, 1, &zSig );
3501 if ( aExp || aSig ) aExp -= 0x3C01; 3501 if ( aExp || aSig ) aExp -= 0x3C01;
3502 return roundAndPackFloat64( aSign, aExp, zSig ); 3502 return roundAndPackFloat64( aSign, aExp, zSig );
3503 3503
3504} 3504}
3505 3505
3506#ifdef FLOAT128 3506#ifdef FLOAT128
3507 3507
3508/* 3508/*
3509------------------------------------------------------------------------------- 3509-------------------------------------------------------------------------------
3510Returns the result of converting the extended double-precision floating- 3510Returns the result of converting the extended double-precision floating-
3511point value `a' to the quadruple-precision floating-point format. The 3511point value `a' to the quadruple-precision floating-point format. The
3512conversion is performed according to the IEC/IEEE Standard for Binary 3512conversion is performed according to the IEC/IEEE Standard for Binary
3513Floating-Point Arithmetic. 3513Floating-Point Arithmetic.
3514------------------------------------------------------------------------------- 3514-------------------------------------------------------------------------------
3515*/ 3515*/
3516float128 floatx80_to_float128( floatx80 a ) 3516float128 floatx80_to_float128( floatx80 a )
3517{ 3517{
3518 flag aSign; 3518 flag aSign;
3519 int16 aExp; 3519 int16 aExp;
3520 bits64 aSig, zSig0, zSig1; 3520 bits64 aSig, zSig0, zSig1;
3521 3521
3522 aSig = extractFloatx80Frac( a ); 3522 aSig = extractFloatx80Frac( a );
3523 aExp = extractFloatx80Exp( a ); 3523 aExp = extractFloatx80Exp( a );
3524 aSign = extractFloatx80Sign( a ); 3524 aSign = extractFloatx80Sign( a );
3525 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { 3525 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) {
3526 return commonNaNToFloat128( floatx80ToCommonNaN( a ) ); 3526 return commonNaNToFloat128( floatx80ToCommonNaN( a ) );
3527 } 3527 }
3528 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); 3528 shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 );
3529 return packFloat128( aSign, aExp, zSig0, zSig1 ); 3529 return packFloat128( aSign, aExp, zSig0, zSig1 );
3530 3530
3531} 3531}
3532 3532
3533#endif 3533#endif
3534 3534
3535/* 3535/*
3536------------------------------------------------------------------------------- 3536-------------------------------------------------------------------------------
3537Rounds the extended double-precision floating-point value `a' to an integer, 3537Rounds the extended double-precision floating-point value `a' to an integer,
3538and returns the result as an extended quadruple-precision floating-point 3538and returns the result as an extended quadruple-precision floating-point
3539value. The operation is performed according to the IEC/IEEE Standard for 3539value. The operation is performed according to the IEC/IEEE Standard for
3540Binary Floating-Point Arithmetic. 3540Binary Floating-Point Arithmetic.
3541------------------------------------------------------------------------------- 3541-------------------------------------------------------------------------------
3542*/ 3542*/
3543floatx80 floatx80_round_to_int( floatx80 a ) 3543floatx80 floatx80_round_to_int( floatx80 a )
3544{ 3544{
3545 flag aSign; 3545 flag aSign;
3546 int32 aExp; 3546 int32 aExp;
3547 bits64 lastBitMask, roundBitsMask; 3547 bits64 lastBitMask, roundBitsMask;
3548 int8 roundingMode; 3548 int8 roundingMode;
3549 floatx80 z; 3549 floatx80 z;
3550 3550
3551 aExp = extractFloatx80Exp( a ); 3551 aExp = extractFloatx80Exp( a );
3552 if ( 0x403E <= aExp ) { 3552 if ( 0x403E <= aExp ) {
3553 if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { 3553 if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) {
3554 return propagateFloatx80NaN( a, a ); 3554 return propagateFloatx80NaN( a, a );
3555 } 3555 }
3556 return a; 3556 return a;
3557 } 3557 }
3558 if ( aExp < 0x3FFF ) { 3558 if ( aExp < 0x3FFF ) {
3559 if ( ( aExp == 0 ) 3559 if ( ( aExp == 0 )
3560 && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { 3560 && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
3561 return a; 3561 return a;
3562 } 3562 }
3563 set_float_exception_inexact_flag(); 3563 set_float_exception_inexact_flag();
3564 aSign = extractFloatx80Sign( a ); 3564 aSign = extractFloatx80Sign( a );
3565 switch ( float_rounding_mode ) { 3565 switch ( float_rounding_mode ) {
3566 case float_round_nearest_even: 3566 case float_round_nearest_even:
3567 if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) 3567 if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 )
3568 ) { 3568 ) {
3569 return 3569 return
3570 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); 3570 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3571 } 3571 }
3572 break; 3572 break;
3573 case float_round_to_zero: 3573 case float_round_to_zero:
3574 break; 3574 break;
3575 case float_round_down: 3575 case float_round_down:
3576 return 3576 return
3577 aSign ? 3577 aSign ?
3578 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) 3578 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
3579 : packFloatx80( 0, 0, 0 ); 3579 : packFloatx80( 0, 0, 0 );
3580 case float_round_up: 3580 case float_round_up:
3581 return 3581 return
3582 aSign ? packFloatx80( 1, 0, 0 ) 3582 aSign ? packFloatx80( 1, 0, 0 )
3583 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); 3583 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
3584 } 3584 }
3585 return packFloatx80( aSign, 0, 0 ); 3585 return packFloatx80( aSign, 0, 0 );
3586 } 3586 }
3587 lastBitMask = 1; 3587 lastBitMask = 1;
3588 lastBitMask <<= 0x403E - aExp; 3588 lastBitMask <<= 0x403E - aExp;
3589 roundBitsMask = lastBitMask - 1; 3589 roundBitsMask = lastBitMask - 1;
3590 z = a; 3590 z = a;
3591 roundingMode = float_rounding_mode; 3591 roundingMode = float_rounding_mode;
3592 if ( roundingMode == float_round_nearest_even ) { 3592 if ( roundingMode == float_round_nearest_even ) {
3593 z.low += lastBitMask>>1; 3593 z.low += lastBitMask>>1;
3594 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 3594 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
3595 } 3595 }
3596 else if ( roundingMode != float_round_to_zero ) { 3596 else if ( roundingMode != float_round_to_zero ) {
3597 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { 3597 if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) {
3598 z.low += roundBitsMask; 3598 z.low += roundBitsMask;
3599 } 3599 }
3600 } 3600 }
3601 z.low &= ~ roundBitsMask; 3601 z.low &= ~ roundBitsMask;
3602 if ( z.low == 0 ) { 3602 if ( z.low == 0 ) {
3603 ++z.high; 3603 ++z.high;
3604 z.low = LIT64( 0x8000000000000000 ); 3604 z.low = LIT64( 0x8000000000000000 );
3605 } 3605 }
3606 if ( z.low != a.low ) set_float_exception_inexact_flag(); 3606 if ( z.low != a.low ) set_float_exception_inexact_flag();
3607 return z; 3607 return z;
3608 3608
3609} 3609}
3610 3610
3611/* 3611/*
3612------------------------------------------------------------------------------- 3612-------------------------------------------------------------------------------
3613Returns the result of adding the absolute values of the extended double- 3613Returns the result of adding the absolute values of the extended double-
3614precision floating-point values `a' and `b'. If `zSign' is 1, the sum is 3614precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
3615negated before being returned. `zSign' is ignored if the result is a NaN. 3615negated before being returned. `zSign' is ignored if the result is a NaN.
3616The addition is performed according to the IEC/IEEE Standard for Binary 3616The addition is performed according to the IEC/IEEE Standard for Binary
3617Floating-Point Arithmetic. 3617Floating-Point Arithmetic.
3618------------------------------------------------------------------------------- 3618-------------------------------------------------------------------------------
3619*/ 3619*/
3620static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 3620static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3621{ 3621{
3622 int32 aExp, bExp, zExp; 3622 int32 aExp, bExp, zExp;
3623 bits64 aSig, bSig, zSig0, zSig1; 3623 bits64 aSig, bSig, zSig0, zSig1;
3624 int32 expDiff; 3624 int32 expDiff;
3625 3625
3626 aSig = extractFloatx80Frac( a ); 3626 aSig = extractFloatx80Frac( a );
3627 aExp = extractFloatx80Exp( a ); 3627 aExp = extractFloatx80Exp( a );
3628 bSig = extractFloatx80Frac( b ); 3628 bSig = extractFloatx80Frac( b );
3629 bExp = extractFloatx80Exp( b ); 3629 bExp = extractFloatx80Exp( b );
3630 expDiff = aExp - bExp; 3630 expDiff = aExp - bExp;
3631 if ( 0 < expDiff ) { 3631 if ( 0 < expDiff ) {
3632 if ( aExp == 0x7FFF ) { 3632 if ( aExp == 0x7FFF ) {
3633 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3633 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3634 return a; 3634 return a;
3635 } 3635 }
3636 if ( bExp == 0 ) --expDiff; 3636 if ( bExp == 0 ) --expDiff;
3637 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 3637 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3638 zExp = aExp; 3638 zExp = aExp;
3639 } 3639 }
3640 else if ( expDiff < 0 ) { 3640 else if ( expDiff < 0 ) {
3641 if ( bExp == 0x7FFF ) { 3641 if ( bExp == 0x7FFF ) {
3642 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3642 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3643 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3643 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3644 } 3644 }
3645 if ( aExp == 0 ) ++expDiff; 3645 if ( aExp == 0 ) ++expDiff;
3646 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 3646 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3647 zExp = bExp; 3647 zExp = bExp;
3648 } 3648 }
3649 else { 3649 else {
3650 if ( aExp == 0x7FFF ) { 3650 if ( aExp == 0x7FFF ) {
3651 if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 3651 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3652 return propagateFloatx80NaN( a, b ); 3652 return propagateFloatx80NaN( a, b );
3653 } 3653 }
3654 return a; 3654 return a;
3655 } 3655 }
3656 zSig1 = 0; 3656 zSig1 = 0;
3657 zSig0 = aSig + bSig; 3657 zSig0 = aSig + bSig;
3658 if ( aExp == 0 ) { 3658 if ( aExp == 0 ) {
3659 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); 3659 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
3660 goto roundAndPack; 3660 goto roundAndPack;
3661 } 3661 }
3662 zExp = aExp; 3662 zExp = aExp;
3663 goto shiftRight1; 3663 goto shiftRight1;
3664 } 3664 }
3665 zSig0 = aSig + bSig; 3665 zSig0 = aSig + bSig;
3666 if ( (sbits64) zSig0 < 0 ) goto roundAndPack; 3666 if ( (sbits64) zSig0 < 0 ) goto roundAndPack;
3667 shiftRight1: 3667 shiftRight1:
3668 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); 3668 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
3669 zSig0 |= LIT64( 0x8000000000000000 ); 3669 zSig0 |= LIT64( 0x8000000000000000 );
3670 ++zExp; 3670 ++zExp;
3671 roundAndPack: 3671 roundAndPack:
3672 return 3672 return
3673 roundAndPackFloatx80( 3673 roundAndPackFloatx80(
3674 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3674 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3675 3675
3676} 3676}
3677 3677
3678/* 3678/*
3679------------------------------------------------------------------------------- 3679-------------------------------------------------------------------------------
3680Returns the result of subtracting the absolute values of the extended 3680Returns the result of subtracting the absolute values of the extended
3681double-precision floating-point values `a' and `b'. If `zSign' is 1, the 3681double-precision floating-point values `a' and `b'. If `zSign' is 1, the
3682difference is negated before being returned. `zSign' is ignored if the 3682difference is negated before being returned. `zSign' is ignored if the
3683result is a NaN. The subtraction is performed according to the IEC/IEEE 3683result is a NaN. The subtraction is performed according to the IEC/IEEE
3684Standard for Binary Floating-Point Arithmetic. 3684Standard for Binary Floating-Point Arithmetic.
3685------------------------------------------------------------------------------- 3685-------------------------------------------------------------------------------
3686*/ 3686*/
3687static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) 3687static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign )
3688{ 3688{
3689 int32 aExp, bExp, zExp; 3689 int32 aExp, bExp, zExp;
3690 bits64 aSig, bSig, zSig0, zSig1; 3690 bits64 aSig, bSig, zSig0, zSig1;
3691 int32 expDiff; 3691 int32 expDiff;
3692 floatx80 z; 3692 floatx80 z;
3693 3693
3694 aSig = extractFloatx80Frac( a ); 3694 aSig = extractFloatx80Frac( a );
3695 aExp = extractFloatx80Exp( a ); 3695 aExp = extractFloatx80Exp( a );
3696 bSig = extractFloatx80Frac( b ); 3696 bSig = extractFloatx80Frac( b );
3697 bExp = extractFloatx80Exp( b ); 3697 bExp = extractFloatx80Exp( b );
3698 expDiff = aExp - bExp; 3698 expDiff = aExp - bExp;
3699 if ( 0 < expDiff ) goto aExpBigger; 3699 if ( 0 < expDiff ) goto aExpBigger;
3700 if ( expDiff < 0 ) goto bExpBigger; 3700 if ( expDiff < 0 ) goto bExpBigger;
3701 if ( aExp == 0x7FFF ) { 3701 if ( aExp == 0x7FFF ) {
3702 if ( (bits64) ( ( aSig | bSig )<<1 ) ) { 3702 if ( (bits64) ( ( aSig | bSig )<<1 ) ) {
3703 return propagateFloatx80NaN( a, b ); 3703 return propagateFloatx80NaN( a, b );
3704 } 3704 }
3705 float_raise( float_flag_invalid ); 3705 float_raise( float_flag_invalid );
3706 z.low = floatx80_default_nan_low; 3706 z.low = floatx80_default_nan_low;
3707 z.high = floatx80_default_nan_high; 3707 z.high = floatx80_default_nan_high;
3708 return z; 3708 return z;
3709 } 3709 }
3710 if ( aExp == 0 ) { 3710 if ( aExp == 0 ) {
3711 aExp = 1; 3711 aExp = 1;
3712 bExp = 1; 3712 bExp = 1;
3713 } 3713 }
3714 zSig1 = 0; 3714 zSig1 = 0;
3715 if ( bSig < aSig ) goto aBigger; 3715 if ( bSig < aSig ) goto aBigger;
3716 if ( aSig < bSig ) goto bBigger; 3716 if ( aSig < bSig ) goto bBigger;
3717 return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); 3717 return packFloatx80( float_rounding_mode == float_round_down, 0, 0 );
3718 bExpBigger: 3718 bExpBigger:
3719 if ( bExp == 0x7FFF ) { 3719 if ( bExp == 0x7FFF ) {
3720 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3720 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3721 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3721 return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) );
3722 } 3722 }
3723 if ( aExp == 0 ) ++expDiff; 3723 if ( aExp == 0 ) ++expDiff;
3724 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); 3724 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
3725 bBigger: 3725 bBigger:
3726 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); 3726 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
3727 zExp = bExp; 3727 zExp = bExp;
3728 zSign ^= 1; 3728 zSign ^= 1;
3729 goto normalizeRoundAndPack; 3729 goto normalizeRoundAndPack;
3730 aExpBigger: 3730 aExpBigger:
3731 if ( aExp == 0x7FFF ) { 3731 if ( aExp == 0x7FFF ) {
3732 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3732 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3733 return a; 3733 return a;
3734 } 3734 }
3735 if ( bExp == 0 ) --expDiff; 3735 if ( bExp == 0 ) --expDiff;
3736 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); 3736 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
3737 aBigger: 3737 aBigger:
3738 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); 3738 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
3739 zExp = aExp; 3739 zExp = aExp;
3740 normalizeRoundAndPack: 3740 normalizeRoundAndPack:
3741 return 3741 return
3742 normalizeRoundAndPackFloatx80( 3742 normalizeRoundAndPackFloatx80(
3743 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3743 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3744 3744
3745} 3745}
3746 3746
3747/* 3747/*
3748------------------------------------------------------------------------------- 3748-------------------------------------------------------------------------------
3749Returns the result of adding the extended double-precision floating-point 3749Returns the result of adding the extended double-precision floating-point
3750values `a' and `b'. The operation is performed according to the IEC/IEEE 3750values `a' and `b'. The operation is performed according to the IEC/IEEE
3751Standard for Binary Floating-Point Arithmetic. 3751Standard for Binary Floating-Point Arithmetic.
3752------------------------------------------------------------------------------- 3752-------------------------------------------------------------------------------
3753*/ 3753*/
3754floatx80 floatx80_add( floatx80 a, floatx80 b ) 3754floatx80 floatx80_add( floatx80 a, floatx80 b )
3755{ 3755{
3756 flag aSign, bSign; 3756 flag aSign, bSign;
3757 3757
3758 aSign = extractFloatx80Sign( a ); 3758 aSign = extractFloatx80Sign( a );
3759 bSign = extractFloatx80Sign( b ); 3759 bSign = extractFloatx80Sign( b );
3760 if ( aSign == bSign ) { 3760 if ( aSign == bSign ) {
3761 return addFloatx80Sigs( a, b, aSign ); 3761 return addFloatx80Sigs( a, b, aSign );
3762 } 3762 }
3763 else { 3763 else {
3764 return subFloatx80Sigs( a, b, aSign ); 3764 return subFloatx80Sigs( a, b, aSign );
3765 } 3765 }
3766 3766
3767} 3767}
3768 3768
3769/* 3769/*
3770------------------------------------------------------------------------------- 3770-------------------------------------------------------------------------------
3771Returns the result of subtracting the extended double-precision floating- 3771Returns the result of subtracting the extended double-precision floating-
3772point values `a' and `b'. The operation is performed according to the 3772point values `a' and `b'. The operation is performed according to the
3773IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3773IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3774------------------------------------------------------------------------------- 3774-------------------------------------------------------------------------------
3775*/ 3775*/
3776floatx80 floatx80_sub( floatx80 a, floatx80 b ) 3776floatx80 floatx80_sub( floatx80 a, floatx80 b )
3777{ 3777{
3778 flag aSign, bSign; 3778 flag aSign, bSign;
3779 3779
3780 aSign = extractFloatx80Sign( a ); 3780 aSign = extractFloatx80Sign( a );
3781 bSign = extractFloatx80Sign( b ); 3781 bSign = extractFloatx80Sign( b );
3782 if ( aSign == bSign ) { 3782 if ( aSign == bSign ) {
3783 return subFloatx80Sigs( a, b, aSign ); 3783 return subFloatx80Sigs( a, b, aSign );
3784 } 3784 }
3785 else { 3785 else {
3786 return addFloatx80Sigs( a, b, aSign ); 3786 return addFloatx80Sigs( a, b, aSign );
3787 } 3787 }
3788 3788
3789} 3789}
3790 3790
3791/* 3791/*
3792------------------------------------------------------------------------------- 3792-------------------------------------------------------------------------------
3793Returns the result of multiplying the extended double-precision floating- 3793Returns the result of multiplying the extended double-precision floating-
3794point values `a' and `b'. The operation is performed according to the 3794point values `a' and `b'. The operation is performed according to the
3795IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3795IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3796------------------------------------------------------------------------------- 3796-------------------------------------------------------------------------------
3797*/ 3797*/
3798floatx80 floatx80_mul( floatx80 a, floatx80 b ) 3798floatx80 floatx80_mul( floatx80 a, floatx80 b )
3799{ 3799{
3800 flag aSign, bSign, zSign; 3800 flag aSign, bSign, zSign;
3801 int32 aExp, bExp, zExp; 3801 int32 aExp, bExp, zExp;
3802 bits64 aSig, bSig, zSig0, zSig1; 3802 bits64 aSig, bSig, zSig0, zSig1;
3803 floatx80 z; 3803 floatx80 z;
3804 3804
3805 aSig = extractFloatx80Frac( a ); 3805 aSig = extractFloatx80Frac( a );
3806 aExp = extractFloatx80Exp( a ); 3806 aExp = extractFloatx80Exp( a );
3807 aSign = extractFloatx80Sign( a ); 3807 aSign = extractFloatx80Sign( a );
3808 bSig = extractFloatx80Frac( b ); 3808 bSig = extractFloatx80Frac( b );
3809 bExp = extractFloatx80Exp( b ); 3809 bExp = extractFloatx80Exp( b );
3810 bSign = extractFloatx80Sign( b ); 3810 bSign = extractFloatx80Sign( b );
3811 zSign = aSign ^ bSign; 3811 zSign = aSign ^ bSign;
3812 if ( aExp == 0x7FFF ) { 3812 if ( aExp == 0x7FFF ) {
3813 if ( (bits64) ( aSig<<1 ) 3813 if ( (bits64) ( aSig<<1 )
3814 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { 3814 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3815 return propagateFloatx80NaN( a, b ); 3815 return propagateFloatx80NaN( a, b );
3816 } 3816 }
3817 if ( ( bExp | bSig ) == 0 ) goto invalid; 3817 if ( ( bExp | bSig ) == 0 ) goto invalid;
3818 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3818 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3819 } 3819 }
3820 if ( bExp == 0x7FFF ) { 3820 if ( bExp == 0x7FFF ) {
3821 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3821 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3822 if ( ( aExp | aSig ) == 0 ) { 3822 if ( ( aExp | aSig ) == 0 ) {
3823 invalid: 3823 invalid:
3824 float_raise( float_flag_invalid ); 3824 float_raise( float_flag_invalid );
3825 z.low = floatx80_default_nan_low; 3825 z.low = floatx80_default_nan_low;
3826 z.high = floatx80_default_nan_high; 3826 z.high = floatx80_default_nan_high;
3827 return z; 3827 return z;
3828 } 3828 }
3829 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3829 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3830 } 3830 }
3831 if ( aExp == 0 ) { 3831 if ( aExp == 0 ) {
3832 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3832 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3833 normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 3833 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3834 } 3834 }
3835 if ( bExp == 0 ) { 3835 if ( bExp == 0 ) {
3836 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3836 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
3837 normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3837 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3838 } 3838 }
3839 zExp = aExp + bExp - 0x3FFE; 3839 zExp = aExp + bExp - 0x3FFE;
3840 mul64To128( aSig, bSig, &zSig0, &zSig1 ); 3840 mul64To128( aSig, bSig, &zSig0, &zSig1 );
3841 if ( 0 < (sbits64) zSig0 ) { 3841 if ( 0 < (sbits64) zSig0 ) {
3842 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); 3842 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
3843 --zExp; 3843 --zExp;
3844 } 3844 }
3845 return 3845 return
3846 roundAndPackFloatx80( 3846 roundAndPackFloatx80(
3847 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3847 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3848 3848
3849} 3849}
3850 3850
3851/* 3851/*
3852------------------------------------------------------------------------------- 3852-------------------------------------------------------------------------------
3853Returns the result of dividing the extended double-precision floating-point 3853Returns the result of dividing the extended double-precision floating-point
3854value `a' by the corresponding value `b'. The operation is performed 3854value `a' by the corresponding value `b'. The operation is performed
3855according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3855according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3856------------------------------------------------------------------------------- 3856-------------------------------------------------------------------------------
3857*/ 3857*/
3858floatx80 floatx80_div( floatx80 a, floatx80 b ) 3858floatx80 floatx80_div( floatx80 a, floatx80 b )
3859{ 3859{
3860 flag aSign, bSign, zSign; 3860 flag aSign, bSign, zSign;
3861 int32 aExp, bExp, zExp; 3861 int32 aExp, bExp, zExp;
3862 bits64 aSig, bSig, zSig0, zSig1; 3862 bits64 aSig, bSig, zSig0, zSig1;
3863 bits64 rem0, rem1, rem2, term0, term1, term2; 3863 bits64 rem0, rem1, rem2, term0, term1, term2;
3864 floatx80 z; 3864 floatx80 z;
3865 3865
3866 aSig = extractFloatx80Frac( a ); 3866 aSig = extractFloatx80Frac( a );
3867 aExp = extractFloatx80Exp( a ); 3867 aExp = extractFloatx80Exp( a );
3868 aSign = extractFloatx80Sign( a ); 3868 aSign = extractFloatx80Sign( a );
3869 bSig = extractFloatx80Frac( b ); 3869 bSig = extractFloatx80Frac( b );
3870 bExp = extractFloatx80Exp( b ); 3870 bExp = extractFloatx80Exp( b );
3871 bSign = extractFloatx80Sign( b ); 3871 bSign = extractFloatx80Sign( b );
3872 zSign = aSign ^ bSign; 3872 zSign = aSign ^ bSign;
3873 if ( aExp == 0x7FFF ) { 3873 if ( aExp == 0x7FFF ) {
3874 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3874 if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b );
3875 if ( bExp == 0x7FFF ) { 3875 if ( bExp == 0x7FFF ) {
3876 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3876 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3877 goto invalid; 3877 goto invalid;
3878 } 3878 }
3879 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3879 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3880 } 3880 }
3881 if ( bExp == 0x7FFF ) { 3881 if ( bExp == 0x7FFF ) {
3882 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3882 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3883 return packFloatx80( zSign, 0, 0 ); 3883 return packFloatx80( zSign, 0, 0 );
3884 } 3884 }
3885 if ( bExp == 0 ) { 3885 if ( bExp == 0 ) {
3886 if ( bSig == 0 ) { 3886 if ( bSig == 0 ) {
3887 if ( ( aExp | aSig ) == 0 ) { 3887 if ( ( aExp | aSig ) == 0 ) {
3888 invalid: 3888 invalid:
3889 float_raise( float_flag_invalid ); 3889 float_raise( float_flag_invalid );
3890 z.low = floatx80_default_nan_low; 3890 z.low = floatx80_default_nan_low;
3891 z.high = floatx80_default_nan_high; 3891 z.high = floatx80_default_nan_high;
3892 return z; 3892 return z;
3893 } 3893 }
3894 float_raise( float_flag_divbyzero ); 3894 float_raise( float_flag_divbyzero );
3895 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 3895 return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
3896 } 3896 }
3897 normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3897 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3898 } 3898 }
3899 if ( aExp == 0 ) { 3899 if ( aExp == 0 ) {
3900 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); 3900 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
3901 normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); 3901 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3902 } 3902 }
3903 zExp = aExp - bExp + 0x3FFE; 3903 zExp = aExp - bExp + 0x3FFE;
3904 rem1 = 0; 3904 rem1 = 0;
3905 if ( bSig <= aSig ) { 3905 if ( bSig <= aSig ) {
3906 shift128Right( aSig, 0, 1, &aSig, &rem1 ); 3906 shift128Right( aSig, 0, 1, &aSig, &rem1 );
3907 ++zExp; 3907 ++zExp;
3908 } 3908 }
3909 zSig0 = estimateDiv128To64( aSig, rem1, bSig ); 3909 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
3910 mul64To128( bSig, zSig0, &term0, &term1 ); 3910 mul64To128( bSig, zSig0, &term0, &term1 );
3911 sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); 3911 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
3912 while ( (sbits64) rem0 < 0 ) { 3912 while ( (sbits64) rem0 < 0 ) {
3913 --zSig0; 3913 --zSig0;
3914 add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); 3914 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
3915 } 3915 }
3916 zSig1 = estimateDiv128To64( rem1, 0, bSig ); 3916 zSig1 = estimateDiv128To64( rem1, 0, bSig );
3917 if ( (bits64) ( zSig1<<1 ) <= 8 ) { 3917 if ( (bits64) ( zSig1<<1 ) <= 8 ) {
3918 mul64To128( bSig, zSig1, &term1, &term2 ); 3918 mul64To128( bSig, zSig1, &term1, &term2 );
3919 sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 3919 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3920 while ( (sbits64) rem1 < 0 ) { 3920 while ( (sbits64) rem1 < 0 ) {
3921 --zSig1; 3921 --zSig1;
3922 add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); 3922 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
3923 } 3923 }
3924 zSig1 |= ( ( rem1 | rem2 ) != 0 ); 3924 zSig1 |= ( ( rem1 | rem2 ) != 0 );
3925 } 3925 }
3926 return 3926 return
3927 roundAndPackFloatx80( 3927 roundAndPackFloatx80(
3928 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); 3928 floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 );
3929 3929
3930} 3930}
3931 3931
3932/* 3932/*
3933------------------------------------------------------------------------------- 3933-------------------------------------------------------------------------------
3934Returns the remainder of the extended double-precision floating-point value 3934Returns the remainder of the extended double-precision floating-point value
3935`a' with respect to the corresponding value `b'. The operation is performed 3935`a' with respect to the corresponding value `b'. The operation is performed
3936according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 3936according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
3937------------------------------------------------------------------------------- 3937-------------------------------------------------------------------------------
3938*/ 3938*/
3939floatx80 floatx80_rem( floatx80 a, floatx80 b ) 3939floatx80 floatx80_rem( floatx80 a, floatx80 b )
3940{ 3940{
3941 flag aSign, bSign, zSign; 3941 flag aSign, zSign;
3942 int32 aExp, bExp, expDiff; 3942 int32 aExp, bExp, expDiff;
3943 bits64 aSig0, aSig1, bSig; 3943 bits64 aSig0, aSig1, bSig;
3944 bits64 q, term0, term1, alternateASig0, alternateASig1; 3944 bits64 q, term0, term1, alternateASig0, alternateASig1;
3945 floatx80 z; 3945 floatx80 z;
3946 3946
3947 aSig0 = extractFloatx80Frac( a ); 3947 aSig0 = extractFloatx80Frac( a );
3948 aExp = extractFloatx80Exp( a ); 3948 aExp = extractFloatx80Exp( a );
3949 aSign = extractFloatx80Sign( a ); 3949 aSign = extractFloatx80Sign( a );
3950 bSig = extractFloatx80Frac( b ); 3950 bSig = extractFloatx80Frac( b );
3951 bExp = extractFloatx80Exp( b ); 3951 bExp = extractFloatx80Exp( b );
3952 bSign = extractFloatx80Sign( b ); 3952
3953 if ( aExp == 0x7FFF ) { 3953 if ( aExp == 0x7FFF ) {
3954 if ( (bits64) ( aSig0<<1 ) 3954 if ( (bits64) ( aSig0<<1 )
3955 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { 3955 || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) {
3956 return propagateFloatx80NaN( a, b ); 3956 return propagateFloatx80NaN( a, b );
3957 } 3957 }
3958 goto invalid; 3958 goto invalid;
3959 } 3959 }
3960 if ( bExp == 0x7FFF ) { 3960 if ( bExp == 0x7FFF ) {
3961 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); 3961 if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b );
3962 return a; 3962 return a;
3963 } 3963 }
3964 if ( bExp == 0 ) { 3964 if ( bExp == 0 ) {
3965 if ( bSig == 0 ) { 3965 if ( bSig == 0 ) {
3966 invalid: 3966 invalid:
3967 float_raise( float_flag_invalid ); 3967 float_raise( float_flag_invalid );
3968 z.low = floatx80_default_nan_low; 3968 z.low = floatx80_default_nan_low;
3969 z.high = floatx80_default_nan_high; 3969 z.high = floatx80_default_nan_high;
3970 return z; 3970 return z;
3971 } 3971 }
3972 normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); 3972 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3973 } 3973 }
3974 if ( aExp == 0 ) { 3974 if ( aExp == 0 ) {
3975 if ( (bits64) ( aSig0<<1 ) == 0 ) return a; 3975 if ( (bits64) ( aSig0<<1 ) == 0 ) return a;
3976 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 3976 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3977 } 3977 }
3978 bSig |= LIT64( 0x8000000000000000 ); 3978 bSig |= LIT64( 0x8000000000000000 );
3979 zSign = aSign; 3979 zSign = aSign;
3980 expDiff = aExp - bExp; 3980 expDiff = aExp - bExp;
3981 aSig1 = 0; 3981 aSig1 = 0;
3982 if ( expDiff < 0 ) { 3982 if ( expDiff < 0 ) {
3983 if ( expDiff < -1 ) return a; 3983 if ( expDiff < -1 ) return a;
3984 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); 3984 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
3985 expDiff = 0; 3985 expDiff = 0;
3986 } 3986 }
3987 q = ( bSig <= aSig0 ); 3987 q = ( bSig <= aSig0 );
3988 if ( q ) aSig0 -= bSig; 3988 if ( q ) aSig0 -= bSig;
3989 expDiff -= 64; 3989 expDiff -= 64;
3990 while ( 0 < expDiff ) { 3990 while ( 0 < expDiff ) {
3991 q = estimateDiv128To64( aSig0, aSig1, bSig ); 3991 q = estimateDiv128To64( aSig0, aSig1, bSig );
3992 q = ( 2 < q ) ? q - 2 : 0; 3992 q = ( 2 < q ) ? q - 2 : 0;
3993 mul64To128( bSig, q, &term0, &term1 ); 3993 mul64To128( bSig, q, &term0, &term1 );
3994 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 3994 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3995 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); 3995 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3996 expDiff -= 62; 3996 expDiff -= 62;
3997 } 3997 }
3998 expDiff += 64; 3998 expDiff += 64;
3999 if ( 0 < expDiff ) { 3999 if ( 0 < expDiff ) {
4000 q = estimateDiv128To64( aSig0, aSig1, bSig ); 4000 q = estimateDiv128To64( aSig0, aSig1, bSig );
4001 q = ( 2 < q ) ? q - 2 : 0; 4001 q = ( 2 < q ) ? q - 2 : 0;
4002 q >>= 64 - expDiff; 4002 q >>= 64 - expDiff;
4003 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); 4003 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
4004 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 4004 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4005 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); 4005 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
4006 while ( le128( term0, term1, aSig0, aSig1 ) ) { 4006 while ( le128( term0, term1, aSig0, aSig1 ) ) {
4007 ++q; 4007 ++q;
4008 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); 4008 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
4009 } 4009 }
4010 } 4010 }
4011 else { 4011 else {
4012 term1 = 0; 4012 term1 = 0;
4013 term0 = bSig; 4013 term0 = bSig;
4014 } 4014 }
4015 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); 4015 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
4016 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) 4016 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
4017 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) 4017 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
4018 && ( q & 1 ) ) 4018 && ( q & 1 ) )
4019 ) { 4019 ) {
4020 aSig0 = alternateASig0; 4020 aSig0 = alternateASig0;
4021 aSig1 = alternateASig1; 4021 aSig1 = alternateASig1;
4022 zSign = ! zSign; 4022 zSign = ! zSign;
4023 } 4023 }
4024 return 4024 return
4025 normalizeRoundAndPackFloatx80( 4025 normalizeRoundAndPackFloatx80(
4026 80, zSign, bExp + expDiff, aSig0, aSig1 ); 4026 80, zSign, bExp + expDiff, aSig0, aSig1 );
4027 4027
4028} 4028}
4029 4029
4030/* 4030/*
4031------------------------------------------------------------------------------- 4031-------------------------------------------------------------------------------
4032Returns the square root of the extended double-precision floating-point 4032Returns the square root of the extended double-precision floating-point
4033value `a'. The operation is performed according to the IEC/IEEE Standard 4033value `a'. The operation is performed according to the IEC/IEEE Standard
4034for Binary Floating-Point Arithmetic. 4034for Binary Floating-Point Arithmetic.
4035------------------------------------------------------------------------------- 4035-------------------------------------------------------------------------------
4036*/ 4036*/
4037floatx80 floatx80_sqrt( floatx80 a ) 4037floatx80 floatx80_sqrt( floatx80 a )
4038{ 4038{
4039 flag aSign; 4039 flag aSign;
4040 int32 aExp, zExp; 4040 int32 aExp, zExp;
4041 bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0; 4041 bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0;
4042 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; 4042 bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3;
4043 floatx80 z; 4043 floatx80 z;
4044 4044
4045 aSig0 = extractFloatx80Frac( a ); 4045 aSig0 = extractFloatx80Frac( a );
4046 aExp = extractFloatx80Exp( a ); 4046 aExp = extractFloatx80Exp( a );
4047 aSign = extractFloatx80Sign( a ); 4047 aSign = extractFloatx80Sign( a );
4048 if ( aExp == 0x7FFF ) { 4048 if ( aExp == 0x7FFF ) {
4049 if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); 4049 if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a );
4050 if ( ! aSign ) return a; 4050 if ( ! aSign ) return a;
4051 goto invalid; 4051 goto invalid;
4052 } 4052 }
4053 if ( aSign ) { 4053 if ( aSign ) {
4054 if ( ( aExp | aSig0 ) == 0 ) return a; 4054 if ( ( aExp | aSig0 ) == 0 ) return a;
4055 invalid: 4055 invalid:
4056 float_raise( float_flag_invalid ); 4056 float_raise( float_flag_invalid );
4057 z.low = floatx80_default_nan_low; 4057 z.low = floatx80_default_nan_low;
4058 z.high = floatx80_default_nan_high; 4058 z.high = floatx80_default_nan_high;
4059 return z; 4059 return z;
4060 } 4060 }
4061 if ( aExp == 0 ) { 4061 if ( aExp == 0 ) {
4062 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); 4062 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
4063 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); 4063 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
4064 } 4064 }
4065 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; 4065 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
4066 zSig0 = estimateSqrt32( aExp, aSig0>>32 ); 4066 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
4067 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); 4067 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
4068 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); 4068 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
4069 doubleZSig0 = zSig0<<1; 4069 doubleZSig0 = zSig0<<1;
4070 mul64To128( zSig0, zSig0, &term0, &term1 ); 4070 mul64To128( zSig0, zSig0, &term0, &term1 );
4071 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); 4071 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
4072 while ( (sbits64) rem0 < 0 ) { 4072 while ( (sbits64) rem0 < 0 ) {
4073 --zSig0; 4073 --zSig0;
4074 doubleZSig0 -= 2; 4074 doubleZSig0 -= 2;
4075 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); 4075 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
4076 } 4076 }
4077 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); 4077 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
4078 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { 4078 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
4079 if ( zSig1 == 0 ) zSig1 = 1; 4079 if ( zSig1 == 0 ) zSig1 = 1;
4080 mul64To128( doubleZSig0, zSig1, &term1, &term2 ); 4080 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
4081 sub128( rem1, 0, term1, term2, &rem1, &rem2 ); 4081 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
4082 mul64To128( zSig1, zSig1, &term2, &term3 ); 4082 mul64To128( zSig1, zSig1, &term2, &term3 );
4083 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); 4083 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
4084 while ( (sbits64) rem1 < 0 ) { 4084 while ( (sbits64) rem1 < 0 ) {
4085 --zSig1; 4085 --zSig1;
4086 shortShift128Left( 0, zSig1, 1, &term2, &term3 ); 4086 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
4087 term3 |= 1; 4087 term3 |= 1;
4088 term2 |= doubleZSig0; 4088 term2 |= doubleZSig0;
4089 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); 4089 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
4090 } 4090 }
4091 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); 4091 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
4092 } 4092 }
4093 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); 4093 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
4094 zSig0 |= doubleZSig0; 4094 zSig0 |= doubleZSig0;
4095 return 4095 return
4096 roundAndPackFloatx80( 4096 roundAndPackFloatx80(
4097 floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); 4097 floatx80_rounding_precision, 0, zExp, zSig0, zSig1 );
4098 4098
4099} 4099}
4100 4100
4101/* 4101/*
4102------------------------------------------------------------------------------- 4102-------------------------------------------------------------------------------
4103Returns 1 if the extended double-precision floating-point value `a' is 4103Returns 1 if the extended double-precision floating-point value `a' is
4104equal to the corresponding value `b', and 0 otherwise. The comparison is 4104equal to the corresponding value `b', and 0 otherwise. The comparison is
4105performed according to the IEC/IEEE Standard for Binary Floating-Point 4105performed according to the IEC/IEEE Standard for Binary Floating-Point
4106Arithmetic. 4106Arithmetic.
4107------------------------------------------------------------------------------- 4107-------------------------------------------------------------------------------
4108*/ 4108*/
4109flag floatx80_eq( floatx80 a, floatx80 b ) 4109flag floatx80_eq( floatx80 a, floatx80 b )
4110{ 4110{
4111 4111
4112 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4112 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4113 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4113 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4114 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4114 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4115 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4115 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4116 ) { 4116 ) {
4117 if ( floatx80_is_signaling_nan( a ) 4117 if ( floatx80_is_signaling_nan( a )
4118 || floatx80_is_signaling_nan( b ) ) { 4118 || floatx80_is_signaling_nan( b ) ) {
4119 float_raise( float_flag_invalid ); 4119 float_raise( float_flag_invalid );
4120 } 4120 }
4121 return 0; 4121 return 0;
4122 } 4122 }
4123 return 4123 return
4124 ( a.low == b.low ) 4124 ( a.low == b.low )
4125 && ( ( a.high == b.high ) 4125 && ( ( a.high == b.high )
4126 || ( ( a.low == 0 ) 4126 || ( ( a.low == 0 )
4127 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) 4127 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4128 ); 4128 );
4129 4129
4130} 4130}
4131 4131
4132/* 4132/*
4133------------------------------------------------------------------------------- 4133-------------------------------------------------------------------------------
4134Returns 1 if the extended double-precision floating-point value `a' is 4134Returns 1 if the extended double-precision floating-point value `a' is
4135less than or equal to the corresponding value `b', and 0 otherwise. The 4135less than or equal to the corresponding value `b', and 0 otherwise. The
4136comparison is performed according to the IEC/IEEE Standard for Binary 4136comparison is performed according to the IEC/IEEE Standard for Binary
4137Floating-Point Arithmetic. 4137Floating-Point Arithmetic.
4138------------------------------------------------------------------------------- 4138-------------------------------------------------------------------------------
4139*/ 4139*/
4140flag floatx80_le( floatx80 a, floatx80 b ) 4140flag floatx80_le( floatx80 a, floatx80 b )
4141{ 4141{
4142 flag aSign, bSign; 4142 flag aSign, bSign;
4143 4143
4144 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4144 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4145 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4145 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4146 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4146 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4147 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4147 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4148 ) { 4148 ) {
4149 float_raise( float_flag_invalid ); 4149 float_raise( float_flag_invalid );
4150 return 0; 4150 return 0;
4151 } 4151 }
4152 aSign = extractFloatx80Sign( a ); 4152 aSign = extractFloatx80Sign( a );
4153 bSign = extractFloatx80Sign( b ); 4153 bSign = extractFloatx80Sign( b );
4154 if ( aSign != bSign ) { 4154 if ( aSign != bSign ) {
4155 return 4155 return
4156 aSign 4156 aSign
4157 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4157 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4158 == 0 ); 4158 == 0 );
4159 } 4159 }
4160 return 4160 return
4161 aSign ? le128( b.high, b.low, a.high, a.low ) 4161 aSign ? le128( b.high, b.low, a.high, a.low )
4162 : le128( a.high, a.low, b.high, b.low ); 4162 : le128( a.high, a.low, b.high, b.low );
4163 4163
4164} 4164}
4165 4165
4166/* 4166/*
4167------------------------------------------------------------------------------- 4167-------------------------------------------------------------------------------
4168Returns 1 if the extended double-precision floating-point value `a' is 4168Returns 1 if the extended double-precision floating-point value `a' is
4169less than the corresponding value `b', and 0 otherwise. The comparison 4169less than the corresponding value `b', and 0 otherwise. The comparison
4170is performed according to the IEC/IEEE Standard for Binary Floating-Point 4170is performed according to the IEC/IEEE Standard for Binary Floating-Point
4171Arithmetic. 4171Arithmetic.
4172------------------------------------------------------------------------------- 4172-------------------------------------------------------------------------------
4173*/ 4173*/
4174flag floatx80_lt( floatx80 a, floatx80 b ) 4174flag floatx80_lt( floatx80 a, floatx80 b )
4175{ 4175{
4176 flag aSign, bSign; 4176 flag aSign, bSign;
4177 4177
4178 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4178 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4179 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4179 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4180 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4180 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4181 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4181 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4182 ) { 4182 ) {
4183 float_raise( float_flag_invalid ); 4183 float_raise( float_flag_invalid );
4184 return 0; 4184 return 0;
4185 } 4185 }
4186 aSign = extractFloatx80Sign( a ); 4186 aSign = extractFloatx80Sign( a );
4187 bSign = extractFloatx80Sign( b ); 4187 bSign = extractFloatx80Sign( b );
4188 if ( aSign != bSign ) { 4188 if ( aSign != bSign ) {
4189 return 4189 return
4190 aSign 4190 aSign
4191 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4191 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4192 != 0 ); 4192 != 0 );
4193 } 4193 }
4194 return 4194 return
4195 aSign ? lt128( b.high, b.low, a.high, a.low ) 4195 aSign ? lt128( b.high, b.low, a.high, a.low )
4196 : lt128( a.high, a.low, b.high, b.low ); 4196 : lt128( a.high, a.low, b.high, b.low );
4197 4197
4198} 4198}
4199 4199
4200/* 4200/*
4201------------------------------------------------------------------------------- 4201-------------------------------------------------------------------------------
4202Returns 1 if the extended double-precision floating-point value `a' is equal 4202Returns 1 if the extended double-precision floating-point value `a' is equal
4203to the corresponding value `b', and 0 otherwise. The invalid exception is 4203to the corresponding value `b', and 0 otherwise. The invalid exception is
4204raised if either operand is a NaN. Otherwise, the comparison is performed 4204raised if either operand is a NaN. Otherwise, the comparison is performed
4205according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4205according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4206------------------------------------------------------------------------------- 4206-------------------------------------------------------------------------------
4207*/ 4207*/
4208flag floatx80_eq_signaling( floatx80 a, floatx80 b ) 4208flag floatx80_eq_signaling( floatx80 a, floatx80 b )
4209{ 4209{
4210 4210
4211 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4211 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4212 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4212 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4213 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4213 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4214 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4214 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4215 ) { 4215 ) {
4216 float_raise( float_flag_invalid ); 4216 float_raise( float_flag_invalid );
4217 return 0; 4217 return 0;
4218 } 4218 }
4219 return 4219 return
4220 ( a.low == b.low ) 4220 ( a.low == b.low )
4221 && ( ( a.high == b.high ) 4221 && ( ( a.high == b.high )
4222 || ( ( a.low == 0 ) 4222 || ( ( a.low == 0 )
4223 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) 4223 && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) )
4224 ); 4224 );
4225 4225
4226} 4226}
4227 4227
4228/* 4228/*
4229------------------------------------------------------------------------------- 4229-------------------------------------------------------------------------------
4230Returns 1 if the extended double-precision floating-point value `a' is less 4230Returns 1 if the extended double-precision floating-point value `a' is less
4231than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs 4231than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs
4232do not cause an exception. Otherwise, the comparison is performed according 4232do not cause an exception. Otherwise, the comparison is performed according
4233to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4233to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4234------------------------------------------------------------------------------- 4234-------------------------------------------------------------------------------
4235*/ 4235*/
4236flag floatx80_le_quiet( floatx80 a, floatx80 b ) 4236flag floatx80_le_quiet( floatx80 a, floatx80 b )
4237{ 4237{
4238 flag aSign, bSign; 4238 flag aSign, bSign;
4239 4239
4240 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4240 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4241 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4241 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4242 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4242 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4243 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4243 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4244 ) { 4244 ) {
4245 if ( floatx80_is_signaling_nan( a ) 4245 if ( floatx80_is_signaling_nan( a )
4246 || floatx80_is_signaling_nan( b ) ) { 4246 || floatx80_is_signaling_nan( b ) ) {
4247 float_raise( float_flag_invalid ); 4247 float_raise( float_flag_invalid );
4248 } 4248 }
4249 return 0; 4249 return 0;
4250 } 4250 }
4251 aSign = extractFloatx80Sign( a ); 4251 aSign = extractFloatx80Sign( a );
4252 bSign = extractFloatx80Sign( b ); 4252 bSign = extractFloatx80Sign( b );
4253 if ( aSign != bSign ) { 4253 if ( aSign != bSign ) {
4254 return 4254 return
4255 aSign 4255 aSign
4256 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4256 || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4257 == 0 ); 4257 == 0 );
4258 } 4258 }
4259 return 4259 return
4260 aSign ? le128( b.high, b.low, a.high, a.low ) 4260 aSign ? le128( b.high, b.low, a.high, a.low )
4261 : le128( a.high, a.low, b.high, b.low ); 4261 : le128( a.high, a.low, b.high, b.low );
4262 4262
4263} 4263}
4264 4264
4265/* 4265/*
4266------------------------------------------------------------------------------- 4266-------------------------------------------------------------------------------
4267Returns 1 if the extended double-precision floating-point value `a' is less 4267Returns 1 if the extended double-precision floating-point value `a' is less
4268than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause 4268than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause
4269an exception. Otherwise, the comparison is performed according to the 4269an exception. Otherwise, the comparison is performed according to the
4270IEC/IEEE Standard for Binary Floating-Point Arithmetic. 4270IEC/IEEE Standard for Binary Floating-Point Arithmetic.
4271------------------------------------------------------------------------------- 4271-------------------------------------------------------------------------------
4272*/ 4272*/
4273flag floatx80_lt_quiet( floatx80 a, floatx80 b ) 4273flag floatx80_lt_quiet( floatx80 a, floatx80 b )
4274{ 4274{
4275 flag aSign, bSign; 4275 flag aSign, bSign;
4276 4276
4277 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) 4277 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
4278 && (bits64) ( extractFloatx80Frac( a )<<1 ) ) 4278 && (bits64) ( extractFloatx80Frac( a )<<1 ) )
4279 || ( ( extractFloatx80Exp( b ) == 0x7FFF ) 4279 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
4280 && (bits64) ( extractFloatx80Frac( b )<<1 ) ) 4280 && (bits64) ( extractFloatx80Frac( b )<<1 ) )
4281 ) { 4281 ) {
4282 if ( floatx80_is_signaling_nan( a ) 4282 if ( floatx80_is_signaling_nan( a )
4283 || floatx80_is_signaling_nan( b ) ) { 4283 || floatx80_is_signaling_nan( b ) ) {
4284 float_raise( float_flag_invalid ); 4284 float_raise( float_flag_invalid );
4285 } 4285 }
4286 return 0; 4286 return 0;
4287 } 4287 }
4288 aSign = extractFloatx80Sign( a ); 4288 aSign = extractFloatx80Sign( a );
4289 bSign = extractFloatx80Sign( b ); 4289 bSign = extractFloatx80Sign( b );
4290 if ( aSign != bSign ) { 4290 if ( aSign != bSign ) {
4291 return 4291 return
4292 aSign 4292 aSign
4293 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) 4293 && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
4294 != 0 ); 4294 != 0 );
4295 } 4295 }
4296 return 4296 return
4297 aSign ? lt128( b.high, b.low, a.high, a.low ) 4297 aSign ? lt128( b.high, b.low, a.high, a.low )
4298 : lt128( a.high, a.low, b.high, b.low ); 4298 : lt128( a.high, a.low, b.high, b.low );
4299 4299
4300} 4300}
4301 4301
4302#endif 4302#endif
4303 4303
4304#ifdef FLOAT128 4304#ifdef FLOAT128
4305 4305
4306/* 4306/*
4307------------------------------------------------------------------------------- 4307-------------------------------------------------------------------------------
4308Returns the result of converting the quadruple-precision floating-point 4308Returns the result of converting the quadruple-precision floating-point
4309value `a' to the 32-bit two's complement integer format. The conversion 4309value `a' to the 32-bit two's complement integer format. The conversion
4310is performed according to the IEC/IEEE Standard for Binary Floating-Point 4310is performed according to the IEC/IEEE Standard for Binary Floating-Point
4311Arithmetic---which means in particular that the conversion is rounded 4311Arithmetic---which means in particular that the conversion is rounded
4312according to the current rounding mode. If `a' is a NaN, the largest 4312according to the current rounding mode. If `a' is a NaN, the largest
4313positive integer is returned. Otherwise, if the conversion overflows, the 4313positive integer is returned. Otherwise, if the conversion overflows, the
4314largest integer with the same sign as `a' is returned. 4314largest integer with the same sign as `a' is returned.
4315------------------------------------------------------------------------------- 4315-------------------------------------------------------------------------------
4316*/ 4316*/
4317int32 float128_to_int32( float128 a ) 4317int32 float128_to_int32( float128 a )
4318{ 4318{
4319 flag aSign; 4319 flag aSign;
4320 int32 aExp, shiftCount; 4320 int32 aExp, shiftCount;
4321 bits64 aSig0, aSig1; 4321 bits64 aSig0, aSig1;
4322 4322
4323 aSig1 = extractFloat128Frac1( a ); 4323 aSig1 = extractFloat128Frac1( a );
4324 aSig0 = extractFloat128Frac0( a ); 4324 aSig0 = extractFloat128Frac0( a );
4325 aExp = extractFloat128Exp( a ); 4325 aExp = extractFloat128Exp( a );
4326 aSign = extractFloat128Sign( a ); 4326 aSign = extractFloat128Sign( a );
4327 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; 4327 if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0;
4328 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4328 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4329 aSig0 |= ( aSig1 != 0 ); 4329 aSig0 |= ( aSig1 != 0 );
4330 shiftCount = 0x4028 - aExp; 4330 shiftCount = 0x4028 - aExp;
4331 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); 4331 if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 );
4332 return roundAndPackInt32( aSign, aSig0 ); 4332 return roundAndPackInt32( aSign, aSig0 );
4333 4333
4334} 4334}
4335 4335
4336/* 4336/*
4337------------------------------------------------------------------------------- 4337-------------------------------------------------------------------------------
4338Returns the result of converting the quadruple-precision floating-point 4338Returns the result of converting the quadruple-precision floating-point
4339value `a' to the 32-bit two's complement integer format. The conversion 4339value `a' to the 32-bit two's complement integer format. The conversion
4340is performed according to the IEC/IEEE Standard for Binary Floating-Point 4340is performed according to the IEC/IEEE Standard for Binary Floating-Point
4341Arithmetic, except that the conversion is always rounded toward zero. If 4341Arithmetic, except that the conversion is always rounded toward zero. If
4342`a' is a NaN, the largest positive integer is returned. Otherwise, if the 4342`a' is a NaN, the largest positive integer is returned. Otherwise, if the
4343conversion overflows, the largest integer with the same sign as `a' is 4343conversion overflows, the largest integer with the same sign as `a' is
4344returned. 4344returned.
4345------------------------------------------------------------------------------- 4345-------------------------------------------------------------------------------
4346*/ 4346*/
4347int32 float128_to_int32_round_to_zero( float128 a ) 4347int32 float128_to_int32_round_to_zero( float128 a )
4348{ 4348{
4349 flag aSign; 4349 flag aSign;
4350 int32 aExp, shiftCount; 4350 int32 aExp, shiftCount;
4351 bits64 aSig0, aSig1, savedASig; 4351 bits64 aSig0, aSig1, savedASig;
4352 int32 z; 4352 int32 z;
4353 4353
4354 aSig1 = extractFloat128Frac1( a ); 4354 aSig1 = extractFloat128Frac1( a );
4355 aSig0 = extractFloat128Frac0( a ); 4355 aSig0 = extractFloat128Frac0( a );
4356 aExp = extractFloat128Exp( a ); 4356 aExp = extractFloat128Exp( a );
4357 aSign = extractFloat128Sign( a ); 4357 aSign = extractFloat128Sign( a );
4358 aSig0 |= ( aSig1 != 0 ); 4358 aSig0 |= ( aSig1 != 0 );
4359 if ( 0x401E < aExp ) { 4359 if ( 0x401E < aExp ) {
4360 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; 4360 if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0;
4361 goto invalid; 4361 goto invalid;
4362 } 4362 }
4363 else if ( aExp < 0x3FFF ) { 4363 else if ( aExp < 0x3FFF ) {
4364 if ( aExp || aSig0 ) set_float_exception_inexact_flag(); 4364 if ( aExp || aSig0 ) set_float_exception_inexact_flag();
4365 return 0; 4365 return 0;
4366 } 4366 }
4367 aSig0 |= LIT64( 0x0001000000000000 ); 4367 aSig0 |= LIT64( 0x0001000000000000 );
4368 shiftCount = 0x402F - aExp; 4368 shiftCount = 0x402F - aExp;
4369 savedASig = aSig0; 4369 savedASig = aSig0;
4370 aSig0 >>= shiftCount; 4370 aSig0 >>= shiftCount;
4371 z = (int32)aSig0; 4371 z = (int32)aSig0;
4372 if ( aSign ) z = - z; 4372 if ( aSign ) z = - z;
4373 if ( ( z < 0 ) ^ aSign ) { 4373 if ( ( z < 0 ) ^ aSign ) {
4374 invalid: 4374 invalid:
4375 float_raise( float_flag_invalid ); 4375 float_raise( float_flag_invalid );
4376 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; 4376 return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF;
4377 } 4377 }
4378 if ( ( aSig0<<shiftCount ) != savedASig ) { 4378 if ( ( aSig0<<shiftCount ) != savedASig ) {
4379 set_float_exception_inexact_flag(); 4379 set_float_exception_inexact_flag();
4380 } 4380 }
4381 return z; 4381 return z;
4382 4382
4383} 4383}
4384 4384
4385/* 4385/*
4386------------------------------------------------------------------------------- 4386-------------------------------------------------------------------------------
4387Returns the result of converting the quadruple-precision floating-point 4387Returns the result of converting the quadruple-precision floating-point
4388value `a' to the 64-bit two's complement integer format. The conversion 4388value `a' to the 64-bit two's complement integer format. The conversion
4389is performed according to the IEC/IEEE Standard for Binary Floating-Point 4389is performed according to the IEC/IEEE Standard for Binary Floating-Point
4390Arithmetic---which means in particular that the conversion is rounded 4390Arithmetic---which means in particular that the conversion is rounded
4391according to the current rounding mode. If `a' is a NaN, the largest 4391according to the current rounding mode. If `a' is a NaN, the largest
4392positive integer is returned. Otherwise, if the conversion overflows, the 4392positive integer is returned. Otherwise, if the conversion overflows, the
4393largest integer with the same sign as `a' is returned. 4393largest integer with the same sign as `a' is returned.
4394------------------------------------------------------------------------------- 4394-------------------------------------------------------------------------------
4395*/ 4395*/
4396int64 float128_to_int64( float128 a ) 4396int64 float128_to_int64( float128 a )
4397{ 4397{
4398 flag aSign; 4398 flag aSign;
4399 int32 aExp, shiftCount; 4399 int32 aExp, shiftCount;
4400 bits64 aSig0, aSig1; 4400 bits64 aSig0, aSig1;
4401 4401
4402 aSig1 = extractFloat128Frac1( a ); 4402 aSig1 = extractFloat128Frac1( a );
4403 aSig0 = extractFloat128Frac0( a ); 4403 aSig0 = extractFloat128Frac0( a );
4404 aExp = extractFloat128Exp( a ); 4404 aExp = extractFloat128Exp( a );
4405 aSign = extractFloat128Sign( a ); 4405 aSign = extractFloat128Sign( a );
4406 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4406 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4407 shiftCount = 0x402F - aExp; 4407 shiftCount = 0x402F - aExp;
4408 if ( shiftCount <= 0 ) { 4408 if ( shiftCount <= 0 ) {
4409 if ( 0x403E < aExp ) { 4409 if ( 0x403E < aExp ) {
4410 float_raise( float_flag_invalid ); 4410 float_raise( float_flag_invalid );
4411 if ( ! aSign 4411 if ( ! aSign
4412 || ( ( aExp == 0x7FFF ) 4412 || ( ( aExp == 0x7FFF )
4413 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) ) 4413 && ( aSig1 || ( aSig0 != LIT64( 0x0001000000000000 ) ) )
4414 ) 4414 )
4415 ) { 4415 ) {
4416 return LIT64( 0x7FFFFFFFFFFFFFFF ); 4416 return LIT64( 0x7FFFFFFFFFFFFFFF );
4417 } 4417 }
4418 return (sbits64) LIT64( 0x8000000000000000 ); 4418 return (sbits64) LIT64( 0x8000000000000000 );
4419 } 4419 }
4420 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 ); 4420 shortShift128Left( aSig0, aSig1, - shiftCount, &aSig0, &aSig1 );
4421 } 4421 }
4422 else { 4422 else {
4423 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 ); 4423 shift64ExtraRightJamming( aSig0, aSig1, shiftCount, &aSig0, &aSig1 );
4424 } 4424 }
4425 return roundAndPackInt64( aSign, aSig0, aSig1 ); 4425 return roundAndPackInt64( aSign, aSig0, aSig1 );
4426 4426
4427} 4427}
4428 4428
4429/* 4429/*
4430------------------------------------------------------------------------------- 4430-------------------------------------------------------------------------------
4431Returns the result of converting the quadruple-precision floating-point 4431Returns the result of converting the quadruple-precision floating-point
4432value `a' to the 64-bit two's complement integer format. The conversion 4432value `a' to the 64-bit two's complement integer format. The conversion
4433is performed according to the IEC/IEEE Standard for Binary Floating-Point 4433is performed according to the IEC/IEEE Standard for Binary Floating-Point
4434Arithmetic, except that the conversion is always rounded toward zero. 4434Arithmetic, except that the conversion is always rounded toward zero.
4435If `a' is a NaN, the largest positive integer is returned. Otherwise, if 4435If `a' is a NaN, the largest positive integer is returned. Otherwise, if
4436the conversion overflows, the largest integer with the same sign as `a' is 4436the conversion overflows, the largest integer with the same sign as `a' is
4437returned. 4437returned.
4438------------------------------------------------------------------------------- 4438-------------------------------------------------------------------------------
4439*/ 4439*/
4440int64 float128_to_int64_round_to_zero( float128 a ) 4440int64 float128_to_int64_round_to_zero( float128 a )
4441{ 4441{
4442 flag aSign; 4442 flag aSign;
4443 int32 aExp, shiftCount; 4443 int32 aExp, shiftCount;
4444 bits64 aSig0, aSig1; 4444 bits64 aSig0, aSig1;
4445 int64 z; 4445 int64 z;
4446 4446
4447 aSig1 = extractFloat128Frac1( a ); 4447 aSig1 = extractFloat128Frac1( a );
4448 aSig0 = extractFloat128Frac0( a ); 4448 aSig0 = extractFloat128Frac0( a );
4449 aExp = extractFloat128Exp( a ); 4449 aExp = extractFloat128Exp( a );
4450 aSign = extractFloat128Sign( a ); 4450 aSign = extractFloat128Sign( a );
4451 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4451 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4452 shiftCount = aExp - 0x402F; 4452 shiftCount = aExp - 0x402F;
4453 if ( 0 < shiftCount ) { 4453 if ( 0 < shiftCount ) {
4454 if ( 0x403E <= aExp ) { 4454 if ( 0x403E <= aExp ) {
4455 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); 4455 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4456 if ( ( a.high == LIT64( 0xC03E000000000000 ) ) 4456 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
4457 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { 4457 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4458 if ( aSig1 ) set_float_exception_inexact_flag(); 4458 if ( aSig1 ) set_float_exception_inexact_flag();
4459 } 4459 }
4460 else { 4460 else {
4461 float_raise( float_flag_invalid ); 4461 float_raise( float_flag_invalid );
4462 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) { 4462 if ( ! aSign || ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) ) {
4463 return LIT64( 0x7FFFFFFFFFFFFFFF ); 4463 return LIT64( 0x7FFFFFFFFFFFFFFF );
4464 } 4464 }
4465 } 4465 }
4466 return (sbits64) LIT64( 0x8000000000000000 ); 4466 return (sbits64) LIT64( 0x8000000000000000 );
4467 } 4467 }
4468 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); 4468 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4469 if ( (bits64) ( aSig1<<shiftCount ) ) { 4469 if ( (bits64) ( aSig1<<shiftCount ) ) {
4470 set_float_exception_inexact_flag(); 4470 set_float_exception_inexact_flag();
4471 } 4471 }
4472 } 4472 }
4473 else { 4473 else {
4474 if ( aExp < 0x3FFF ) { 4474 if ( aExp < 0x3FFF ) {
4475 if ( aExp | aSig0 | aSig1 ) { 4475 if ( aExp | aSig0 | aSig1 ) {
4476 set_float_exception_inexact_flag(); 4476 set_float_exception_inexact_flag();
4477 } 4477 }
4478 return 0; 4478 return 0;
4479 } 4479 }
4480 z = aSig0>>( - shiftCount ); 4480 z = aSig0>>( - shiftCount );
4481 if ( aSig1 4481 if ( aSig1
4482 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { 4482 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4483 set_float_exception_inexact_flag(); 4483 set_float_exception_inexact_flag();
4484 } 4484 }
4485 } 4485 }
4486 if ( aSign ) z = - z; 4486 if ( aSign ) z = - z;
4487 return z; 4487 return z;
4488 4488
4489} 4489}
4490 4490
4491#if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \ 4491#if (defined(SOFTFLOATSPARC64_FOR_GCC) || defined(SOFTFLOAT_FOR_GCC)) \
4492 && defined(SOFTFLOAT_NEED_FIXUNS) 4492 && defined(SOFTFLOAT_NEED_FIXUNS)
4493/* 4493/*
4494 * just like above - but do not care for overflow of signed results 4494 * just like above - but do not care for overflow of signed results
4495 */ 4495 */
4496uint64 float128_to_uint64_round_to_zero( float128 a ) 4496uint64 float128_to_uint64_round_to_zero( float128 a )
4497{ 4497{
4498 flag aSign; 4498 flag aSign;
4499 int32 aExp, shiftCount; 4499 int32 aExp, shiftCount;
4500 bits64 aSig0, aSig1; 4500 bits64 aSig0, aSig1;
4501 uint64 z; 4501 uint64 z;
4502 4502
4503 aSig1 = extractFloat128Frac1( a ); 4503 aSig1 = extractFloat128Frac1( a );
4504 aSig0 = extractFloat128Frac0( a ); 4504 aSig0 = extractFloat128Frac0( a );
4505 aExp = extractFloat128Exp( a ); 4505 aExp = extractFloat128Exp( a );
4506 aSign = extractFloat128Sign( a ); 4506 aSign = extractFloat128Sign( a );
4507 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); 4507 if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 );
4508 shiftCount = aExp - 0x402F; 4508 shiftCount = aExp - 0x402F;
4509 if ( 0 < shiftCount ) { 4509 if ( 0 < shiftCount ) {
4510 if ( 0x403F <= aExp ) { 4510 if ( 0x403F <= aExp ) {
4511 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF ); 4511 aSig0 &= LIT64( 0x0000FFFFFFFFFFFF );
4512 if ( ( a.high == LIT64( 0xC03E000000000000 ) ) 4512 if ( ( a.high == LIT64( 0xC03E000000000000 ) )
4513 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) { 4513 && ( aSig1 < LIT64( 0x0002000000000000 ) ) ) {
4514 if ( aSig1 ) set_float_exception_inexact_flag(); 4514 if ( aSig1 ) set_float_exception_inexact_flag();
4515 } 4515 }
4516 else { 4516 else {
4517 float_raise( float_flag_invalid ); 4517 float_raise( float_flag_invalid );
4518 } 4518 }
4519 return LIT64( 0xFFFFFFFFFFFFFFFF ); 4519 return LIT64( 0xFFFFFFFFFFFFFFFF );
4520 } 4520 }
4521 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) ); 4521 z = ( aSig0<<shiftCount ) | ( aSig1>>( ( - shiftCount ) & 63 ) );
4522 if ( (bits64) ( aSig1<<shiftCount ) ) { 4522 if ( (bits64) ( aSig1<<shiftCount ) ) {
4523 set_float_exception_inexact_flag(); 4523 set_float_exception_inexact_flag();
4524 } 4524 }
4525 } 4525 }
4526 else { 4526 else {
4527 if ( aExp < 0x3FFF ) { 4527 if ( aExp < 0x3FFF ) {
4528 if ( aExp | aSig0 | aSig1 ) { 4528 if ( aExp | aSig0 | aSig1 ) {
4529 set_float_exception_inexact_flag(); 4529 set_float_exception_inexact_flag();
4530 } 4530 }
4531 return 0; 4531 return 0;
4532 } 4532 }
4533 z = aSig0>>( - shiftCount ); 4533 z = aSig0>>( - shiftCount );
4534 if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { 4534 if (aSig1 || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) {
4535 set_float_exception_inexact_flag(); 4535 set_float_exception_inexact_flag();
4536 } 4536 }
4537 } 4537 }
4538 if ( aSign ) z = - z; 4538 if ( aSign ) z = - z;
4539 return z; 4539 return z;
4540 4540
4541} 4541}
4542#endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */ 4542#endif /* (SOFTFLOATSPARC64_FOR_GCC || SOFTFLOAT_FOR_GCC) && SOFTFLOAT_NEED_FIXUNS */
4543 4543
4544/* 4544/*
4545------------------------------------------------------------------------------- 4545-------------------------------------------------------------------------------
4546Returns the result of converting the quadruple-precision floating-point 4546Returns the result of converting the quadruple-precision floating-point
4547value `a' to the single-precision floating-point format. The conversion 4547value `a' to the single-precision floating-point format. The conversion
4548is performed according to the IEC/IEEE Standard for Binary Floating-Point 4548is performed according to the IEC/IEEE Standard for Binary Floating-Point
4549Arithmetic. 4549Arithmetic.
4550------------------------------------------------------------------------------- 4550-------------------------------------------------------------------------------
4551*/ 4551*/
4552float32 float128_to_float32( float128 a ) 4552float32 float128_to_float32( float128 a )
4553{ 4553{
4554 flag aSign; 4554 flag aSign;
4555 int32 aExp; 4555 int32 aExp;
4556 bits64 aSig0, aSig1; 4556 bits64 aSig0, aSig1;
4557 bits32 zSig; 4557 bits32 zSig;
4558 4558
4559 aSig1 = extractFloat128Frac1( a ); 4559 aSig1 = extractFloat128Frac1( a );
4560 aSig0 = extractFloat128Frac0( a ); 4560 aSig0 = extractFloat128Frac0( a );
4561 aExp = extractFloat128Exp( a ); 4561 aExp = extractFloat128Exp( a );
4562 aSign = extractFloat128Sign( a ); 4562 aSign = extractFloat128Sign( a );
4563 if ( aExp == 0x7FFF ) { 4563 if ( aExp == 0x7FFF ) {
4564 if ( aSig0 | aSig1 ) { 4564 if ( aSig0 | aSig1 ) {
4565 return commonNaNToFloat32( float128ToCommonNaN( a ) ); 4565 return commonNaNToFloat32( float128ToCommonNaN( a ) );
4566 } 4566 }
4567 return packFloat32( aSign, 0xFF, 0 ); 4567 return packFloat32( aSign, 0xFF, 0 );
4568 } 4568 }
4569 aSig0 |= ( aSig1 != 0 ); 4569 aSig0 |= ( aSig1 != 0 );
4570 shift64RightJamming( aSig0, 18, &aSig0 ); 4570 shift64RightJamming( aSig0, 18, &aSig0 );
4571 zSig = (bits32)aSig0; 4571 zSig = (bits32)aSig0;
4572 if ( aExp || zSig ) { 4572 if ( aExp || zSig ) {
4573 zSig |= 0x40000000; 4573 zSig |= 0x40000000;
4574 aExp -= 0x3F81; 4574 aExp -= 0x3F81;
4575 } 4575 }
4576 return roundAndPackFloat32( aSign, aExp, zSig ); 4576 return roundAndPackFloat32( aSign, aExp, zSig );
4577 4577
4578} 4578}
4579 4579
4580/* 4580/*
4581------------------------------------------------------------------------------- 4581-------------------------------------------------------------------------------
4582Returns the result of converting the quadruple-precision floating-point 4582Returns the result of converting the quadruple-precision floating-point
4583value `a' to the double-precision floating-point format. The conversion 4583value `a' to the double-precision floating-point format. The conversion
4584is performed according to the IEC/IEEE Standard for Binary Floating-Point 4584is performed according to the IEC/IEEE Standard for Binary Floating-Point
4585Arithmetic. 4585Arithmetic.
4586------------------------------------------------------------------------------- 4586-------------------------------------------------------------------------------
4587*/ 4587*/
4588float64 float128_to_float64( float128 a ) 4588float64 float128_to_float64( float128 a )
4589{ 4589{
4590 flag aSign; 4590 flag aSign;
4591 int32 aExp; 4591 int32 aExp;
4592 bits64 aSig0, aSig1; 4592 bits64 aSig0, aSig1;
4593 4593
4594 aSig1 = extractFloat128Frac1( a ); 4594 aSig1 = extractFloat128Frac1( a );
4595 aSig0 = extractFloat128Frac0( a ); 4595 aSig0 = extractFloat128Frac0( a );
4596 aExp = extractFloat128Exp( a ); 4596 aExp = extractFloat128Exp( a );
4597 aSign = extractFloat128Sign( a ); 4597 aSign = extractFloat128Sign( a );
4598 if ( aExp == 0x7FFF ) { 4598 if ( aExp == 0x7FFF ) {
4599 if ( aSig0 | aSig1 ) { 4599 if ( aSig0 | aSig1 ) {
4600 return commonNaNToFloat64( float128ToCommonNaN( a ) ); 4600 return commonNaNToFloat64( float128ToCommonNaN( a ) );
4601 } 4601 }
4602 return packFloat64( aSign, 0x7FF, 0 ); 4602 return packFloat64( aSign, 0x7FF, 0 );
4603 } 4603 }
4604 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); 4604 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4605 aSig0 |= ( aSig1 != 0 ); 4605 aSig0 |= ( aSig1 != 0 );
4606 if ( aExp || aSig0 ) { 4606 if ( aExp || aSig0 ) {
4607 aSig0 |= LIT64( 0x4000000000000000 ); 4607 aSig0 |= LIT64( 0x4000000000000000 );
4608 aExp -= 0x3C01; 4608 aExp -= 0x3C01;
4609 } 4609 }
4610 return roundAndPackFloat64( aSign, aExp, aSig0 ); 4610 return roundAndPackFloat64( aSign, aExp, aSig0 );
4611 4611
4612} 4612}
4613 4613
4614#ifdef FLOATX80 4614#ifdef FLOATX80
4615 4615
4616/* 4616/*
4617------------------------------------------------------------------------------- 4617-------------------------------------------------------------------------------
4618Returns the result of converting the quadruple-precision floating-point 4618Returns the result of converting the quadruple-precision floating-point
4619value `a' to the extended double-precision floating-point format. The 4619value `a' to the extended double-precision floating-point format. The
4620conversion is performed according to the IEC/IEEE Standard for Binary 4620conversion is performed according to the IEC/IEEE Standard for Binary
4621Floating-Point Arithmetic. 4621Floating-Point Arithmetic.
4622------------------------------------------------------------------------------- 4622-------------------------------------------------------------------------------
4623*/ 4623*/
4624floatx80 float128_to_floatx80( float128 a ) 4624floatx80 float128_to_floatx80( float128 a )
4625{ 4625{
4626 flag aSign; 4626 flag aSign;
4627 int32 aExp; 4627 int32 aExp;
4628 bits64 aSig0, aSig1; 4628 bits64 aSig0, aSig1;
4629 4629
4630 aSig1 = extractFloat128Frac1( a ); 4630 aSig1 = extractFloat128Frac1( a );
4631 aSig0 = extractFloat128Frac0( a ); 4631 aSig0 = extractFloat128Frac0( a );
4632 aExp = extractFloat128Exp( a ); 4632 aExp = extractFloat128Exp( a );
4633 aSign = extractFloat128Sign( a ); 4633 aSign = extractFloat128Sign( a );
4634 if ( aExp == 0x7FFF ) { 4634 if ( aExp == 0x7FFF ) {
4635 if ( aSig0 | aSig1 ) { 4635 if ( aSig0 | aSig1 ) {
4636 return commonNaNToFloatx80( float128ToCommonNaN( a ) ); 4636 return commonNaNToFloatx80( float128ToCommonNaN( a ) );
4637 } 4637 }
4638 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); 4638 return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) );
4639 } 4639 }
4640 if ( aExp == 0 ) { 4640 if ( aExp == 0 ) {
4641 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); 4641 if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 );
4642 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); 4642 normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 );
4643 } 4643 }
4644 else { 4644 else {
4645 aSig0 |= LIT64( 0x0001000000000000 ); 4645 aSig0 |= LIT64( 0x0001000000000000 );
4646 } 4646 }
4647 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); 4647 shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 );
4648 return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 ); 4648 return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 );
4649 4649
4650} 4650}
4651 4651
4652#endif 4652#endif
4653 4653
4654/* 4654/*
4655------------------------------------------------------------------------------- 4655-------------------------------------------------------------------------------
4656Rounds the quadruple-precision floating-point value `a' to an integer, and 4656Rounds the quadruple-precision floating-point value `a' to an integer, and
4657returns the result as a quadruple-precision floating-point value. The 4657returns the result as a quadruple-precision floating-point value. The
4658operation is performed according to the IEC/IEEE Standard for Binary 4658operation is performed according to the IEC/IEEE Standard for Binary
4659Floating-Point Arithmetic. 4659Floating-Point Arithmetic.
4660------------------------------------------------------------------------------- 4660-------------------------------------------------------------------------------
4661*/ 4661*/
4662float128 float128_round_to_int( float128 a ) 4662float128 float128_round_to_int( float128 a )
4663{ 4663{
4664 flag aSign; 4664 flag aSign;
4665 int32 aExp; 4665 int32 aExp;
4666 bits64 lastBitMask, roundBitsMask; 4666 bits64 lastBitMask, roundBitsMask;
4667 int8 roundingMode; 4667 int8 roundingMode;
4668 float128 z; 4668 float128 z;
4669 4669
4670 aExp = extractFloat128Exp( a ); 4670 aExp = extractFloat128Exp( a );
4671 if ( 0x402F <= aExp ) { 4671 if ( 0x402F <= aExp ) {
4672 if ( 0x406F <= aExp ) { 4672 if ( 0x406F <= aExp ) {
4673 if ( ( aExp == 0x7FFF ) 4673 if ( ( aExp == 0x7FFF )
4674 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) 4674 && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) )
4675 ) { 4675 ) {
4676 return propagateFloat128NaN( a, a ); 4676 return propagateFloat128NaN( a, a );
4677 } 4677 }
4678 return a; 4678 return a;
4679 } 4679 }
4680 lastBitMask = 1; 4680 lastBitMask = 1;
4681 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; 4681 lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1;
4682 roundBitsMask = lastBitMask - 1; 4682 roundBitsMask = lastBitMask - 1;
4683 z = a; 4683 z = a;
4684 roundingMode = float_rounding_mode; 4684 roundingMode = float_rounding_mode;
4685 if ( roundingMode == float_round_nearest_even ) { 4685 if ( roundingMode == float_round_nearest_even ) {
4686 if ( lastBitMask ) { 4686 if ( lastBitMask ) {
4687 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); 4687 add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low );
4688 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; 4688 if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask;
4689 } 4689 }
4690 else { 4690 else {
4691 if ( (sbits64) z.low < 0 ) { 4691 if ( (sbits64) z.low < 0 ) {
4692 ++z.high; 4692 ++z.high;
4693 if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; 4693 if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1;
4694 } 4694 }
4695 } 4695 }
4696 } 4696 }
4697 else if ( roundingMode != float_round_to_zero ) { 4697 else if ( roundingMode != float_round_to_zero ) {
4698 if ( extractFloat128Sign( z ) 4698 if ( extractFloat128Sign( z )
4699 ^ ( roundingMode == float_round_up ) ) { 4699 ^ ( roundingMode == float_round_up ) ) {
4700 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); 4700 add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low );
4701 } 4701 }
4702 } 4702 }
4703 z.low &= ~ roundBitsMask; 4703 z.low &= ~ roundBitsMask;
4704 } 4704 }
4705 else { 4705 else {
4706 if ( aExp < 0x3FFF ) { 4706 if ( aExp < 0x3FFF ) {
4707 if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; 4707 if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a;
4708 set_float_exception_inexact_flag(); 4708 set_float_exception_inexact_flag();
4709 aSign = extractFloat128Sign( a ); 4709 aSign = extractFloat128Sign( a );
4710 switch ( float_rounding_mode ) { 4710 switch ( float_rounding_mode ) {
4711 case float_round_nearest_even: 4711 case float_round_nearest_even:
4712 if ( ( aExp == 0x3FFE ) 4712 if ( ( aExp == 0x3FFE )
4713 && ( extractFloat128Frac0( a ) 4713 && ( extractFloat128Frac0( a )
4714 | extractFloat128Frac1( a ) ) 4714 | extractFloat128Frac1( a ) )
4715 ) { 4715 ) {
4716 return packFloat128( aSign, 0x3FFF, 0, 0 ); 4716 return packFloat128( aSign, 0x3FFF, 0, 0 );
4717 } 4717 }
4718 break; 4718 break;
4719 case float_round_to_zero: 4719 case float_round_to_zero:
4720 break; 4720 break;
4721 case float_round_down: 4721 case float_round_down:
4722 return 4722 return
4723 aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) 4723 aSign ? packFloat128( 1, 0x3FFF, 0, 0 )
4724 : packFloat128( 0, 0, 0, 0 ); 4724 : packFloat128( 0, 0, 0, 0 );
4725 case float_round_up: 4725 case float_round_up:
4726 return 4726 return
4727 aSign ? packFloat128( 1, 0, 0, 0 ) 4727 aSign ? packFloat128( 1, 0, 0, 0 )
4728 : packFloat128( 0, 0x3FFF, 0, 0 ); 4728 : packFloat128( 0, 0x3FFF, 0, 0 );
4729 } 4729 }
4730 return packFloat128( aSign, 0, 0, 0 ); 4730 return packFloat128( aSign, 0, 0, 0 );
4731 } 4731 }
4732 lastBitMask = 1; 4732 lastBitMask = 1;
4733 lastBitMask <<= 0x402F - aExp; 4733 lastBitMask <<= 0x402F - aExp;
4734 roundBitsMask = lastBitMask - 1; 4734 roundBitsMask = lastBitMask - 1;
4735 z.low = 0; 4735 z.low = 0;
4736 z.high = a.high; 4736 z.high = a.high;
4737 roundingMode = float_rounding_mode; 4737 roundingMode = float_rounding_mode;
4738 if ( roundingMode == float_round_nearest_even ) { 4738 if ( roundingMode == float_round_nearest_even ) {
4739 z.high += lastBitMask>>1; 4739 z.high += lastBitMask>>1;
4740 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { 4740 if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) {
4741 z.high &= ~ lastBitMask; 4741 z.high &= ~ lastBitMask;
4742 } 4742 }
4743 } 4743 }
4744 else if ( roundingMode != float_round_to_zero ) { 4744 else if ( roundingMode != float_round_to_zero ) {
4745 if ( extractFloat128Sign( z ) 4745 if ( extractFloat128Sign( z )
4746 ^ ( roundingMode == float_round_up ) ) { 4746 ^ ( roundingMode == float_round_up ) ) {
4747 z.high |= ( a.low != 0 ); 4747 z.high |= ( a.low != 0 );
4748 z.high += roundBitsMask; 4748 z.high += roundBitsMask;
4749 } 4749 }
4750 } 4750 }
4751 z.high &= ~ roundBitsMask; 4751 z.high &= ~ roundBitsMask;
4752 } 4752 }
4753 if ( ( z.low != a.low ) || ( z.high != a.high ) ) { 4753 if ( ( z.low != a.low ) || ( z.high != a.high ) ) {
4754 set_float_exception_inexact_flag(); 4754 set_float_exception_inexact_flag();
4755 } 4755 }
4756 return z; 4756 return z;
4757 4757
4758} 4758}
4759 4759
4760/* 4760/*
4761------------------------------------------------------------------------------- 4761-------------------------------------------------------------------------------
4762Returns the result of adding the absolute values of the quadruple-precision 4762Returns the result of adding the absolute values of the quadruple-precision
4763floating-point values `a' and `b'. If `zSign' is 1, the sum is negated 4763floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
4764before being returned. `zSign' is ignored if the result is a NaN. 4764before being returned. `zSign' is ignored if the result is a NaN.
4765The addition is performed according to the IEC/IEEE Standard for Binary 4765The addition is performed according to the IEC/IEEE Standard for Binary
4766Floating-Point Arithmetic. 4766Floating-Point Arithmetic.
4767------------------------------------------------------------------------------- 4767-------------------------------------------------------------------------------
4768*/ 4768*/
4769static float128 addFloat128Sigs( float128 a, float128 b, flag zSign ) 4769static float128 addFloat128Sigs( float128 a, float128 b, flag zSign )
4770{ 4770{
4771 int32 aExp, bExp, zExp; 4771 int32 aExp, bExp, zExp;
4772 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; 4772 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
4773 int32 expDiff; 4773 int32 expDiff;
4774 4774
4775 aSig1 = extractFloat128Frac1( a ); 4775 aSig1 = extractFloat128Frac1( a );
4776 aSig0 = extractFloat128Frac0( a ); 4776 aSig0 = extractFloat128Frac0( a );
4777 aExp = extractFloat128Exp( a ); 4777 aExp = extractFloat128Exp( a );
4778 bSig1 = extractFloat128Frac1( b ); 4778 bSig1 = extractFloat128Frac1( b );
4779 bSig0 = extractFloat128Frac0( b ); 4779 bSig0 = extractFloat128Frac0( b );
4780 bExp = extractFloat128Exp( b ); 4780 bExp = extractFloat128Exp( b );
4781 expDiff = aExp - bExp; 4781 expDiff = aExp - bExp;
4782 if ( 0 < expDiff ) { 4782 if ( 0 < expDiff ) {
4783 if ( aExp == 0x7FFF ) { 4783 if ( aExp == 0x7FFF ) {
4784 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 4784 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4785 return a; 4785 return a;
4786 } 4786 }
4787 if ( bExp == 0 ) { 4787 if ( bExp == 0 ) {
4788 --expDiff; 4788 --expDiff;
4789 } 4789 }
4790 else { 4790 else {
4791 bSig0 |= LIT64( 0x0001000000000000 ); 4791 bSig0 |= LIT64( 0x0001000000000000 );
4792 } 4792 }
4793 shift128ExtraRightJamming( 4793 shift128ExtraRightJamming(
4794 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); 4794 bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 );
4795 zExp = aExp; 4795 zExp = aExp;
4796 } 4796 }
4797 else if ( expDiff < 0 ) { 4797 else if ( expDiff < 0 ) {
4798 if ( bExp == 0x7FFF ) { 4798 if ( bExp == 0x7FFF ) {
4799 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4799 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4800 return packFloat128( zSign, 0x7FFF, 0, 0 ); 4800 return packFloat128( zSign, 0x7FFF, 0, 0 );
4801 } 4801 }
4802 if ( aExp == 0 ) { 4802 if ( aExp == 0 ) {
4803 ++expDiff; 4803 ++expDiff;
4804 } 4804 }
4805 else { 4805 else {
4806 aSig0 |= LIT64( 0x0001000000000000 ); 4806 aSig0 |= LIT64( 0x0001000000000000 );
4807 } 4807 }
4808 shift128ExtraRightJamming( 4808 shift128ExtraRightJamming(
4809 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); 4809 aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 );
4810 zExp = bExp; 4810 zExp = bExp;
4811 } 4811 }
4812 else { 4812 else {
4813 if ( aExp == 0x7FFF ) { 4813 if ( aExp == 0x7FFF ) {
4814 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 4814 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4815 return propagateFloat128NaN( a, b ); 4815 return propagateFloat128NaN( a, b );
4816 } 4816 }
4817 return a; 4817 return a;
4818 } 4818 }
4819 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4819 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4820 if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); 4820 if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 );
4821 zSig2 = 0; 4821 zSig2 = 0;
4822 zSig0 |= LIT64( 0x0002000000000000 ); 4822 zSig0 |= LIT64( 0x0002000000000000 );
4823 zExp = aExp; 4823 zExp = aExp;
4824 goto shiftRight1; 4824 goto shiftRight1;
4825 } 4825 }
4826 aSig0 |= LIT64( 0x0001000000000000 ); 4826 aSig0 |= LIT64( 0x0001000000000000 );
4827 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4827 add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4828 --zExp; 4828 --zExp;
4829 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; 4829 if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack;
4830 ++zExp; 4830 ++zExp;
4831 shiftRight1: 4831 shiftRight1:
4832 shift128ExtraRightJamming( 4832 shift128ExtraRightJamming(
4833 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); 4833 zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 );
4834 roundAndPack: 4834 roundAndPack:
4835 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); 4835 return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 );
4836 4836
4837} 4837}
4838 4838
4839/* 4839/*
4840------------------------------------------------------------------------------- 4840-------------------------------------------------------------------------------
4841Returns the result of subtracting the absolute values of the quadruple- 4841Returns the result of subtracting the absolute values of the quadruple-
4842precision floating-point values `a' and `b'. If `zSign' is 1, the 4842precision floating-point values `a' and `b'. If `zSign' is 1, the
4843difference is negated before being returned. `zSign' is ignored if the 4843difference is negated before being returned. `zSign' is ignored if the
4844result is a NaN. The subtraction is performed according to the IEC/IEEE 4844result is a NaN. The subtraction is performed according to the IEC/IEEE
4845Standard for Binary Floating-Point Arithmetic. 4845Standard for Binary Floating-Point Arithmetic.
4846------------------------------------------------------------------------------- 4846-------------------------------------------------------------------------------
4847*/ 4847*/
4848static float128 subFloat128Sigs( float128 a, float128 b, flag zSign ) 4848static float128 subFloat128Sigs( float128 a, float128 b, flag zSign )
4849{ 4849{
4850 int32 aExp, bExp, zExp; 4850 int32 aExp, bExp, zExp;
4851 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; 4851 bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
4852 int32 expDiff; 4852 int32 expDiff;
4853 float128 z; 4853 float128 z;
4854 4854
4855 aSig1 = extractFloat128Frac1( a ); 4855 aSig1 = extractFloat128Frac1( a );
4856 aSig0 = extractFloat128Frac0( a ); 4856 aSig0 = extractFloat128Frac0( a );
4857 aExp = extractFloat128Exp( a ); 4857 aExp = extractFloat128Exp( a );
4858 bSig1 = extractFloat128Frac1( b ); 4858 bSig1 = extractFloat128Frac1( b );
4859 bSig0 = extractFloat128Frac0( b ); 4859 bSig0 = extractFloat128Frac0( b );
4860 bExp = extractFloat128Exp( b ); 4860 bExp = extractFloat128Exp( b );
4861 expDiff = aExp - bExp; 4861 expDiff = aExp - bExp;
4862 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); 4862 shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 );
4863 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); 4863 shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 );
4864 if ( 0 < expDiff ) goto aExpBigger; 4864 if ( 0 < expDiff ) goto aExpBigger;
4865 if ( expDiff < 0 ) goto bExpBigger; 4865 if ( expDiff < 0 ) goto bExpBigger;
4866 if ( aExp == 0x7FFF ) { 4866 if ( aExp == 0x7FFF ) {
4867 if ( aSig0 | aSig1 | bSig0 | bSig1 ) { 4867 if ( aSig0 | aSig1 | bSig0 | bSig1 ) {
4868 return propagateFloat128NaN( a, b ); 4868 return propagateFloat128NaN( a, b );
4869 } 4869 }
4870 float_raise( float_flag_invalid ); 4870 float_raise( float_flag_invalid );
4871 z.low = float128_default_nan_low; 4871 z.low = float128_default_nan_low;
4872 z.high = float128_default_nan_high; 4872 z.high = float128_default_nan_high;
4873 return z; 4873 return z;
4874 } 4874 }
4875 if ( aExp == 0 ) { 4875 if ( aExp == 0 ) {
4876 aExp = 1; 4876 aExp = 1;
4877 bExp = 1; 4877 bExp = 1;
4878 } 4878 }
4879 if ( bSig0 < aSig0 ) goto aBigger; 4879 if ( bSig0 < aSig0 ) goto aBigger;
4880 if ( aSig0 < bSig0 ) goto bBigger; 4880 if ( aSig0 < bSig0 ) goto bBigger;
4881 if ( bSig1 < aSig1 ) goto aBigger; 4881 if ( bSig1 < aSig1 ) goto aBigger;
4882 if ( aSig1 < bSig1 ) goto bBigger; 4882 if ( aSig1 < bSig1 ) goto bBigger;
4883 return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 ); 4883 return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 );
4884 bExpBigger: 4884 bExpBigger:
4885 if ( bExp == 0x7FFF ) { 4885 if ( bExp == 0x7FFF ) {
4886 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); 4886 if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b );
4887 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); 4887 return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 );
4888 } 4888 }
4889 if ( aExp == 0 ) { 4889 if ( aExp == 0 ) {
4890 ++expDiff; 4890 ++expDiff;
4891 } 4891 }
4892 else { 4892 else {
4893 aSig0 |= LIT64( 0x4000000000000000 ); 4893 aSig0 |= LIT64( 0x4000000000000000 );
4894 } 4894 }
4895 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); 4895 shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 );
4896 bSig0 |= LIT64( 0x4000000000000000 ); 4896 bSig0 |= LIT64( 0x4000000000000000 );
4897 bBigger: 4897 bBigger:
4898 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); 4898 sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 );
4899 zExp = bExp; 4899 zExp = bExp;
4900 zSign ^= 1; 4900 zSign ^= 1;
4901 goto normalizeRoundAndPack; 4901 goto normalizeRoundAndPack;
4902 aExpBigger: 4902 aExpBigger:
4903 if ( aExp == 0x7FFF ) { 4903 if ( aExp == 0x7FFF ) {
4904 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); 4904 if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b );
4905 return a; 4905 return a;
4906 } 4906 }
4907 if ( bExp == 0 ) { 4907 if ( bExp == 0 ) {
4908 --expDiff; 4908 --expDiff;
4909 } 4909 }
4910 else { 4910 else {
4911 bSig0 |= LIT64( 0x4000000000000000 ); 4911 bSig0 |= LIT64( 0x4000000000000000 );
4912 } 4912 }
4913 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); 4913 shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 );
4914 aSig0 |= LIT64( 0x4000000000000000 ); 4914 aSig0 |= LIT64( 0x4000000000000000 );
4915 aBigger: 4915 aBigger:
4916 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); 4916 sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 );
4917 zExp = aExp; 4917 zExp = aExp;
4918 normalizeRoundAndPack: 4918 normalizeRoundAndPack:
4919 --zExp; 4919 --zExp;
4920 return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); 4920 return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 );
4921 4921
4922} 4922}
4923 4923
4924/* 4924/*
4925------------------------------------------------------------------------------- 4925-------------------------------------------------------------------------------
4926Returns the result of adding the quadruple-precision floating-point values 4926Returns the result of adding the quadruple-precision floating-point values
4927`a' and `b'. The operation is performed according to the IEC/IEEE Standard 4927`a' and `b'. The operation is performed according to the IEC/IEEE Standard
4928for Binary Floating-Point Arithmetic. 4928for Binary Floating-Point Arithmetic.
4929------------------------------------------------------------------------------- 4929-------------------------------------------------------------------------------
4930*/ 4930*/
4931float128 float128_add( float128 a, float128 b ) 4931float128 float128_add( float128 a, float128 b )
4932{ 4932{
4933 flag aSign, bSign; 4933 flag aSign, bSign;
4934 4934
4935 aSign = extractFloat128Sign( a ); 4935 aSign = extractFloat128Sign( a );
4936 bSign = extractFloat128Sign( b ); 4936 bSign = extractFloat128Sign( b );
4937 if ( aSign == bSign ) { 4937 if ( aSign == bSign ) {
4938 return addFloat128Sigs( a, b, aSign ); 4938 return addFloat128Sigs( a, b, aSign );
4939 } 4939 }
4940 else { 4940 else {
4941 return subFloat128Sigs( a, b, aSign ); 4941 return subFloat128Sigs( a, b, aSign );
4942 } 4942 }
4943 4943
4944} 4944}
4945 4945
4946/* 4946/*
4947------------------------------------------------------------------------------- 4947-------------------------------------------------------------------------------
4948Returns the result of subtracting the quadruple-precision floating-point 4948Returns the result of subtracting the quadruple-precision floating-point
4949values `a' and `b'. The operation is performed according to the IEC/IEEE 4949values `a' and `b'. The operation is performed according to the IEC/IEEE
4950Standard for Binary Floating-Point Arithmetic. 4950Standard for Binary Floating-Point Arithmetic.
4951------------------------------------------------------------------------------- 4951-------------------------------------------------------------------------------