Wed Sep 2 12:38:07 2020 UTC ()
Pull up following revision(s) (requested by thorpej in ticket #1071):

	sys/arch/alpha/alpha/fp_complete.c: revision 1.24

- compare_{le,lt}(): Use float64_{le,lt}_quiet() to avoid raising
exceptions on QNaNs.
- alpha_fp_interpret(): Instructions are 32-bits wide, so don't use a
uint64_t to contain them.
- alpha_fp_complete(): Operations on NaNs trap on Alpha, but the exception
summary reports INV (invalid operation) rather than SWC (software
completion) in this case.  So also interpret the instruction if INV
is set in the exception summary.  This will emulate operations on
NaN and correctly suppress FP traps for QNaNs.

This fixes bin/55633, which was caused by:
-> Input string "nanotime" is passed to awk's internal is_number().
-> strtod() interprets as "nan" and returns QNaN as the result.
-> Result compared against HUGE_VAL, blows up because cmptle is called
with a NaN operand, and the hardware doesn't care that it's quiet.


(martin)
diff -r1.23 -r1.23.4.1 src/sys/arch/alpha/alpha/fp_complete.c

cvs diff -r1.23 -r1.23.4.1 src/sys/arch/alpha/alpha/fp_complete.c (switch to unified diff)

--- src/sys/arch/alpha/alpha/fp_complete.c 2019/03/25 19:24:30 1.23
+++ src/sys/arch/alpha/alpha/fp_complete.c 2020/09/02 12:38:07 1.23.4.1
@@ -1,775 +1,777 @@ @@ -1,775 +1,777 @@
1/* $NetBSD: fp_complete.c,v 1.23 2019/03/25 19:24:30 maxv Exp $ */ 1/* $NetBSD: fp_complete.c,v 1.23.4.1 2020/09/02 12:38:07 martin Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2001 Ross Harvey 4 * Copyright (c) 2001 Ross Harvey
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
15 * 3. All advertising materials mentioning features or use of this software 15 * 3. All advertising materials mentioning features or use of this software
16 * must display the following acknowledgement: 16 * must display the following acknowledgement:
17 * This product includes software developed by the NetBSD 17 * This product includes software developed by the NetBSD
18 * Foundation, Inc. and its contributors. 18 * Foundation, Inc. and its contributors.
19 * 4. Neither the name of The NetBSD Foundation nor the names of its 19 * 4. Neither the name of The NetBSD Foundation nor the names of its
20 * contributors may be used to endorse or promote products derived 20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission. 21 * from this software without specific prior written permission.
22 * 22 *
23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE. 33 * POSSIBILITY OF SUCH DAMAGE.
34 */ 34 */
35 35
36#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 36#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
37 37
38__KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.23 2019/03/25 19:24:30 maxv Exp $"); 38__KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.23.4.1 2020/09/02 12:38:07 martin Exp $");
39 39
40#include <sys/param.h> 40#include <sys/param.h>
41#include <sys/systm.h> 41#include <sys/systm.h>
42#include <sys/proc.h> 42#include <sys/proc.h>
43#include <sys/atomic.h> 43#include <sys/atomic.h>
44#include <sys/evcnt.h> 44#include <sys/evcnt.h>
45 45
46#include <machine/cpu.h> 46#include <machine/cpu.h>
47#include <machine/fpu.h> 47#include <machine/fpu.h>
48#include <machine/reg.h> 48#include <machine/reg.h>
49#include <machine/alpha.h> 49#include <machine/alpha.h>
50#include <alpha/alpha/db_instruction.h> 50#include <alpha/alpha/db_instruction.h>
51 51
52#include <lib/libkern/softfloat.h> 52#include <lib/libkern/softfloat.h>
53 53
54#define TSWINSIZE 4 /* size of trap shadow window in uint32_t units */ 54#define TSWINSIZE 4 /* size of trap shadow window in uint32_t units */
55 55
56/* Set Name Opcodes AARM C.* Symbols */ 56/* Set Name Opcodes AARM C.* Symbols */
57 57
58#define CPUREG_CLASS (0xfUL << 0x10) /* INT[ALSM] */ 58#define CPUREG_CLASS (0xfUL << 0x10) /* INT[ALSM] */
59#define FPUREG_CLASS (0xfUL << 0x14) /* ITFP, FLT[ILV] */ 59#define FPUREG_CLASS (0xfUL << 0x14) /* ITFP, FLT[ILV] */
60#define CHECKFUNCTIONCODE (1UL << 0x18) /* MISC */ 60#define CHECKFUNCTIONCODE (1UL << 0x18) /* MISC */
61#define TRAPSHADOWBOUNDARY (1UL << 0x00 | /* PAL */\ 61#define TRAPSHADOWBOUNDARY (1UL << 0x00 | /* PAL */\
62 1UL << 0x19 | /* \PAL\ */\ 62 1UL << 0x19 | /* \PAL\ */\
63 1UL << 0x1a | /* JSR */\ 63 1UL << 0x1a | /* JSR */\
64 1UL << 0x1b | /* \PAL\ */\ 64 1UL << 0x1b | /* \PAL\ */\
65 1UL << 0x1d | /* \PAL\ */\ 65 1UL << 0x1d | /* \PAL\ */\
66 1UL << 0x1e | /* \PAL\ */\ 66 1UL << 0x1e | /* \PAL\ */\
67 1UL << 0x1f | /* \PAL\ */\ 67 1UL << 0x1f | /* \PAL\ */\
68 0xffffUL << 0x30 | /* branch ops */\ 68 0xffffUL << 0x30 | /* branch ops */\
69 CHECKFUNCTIONCODE) 69 CHECKFUNCTIONCODE)
70 70
71#define MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac) \ 71#define MAKE_FLOATXX(width, expwidth, sign, exp, msb, rest_of_frac) \
72 (u_int ## width ## _t)(sign) << ((width) - 1) |\ 72 (u_int ## width ## _t)(sign) << ((width) - 1) |\
73 (u_int ## width ## _t)(exp) << ((width) - 1 - (expwidth)) |\ 73 (u_int ## width ## _t)(exp) << ((width) - 1 - (expwidth)) |\
74 (u_int ## width ## _t)(msb) << ((width) - 1 - (expwidth) - 1) |\ 74 (u_int ## width ## _t)(msb) << ((width) - 1 - (expwidth) - 1) |\
75 (u_int ## width ## _t)(rest_of_frac) 75 (u_int ## width ## _t)(rest_of_frac)
76 76
77#define FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0) 77#define FLOAT32QNAN MAKE_FLOATXX(32, 8, 0, 0xff, 1, 0)
78#define FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0) 78#define FLOAT64QNAN MAKE_FLOATXX(64, 11, 0, 0x7ff, 1, 0)
79 79
80#define IS_SUBNORMAL(v) ((v)->exp == 0 && (v)->frac != 0) 80#define IS_SUBNORMAL(v) ((v)->exp == 0 && (v)->frac != 0)
81 81
82#define PREFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_DMZ \ 82#define PREFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_DMZ \
83 && IS_SUBNORMAL(v)) \ 83 && IS_SUBNORMAL(v)) \
84 (v)->frac = 0; else 84 (v)->frac = 0; else
85 85
86#define POSTFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_UMZ \ 86#define POSTFILTER_SUBNORMAL(l,v) if ((l)->l_md.md_flags & IEEE_MAP_UMZ \
87 && IS_SUBNORMAL(v)) \ 87 && IS_SUBNORMAL(v)) \
88 (v)->frac = 0; else 88 (v)->frac = 0; else
89 89
90 /* Alpha returns 2.0 for true, all zeroes for false. */ 90 /* Alpha returns 2.0 for true, all zeroes for false. */
91 91
92#define CMP_RESULT(flag) ((flag) ? 4UL << 60 : 0L) 92#define CMP_RESULT(flag) ((flag) ? 4UL << 60 : 0L)
93 93
94 /* Move bits from sw fp_c to hw fpcr. */ 94 /* Move bits from sw fp_c to hw fpcr. */
95 95
96#define CRBLIT(sw, hw, m, offs) (((sw) & ~(m)) | ((hw) >> (offs) & (m))) 96#define CRBLIT(sw, hw, m, offs) (((sw) & ~(m)) | ((hw) >> (offs) & (m)))
97 97
98struct evcnt fpevent_use; 98struct evcnt fpevent_use;
99struct evcnt fpevent_reuse; 99struct evcnt fpevent_reuse;
100 100
101/* 101/*
102 * Temporary trap shadow instrumentation. The [un]resolved counters 102 * Temporary trap shadow instrumentation. The [un]resolved counters
103 * could be kept permanently, as they provide information on whether 103 * could be kept permanently, as they provide information on whether
104 * user code has met AARM trap shadow generation requirements. 104 * user code has met AARM trap shadow generation requirements.
105 */ 105 */
106 106
107struct alpha_shadow { 107struct alpha_shadow {
108 uint64_t resolved; /* cases trigger pc found */ 108 uint64_t resolved; /* cases trigger pc found */
109 uint64_t unresolved; /* cases it wasn't, code problems? */ 109 uint64_t unresolved; /* cases it wasn't, code problems? */
110 uint64_t scans; /* trap shadow scans */ 110 uint64_t scans; /* trap shadow scans */
111 uint64_t len; /* number of instructions examined */ 111 uint64_t len; /* number of instructions examined */
112 uint64_t uop; /* bit mask of unexpected opcodes */ 112 uint64_t uop; /* bit mask of unexpected opcodes */
113 uint64_t sqrts; /* ev6+ square root single count */ 113 uint64_t sqrts; /* ev6+ square root single count */
114 uint64_t sqrtt; /* ev6+ square root double count */ 114 uint64_t sqrtt; /* ev6+ square root double count */
115 uint32_t ufunc; /* bit mask of unexpected functions */ 115 uint32_t ufunc; /* bit mask of unexpected functions */
116 uint32_t max; /* max trap shadow scan */ 116 uint32_t max; /* max trap shadow scan */
117 uint32_t nilswop; /* unexpected op codes */ 117 uint32_t nilswop; /* unexpected op codes */
118 uint32_t nilswfunc; /* unexpected function codes */ 118 uint32_t nilswfunc; /* unexpected function codes */
119 uint32_t nilanyop; /* this "cannot happen" */ 119 uint32_t nilanyop; /* this "cannot happen" */
120 uint32_t vax; /* sigs from vax fp opcodes */ 120 uint32_t vax; /* sigs from vax fp opcodes */
121} alpha_shadow, alpha_shadow_zero; 121} alpha_shadow, alpha_shadow_zero;
122 122
123static float64 float64_unk(float64, float64); 123static float64 float64_unk(float64, float64);
124static float64 compare_un(float64, float64); 124static float64 compare_un(float64, float64);
125static float64 compare_eq(float64, float64); 125static float64 compare_eq(float64, float64);
126static float64 compare_lt(float64, float64); 126static float64 compare_lt(float64, float64);
127static float64 compare_le(float64, float64); 127static float64 compare_le(float64, float64);
128static void cvt_qs_ts_st_gf_qf(uint32_t, struct lwp *); 128static void cvt_qs_ts_st_gf_qf(uint32_t, struct lwp *);
129static void cvt_gd(uint32_t, struct lwp *); 129static void cvt_gd(uint32_t, struct lwp *);
130static void cvt_qt_dg_qg(uint32_t, struct lwp *); 130static void cvt_qt_dg_qg(uint32_t, struct lwp *);
131static void cvt_tq_gq(uint32_t, struct lwp *); 131static void cvt_tq_gq(uint32_t, struct lwp *);
132 132
133static float32 (*swfp_s[])(float32, float32) = { 133static float32 (*swfp_s[])(float32, float32) = {
134 float32_add, float32_sub, float32_mul, float32_div, 134 float32_add, float32_sub, float32_mul, float32_div,
135}; 135};
136 136
137static float64 (*swfp_t[])(float64, float64) = { 137static float64 (*swfp_t[])(float64, float64) = {
138 float64_add, float64_sub, float64_mul, float64_div, 138 float64_add, float64_sub, float64_mul, float64_div,
139 compare_un, compare_eq, compare_lt, compare_le, 139 compare_un, compare_eq, compare_lt, compare_le,
140 float64_unk, float64_unk, float64_unk, float64_unk 140 float64_unk, float64_unk, float64_unk, float64_unk
141}; 141};
142 142
143static void (*swfp_cvt[])(uint32_t, struct lwp *) = { 143static void (*swfp_cvt[])(uint32_t, struct lwp *) = {
144 cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq 144 cvt_qs_ts_st_gf_qf, cvt_gd, cvt_qt_dg_qg, cvt_tq_gq
145}; 145};
146 146
147static void 147static void
148this_cannot_happen(int what_cannot_happen, int64_t bits) 148this_cannot_happen(int what_cannot_happen, int64_t bits)
149{ 149{
150 static int total; 150 static int total;
151 alpha_instruction inst; 151 alpha_instruction inst;
152 static uint64_t reported; 152 static uint64_t reported;
153 153
154 inst.bits = bits; 154 inst.bits = bits;
155 ++alpha_shadow.nilswfunc; 155 ++alpha_shadow.nilswfunc;
156 if (bits != -1) 156 if (bits != -1)
157 alpha_shadow.uop |= 1UL << inst.generic_format.opcode; 157 alpha_shadow.uop |= 1UL << inst.generic_format.opcode;
158 if (1UL << what_cannot_happen & reported) 158 if (1UL << what_cannot_happen & reported)
159 return; 159 return;
160 reported |= 1UL << what_cannot_happen; 160 reported |= 1UL << what_cannot_happen;
161 if (total >= 1000) 161 if (total >= 1000)
162 return; /* right now, this return "cannot happen" */ 162 return; /* right now, this return "cannot happen" */
163 ++total; 163 ++total;
164 if (bits) 164 if (bits)
165 printf("FP instruction %x\n", (unsigned int)bits); 165 printf("FP instruction %x\n", (unsigned int)bits);
166 printf("FP event %d/%lx/%lx\n", what_cannot_happen, reported, 166 printf("FP event %d/%lx/%lx\n", what_cannot_happen, reported,
167 alpha_shadow.uop); 167 alpha_shadow.uop);
168 printf("Please report this to port-alpha-maintainer@NetBSD.org\n"); 168 printf("Please report this to port-alpha-maintainer@NetBSD.org\n");
169} 169}
170 170
171static inline void 171static inline void
172sts(unsigned int rn, s_float *v, struct lwp *l) 172sts(unsigned int rn, s_float *v, struct lwp *l)
173{ 173{
174 alpha_sts(rn, v); 174 alpha_sts(rn, v);
175 PREFILTER_SUBNORMAL(l, v); 175 PREFILTER_SUBNORMAL(l, v);
176} 176}
177 177
178static inline void 178static inline void
179stt(unsigned int rn, t_float *v, struct lwp *l) 179stt(unsigned int rn, t_float *v, struct lwp *l)
180{ 180{
181 alpha_stt(rn, v); 181 alpha_stt(rn, v);
182 PREFILTER_SUBNORMAL(l, v); 182 PREFILTER_SUBNORMAL(l, v);
183} 183}
184 184
185static inline void 185static inline void
186lds(unsigned int rn, s_float *v, struct lwp *l) 186lds(unsigned int rn, s_float *v, struct lwp *l)
187{ 187{
188 POSTFILTER_SUBNORMAL(l, v); 188 POSTFILTER_SUBNORMAL(l, v);
189 alpha_lds(rn, v); 189 alpha_lds(rn, v);
190} 190}
191 191
192static inline void 192static inline void
193ldt(unsigned int rn, t_float *v, struct lwp *l) 193ldt(unsigned int rn, t_float *v, struct lwp *l)
194{ 194{
195 POSTFILTER_SUBNORMAL(l, v); 195 POSTFILTER_SUBNORMAL(l, v);
196 alpha_ldt(rn, v); 196 alpha_ldt(rn, v);
197} 197}
198 198
199static float64 199static float64
200compare_lt(float64 a, float64 b) 200compare_lt(float64 a, float64 b)
201{ 201{
202 return CMP_RESULT(float64_lt(a, b)); 202 return CMP_RESULT(float64_lt_quiet(a, b));
203} 203}
204 204
205static float64 205static float64
206compare_le(float64 a, float64 b) 206compare_le(float64 a, float64 b)
207{ 207{
208 return CMP_RESULT(float64_le(a, b)); 208 return CMP_RESULT(float64_le_quiet(a, b));
209} 209}
210 210
211static float64 211static float64
212compare_un(float64 a, float64 b) 212compare_un(float64 a, float64 b)
213{ 213{
214 if (float64_is_nan(a) | float64_is_nan(b)) { 214 if (float64_is_nan(a) | float64_is_nan(b)) {
215 if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b)) 215 if (float64_is_signaling_nan(a) | float64_is_signaling_nan(b))
216 float_set_invalid(); 216 float_set_invalid();
217 return CMP_RESULT(1); 217 return CMP_RESULT(1);
218 } 218 }
219 return CMP_RESULT(0); 219 return CMP_RESULT(0);
220} 220}
221 221
222static float64 222static float64
223compare_eq(float64 a, float64 b) 223compare_eq(float64 a, float64 b)
224{ 224{
225 return CMP_RESULT(float64_eq(a, b)); 225 return CMP_RESULT(float64_eq(a, b));
226} 226}
227/* 227/*
228 * A note regarding the VAX FP ops. 228 * A note regarding the VAX FP ops.
229 * 229 *
230 * The AARM gives us complete leeway to set or not set status flags on VAX 230 * The AARM gives us complete leeway to set or not set status flags on VAX
231 * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set 231 * ops, but we do any subnorm, NaN and dirty zero fixups anyway, and we set
232 * flags by IEEE rules. Many ops are common to d/f/g and s/t source types. 232 * flags by IEEE rules. Many ops are common to d/f/g and s/t source types.
233 * For the purely vax ones, it's hard to imagine ever running them. 233 * For the purely vax ones, it's hard to imagine ever running them.
234 * (Generated VAX fp ops with completion flags? Hmm.) We are careful never 234 * (Generated VAX fp ops with completion flags? Hmm.) We are careful never
235 * to panic, assert, or print unlimited output based on a path through the 235 * to panic, assert, or print unlimited output based on a path through the
236 * decoder, so weird cases don't become security issues. 236 * decoder, so weird cases don't become security issues.
237 */ 237 */
238static void 238static void
239cvt_qs_ts_st_gf_qf(uint32_t inst_bits, struct lwp *l) 239cvt_qs_ts_st_gf_qf(uint32_t inst_bits, struct lwp *l)
240{ 240{
241 t_float tfb, tfc; 241 t_float tfb, tfc;
242 s_float sfb, sfc; 242 s_float sfb, sfc;
243 alpha_instruction inst; 243 alpha_instruction inst;
244 244
245 inst.bits = inst_bits; 245 inst.bits = inst_bits;
246 /* 246 /*
247 * cvtst and cvtts have the same opcode, function, and source. The 247 * cvtst and cvtts have the same opcode, function, and source. The
248 * distinction for cvtst is hidden in the illegal modifier combinations. 248 * distinction for cvtst is hidden in the illegal modifier combinations.
249 * We decode even the non-/s modifier, so that the fix-up-always mode 249 * We decode even the non-/s modifier, so that the fix-up-always mode
250 * works on ev6 and later. The rounding bits are unused and fixed for 250 * works on ev6 and later. The rounding bits are unused and fixed for
251 * cvtst, so we check those too. 251 * cvtst, so we check those too.
252 */ 252 */
253 switch(inst.float_format.function) { 253 switch(inst.float_format.function) {
254 case op_cvtst: 254 case op_cvtst:
255 case op_cvtst_u: 255 case op_cvtst_u:
256 sts(inst.float_detail.fb, &sfb, l); 256 sts(inst.float_detail.fb, &sfb, l);
257 tfc.i = float32_to_float64(sfb.i); 257 tfc.i = float32_to_float64(sfb.i);
258 ldt(inst.float_detail.fc, &tfc, l); 258 ldt(inst.float_detail.fc, &tfc, l);
259 return; 259 return;
260 } 260 }
261 if(inst.float_detail.src == 2) { 261 if(inst.float_detail.src == 2) {
262 stt(inst.float_detail.fb, &tfb, l); 262 stt(inst.float_detail.fb, &tfb, l);
263 sfc.i = float64_to_float32(tfb.i); 263 sfc.i = float64_to_float32(tfb.i);
264 lds(inst.float_detail.fc, &sfc, l); 264 lds(inst.float_detail.fc, &sfc, l);
265 return; 265 return;
266 } 266 }
267 /* 0: S/F */ 267 /* 0: S/F */
268 /* 1: /D */ 268 /* 1: /D */
269 /* 3: Q/Q */ 269 /* 3: Q/Q */
270 this_cannot_happen(5, inst.generic_format.opcode); 270 this_cannot_happen(5, inst.generic_format.opcode);
271 tfc.i = FLOAT64QNAN; 271 tfc.i = FLOAT64QNAN;
272 ldt(inst.float_detail.fc, &tfc, l); 272 ldt(inst.float_detail.fc, &tfc, l);
273 return; 273 return;
274} 274}
275 275
276static void 276static void
277cvt_gd(uint32_t inst_bits, struct lwp *l) 277cvt_gd(uint32_t inst_bits, struct lwp *l)
278{ 278{
279 t_float tfb, tfc; 279 t_float tfb, tfc;
280 alpha_instruction inst; 280 alpha_instruction inst;
281 281
282 inst.bits = inst_bits; 282 inst.bits = inst_bits;
283 stt(inst.float_detail.fb, &tfb, l); 283 stt(inst.float_detail.fb, &tfb, l);
284 (void) float64_to_float32(tfb.i); 284 (void) float64_to_float32(tfb.i);
285 l->l_md.md_flags &= ~NETBSD_FLAG_TO_FP_C(FP_X_IMP); 285 l->l_md.md_flags &= ~NETBSD_FLAG_TO_FP_C(FP_X_IMP);
286 tfc.i = float64_add(tfb.i, (float64)0); 286 tfc.i = float64_add(tfb.i, (float64)0);
287 ldt(inst.float_detail.fc, &tfc, l); 287 ldt(inst.float_detail.fc, &tfc, l);
288} 288}
289 289
290static void 290static void
291cvt_qt_dg_qg(uint32_t inst_bits, struct lwp *l) 291cvt_qt_dg_qg(uint32_t inst_bits, struct lwp *l)
292{ 292{
293 t_float tfb, tfc; 293 t_float tfb, tfc;
294 alpha_instruction inst; 294 alpha_instruction inst;
295 295
296 inst.bits = inst_bits; 296 inst.bits = inst_bits;
297 switch(inst.float_detail.src) { 297 switch(inst.float_detail.src) {
298 case 0: /* S/F */ 298 case 0: /* S/F */
299 this_cannot_happen(3, inst.bits); 299 this_cannot_happen(3, inst.bits);
300 /* fall thru */ 300 /* fall thru */
301 case 1: /* D */ 301 case 1: /* D */
302 /* VAX dirty 0's and reserved ops => UNPREDICTABLE */ 302 /* VAX dirty 0's and reserved ops => UNPREDICTABLE */
303 /* We've done what's important by just not trapping */ 303 /* We've done what's important by just not trapping */
304 tfc.i = 0; 304 tfc.i = 0;
305 break; 305 break;
306 case 2: /* T/G */ 306 case 2: /* T/G */
307 this_cannot_happen(4, inst.bits); 307 this_cannot_happen(4, inst.bits);
308 tfc.i = 0; 308 tfc.i = 0;
309 break; 309 break;
310 case 3: /* Q/Q */ 310 case 3: /* Q/Q */
311 stt(inst.float_detail.fb, &tfb, l); 311 stt(inst.float_detail.fb, &tfb, l);
312 tfc.i = int64_to_float64(tfb.i); 312 tfc.i = int64_to_float64(tfb.i);
313 break; 313 break;
314 } 314 }
315 alpha_ldt(inst.float_detail.fc, &tfc); 315 alpha_ldt(inst.float_detail.fc, &tfc);
316} 316}
317/* 317/*
318 * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's 318 * XXX: AARM and 754 seem to disagree here, also, beware of softfloat's
319 * unfortunate habit of always returning the nontrapping result. 319 * unfortunate habit of always returning the nontrapping result.
320 * XXX: there are several apparent AARM/AAH disagreements, as well as 320 * XXX: there are several apparent AARM/AAH disagreements, as well as
321 * the issue of trap handler pc and trapping results. 321 * the issue of trap handler pc and trapping results.
322 */ 322 */
323static void 323static void
324cvt_tq_gq(uint32_t inst_bits, struct lwp *l) 324cvt_tq_gq(uint32_t inst_bits, struct lwp *l)
325{ 325{
326 t_float tfb, tfc; 326 t_float tfb, tfc;
327 alpha_instruction inst; 327 alpha_instruction inst;
328 328
329 inst.bits = inst_bits; 329 inst.bits = inst_bits;
330 stt(inst.float_detail.fb, &tfb, l); 330 stt(inst.float_detail.fb, &tfb, l);
331 tfc.i = tfb.sign ? float64_to_int64(tfb.i) : float64_to_uint64(tfb.i); 331 tfc.i = tfb.sign ? float64_to_int64(tfb.i) : float64_to_uint64(tfb.i);
332 alpha_ldt(inst.float_detail.fc, &tfc); /* yes, ldt */ 332 alpha_ldt(inst.float_detail.fc, &tfc); /* yes, ldt */
333} 333}
334 334
335static uint64_t 335static uint64_t
336fp_c_to_fpcr_1(uint64_t fpcr, uint64_t fp_c) 336fp_c_to_fpcr_1(uint64_t fpcr, uint64_t fp_c)
337{ 337{
338 uint64_t disables; 338 uint64_t disables;
339 339
340 /* 340 /*
341 * It's hard to arrange for conforming bit fields, because the FP_C 341 * It's hard to arrange for conforming bit fields, because the FP_C
342 * and the FPCR are both architected, with specified (and relatively 342 * and the FPCR are both architected, with specified (and relatively
343 * scrambled) bit numbers. Defining an internal unscrambled FP_C 343 * scrambled) bit numbers. Defining an internal unscrambled FP_C
344 * wouldn't help much, because every user exception requires the 344 * wouldn't help much, because every user exception requires the
345 * architected bit order in the sigcontext. 345 * architected bit order in the sigcontext.
346 * 346 *
347 * Programs that fiddle with the fpcr exception bits (instead of fp_c) 347 * Programs that fiddle with the fpcr exception bits (instead of fp_c)
348 * will lose, because those bits can be and usually are subsetted; 348 * will lose, because those bits can be and usually are subsetted;
349 * the official home is in the fp_c. Furthermore, the kernel puts 349 * the official home is in the fp_c. Furthermore, the kernel puts
350 * phony enables (it lies :-) in the fpcr in order to get control when 350 * phony enables (it lies :-) in the fpcr in order to get control when
351 * it is necessary to initially set a sticky bit. 351 * it is necessary to initially set a sticky bit.
352 */ 352 */
353 353
354 fpcr &= FPCR_DYN(3); 354 fpcr &= FPCR_DYN(3);
355 355
356 /* 356 /*
357 * enable traps = case where flag bit is clear OR program wants a trap 357 * enable traps = case where flag bit is clear OR program wants a trap
358 * enables = ~flags | mask 358 * enables = ~flags | mask
359 * disables = ~(~flags | mask) 359 * disables = ~(~flags | mask)
360 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871) 360 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
361 */ 361 */
362 disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c); 362 disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c);
363 363
364 fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3); 364 fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
365 fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0); 365 fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);
366 366
367# if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 && \ 367# if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 && \
368 FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 && \ 368 FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 && \
369 FP_X_UFL << (61 - 3) == FPCR_UNFD && \ 369 FP_X_UFL << (61 - 3) == FPCR_UNFD && \
370 FP_X_IMP << (61 - 3) == FPCR_INED && \ 370 FP_X_IMP << (61 - 3) == FPCR_INED && \
371 FP_X_OFL << (49 - 0) == FPCR_OVFD) 371 FP_X_OFL << (49 - 0) == FPCR_OVFD)
372# error "Assertion failed" 372# error "Assertion failed"
373 /* 373 /*
374 * We don't care about the other built-in bit numbers because they 374 * We don't care about the other built-in bit numbers because they
375 * have been architecturally specified. 375 * have been architecturally specified.
376 */ 376 */
377# endif 377# endif
378 378
379 fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START); 379 fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
380 fpcr |= (fp_c & IEEE_MAP_DMZ) << 36; 380 fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
381 if (fp_c & FP_C_MIRRORED) 381 if (fp_c & FP_C_MIRRORED)
382 fpcr |= FPCR_SUM; 382 fpcr |= FPCR_SUM;
383 if (fp_c & IEEE_MAP_UMZ) 383 if (fp_c & IEEE_MAP_UMZ)
384 fpcr |= FPCR_UNDZ | FPCR_UNFD; 384 fpcr |= FPCR_UNDZ | FPCR_UNFD;
385 fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41; 385 fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
386 return fpcr; 386 return fpcr;
387} 387}
388 388
389static void 389static void
390fp_c_to_fpcr(struct lwp *l) 390fp_c_to_fpcr(struct lwp *l)
391{ 391{
392 alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), l->l_md.md_flags)); 392 alpha_write_fpcr(fp_c_to_fpcr_1(alpha_read_fpcr(), l->l_md.md_flags));
393} 393}
394 394
395void 395void
396alpha_write_fp_c(struct lwp *l, uint64_t fp_c) 396alpha_write_fp_c(struct lwp *l, uint64_t fp_c)
397{ 397{
398 uint64_t md_flags; 398 uint64_t md_flags;
399 399
400 fp_c &= MDLWP_FP_C; 400 fp_c &= MDLWP_FP_C;
401 md_flags = l->l_md.md_flags; 401 md_flags = l->l_md.md_flags;
402 if ((md_flags & MDLWP_FP_C) == fp_c) 402 if ((md_flags & MDLWP_FP_C) == fp_c)
403 return; 403 return;
404 l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c; 404 l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c;
405 kpreempt_disable(); 405 kpreempt_disable();
406 if (md_flags & MDLWP_FPACTIVE) { 406 if (md_flags & MDLWP_FPACTIVE) {
407 alpha_pal_wrfen(1); 407 alpha_pal_wrfen(1);
408 fp_c_to_fpcr(l); 408 fp_c_to_fpcr(l);
409 alpha_pal_wrfen(0); 409 alpha_pal_wrfen(0);
410 } 410 }
411 kpreempt_enable(); 411 kpreempt_enable();
412} 412}
413 413
414uint64_t 414uint64_t
415alpha_read_fp_c(struct lwp *l) 415alpha_read_fp_c(struct lwp *l)
416{ 416{
417 /* 417 /*
418 * A possibly-desireable EV6-specific optimization would deviate from 418 * A possibly-desireable EV6-specific optimization would deviate from
419 * the Alpha Architecture spec and keep some FP_C bits in the FPCR, 419 * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
420 * but in a transparent way. Some of the code for that would need to 420 * but in a transparent way. Some of the code for that would need to
421 * go right here. 421 * go right here.
422 */ 422 */
423 return l->l_md.md_flags & MDLWP_FP_C; 423 return l->l_md.md_flags & MDLWP_FP_C;
424} 424}
425 425
426static float64 426static float64
427float64_unk(float64 a, float64 b) 427float64_unk(float64 a, float64 b)
428{ 428{
429 return 0; 429 return 0;
430} 430}
431 431
432/* 432/*
433 * The real function field encodings for IEEE and VAX FP instructions. 433 * The real function field encodings for IEEE and VAX FP instructions.
434 * 434 *
435 * Since there is only one operand type field, the cvtXX instructions 435 * Since there is only one operand type field, the cvtXX instructions
436 * require a variety of special cases, and these have to be analyzed as 436 * require a variety of special cases, and these have to be analyzed as
437 * they don't always fit into the field descriptions in AARM section I. 437 * they don't always fit into the field descriptions in AARM section I.
438 * 438 *
439 * Lots of staring at bits in the appendix shows what's really going on. 439 * Lots of staring at bits in the appendix shows what's really going on.
440 * 440 *
441 * | | 441 * | |
442 * 15 14 13|12 11 10 09|08 07 06 05 442 * 15 14 13|12 11 10 09|08 07 06 05
443 * --------======------============ 443 * --------======------============
444 * TRAP : RND : SRC : FUNCTION : 444 * TRAP : RND : SRC : FUNCTION :
445 * 0 0 0:. . .:. . . . . . . . . . . . Imprecise 445 * 0 0 0:. . .:. . . . . . . . . . . . Imprecise
446 * 0 0 1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output) 446 * 0 0 1|. . .:. . . . . . . . . . . ./U underflow enable (if FP output)
447 * | /V overfloat enable (if int output) 447 * | /V overfloat enable (if int output)
448 * 0 1 0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST 448 * 0 1 0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST
449 * 0 1 1|. . .:. . . . . . . . . . . . Unsupported 449 * 0 1 1|. . .:. . . . . . . . . . . . Unsupported
450 * 1 0 0:. . .:. . . . . . . . . . . ./S software completion (VAX only) 450 * 1 0 0:. . .:. . . . . . . . . . . ./S software completion (VAX only)
451 * 1 0 1|. . .:. . . . . . . . . . . ./SU 451 * 1 0 1|. . .:. . . . . . . . . . . ./SU
452 * | /SV 452 * | /SV
453 * 1 1 0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S 453 * 1 1 0:. . .:. . . . . . . . . . . ."Unsupported", but used for CVTST/S
454 * 1 1 1|. . .:. . . . . . . . . . . ./SUI (if FP output) (IEEE only) 454 * 1 1 1|. . .:. . . . . . . . . . . ./SUI (if FP output) (IEEE only)
455 * | /SVI (if int output) (IEEE only) 455 * | /SVI (if int output) (IEEE only)
456 * S I UV: In other words: bits 15:13 are S:I:UV, except that _usually_ 456 * S I UV: In other words: bits 15:13 are S:I:UV, except that _usually_
457 * | not all combinations are valid. 457 * | not all combinations are valid.
458 * | | 458 * | |
459 * 15 14 13|12 11 10 09|08 07 06 05 459 * 15 14 13|12 11 10 09|08 07 06 05
460 * --------======------============ 460 * --------======------============
461 * TRAP : RND : SRC : FUNCTION : 461 * TRAP : RND : SRC : FUNCTION :
462 * | 0 0 . . . . . . . . . . . ./C Chopped 462 * | 0 0 . . . . . . . . . . . ./C Chopped
463 * : 0 1 . . . . . . . . . . . ./M Minus Infinity 463 * : 0 1 . . . . . . . . . . . ./M Minus Infinity
464 * | 1 0 . . . . . . . . . . . . Normal 464 * | 1 0 . . . . . . . . . . . . Normal
465 * : 1 1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity) 465 * : 1 1 . . . . . . . . . . . ./D Dynamic (in FPCR: Plus Infinity)
466 * | | 466 * | |
467 * 15 14 13|12 11 10 09|08 07 06 05 467 * 15 14 13|12 11 10 09|08 07 06 05
468 * --------======------============ 468 * --------======------============
469 * TRAP : RND : SRC : FUNCTION : 469 * TRAP : RND : SRC : FUNCTION :
470 * 0 0. . . . . . . . . . S/F 470 * 0 0. . . . . . . . . . S/F
471 * 0 1. . . . . . . . . . -/D 471 * 0 1. . . . . . . . . . -/D
472 * 1 0. . . . . . . . . . T/G 472 * 1 0. . . . . . . . . . T/G
473 * 1 1. . . . . . . . . . Q/Q 473 * 1 1. . . . . . . . . . Q/Q
474 * | | 474 * | |
475 * 15 14 13|12 11 10 09|08 07 06 05 475 * 15 14 13|12 11 10 09|08 07 06 05
476 * --------======------============ 476 * --------======------============
477 * TRAP : RND : SRC : FUNCTION : 477 * TRAP : RND : SRC : FUNCTION :
478 * 0 0 0 0 . . . addX 478 * 0 0 0 0 . . . addX
479 * 0 0 0 1 . . . subX 479 * 0 0 0 1 . . . subX
480 * 0 0 1 0 . . . mulX 480 * 0 0 1 0 . . . mulX
481 * 0 0 1 1 . . . divX 481 * 0 0 1 1 . . . divX
482 * 0 1 0 0 . . . cmpXun 482 * 0 1 0 0 . . . cmpXun
483 * 0 1 0 1 . . . cmpXeq 483 * 0 1 0 1 . . . cmpXeq
484 * 0 1 1 0 . . . cmpXlt 484 * 0 1 1 0 . . . cmpXlt
485 * 0 1 1 1 . . . cmpXle 485 * 0 1 1 1 . . . cmpXle
486 * 1 0 0 0 . . . reserved 486 * 1 0 0 0 . . . reserved
487 * 1 0 0 1 . . . reserved 487 * 1 0 0 1 . . . reserved
488 * 1 0 1 0 . . . sqrt[fg] (op_fix, not exactly "vax") 488 * 1 0 1 0 . . . sqrt[fg] (op_fix, not exactly "vax")
489 * 1 0 1 1 . . . sqrt[st] (op_fix, not exactly "ieee") 489 * 1 0 1 1 . . . sqrt[st] (op_fix, not exactly "ieee")
490 * 1 1 0 0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f) 490 * 1 1 0 0 . . . cvtXs/f (cvt[qt]s, cvtst(!), cvt[gq]f)
491 * 1 1 0 1 . . . cvtXd (vax only) 491 * 1 1 0 1 . . . cvtXd (vax only)
492 * 1 1 1 0 . . . cvtXt/g (cvtqt, cvt[dq]g only) 492 * 1 1 1 0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
493 * 1 1 1 1 . . . cvtXq/q (cvttq, cvtgq) 493 * 1 1 1 1 . . . cvtXq/q (cvttq, cvtgq)
494 * | | 494 * | |
495 * 15 14 13|12 11 10 09|08 07 06 05 the twilight zone 495 * 15 14 13|12 11 10 09|08 07 06 05 the twilight zone
496 * --------======------============ 496 * --------======------============
497 * TRAP : RND : SRC : FUNCTION : 497 * TRAP : RND : SRC : FUNCTION :
498 * /s /i /u x x 1 0 1 1 0 0 . . . cvtts, /siu only 0, 1, 5, 7 498 * /s /i /u x x 1 0 1 1 0 0 . . . cvtts, /siu only 0, 1, 5, 7
499 * 0 1 0 1 0 1 0 1 1 0 0 . . . cvtst (src == T (!)) 2ac NOT /S 499 * 0 1 0 1 0 1 0 1 1 0 0 . . . cvtst (src == T (!)) 2ac NOT /S
500 * 1 1 0 1 0 1 0 1 1 0 0 . . . cvtst/s (src == T (!)) 6ac 500 * 1 1 0 1 0 1 0 1 1 0 0 . . . cvtst/s (src == T (!)) 6ac
501 * x 0 x x x x 0 1 1 1 1 . . . cvttq/_ (src == T) 501 * x 0 x x x x 0 1 1 1 1 . . . cvttq/_ (src == T)
502 */ 502 */
503 503
504static void 504static void
505alpha_fp_interpret(alpha_instruction *pc, struct lwp *l, uint64_t bits) 505alpha_fp_interpret(alpha_instruction *pc, struct lwp *l, uint32_t bits)
506{ 506{
507 s_float sfa, sfb, sfc; 507 s_float sfa, sfb, sfc;
508 t_float tfa, tfb, tfc; 508 t_float tfa, tfb, tfc;
509 alpha_instruction inst; 509 alpha_instruction inst;
510 510
511 inst.bits = bits; 511 inst.bits = bits;
512 switch(inst.generic_format.opcode) { 512 switch(inst.generic_format.opcode) {
513 default: 513 default:
514 /* this "cannot happen" */ 514 /* this "cannot happen" */
515 this_cannot_happen(2, inst.bits); 515 this_cannot_happen(2, inst.bits);
516 return; 516 return;
517 case op_any_float: 517 case op_any_float:
518 if (inst.float_format.function == op_cvtql_sv || 518 if (inst.float_format.function == op_cvtql_sv ||
519 inst.float_format.function == op_cvtql_v) { 519 inst.float_format.function == op_cvtql_v) {
520 alpha_stt(inst.float_detail.fb, &tfb); 520 alpha_stt(inst.float_detail.fb, &tfb);
521 sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN; 521 sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
522 alpha_lds(inst.float_detail.fc, &sfc); 522 alpha_lds(inst.float_detail.fc, &sfc);
523 float_raise(FP_X_INV); 523 float_raise(FP_X_INV);
524 } else { 524 } else {
525 ++alpha_shadow.nilanyop; 525 ++alpha_shadow.nilanyop;
526 this_cannot_happen(3, inst.bits); 526 this_cannot_happen(3, inst.bits);
527 } 527 }
528 break; 528 break;
529 case op_vax_float: 529 case op_vax_float:
530 ++alpha_shadow.vax; /* fall thru */ 530 ++alpha_shadow.vax; /* fall thru */
531 case op_ieee_float: 531 case op_ieee_float:
532 case op_fix_float: 532 case op_fix_float:
533 switch(inst.float_detail.src) { 533 switch(inst.float_detail.src) {
534 case op_src_sf: 534 case op_src_sf:
535 sts(inst.float_detail.fb, &sfb, l); 535 sts(inst.float_detail.fb, &sfb, l);
536 if (inst.float_detail.opclass == 10) 536 if (inst.float_detail.opclass == 10)
537 sfc.i = float32_sqrt(sfb.i); 537 sfc.i = float32_sqrt(sfb.i);
538 else if (inst.float_detail.opclass & ~3) { 538 else if (inst.float_detail.opclass & ~3) {
539 this_cannot_happen(1, inst.bits); 539 this_cannot_happen(1, inst.bits);
540 sfc.i = FLOAT32QNAN; 540 sfc.i = FLOAT32QNAN;
541 } else { 541 } else {
542 sts(inst.float_detail.fa, &sfa, l); 542 sts(inst.float_detail.fa, &sfa, l);
543 sfc.i = (*swfp_s[inst.float_detail.opclass])( 543 sfc.i = (*swfp_s[inst.float_detail.opclass])(
544 sfa.i, sfb.i); 544 sfa.i, sfb.i);
545 } 545 }
546 lds(inst.float_detail.fc, &sfc, l); 546 lds(inst.float_detail.fc, &sfc, l);
547 break; 547 break;
548 case op_src_xd: 548 case op_src_xd:
549 case op_src_tg: 549 case op_src_tg:
550 if (inst.float_detail.opclass >= 12) 550 if (inst.float_detail.opclass >= 12)
551 (*swfp_cvt[inst.float_detail.opclass - 12])( 551 (*swfp_cvt[inst.float_detail.opclass - 12])(
552 inst.bits, l); 552 inst.bits, l);
553 else { 553 else {
554 stt(inst.float_detail.fb, &tfb, l); 554 stt(inst.float_detail.fb, &tfb, l);
555 if (inst.float_detail.opclass == 10) 555 if (inst.float_detail.opclass == 10)
556 tfc.i = float64_sqrt(tfb.i); 556 tfc.i = float64_sqrt(tfb.i);
557 else { 557 else {
558 stt(inst.float_detail.fa, &tfa, l); 558 stt(inst.float_detail.fa, &tfa, l);
559 tfc.i = (*swfp_t[inst.float_detail 559 tfc.i = (*swfp_t[inst.float_detail
560 .opclass])(tfa.i, tfb.i); 560 .opclass])(tfa.i, tfb.i);
561 } 561 }
562 ldt(inst.float_detail.fc, &tfc, l); 562 ldt(inst.float_detail.fc, &tfc, l);
563 } 563 }
564 break; 564 break;
565 case op_src_qq: 565 case op_src_qq:
566 float_raise(FP_X_IMP); 566 float_raise(FP_X_IMP);
567 break; 567 break;
568 } 568 }
569 } 569 }
570} 570}
571 571
572static int 572static int
573alpha_fp_complete_at(alpha_instruction *trigger_pc, struct lwp *l, 573alpha_fp_complete_at(alpha_instruction *trigger_pc, struct lwp *l,
574 uint64_t *ucode) 574 uint64_t *ucode)
575{ 575{
576 int needsig; 576 int needsig;
577 alpha_instruction inst; 577 alpha_instruction inst;
578 uint64_t rm, fpcr, orig_fpcr; 578 uint64_t rm, fpcr, orig_fpcr;
579 uint64_t orig_flags, new_flags, changed_flags, md_flags; 579 uint64_t orig_flags, new_flags, changed_flags, md_flags;
580 580
581 if (__predict_false(copyin(trigger_pc, &inst, sizeof inst))) { 581 if (__predict_false(copyin(trigger_pc, &inst, sizeof inst))) {
582 this_cannot_happen(6, -1); 582 this_cannot_happen(6, -1);
583 return SIGSEGV; 583 return SIGSEGV;
584 } 584 }
585 kpreempt_disable(); 585 kpreempt_disable();
586 if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) { 586 if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
587 fpu_load(); 587 fpu_load();
588 } 588 }
589 alpha_pal_wrfen(1); 589 alpha_pal_wrfen(1);
590 /* 590 /*
591 * If necessary, lie about the dynamic rounding mode so emulation 591 * If necessary, lie about the dynamic rounding mode so emulation
592 * software need go to only one place for it, and so we don't have to 592 * software need go to only one place for it, and so we don't have to
593 * lock any memory locations or pass a third parameter to every 593 * lock any memory locations or pass a third parameter to every
594 * SoftFloat entry point. 594 * SoftFloat entry point.
595 */ 595 */
596 orig_fpcr = fpcr = alpha_read_fpcr(); 596 orig_fpcr = fpcr = alpha_read_fpcr();
597 rm = inst.float_detail.rnd; 597 rm = inst.float_detail.rnd;
598 if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) { 598 if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) {
599 fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm); 599 fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm);
600 alpha_write_fpcr(fpcr); 600 alpha_write_fpcr(fpcr);
601 } 601 }
602 orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags); 602 orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
603 603
604 alpha_fp_interpret(trigger_pc, l, inst.bits); 604 alpha_fp_interpret(trigger_pc, l, inst.bits);
605 605
606 md_flags = l->l_md.md_flags; 606 md_flags = l->l_md.md_flags;
607 607
608 new_flags = FP_C_TO_NETBSD_FLAG(md_flags); 608 new_flags = FP_C_TO_NETBSD_FLAG(md_flags);
609 changed_flags = orig_flags ^ new_flags; 609 changed_flags = orig_flags ^ new_flags;
610 KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */ 610 KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
611 alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags)); 611 alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
612 needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags); 612 needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags);
613 alpha_pal_wrfen(0); 613 alpha_pal_wrfen(0);
614 kpreempt_enable(); 614 kpreempt_enable();
615 if (__predict_false(needsig)) { 615 if (__predict_false(needsig)) {
616 *ucode = needsig; 616 *ucode = needsig;
617 return SIGFPE; 617 return SIGFPE;
618 } 618 }
619 return 0; 619 return 0;
620} 620}
621 621
622int 622int
623alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode) 623alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode)
624{ 624{
625 int t; 625 int t;
626 int sig; 626 int sig;
627 uint64_t op_class; 627 uint64_t op_class;
628 alpha_instruction inst; 628 alpha_instruction inst;
629 /* "trigger_pc" is Compaq's term for the earliest faulting op */ 629 /* "trigger_pc" is Compaq's term for the earliest faulting op */
630 alpha_instruction *trigger_pc, *usertrap_pc; 630 alpha_instruction *trigger_pc, *usertrap_pc;
631 alpha_instruction *pc, *win_begin, tsw[TSWINSIZE]; 631 alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];
632 632
633 sig = SIGFPE; 633 sig = SIGFPE;
634 pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC]; 634 pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC];
635 trigger_pc = pc - 1; /* for ALPHA_AMASK_PAT case */ 635 trigger_pc = pc - 1; /* for ALPHA_AMASK_PAT case */
636 if (cpu_amask & ALPHA_AMASK_PAT) { 636 if (cpu_amask & ALPHA_AMASK_PAT) {
637 if (a0 & 1 || alpha_fp_sync_complete) { 637 /* SWC | INV */
 638 if (a0 & 3 || alpha_fp_sync_complete) {
638 sig = alpha_fp_complete_at(trigger_pc, l, ucode); 639 sig = alpha_fp_complete_at(trigger_pc, l, ucode);
639 goto done; 640 goto done;
640 } 641 }
641 } 642 }
642 *ucode = a0; 643 *ucode = a0;
643 if (!(a0 & 1)) 644 /* SWC | INV */
 645 if (!(a0 & 3))
644 return sig; 646 return sig;
645/* 647/*
646 * At this point we are somewhere in the trap shadow of one or more instruc- 648 * At this point we are somewhere in the trap shadow of one or more instruc-
647 * tions that have trapped with software completion specified. We have a mask 649 * tions that have trapped with software completion specified. We have a mask
648 * of the registers written by trapping instructions. 650 * of the registers written by trapping instructions.
649 * 651 *
650 * Now step backwards through the trap shadow, clearing bits in the 652 * Now step backwards through the trap shadow, clearing bits in the
651 * destination write mask until the trigger instruction is found, and 653 * destination write mask until the trigger instruction is found, and
652 * interpret this one instruction in SW. If a SIGFPE is not required, back up 654 * interpret this one instruction in SW. If a SIGFPE is not required, back up
653 * the PC until just after this instruction and restart. This will execute all 655 * the PC until just after this instruction and restart. This will execute all
654 * trap shadow instructions between the trigger pc and the trap pc twice. 656 * trap shadow instructions between the trigger pc and the trap pc twice.
655 */ 657 */
656 trigger_pc = 0; 658 trigger_pc = 0;
657 win_begin = pc; 659 win_begin = pc;
658 ++alpha_shadow.scans; 660 ++alpha_shadow.scans;
659 t = alpha_shadow.len; 661 t = alpha_shadow.len;
660 for (--pc; a1; --pc) { 662 for (--pc; a1; --pc) {
661 ++alpha_shadow.len; 663 ++alpha_shadow.len;
662 if (pc < win_begin) { 664 if (pc < win_begin) {
663 win_begin = pc - TSWINSIZE + 1; 665 win_begin = pc - TSWINSIZE + 1;
664 if (copyin(win_begin, tsw, sizeof tsw)) { 666 if (copyin(win_begin, tsw, sizeof tsw)) {
665 /* sigh, try to get just one */ 667 /* sigh, try to get just one */
666 win_begin = pc; 668 win_begin = pc;
667 if (copyin(win_begin, tsw, 4)) 669 if (copyin(win_begin, tsw, 4))
668 return SIGSEGV; 670 return SIGSEGV;
669 } 671 }
670 } 672 }
671 assert(win_begin <= pc && !((long)pc & 3)); 673 assert(win_begin <= pc && !((long)pc & 3));
672 inst = tsw[pc - win_begin]; 674 inst = tsw[pc - win_begin];
673 op_class = 1UL << inst.generic_format.opcode; 675 op_class = 1UL << inst.generic_format.opcode;
674 if (op_class & FPUREG_CLASS) { 676 if (op_class & FPUREG_CLASS) {
675 a1 &= ~(1UL << (inst.operate_generic_format.rc + 32)); 677 a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
676 trigger_pc = pc; 678 trigger_pc = pc;
677 } else if (op_class & CPUREG_CLASS) { 679 } else if (op_class & CPUREG_CLASS) {
678 a1 &= ~(1UL << inst.operate_generic_format.rc); 680 a1 &= ~(1UL << inst.operate_generic_format.rc);
679 trigger_pc = pc; 681 trigger_pc = pc;
680 } else if (op_class & TRAPSHADOWBOUNDARY) { 682 } else if (op_class & TRAPSHADOWBOUNDARY) {
681 if (op_class & CHECKFUNCTIONCODE) { 683 if (op_class & CHECKFUNCTIONCODE) {
682 if (inst.mem_format.displacement == op_trapb || 684 if (inst.mem_format.displacement == op_trapb ||
683 inst.mem_format.displacement == op_excb) 685 inst.mem_format.displacement == op_excb)
684 break; /* code breaks AARM rules */ 686 break; /* code breaks AARM rules */
685 } else 687 } else
686 break; /* code breaks AARM rules */ 688 break; /* code breaks AARM rules */
687 } 689 }
688 /* Some shadow-safe op, probably load, store, or FPTI class */ 690 /* Some shadow-safe op, probably load, store, or FPTI class */
689 } 691 }
690 t = alpha_shadow.len - t; 692 t = alpha_shadow.len - t;
691 if (t > alpha_shadow.max) 693 if (t > alpha_shadow.max)
692 alpha_shadow.max = t; 694 alpha_shadow.max = t;
693 if (__predict_true(trigger_pc != 0 && a1 == 0)) { 695 if (__predict_true(trigger_pc != 0 && a1 == 0)) {
694 ++alpha_shadow.resolved; 696 ++alpha_shadow.resolved;
695 sig = alpha_fp_complete_at(trigger_pc, l, ucode); 697 sig = alpha_fp_complete_at(trigger_pc, l, ucode);
696 } else { 698 } else {
697 ++alpha_shadow.unresolved; 699 ++alpha_shadow.unresolved;
698 return sig; 700 return sig;
699 } 701 }
700done: 702done:
701 if (sig) { 703 if (sig) {
702 usertrap_pc = trigger_pc + 1; 704 usertrap_pc = trigger_pc + 1;
703 l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc; 705 l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc;
704 return sig; 706 return sig;
705 } 707 }
706 return 0; 708 return 0;
707} 709}
708 710
709/* 711/*
710 * Load the floating-point context for the current lwp. 712 * Load the floating-point context for the current lwp.
711 */ 713 */
712void 714void
713fpu_state_load(struct lwp *l, u_int flags) 715fpu_state_load(struct lwp *l, u_int flags)
714{ 716{
715 struct pcb * const pcb = lwp_getpcb(l); 717 struct pcb * const pcb = lwp_getpcb(l);
716 KASSERT(l == curlwp); 718 KASSERT(l == curlwp);
717 719
718#ifdef MULTIPROCESSOR 720#ifdef MULTIPROCESSOR
719 /* 721 /*
720 * If the LWP got switched to another CPU, pcu_switchpoint would have 722 * If the LWP got switched to another CPU, pcu_switchpoint would have
721 * called state_release to clear MDLWP_FPACTIVE. Now that we are back 723 * called state_release to clear MDLWP_FPACTIVE. Now that we are back
722 * on the CPU that has our FP context, set MDLWP_FPACTIVE again. 724 * on the CPU that has our FP context, set MDLWP_FPACTIVE again.
723 */ 725 */
724 if (flags & PCU_REENABLE) { 726 if (flags & PCU_REENABLE) {
725 KASSERT(flags & PCU_VALID); 727 KASSERT(flags & PCU_VALID);
726 l->l_md.md_flags |= MDLWP_FPACTIVE; 728 l->l_md.md_flags |= MDLWP_FPACTIVE;
727 return; 729 return;
728 } 730 }
729#else 731#else
730 KASSERT((flags & PCU_REENABLE) == 0); 732 KASSERT((flags & PCU_REENABLE) == 0);
731#endif 733#endif
732 734
733 /* 735 /*
734 * Instrument FP usage -- if a process had not previously 736 * Instrument FP usage -- if a process had not previously
735 * used FP, mark it as having used FP for the first time, 737 * used FP, mark it as having used FP for the first time,
736 * and count this event. 738 * and count this event.
737 * 739 *
738 * If a process has used FP, count a "used FP, and took 740 * If a process has used FP, count a "used FP, and took
739 * a trap to use it again" event. 741 * a trap to use it again" event.
740 */ 742 */
741 if ((flags & PCU_VALID) == 0) { 743 if ((flags & PCU_VALID) == 0) {
742 atomic_inc_ulong(&fpevent_use.ev_count); 744 atomic_inc_ulong(&fpevent_use.ev_count);
743 } else { 745 } else {
744 atomic_inc_ulong(&fpevent_reuse.ev_count); 746 atomic_inc_ulong(&fpevent_reuse.ev_count);
745 } 747 }
746 748
747 alpha_pal_wrfen(1); 749 alpha_pal_wrfen(1);
748 restorefpstate(&pcb->pcb_fp); 750 restorefpstate(&pcb->pcb_fp);
749 alpha_pal_wrfen(0); 751 alpha_pal_wrfen(0);
750 752
751 l->l_md.md_flags |= MDLWP_FPACTIVE; 753 l->l_md.md_flags |= MDLWP_FPACTIVE;
752} 754}
753 755
754/* 756/*
755 * Save the FPU state. 757 * Save the FPU state.
756 */ 758 */
757 759
758void 760void
759fpu_state_save(struct lwp *l) 761fpu_state_save(struct lwp *l)
760{ 762{
761 struct pcb * const pcb = lwp_getpcb(l); 763 struct pcb * const pcb = lwp_getpcb(l);
762 764
763 alpha_pal_wrfen(1); 765 alpha_pal_wrfen(1);
764 savefpstate(&pcb->pcb_fp); 766 savefpstate(&pcb->pcb_fp);
765 alpha_pal_wrfen(0); 767 alpha_pal_wrfen(0);
766} 768}
767 769
768/* 770/*
769 * Release the FPU. 771 * Release the FPU.
770 */ 772 */
771void 773void
772fpu_state_release(struct lwp *l) 774fpu_state_release(struct lwp *l)
773{ 775{
774 l->l_md.md_flags &= ~MDLWP_FPACTIVE; 776 l->l_md.md_flags &= ~MDLWP_FPACTIVE;
775} 777}