| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: fp_complete.c,v 1.24 2020/09/01 08:22:36 thorpej Exp $ */ | | 1 | /* $NetBSD: fp_complete.c,v 1.25 2021/07/22 01:39:18 thorpej Exp $ */ |
2 | | | 2 | |
3 | /*- | | 3 | /*- |
4 | * Copyright (c) 2001 Ross Harvey | | 4 | * Copyright (c) 2001 Ross Harvey |
5 | * All rights reserved. | | 5 | * All rights reserved. |
6 | * | | 6 | * |
7 | * Redistribution and use in source and binary forms, with or without | | 7 | * Redistribution and use in source and binary forms, with or without |
8 | * modification, are permitted provided that the following conditions | | 8 | * modification, are permitted provided that the following conditions |
9 | * are met: | | 9 | * are met: |
10 | * 1. Redistributions of source code must retain the above copyright | | 10 | * 1. Redistributions of source code must retain the above copyright |
11 | * notice, this list of conditions and the following disclaimer. | | 11 | * notice, this list of conditions and the following disclaimer. |
12 | * 2. Redistributions in binary form must reproduce the above copyright | | 12 | * 2. Redistributions in binary form must reproduce the above copyright |
13 | * notice, this list of conditions and the following disclaimer in the | | 13 | * notice, this list of conditions and the following disclaimer in the |
14 | * documentation and/or other materials provided with the distribution. | | 14 | * documentation and/or other materials provided with the distribution. |
| @@ -23,44 +23,70 @@ | | | @@ -23,44 +23,70 @@ |
23 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | | 23 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS |
24 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | | 24 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
25 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | | 25 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR |
26 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | | 26 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS |
27 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | | 27 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
28 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | | 28 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
29 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | | 29 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
30 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | | 30 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
31 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | | 31 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
32 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | | 32 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
33 | * POSSIBILITY OF SUCH DAMAGE. | | 33 | * POSSIBILITY OF SUCH DAMAGE. |
34 | */ | | 34 | */ |
35 | | | 35 | |
| | | 36 | #include "opt_ddb.h" |
| | | 37 | |
36 | #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ | | 38 | #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ |
37 | | | 39 | |
38 | __KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.24 2020/09/01 08:22:36 thorpej Exp $"); | | 40 | __KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.25 2021/07/22 01:39:18 thorpej Exp $"); |
39 | | | 41 | |
40 | #include <sys/param.h> | | 42 | #include <sys/param.h> |
41 | #include <sys/systm.h> | | 43 | #include <sys/systm.h> |
42 | #include <sys/proc.h> | | 44 | #include <sys/proc.h> |
43 | #include <sys/atomic.h> | | 45 | #include <sys/atomic.h> |
44 | #include <sys/evcnt.h> | | 46 | #include <sys/evcnt.h> |
45 | | | 47 | |
46 | #include <machine/cpu.h> | | 48 | #include <machine/cpu.h> |
47 | #include <machine/fpu.h> | | 49 | #include <machine/fpu.h> |
48 | #include <machine/reg.h> | | 50 | #include <machine/reg.h> |
49 | #include <machine/alpha.h> | | 51 | #include <machine/alpha.h> |
50 | #include <alpha/alpha/db_instruction.h> | | 52 | #include <alpha/alpha/db_instruction.h> |
51 | | | 53 | |
52 | #include <lib/libkern/softfloat.h> | | 54 | #include <lib/libkern/softfloat.h> |
53 | | | 55 | |
| | | 56 | /* |
| | | 57 | * Validate our assumptions about bit positions. |
| | | 58 | */ |
| | | 59 | __CTASSERT(ALPHA_AESR_INV == (FP_X_INV << 1)); |
| | | 60 | __CTASSERT(ALPHA_AESR_DZE == (FP_X_DZ << 1)); |
| | | 61 | __CTASSERT(ALPHA_AESR_OVF == (FP_X_OFL << 1)); |
| | | 62 | __CTASSERT(ALPHA_AESR_UNF == (FP_X_UFL << 1)); |
| | | 63 | __CTASSERT(ALPHA_AESR_INE == (FP_X_IMP << 1)); |
| | | 64 | __CTASSERT(ALPHA_AESR_IOV == (FP_X_IOV << 1)); |
| | | 65 | |
| | | 66 | __CTASSERT(IEEE_TRAP_ENABLE_INV == (FP_X_INV << 1)); |
| | | 67 | __CTASSERT(IEEE_TRAP_ENABLE_DZE == (FP_X_DZ << 1)); |
| | | 68 | __CTASSERT(IEEE_TRAP_ENABLE_OVF == (FP_X_OFL << 1)); |
| | | 69 | __CTASSERT(IEEE_TRAP_ENABLE_UNF == (FP_X_UFL << 1)); |
| | | 70 | __CTASSERT(IEEE_TRAP_ENABLE_INE == (FP_X_IMP << 1)); |
| | | 71 | |
| | | 72 | __CTASSERT((uint64_t)FP_X_IMP << (61 - 3) == FPCR_INED); |
| | | 73 | __CTASSERT((uint64_t)FP_X_UFL << (61 - 3) == FPCR_UNFD); |
| | | 74 | __CTASSERT((uint64_t)FP_X_OFL << (49 - 0) == FPCR_OVFD); |
| | | 75 | __CTASSERT((uint64_t)FP_X_DZ << (49 - 0) == FPCR_DZED); |
| | | 76 | __CTASSERT((uint64_t)FP_X_INV << (49 - 0) == FPCR_INVD); |
| | | 77 | |
| | | 78 | __CTASSERT(FP_C_ALLBITS == MDLWP_FP_C); |
| | | 79 | |
54 | #define TSWINSIZE 4 /* size of trap shadow window in uint32_t units */ | | 80 | #define TSWINSIZE 4 /* size of trap shadow window in uint32_t units */ |
55 | | | 81 | |
56 | /* Set Name Opcodes AARM C.* Symbols */ | | 82 | /* Set Name Opcodes AARM C.* Symbols */ |
57 | | | 83 | |
58 | #define CPUREG_CLASS (0xfUL << 0x10) /* INT[ALSM] */ | | 84 | #define CPUREG_CLASS (0xfUL << 0x10) /* INT[ALSM] */ |
59 | #define FPUREG_CLASS (0xfUL << 0x14) /* ITFP, FLT[ILV] */ | | 85 | #define FPUREG_CLASS (0xfUL << 0x14) /* ITFP, FLT[ILV] */ |
60 | #define CHECKFUNCTIONCODE (1UL << 0x18) /* MISC */ | | 86 | #define CHECKFUNCTIONCODE (1UL << 0x18) /* MISC */ |
61 | #define TRAPSHADOWBOUNDARY (1UL << 0x00 | /* PAL */\ | | 87 | #define TRAPSHADOWBOUNDARY (1UL << 0x00 | /* PAL */\ |
62 | 1UL << 0x19 | /* \PAL\ */\ | | 88 | 1UL << 0x19 | /* \PAL\ */\ |
63 | 1UL << 0x1a | /* JSR */\ | | 89 | 1UL << 0x1a | /* JSR */\ |
64 | 1UL << 0x1b | /* \PAL\ */\ | | 90 | 1UL << 0x1b | /* \PAL\ */\ |
65 | 1UL << 0x1d | /* \PAL\ */\ | | 91 | 1UL << 0x1d | /* \PAL\ */\ |
66 | 1UL << 0x1e | /* \PAL\ */\ | | 92 | 1UL << 0x1e | /* \PAL\ */\ |
| @@ -341,51 +367,40 @@ fp_c_to_fpcr_1(uint64_t fpcr, uint64_t f | | | @@ -341,51 +367,40 @@ fp_c_to_fpcr_1(uint64_t fpcr, uint64_t f |
341 | * It's hard to arrange for conforming bit fields, because the FP_C | | 367 | * It's hard to arrange for conforming bit fields, because the FP_C |
342 | * and the FPCR are both architected, with specified (and relatively | | 368 | * and the FPCR are both architected, with specified (and relatively |
343 | * scrambled) bit numbers. Defining an internal unscrambled FP_C | | 369 | * scrambled) bit numbers. Defining an internal unscrambled FP_C |
344 | * wouldn't help much, because every user exception requires the | | 370 | * wouldn't help much, because every user exception requires the |
345 | * architected bit order in the sigcontext. | | 371 | * architected bit order in the sigcontext. |
346 | * | | 372 | * |
347 | * Programs that fiddle with the fpcr exception bits (instead of fp_c) | | 373 | * Programs that fiddle with the fpcr exception bits (instead of fp_c) |
348 | * will lose, because those bits can be and usually are subsetted; | | 374 | * will lose, because those bits can be and usually are subsetted; |
349 | * the official home is in the fp_c. Furthermore, the kernel puts | | 375 | * the official home is in the fp_c. Furthermore, the kernel puts |
350 | * phony enables (it lies :-) in the fpcr in order to get control when | | 376 | * phony enables (it lies :-) in the fpcr in order to get control when |
351 | * it is necessary to initially set a sticky bit. | | 377 | * it is necessary to initially set a sticky bit. |
352 | */ | | 378 | */ |
353 | | | 379 | |
354 | fpcr &= FPCR_DYN(3); | | 380 | fpcr &= FPCR_DYN_RM; |
355 | | | 381 | |
356 | /* | | 382 | /* |
357 | * enable traps = case where flag bit is clear OR program wants a trap | | 383 | * enable traps = case where flag bit is clear OR program wants a trap |
358 | * enables = ~flags | mask | | 384 | * |
| | | 385 | * enables = ~flags | mask |
359 | * disables = ~(~flags | mask) | | 386 | * disables = ~(~flags | mask) |
360 | * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871) | | 387 | * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871) |
361 | */ | | 388 | */ |
362 | disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c); | | 389 | disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c); |
363 | | | 390 | |
364 | fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3); | | 391 | fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3); |
365 | fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0); | | 392 | fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0); |
366 | | | 393 | |
367 | # if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 && \ | | | |
368 | FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 && \ | | | |
369 | FP_X_UFL << (61 - 3) == FPCR_UNFD && \ | | | |
370 | FP_X_IMP << (61 - 3) == FPCR_INED && \ | | | |
371 | FP_X_OFL << (49 - 0) == FPCR_OVFD) | | | |
372 | # error "Assertion failed" | | | |
373 | /* | | | |
374 | * We don't care about the other built-in bit numbers because they | | | |
375 | * have been architecturally specified. | | | |
376 | */ | | | |
377 | # endif | | | |
378 | | | | |
379 | fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START); | | 394 | fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START); |
380 | fpcr |= (fp_c & IEEE_MAP_DMZ) << 36; | | 395 | fpcr |= (fp_c & IEEE_MAP_DMZ) << 36; |
381 | if (fp_c & FP_C_MIRRORED) | | 396 | if (fp_c & FP_C_MIRRORED) |
382 | fpcr |= FPCR_SUM; | | 397 | fpcr |= FPCR_SUM; |
383 | if (fp_c & IEEE_MAP_UMZ) | | 398 | if (fp_c & IEEE_MAP_UMZ) |
384 | fpcr |= FPCR_UNDZ | FPCR_UNFD; | | 399 | fpcr |= FPCR_UNDZ | FPCR_UNFD; |
385 | fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41; | | 400 | fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41; |
386 | return fpcr; | | 401 | return fpcr; |
387 | } | | 402 | } |
388 | | | 403 | |
389 | static void | | 404 | static void |
390 | fp_c_to_fpcr(struct lwp *l) | | 405 | fp_c_to_fpcr(struct lwp *l) |
391 | { | | 406 | { |
| @@ -397,26 +412,31 @@ alpha_write_fp_c(struct lwp *l, uint64_t | | | @@ -397,26 +412,31 @@ alpha_write_fp_c(struct lwp *l, uint64_t |
397 | { | | 412 | { |
398 | uint64_t md_flags; | | 413 | uint64_t md_flags; |
399 | | | 414 | |
400 | fp_c &= MDLWP_FP_C; | | 415 | fp_c &= MDLWP_FP_C; |
401 | md_flags = l->l_md.md_flags; | | 416 | md_flags = l->l_md.md_flags; |
402 | if ((md_flags & MDLWP_FP_C) == fp_c) | | 417 | if ((md_flags & MDLWP_FP_C) == fp_c) |
403 | return; | | 418 | return; |
404 | l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c; | | 419 | l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c; |
405 | kpreempt_disable(); | | 420 | kpreempt_disable(); |
406 | if (md_flags & MDLWP_FPACTIVE) { | | 421 | if (md_flags & MDLWP_FPACTIVE) { |
407 | alpha_pal_wrfen(1); | | 422 | alpha_pal_wrfen(1); |
408 | fp_c_to_fpcr(l); | | 423 | fp_c_to_fpcr(l); |
409 | alpha_pal_wrfen(0); | | 424 | alpha_pal_wrfen(0); |
| | | 425 | } else { |
| | | 426 | struct pcb *pcb = l->l_addr; |
| | | 427 | |
| | | 428 | pcb->pcb_fp.fpr_cr = |
| | | 429 | fp_c_to_fpcr_1(pcb->pcb_fp.fpr_cr, l->l_md.md_flags); |
410 | } | | 430 | } |
411 | kpreempt_enable(); | | 431 | kpreempt_enable(); |
412 | } | | 432 | } |
413 | | | 433 | |
414 | uint64_t | | 434 | uint64_t |
415 | alpha_read_fp_c(struct lwp *l) | | 435 | alpha_read_fp_c(struct lwp *l) |
416 | { | | 436 | { |
417 | /* | | 437 | /* |
417 | * A possibly-desirable EV6-specific optimization would deviate from | | 437 | * A possibly-desirable EV6-specific optimization would deviate from |
419 | * the Alpha Architecture spec and keep some FP_C bits in the FPCR, | | 439 | * the Alpha Architecture spec and keep some FP_C bits in the FPCR, |
420 | * but in a transparent way. Some of the code for that would need to | | 440 | * but in a transparent way. Some of the code for that would need to |
421 | * go right here. | | 441 | * go right here. |
422 | */ | | 442 | */ |
| @@ -492,32 +512,67 @@ float64_unk(float64 a, float64 b) | | | @@ -492,32 +512,67 @@ float64_unk(float64 a, float64 b) |
492 | * 1 1 1 0 . . . cvtXt/g (cvtqt, cvt[dq]g only) | | 512 | * 1 1 1 0 . . . cvtXt/g (cvtqt, cvt[dq]g only) |
493 | * 1 1 1 1 . . . cvtXq/q (cvttq, cvtgq) | | 513 | * 1 1 1 1 . . . cvtXq/q (cvttq, cvtgq) |
494 | * | | | | 514 | * | | |
495 | * 15 14 13|12 11 10 09|08 07 06 05 the twilight zone | | 515 | * 15 14 13|12 11 10 09|08 07 06 05 the twilight zone |
496 | * --------======------============ | | 516 | * --------======------============ |
497 | * TRAP : RND : SRC : FUNCTION : | | 517 | * TRAP : RND : SRC : FUNCTION : |
498 | * /s /i /u x x 1 0 1 1 0 0 . . . cvtts, /siu only 0, 1, 5, 7 | | 518 | * /s /i /u x x 1 0 1 1 0 0 . . . cvtts, /siu only 0, 1, 5, 7 |
499 | * 0 1 0 1 0 1 0 1 1 0 0 . . . cvtst (src == T (!)) 2ac NOT /S | | 519 | * 0 1 0 1 0 1 0 1 1 0 0 . . . cvtst (src == T (!)) 2ac NOT /S |
500 | * 1 1 0 1 0 1 0 1 1 0 0 . . . cvtst/s (src == T (!)) 6ac | | 520 | * 1 1 0 1 0 1 0 1 1 0 0 . . . cvtst/s (src == T (!)) 6ac |
501 | * x 0 x x x x 0 1 1 1 1 . . . cvttq/_ (src == T) | | 521 | * x 0 x x x x 0 1 1 1 1 . . . cvttq/_ (src == T) |
502 | */ | | 522 | */ |
503 | | | 523 | |
504 | static void | | 524 | static void |
| | | 525 | print_fp_instruction(alpha_instruction *pc, struct lwp *l, uint32_t bits) |
| | | 526 | { |
| | | 527 | #if defined(DDB) |
| | | 528 | char buf[32]; |
| | | 529 | struct alpha_print_instruction_context ctx = { |
| | | 530 | .insn.bits = bits, |
| | | 531 | .pc = (unsigned long)pc, |
| | | 532 | .buf = buf, |
| | | 533 | .bufsize = sizeof(buf), |
| | | 534 | }; |
| | | 535 | |
| | | 536 | (void) alpha_print_instruction(&ctx); |
| | | 537 | |
| | | 538 | printf("INSN [%s:%d] @0x%lx -> %s\n", |
| | | 539 | l->l_proc->p_comm, l->l_proc->p_pid, ctx.pc, ctx.buf); |
| | | 540 | #else |
| | | 541 | alpha_instruction insn = { |
| | | 542 | .bits = bits, |
| | | 543 | }; |
| | | 544 | printf("INSN [%s:%d] @0x%lx -> opc=0x%x func=0x%x fa=%d fb=%d fc=%d\n", |
| | | 545 | l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc, |
| | | 546 | insn.float_format.opcode, insn.float_format.function, |
| | | 547 | insn.float_format.fa, insn.float_format.fb, insn.float_format.fc); |
| | | 548 | printf("INSN [%s:%d] @0x%lx -> trp=0x%x rnd=0x%x src=0x%x fn=0x%x\n", |
| | | 549 | l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc, |
| | | 550 | insn.float_detail.trp, insn.float_detail.rnd, |
| | | 551 | insn.float_detail.src, insn.float_detail.opclass); |
| | | 552 | #endif /* DDB */ |
| | | 553 | } |
| | | 554 | |
| | | 555 | static void |
505 | alpha_fp_interpret(alpha_instruction *pc, struct lwp *l, uint32_t bits) | | 556 | alpha_fp_interpret(alpha_instruction *pc, struct lwp *l, uint32_t bits) |
506 | { | | 557 | { |
507 | s_float sfa, sfb, sfc; | | 558 | s_float sfa, sfb, sfc; |
508 | t_float tfa, tfb, tfc; | | 559 | t_float tfa, tfb, tfc; |
509 | alpha_instruction inst; | | 560 | alpha_instruction inst; |
510 | | | 561 | |
| | | 562 | if (alpha_fp_complete_debug) { |
| | | 563 | print_fp_instruction(pc, l, bits); |
| | | 564 | } |
| | | 565 | |
511 | inst.bits = bits; | | 566 | inst.bits = bits; |
512 | switch(inst.generic_format.opcode) { | | 567 | switch(inst.generic_format.opcode) { |
513 | default: | | 568 | default: |
514 | /* this "cannot happen" */ | | 569 | /* this "cannot happen" */ |
515 | this_cannot_happen(2, inst.bits); | | 570 | this_cannot_happen(2, inst.bits); |
516 | return; | | 571 | return; |
517 | case op_any_float: | | 572 | case op_any_float: |
518 | if (inst.float_format.function == op_cvtql_sv || | | 573 | if (inst.float_format.function == op_cvtql_sv || |
519 | inst.float_format.function == op_cvtql_v) { | | 574 | inst.float_format.function == op_cvtql_v) { |
520 | alpha_stt(inst.float_detail.fb, &tfb); | | 575 | alpha_stt(inst.float_detail.fb, &tfb); |
521 | sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN; | | 576 | sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN; |
522 | alpha_lds(inst.float_detail.fc, &sfc); | | 577 | alpha_lds(inst.float_detail.fc, &sfc); |
523 | float_raise(FP_X_INV); | | 578 | float_raise(FP_X_INV); |
| @@ -578,35 +633,41 @@ alpha_fp_complete_at(alpha_instruction * | | | @@ -578,35 +633,41 @@ alpha_fp_complete_at(alpha_instruction * |
578 | uint64_t rm, fpcr, orig_fpcr; | | 633 | uint64_t rm, fpcr, orig_fpcr; |
579 | uint64_t orig_flags, new_flags, changed_flags, md_flags; | | 634 | uint64_t orig_flags, new_flags, changed_flags, md_flags; |
580 | | | 635 | |
581 | if (__predict_false(copyin(trigger_pc, &inst, sizeof inst))) { | | 636 | if (__predict_false(copyin(trigger_pc, &inst, sizeof inst))) { |
582 | this_cannot_happen(6, -1); | | 637 | this_cannot_happen(6, -1); |
583 | return SIGSEGV; | | 638 | return SIGSEGV; |
584 | } | | 639 | } |
585 | kpreempt_disable(); | | 640 | kpreempt_disable(); |
586 | if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) { | | 641 | if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) { |
587 | fpu_load(); | | 642 | fpu_load(); |
588 | } | | 643 | } |
589 | alpha_pal_wrfen(1); | | 644 | alpha_pal_wrfen(1); |
590 | /* | | 645 | /* |
591 | * If necessary, lie about the dynamic rounding mode so emulation | | 646 | * Alpha FLOAT instructions can override the rounding mode on a |
592 | * software need go to only one place for it, and so we don't have to | | 647 | * per-instruction basis. If necessary, lie about the dynamic |
593 | * lock any memory locations or pass a third parameter to every | | 648 | * rounding mode so emulation software need go to only one place |
594 | * SoftFloat entry point. | | 649 | * for it, and so we don't have to lock any memory locations or |
| | | 650 | * pass a third parameter to every SoftFloat entry point. |
| | | 651 | * |
| | | 652 | * N.B. the rounding mode field of the FLOAT format instructions |
| | | 653 | * matches that of the FPCR *except* for the value 3, which means |
| | | 654 | * "dynamic" rounding mode (i.e. what is programmed into the FPCR). |
595 | */ | | 655 | */ |
596 | orig_fpcr = fpcr = alpha_read_fpcr(); | | 656 | orig_fpcr = fpcr = alpha_read_fpcr(); |
597 | rm = inst.float_detail.rnd; | | 657 | rm = inst.float_detail.rnd; |
598 | if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) { | | 658 | if (__predict_false(rm != 3 /* dynamic */ && |
599 | fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm); | | 659 | rm != __SHIFTOUT(fpcr, FPCR_DYN_RM))) { |
| | | 660 | fpcr = (fpcr & ~FPCR_DYN_RM) | __SHIFTIN(rm, FPCR_DYN_RM); |
600 | alpha_write_fpcr(fpcr); | | 661 | alpha_write_fpcr(fpcr); |
601 | } | | 662 | } |
602 | orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags); | | 663 | orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags); |
603 | | | 664 | |
604 | alpha_fp_interpret(trigger_pc, l, inst.bits); | | 665 | alpha_fp_interpret(trigger_pc, l, inst.bits); |
605 | | | 666 | |
606 | md_flags = l->l_md.md_flags; | | 667 | md_flags = l->l_md.md_flags; |
607 | | | 668 | |
608 | new_flags = FP_C_TO_NETBSD_FLAG(md_flags); | | 669 | new_flags = FP_C_TO_NETBSD_FLAG(md_flags); |
609 | changed_flags = orig_flags ^ new_flags; | | 670 | changed_flags = orig_flags ^ new_flags; |
610 | KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */ | | 671 | KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */ |
611 | alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags)); | | 672 | alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags)); |
612 | needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags); | | 673 | needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags); |
| @@ -620,64 +681,82 @@ alpha_fp_complete_at(alpha_instruction * | | | @@ -620,64 +681,82 @@ alpha_fp_complete_at(alpha_instruction * |
620 | } | | 681 | } |
621 | | | 682 | |
622 | int | | 683 | int |
623 | alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode) | | 684 | alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode) |
624 | { | | 685 | { |
625 | int t; | | 686 | int t; |
626 | int sig; | | 687 | int sig; |
627 | uint64_t op_class; | | 688 | uint64_t op_class; |
628 | alpha_instruction inst; | | 689 | alpha_instruction inst; |
629 | /* "trigger_pc" is Compaq's term for the earliest faulting op */ | | 690 | /* "trigger_pc" is Compaq's term for the earliest faulting op */ |
630 | alpha_instruction *trigger_pc, *usertrap_pc; | | 691 | alpha_instruction *trigger_pc, *usertrap_pc; |
631 | alpha_instruction *pc, *win_begin, tsw[TSWINSIZE]; | | 692 | alpha_instruction *pc, *win_begin, tsw[TSWINSIZE]; |
632 | | | 693 | |
633 | sig = SIGFPE; | | 694 | if (alpha_fp_complete_debug) { |
| | | 695 | printf("%s: [%s:%d] a0[AESR]=0x%lx a1[regmask]=0x%lx " |
| | | 696 | "FPCR=0x%lx FP_C=0x%lx\n", |
| | | 697 | __func__, l->l_proc->p_comm, l->l_proc->p_pid, |
| | | 698 | a0, a1, alpha_read_fpcr(), |
| | | 699 | l->l_md.md_flags & (MDLWP_FP_C|MDLWP_FPACTIVE)); |
| | | 700 | } |
| | | 701 | |
634 | pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC]; | | 702 | pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC]; |
635 | trigger_pc = pc - 1; /* for ALPHA_AMASK_PAT case */ | | 703 | trigger_pc = pc - 1; /* for ALPHA_AMASK_PAT case */ |
| | | 704 | |
| | | 705 | /* |
| | | 706 | * Start out with the code mirroring the exception flags |
| | | 707 | * (FP_X_*). Shift right 1 bit to discard SWC to achieve |
| | | 708 | * this. |
| | | 709 | */ |
| | | 710 | *ucode = a0 >> 1; |
| | | 711 | |
636 | if (cpu_amask & ALPHA_AMASK_PAT) { | | 712 | if (cpu_amask & ALPHA_AMASK_PAT) { |
637 | /* SWC | INV */ | | 713 | if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) != 0 || |
638 | if (a0 & 3 || alpha_fp_sync_complete) { | | 714 | alpha_fp_sync_complete) { |
639 | sig = alpha_fp_complete_at(trigger_pc, l, ucode); | | 715 | sig = alpha_fp_complete_at(trigger_pc, l, ucode); |
640 | goto done; | | 716 | goto resolved; |
641 | } | | 717 | } |
642 | } | | 718 | } |
643 | *ucode = a0; | | 719 | if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) == 0) |
644 | /* SWC | INV */ | | 720 | goto unresolved; |
645 | if (!(a0 & 3)) | | | |
646 | return sig; | | | |
647 | /* | | 721 | /* |
648 | * At this point we are somewhere in the trap shadow of one or more instruc- | | 722 | * At this point we are somewhere in the trap shadow of one or more instruc- |
649 | * tions that have trapped with software completion specified. We have a mask | | 723 | * tions that have trapped with software completion specified. We have a mask |
650 | * of the registers written by trapping instructions. | | 724 | * of the registers written by trapping instructions. |
651 | * | | 725 | * |
652 | * Now step backwards through the trap shadow, clearing bits in the | | 726 | * Now step backwards through the trap shadow, clearing bits in the |
653 | * destination write mask until the trigger instruction is found, and | | 727 | * destination write mask until the trigger instruction is found, and |
654 | * interpret this one instruction in SW. If a SIGFPE is not required, back up | | 728 | * interpret this one instruction in SW. If a SIGFPE is not required, back up |
655 | * the PC until just after this instruction and restart. This will execute all | | 729 | * the PC until just after this instruction and restart. This will execute all |
656 | * trap shadow instructions between the trigger pc and the trap pc twice. | | 730 | * trap shadow instructions between the trigger pc and the trap pc twice. |
657 | */ | | 731 | */ |
658 | trigger_pc = 0; | | 732 | trigger_pc = 0; |
659 | win_begin = pc; | | 733 | win_begin = pc; |
660 | ++alpha_shadow.scans; | | 734 | ++alpha_shadow.scans; |
661 | t = alpha_shadow.len; | | 735 | t = alpha_shadow.len; |
662 | for (--pc; a1; --pc) { | | 736 | for (--pc; a1; --pc) { |
663 | ++alpha_shadow.len; | | 737 | ++alpha_shadow.len; |
664 | if (pc < win_begin) { | | 738 | if (pc < win_begin) { |
665 | win_begin = pc - TSWINSIZE + 1; | | 739 | win_begin = pc - TSWINSIZE + 1; |
666 | if (copyin(win_begin, tsw, sizeof tsw)) { | | 740 | if (copyin(win_begin, tsw, sizeof tsw)) { |
667 | /* sigh, try to get just one */ | | 741 | /* sigh, try to get just one */ |
668 | win_begin = pc; | | 742 | win_begin = pc; |
669 | if (copyin(win_begin, tsw, 4)) | | 743 | if (copyin(win_begin, tsw, 4)) { |
| | | 744 | /* |
| | | 745 | * We're off the rails here; don't |
| | | 746 | * bother updating the FP_C. |
| | | 747 | */ |
670 | return SIGSEGV; | | 748 | return SIGSEGV; |
| | | 749 | } |
671 | } | | 750 | } |
672 | } | | 751 | } |
673 | assert(win_begin <= pc && !((long)pc & 3)); | | 752 | assert(win_begin <= pc && !((long)pc & 3)); |
674 | inst = tsw[pc - win_begin]; | | 753 | inst = tsw[pc - win_begin]; |
675 | op_class = 1UL << inst.generic_format.opcode; | | 754 | op_class = 1UL << inst.generic_format.opcode; |
676 | if (op_class & FPUREG_CLASS) { | | 755 | if (op_class & FPUREG_CLASS) { |
677 | a1 &= ~(1UL << (inst.operate_generic_format.rc + 32)); | | 756 | a1 &= ~(1UL << (inst.operate_generic_format.rc + 32)); |
678 | trigger_pc = pc; | | 757 | trigger_pc = pc; |
679 | } else if (op_class & CPUREG_CLASS) { | | 758 | } else if (op_class & CPUREG_CLASS) { |
680 | a1 &= ~(1UL << inst.operate_generic_format.rc); | | 759 | a1 &= ~(1UL << inst.operate_generic_format.rc); |
681 | trigger_pc = pc; | | 760 | trigger_pc = pc; |
682 | } else if (op_class & TRAPSHADOWBOUNDARY) { | | 761 | } else if (op_class & TRAPSHADOWBOUNDARY) { |
683 | if (op_class & CHECKFUNCTIONCODE) { | | 762 | if (op_class & CHECKFUNCTIONCODE) { |
| @@ -685,37 +764,68 @@ alpha_fp_complete(u_long a0, u_long a1, | | | @@ -685,37 +764,68 @@ alpha_fp_complete(u_long a0, u_long a1, |
685 | inst.mem_format.displacement == op_excb) | | 764 | inst.mem_format.displacement == op_excb) |
686 | break; /* code breaks AARM rules */ | | 765 | break; /* code breaks AARM rules */ |
687 | } else | | 766 | } else |
688 | break; /* code breaks AARM rules */ | | 767 | break; /* code breaks AARM rules */ |
689 | } | | 768 | } |
690 | /* Some shadow-safe op, probably load, store, or FPTI class */ | | 769 | /* Some shadow-safe op, probably load, store, or FPTI class */ |
691 | } | | 770 | } |
692 | t = alpha_shadow.len - t; | | 771 | t = alpha_shadow.len - t; |
693 | if (t > alpha_shadow.max) | | 772 | if (t > alpha_shadow.max) |
694 | alpha_shadow.max = t; | | 773 | alpha_shadow.max = t; |
695 | if (__predict_true(trigger_pc != 0 && a1 == 0)) { | | 774 | if (__predict_true(trigger_pc != 0 && a1 == 0)) { |
696 | ++alpha_shadow.resolved; | | 775 | ++alpha_shadow.resolved; |
697 | sig = alpha_fp_complete_at(trigger_pc, l, ucode); | | 776 | sig = alpha_fp_complete_at(trigger_pc, l, ucode); |
| | | 777 | goto resolved; |
698 | } else { | | 778 | } else { |
699 | ++alpha_shadow.unresolved; | | 779 | ++alpha_shadow.unresolved; |
700 | return sig; | | | |
701 | } | | 780 | } |
702 | done: | | 781 | |
| | | 782 | unresolved: /* obligatory statement */; |
| | | 783 | /* |
| | | 784 | * *ucode contains the exception bits (FP_X_*). We need to |
| | | 785 | * update the FP_C and FPCR, and send a signal for any new |
| | | 786 | * trap that is enabled. |
| | | 787 | */ |
| | | 788 | uint64_t orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags); |
| | | 789 | uint64_t new_flags = orig_flags | *ucode; |
| | | 790 | uint64_t changed_flags = orig_flags ^ new_flags; |
| | | 791 | KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */ |
| | | 792 | |
| | | 793 | l->l_md.md_flags |= NETBSD_FLAG_TO_FP_C(new_flags); |
| | | 794 | |
| | | 795 | kpreempt_disable(); |
| | | 796 | if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) { |
| | | 797 | fpu_load(); |
| | | 798 | } |
| | | 799 | alpha_pal_wrfen(1); |
| | | 800 | uint64_t orig_fpcr = alpha_read_fpcr(); |
| | | 801 | alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, l->l_md.md_flags)); |
| | | 802 | uint64_t needsig = |
| | | 803 | changed_flags & FP_C_TO_NETBSD_MASK(l->l_md.md_flags); |
| | | 804 | alpha_pal_wrfen(0); |
| | | 805 | kpreempt_enable(); |
| | | 806 | |
| | | 807 | if (__predict_false(needsig)) { |
| | | 808 | *ucode = needsig; |
| | | 809 | return SIGFPE; |
| | | 810 | } |
| | | 811 | return 0; |
| | | 812 | |
| | | 813 | resolved: |
703 | if (sig) { | | 814 | if (sig) { |
704 | usertrap_pc = trigger_pc + 1; | | 815 | usertrap_pc = trigger_pc + 1; |
705 | l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc; | | 816 | l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc; |
706 | return sig; | | | |
707 | } | | 817 | } |
708 | return 0; | | 818 | return sig; |
709 | } | | 819 | } |
710 | | | 820 | |
711 | /* | | 821 | /* |
712 | * Load the floating-point context for the current lwp. | | 822 | * Load the floating-point context for the current lwp. |
713 | */ | | 823 | */ |
714 | void | | 824 | void |
715 | fpu_state_load(struct lwp *l, u_int flags) | | 825 | fpu_state_load(struct lwp *l, u_int flags) |
716 | { | | 826 | { |
717 | struct pcb * const pcb = lwp_getpcb(l); | | 827 | struct pcb * const pcb = lwp_getpcb(l); |
718 | KASSERT(l == curlwp); | | 828 | KASSERT(l == curlwp); |
719 | | | 829 | |
720 | #ifdef MULTIPROCESSOR | | 830 | #ifdef MULTIPROCESSOR |
721 | /* | | 831 | /* |
| @@ -736,42 +846,52 @@ fpu_state_load(struct lwp *l, u_int flag | | | @@ -736,42 +846,52 @@ fpu_state_load(struct lwp *l, u_int flag |
736 | * Instrument FP usage -- if a process had not previously | | 846 | * Instrument FP usage -- if a process had not previously |
737 | * used FP, mark it as having used FP for the first time, | | 847 | * used FP, mark it as having used FP for the first time, |
738 | * and count this event. | | 848 | * and count this event. |
739 | * | | 849 | * |
740 | * If a process has used FP, count a "used FP, and took | | 850 | * If a process has used FP, count a "used FP, and took |
741 | * a trap to use it again" event. | | 851 | * a trap to use it again" event. |
742 | */ | | 852 | */ |
743 | if ((flags & PCU_VALID) == 0) { | | 853 | if ((flags & PCU_VALID) == 0) { |
744 | atomic_inc_ulong(&fpevent_use.ev_count); | | 854 | atomic_inc_ulong(&fpevent_use.ev_count); |
745 | } else { | | 855 | } else { |
746 | atomic_inc_ulong(&fpevent_reuse.ev_count); | | 856 | atomic_inc_ulong(&fpevent_reuse.ev_count); |
747 | } | | 857 | } |
748 | | | 858 | |
| | | 859 | if (alpha_fp_complete_debug) { |
| | | 860 | printf("%s: [%s:%d] loading FPCR=0x%lx\n", |
| | | 861 | __func__, l->l_proc->p_comm, l->l_proc->p_pid, |
| | | 862 | pcb->pcb_fp.fpr_cr); |
| | | 863 | } |
749 | alpha_pal_wrfen(1); | | 864 | alpha_pal_wrfen(1); |
750 | restorefpstate(&pcb->pcb_fp); | | 865 | restorefpstate(&pcb->pcb_fp); |
751 | alpha_pal_wrfen(0); | | 866 | alpha_pal_wrfen(0); |
752 | | | 867 | |
753 | l->l_md.md_flags |= MDLWP_FPACTIVE; | | 868 | l->l_md.md_flags |= MDLWP_FPACTIVE; |
754 | } | | 869 | } |
755 | | | 870 | |
756 | /* | | 871 | /* |
757 | * Save the FPU state. | | 872 | * Save the FPU state. |
758 | */ | | 873 | */ |
759 | | | 874 | |
760 | void | | 875 | void |
761 | fpu_state_save(struct lwp *l) | | 876 | fpu_state_save(struct lwp *l) |
762 | { | | 877 | { |
763 | struct pcb * const pcb = lwp_getpcb(l); | | 878 | struct pcb * const pcb = lwp_getpcb(l); |
764 | | | 879 | |
765 | alpha_pal_wrfen(1); | | 880 | alpha_pal_wrfen(1); |
766 | savefpstate(&pcb->pcb_fp); | | 881 | savefpstate(&pcb->pcb_fp); |
767 | alpha_pal_wrfen(0); | | 882 | alpha_pal_wrfen(0); |
| | | 883 | if (alpha_fp_complete_debug) { |
| | | 884 | printf("%s: [%s:%d] saved FPCR=0x%lx\n", |
| | | 885 | __func__, l->l_proc->p_comm, l->l_proc->p_pid, |
| | | 886 | pcb->pcb_fp.fpr_cr); |
| | | 887 | } |
768 | } | | 888 | } |
769 | | | 889 | |
770 | /* | | 890 | /* |
771 | * Release the FPU. | | 891 | * Release the FPU. |
772 | */ | | 892 | */ |
773 | void | | 893 | void |
774 | fpu_state_release(struct lwp *l) | | 894 | fpu_state_release(struct lwp *l) |
775 | { | | 895 | { |
776 | l->l_md.md_flags &= ~MDLWP_FPACTIVE; | | 896 | l->l_md.md_flags &= ~MDLWP_FPACTIVE; |
777 | } | | 897 | } |