Thu Jul 22 01:39:18 2021 UTC ()
Various minor cleanups and bug fixes to the FP software completion code:
- Use __CTASSERT() instead of rolling our own compile-time assertion
  using cpp.
- Use __BIT() &c instead of rolling our own.
- Improve some comments.
- Define a default FP_C and FPCR value that is self-consistent, and
  initialize it properly at process creation time.
- Fix signal information when the trap shadow cannot be resolved.
- Use defined constants rather than magic numbers for the exception
  summary bits.
- Add a machdep sysctl to enable FP software-completion debugging.


(thorpej)
diff -r1.24 -r1.25 src/sys/arch/alpha/alpha/fp_complete.c
diff -r1.374 -r1.375 src/sys/arch/alpha/alpha/machdep.c
diff -r1.102 -r1.103 src/sys/arch/alpha/include/cpu.h
diff -r1.7 -r1.8 src/sys/arch/alpha/include/fpu.h

cvs diff -r1.24 -r1.25 src/sys/arch/alpha/alpha/fp_complete.c (expand / switch to unified diff)

--- src/sys/arch/alpha/alpha/fp_complete.c 2020/09/01 08:22:36 1.24
+++ src/sys/arch/alpha/alpha/fp_complete.c 2021/07/22 01:39:18 1.25
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: fp_complete.c,v 1.24 2020/09/01 08:22:36 thorpej Exp $ */ 1/* $NetBSD: fp_complete.c,v 1.25 2021/07/22 01:39:18 thorpej Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2001 Ross Harvey 4 * Copyright (c) 2001 Ross Harvey
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
@@ -23,44 +23,70 @@ @@ -23,44 +23,70 @@
23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 23 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 25 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 26 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE. 33 * POSSIBILITY OF SUCH DAMAGE.
34 */ 34 */
35 35
 36#include "opt_ddb.h"
 37
36#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 38#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
37 39
38__KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.24 2020/09/01 08:22:36 thorpej Exp $"); 40__KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.25 2021/07/22 01:39:18 thorpej Exp $");
39 41
40#include <sys/param.h> 42#include <sys/param.h>
41#include <sys/systm.h> 43#include <sys/systm.h>
42#include <sys/proc.h> 44#include <sys/proc.h>
43#include <sys/atomic.h> 45#include <sys/atomic.h>
44#include <sys/evcnt.h> 46#include <sys/evcnt.h>
45 47
46#include <machine/cpu.h> 48#include <machine/cpu.h>
47#include <machine/fpu.h> 49#include <machine/fpu.h>
48#include <machine/reg.h> 50#include <machine/reg.h>
49#include <machine/alpha.h> 51#include <machine/alpha.h>
50#include <alpha/alpha/db_instruction.h> 52#include <alpha/alpha/db_instruction.h>
51 53
52#include <lib/libkern/softfloat.h> 54#include <lib/libkern/softfloat.h>
53 55
 56/*
 57 * Validate our assumptions about bit positions.
 58 */
 59__CTASSERT(ALPHA_AESR_INV == (FP_X_INV << 1));
 60__CTASSERT(ALPHA_AESR_DZE == (FP_X_DZ << 1));
 61__CTASSERT(ALPHA_AESR_OVF == (FP_X_OFL << 1));
 62__CTASSERT(ALPHA_AESR_UNF == (FP_X_UFL << 1));
 63__CTASSERT(ALPHA_AESR_INE == (FP_X_IMP << 1));
 64__CTASSERT(ALPHA_AESR_IOV == (FP_X_IOV << 1));
 65
 66__CTASSERT(IEEE_TRAP_ENABLE_INV == (FP_X_INV << 1));
 67__CTASSERT(IEEE_TRAP_ENABLE_DZE == (FP_X_DZ << 1));
 68__CTASSERT(IEEE_TRAP_ENABLE_OVF == (FP_X_OFL << 1));
 69__CTASSERT(IEEE_TRAP_ENABLE_UNF == (FP_X_UFL << 1));
 70__CTASSERT(IEEE_TRAP_ENABLE_INE == (FP_X_IMP << 1));
 71
 72__CTASSERT((uint64_t)FP_X_IMP << (61 - 3) == FPCR_INED);
 73__CTASSERT((uint64_t)FP_X_UFL << (61 - 3) == FPCR_UNFD);
 74__CTASSERT((uint64_t)FP_X_OFL << (49 - 0) == FPCR_OVFD);
 75__CTASSERT((uint64_t)FP_X_DZ << (49 - 0) == FPCR_DZED);
 76__CTASSERT((uint64_t)FP_X_INV << (49 - 0) == FPCR_INVD);
 77
 78__CTASSERT(FP_C_ALLBITS == MDLWP_FP_C);
 79
54#define TSWINSIZE 4 /* size of trap shadow window in uint32_t units */ 80#define TSWINSIZE 4 /* size of trap shadow window in uint32_t units */
55 81
56/* Set Name Opcodes AARM C.* Symbols */ 82/* Set Name Opcodes AARM C.* Symbols */
57 83
58#define CPUREG_CLASS (0xfUL << 0x10) /* INT[ALSM] */ 84#define CPUREG_CLASS (0xfUL << 0x10) /* INT[ALSM] */
59#define FPUREG_CLASS (0xfUL << 0x14) /* ITFP, FLT[ILV] */ 85#define FPUREG_CLASS (0xfUL << 0x14) /* ITFP, FLT[ILV] */
60#define CHECKFUNCTIONCODE (1UL << 0x18) /* MISC */ 86#define CHECKFUNCTIONCODE (1UL << 0x18) /* MISC */
61#define TRAPSHADOWBOUNDARY (1UL << 0x00 | /* PAL */\ 87#define TRAPSHADOWBOUNDARY (1UL << 0x00 | /* PAL */\
62 1UL << 0x19 | /* \PAL\ */\ 88 1UL << 0x19 | /* \PAL\ */\
63 1UL << 0x1a | /* JSR */\ 89 1UL << 0x1a | /* JSR */\
64 1UL << 0x1b | /* \PAL\ */\ 90 1UL << 0x1b | /* \PAL\ */\
65 1UL << 0x1d | /* \PAL\ */\ 91 1UL << 0x1d | /* \PAL\ */\
66 1UL << 0x1e | /* \PAL\ */\ 92 1UL << 0x1e | /* \PAL\ */\
@@ -341,51 +367,40 @@ fp_c_to_fpcr_1(uint64_t fpcr, uint64_t f @@ -341,51 +367,40 @@ fp_c_to_fpcr_1(uint64_t fpcr, uint64_t f
341 * It's hard to arrange for conforming bit fields, because the FP_C 367 * It's hard to arrange for conforming bit fields, because the FP_C
342 * and the FPCR are both architected, with specified (and relatively 368 * and the FPCR are both architected, with specified (and relatively
343 * scrambled) bit numbers. Defining an internal unscrambled FP_C 369 * scrambled) bit numbers. Defining an internal unscrambled FP_C
344 * wouldn't help much, because every user exception requires the 370 * wouldn't help much, because every user exception requires the
345 * architected bit order in the sigcontext. 371 * architected bit order in the sigcontext.
346 * 372 *
347 * Programs that fiddle with the fpcr exception bits (instead of fp_c) 373 * Programs that fiddle with the fpcr exception bits (instead of fp_c)
348 * will lose, because those bits can be and usually are subsetted; 374 * will lose, because those bits can be and usually are subsetted;
349 * the official home is in the fp_c. Furthermore, the kernel puts 375 * the official home is in the fp_c. Furthermore, the kernel puts
350 * phony enables (it lies :-) in the fpcr in order to get control when 376 * phony enables (it lies :-) in the fpcr in order to get control when
351 * it is necessary to initially set a sticky bit. 377 * it is necessary to initially set a sticky bit.
352 */ 378 */
353 379
354 fpcr &= FPCR_DYN(3); 380 fpcr &= FPCR_DYN_RM;
355 381
356 /* 382 /*
357 * enable traps = case where flag bit is clear OR program wants a trap 383 * enable traps = case where flag bit is clear AND program wants a trap
358 * enables = ~flags | mask 384 *
 385 * enables = ~flags & mask
359 * disables = ~(~flags | mask) 386 * disables = ~(~flags | mask)
360 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871) 387 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
361 */ 388 */
362 disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c); 389 disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c);
363 390
364 fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3); 391 fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
365 fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0); 392 fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);
366 393
367# if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 && \ 
368 FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 && \ 
369 FP_X_UFL << (61 - 3) == FPCR_UNFD && \ 
370 FP_X_IMP << (61 - 3) == FPCR_INED && \ 
371 FP_X_OFL << (49 - 0) == FPCR_OVFD) 
372# error "Assertion failed" 
373 /* 
374 * We don't care about the other built-in bit numbers because they 
375 * have been architecturally specified. 
376 */ 
377# endif 
378 
379 fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START); 394 fpcr |= fp_c & FP_C_MIRRORED << (FPCR_MIR_START - FP_C_MIR_START);
380 fpcr |= (fp_c & IEEE_MAP_DMZ) << 36; 395 fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
381 if (fp_c & FP_C_MIRRORED) 396 if (fp_c & FP_C_MIRRORED)
382 fpcr |= FPCR_SUM; 397 fpcr |= FPCR_SUM;
383 if (fp_c & IEEE_MAP_UMZ) 398 if (fp_c & IEEE_MAP_UMZ)
384 fpcr |= FPCR_UNDZ | FPCR_UNFD; 399 fpcr |= FPCR_UNDZ | FPCR_UNFD;
385 fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41; 400 fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
386 return fpcr; 401 return fpcr;
387} 402}
388 403
389static void 404static void
390fp_c_to_fpcr(struct lwp *l) 405fp_c_to_fpcr(struct lwp *l)
391{ 406{
@@ -397,26 +412,31 @@ alpha_write_fp_c(struct lwp *l, uint64_t @@ -397,26 +412,31 @@ alpha_write_fp_c(struct lwp *l, uint64_t
397{ 412{
398 uint64_t md_flags; 413 uint64_t md_flags;
399 414
400 fp_c &= MDLWP_FP_C; 415 fp_c &= MDLWP_FP_C;
401 md_flags = l->l_md.md_flags; 416 md_flags = l->l_md.md_flags;
402 if ((md_flags & MDLWP_FP_C) == fp_c) 417 if ((md_flags & MDLWP_FP_C) == fp_c)
403 return; 418 return;
404 l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c; 419 l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c;
405 kpreempt_disable(); 420 kpreempt_disable();
406 if (md_flags & MDLWP_FPACTIVE) { 421 if (md_flags & MDLWP_FPACTIVE) {
407 alpha_pal_wrfen(1); 422 alpha_pal_wrfen(1);
408 fp_c_to_fpcr(l); 423 fp_c_to_fpcr(l);
409 alpha_pal_wrfen(0); 424 alpha_pal_wrfen(0);
 425 } else {
 426 struct pcb *pcb = l->l_addr;
 427
 428 pcb->pcb_fp.fpr_cr =
 429 fp_c_to_fpcr_1(pcb->pcb_fp.fpr_cr, l->l_md.md_flags);
410 } 430 }
411 kpreempt_enable(); 431 kpreempt_enable();
412} 432}
413 433
414uint64_t 434uint64_t
415alpha_read_fp_c(struct lwp *l) 435alpha_read_fp_c(struct lwp *l)
416{ 436{
417 /* 437 /*
418 * A possibly-desirable EV6-specific optimization would deviate from 438 * A possibly-desirable EV6-specific optimization would deviate from
419 * the Alpha Architecture spec and keep some FP_C bits in the FPCR, 439 * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
420 * but in a transparent way. Some of the code for that would need to 440 * but in a transparent way. Some of the code for that would need to
421 * go right here. 441 * go right here.
422 */ 442 */
@@ -492,32 +512,67 @@ float64_unk(float64 a, float64 b) @@ -492,32 +512,67 @@ float64_unk(float64 a, float64 b)
492 * 1 1 1 0 . . . cvtXt/g (cvtqt, cvt[dq]g only) 512 * 1 1 1 0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
493 * 1 1 1 1 . . . cvtXq/q (cvttq, cvtgq) 513 * 1 1 1 1 . . . cvtXq/q (cvttq, cvtgq)
494 * | | 514 * | |
495 * 15 14 13|12 11 10 09|08 07 06 05 the twilight zone 515 * 15 14 13|12 11 10 09|08 07 06 05 the twilight zone
496 * --------======------============ 516 * --------======------============
497 * TRAP : RND : SRC : FUNCTION : 517 * TRAP : RND : SRC : FUNCTION :
498 * /s /i /u x x 1 0 1 1 0 0 . . . cvtts, /siu only 0, 1, 5, 7 518 * /s /i /u x x 1 0 1 1 0 0 . . . cvtts, /siu only 0, 1, 5, 7
499 * 0 1 0 1 0 1 0 1 1 0 0 . . . cvtst (src == T (!)) 2ac NOT /S 519 * 0 1 0 1 0 1 0 1 1 0 0 . . . cvtst (src == T (!)) 2ac NOT /S
500 * 1 1 0 1 0 1 0 1 1 0 0 . . . cvtst/s (src == T (!)) 6ac 520 * 1 1 0 1 0 1 0 1 1 0 0 . . . cvtst/s (src == T (!)) 6ac
501 * x 0 x x x x 0 1 1 1 1 . . . cvttq/_ (src == T) 521 * x 0 x x x x 0 1 1 1 1 . . . cvttq/_ (src == T)
502 */ 522 */
503 523
504static void 524static void
 525print_fp_instruction(alpha_instruction *pc, struct lwp *l, uint32_t bits)
 526{
 527#if defined(DDB)
 528 char buf[32];
 529 struct alpha_print_instruction_context ctx = {
 530 .insn.bits = bits,
 531 .pc = (unsigned long)pc,
 532 .buf = buf,
 533 .bufsize = sizeof(buf),
 534 };
 535
 536 (void) alpha_print_instruction(&ctx);
 537
 538 printf("INSN [%s:%d] @0x%lx -> %s\n",
 539 l->l_proc->p_comm, l->l_proc->p_pid, ctx.pc, ctx.buf);
 540#else
 541 alpha_instruction insn = {
 542 .bits = bits,
 543 };
 544 printf("INSN [%s:%d] @0x%lx -> opc=0x%x func=0x%x fa=%d fb=%d fc=%d\n",
 545 l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
 546 insn.float_format.opcode, insn.float_format.function,
 547 insn.float_format.fa, insn.float_format.fb, insn.float_format.fc);
 548 printf("INSN [%s:%d] @0x%lx -> trp=0x%x rnd=0x%x src=0x%x fn=0x%x\n",
 549 l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
 550 insn.float_detail.trp, insn.float_detail.rnd,
 551 insn.float_detail.src, insn.float_detail.opclass);
 552#endif /* DDB */
 553}
 554
 555static void
505alpha_fp_interpret(alpha_instruction *pc, struct lwp *l, uint32_t bits) 556alpha_fp_interpret(alpha_instruction *pc, struct lwp *l, uint32_t bits)
506{ 557{
507 s_float sfa, sfb, sfc; 558 s_float sfa, sfb, sfc;
508 t_float tfa, tfb, tfc; 559 t_float tfa, tfb, tfc;
509 alpha_instruction inst; 560 alpha_instruction inst;
510 561
 562 if (alpha_fp_complete_debug) {
 563 print_fp_instruction(pc, l, bits);
 564 }
 565
511 inst.bits = bits; 566 inst.bits = bits;
512 switch(inst.generic_format.opcode) { 567 switch(inst.generic_format.opcode) {
513 default: 568 default:
514 /* this "cannot happen" */ 569 /* this "cannot happen" */
515 this_cannot_happen(2, inst.bits); 570 this_cannot_happen(2, inst.bits);
516 return; 571 return;
517 case op_any_float: 572 case op_any_float:
518 if (inst.float_format.function == op_cvtql_sv || 573 if (inst.float_format.function == op_cvtql_sv ||
519 inst.float_format.function == op_cvtql_v) { 574 inst.float_format.function == op_cvtql_v) {
520 alpha_stt(inst.float_detail.fb, &tfb); 575 alpha_stt(inst.float_detail.fb, &tfb);
521 sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN; 576 sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
522 alpha_lds(inst.float_detail.fc, &sfc); 577 alpha_lds(inst.float_detail.fc, &sfc);
523 float_raise(FP_X_INV); 578 float_raise(FP_X_INV);
@@ -578,35 +633,41 @@ alpha_fp_complete_at(alpha_instruction * @@ -578,35 +633,41 @@ alpha_fp_complete_at(alpha_instruction *
578 uint64_t rm, fpcr, orig_fpcr; 633 uint64_t rm, fpcr, orig_fpcr;
579 uint64_t orig_flags, new_flags, changed_flags, md_flags; 634 uint64_t orig_flags, new_flags, changed_flags, md_flags;
580 635
581 if (__predict_false(copyin(trigger_pc, &inst, sizeof inst))) { 636 if (__predict_false(copyin(trigger_pc, &inst, sizeof inst))) {
582 this_cannot_happen(6, -1); 637 this_cannot_happen(6, -1);
583 return SIGSEGV; 638 return SIGSEGV;
584 } 639 }
585 kpreempt_disable(); 640 kpreempt_disable();
586 if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) { 641 if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
587 fpu_load(); 642 fpu_load();
588 } 643 }
589 alpha_pal_wrfen(1); 644 alpha_pal_wrfen(1);
590 /* 645 /*
591 * If necessary, lie about the dynamic rounding mode so emulation 646 * Alpha FLOAT instructions can override the rounding mode on a
592 * software need go to only one place for it, and so we don't have to 647 * per-instruction basis. If necessary, lie about the dynamic
593 * lock any memory locations or pass a third parameter to every 648 * rounding mode so emulation software need go to only one place
594 * SoftFloat entry point. 649 * for it, and so we don't have to lock any memory locations or
 650 * pass a third parameter to every SoftFloat entry point.
 651 *
 652 * N.B. the rounding mode field of the FLOAT format instructions
 653 * matches that of the FPCR *except* for the value 3, which means
 654 * "dynamic" rounding mode (i.e. what is programmed into the FPCR).
595 */ 655 */
596 orig_fpcr = fpcr = alpha_read_fpcr(); 656 orig_fpcr = fpcr = alpha_read_fpcr();
597 rm = inst.float_detail.rnd; 657 rm = inst.float_detail.rnd;
598 if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) { 658 if (__predict_false(rm != 3 /* dynamic */ &&
599 fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm); 659 rm != __SHIFTOUT(fpcr, FPCR_DYN_RM))) {
 660 fpcr = (fpcr & ~FPCR_DYN_RM) | __SHIFTIN(rm, FPCR_DYN_RM);
600 alpha_write_fpcr(fpcr); 661 alpha_write_fpcr(fpcr);
601 } 662 }
602 orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags); 663 orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
603 664
604 alpha_fp_interpret(trigger_pc, l, inst.bits); 665 alpha_fp_interpret(trigger_pc, l, inst.bits);
605 666
606 md_flags = l->l_md.md_flags; 667 md_flags = l->l_md.md_flags;
607 668
608 new_flags = FP_C_TO_NETBSD_FLAG(md_flags); 669 new_flags = FP_C_TO_NETBSD_FLAG(md_flags);
609 changed_flags = orig_flags ^ new_flags; 670 changed_flags = orig_flags ^ new_flags;
610 KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */ 671 KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
611 alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags)); 672 alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
612 needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags); 673 needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags);
@@ -620,64 +681,82 @@ alpha_fp_complete_at(alpha_instruction * @@ -620,64 +681,82 @@ alpha_fp_complete_at(alpha_instruction *
620} 681}
621 682
622int 683int
623alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode) 684alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode)
624{ 685{
625 int t; 686 int t;
626 int sig; 687 int sig;
627 uint64_t op_class; 688 uint64_t op_class;
628 alpha_instruction inst; 689 alpha_instruction inst;
629 /* "trigger_pc" is Compaq's term for the earliest faulting op */ 690 /* "trigger_pc" is Compaq's term for the earliest faulting op */
630 alpha_instruction *trigger_pc, *usertrap_pc; 691 alpha_instruction *trigger_pc, *usertrap_pc;
631 alpha_instruction *pc, *win_begin, tsw[TSWINSIZE]; 692 alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];
632 693
633 sig = SIGFPE; 694 if (alpha_fp_complete_debug) {
 695 printf("%s: [%s:%d] a0[AESR]=0x%lx a1[regmask]=0x%lx "
 696 "FPCR=0x%lx FP_C=0x%lx\n",
 697 __func__, l->l_proc->p_comm, l->l_proc->p_pid,
 698 a0, a1, alpha_read_fpcr(),
 699 l->l_md.md_flags & (MDLWP_FP_C|MDLWP_FPACTIVE));
 700 }
 701
634 pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC]; 702 pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC];
635 trigger_pc = pc - 1; /* for ALPHA_AMASK_PAT case */ 703 trigger_pc = pc - 1; /* for ALPHA_AMASK_PAT case */
 704
 705 /*
 706 * Start out with the code mirroring the exception flags
 707 * (FP_X_*). Shift right 1 bit to discard SWC to achieve
 708 * this.
 709 */
 710 *ucode = a0 >> 1;
 711
636 if (cpu_amask & ALPHA_AMASK_PAT) { 712 if (cpu_amask & ALPHA_AMASK_PAT) {
637 /* SWC | INV */ 713 if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) != 0 ||
638 if (a0 & 3 || alpha_fp_sync_complete) { 714 alpha_fp_sync_complete) {
639 sig = alpha_fp_complete_at(trigger_pc, l, ucode); 715 sig = alpha_fp_complete_at(trigger_pc, l, ucode);
640 goto done; 716 goto resolved;
641 } 717 }
642 } 718 }
643 *ucode = a0; 719 if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) == 0)
644 /* SWC | INV */ 720 goto unresolved;
645 if (!(a0 & 3)) 
646 return sig; 
647/* 721/*
648 * At this point we are somewhere in the trap shadow of one or more instruc- 722 * At this point we are somewhere in the trap shadow of one or more instruc-
649 * tions that have trapped with software completion specified. We have a mask 723 * tions that have trapped with software completion specified. We have a mask
650 * of the registers written by trapping instructions. 724 * of the registers written by trapping instructions.
651 * 725 *
652 * Now step backwards through the trap shadow, clearing bits in the 726 * Now step backwards through the trap shadow, clearing bits in the
653 * destination write mask until the trigger instruction is found, and 727 * destination write mask until the trigger instruction is found, and
654 * interpret this one instruction in SW. If a SIGFPE is not required, back up 728 * interpret this one instruction in SW. If a SIGFPE is not required, back up
655 * the PC until just after this instruction and restart. This will execute all 729 * the PC until just after this instruction and restart. This will execute all
656 * trap shadow instructions between the trigger pc and the trap pc twice. 730 * trap shadow instructions between the trigger pc and the trap pc twice.
657 */ 731 */
658 trigger_pc = 0; 732 trigger_pc = 0;
659 win_begin = pc; 733 win_begin = pc;
660 ++alpha_shadow.scans; 734 ++alpha_shadow.scans;
661 t = alpha_shadow.len; 735 t = alpha_shadow.len;
662 for (--pc; a1; --pc) { 736 for (--pc; a1; --pc) {
663 ++alpha_shadow.len; 737 ++alpha_shadow.len;
664 if (pc < win_begin) { 738 if (pc < win_begin) {
665 win_begin = pc - TSWINSIZE + 1; 739 win_begin = pc - TSWINSIZE + 1;
666 if (copyin(win_begin, tsw, sizeof tsw)) { 740 if (copyin(win_begin, tsw, sizeof tsw)) {
667 /* sigh, try to get just one */ 741 /* sigh, try to get just one */
668 win_begin = pc; 742 win_begin = pc;
669 if (copyin(win_begin, tsw, 4)) 743 if (copyin(win_begin, tsw, 4)) {
 744 /*
 745 * We're off the rails here; don't
 746 * bother updating the FP_C.
 747 */
670 return SIGSEGV; 748 return SIGSEGV;
 749 }
671 } 750 }
672 } 751 }
673 assert(win_begin <= pc && !((long)pc & 3)); 752 assert(win_begin <= pc && !((long)pc & 3));
674 inst = tsw[pc - win_begin]; 753 inst = tsw[pc - win_begin];
675 op_class = 1UL << inst.generic_format.opcode; 754 op_class = 1UL << inst.generic_format.opcode;
676 if (op_class & FPUREG_CLASS) { 755 if (op_class & FPUREG_CLASS) {
677 a1 &= ~(1UL << (inst.operate_generic_format.rc + 32)); 756 a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
678 trigger_pc = pc; 757 trigger_pc = pc;
679 } else if (op_class & CPUREG_CLASS) { 758 } else if (op_class & CPUREG_CLASS) {
680 a1 &= ~(1UL << inst.operate_generic_format.rc); 759 a1 &= ~(1UL << inst.operate_generic_format.rc);
681 trigger_pc = pc; 760 trigger_pc = pc;
682 } else if (op_class & TRAPSHADOWBOUNDARY) { 761 } else if (op_class & TRAPSHADOWBOUNDARY) {
683 if (op_class & CHECKFUNCTIONCODE) { 762 if (op_class & CHECKFUNCTIONCODE) {
@@ -685,37 +764,68 @@ alpha_fp_complete(u_long a0, u_long a1,  @@ -685,37 +764,68 @@ alpha_fp_complete(u_long a0, u_long a1,
685 inst.mem_format.displacement == op_excb) 764 inst.mem_format.displacement == op_excb)
686 break; /* code breaks AARM rules */ 765 break; /* code breaks AARM rules */
687 } else 766 } else
688 break; /* code breaks AARM rules */ 767 break; /* code breaks AARM rules */
689 } 768 }
690 /* Some shadow-safe op, probably load, store, or FPTI class */ 769 /* Some shadow-safe op, probably load, store, or FPTI class */
691 } 770 }
692 t = alpha_shadow.len - t; 771 t = alpha_shadow.len - t;
693 if (t > alpha_shadow.max) 772 if (t > alpha_shadow.max)
694 alpha_shadow.max = t; 773 alpha_shadow.max = t;
695 if (__predict_true(trigger_pc != 0 && a1 == 0)) { 774 if (__predict_true(trigger_pc != 0 && a1 == 0)) {
696 ++alpha_shadow.resolved; 775 ++alpha_shadow.resolved;
697 sig = alpha_fp_complete_at(trigger_pc, l, ucode); 776 sig = alpha_fp_complete_at(trigger_pc, l, ucode);
 777 goto resolved;
698 } else { 778 } else {
699 ++alpha_shadow.unresolved; 779 ++alpha_shadow.unresolved;
700 return sig; 
701 } 780 }
702done: 781
 782 unresolved: /* obligatory statement */;
 783 /*
 784 * *ucode contains the exception bits (FP_X_*). We need to
 785 * update the FP_C and FPCR, and send a signal for any new
 786 * trap that is enabled.
 787 */
 788 uint64_t orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
 789 uint64_t new_flags = orig_flags | *ucode;
 790 uint64_t changed_flags = orig_flags ^ new_flags;
 791 KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
 792
 793 l->l_md.md_flags |= NETBSD_FLAG_TO_FP_C(new_flags);
 794
 795 kpreempt_disable();
 796 if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
 797 fpu_load();
 798 }
 799 alpha_pal_wrfen(1);
 800 uint64_t orig_fpcr = alpha_read_fpcr();
 801 alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, l->l_md.md_flags));
 802 uint64_t needsig =
 803 changed_flags & FP_C_TO_NETBSD_MASK(l->l_md.md_flags);
 804 alpha_pal_wrfen(0);
 805 kpreempt_enable();
 806
 807 if (__predict_false(needsig)) {
 808 *ucode = needsig;
 809 return SIGFPE;
 810 }
 811 return 0;
 812
 813 resolved:
703 if (sig) { 814 if (sig) {
704 usertrap_pc = trigger_pc + 1; 815 usertrap_pc = trigger_pc + 1;
705 l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc; 816 l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc;
706 return sig; 
707 } 817 }
708 return 0; 818 return sig;
709} 819}
710 820
711/* 821/*
712 * Load the floating-point context for the current lwp. 823 * Load the floating-point context for the current lwp.
713 */ 823 */
714void 824void
715fpu_state_load(struct lwp *l, u_int flags) 825fpu_state_load(struct lwp *l, u_int flags)
716{ 826{
717 struct pcb * const pcb = lwp_getpcb(l); 827 struct pcb * const pcb = lwp_getpcb(l);
718 KASSERT(l == curlwp); 828 KASSERT(l == curlwp);
719 829
720#ifdef MULTIPROCESSOR 830#ifdef MULTIPROCESSOR
721 /* 831 /*
@@ -736,42 +846,52 @@ fpu_state_load(struct lwp *l, u_int flag @@ -736,42 +846,52 @@ fpu_state_load(struct lwp *l, u_int flag
736 * Instrument FP usage -- if a process had not previously 846 * Instrument FP usage -- if a process had not previously
737 * used FP, mark it as having used FP for the first time, 847 * used FP, mark it as having used FP for the first time,
738 * and count this event. 848 * and count this event.
739 * 849 *
740 * If a process has used FP, count a "used FP, and took 850 * If a process has used FP, count a "used FP, and took
741 * a trap to use it again" event. 851 * a trap to use it again" event.
742 */ 852 */
743 if ((flags & PCU_VALID) == 0) { 853 if ((flags & PCU_VALID) == 0) {
744 atomic_inc_ulong(&fpevent_use.ev_count); 854 atomic_inc_ulong(&fpevent_use.ev_count);
745 } else { 855 } else {
746 atomic_inc_ulong(&fpevent_reuse.ev_count); 856 atomic_inc_ulong(&fpevent_reuse.ev_count);
747 } 857 }
748 858
 859 if (alpha_fp_complete_debug) {
 860 printf("%s: [%s:%d] loading FPCR=0x%lx\n",
 861 __func__, l->l_proc->p_comm, l->l_proc->p_pid,
 862 pcb->pcb_fp.fpr_cr);
 863 }
749 alpha_pal_wrfen(1); 864 alpha_pal_wrfen(1);
750 restorefpstate(&pcb->pcb_fp); 865 restorefpstate(&pcb->pcb_fp);
751 alpha_pal_wrfen(0); 866 alpha_pal_wrfen(0);
752 867
753 l->l_md.md_flags |= MDLWP_FPACTIVE; 868 l->l_md.md_flags |= MDLWP_FPACTIVE;
754} 869}
755 870
756/* 871/*
757 * Save the FPU state. 872 * Save the FPU state.
758 */ 873 */
759 874
760void 875void
761fpu_state_save(struct lwp *l) 876fpu_state_save(struct lwp *l)
762{ 877{
763 struct pcb * const pcb = lwp_getpcb(l); 878 struct pcb * const pcb = lwp_getpcb(l);
764 879
765 alpha_pal_wrfen(1); 880 alpha_pal_wrfen(1);
766 savefpstate(&pcb->pcb_fp); 881 savefpstate(&pcb->pcb_fp);
767 alpha_pal_wrfen(0); 882 alpha_pal_wrfen(0);
 883 if (alpha_fp_complete_debug) {
 884 printf("%s: [%s:%d] saved FPCR=0x%lx\n",
 885 __func__, l->l_proc->p_comm, l->l_proc->p_pid,
 886 pcb->pcb_fp.fpr_cr);
 887 }
768} 888}
769 889
770/* 890/*
771 * Release the FPU. 891 * Release the FPU.
772 */ 892 */
773void 893void
774fpu_state_release(struct lwp *l) 894fpu_state_release(struct lwp *l)
775{ 895{
776 l->l_md.md_flags &= ~MDLWP_FPACTIVE; 896 l->l_md.md_flags &= ~MDLWP_FPACTIVE;
777} 897}

cvs diff -r1.374 -r1.375 src/sys/arch/alpha/alpha/machdep.c (expand / switch to unified diff)

--- src/sys/arch/alpha/alpha/machdep.c 2021/07/11 01:58:41 1.374
+++ src/sys/arch/alpha/alpha/machdep.c 2021/07/22 01:39:18 1.375
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: machdep.c,v 1.374 2021/07/11 01:58:41 thorpej Exp $ */ 1/* $NetBSD: machdep.c,v 1.375 2021/07/22 01:39:18 thorpej Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1998, 1999, 2000, 2019, 2020 The NetBSD Foundation, Inc. 4 * Copyright (c) 1998, 1999, 2000, 2019, 2020 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center and by Chris G. Demetriou. 9 * NASA Ames Research Center and by Chris G. Demetriou.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
@@ -55,31 +55,31 @@ @@ -55,31 +55,31 @@
55 * 55 *
56 * any improvements or extensions that they make and grant Carnegie the 56 * any improvements or extensions that they make and grant Carnegie the
57 * rights to redistribute these changes. 57 * rights to redistribute these changes.
58 */ 58 */
59 59
60#include "opt_ddb.h" 60#include "opt_ddb.h"
61#include "opt_kgdb.h" 61#include "opt_kgdb.h"
62#include "opt_modular.h" 62#include "opt_modular.h"
63#include "opt_multiprocessor.h" 63#include "opt_multiprocessor.h"
64#include "opt_dec_3000_300.h" 64#include "opt_dec_3000_300.h"
65#include "opt_dec_3000_500.h" 65#include "opt_dec_3000_500.h"
66#include "opt_execfmt.h" 66#include "opt_execfmt.h"
67 67
68#define __RWLOCK_PRIVATE  68#define __RWLOCK_PRIVATE
69 69
70#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 70#include <sys/cdefs.h> /* RCS ID & Copyright macro defns */
71 71
72__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.374 2021/07/11 01:58:41 thorpej Exp $"); 72__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.375 2021/07/22 01:39:18 thorpej Exp $");
73 73
74#include <sys/param.h> 74#include <sys/param.h>
75#include <sys/systm.h> 75#include <sys/systm.h>
76#include <sys/signalvar.h> 76#include <sys/signalvar.h>
77#include <sys/kernel.h> 77#include <sys/kernel.h>
78#include <sys/cpu.h> 78#include <sys/cpu.h>
79#include <sys/proc.h> 79#include <sys/proc.h>
80#include <sys/ras.h> 80#include <sys/ras.h>
81#include <sys/sched.h> 81#include <sys/sched.h>
82#include <sys/reboot.h> 82#include <sys/reboot.h>
83#include <sys/device.h> 83#include <sys/device.h>
84#include <sys/module.h> 84#include <sys/module.h>
85#include <sys/mman.h> 85#include <sys/mman.h>
@@ -185,26 +185,27 @@ uint8_t dec_3000_scsiid[3], dec_3000_scs @@ -185,26 +185,27 @@ uint8_t dec_3000_scsiid[3], dec_3000_scs
185 185
186struct platform platform; 186struct platform platform;
187 187
188#if NKSYMS || defined(DDB) || defined(MODULAR) 188#if NKSYMS || defined(DDB) || defined(MODULAR)
189/* start and end of kernel symbol table */ 189/* start and end of kernel symbol table */
190void *ksym_start, *ksym_end; 190void *ksym_start, *ksym_end;
191#endif 191#endif
192 192
193/* for cpu_sysctl() */ 193/* for cpu_sysctl() */
194int alpha_unaligned_print = 1; /* warn about unaligned accesses */ 194int alpha_unaligned_print = 1; /* warn about unaligned accesses */
195int alpha_unaligned_fix = 1; /* fix up unaligned accesses */ 195int alpha_unaligned_fix = 1; /* fix up unaligned accesses */
196int alpha_unaligned_sigbus = 0; /* don't SIGBUS on fixed-up accesses */ 196int alpha_unaligned_sigbus = 0; /* don't SIGBUS on fixed-up accesses */
197int alpha_fp_sync_complete = 0; /* fp fixup if sync even without /s */ 197int alpha_fp_sync_complete = 0; /* fp fixup if sync even without /s */
 198int alpha_fp_complete_debug = 0; /* fp completion debug enabled */
198 199
199/* 200/*
200 * XXX This should be dynamically sized, but we have the chicken-egg problem! 201 * XXX This should be dynamically sized, but we have the chicken-egg problem!
201 * XXX it should also be larger than it is, because not all of the mddt 202 * XXX it should also be larger than it is, because not all of the mddt
202 * XXX clusters end up being used for VM. 203 * XXX clusters end up being used for VM.
203 */ 204 */
204phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; /* low size bits overloaded */ 205phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX]; /* low size bits overloaded */
205int mem_cluster_cnt; 206int mem_cluster_cnt;
206 207
207int cpu_dump(void); 208int cpu_dump(void);
208int cpu_dumpsize(void); 209int cpu_dumpsize(void);
209u_long cpu_dump_mempagecnt(void); 210u_long cpu_dump_mempagecnt(void);
210void dumpsys(void); 211void dumpsys(void);
@@ -1636,26 +1637,31 @@ SYSCTL_SETUP(sysctl_machdep_setup, "sysc @@ -1636,26 +1637,31 @@ SYSCTL_SETUP(sysctl_machdep_setup, "sysc
1636 CTLTYPE_INT, "fp_sync_complete", NULL, 1637 CTLTYPE_INT, "fp_sync_complete", NULL,
1637 NULL, 0, &alpha_fp_sync_complete, 0, 1638 NULL, 0, &alpha_fp_sync_complete, 0,
1638 CTL_MACHDEP, CPU_FP_SYNC_COMPLETE, CTL_EOL); 1639 CTL_MACHDEP, CPU_FP_SYNC_COMPLETE, CTL_EOL);
1639 sysctl_createv(clog, 0, NULL, NULL, 1640 sysctl_createv(clog, 0, NULL, NULL,
1640 CTLFLAG_PERMANENT, 1641 CTLFLAG_PERMANENT,
1641 CTLTYPE_INT, "cctr", NULL, 1642 CTLTYPE_INT, "cctr", NULL,
1642 NULL, 0, &alpha_use_cctr, 0, 1643 NULL, 0, &alpha_use_cctr, 0,
1643 CTL_MACHDEP, CPU_CCTR, CTL_EOL); 1644 CTL_MACHDEP, CPU_CCTR, CTL_EOL);
1644 sysctl_createv(clog, 0, NULL, NULL, 1645 sysctl_createv(clog, 0, NULL, NULL,
1645 CTLFLAG_PERMANENT, 1646 CTLFLAG_PERMANENT,
1646 CTLTYPE_BOOL, "is_qemu", NULL, 1647 CTLTYPE_BOOL, "is_qemu", NULL,
1647 NULL, 0, &alpha_is_qemu, 0, 1648 NULL, 0, &alpha_is_qemu, 0,
1648 CTL_MACHDEP, CPU_IS_QEMU, CTL_EOL); 1649 CTL_MACHDEP, CPU_IS_QEMU, CTL_EOL);
 1650 sysctl_createv(clog, 0, NULL, NULL,
 1651 CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 1652 CTLTYPE_INT, "fp_complete_debug", NULL,
 1653 NULL, 0, &alpha_fp_complete_debug, 0,
 1654 CTL_MACHDEP, CPU_FP_COMPLETE_DEBUG, CTL_EOL);
1649} 1655}
1650 1656
1651/* 1657/*
1652 * Set registers on exec. 1658 * Set registers on exec.
1653 */ 1659 */
1654void 1660void
1655setregs(register struct lwp *l, struct exec_package *pack, vaddr_t stack) 1661setregs(register struct lwp *l, struct exec_package *pack, vaddr_t stack)
1656{ 1662{
1657 struct trapframe *tfp = l->l_md.md_tf; 1663 struct trapframe *tfp = l->l_md.md_tf;
1658 struct pcb *pcb; 1664 struct pcb *pcb;
1659#ifdef DEBUG 1665#ifdef DEBUG
1660 int i; 1666 int i;
1661#endif 1667#endif
@@ -1677,28 +1683,30 @@ setregs(register struct lwp *l, struct e @@ -1677,28 +1683,30 @@ setregs(register struct lwp *l, struct e
1677 pcb = lwp_getpcb(l); 1683 pcb = lwp_getpcb(l);
1678 memset(&pcb->pcb_fp, 0, sizeof(pcb->pcb_fp)); 1684 memset(&pcb->pcb_fp, 0, sizeof(pcb->pcb_fp));
1679 alpha_pal_wrusp(stack); 1685 alpha_pal_wrusp(stack);
1680 tfp->tf_regs[FRAME_PS] = ALPHA_PSL_USERSET; 1686 tfp->tf_regs[FRAME_PS] = ALPHA_PSL_USERSET;
1681 tfp->tf_regs[FRAME_PC] = pack->ep_entry & ~3; 1687 tfp->tf_regs[FRAME_PC] = pack->ep_entry & ~3;
1682 1688
1683 tfp->tf_regs[FRAME_A0] = stack; /* a0 = sp */ 1689 tfp->tf_regs[FRAME_A0] = stack; /* a0 = sp */
1684 tfp->tf_regs[FRAME_A1] = 0; /* a1 = rtld cleanup */ 1690 tfp->tf_regs[FRAME_A1] = 0; /* a1 = rtld cleanup */
1685 tfp->tf_regs[FRAME_A2] = 0; /* a2 = rtld object */ 1691 tfp->tf_regs[FRAME_A2] = 0; /* a2 = rtld object */
1686 tfp->tf_regs[FRAME_A3] = l->l_proc->p_psstrp; /* a3 = ps_strings */ 1692 tfp->tf_regs[FRAME_A3] = l->l_proc->p_psstrp; /* a3 = ps_strings */
1687 tfp->tf_regs[FRAME_T12] = tfp->tf_regs[FRAME_PC]; /* a.k.a. PV */ 1693 tfp->tf_regs[FRAME_T12] = tfp->tf_regs[FRAME_PC]; /* a.k.a. PV */
1688 1694
1689 if (__predict_true((l->l_md.md_flags & IEEE_INHERIT) == 0)) { 1695 if (__predict_true((l->l_md.md_flags & IEEE_INHERIT) == 0)) {
1690 l->l_md.md_flags &= ~MDLWP_FP_C; 1696 l->l_md.md_flags =
1691 pcb->pcb_fp.fpr_cr = FPCR_DYN(FP_RN); 1697 (l->l_md.md_flags & ~(MDLWP_FP_C | MDLWP_FPACTIVE)) |
 1698 FP_C_DEFAULT;
 1699 pcb->pcb_fp.fpr_cr = FPCR_DEFAULT;
1692 } 1700 }
1693} 1701}
1694 1702
1695void (*alpha_delay_fn)(unsigned long); 1703void (*alpha_delay_fn)(unsigned long);
1696 1704
1697/* 1705/*
1698 * Wait "n" microseconds. 1706 * Wait "n" microseconds.
1699 */ 1707 */
1700void 1708void
1701delay(unsigned long n) 1709delay(unsigned long n)
1702{ 1710{
1703 unsigned long pcc0, pcc1, curcycle, cycles, usec; 1711 unsigned long pcc0, pcc1, curcycle, cycles, usec;
1704 1712

cvs diff -r1.102 -r1.103 src/sys/arch/alpha/include/cpu.h (expand / switch to unified diff)

--- src/sys/arch/alpha/include/cpu.h 2021/06/26 15:02:19 1.102
+++ src/sys/arch/alpha/include/cpu.h 2021/07/22 01:39:18 1.103
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: cpu.h,v 1.102 2021/06/26 15:02:19 skrll Exp $ */ 1/* $NetBSD: cpu.h,v 1.103 2021/07/22 01:39:18 thorpej Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc. 4 * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Charles M. Hannum. 9 * NASA Ames Research Center, and by Charles M. Hannum.
10 * 10 *
11 * Redistribution and use in source and binary forms, with or without 11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions 12 * modification, are permitted provided that the following conditions
13 * are met: 13 * are met:
14 * 1. Redistributions of source code must retain the above copyright 14 * 1. Redistributions of source code must retain the above copyright
@@ -222,26 +222,27 @@ void cpu_signotify(struct lwp *); @@ -222,26 +222,27 @@ void cpu_signotify(struct lwp *);
222 222
223/* 223/*
224 * CTL_MACHDEP definitions. 224 * CTL_MACHDEP definitions.
225 */ 225 */
226#define CPU_CONSDEV 1 /* dev_t: console terminal device */ 226#define CPU_CONSDEV 1 /* dev_t: console terminal device */
227#define CPU_ROOT_DEVICE 2 /* string: root device name */ 227#define CPU_ROOT_DEVICE 2 /* string: root device name */
228#define CPU_UNALIGNED_PRINT 3 /* int: print unaligned accesses */ 228#define CPU_UNALIGNED_PRINT 3 /* int: print unaligned accesses */
229#define CPU_UNALIGNED_FIX 4 /* int: fix unaligned accesses */ 229#define CPU_UNALIGNED_FIX 4 /* int: fix unaligned accesses */
230#define CPU_UNALIGNED_SIGBUS 5 /* int: SIGBUS unaligned accesses */ 230#define CPU_UNALIGNED_SIGBUS 5 /* int: SIGBUS unaligned accesses */
231#define CPU_BOOTED_KERNEL 6 /* string: booted kernel name */ 231#define CPU_BOOTED_KERNEL 6 /* string: booted kernel name */
232#define CPU_FP_SYNC_COMPLETE 7 /* int: always fixup sync fp traps */ 232#define CPU_FP_SYNC_COMPLETE 7 /* int: always fixup sync fp traps */
233#define CPU_CCTR 8 /* int: using CC timecounter */ 233#define CPU_CCTR 8 /* int: using CC timecounter */
234#define CPU_IS_QEMU 9 /* int: running under Qemu */ 234#define CPU_IS_QEMU 9 /* int: running under Qemu */
 235#define CPU_FP_COMPLETE_DEBUG 10 /* int: enable FP completion debug */
235 236
236 237
237#ifdef _KERNEL 238#ifdef _KERNEL
238 239
239struct pcb; 240struct pcb;
240struct proc; 241struct proc;
241struct reg; 242struct reg;
242struct rpb; 243struct rpb;
243struct trapframe; 244struct trapframe;
244 245
245int badaddr(void *, size_t); 246int badaddr(void *, size_t);
246void * cpu_uarea_alloc(bool); 247void * cpu_uarea_alloc(bool);
247bool cpu_uarea_free(void *); 248bool cpu_uarea_free(void *);

cvs diff -r1.7 -r1.8 src/sys/arch/alpha/include/fpu.h (expand / switch to unified diff)

--- src/sys/arch/alpha/include/fpu.h 2017/10/17 00:26:35 1.7
+++ src/sys/arch/alpha/include/fpu.h 2021/07/22 01:39:18 1.8
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: fpu.h,v 1.7 2017/10/17 00:26:35 maya Exp $ */ 1/* $NetBSD: fpu.h,v 1.8 2021/07/22 01:39:18 thorpej Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 2001 Ross Harvey 4 * Copyright (c) 2001 Ross Harvey
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This software was written for NetBSD. 7 * This software was written for NetBSD.
8 * 8 *
9 * Redistribution and use in source and binary forms, with or without 9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions 10 * modification, are permitted provided that the following conditions
11 * are met: 11 * are met:
12 * 1. Redistributions of source code must retain the above copyright 12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer. 13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright 14 * 2. Redistributions in binary form must reproduce the above copyright
@@ -28,93 +28,119 @@ @@ -28,93 +28,119 @@
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE. 35 * POSSIBILITY OF SUCH DAMAGE.
36 */ 36 */
37 37
38#ifndef _ALPHA_FPU_H_ 38#ifndef _ALPHA_FPU_H_
39#define _ALPHA_FPU_H_ 39#define _ALPHA_FPU_H_
40 40
41#define _FP_C_DEF(n) (1UL << (n)) 
42 
43/* 41/*
44 * Most of these next definitions were moved from <ieeefp.h>. Apparently the 42 * Most of these next definitions were moved from <ieeefp.h>. Apparently the
45 * names happen to match those exported by Compaq and Linux from their fpu.h 43 * names happen to match those exported by Compaq and Linux from their fpu.h
46 * files. 44 * files.
47 */ 45 */
48 46
49#define FPCR_SUM _FP_C_DEF(63) 47/*
50#define FPCR_INED _FP_C_DEF(62) 48 * Bits in the Alpha Floating Point Control register. This is the hardware
51#define FPCR_UNFD _FP_C_DEF(61) 49 * register, and should not be directly manipulated by application software.
52#define FPCR_UNDZ _FP_C_DEF(60) 50 */
53#define FPCR_DYN(rm) ((unsigned long)(rm) << 58) 51#define FPCR_SUM __BIT(63) /* Summary (OR of all exception bits) */
54#define FPCR_IOV _FP_C_DEF(57) 52#define FPCR_INED __BIT(62) /* Inexact trap Disable */
55#define FPCR_INE _FP_C_DEF(56) 53#define FPCR_UNFD __BIT(61) /* Underflow trap Disable */
56#define FPCR_UNF _FP_C_DEF(55) 54#define FPCR_UNDZ __BIT(60) /* Underflow to Zero */
57#define FPCR_OVF _FP_C_DEF(54) 55#define FPCR_DYN_RM __BITS(58,59) /* Dynamic Rounding Mode */
58#define FPCR_DZE _FP_C_DEF(53) 56 /* 00 Chopped */
59#define FPCR_INV _FP_C_DEF(52) 57 /* 01 Minus Infinity */
60#define FPCR_OVFD _FP_C_DEF(51) 58 /* 10 Normal (round nearest) */
61#define FPCR_DZED _FP_C_DEF(50) 59 /* 11 Plus Infinity */
62#define FPCR_INVD _FP_C_DEF(49) 60#define FPCR_IOV __BIT(57) /* Integer Overflow */
63#define FPCR_DNZ _FP_C_DEF(48) 61#define FPCR_INE __BIT(56) /* Inexact Result */
64#define FPCR_DNOD _FP_C_DEF(47) 62#define FPCR_UNF __BIT(55) /* Underflow */
 63#define FPCR_OVF __BIT(54) /* Overflow */
 64#define FPCR_DZE __BIT(53) /* Division By Zero */
 65#define FPCR_INV __BIT(52) /* Invalid Operation */
 66#define FPCR_OVFD __BIT(51) /* Overflow trap Disable */
 67#define FPCR_DZED __BIT(50) /* Division By Zero trap Disable */
 68#define FPCR_INVD __BIT(49) /* Invalid Operation trap Disable */
 69#define FPCR_DNZ __BIT(48) /* Denormal Operands to Zero */
 70#define FPCR_DNOD __BIT(47) /* Denormal Operation trap Disable */
65 71
66#define FPCR_MIRRORED (FPCR_INE | FPCR_UNF | FPCR_OVF | FPCR_DZE | FPCR_INV) 72#define FPCR_MIRRORED (FPCR_INE | FPCR_UNF | FPCR_OVF | FPCR_DZE | FPCR_INV)
67#define FPCR_MIR_START 52 73#define FPCR_MIR_START 52
68 74
 75/* NetBSD default - no traps enabled, round-to-nearest */
 76#define FPCR_DEFAULT (__SHIFTIN(FP_RN, FPCR_DYN_RM) | \
 77 FPCR_INED | FPCR_UNFD | FPCR_OVFD | \
 78 FPCR_DZED | FPCR_INVD | FPCR_DNOD)
 79
69/* 80/*
 81 * IEEE Floating Point Control (FP_C) Quadword. This is a software
 82 * virtual register that abstracts the FPCR and software completion
 83 * performed by the kernel.
 84 *
70 * The AARM specifies the bit positions of the software word used for 85 * The AARM specifies the bit positions of the software word used for
71 * user mode interface to the control and status of the kernel completion 86 * user mode interface to the control and status of the kernel completion
72 * routines. Although it largely just redefines the FPCR, it shuffles 87 * routines. Although it largely just redefines the FPCR, it shuffles
73 * the bit order. The names of the bits are defined in the AARM, and 88 * the bit order. The names of the bits are defined in the AARM, and
74 * the definition prefix can easily be determined from public domain 89 * the definition prefix can easily be determined from public domain
75 * programs written to either the Compaq or Linux interfaces, which 90 * programs written to either the Compaq or Linux interfaces, which
76 * appear to be identical. 91 * appear to be identical.
 92 *
 93 * Bits 63-48 are reserved for implementation software.
 94 * Bits 47-23 are reserved for future architecture definition.
 95 * Bits 16-12 are reserved for implementation software.
 96 * Bits 11-7 are reserved for future architecture definition.
 97 * Bit 0 is reserved for implementation software.
77 */ 98 */
78 99
79#define IEEE_STATUS_DNO _FP_C_DEF(22) 100#define IEEE_STATUS_DNO __BIT(22) /* Denormal Operand */
80#define IEEE_STATUS_INE _FP_C_DEF(21) 101#define IEEE_STATUS_INE __BIT(21) /* Inexact Result */
81#define IEEE_STATUS_UNF _FP_C_DEF(20) 102#define IEEE_STATUS_UNF __BIT(20) /* Underflow */
82#define IEEE_STATUS_OVF _FP_C_DEF(19) 103#define IEEE_STATUS_OVF __BIT(19) /* Overflow */
83#define IEEE_STATUS_DZE _FP_C_DEF(18) 104#define IEEE_STATUS_DZE __BIT(18) /* Division By Zero */
84#define IEEE_STATUS_INV _FP_C_DEF(17) 105#define IEEE_STATUS_INV __BIT(17) /* Invalid Operation */
85 106
86#define IEEE_TRAP_ENABLE_DNO _FP_C_DEF(6) 107#define IEEE_TRAP_ENABLE_DNO __BIT(6) /* Denormal Operation trap */
87#define IEEE_TRAP_ENABLE_INE _FP_C_DEF(5) 108#define IEEE_TRAP_ENABLE_INE __BIT(5) /* Inexact Result trap */
88#define IEEE_TRAP_ENABLE_UNF _FP_C_DEF(4) 109#define IEEE_TRAP_ENABLE_UNF __BIT(4) /* Underflow trap */
89#define IEEE_TRAP_ENABLE_OVF _FP_C_DEF(3) 110#define IEEE_TRAP_ENABLE_OVF __BIT(3) /* Overflow trap */
90#define IEEE_TRAP_ENABLE_DZE _FP_C_DEF(2) 111#define IEEE_TRAP_ENABLE_DZE __BIT(2) /* Division By Zero trap */
91#define IEEE_TRAP_ENABLE_INV _FP_C_DEF(1) 112#define IEEE_TRAP_ENABLE_INV __BIT(1) /* Invalid Operation trap */
92 113
93#define IEEE_INHERIT _FP_C_DEF(14) 114#define IEEE_INHERIT __BIT(14)
94#define IEEE_MAP_UMZ _FP_C_DEF(13) /* Map underflowed outputs to zero */ 115#define IEEE_MAP_UMZ __BIT(13) /* Map underflowed outputs to zero */
95#define IEEE_MAP_DMZ _FP_C_DEF(12) /* Map denormal inputs to zero */ 116#define IEEE_MAP_DMZ __BIT(12) /* Map denormal inputs to zero */
96 117
97#define FP_C_MIRRORED (IEEE_STATUS_INE | IEEE_STATUS_UNF | IEEE_STATUS_OVF\ 118#define FP_C_ALLBITS __BITS(1,22)
98 | IEEE_STATUS_DZE | IEEE_STATUS_INV) 119
 120#define FP_C_MIRRORED (IEEE_STATUS_INE | IEEE_STATUS_UNF | IEEE_STATUS_OVF \
 121 | IEEE_STATUS_DZE | IEEE_STATUS_INV)
99#define FP_C_MIR_START 17 122#define FP_C_MIR_START 17
100 123
 124/* NetBSD default - no traps enabled (see FPCR default) */
 125#define FP_C_DEFAULT 0
 126
101#ifdef _KERNEL 127#ifdef _KERNEL
102 128
103#define FLD_MASK(len) ((1UL << (len)) - 1) 129#define FLD_MASK(len) ((1UL << (len)) - 1)
104#define FLD_CLEAR(obj, origin, len) \ 130#define FLD_CLEAR(obj, origin, len) \
105 ((obj) & ~(FLD_MASK(len) << (origin))) 131 ((obj) & ~(FLD_MASK(len) << (origin)))
106#define FLD_INSERT(obj, origin, len, value) \ 132#define FLD_INSERT(obj, origin, len, value) \
107 (FLD_CLEAR(obj, origin, len) | (value) << origin) 133 (FLD_CLEAR(obj, origin, len) | (value) << origin)
108 134
109#define FP_C_TO_NETBSD_MASK(fp_c) ((fp_c) >> 1 & 0x3f) 135#define FP_C_TO_NETBSD_MASK(fp_c) ((fp_c) >> 1 & 0x3f)
110#define FP_C_TO_NETBSD_FLAG(fp_c) ((fp_c) >> 17 & 0x3f) 136#define FP_C_TO_NETBSD_FLAG(fp_c) ((fp_c) >> 17 & 0x3f)
111#define NETBSD_MASK_TO_FP_C(m) (((m) & 0x3f) << 1) 137#define NETBSD_MASK_TO_FP_C(m) (((m) & 0x3f) << 1)
112#define NETBSD_FLAG_TO_FP_C(s) (((s) & 0x3f) << 17) 138#define NETBSD_FLAG_TO_FP_C(s) (((s) & 0x3f) << 17)
113#define CLEAR_FP_C_MASK(fp_c) ((fp_c) & ~(0x3f << 1)) 139#define CLEAR_FP_C_MASK(fp_c) ((fp_c) & ~(0x3f << 1))
114#define CLEAR_FP_C_FLAG(fp_c) ((fp_c) & ~(0x3f << 17)) 140#define CLEAR_FP_C_FLAG(fp_c) ((fp_c) & ~(0x3f << 17))
115#define SET_FP_C_MASK(fp_c, m) (CLEAR_FP_C_MASK(fp_c) | NETBSD_MASK_TO_FP_C(m)) 141#define SET_FP_C_MASK(fp_c, m) (CLEAR_FP_C_MASK(fp_c) | NETBSD_MASK_TO_FP_C(m))
116#define SET_FP_C_FLAG(fp_c, m) (CLEAR_FP_C_FLAG(fp_c) | NETBSD_FLAG_TO_FP_C(m)) 142#define SET_FP_C_FLAG(fp_c, m) (CLEAR_FP_C_FLAG(fp_c) | NETBSD_FLAG_TO_FP_C(m))
117 143
118#endif 144#endif /* _KERNEL */
119 145
120#endif 146#endif /* _ALPHA_FPU_H_ */