@@ -1,14 +1,14 @@
-/* $NetBSD: fp_complete.c,v 1.24 2020/09/01 08:22:36 thorpej Exp $ */
+/* $NetBSD: fp_complete.c,v 1.25 2021/07/22 01:39:18 thorpej Exp $ */
 /*-
  * Copyright (c) 2001 Ross Harvey
  * All rights reserved.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
 @@ -23,44 +23,70 @@
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 #include "opt_ddb.h"
 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
-__KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.24 2020/09/01 08:22:36 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: fp_complete.c,v 1.25 2021/07/22 01:39:18 thorpej Exp $");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/proc.h>
 #include <sys/atomic.h>
 #include <sys/evcnt.h>
 #include <machine/cpu.h>
 #include <machine/fpu.h>
 #include <machine/reg.h>
 #include <machine/alpha.h>
 #include <alpha/alpha/db_instruction.h>
 #include <lib/libkern/softfloat.h>
 /*
  * Validate our assumptions about bit positions.
  */
 __CTASSERT(ALPHA_AESR_INV == (FP_X_INV << 1));
 __CTASSERT(ALPHA_AESR_DZE == (FP_X_DZ  << 1));
 __CTASSERT(ALPHA_AESR_OVF == (FP_X_OFL << 1));
 __CTASSERT(ALPHA_AESR_UNF == (FP_X_UFL << 1));
 __CTASSERT(ALPHA_AESR_INE == (FP_X_IMP << 1));
 __CTASSERT(ALPHA_AESR_IOV == (FP_X_IOV << 1));
 __CTASSERT(IEEE_TRAP_ENABLE_INV == (FP_X_INV << 1));
 __CTASSERT(IEEE_TRAP_ENABLE_DZE == (FP_X_DZ  << 1));
 __CTASSERT(IEEE_TRAP_ENABLE_OVF == (FP_X_OFL << 1));
 __CTASSERT(IEEE_TRAP_ENABLE_UNF == (FP_X_UFL << 1));
 __CTASSERT(IEEE_TRAP_ENABLE_INE == (FP_X_IMP << 1));
 __CTASSERT((uint64_t)FP_X_IMP << (61 - 3) == FPCR_INED);
 __CTASSERT((uint64_t)FP_X_UFL << (61 - 3) == FPCR_UNFD);
 __CTASSERT((uint64_t)FP_X_OFL << (49 - 0) == FPCR_OVFD);
 __CTASSERT((uint64_t)FP_X_DZ  << (49 - 0) == FPCR_DZED);
 __CTASSERT((uint64_t)FP_X_INV << (49 - 0) == FPCR_INVD);
 __CTASSERT(FP_C_ALLBITS == MDLWP_FP_C);
 #define	TSWINSIZE 4	/* size of trap shadow window in uint32_t units */
 /*	Set Name		Opcodes			AARM C.* Symbols  */
 #define	CPUREG_CLASS		(0xfUL << 0x10)		/* INT[ALSM]	  */
 #define	FPUREG_CLASS		(0xfUL << 0x14)		/* ITFP, FLT[ILV] */
 #define	CHECKFUNCTIONCODE	(1UL << 0x18)		/* MISC		  */
 #define	TRAPSHADOWBOUNDARY	(1UL << 0x00 |		/* PAL		  */\
 UL << 0x19 |		/* \PAL\	  */\
 UL << 0x1a |		/* JSR		  */\
 UL << 0x1b |		/* \PAL\	  */\
 UL << 0x1d |		/* \PAL\	  */\
 UL << 0x1e |		/* \PAL\	  */\
 @@ -341,51 +367,40 @@ fp_c_to_fpcr_1(uint64_t fpcr, uint64_t f
 	 * It's hard to arrange for conforming bit fields, because the FP_C
 	 * and the FPCR are both architected, with specified (and relatively
 	 * scrambled) bit numbers. Defining an internal unscrambled FP_C
 	 * wouldn't help much, because every user exception requires the
 	 * architected bit order in the sigcontext.
+	 *
 	 * Programs that fiddle with the fpcr exception bits (instead of fp_c)
 	 * will lose, because those bits can be and usually are subsetted;
 	 * the official home is in the fp_c. Furthermore, the kernel puts
 	 * phony enables (it lies :-) in the fpcr in order to get control when
 	 * it is necessary to initially set a sticky bit.
 	 */
-	fpcr &= FPCR_DYN(3);
+	fpcr &= FPCR_DYN_RM;
 	/*
-	 * enable traps = case where flag bit is clear OR program wants a trap
+	 * enable traps = case where flag bit is clear AND program wants a trap
 	 * enables = ~flags | mask
 	 * enables = ~flags & mask
 	 * disables = ~(~flags | mask)
 	 * disables = flags & ~mask. Thank you, Augustus De Morgan (1806-1871)
 	 */
 	disables = FP_C_TO_NETBSD_FLAG(fp_c) & ~FP_C_TO_NETBSD_MASK(fp_c);
 	fpcr |= (disables & (FP_X_IMP | FP_X_UFL)) << (61 - 3);
 	fpcr |= (disables & (FP_X_OFL | FP_X_DZ | FP_X_INV)) << (49 - 0);
 #	if !(FP_X_INV == 1 && FP_X_DZ == 2 && FP_X_OFL == 4 &&		\
 	    FP_X_UFL == 8 && FP_X_IMP == 16 && FP_X_IOV == 32 &&	\
 	    FP_X_UFL << (61 - 3) == FPCR_UNFD &&			\
 	    FP_X_IMP << (61 - 3) == FPCR_INED &&			\
 	    FP_X_OFL << (49 - 0) == FPCR_OVFD)
 #		error "Assertion failed"
 	/*
 	 * We don't care about the other built-in bit numbers because they
 	 * have been architecturally specified.
 	 */
 #	endif
 	/*
 	 * Copy the FP_C sticky status bits into the FPCR status field.
 	 * The mask must be applied BEFORE the shift: "<<" binds tighter
 	 * than "&", so without the parentheses the already-shifted mask
 	 * would be ANDed with the unshifted fp_c and the status bits
 	 * would never be transferred.
 	 */
 	fpcr |= (fp_c & FP_C_MIRRORED) << (FPCR_MIR_START - FP_C_MIR_START);
 	/* IEEE_MAP_DMZ is bit 12; shifted by 36 it lands on bit 48 (FPCR_DNZ). */
 	fpcr |= (fp_c & IEEE_MAP_DMZ) << 36;
 	/* Any mirrored status bit set in the FP_C also sets the FPCR summary bit. */
 	if (fp_c & FP_C_MIRRORED)
 		fpcr |= FPCR_SUM;
 	/* Underflow-to-zero mapping: set UNDZ together with the underflow disable. */
 	if (fp_c & IEEE_MAP_UMZ)
 		fpcr |= FPCR_UNDZ | FPCR_UNFD;
 	/*
 	 * IEEE_TRAP_ENABLE_DNO is bit 6; shifted by 41 it lands on bit 47
 	 * (FPCR_DNOD).  The FP_C bit is an enable and the FPCR bit is a
 	 * disable, hence the inversion of fp_c.
 	 */
 	fpcr |= (~fp_c & IEEE_TRAP_ENABLE_DNO) << 41;
 	return fpcr;
+}
 static void
 fp_c_to_fpcr(struct lwp *l)
+{
 @@ -397,26 +412,31 @@ alpha_write_fp_c(struct lwp *l, uint64_t
+{
 	uint64_t md_flags;
 	fp_c &= MDLWP_FP_C;
 	md_flags = l->l_md.md_flags;
 	if ((md_flags & MDLWP_FP_C) == fp_c)
 		return;
 	l->l_md.md_flags = (md_flags & ~MDLWP_FP_C) | fp_c;
 	kpreempt_disable();
 	if (md_flags & MDLWP_FPACTIVE) {
 		alpha_pal_wrfen(1);
 		fp_c_to_fpcr(l);
 		alpha_pal_wrfen(0);
 	} else {
 		struct pcb *pcb = l->l_addr;
 		pcb->pcb_fp.fpr_cr =
 		    fp_c_to_fpcr_1(pcb->pcb_fp.fpr_cr, l->l_md.md_flags);
+	}
 	kpreempt_enable();
+}
 uint64_t
 alpha_read_fp_c(struct lwp *l)
+{
 	/*
 	 * A possibly-desireable EV6-specific optimization would deviate from
 	 * the Alpha Architecture spec and keep some FP_C bits in the FPCR,
 	 * but in a transparent way. Some of the code for that would need to
 	 * go right here.
 	 */
 @@ -492,32 +512,67 @@ float64_unk(float64 a, float64 b)
  *			 1  1  1  0 . . . cvtXt/g (cvtqt, cvt[dq]g only)
  *			 1  1  1  1 . . . cvtXq/q (cvttq, cvtgq)
  *	   |	       |
  * 15 14 13|12 11 10 09|08 07 06 05	  the twilight zone
  * --------======------============
  *  TRAP   : RND : SRC : FUNCTION  :
  * /s /i /u  x  x  1  0  1  1  0  0 . . . cvtts, /siu only 0, 1, 5, 7
  *  0  1  0  1  0  1  0  1  1  0  0 . . . cvtst   (src == T (!)) 2ac NOT /S
  *  1  1  0  1  0  1  0  1  1  0  0 . . . cvtst/s (src == T (!)) 6ac
  *  x  0  x  x  x  x  0	 1  1  1  1 . . . cvttq/_ (src == T)
  */
 /*
  * Debug helper: print one line (or two, without DDB) describing the
  * floating-point instruction word `bits' located at user address `pc',
  * tagged with the command name and pid of the process owning lwp `l'.
  * Called from alpha_fp_interpret() when alpha_fp_complete_debug is set.
  */
 static void
 print_fp_instruction(alpha_instruction *pc, struct lwp *l, uint32_t bits)
+{
 #if defined(DDB)
 	/* With DDB configured, format the instruction via alpha_print_instruction(). */
 	char buf[32];
 	struct alpha_print_instruction_context ctx = {
 		.insn.bits = bits,
 		.pc = (unsigned long)pc,
 		.buf = buf,
 		.bufsize = sizeof(buf),
 	};
 	/* Return value deliberately ignored; only the text left in buf is used. */
 	(void) alpha_print_instruction(&ctx);
 	printf("INSN [%s:%d] @0x%lx -> %s\n",
 	    l->l_proc->p_comm, l->l_proc->p_pid, ctx.pc, ctx.buf);
 #else
 	/*
 	 * No disassembler available: dump the raw fields instead, first
 	 * through the float_format view (opcode/function/registers), then
 	 * through the float_detail view (trap/round/source/class).
 	 */
 	alpha_instruction insn = {
 		.bits = bits,
 	};
 	printf("INSN [%s:%d] @0x%lx -> opc=0x%x func=0x%x fa=%d fb=%d fc=%d\n",
 	    l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
 	    insn.float_format.opcode, insn.float_format.function,
 	    insn.float_format.fa, insn.float_format.fb, insn.float_format.fc);
 	printf("INSN [%s:%d] @0x%lx -> trp=0x%x rnd=0x%x src=0x%x fn=0x%x\n",
 	    l->l_proc->p_comm, l->l_proc->p_pid, (unsigned long)pc,
 	    insn.float_detail.trp, insn.float_detail.rnd,
 	    insn.float_detail.src, insn.float_detail.opclass);
 #endif /* DDB */
+}
 static void
 alpha_fp_interpret(alpha_instruction *pc, struct lwp *l, uint32_t bits)
+{
 	s_float sfa, sfb, sfc;
 	t_float tfa, tfb, tfc;
 	alpha_instruction inst;
 	if (alpha_fp_complete_debug) {
 		print_fp_instruction(pc, l, bits);
+	}
 	inst.bits = bits;
 	switch(inst.generic_format.opcode) {
 	default:
 		/* this "cannot happen" */
 		this_cannot_happen(2, inst.bits);
 		return;
 	case op_any_float:
 		if (inst.float_format.function == op_cvtql_sv ||
 		    inst.float_format.function == op_cvtql_v) {
 			alpha_stt(inst.float_detail.fb, &tfb);
 			sfc.i = (int64_t)tfb.i >= 0L ? INT_MAX : INT_MIN;
 			alpha_lds(inst.float_detail.fc, &sfc);
 			float_raise(FP_X_INV);
 @@ -578,35 +633,41 @@ alpha_fp_complete_at(alpha_instruction *
 	uint64_t rm, fpcr, orig_fpcr;
 	uint64_t orig_flags, new_flags, changed_flags, md_flags;
 	if (__predict_false(copyin(trigger_pc, &inst, sizeof inst))) {
 		this_cannot_happen(6, -1);
 		return SIGSEGV;
+	}
 	kpreempt_disable();
 	if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
 		fpu_load();
+	}
 	alpha_pal_wrfen(1);
 	/*
-	 * If necessary, lie about the dynamic rounding mode so emulation
+	 * Alpha FLOAT instructions can override the rounding mode on a
-	 * software need go to only one place for it, and so we don't have to
+	 * per-instruction basis.  If necessary, lie about the dynamic
-	 * lock any memory locations or pass a third parameter to every
+	 * rounding mode so emulation software need go to only one place
-	 * SoftFloat entry point.
+	 * for it, and so we don't have to lock any memory locations or
 	 * pass a third parameter to every SoftFloat entry point.
+	 *
 	 * N.B. the rounding mode field of the FLOAT format instructions
 	 * matches that of the FPCR *except* for the value 3, which means
 	 * "dynamic" rounding mode (i.e. what is programmed into the FPCR).
 	 */
 	orig_fpcr = fpcr = alpha_read_fpcr();
 	rm = inst.float_detail.rnd;
-	if (__predict_false(rm != 3 /* dynamic */ && rm != (fpcr >> 58 & 3))) {
+	if (__predict_false(rm != 3 /* dynamic */ &&
-		fpcr = (fpcr & ~FPCR_DYN(3)) | FPCR_DYN(rm);
+			    rm != __SHIFTOUT(fpcr, FPCR_DYN_RM))) {
 		fpcr = (fpcr & ~FPCR_DYN_RM) | __SHIFTIN(rm, FPCR_DYN_RM);
 		alpha_write_fpcr(fpcr);
+	}
 	orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
 	alpha_fp_interpret(trigger_pc, l, inst.bits);
 	md_flags = l->l_md.md_flags;
 	new_flags = FP_C_TO_NETBSD_FLAG(md_flags);
 	changed_flags = orig_flags ^ new_flags;
 	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
 	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, md_flags));
 	needsig = changed_flags & FP_C_TO_NETBSD_MASK(md_flags);
 @@ -620,64 +681,82 @@ alpha_fp_complete_at(alpha_instruction *
+}
 int
 alpha_fp_complete(u_long a0, u_long a1, struct lwp *l, uint64_t *ucode)
+{
 	int t;
 	int sig;
 	uint64_t op_class;
 	alpha_instruction inst;
 	/* "trigger_pc" is Compaq's term for the earliest faulting op */
 	alpha_instruction *trigger_pc, *usertrap_pc;
 	alpha_instruction *pc, *win_begin, tsw[TSWINSIZE];
-	sig = SIGFPE;
+	if (alpha_fp_complete_debug) {
 		printf("%s: [%s:%d] a0[AESR]=0x%lx a1[regmask]=0x%lx "
 		       "FPCR=0x%lx FP_C=0x%lx\n",
 		    __func__, l->l_proc->p_comm, l->l_proc->p_pid,
 		    a0, a1, alpha_read_fpcr(),
 		    l->l_md.md_flags & (MDLWP_FP_C|MDLWP_FPACTIVE));
+	}
 	pc = (alpha_instruction *)l->l_md.md_tf->tf_regs[FRAME_PC];
 	trigger_pc = pc - 1;	/* for ALPHA_AMASK_PAT case */
 	/*
 	 * Start out with the code mirroring the exception flags
 	 * (FP_X_*).  Shift right 1 bit to discard SWC to achieve
 	 * this.
 	 */
 	*ucode = a0 >> 1;
 	if (cpu_amask & ALPHA_AMASK_PAT) {
-		/* SWC | INV */
+		if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) != 0 ||
-		if (a0 & 3 || alpha_fp_sync_complete) {
+		    alpha_fp_sync_complete) {
 			sig = alpha_fp_complete_at(trigger_pc, l, ucode);
-			goto done;
+			goto resolved;
+		}
+	}
-	*ucode = a0;
+	if ((a0 & (ALPHA_AESR_SWC | ALPHA_AESR_INV)) == 0)
-	/* SWC | INV */
+		goto unresolved;
 	if (!(a0 & 3))
 		return sig;
 /*
  * At this point we are somewhere in the trap shadow of one or more instruc-
  * tions that have trapped with software completion specified.  We have a mask
  * of the registers written by trapping instructions.
+ *
  * Now step backwards through the trap shadow, clearing bits in the
  * destination write mask until the trigger instruction is found, and
  * interpret this one instruction in SW. If a SIGFPE is not required, back up
  * the PC until just after this instruction and restart. This will execute all
  * trap shadow instructions between the trigger pc and the trap pc twice.
  */
 	trigger_pc = 0;
 	win_begin = pc;
 	++alpha_shadow.scans;
 	t = alpha_shadow.len;
 	for (--pc; a1; --pc) {
 		++alpha_shadow.len;
 		if (pc < win_begin) {
 			win_begin = pc - TSWINSIZE + 1;
 			if (copyin(win_begin, tsw, sizeof tsw)) {
 				/* sigh, try to get just one */
 				win_begin = pc;
-				if (copyin(win_begin, tsw, 4))
+				if (copyin(win_begin, tsw, 4)) {
 					/*
 					 * We're off the rails here; don't
 					 * bother updating the FP_C.
 					 */
 					return SIGSEGV;
+				}
+			}
+		}
 		assert(win_begin <= pc && !((long)pc  & 3));
 		inst = tsw[pc - win_begin];
 		op_class = 1UL << inst.generic_format.opcode;
 		if (op_class & FPUREG_CLASS) {
 			a1 &= ~(1UL << (inst.operate_generic_format.rc + 32));
 			trigger_pc = pc;
 		} else if (op_class & CPUREG_CLASS) {
 			a1 &= ~(1UL << inst.operate_generic_format.rc);
 			trigger_pc = pc;
 		} else if (op_class & TRAPSHADOWBOUNDARY) {
 			if (op_class & CHECKFUNCTIONCODE) {
 @@ -685,37 +764,68 @@ alpha_fp_complete(u_long a0, u_long a1,
 				    inst.mem_format.displacement == op_excb)
 					break;	/* code breaks AARM rules */
 			} else
 				break; /* code breaks AARM rules */
+		}
 		/* Some shadow-safe op, probably load, store, or FPTI class */
+	}
 	t = alpha_shadow.len - t;
 	if (t > alpha_shadow.max)
 		alpha_shadow.max = t;
 	if (__predict_true(trigger_pc != 0 && a1 == 0)) {
 		++alpha_shadow.resolved;
 		sig = alpha_fp_complete_at(trigger_pc, l, ucode);
 		goto resolved;
 	} else {
 		++alpha_shadow.unresolved;
 		return sig;
+	}
 done:
  unresolved: /* obligatory statement */;
 	/*
 	 * *ucode contains the exception bits (FP_X_*).  We need to
 	 * update the FP_C and FPCR, and send a signal for any new
 	 * trap that is enabled.
 	 */
 	uint64_t orig_flags = FP_C_TO_NETBSD_FLAG(l->l_md.md_flags);
 	uint64_t new_flags = orig_flags | *ucode;
 	uint64_t changed_flags = orig_flags ^ new_flags;
 	KASSERT((orig_flags | changed_flags) == new_flags); /* panic on 1->0 */
 	l->l_md.md_flags |= NETBSD_FLAG_TO_FP_C(new_flags);
 	kpreempt_disable();
 	if ((curlwp->l_md.md_flags & MDLWP_FPACTIVE) == 0) {
 		fpu_load();
+	}
 	alpha_pal_wrfen(1);
 	uint64_t orig_fpcr = alpha_read_fpcr();
 	alpha_write_fpcr(fp_c_to_fpcr_1(orig_fpcr, l->l_md.md_flags));
 	uint64_t needsig =
 	    changed_flags & FP_C_TO_NETBSD_MASK(l->l_md.md_flags);
 	alpha_pal_wrfen(0);
 	kpreempt_enable();
 	if (__predict_false(needsig)) {
 		*ucode = needsig;
 		return SIGFPE;
+	}
 	return 0;
  resolved:
 	if (sig) {
 		usertrap_pc = trigger_pc + 1;
 		l->l_md.md_tf->tf_regs[FRAME_PC] = (unsigned long)usertrap_pc;
 		return sig;
+	}
-	return 0;
+	return sig;
+}
 /*
  * Load the float-point context for the current lwp.
  */
 void
 fpu_state_load(struct lwp *l, u_int flags)
+{
 	struct pcb * const pcb = lwp_getpcb(l);
 	KASSERT(l == curlwp);
 #ifdef MULTIPROCESSOR
 	/*
 @@ -736,42 +846,52 @@ fpu_state_load(struct lwp *l, u_int flag
 	 * Instrument FP usage -- if a process had not previously
 	 * used FP, mark it as having used FP for the first time,
 	 * and count this event.
+	 *
 	 * If a process has used FP, count a "used FP, and took
 	 * a trap to use it again" event.
 	 */
 	if ((flags & PCU_VALID) == 0) {
 		atomic_inc_ulong(&fpevent_use.ev_count);
 	} else {
 		atomic_inc_ulong(&fpevent_reuse.ev_count);
+	}
 	if (alpha_fp_complete_debug) {
 		printf("%s: [%s:%d] loading FPCR=0x%lx\n",
 		    __func__, l->l_proc->p_comm, l->l_proc->p_pid,
 		    pcb->pcb_fp.fpr_cr);
+	}
 	alpha_pal_wrfen(1);
 	restorefpstate(&pcb->pcb_fp);
 	alpha_pal_wrfen(0);
 	l->l_md.md_flags |= MDLWP_FPACTIVE;
+}
 /*
  * Save the FPU state.
  *
  * Stores the floating-point state for lwp `l' into the pcb_fp area of
  * its PCB using savefpstate().  When alpha_fp_complete_debug is set,
  * the FPCR value that was just saved is logged.
  */
 void
 fpu_state_save(struct lwp *l)
+{
 	struct pcb * const pcb = lwp_getpcb(l);
 	/* NOTE(review): wrfen(1)/wrfen(0) presumably enable/disable FP access around the save -- confirm. */
 	alpha_pal_wrfen(1);
 	savefpstate(&pcb->pcb_fp);
 	alpha_pal_wrfen(0);
 	if (alpha_fp_complete_debug) {
 		printf("%s: [%s:%d] saved FPCR=0x%lx\n",
 		    __func__, l->l_proc->p_comm, l->l_proc->p_pid,
 		    pcb->pcb_fp.fpr_cr);
+	}
+}
 /*
  * Release the FPU.
  *
  * Clears MDLWP_FPACTIVE in the machine-dependent flags of lwp `l';
  * no FP register state is touched here.
  */
 void
 fpu_state_release(struct lwp *l)
+{
 	l->l_md.md_flags &= ~MDLWP_FPACTIVE;
+}

 @@ -1,14 +1,14 @@
-/* $NetBSD: machdep.c,v 1.374 2021/07/11 01:58:41 thorpej Exp $ */
+/* $NetBSD: machdep.c,v 1.375 2021/07/22 01:39:18 thorpej Exp $ */
 /*-
  * Copyright (c) 1998, 1999, 2000, 2019, 2020 The NetBSD Foundation, Inc.
  * All rights reserved.
+ *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center and by Chris G. Demetriou.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
 @@ -55,31 +55,31 @@
+ *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 #include "opt_ddb.h"
 #include "opt_kgdb.h"
 #include "opt_modular.h"
 #include "opt_multiprocessor.h"
 #include "opt_dec_3000_300.h"
 #include "opt_dec_3000_500.h"
 #include "opt_execfmt.h"
 #define	__RWLOCK_PRIVATE
 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
-__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.374 2021/07/11 01:58:41 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: machdep.c,v 1.375 2021/07/22 01:39:18 thorpej Exp $");
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/signalvar.h>
 #include <sys/kernel.h>
 #include <sys/cpu.h>
 #include <sys/proc.h>
 #include <sys/ras.h>
 #include <sys/sched.h>
 #include <sys/reboot.h>
 #include <sys/device.h>
 #include <sys/module.h>
 #include <sys/mman.h>
 @@ -185,26 +185,27 @@ uint8_t	dec_3000_scsiid[3], dec_3000_scs
 struct platform platform;
 #if NKSYMS || defined(DDB) || defined(MODULAR)
 /* start and end of kernel symbol table */
 void	*ksym_start, *ksym_end;
 #endif
 /* for cpu_sysctl() */
 int	alpha_unaligned_print = 1;	/* warn about unaligned accesses */
 int	alpha_unaligned_fix = 1;	/* fix up unaligned accesses */
 int	alpha_unaligned_sigbus = 0;	/* don't SIGBUS on fixed-up accesses */
 int	alpha_fp_sync_complete = 0;	/* fp fixup if sync even without /s */
 int	alpha_fp_complete_debug = 0;	/* fp completion debug enabled */
 /*
  * XXX This should be dynamically sized, but we have the chicken-egg problem!
  * XXX it should also be larger than it is, because not all of the mddt
  * XXX clusters end up being used for VM.
  */
 phys_ram_seg_t mem_clusters[VM_PHYSSEG_MAX];	/* low size bits overloaded */
 int	mem_cluster_cnt;
 int	cpu_dump(void);
 int	cpu_dumpsize(void);
 u_long	cpu_dump_mempagecnt(void);
 void	dumpsys(void);
 @@ -1636,26 +1637,31 @@ SYSCTL_SETUP(sysctl_machdep_setup, "sysc
 		       CTLTYPE_INT, "fp_sync_complete", NULL,
 		       NULL, 0, &alpha_fp_sync_complete, 0,
 		       CTL_MACHDEP, CPU_FP_SYNC_COMPLETE, CTL_EOL);
 	sysctl_createv(clog, 0, NULL, NULL,
 		       CTLFLAG_PERMANENT,
 		       CTLTYPE_INT, "cctr", NULL,
 		       NULL, 0, &alpha_use_cctr, 0,
 		       CTL_MACHDEP, CPU_CCTR, CTL_EOL);
 	sysctl_createv(clog, 0, NULL, NULL,
 		       CTLFLAG_PERMANENT,
 		       CTLTYPE_BOOL, "is_qemu", NULL,
 		       NULL, 0, &alpha_is_qemu, 0,
 		       CTL_MACHDEP, CPU_IS_QEMU, CTL_EOL);
 	sysctl_createv(clog, 0, NULL, NULL,
 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
 		       CTLTYPE_INT, "fp_complete_debug", NULL,
 		       NULL, 0, &alpha_fp_complete_debug, 0,
 		       CTL_MACHDEP, CPU_FP_COMPLETE_DEBUG, CTL_EOL);
+}
 /*
  * Set registers on exec.
  */
 void
 setregs(register struct lwp *l, struct exec_package *pack, vaddr_t stack)
+{
 	struct trapframe *tfp = l->l_md.md_tf;
 	struct pcb *pcb;
 #ifdef DEBUG
 	int i;
 #endif
 @@ -1677,28 +1683,30 @@ setregs(register struct lwp *l, struct e
 	pcb = lwp_getpcb(l);
 	memset(&pcb->pcb_fp, 0, sizeof(pcb->pcb_fp));
 	alpha_pal_wrusp(stack);
 	tfp->tf_regs[FRAME_PS] = ALPHA_PSL_USERSET;
 	tfp->tf_regs[FRAME_PC] = pack->ep_entry & ~3;
 	tfp->tf_regs[FRAME_A0] = stack;			/* a0 = sp */
 	tfp->tf_regs[FRAME_A1] = 0;			/* a1 = rtld cleanup */
 	tfp->tf_regs[FRAME_A2] = 0;			/* a2 = rtld object */
 	tfp->tf_regs[FRAME_A3] = l->l_proc->p_psstrp;	/* a3 = ps_strings */
 	tfp->tf_regs[FRAME_T12] = tfp->tf_regs[FRAME_PC];	/* a.k.a. PV */
 	if (__predict_true((l->l_md.md_flags & IEEE_INHERIT) == 0)) {
-		l->l_md.md_flags &= ~MDLWP_FP_C;
+		l->l_md.md_flags =
-		pcb->pcb_fp.fpr_cr = FPCR_DYN(FP_RN);
+		    (l->l_md.md_flags & ~(MDLWP_FP_C | MDLWP_FPACTIVE)) |
 		    FP_C_DEFAULT;
 		pcb->pcb_fp.fpr_cr = FPCR_DEFAULT;
+	}
+}
 void	(*alpha_delay_fn)(unsigned long);
 /*
  * Wait "n" microseconds.
  */
 void
 delay(unsigned long n)
+{
 	unsigned long pcc0, pcc1, curcycle, cycles, usec;

 @@ -1,14 +1,14 @@
-/* $NetBSD: fpu.h,v 1.7 2017/10/17 00:26:35 maya Exp $ */
+/* $NetBSD: fpu.h,v 1.8 2021/07/22 01:39:18 thorpej Exp $ */
 /*-
  * Copyright (c) 2001 Ross Harvey
  * All rights reserved.
+ *
  * This software was written for NetBSD.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
 @@ -28,93 +28,119 @@
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 #ifndef _ALPHA_FPU_H_
 #define _ALPHA_FPU_H_
 #define	_FP_C_DEF(n) (1UL << (n))
 /*
  * Most of these next definitions were moved from <ieeefp.h>. Apparently the
  * names happen to match those exported by Compaq and Linux from their fpu.h
  * files.
  */
-#define	FPCR_SUM	_FP_C_DEF(63)
+/*
-#define	FPCR_INED	_FP_C_DEF(62)
+ * Bits in the Alpha Floating Point Control register.  This is the hardware
-#define	FPCR_UNFD	_FP_C_DEF(61)
+ * register, and should not be directly manipulated by application software.
-#define	FPCR_UNDZ	_FP_C_DEF(60)
+ */
-#define	FPCR_DYN(rm)	((unsigned long)(rm) << 58)
+#define	FPCR_SUM	__BIT(63)	/* Summary (OR of all exception bits) */
-#define	FPCR_IOV	_FP_C_DEF(57)
+#define	FPCR_INED	__BIT(62)	/* Inexact trap Disable */
-#define	FPCR_INE	_FP_C_DEF(56)
+#define	FPCR_UNFD	__BIT(61)	/* Underflow trap Disable */
-#define	FPCR_UNF	_FP_C_DEF(55)
+#define	FPCR_UNDZ	__BIT(60)	/* Underflow to Zero */
-#define	FPCR_OVF	_FP_C_DEF(54)
+#define	FPCR_DYN_RM	__BITS(58,59)	/* Dynamic Rounding Mode */
-#define	FPCR_DZE	_FP_C_DEF(53)
+					/* 00 Chopped */
-#define	FPCR_INV	_FP_C_DEF(52)
+					/* 01 Minus Infinity */
-#define	FPCR_OVFD	_FP_C_DEF(51)
+					/* 10 Normal (round nearest) */
-#define	FPCR_DZED	_FP_C_DEF(50)
+					/* 11 Plus Infinity */
-#define	FPCR_INVD	_FP_C_DEF(49)
+#define	FPCR_IOV	__BIT(57)	/* Integer Overflow */
-#define	FPCR_DNZ	_FP_C_DEF(48)
+#define	FPCR_INE	__BIT(56)	/* Inexact Result */
-#define	FPCR_DNOD	_FP_C_DEF(47)
+#define	FPCR_UNF	__BIT(55)	/* Underflow */
 #define	FPCR_OVF	__BIT(54)	/* Overflow */
 #define	FPCR_DZE	__BIT(53)	/* Division By Zero */
 #define	FPCR_INV	__BIT(52)	/* Invalid Operation */
 #define	FPCR_OVFD	__BIT(51)	/* Overflow trap Disable */
 #define	FPCR_DZED	__BIT(50)	/* Division By Zero trap Disable */
 #define	FPCR_INVD	__BIT(49)	/* Invalid Operation trap Disable */
 #define	FPCR_DNZ	__BIT(48)	/* Denormal Operands to Zero */
 #define	FPCR_DNOD	__BIT(47)	/* Denormal Operation trap Disable */
 #define	FPCR_MIRRORED (FPCR_INE | FPCR_UNF | FPCR_OVF | FPCR_DZE | FPCR_INV)
 #define FPCR_MIR_START 52
 /* NetBSD default - no traps enabled, round-to-nearest */
 #define	FPCR_DEFAULT	(__SHIFTIN(FP_RN, FPCR_DYN_RM) |		\
 			 FPCR_INED | FPCR_UNFD | FPCR_OVFD |		\
 			 FPCR_DZED | FPCR_INVD | FPCR_DNOD)
 /*
  * IEEE Floating Point Control (FP_C) Quadword.  This is a software
  * virtual register that abstracts the FPCR and software completion
  * performed by the kernel.
+ *
  * The AARM specifies the bit positions of the software word used for
  * user mode interface to the control and status of the kernel completion
  * routines. Although it largely just redefines the FPCR, it shuffles
  * the bit order. The names of the bits are defined in the AARM, and
  * the definition prefix can easily be determined from public domain
  * programs written to either the Compaq or Linux interfaces, which
  * appear to be identical.
+ *
  * Bits 63-48 are reserved for implementation software.
  * Bits 47-23 are reserved for future architecture definition.
  * Bits 16-12 are reserved for implementation software.
  * Bits 11-7 are reserved for future architecture definition.
  * Bit 0 is reserved for implementation software.
  */
-#define IEEE_STATUS_DNO _FP_C_DEF(22)
+#define	IEEE_STATUS_DNO __BIT(22)	/* Denormal Operand */
-#define IEEE_STATUS_INE _FP_C_DEF(21)
+#define	IEEE_STATUS_INE __BIT(21)	/* Inexact Result */
-#define IEEE_STATUS_UNF _FP_C_DEF(20)
+#define	IEEE_STATUS_UNF __BIT(20)	/* Underflow */
-#define IEEE_STATUS_OVF _FP_C_DEF(19)
+#define	IEEE_STATUS_OVF __BIT(19)	/* Overflow */
-#define IEEE_STATUS_DZE _FP_C_DEF(18)
+#define	IEEE_STATUS_DZE __BIT(18)	/* Division By Zero */
-#define IEEE_STATUS_INV _FP_C_DEF(17)
+#define	IEEE_STATUS_INV __BIT(17)	/* Invalid Operation */
-#define	IEEE_TRAP_ENABLE_DNO _FP_C_DEF(6)
+#define	IEEE_TRAP_ENABLE_DNO __BIT(6)	/* Denormal Operation trap */
-#define	IEEE_TRAP_ENABLE_INE _FP_C_DEF(5)
+#define	IEEE_TRAP_ENABLE_INE __BIT(5)	/* Inexact Result trap */
-#define	IEEE_TRAP_ENABLE_UNF _FP_C_DEF(4)
+#define	IEEE_TRAP_ENABLE_UNF __BIT(4)	/* Underflow trap */
-#define	IEEE_TRAP_ENABLE_OVF _FP_C_DEF(3)
+#define	IEEE_TRAP_ENABLE_OVF __BIT(3)	/* Overflow trap */
-#define	IEEE_TRAP_ENABLE_DZE _FP_C_DEF(2)
+#define	IEEE_TRAP_ENABLE_DZE __BIT(2)	/* Division By Zero trap */
-#define	IEEE_TRAP_ENABLE_INV _FP_C_DEF(1)
+#define	IEEE_TRAP_ENABLE_INV __BIT(1)	/* Invalid Operation trap */
-#define	IEEE_INHERIT _FP_C_DEF(14)
+#define	IEEE_INHERIT __BIT(14)
-#define	IEEE_MAP_UMZ _FP_C_DEF(13)	/* Map underflowed outputs to zero */
+#define	IEEE_MAP_UMZ __BIT(13)		/* Map underflowed outputs to zero */
-#define	IEEE_MAP_DMZ _FP_C_DEF(12)	/* Map denormal inputs to zero */
+#define	IEEE_MAP_DMZ __BIT(12)		/* Map denormal inputs to zero */
-#define FP_C_MIRRORED (IEEE_STATUS_INE | IEEE_STATUS_UNF | IEEE_STATUS_OVF\
+#define	FP_C_ALLBITS	__BITS(1,22)
 				| IEEE_STATUS_DZE | IEEE_STATUS_INV)
 #define	FP_C_MIRRORED	(IEEE_STATUS_INE | IEEE_STATUS_UNF | IEEE_STATUS_OVF \
 			 | IEEE_STATUS_DZE | IEEE_STATUS_INV)
 #define	FP_C_MIR_START 17
 /* NetBSD default - no traps enabled (see FPCR default) */
 #define	FP_C_DEFAULT	0
 #ifdef _KERNEL
 #define	FLD_MASK(len) ((1UL << (len)) - 1)
 #define FLD_CLEAR(obj, origin, len)	\
 		((obj) & ~(FLD_MASK(len) << (origin)))
 #define	FLD_INSERT(obj, origin, len, value)	\
 		(FLD_CLEAR(obj, origin, len) | (value) << origin)
 #define	FP_C_TO_NETBSD_MASK(fp_c) 	((fp_c) >> 1 & 0x3f)
 #define	FP_C_TO_NETBSD_FLAG(fp_c) 	((fp_c) >> 17 & 0x3f)
 #define NETBSD_MASK_TO_FP_C(m)		(((m) & 0x3f) << 1)
 #define NETBSD_FLAG_TO_FP_C(s)		(((s) & 0x3f) << 17)
 #define	CLEAR_FP_C_MASK(fp_c)		((fp_c) & ~(0x3f << 1))
 #define	CLEAR_FP_C_FLAG(fp_c)		((fp_c) & ~(0x3f << 17))
 #define	SET_FP_C_MASK(fp_c, m) (CLEAR_FP_C_MASK(fp_c) | NETBSD_MASK_TO_FP_C(m))
 #define	SET_FP_C_FLAG(fp_c, m) (CLEAR_FP_C_FLAG(fp_c) | NETBSD_FLAG_TO_FP_C(m))
-#endif
+#endif /* _KERNEL */
-#endif
+#endif /* _ALPHA_FPU_H_ */

 @@ -1,14 +1,14 @@
-/* $NetBSD: cpu.h,v 1.102 2021/06/26 15:02:19 skrll Exp $ */
+/* $NetBSD: cpu.h,v 1.103 2021/07/22 01:39:18 thorpej Exp $ */
 /*-
  * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc.
  * All rights reserved.
+ *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
  * NASA Ames Research Center, and by Charles M. Hannum.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
 @@ -222,26 +222,27 @@ void	cpu_signotify(struct lwp *);
 /*
  * CTL_MACHDEP definitions.
  */
 #define	CPU_CONSDEV		1	/* dev_t: console terminal device */
 #define	CPU_ROOT_DEVICE		2	/* string: root device name */
 #define	CPU_UNALIGNED_PRINT	3	/* int: print unaligned accesses */
 #define	CPU_UNALIGNED_FIX	4	/* int: fix unaligned accesses */
 #define	CPU_UNALIGNED_SIGBUS	5	/* int: SIGBUS unaligned accesses */
 #define	CPU_BOOTED_KERNEL	6	/* string: booted kernel name */
 #define	CPU_FP_SYNC_COMPLETE	7	/* int: always fixup sync fp traps */
 #define	CPU_CCTR		8	/* int: using CC timecounter */
 #define	CPU_IS_QEMU		9	/* int: running under Qemu */
 #define	CPU_FP_COMPLETE_DEBUG	10	/* int: enable FP completion debug */
 #ifdef _KERNEL
 struct pcb;
 struct proc;
 struct reg;
 struct rpb;
 struct trapframe;
 int	badaddr(void *, size_t);
 void *	cpu_uarea_alloc(bool);
 bool	cpu_uarea_free(void *);