Mon Jul 12 15:21:51 2021 UTC
Optimized fast-paths for mutex_spin_enter() and mutex_spin_exit().


(thorpej)
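
For reference, the logic the new stubs inline is the machine-independent
spin-mutex fast path (the MUTEX_SPIN_SPLRAISE()/MUTEX_SPIN_SPLRESTORE()
macros in sys/kern/kern_mutex.c, which the stub comments below quote as
pseudocode).  A rough C sketch of that fast path follows; it is illustrative
only: splraise() is spelled as in the quoted pseudocode, the *_sketch names
are made up, and the real MI code additionally handles LOCKDEBUG, diagnostics
and exact memory-barrier placement.

#define	__MUTEX_PRIVATE			/* exposes mtx_ipl and mtx_lock */

#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/cpu.h>

static inline void
mutex_spin_enter_sketch(kmutex_t *mtx)
{
	struct cpu_info *ci;
	int s;

	s = splraise(mtx->mtx_ipl);	/* raise IPL before taking the lock */
	ci = curcpu();			/* curlwp->l_cpu is stable from here */
	if (ci->ci_mtx_count-- == 0)	/* outermost spin mutex on this CPU? */
		ci->ci_mtx_oldspl = s;	/* remember the IPL to restore later */
	if (!__cpu_simple_lock_try(&mtx->mtx_lock))
		mutex_spin_retry(mtx);	/* contended: out-of-line slow path */
}

static inline void
mutex_spin_exit_sketch(kmutex_t *mtx)
{
	struct cpu_info *ci = curcpu();
	int s;

	__cpu_simple_unlock(&mtx->mtx_lock);
	s = ci->ci_mtx_oldspl;
	if (++ci->ci_mtx_count == 0)	/* last spin mutex released? */
		splx(s);		/* drop back to the saved IPL */
}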
diff -r1.29 -r1.30 src/sys/arch/alpha/alpha/genassym.cf
diff -r1.5 -r1.6 src/sys/arch/alpha/alpha/lock_stubs.s
diff -r1.8 -r1.9 src/sys/arch/alpha/include/mutex.h

cvs diff -r1.29 -r1.30 src/sys/arch/alpha/alpha/genassym.cf
--- src/sys/arch/alpha/alpha/genassym.cf 2021/07/11 01:58:41 1.29
+++ src/sys/arch/alpha/alpha/genassym.cf 2021/07/12 15:21:51 1.30
@@ -1,4 +1,4 @@
-# $NetBSD: genassym.cf,v 1.29 2021/07/11 01:58:41 thorpej Exp $
+# $NetBSD: genassym.cf,v 1.30 2021/07/12 15:21:51 thorpej Exp $
 
 #
 # Copyright (c) 1982, 1990, 1993
@@ -67,6 +67,7 @@
 #
 
 quote #define __RWLOCK_PRIVATE
+quote #define __MUTEX_PRIVATE
 
 include <sys/param.h>
 include <sys/buf.h>
@@ -75,6 +76,7 @@
 include <sys/mbuf.h>
 include <sys/msgbuf.h>
 include <sys/rwlock.h>
+include <sys/mutex.h>
 include <sys/syscall.h>
 
 include <machine/cpu.h>
@@ -198,6 +200,7 @@
 define	CPU_INFO_IDLE_LWP	offsetof(struct cpu_info, ci_data.cpu_idlelwp)
 define	CPU_INFO_SSIR		offsetof(struct cpu_info, ci_ssir)
 define	CPU_INFO_MTX_COUNT	offsetof(struct cpu_info, ci_mtx_count)
+define	CPU_INFO_MTX_OLDSPL	offsetof(struct cpu_info, ci_mtx_oldspl)
 define	CPU_INFO_SIZEOF		sizeof(struct cpu_info)
 
 # Bits in lock fields
@@ -205,3 +208,6 @@
 define	RW_WRITE_LOCKED		RW_WRITE_LOCKED
 define	RW_READ_INCR		RW_READ_INCR
 define	RW_READ_COUNT_SHIFT	RW_READ_COUNT_SHIFT
+define	MUTEX_IPL		offsetof(struct kmutex, mtx_ipl)
+define	MUTEX_SIMPLELOCK	offsetof(struct kmutex, mtx_lock)
+define	__SIMPLELOCK_LOCKED	__SIMPLELOCK_LOCKED
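
For context: each define line in genassym.cf is evaluated by the C compiler
at build time, and the result is emitted into the generated assym.h header as
a plain #define that the assembly in lock_stubs.s can reference for structure
offsets and constants.  Roughly like the fragment below; the numeric values
are hypothetical and shown only to illustrate the mechanism.

/* assym.h (generated); offsets shown are hypothetical examples */
#define	CPU_INFO_MTX_COUNT	200	/* offsetof(struct cpu_info, ci_mtx_count) */
#define	CPU_INFO_MTX_OLDSPL	204	/* offsetof(struct cpu_info, ci_mtx_oldspl) */
#define	MUTEX_IPL		4	/* offsetof(struct kmutex, mtx_ipl) */
#define	MUTEX_SIMPLELOCK	0	/* offsetof(struct kmutex, mtx_lock) */
#define	__SIMPLELOCK_LOCKED	1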

cvs diff -r1.5 -r1.6 src/sys/arch/alpha/alpha/lock_stubs.s
--- src/sys/arch/alpha/alpha/lock_stubs.s 2021/07/11 01:58:41 1.5
+++ src/sys/arch/alpha/alpha/lock_stubs.s 2021/07/12 15:21:51 1.6
@@ -1,4 +1,4 @@
-/*	$NetBSD: lock_stubs.s,v 1.5 2021/07/11 01:58:41 thorpej Exp $	*/
+/*	$NetBSD: lock_stubs.s,v 1.6 2021/07/12 15:21:51 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2007, 2021 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
 
 #include <machine/asm.h>
 
-__KERNEL_RCSID(0, "$NetBSD: lock_stubs.s,v 1.5 2021/07/11 01:58:41 thorpej Exp $");
+__KERNEL_RCSID(0, "$NetBSD: lock_stubs.s,v 1.6 2021/07/12 15:21:51 thorpej Exp $");
 
 #include "assym.h"
 
@@ -109,6 +109,120 @@
 3:
 	br	1b
 	END(mutex_exit)
+
+/*
+ * void mutex_spin_enter(kmutex_t *mtx);
+ */
+LEAF(mutex_spin_enter, 1);
+	LDGP(pv)
+
+	/*
+	 * STEP 1: Perform the MUTEX_SPIN_SPLRAISE() function.
+	 * (see sys/kern/kern_mutex.c)
+	 *
+	 *	s = splraise(mtx->mtx_ipl);
+	 *	if (curcpu->ci_mtx_count-- == 0)
+	 *		curcpu->ci_mtx_oldspl = s;
+	 */
+
+	call_pal PAL_OSF1_rdps		/* clobbers v0, t0, t8..t11 */
+					/* v0 = cur_ipl */
+#ifdef __BWX__
+	mov	a0, a1			/* a1 = mtx */
+	ldbu	a0, MUTEX_IPL(a0)	/* a0 = new_ipl */
+	mov	v0, a4			/* save cur_ipl in a4 */
+#else
+	mov	a0, a1			/* a1 = mtx */
+	ldq_u	a2, MUTEX_IPL(a0)
+	mov	v0, a4			/* save cur_ipl in a4 */
+	extbl	a2, MUTEX_IPL, a0	/* a0 = new_ipl */
+#endif /* __BWX__ */
+	cmplt	v0, a0, a3		/* a3 = (cur_ipl < new_ipl) */
+	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
+	mov	v0, a5			/* save curlwp in a5 */
+	/*
+	 * The forward-branch over the SWPIPL call is correctly predicted
+	 * not-taken by the CPU because it's rare for a code path to acquire
+	 * 2 spin mutexes.
+	 */
+	beq	a3, 1f			/*      no? -> skip... */
+	call_pal PAL_OSF1_swpipl	/* clobbers v0, t0, t8..t11 */
+	/*
+	 * v0 returns the old_ipl, which will be the same as the
+	 * cur_ipl we squirreled away in a4 earlier.
+	 */
+1:
+	/*
+	 * curlwp->l_cpu is now stable.  Update the counter and
+	 * stash the old_ipl.  Just in case it's not clear what's
+	 * going on, we:
+	 *
+	 *	- Load previous value of mtx_oldspl into t1.
+	 *	- Conditionally move old_ipl into t1 if mtx_count == 0.
+	 *	- Store t1 back to mtx_oldspl; if mtx_count != 0,
+	 *	  the store is redundant, but it's faster than a forward
+	 *	  branch.
+	 */
+	ldq	a3, L_CPU(a5)		/* a3 = curlwp->l_cpu (curcpu) */
+	ldl	t0, CPU_INFO_MTX_COUNT(a3)
+	ldl	t1, CPU_INFO_MTX_OLDSPL(a3)
+	cmoveq	t0, a4, t1		/* mtx_count == 0? -> t1 = old_ipl */
+	subl	t0, 1, t2		/* mtx_count-- */
+	stl	t1, CPU_INFO_MTX_OLDSPL(a3)
+	stl	t2, CPU_INFO_MTX_COUNT(a3)
+
+	/*
+	 * STEP 2: __cpu_simple_lock_try(&mtx->mtx_lock)
+	 */
+	ldl_l	t0, MUTEX_SIMPLELOCK(a1)
+	ldiq	t1, __SIMPLELOCK_LOCKED
+	bne	t0, 2f			/* contended */
+	stl_c	t1, MUTEX_SIMPLELOCK(a1)
+	beq	t1, 2f			/* STL_C failed; consider contended */
+	MB
+	RET
+2:
+	mov	a1, a0			/* restore first argument */
+	lda	pv, mutex_spin_retry
+	jmp	(pv)
+	END(mutex_spin_enter)
+
+/*
+ * void mutex_spin_exit(kmutex_t *mtx);
+ */
+LEAF(mutex_spin_exit, 1)
+	LDGP(pv);
+	MB
+
+	/*
+	 * STEP 1: __cpu_simple_unlock(&mtx->mtx_lock)
+	 */
+	stl	zero, MUTEX_SIMPLELOCK(a0)
+
+	/*
+	 * STEP 2: Perform the MUTEX_SPIN_SPLRESTORE() function.
+	 * (see sys/kern/kern_mutex.c)
+	 *
+	 *	s = curcpu->ci_mtx_oldspl;
+	 *	if (++curcpu->ci_mtx_count == 0)
+	 *		splx(s);
+	 */
+	GET_CURLWP	/* Note: GET_CURLWP clobbers v0, t0, t8...t11. */
+	ldq	a3, L_CPU(v0)		/* a3 = curlwp->l_cpu (curcpu) */
+	ldl	t0, CPU_INFO_MTX_COUNT(a3)
+	ldl	a0, CPU_INFO_MTX_OLDSPL(a3)
+	addl	t0, 1, t2		/* mtx_count++ */
+	stl	t2, CPU_INFO_MTX_COUNT(a3)
+	/*
+	 * The forward-branch over the SWPIPL call is correctly predicted
+	 * not-taken by the CPU because it's rare for a code path to acquire
+	 * 2 spin mutexes.
+	 */
+	bne	t2, 1f			/* t2 != 0? Skip... */
+	call_pal PAL_OSF1_swpipl	/* clobbers v0, t0, t8..t11 */
+1:
+	RET
+	END(mutex_spin_exit)
 
 /*
  * void rw_enter(krwlock_t *rwl, krw_t op);
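
On contention (the lock word already held, or the stl_c conditional store
failing), the enter stub restores the mutex pointer to a0 and tail-calls
mutex_spin_retry(), the out-of-line C slow path in sys/kern/kern_mutex.c.
Keeping that loop out of line keeps the assembly fast path short; as the
stub's own comments note, its forward branches are then predicted not-taken
in the common case.  A hedged sketch of what the retry loop amounts to (the
real routine also applies backoff and LOCKDEBUG spin-out checks):

static void
mutex_spin_retry_sketch(kmutex_t *mtx)
{
	/*
	 * Spin on plain loads until the lock looks free, then retry the
	 * interlocked acquire; repeat until it succeeds.
	 */
	do {
		while (mtx->mtx_lock == __SIMPLELOCK_LOCKED)
			/* spin */;
	} while (!__cpu_simple_lock_try(&mtx->mtx_lock));
}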

cvs diff -r1.8 -r1.9 src/sys/arch/alpha/include/mutex.h
--- src/sys/arch/alpha/include/mutex.h 2020/09/23 00:52:49 1.8
+++ src/sys/arch/alpha/include/mutex.h 2021/07/12 15:21:51 1.9
@@ -1,4 +1,4 @@
-/*	$NetBSD: mutex.h,v 1.8 2020/09/23 00:52:49 thorpej Exp $	*/
+/*	$NetBSD: mutex.h,v 1.9 2021/07/12 15:21:51 thorpej Exp $	*/
 
 /*-
  * Copyright (c) 2002, 2006, 2007 The NetBSD Foundation, Inc.
@@ -59,6 +59,7 @@
 
 #define	__HAVE_SIMPLE_MUTEXES		1
 #define	__HAVE_MUTEX_STUBS		1
+#define	__HAVE_SPIN_MUTEX_STUBS		1
 
 #define	MUTEX_CAS(p, o, n)		_lock_cas((p), (o), (n))
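
Finally, defining __HAVE_SPIN_MUTEX_STUBS here advertises to the
machine-independent mutex code that the port now supplies its own
mutex_spin_enter()/mutex_spin_exit() fast paths in lock_stubs.s.  As a
hedged illustration of the kind of fallback such an option guards (the exact
arrangement in sys/kern/kern_mutex.c may differ), a port without the option
would simply route the spin entry points to the common C vectors:

#ifndef __HAVE_SPIN_MUTEX_STUBS
/* No MD fast path: use the common C implementation.  Illustrative only. */
void
mutex_spin_enter(kmutex_t *mtx)
{
	mutex_vector_enter(mtx);
}

void
mutex_spin_exit(kmutex_t *mtx)
{
	mutex_vector_exit(mtx);
}
#endif	/* !__HAVE_SPIN_MUTEX_STUBS */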