Sat Apr 9 12:06:39 2022 UTC ()
sparc: Fix membar_sync with LDSTUB.

membar_sync is required to be a full sequential consistency barrier,
equivalent to MEMBAR #StoreStore|LoadStore|StoreLoad|LoadLoad on
sparcv9.  LDSTUB and SWAP are the only pre-v9 instructions that do
this, and SWAP doesn't exist on all v7 hardware, so use LDSTUB.

Note: I'm having a hard time nailing down a reference for the
ordering implied by LDSTUB and SWAP.  I'm _pretty sure_ SWAP has to
imply store-load ordering since the SPARCv8 manual recommends it for
Dekker's algorithm (which notoriously requires store-load ordering),
and the formal memory model treats LDSTUB and SWAP the same for
ordering.  But the v8 and v9 manuals aren't clear.

GCC issues STBAR and LDSTUB, but (a) I don't see why STBAR is
necessary here, (b) STBAR doesn't exist on v7 so it'd be a pain to
use, and (c) from what I've heard (although again it's hard to nail
down authoritative references here) all actual SPARC hardware is TSO
or SC, so STBAR is a no-op in all the silicon anyway.

Either way, certainly this is better than what we had before, which
was nothing implying ordering at all, just a store!


(riastradh)
diff -r1.5 -r1.6 src/common/lib/libc/arch/sparc/atomic/membar_ops.S

cvs diff -r1.5 -r1.6 src/common/lib/libc/arch/sparc/atomic/membar_ops.S (expand / switch to context diff)
--- src/common/lib/libc/arch/sparc/atomic/membar_ops.S 2022/04/06 22:47:56 1.5
+++ src/common/lib/libc/arch/sparc/atomic/membar_ops.S 2022/04/09 12:06:39 1.6
@@ -1,4 +1,4 @@
-/*	$NetBSD: membar_ops.S,v 1.5 2022/04/06 22:47:56 riastradh Exp $	*/
+/*	$NetBSD: membar_ops.S,v 1.6 2022/04/09 12:06:39 riastradh Exp $	*/
 
 /*-
  * Copyright (c) 2007 The NetBSD Foundation, Inc.
@@ -31,25 +31,43 @@
 
 #include "atomic_op_asm.h"
 
+#ifdef _KERNEL_OPT
+#include "opt_multiprocessor.h"
+#endif
+
 	.text
 
-/* These assume Total Store Order (TSO) */
+/*
+ * These assume Total Store Order (TSO), which may reorder
+ * store-before-load but nothing else.  Hence, only membar_sync must
+ * issue anything -- specifically, an LDSTUB, which (along with SWAP)
+ * is the only instruction that implies a sequential consistency
+ * barrier.
+ *
+ * If we ran with Partial Store Order (PSO), we would also need to
+ * issue STBAR for membar_exit (load/store-before-store) and
+ * membar_producer (store-before-store).
+ */
 
-ENTRY(_membar_producer)
+ENTRY(_membar_consumer)
 	retl
 	 nop
+END(_membar_consumer)
 
-ENTRY(_membar_consumer)
-	add	%sp, -112, %sp
-	st	%g0, [%sp+100]
+ENTRY(_membar_sync)
 	retl
-	 sub	%sp, -112, %sp
+#if !defined(_KERNEL) || defined(MULTIPROCESSOR)
+	 ldstub	[%sp - 4], %g0
+#else
+	 nop
+#endif
+END(_membar_sync)
 
-ATOMIC_OP_ALIAS(membar_producer,_membar_producer)
+ATOMIC_OP_ALIAS(membar_producer,_membar_consumer)
+STRONG_ALIAS(_membar_producer,_membar_consumer)
 ATOMIC_OP_ALIAS(membar_consumer,_membar_consumer)
 ATOMIC_OP_ALIAS(membar_enter,_membar_consumer)
 STRONG_ALIAS(_membar_enter,_membar_consumer)
 ATOMIC_OP_ALIAS(membar_exit,_membar_consumer)
 STRONG_ALIAS(_membar_exit,_membar_consumer)
-ATOMIC_OP_ALIAS(membar_sync,_membar_consumer)
+ATOMIC_OP_ALIAS(membar_sync,_membar_sync)
-STRONG_ALIAS(_membar_sync,_membar_consumer)