Sat Sep 15 17:15:01 2012 UTC ()
In the "interlock" case (where the scheduler lock is used as the condvar
lock), we need to take the CPU interlock before releasing the CPU.
Otherwise, other threads can be scheduled before we get the interlock,
leading to, e.g., missed condvar wakeups.  This affected only "locks_up.c"
locking (nomen est omen?).

Also, remove various __predict_true()/__predict_false() annotations, since
they don't have a positive performance impact in any setup.


(pooka)
diff -r1.28 -r1.29 src/sys/rump/librump/rumpkern/scheduler.c

cvs diff -r1.28 -r1.29 src/sys/rump/librump/rumpkern/scheduler.c (expand / switch to context diff)
--- src/sys/rump/librump/rumpkern/scheduler.c 2012/06/22 12:45:43 1.28
+++ src/sys/rump/librump/rumpkern/scheduler.c 2012/09/15 17:15:01 1.29
@@ -1,4 +1,4 @@
-/*      $NetBSD: scheduler.c,v 1.28 2012/06/22 12:45:43 rmind Exp $	*/
+/*      $NetBSD: scheduler.c,v 1.29 2012/09/15 17:15:01 pooka Exp $	*/
 
 /*
  * Copyright (c) 2010, 2011 Antti Kantee.  All Rights Reserved.
@@ -26,7 +26,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.28 2012/06/22 12:45:43 rmind Exp $");
+__KERNEL_RCSID(0, "$NetBSD: scheduler.c,v 1.29 2012/09/15 17:15:01 pooka Exp $");
 
 #include <sys/param.h>
 #include <sys/atomic.h>
@@ -300,7 +300,7 @@
 	KASSERT(l->l_target_cpu != NULL);
 	rcpu = &rcpu_storage[l->l_target_cpu-&rump_cpus[0]];
 	if (atomic_cas_ptr(&rcpu->rcpu_prevlwp, l, RCPULWP_BUSY) == l) {
-		if (__predict_true(interlock == rcpu->rcpu_mtx))
+		if (interlock == rcpu->rcpu_mtx)
 			rumpuser_mutex_exit(rcpu->rcpu_mtx);
 		SCHED_FASTPATH(rcpu);
 		/* jones, you're the man */
@@ -317,7 +317,7 @@
 		domigrate = true;
 
 	/* Take lock.  This acts as a load barrier too. */
-	if (__predict_true(interlock != rcpu->rcpu_mtx))
+	if (interlock != rcpu->rcpu_mtx)
 		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
 
 	for (;;) {
@@ -442,17 +442,23 @@
 	 * is relevant only in the non-fastpath scheduling case, but
 	 * we don't know here if that's going to happen, so need to
 	 * expect the worst.
+	 *
+	 * If the scheduler interlock was requested by the caller, we
+	 * need to obtain it before we release the CPU.  Otherwise, we risk a
+	 * race condition where another thread is scheduled onto the
+	 * rump kernel CPU before our current thread can
+	 * grab the interlock.
 	 */
-	membar_exit();
+	if (interlock == rcpu->rcpu_mtx)
+		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
+	else
+		membar_exit();
 
 	/* Release the CPU. */
 	old = atomic_swap_ptr(&rcpu->rcpu_prevlwp, l);
 
 	/* No waiters?  No problems.  We're outta here. */
 	if (old == RCPULWP_BUSY) {
-		/* Was the scheduler interlock requested? */
-		if (__predict_false(interlock == rcpu->rcpu_mtx))
-			rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
 		return;
 	}
 
@@ -464,11 +470,11 @@
 	 * Snailpath: take lock and signal anyone waiting for this CPU.
 	 */
 
-	rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
+	if (interlock != rcpu->rcpu_mtx)
+		rumpuser_mutex_enter_nowrap(rcpu->rcpu_mtx);
 	if (rcpu->rcpu_wanted)
 		rumpuser_cv_broadcast(rcpu->rcpu_cv);
-
-	if (__predict_true(interlock != rcpu->rcpu_mtx))
+	if (interlock != rcpu->rcpu_mtx)
 		rumpuser_mutex_exit(rcpu->rcpu_mtx);
 }