Sat Feb 3 12:39:17 2024 UTC (119d)
Pull up following revision(s) (requested by riastradh in ticket #566):

	sys/dev/sdmmc/ld_sdmmc.c: revision 1.43

ld@sdmmc(4): Hack around deadlock in cache sync on detach.

Yanking a card triggers the sdmmc discovery task, which runs in the
sdmmc task thread, to detach any attached child devices.

Detaching ld@sdmmc triggers a cache flush (via ldbegindetach ->
disk_begindetach -> ld_lastclose -> ld_flush -> ioctl DIOCCACHESYNC),
which is implemented by scheduling a task to do sdmmc_mem_flush_cache
and then waiting for it to complete.

The sdmmc_mem_cache_flush is done by an sdmmc task so it happens
after all previously scheduled I/O operations -- that way the cache
flush doesn't complete until the previously scheduled I/O operations
are complete.

However, when the cache flush task is issued from the discovery task,
this doesn't work, because the cache flush task can't start until the
discovery task has returned -- but the discovery task won't return
until the cache flush task has completed.

To work around this deadlock, which usually happens only when the
device has been yanked anyway so further I/O would be lost anyway,
just do the cache flush synchronously in DIOCCACHESYNC if we're
running in the task thread.

This isn't quite right -- implementation details of the task thread
shouldn't bleed into ld@sdmmc, and running the cache sync _before_
any subsequently scheduled I/O tasks is asking for trouble -- but it
should serve to avoid the deadlock in PR kern/57870 until we can fix
a host of concurrency bugs in sdmmc by fixing the locking scheme and
running discovery in a separate thread from tasks.


(martin)
diff -r1.42 -r1.42.4.1 src/sys/dev/sdmmc/ld_sdmmc.c

cvs diff -r1.42 -r1.42.4.1 src/sys/dev/sdmmc/ld_sdmmc.c (expand / switch to unified diff)

--- src/sys/dev/sdmmc/ld_sdmmc.c 2022/05/16 10:03:23 1.42
+++ src/sys/dev/sdmmc/ld_sdmmc.c 2024/02/03 12:39:17 1.42.4.1
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: ld_sdmmc.c,v 1.42 2022/05/16 10:03:23 jmcneill Exp $ */ 1/* $NetBSD: ld_sdmmc.c,v 1.42.4.1 2024/02/03 12:39:17 martin Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2008 KIYOHARA Takashi 4 * Copyright (c) 2008 KIYOHARA Takashi
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * Redistribution and use in source and binary forms, with or without 7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions 8 * modification, are permitted provided that the following conditions
9 * are met: 9 * are met:
10 * 1. Redistributions of source code must retain the above copyright 10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer. 11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright 12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the 13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution. 14 * documentation and/or other materials provided with the distribution.
@@ -18,27 +18,27 @@ @@ -18,27 +18,27 @@
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 19 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 20 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 24 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
25 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 25 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE. 26 * POSSIBILITY OF SUCH DAMAGE.
27 * 27 *
28 */ 28 */
29 29
30#include <sys/cdefs.h> 30#include <sys/cdefs.h>
31__KERNEL_RCSID(0, "$NetBSD: ld_sdmmc.c,v 1.42 2022/05/16 10:03:23 jmcneill Exp $"); 31__KERNEL_RCSID(0, "$NetBSD: ld_sdmmc.c,v 1.42.4.1 2024/02/03 12:39:17 martin Exp $");
32 32
33#ifdef _KERNEL_OPT 33#ifdef _KERNEL_OPT
34#include "opt_sdmmc.h" 34#include "opt_sdmmc.h"
35#endif 35#endif
36 36
37#include <sys/param.h> 37#include <sys/param.h>
38#include <sys/types.h> 38#include <sys/types.h>
39 39
40#include <sys/buf.h> 40#include <sys/buf.h>
41#include <sys/bufq.h> 41#include <sys/bufq.h>
42#include <sys/bus.h> 42#include <sys/bus.h>
43#include <sys/device.h> 43#include <sys/device.h>
44#include <sys/disk.h> 44#include <sys/disk.h>
@@ -589,29 +589,44 @@ ld_sdmmc_docachesync(void *arg) @@ -589,29 +589,44 @@ ld_sdmmc_docachesync(void *arg)
589 *task->task_errorp = error; 589 *task->task_errorp = error;
590 cv_broadcast(&sc->sc_cv); 590 cv_broadcast(&sc->sc_cv);
591 591
592 /* Release the task. */ 592 /* Release the task. */
593 ld_sdmmc_task_put(sc, task); 593 ld_sdmmc_task_put(sc, task);
594 594
595 mutex_exit(&sc->sc_lock); 595 mutex_exit(&sc->sc_lock);
596} 596}
597 597
598static int 598static int
599ld_sdmmc_cachesync(struct ld_softc *ld, bool poll) 599ld_sdmmc_cachesync(struct ld_softc *ld, bool poll)
600{ 600{
601 struct ld_sdmmc_softc *sc = device_private(ld->sc_dv); 601 struct ld_sdmmc_softc *sc = device_private(ld->sc_dv);
 602 struct sdmmc_softc *sdmmc = device_private(device_parent(ld->sc_dv));
602 struct ld_sdmmc_task *task; 603 struct ld_sdmmc_task *task;
603 int error = -1; 604 int error = -1;
604 605
 606 /*
 607 * If we come here through the sdmmc discovery task, we can't
 608 * wait for a new task because the new task can't even begin
 609 * until the sdmmc discovery task has completed.
 610 *
 611 * XXX This is wrong, because there may already be queued I/O
 612 * tasks ahead of us. Fixing this properly requires doing
 613 * discovery in a separate thread. But this should avoid the
 614 * deadlock of PR kern/57870 (https://gnats.NetBSD.org/57870)
 615 * until we do split that up.
 616 */
 617 if (curlwp == sdmmc->sc_tskq_lwp)
 618 return sdmmc_mem_flush_cache(sc->sc_sf, poll);
 619
605 mutex_enter(&sc->sc_lock); 620 mutex_enter(&sc->sc_lock);
606 621
607 /* Acquire a free task, or fail with EBUSY. */ 622 /* Acquire a free task, or fail with EBUSY. */
608 if ((task = ld_sdmmc_task_get(sc)) == NULL) { 623 if ((task = ld_sdmmc_task_get(sc)) == NULL) {
609 sc->sc_ev_cachesyncbusy.ev_count++; 624 sc->sc_ev_cachesyncbusy.ev_count++;
610 error = EBUSY; 625 error = EBUSY;
611 goto out; 626 goto out;
612 } 627 }
613 628
614 /* Set up the task and schedule it. */ 629 /* Set up the task and schedule it. */
615 task->task_poll = poll; 630 task->task_poll = poll;
616 task->task_errorp = &error; 631 task->task_errorp = &error;
617 sdmmc_init_task(&task->task, ld_sdmmc_docachesync, task); 632 sdmmc_init_task(&task->task, ld_sdmmc_docachesync, task);