Tue Aug 21 18:11:10 2018 UTC ()
Avoid a race condition between I/O submission in xbd_diskstart() and
interrupt handling in xbd_handler() — the shared ring manipulation needs to be protected with splbio().

Fixes PR port-xen/53506 reported by Emmanuel Dreyfus, and likely also port-xen/53074
reported by Brad Spencer.


(jdolecek)
diff -r1.82 -r1.83 src/sys/arch/xen/xen/xbd_xenbus.c

cvs diff -r1.82 -r1.83 src/sys/arch/xen/xen/xbd_xenbus.c (expand / switch to unified diff)

--- src/sys/arch/xen/xen/xbd_xenbus.c 2018/08/15 15:49:15 1.82
+++ src/sys/arch/xen/xen/xbd_xenbus.c 2018/08/21 18:11:10 1.83
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: xbd_xenbus.c,v 1.82 2018/08/15 15:49:15 jdolecek Exp $ */ 1/* $NetBSD: xbd_xenbus.c,v 1.83 2018/08/21 18:11:10 jdolecek Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2006 Manuel Bouyer. 4 * Copyright (c) 2006 Manuel Bouyer.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
@@ -40,27 +40,27 @@ @@ -40,27 +40,27 @@
40 * - initiate request: xbdread/write/open/ioctl/.. 40 * - initiate request: xbdread/write/open/ioctl/..
41 * - depending on operation, it is handled directly by disk(9) subsystem or 41 * - depending on operation, it is handled directly by disk(9) subsystem or
42 * goes through physio(9) first. 42 * goes through physio(9) first.
43 * - the request is ultimately processed by xbd_diskstart() that prepares the 43 * - the request is ultimately processed by xbd_diskstart() that prepares the
44 * xbd requests, post them in the ring I/O queue, then signal the backend. 44 * xbd requests, post them in the ring I/O queue, then signal the backend.
45 * 45 *
46 * When a response is available in the queue, the backend signals the frontend 46 * When a response is available in the queue, the backend signals the frontend
47 * via its event channel. This triggers xbd_handler(), which will link back 47 * via its event channel. This triggers xbd_handler(), which will link back
48 * the response to its request through the request ID, and mark the I/O as 48 * the response to its request through the request ID, and mark the I/O as
49 * completed. 49 * completed.
50 */ 50 */
51 51
52#include <sys/cdefs.h> 52#include <sys/cdefs.h>
53__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.82 2018/08/15 15:49:15 jdolecek Exp $"); 53__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.83 2018/08/21 18:11:10 jdolecek Exp $");
54 54
55#include "opt_xen.h" 55#include "opt_xen.h"
56 56
57 57
58#include <sys/param.h> 58#include <sys/param.h>
59#include <sys/buf.h> 59#include <sys/buf.h>
60#include <sys/bufq.h> 60#include <sys/bufq.h>
61#include <sys/device.h> 61#include <sys/device.h>
62#include <sys/disk.h> 62#include <sys/disk.h>
63#include <sys/disklabel.h> 63#include <sys/disklabel.h>
64#include <sys/conf.h> 64#include <sys/conf.h>
65#include <sys/fcntl.h> 65#include <sys/fcntl.h>
66#include <sys/kernel.h> 66#include <sys/kernel.h>
@@ -922,41 +922,44 @@ xbddump(dev_t dev, daddr_t blkno, void * @@ -922,41 +922,44 @@ xbddump(dev_t dev, daddr_t blkno, void *
922} 922}
923 923
924static int 924static int
925xbd_diskstart(device_t self, struct buf *bp) 925xbd_diskstart(device_t self, struct buf *bp)
926{ 926{
927 struct xbd_xenbus_softc *sc = device_private(self); 927 struct xbd_xenbus_softc *sc = device_private(self);
928 struct xbd_req *xbdreq; 928 struct xbd_req *xbdreq;
929 blkif_request_t *req; 929 blkif_request_t *req;
930 size_t bcount, off; 930 size_t bcount, off;
931 paddr_t ma; 931 paddr_t ma;
932 vaddr_t va; 932 vaddr_t va;
933 int nsects, nbytes, seg; 933 int nsects, nbytes, seg;
934 int notify, error = 0; 934 int notify, error = 0;
 935 int s;
935 936
936 DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n", 937 DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n",
937 bp, (long)bp->b_bcount)); 938 bp, (long)bp->b_bcount));
938 939
939 if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) { 940 if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
940 error = EIO; 941 error = EIO;
941 goto err; 942 goto err;
942 } 943 }
943 944
944 if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) { 945 if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) {
945 /* invalid block number */ 946 /* invalid block number */
946 error = EINVAL; 947 error = EINVAL;
947 goto err; 948 goto err;
948 } 949 }
949 950
 951 s = splbio(); /* XXX SMP */
 952
950 if (__predict_false( 953 if (__predict_false(
951 sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) { 954 sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
952 /* device is suspended, do not consume buffer */ 955 /* device is suspended, do not consume buffer */
953 DPRINTF(("%s: (xbd_diskstart) device suspended\n", 956 DPRINTF(("%s: (xbd_diskstart) device suspended\n",
954 sc->sc_dksc.sc_xname)); 957 sc->sc_dksc.sc_xname));
955 error = EAGAIN; 958 error = EAGAIN;
956 goto out; 959 goto out;
957 } 960 }
958 961
959 if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) { 962 if (RING_FULL(&sc->sc_ring) || sc->sc_xbdreq_wait) {
960 DPRINTF(("xbd_diskstart: ring_full\n")); 963 DPRINTF(("xbd_diskstart: ring_full\n"));
961 error = EAGAIN; 964 error = EAGAIN;
962 goto out; 965 goto out;
@@ -1014,26 +1017,27 @@ xbd_diskstart(device_t self, struct buf  @@ -1014,26 +1017,27 @@ xbd_diskstart(device_t self, struct buf
1014 seg++; 1017 seg++;
1015 KASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST); 1018 KASSERT(seg <= BLKIF_MAX_SEGMENTS_PER_REQUEST);
1016 va += PAGE_SIZE; 1019 va += PAGE_SIZE;
1017 off = 0; 1020 off = 0;
1018 bcount -= nbytes; 1021 bcount -= nbytes;
1019 } 1022 }
1020 xbdreq->req_nr_segments = req->nr_segments = seg; 1023 xbdreq->req_nr_segments = req->nr_segments = seg;
1021 sc->sc_ring.req_prod_pvt++; 1024 sc->sc_ring.req_prod_pvt++;
1022 1025
1023out: 1026out:
1024 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify); 1027 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
1025 if (notify) 1028 if (notify)
1026 hypervisor_notify_via_evtchn(sc->sc_evtchn); 1029 hypervisor_notify_via_evtchn(sc->sc_evtchn);
 1030 splx(s); /* XXXSMP */
1027err: 1031err:
1028 return error; 1032 return error;
1029} 1033}
1030 1034
1031static int 1035static int
1032xbd_map_align(struct xbd_req *req) 1036xbd_map_align(struct xbd_req *req)
1033{ 1037{
1034 int s = splvm(); /* XXXSMP - bogus? */ 1038 int s = splvm(); /* XXXSMP - bogus? */
1035 int rc; 1039 int rc;
1036 1040
1037 rc = uvm_km_kmem_alloc(kmem_va_arena, 1041 rc = uvm_km_kmem_alloc(kmem_va_arena,
1038 req->req_bp->b_bcount, (VM_NOSLEEP | VM_INSTANTFIT), 1042 req->req_bp->b_bcount, (VM_NOSLEEP | VM_INSTANTFIT),
1039 (vmem_addr_t *)&req->req_data); 1043 (vmem_addr_t *)&req->req_data);