Thu Apr 23 09:16:21 2020 UTC
make xbdback actually MPSAFE and stop using KERNEL_LOCK()

remove the no-longer-necessary atomics; the counters are now always
updated with the mutex held


(jdolecek)
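In outline, the commit makes the per-instance mutex (xbdi_lock) cover the reference count and the pending-request counter, so the atomic_inc_uint()/atomic_dec_uint() updates and the KERNEL_LOCK() in the iodone path are no longer needed. Below is a minimal sketch of the resulting pattern, assuming the NetBSD kernel environment; the trimmed-down structure and the sketch_* helpers are hypothetical stand-ins for struct xbdback_instance and for the real call sites in xbdback_co_io_gotio() and xbdback_iodone_locked() shown in the diff that follows:

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/mutex.h>

/* Trimmed-down stand-in for struct xbdback_instance (illustration only). */
struct xbdback_instance_sketch {
	kmutex_t xbdi_lock;	/* now covers both counters below */
	uint xbdi_refcnt;	/* instance reference count */
	uint xbdi_pendingreqs;	/* number of I/Os in flight */
};

/* Stand-in for the real routine that marks the instance DISCONNECTED. */
static void
xbdback_finish_disconnect_sketch(struct xbdback_instance_sketch *xbdi)
{
	/* real version sets DISCONNECTED and broadcasts xbdi_cv */
}

/* Reference counting becomes a plain ++/--, valid only with xbdi_lock held. */
#define xbdi_get(xbdip)	(xbdip)->xbdi_refcnt++
#define xbdi_put(xbdip)						\
do {								\
	if (--((xbdip)->xbdi_refcnt) == 0)			\
		xbdback_finish_disconnect_sketch(xbdip);	\
} while (/* CONSTCOND */ 0)

static void
sketch_io_start(struct xbdback_instance_sketch *xbdi)
{
	KASSERT(mutex_owned(&xbdi->xbdi_lock));
	xbdi_get(xbdi);			/* was atomic_inc_uint() */
	xbdi->xbdi_pendingreqs++;	/* likewise */
}

static void
sketch_io_done(struct xbdback_instance_sketch *xbdi)
{
	KASSERT(mutex_owned(&xbdi->xbdi_lock));
	KASSERT(xbdi->xbdi_pendingreqs > 0);
	xbdi->xbdi_pendingreqs--;	/* was atomic_dec_uint() */
	xbdi_put(xbdi);			/* may trigger the finish-disconnect */
}

The same commit also closes the lookup race in xbdback_xenbus_create(): the xbdif_lookup() and the SLIST insert now happen in a single critical section under xbdback_lock, instead of an unlocked lookup followed by a later locked insert.
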
diff -r1.89 -r1.90 src/sys/arch/xen/xen/xbdback_xenbus.c

cvs diff -r1.89 -r1.90 src/sys/arch/xen/xen/xbdback_xenbus.c

--- src/sys/arch/xen/xen/xbdback_xenbus.c 2020/04/23 08:09:25 1.89
+++ src/sys/arch/xen/xen/xbdback_xenbus.c 2020/04/23 09:16:21 1.90
@@ -1,14 +1,14 @@
1/* $NetBSD: xbdback_xenbus.c,v 1.89 2020/04/23 08:09:25 jdolecek Exp $ */ 1/* $NetBSD: xbdback_xenbus.c,v 1.90 2020/04/23 09:16:21 jdolecek Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2006 Manuel Bouyer. 4 * Copyright (c) 2006 Manuel Bouyer.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
@@ -16,29 +16,28 @@
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * 25 *
26 */ 26 */
27 27
28#include <sys/cdefs.h> 28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.89 2020/04/23 08:09:25 jdolecek Exp $"); 29__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.90 2020/04/23 09:16:21 jdolecek Exp $");
30 30
31#include <sys/atomic.h> 
32#include <sys/buf.h> 31#include <sys/buf.h>
33#include <sys/condvar.h> 32#include <sys/condvar.h>
34#include <sys/conf.h> 33#include <sys/conf.h>
35#include <sys/disk.h> 34#include <sys/disk.h>
36#include <sys/device.h> 35#include <sys/device.h>
37#include <sys/fcntl.h> 36#include <sys/fcntl.h>
38#include <sys/kauth.h> 37#include <sys/kauth.h>
39#include <sys/kernel.h> 38#include <sys/kernel.h>
40#include <sys/kmem.h> 39#include <sys/kmem.h>
41#include <sys/kthread.h> 40#include <sys/kthread.h>
42#include <sys/mutex.h> 41#include <sys/mutex.h>
43#include <sys/param.h> 42#include <sys/param.h>
44#include <sys/queue.h> 43#include <sys/queue.h>
@@ -217,31 +216,31 @@ struct xbdback_instance {
217 */ 216 */
218 RING_IDX xbdi_req_prod; /* limit on request indices */ 217 RING_IDX xbdi_req_prod; /* limit on request indices */
219 xbdback_cont_t xbdi_cont; 218 xbdback_cont_t xbdi_cont;
220 /* _request state: track requests fetched from ring */ 219 /* _request state: track requests fetched from ring */
221 blkif_request_t xbdi_xen_req; 220 blkif_request_t xbdi_xen_req;
222 struct blkif_request_segment xbdi_seg[VBD_MAX_INDIRECT_SEGMENTS]; 221 struct blkif_request_segment xbdi_seg[VBD_MAX_INDIRECT_SEGMENTS];
223 bus_dmamap_t xbdi_seg_dmamap; 222 bus_dmamap_t xbdi_seg_dmamap;
224 grant_ref_t xbdi_in_gntref; 223 grant_ref_t xbdi_in_gntref;
225 /* other state */ 224 /* other state */
226 uint xbdi_pendingreqs; /* number of I/O in fly */ 225 uint xbdi_pendingreqs; /* number of I/O in fly */
227 struct timeval xbdi_lasterr_time; /* error time tracking */ 226 struct timeval xbdi_lasterr_time; /* error time tracking */
228}; 227};
229/* Manipulation of the above reference count. */ 228/* Manipulation of the above reference count. */
230#define xbdi_get(xbdip) atomic_inc_uint(&(xbdip)->xbdi_refcnt) 229#define xbdi_get(xbdip) (xbdip)->xbdi_refcnt++
231#define xbdi_put(xbdip) \ 230#define xbdi_put(xbdip) \
232do { \ 231do { \
233 if (atomic_dec_uint_nv(&(xbdip)->xbdi_refcnt) == 0) \ 232 if (--((xbdip)->xbdi_refcnt) == 0) \
234 xbdback_finish_disconnect(xbdip); \ 233 xbdback_finish_disconnect(xbdip); \
235} while (/* CONSTCOND */ 0) 234} while (/* CONSTCOND */ 0)
236 235
237static SLIST_HEAD(, xbdback_instance) xbdback_instances; 236static SLIST_HEAD(, xbdback_instance) xbdback_instances;
238static kmutex_t xbdback_lock; 237static kmutex_t xbdback_lock;
239 238
240/* Interval between reports of I/O errors from frontend */ 239/* Interval between reports of I/O errors from frontend */
241static const struct timeval xbdback_err_intvl = { 1, 0 }; 240static const struct timeval xbdback_err_intvl = { 1, 0 };
242 241
243 void xbdbackattach(int); 242 void xbdbackattach(int);
244static int xbdback_xenbus_create(struct xenbus_device *); 243static int xbdback_xenbus_create(struct xenbus_device *);
245static int xbdback_xenbus_destroy(void *); 244static int xbdback_xenbus_destroy(void *);
246static void xbdback_frontend_changed(void *, XenbusState); 245static void xbdback_frontend_changed(void *, XenbusState);
247static void xbdback_backend_changed(struct xenbus_watch *, 246static void xbdback_backend_changed(struct xenbus_watch *,
@@ -259,26 +258,28 @@ static void *xbdback_co_main_loop(struct
259static void *xbdback_co_main_incr(struct xbdback_instance *, void *); 258static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
260static void *xbdback_co_main_done2(struct xbdback_instance *, void *); 259static void *xbdback_co_main_done2(struct xbdback_instance *, void *);
261 260
262static void *xbdback_co_cache_flush(struct xbdback_instance *, void *); 261static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
263static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *); 262static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *);
264 263
265static void *xbdback_co_io(struct xbdback_instance *, void *); 264static void *xbdback_co_io(struct xbdback_instance *, void *);
266static void *xbdback_co_io_gotio(struct xbdback_instance *, void *); 265static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);
267 266
268static void *xbdback_co_do_io(struct xbdback_instance *, void *); 267static void *xbdback_co_do_io(struct xbdback_instance *, void *);
269 268
270static void xbdback_io_error(struct xbdback_io *, int); 269static void xbdback_io_error(struct xbdback_io *, int);
271static void xbdback_iodone(struct buf *); 270static void xbdback_iodone(struct buf *);
 271static void xbdback_iodone_locked(struct xbdback_instance *,
 272 struct xbdback_io *, struct buf *);
272static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int); 273static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int);
273 274
274static void *xbdback_map_shm(struct xbdback_io *); 275static void *xbdback_map_shm(struct xbdback_io *);
275static void xbdback_unmap_shm(struct xbdback_io *); 276static void xbdback_unmap_shm(struct xbdback_io *);
276 277
277static struct xbdback_io *xbdback_io_get(struct xbdback_instance *); 278static struct xbdback_io *xbdback_io_get(struct xbdback_instance *);
278static void xbdback_io_put(struct xbdback_instance *, struct xbdback_io *); 279static void xbdback_io_put(struct xbdback_instance *, struct xbdback_io *);
279static void xbdback_thread(void *); 280static void xbdback_thread(void *);
280static void xbdback_wakeup_thread(struct xbdback_instance *); 281static void xbdback_wakeup_thread(struct xbdback_instance *);
281static void xbdback_trampoline(struct xbdback_instance *, void *); 282static void xbdback_trampoline(struct xbdback_instance *, void *);
282 283
283static struct xenbus_backend_driver xbd_backend_driver = { 284static struct xenbus_backend_driver xbd_backend_driver = {
284 .xbakd_create = xbdback_xenbus_create, 285 .xbakd_create = xbdback_xenbus_create,
@@ -326,46 +327,48 @@ xbdback_xenbus_create(struct xenbus_devi
326 } 327 }
327 if (i == 0) { 328 if (i == 0) {
328 aprint_error("xbdback: can't parse %s\n", 329 aprint_error("xbdback: can't parse %s\n",
329 xbusd->xbusd_path); 330 xbusd->xbusd_path);
330 return EFTYPE; 331 return EFTYPE;
331 } 332 }
332 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10); 333 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10);
333 if (*ep != '\0') { 334 if (*ep != '\0') {
334 aprint_error("xbdback: can't parse %s\n", 335 aprint_error("xbdback: can't parse %s\n",
335 xbusd->xbusd_path); 336 xbusd->xbusd_path);
336 return EFTYPE; 337 return EFTYPE;
337 } 338 }
338 339
339 /* XXXSMP unlocked search */ 
340 if (xbdif_lookup(domid, handle)) { 
341 return EEXIST; 
342 } 
343 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP); 340 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP);
344 341
345 xbdi->xbdi_domid = domid; 342 xbdi->xbdi_domid = domid;
346 xbdi->xbdi_handle = handle; 343 xbdi->xbdi_handle = handle;
347 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d", 344 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d",
348 xbdi->xbdi_domid, xbdi->xbdi_handle); 345 xbdi->xbdi_domid, xbdi->xbdi_handle);
349 346
 347 mutex_enter(&xbdback_lock);
 348 if (xbdif_lookup(domid, handle)) {
 349 mutex_exit(&xbdback_lock);
 350 kmem_free(xbdi, sizeof(*xbdi));
 351 return EEXIST;
 352 }
 353 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next);
 354 mutex_exit(&xbdback_lock);
 355
350 /* initialize status and reference counter */ 356 /* initialize status and reference counter */
351 xbdi->xbdi_status = DISCONNECTED; 357 xbdi->xbdi_status = DISCONNECTED;
352 xbdi_get(xbdi); 358 xbdi_get(xbdi);
353 359
354 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO); 360 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO);
355 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name); 361 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name);
356 mutex_enter(&xbdback_lock); 
357 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next); 
358 mutex_exit(&xbdback_lock); 
359 362
360 xbusd->xbusd_u.b.b_cookie = xbdi;  363 xbusd->xbusd_u.b.b_cookie = xbdi;
361 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy; 364 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy;
362 xbusd->xbusd_otherend_changed = xbdback_frontend_changed; 365 xbusd->xbusd_otherend_changed = xbdback_frontend_changed;
363 xbdi->xbdi_xbusd = xbusd; 366 xbdi->xbdi_xbusd = xbusd;
364 367
365 if (bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat, PAGE_SIZE, 368 if (bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat, PAGE_SIZE,
366 1, PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 369 1, PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
367 &xbdi->xbdi_seg_dmamap) != 0) { 370 &xbdi->xbdi_seg_dmamap) != 0) {
368 printf("%s: can't create dma map for indirect segments\n", 371 printf("%s: can't create dma map for indirect segments\n",
369 xbdi->xbdi_name); 372 xbdi->xbdi_name);
370 goto fail; 373 goto fail;
371 } 374 }
@@ -842,106 +845,108 @@ abort:
842} 845}
843 846
844/* 847/*
845 * Used by a xbdi thread to signal that it is now disconnected. 848 * Used by a xbdi thread to signal that it is now disconnected.
846 */ 849 */
847static void 850static void
848xbdback_finish_disconnect(struct xbdback_instance *xbdi) 851xbdback_finish_disconnect(struct xbdback_instance *xbdi)
849{ 852{
850 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 853 KASSERT(mutex_owned(&xbdi->xbdi_lock));
851 KASSERT(xbdi->xbdi_status == DISCONNECTING); 854 KASSERT(xbdi->xbdi_status == DISCONNECTING);
852 855
853 xbdi->xbdi_status = DISCONNECTED; 856 xbdi->xbdi_status = DISCONNECTED;
854 857
855 cv_signal(&xbdi->xbdi_cv); 858 cv_broadcast(&xbdi->xbdi_cv);
856} 859}
857 860
858static bool 861static bool
859xbdif_lookup(domid_t dom , uint32_t handle) 862xbdif_lookup(domid_t dom , uint32_t handle)
860{ 863{
861 struct xbdback_instance *xbdi; 864 struct xbdback_instance *xbdi;
862 bool found = false; 865 bool found = false;
863 866
864 mutex_enter(&xbdback_lock); 867 KASSERT(mutex_owned(&xbdback_lock));
 868
865 SLIST_FOREACH(xbdi, &xbdback_instances, next) { 869 SLIST_FOREACH(xbdi, &xbdback_instances, next) {
866 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) { 870 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) {
867 found = true; 871 found = true;
868 break; 872 break;
869 } 873 }
870 } 874 }
871 mutex_exit(&xbdback_lock); 
872 875
873 return found; 876 return found;
874} 877}
875 878
876static int 879static int
877xbdback_evthandler(void *arg) 880xbdback_evthandler(void *arg)
878{ 881{
879 struct xbdback_instance *xbdi = arg; 882 struct xbdback_instance *xbdi = arg;
880 883
881 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n", 884 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n",
882 xbdi->xbdi_domid, xbdi->xbdi_cont)); 885 xbdi->xbdi_domid, xbdi->xbdi_cont));
883 886
 887 mutex_enter(&xbdi->xbdi_lock);
884 xbdback_wakeup_thread(xbdi); 888 xbdback_wakeup_thread(xbdi);
 889 mutex_exit(&xbdi->xbdi_lock);
885 890
886 return 1; 891 return 1;
887} 892}
888 893
889/* 894/*
890 * Main thread routine for one xbdback instance. Woken up by 895 * Main thread routine for one xbdback instance. Woken up by
891 * xbdback_evthandler when a domain has I/O work scheduled in a I/O ring. 896 * xbdback_evthandler when a domain has I/O work scheduled in a I/O ring.
892 */ 897 */
893static void 898static void
894xbdback_thread(void *arg) 899xbdback_thread(void *arg)
895{ 900{
896 struct xbdback_instance *xbdi = arg; 901 struct xbdback_instance *xbdi = arg;
897 902
 903 mutex_enter(&xbdi->xbdi_lock);
898 for (;;) { 904 for (;;) {
899 mutex_enter(&xbdi->xbdi_lock); 
900 switch (xbdi->xbdi_status) { 905 switch (xbdi->xbdi_status) {
901 case WAITING: 906 case WAITING:
902 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 907 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
903 mutex_exit(&xbdi->xbdi_lock); 
904 break; 908 break;
905 case RUN: 909 case RUN:
906 xbdi->xbdi_status = WAITING; /* reset state */ 910 xbdi->xbdi_status = WAITING; /* reset state */
907 mutex_exit(&xbdi->xbdi_lock); 
908 911
909 if (xbdi->xbdi_cont == NULL) { 912 if (xbdi->xbdi_cont == NULL) {
910 xbdi->xbdi_cont = xbdback_co_main; 913 xbdi->xbdi_cont = xbdback_co_main;
911 } 914 }
912 915
913 xbdback_trampoline(xbdi, xbdi); 916 xbdback_trampoline(xbdi, xbdi);
914 break; 917 break;
915 case DISCONNECTING: 918 case DISCONNECTING:
916 if (xbdi->xbdi_pendingreqs > 0) { 919 if (xbdi->xbdi_pendingreqs > 0) {
917 /* there are pending I/Os. Wait for them. */ 920 /* there are pending I/Os. Wait for them. */
918 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 921 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
919 mutex_exit(&xbdi->xbdi_lock); 922 continue;
920 break; 
921 } 923 }
922  924
923 /* All I/Os should have been processed by now, 925 /* All I/Os should have been processed by now,
924 * xbdi_refcnt should drop to 0 */ 926 * xbdi_refcnt should drop to 0 */
925 xbdi_put(xbdi); 927 xbdi_put(xbdi);
926 KASSERT(xbdi->xbdi_refcnt == 0); 928 KASSERT(xbdi->xbdi_refcnt == 0);
927 mutex_exit(&xbdi->xbdi_lock); 929 goto out;
928 kthread_exit(0); 930 /* NOTREACHED */
929 break; 
930 default: 931 default:
931 panic("%s: invalid state %d", 932 panic("%s: invalid state %d",
932 xbdi->xbdi_name, xbdi->xbdi_status); 933 xbdi->xbdi_name, xbdi->xbdi_status);
933 } 934 }
934 } 935 }
 936out:
 937 mutex_exit(&xbdi->xbdi_lock);
 938
 939 kthread_exit(0);
935} 940}
936 941
937static void * 942static void *
938xbdback_co_main(struct xbdback_instance *xbdi, void *obj) 943xbdback_co_main(struct xbdback_instance *xbdi, void *obj)
939{ 944{
940 (void)obj; 945 (void)obj;
941 946
942 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod; 947 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod;
943 xen_rmb(); /* ensure we see all requests up to req_prod */ 948 xen_rmb(); /* ensure we see all requests up to req_prod */
944 /* 949 /*
945 * note that we'll eventually get a full ring of request. 950 * note that we'll eventually get a full ring of request.
946 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod) 951 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod)
947 */ 952 */
@@ -1032,50 +1037,39 @@ fail:
1032 break; 1037 break;
1033 } 1038 }
1034 } else { 1039 } else {
1035 xbdi->xbdi_cont = xbdback_co_main_done2; 1040 xbdi->xbdi_cont = xbdback_co_main_done2;
1036 } 1041 }
1037 return xbdi; 1042 return xbdi;
1038} 1043}
1039 1044
1040/* 1045/*
1041 * Increment consumer index and move on to the next request. In case 1046 * Increment consumer index and move on to the next request. In case
1042 * we want to disconnect, leave continuation now. 1047 * we want to disconnect, leave continuation now.
1043 */ 1048 */
1044static void * 1049static void *
1045xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj) 1050xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj __unused)
1046{ 1051{
1047 (void)obj; 1052 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1053
1048 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n; 1054 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n;
1049 1055
1050 ring->req_cons++; 1056 ring->req_cons++;
1051 1057
1052 /* 
1053 * Do not bother with locking here when checking for xbdi_status: if 
1054 * we get a transient state, we will get the right value at 
1055 * the next increment. 
1056 */ 
1057 if (xbdi->xbdi_status == DISCONNECTING) 1058 if (xbdi->xbdi_status == DISCONNECTING)
1058 xbdi->xbdi_cont = NULL; 1059 xbdi->xbdi_cont = NULL;
1059 else 1060 else
1060 xbdi->xbdi_cont = xbdback_co_main_loop; 1061 xbdi->xbdi_cont = xbdback_co_main_loop;
1061 1062
1062 /* 
1063 * Each time the thread processes a full ring of requests, give 
1064 * a chance to other threads to process I/Os too 
1065 */ 
1066 if ((ring->req_cons % BLKIF_RING_SIZE) == 0) 
1067 yield(); 
1068 
1069 return xbdi; 1063 return xbdi;
1070} 1064}
1071 1065
1072/* 1066/*
1073 * Check for requests in the instance's ring. In case there are, start again 1067 * Check for requests in the instance's ring. In case there are, start again
1074 * from the beginning. If not, stall. 1068 * from the beginning. If not, stall.
1075 */ 1069 */
1076static void * 1070static void *
1077xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj) 1071xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj)
1078{ 1072{
1079 int work_to_do; 1073 int work_to_do;
1080 1074
1081 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do); 1075 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
@@ -1240,28 +1234,30 @@ xbdback_co_io(struct xbdback_instance *x
1240 xbdi->xbdi_cont = xbdback_co_main_incr; 1234 xbdi->xbdi_cont = xbdback_co_main_incr;
1241 return xbdi; 1235 return xbdi;
1242} 1236}
1243 1237
1244/* Prepare an I/O buffer for a xbdback instance */ 1238/* Prepare an I/O buffer for a xbdback instance */
1245static void * 1239static void *
1246xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj) 1240xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
1247{ 1241{
1248 struct xbdback_io *xbd_io; 1242 struct xbdback_io *xbd_io;
1249 int buf_flags; 1243 int buf_flags;
1250 size_t bcount; 1244 size_t bcount;
1251 blkif_request_t *req; 1245 blkif_request_t *req;
1252 1246
 1247 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1248
1253 xbdi_get(xbdi); 1249 xbdi_get(xbdi);
1254 atomic_inc_uint(&xbdi->xbdi_pendingreqs); 1250 xbdi->xbdi_pendingreqs++;
1255  1251
1256 req = &xbdi->xbdi_xen_req; 1252 req = &xbdi->xbdi_xen_req;
1257 xbd_io = obj; 1253 xbd_io = obj;
1258 memset(xbd_io, 0, sizeof(*xbd_io)); 1254 memset(xbd_io, 0, sizeof(*xbd_io));
1259 buf_init(&xbd_io->xio_buf); 1255 buf_init(&xbd_io->xio_buf);
1260 xbd_io->xio_xbdi = xbdi; 1256 xbd_io->xio_xbdi = xbdi;
1261 xbd_io->xio_operation = req->operation; 1257 xbd_io->xio_operation = req->operation;
1262 xbd_io->xio_id = req->id; 1258 xbd_io->xio_id = req->id;
1263 1259
1264 /* If segments are on an indirect page, copy them now */ 1260 /* If segments are on an indirect page, copy them now */
1265 if (xbdi->xbdi_in_gntref) { 1261 if (xbdi->xbdi_in_gntref) {
1266 gnttab_copy_t gop; 1262 gnttab_copy_t gop;
1267 paddr_t ma; 1263 paddr_t ma;
@@ -1321,47 +1317,54 @@ xbdback_co_io_gotio(struct xbdback_insta
1321 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev; 1317 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev;
1322 xbd_io->xio_buf.b_blkno = req->sector_number; 1318 xbd_io->xio_buf.b_blkno = req->sector_number;
1323 xbd_io->xio_buf.b_bcount = bcount; 1319 xbd_io->xio_buf.b_bcount = bcount;
1324 xbd_io->xio_buf.b_data = NULL; 1320 xbd_io->xio_buf.b_data = NULL;
1325 xbd_io->xio_buf.b_private = xbd_io; 1321 xbd_io->xio_buf.b_private = xbd_io;
1326 1322
1327 xbdi->xbdi_cont = xbdback_co_do_io; 1323 xbdi->xbdi_cont = xbdback_co_do_io;
1328 return xbdback_map_shm(xbd_io); 1324 return xbdback_map_shm(xbd_io);
1329} 1325}
1330 1326
1331static void 1327static void
1332xbdback_io_error(struct xbdback_io *xbd_io, int error) 1328xbdback_io_error(struct xbdback_io *xbd_io, int error)
1333{ 1329{
1334 xbd_io->xio_buf.b_error = error; 1330 KASSERT(mutex_owned(&xbd_io->xio_xbdi->xbdi_lock));
1335 xbdback_iodone(&xbd_io->xio_buf); 1331
 1332 struct buf *bp = &xbd_io->xio_buf;
 1333
 1334 bp->b_error = error;
 1335 xbdback_iodone_locked(xbd_io->xio_xbdi, xbd_io, bp);
1336} 1336}
1337 1337
1338/* 1338/*
1339 * Main xbdback I/O routine. It can either perform a flush operation or 1339 * Main xbdback I/O routine. It can either perform a flush operation or
1340 * schedule a read/write operation. 1340 * schedule a read/write operation.
1341 */ 1341 */
1342static void * 1342static void *
1343xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj) 1343xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj)
1344{ 1344{
1345 struct xbdback_io *xbd_io = obj; 1345 struct xbdback_io *xbd_io = obj;
1346 1346
1347 switch (xbd_io->xio_operation) { 1347 switch (xbd_io->xio_operation) {
1348 case BLKIF_OP_FLUSH_DISKCACHE: 1348 case BLKIF_OP_FLUSH_DISKCACHE:
1349 { 1349 {
1350 int error; 1350 int error;
1351 int force = 1; 1351 int force = 1;
1352 1352
 1353 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1354 mutex_exit(&xbdi->xbdi_lock);
1353 error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE, 1355 error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE,
1354 kauth_cred_get()); 1356 kauth_cred_get());
 1357 mutex_enter(&xbdi->xbdi_lock);
1355 if (error) { 1358 if (error) {
1356 aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n", 1359 aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n",
1357 xbdi->xbdi_xbusd->xbusd_path, error); 1360 xbdi->xbdi_xbusd->xbusd_path, error);
1358 if (error == EOPNOTSUPP || error == ENOTTY) 1361 if (error == EOPNOTSUPP || error == ENOTTY)
1359 error = BLKIF_RSP_EOPNOTSUPP; 1362 error = BLKIF_RSP_EOPNOTSUPP;
1360 else 1363 else
1361 error = BLKIF_RSP_ERROR; 1364 error = BLKIF_RSP_ERROR;
1362 } else 1365 } else
1363 error = BLKIF_RSP_OKAY; 1366 error = BLKIF_RSP_OKAY;
1364 xbdback_send_reply(xbdi, xbd_io->xio_id, 1367 xbdback_send_reply(xbdi, xbd_io->xio_id,
1365 xbd_io->xio_operation, error); 1368 xbd_io->xio_operation, error);
1366 xbdback_io_put(xbdi, xbd_io); 1369 xbdback_io_put(xbdi, xbd_io);
1367 xbdi_put(xbdi); 1370 xbdi_put(xbdi);
@@ -1382,157 +1385,168 @@ xbdback_co_do_io(struct xbdback_instance
1382 bdev_strategy(&xbd_io->xio_buf); 1385 bdev_strategy(&xbd_io->xio_buf);
1383 xbdi->xbdi_cont = xbdback_co_main_incr; 1386 xbdi->xbdi_cont = xbdback_co_main_incr;
1384 return xbdi; 1387 return xbdi;
1385 default: 1388 default:
1386 /* Should never happen */ 1389 /* Should never happen */
1387 panic("xbdback_co_do_io: unsupported operation %d", 1390 panic("xbdback_co_do_io: unsupported operation %d",
1388 xbd_io->xio_operation); 1391 xbd_io->xio_operation);
1389 } 1392 }
1390} 1393}
1391 1394
1392/* 1395/*
1393 * Called from softint(9) context when an I/O is done: for each request, send 1396 * Called from softint(9) context when an I/O is done: for each request, send
1394 * back the associated reply to the domain. 1397 * back the associated reply to the domain.
1395 * 
1396 * This gets reused by xbdback_io_error to report errors from other sources. 
1397 */ 1398 */
1398static void 1399static void
1399xbdback_iodone(struct buf *bp) 1400xbdback_iodone(struct buf *bp)
1400{ 1401{
1401 struct xbdback_io *xbd_io; 1402 struct xbdback_io *xbd_io;
1402 struct xbdback_instance *xbdi; 1403 struct xbdback_instance *xbdi;
1403 int status; 
1404 
1405 KERNEL_LOCK(1, NULL); /* XXXSMP */ 
1406 1404
1407 xbd_io = bp->b_private; 1405 xbd_io = bp->b_private;
 1406 KASSERT(bp == &xbd_io->xio_buf);
1408 xbdi = xbd_io->xio_xbdi; 1407 xbdi = xbd_io->xio_xbdi;
1409 1408
 1409 mutex_enter(&xbdi->xbdi_lock);
 1410 xbdback_iodone_locked(xbdi, xbd_io, bp);
 1411 mutex_exit(&xbdi->xbdi_lock);
 1412}
 1413
 1414/*
 1415 * This gets reused by xbdback_io_error to report errors from other sources.
 1416 */
 1417static void
 1418xbdback_iodone_locked(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io,
 1419 struct buf *bp)
 1420{
 1421 int status;
 1422
1410 XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n", 1423 XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n",
1411 xbdi->xbdi_domid, (long)xbd_io)); 1424 xbdi->xbdi_domid, (long)xbd_io));
1412 1425
 1426 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1427
1413 KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL); 1428 KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL);
1414 if (xbd_io->xio_xv != NULL) 1429 if (xbd_io->xio_xv != NULL)
1415 xbdback_unmap_shm(xbd_io); 1430 xbdback_unmap_shm(xbd_io);
1416 1431
1417 if (bp->b_error != 0) { 1432 if (bp->b_error != 0) {
1418 printf("xbd IO domain %d: error %d\n", 1433 printf("xbd IO domain %d: error %d\n",
1419 xbdi->xbdi_domid, bp->b_error); 1434 xbdi->xbdi_domid, bp->b_error);
1420 status = BLKIF_RSP_ERROR; 1435 status = BLKIF_RSP_ERROR;
1421 } else 1436 } else
1422 status = BLKIF_RSP_OKAY; 1437 status = BLKIF_RSP_OKAY;
1423  1438
1424 xbdback_send_reply(xbdi, xbd_io->xio_id, xbd_io->xio_operation, status); 1439 xbdback_send_reply(xbdi, xbd_io->xio_id, xbd_io->xio_operation, status);
1425 1440
1426 xbdi_put(xbdi); 1441 xbdi_put(xbdi);
1427 atomic_dec_uint(&xbdi->xbdi_pendingreqs); 1442 KASSERT(xbdi->xbdi_pendingreqs > 0);
 1443 xbdi->xbdi_pendingreqs--;
1428 buf_destroy(&xbd_io->xio_buf); 1444 buf_destroy(&xbd_io->xio_buf);
1429 xbdback_io_put(xbdi, xbd_io); 1445 xbdback_io_put(xbdi, xbd_io);
1430 1446
1431 xbdback_wakeup_thread(xbdi); 1447 xbdback_wakeup_thread(xbdi);
1432 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 
1433} 1448}
1434 1449
1435/* 1450/*
1436 * Wake up the per xbdback instance thread. 1451 * Wake up the per xbdback instance thread.
1437 */ 1452 */
1438static void 1453static void
1439xbdback_wakeup_thread(struct xbdback_instance *xbdi) 1454xbdback_wakeup_thread(struct xbdback_instance *xbdi)
1440{ 1455{
 1456 KASSERT(mutex_owned(&xbdi->xbdi_lock));
1441 1457
1442 mutex_enter(&xbdi->xbdi_lock); 
1443 /* only set RUN state when we are WAITING for work */ 1458 /* only set RUN state when we are WAITING for work */
1444 if (xbdi->xbdi_status == WAITING) 1459 if (xbdi->xbdi_status == WAITING)
1445 xbdi->xbdi_status = RUN; 1460 xbdi->xbdi_status = RUN;
1446 cv_broadcast(&xbdi->xbdi_cv); 1461 cv_signal(&xbdi->xbdi_cv);
1447 mutex_exit(&xbdi->xbdi_lock); 
1448} 1462}
1449 1463
1450/* 1464/*
1451 * called once a request has completed. Place the reply in the ring and 1465 * called once a request has completed. Place the reply in the ring and
1452 * notify the guest OS. 1466 * notify the guest OS.
1453 */ 1467 */
1454static void 1468static void
1455xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id, 1469xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id,
1456 int op, int status) 1470 int op, int status)
1457{ 1471{
1458 blkif_response_t *resp_n; 1472 blkif_response_t *resp_n;
1459 blkif_x86_32_response_t *resp32; 1473 blkif_x86_32_response_t *resp32;
1460 blkif_x86_64_response_t *resp64; 1474 blkif_x86_64_response_t *resp64;
1461 int notify; 1475 int notify;
1462 1476
 1477 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1478
1463 /* 1479 /*
1464 * The ring can be accessed by the xbdback thread, xbdback_iodone() 1480 * The ring can be accessed by the xbdback thread, xbdback_iodone()
1465 * handler, or any handler that triggered the shm callback. So 1481 * handler, or any handler that triggered the shm callback. So
1466 * protect ring access via the xbdi_lock mutex. 1482 * protect ring access via the xbdi_lock mutex.
1467 */ 1483 */
1468 mutex_enter(&xbdi->xbdi_lock); 
1469 switch (xbdi->xbdi_proto) { 1484 switch (xbdi->xbdi_proto) {
1470 case XBDIP_NATIVE: 1485 case XBDIP_NATIVE:
1471 resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n, 1486 resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n,
1472 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1487 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1473 resp_n->id = id; 1488 resp_n->id = id;
1474 resp_n->operation = op; 1489 resp_n->operation = op;
1475 resp_n->status = status; 1490 resp_n->status = status;
1476 break; 1491 break;
1477 case XBDIP_32: 1492 case XBDIP_32:
1478 resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32, 1493 resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32,
1479 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1494 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1480 resp32->id = id; 1495 resp32->id = id;
1481 resp32->operation = op; 1496 resp32->operation = op;
1482 resp32->status = status; 1497 resp32->status = status;
1483 break; 1498 break;
1484 case XBDIP_64: 1499 case XBDIP_64:
1485 resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64, 1500 resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64,
1486 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1501 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1487 resp64->id = id; 1502 resp64->id = id;
1488 resp64->operation = op; 1503 resp64->operation = op;
1489 resp64->status = status; 1504 resp64->status = status;
1490 break; 1505 break;
1491 } 1506 }
1492 xbdi->xbdi_ring.ring_n.rsp_prod_pvt++; 1507 xbdi->xbdi_ring.ring_n.rsp_prod_pvt++;
1493 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify); 1508 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify);
1494 mutex_exit(&xbdi->xbdi_lock); 
1495 1509
1496 if (notify) { 1510 if (notify) {
1497 XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid)); 1511 XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid));
1498 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn); 1512 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
1499 } 1513 }
1500} 1514}
1501 1515
1502/* 1516/*
1503 * Map multiple entries of an I/O request into backend's VA space. 1517 * Map multiple entries of an I/O request into backend's VA space.
1504 * The xbd_io->xio_gref array has to be filled out by the caller. 1518 * The xbd_io->xio_gref array has to be filled out by the caller.
1505 */ 1519 */
1506static void * 1520static void *
1507xbdback_map_shm(struct xbdback_io *xbd_io) 1521xbdback_map_shm(struct xbdback_io *xbd_io)
1508{ 1522{
1509 struct xbdback_instance *xbdi = xbd_io->xio_xbdi; 1523 struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
1510 int error, s; 1524 int error;
1511 1525
1512#ifdef XENDEBUG_VBD 1526#ifdef XENDEBUG_VBD
1513 int i; 1527 int i;
1514 printf("xbdback_map_shm map grant "); 1528 printf("xbdback_map_shm map grant ");
1515 for (i = 0; i < xbd_io->xio_nrma; i++) { 1529 for (i = 0; i < xbd_io->xio_nrma; i++) {
1516 printf("%u ", (u_int)xbd_io->xio_gref[i]); 1530 printf("%u ", (u_int)xbd_io->xio_gref[i]);
1517 } 1531 }
1518#endif 1532#endif
1519 1533
1520 s = splvm(); /* XXXSMP */ 1534 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1535
1521 xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free); 1536 xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free);
1522 KASSERT(xbd_io->xio_xv != NULL); 1537 KASSERT(xbd_io->xio_xv != NULL);
1523 SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next); 1538 SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next);
1524 xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr; 1539 xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr;
1525 splx(s); 
1526 1540
1527 error = xen_shm_map(xbd_io->xio_nrma, xbdi->xbdi_domid, 1541 error = xen_shm_map(xbd_io->xio_nrma, xbdi->xbdi_domid,
1528 xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,  1542 xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,
1529 (xbd_io->xio_operation == BLKIF_OP_WRITE) ? XSHM_RO : 0); 1543 (xbd_io->xio_operation == BLKIF_OP_WRITE) ? XSHM_RO : 0);
1530 1544
1531 switch(error) { 1545 switch(error) {
1532 case 0: 1546 case 0:
1533#ifdef XENDEBUG_VBD 1547#ifdef XENDEBUG_VBD
1534 printf("handle "); 1548 printf("handle ");
1535 for (i = 0; i < xbd_io->xio_nrma; i++) { 1549 for (i = 0; i < xbd_io->xio_nrma; i++) {
1536 printf("%u ", (u_int)xbd_io->xio_gh[i]); 1550 printf("%u ", (u_int)xbd_io->xio_gh[i]);
1537 } 1551 }
1538 printf("\n"); 1552 printf("\n");