Thu Apr 23 09:16:21 2020 UTC
make xbdback actually MPSAFE and stop using KERNEL_LOCK()

remove no-longer-necessary atomics; the counters are now always
updated with the mutex held


(jdolecek)
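To illustrate the counter change described above, a minimal before/after sketch (not the literal patch; it only reuses field and lock names that appear in the diff below, e.g. xbdi_refcnt and xbdi_lock):

	/* before: lock-free update, needed <sys/atomic.h> */
	atomic_inc_uint(&xbdi->xbdi_refcnt);

	/* after: the caller already holds the per-instance mutex, so a
	 * plain increment suffices and the atomic op is redundant */
	KASSERT(mutex_owned(&xbdi->xbdi_lock));
	xbdi->xbdi_refcnt++;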
diff -r1.89 -r1.90 src/sys/arch/xen/xen/xbdback_xenbus.c

cvs diff -r1.89 -r1.90 src/sys/arch/xen/xen/xbdback_xenbus.c

--- src/sys/arch/xen/xen/xbdback_xenbus.c 2020/04/23 08:09:25 1.89
+++ src/sys/arch/xen/xen/xbdback_xenbus.c 2020/04/23 09:16:21 1.90
@@ -1,1619 +1,1633 @@
1/* $NetBSD: xbdback_xenbus.c,v 1.89 2020/04/23 08:09:25 jdolecek Exp $ */ 1/* $NetBSD: xbdback_xenbus.c,v 1.90 2020/04/23 09:16:21 jdolecek Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2006 Manuel Bouyer. 4 * Copyright (c) 2006 Manuel Bouyer.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * 25 *
26 */ 26 */
27 27
28#include <sys/cdefs.h> 28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.89 2020/04/23 08:09:25 jdolecek Exp $"); 29__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.90 2020/04/23 09:16:21 jdolecek Exp $");
30 30
31#include <sys/atomic.h> 
32#include <sys/buf.h> 31#include <sys/buf.h>
33#include <sys/condvar.h> 32#include <sys/condvar.h>
34#include <sys/conf.h> 33#include <sys/conf.h>
35#include <sys/disk.h> 34#include <sys/disk.h>
36#include <sys/device.h> 35#include <sys/device.h>
37#include <sys/fcntl.h> 36#include <sys/fcntl.h>
38#include <sys/kauth.h> 37#include <sys/kauth.h>
39#include <sys/kernel.h> 38#include <sys/kernel.h>
40#include <sys/kmem.h> 39#include <sys/kmem.h>
41#include <sys/kthread.h> 40#include <sys/kthread.h>
42#include <sys/mutex.h> 41#include <sys/mutex.h>
43#include <sys/param.h> 42#include <sys/param.h>
44#include <sys/queue.h> 43#include <sys/queue.h>
45#include <sys/systm.h> 44#include <sys/systm.h>
46#include <sys/time.h> 45#include <sys/time.h>
47#include <sys/types.h> 46#include <sys/types.h>
48#include <sys/vnode.h> 47#include <sys/vnode.h>
49 48
50#include <xen/xen.h> 49#include <xen/xen.h>
51#include <xen/xen_shm.h> 50#include <xen/xen_shm.h>
52#include <xen/evtchn.h> 51#include <xen/evtchn.h>
53#include <xen/xenbus.h> 52#include <xen/xenbus.h>
54#include <xen/xenring.h> 53#include <xen/xenring.h>
55#include <xen/include/public/io/protocols.h> 54#include <xen/include/public/io/protocols.h>
56 55
57/* #define XENDEBUG_VBD */ 56/* #define XENDEBUG_VBD */
58#ifdef XENDEBUG_VBD 57#ifdef XENDEBUG_VBD
59#define XENPRINTF(x) printf x 58#define XENPRINTF(x) printf x
60#else 59#else
61#define XENPRINTF(x) 60#define XENPRINTF(x)
62#endif 61#endif
63 62
64#define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 63#define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
65 64
66/* 65/*
67 * Backend block device driver for Xen 66 * Backend block device driver for Xen
68 */ 67 */
69 68
70/* Values are expressed in 512-byte sectors */ 69/* Values are expressed in 512-byte sectors */
71#define VBD_BSIZE 512 70#define VBD_BSIZE 512
72#define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1) 71#define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1)
73 72
74/* Need to alloc one extra page to account for possible mapping offset */ 73/* Need to alloc one extra page to account for possible mapping offset */
75#define VBD_VA_SIZE (MAXPHYS + PAGE_SIZE) 74#define VBD_VA_SIZE (MAXPHYS + PAGE_SIZE)
76#define VBD_MAX_INDIRECT_SEGMENTS VBD_VA_SIZE >> PAGE_SHIFT 75#define VBD_MAX_INDIRECT_SEGMENTS VBD_VA_SIZE >> PAGE_SHIFT
77 76
78CTASSERT(XENSHM_MAX_PAGES_PER_REQUEST >= VBD_MAX_INDIRECT_SEGMENTS); 77CTASSERT(XENSHM_MAX_PAGES_PER_REQUEST >= VBD_MAX_INDIRECT_SEGMENTS);
79 78
80struct xbdback_instance; 79struct xbdback_instance;
81 80
82/* 81/*
83 * status of a xbdback instance: 82 * status of a xbdback instance:
84 * WAITING: xbdback instance is connected, waiting for requests 83 * WAITING: xbdback instance is connected, waiting for requests
85 * RUN: xbdi thread must be woken up, I/Os have to be processed 84 * RUN: xbdi thread must be woken up, I/Os have to be processed
86 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled 85 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled
87 * DISCONNECTED: no I/Os, no ring, the thread should terminate. 86 * DISCONNECTED: no I/Os, no ring, the thread should terminate.
88 */ 87 */
89typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t; 88typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t;
90 89
91/* 90/*
92 * Each xbdback instance is managed by a single thread that handles all 91 * Each xbdback instance is managed by a single thread that handles all
93 * the I/O processing. As there are a variety of conditions that can block, 92 * the I/O processing. As there are a variety of conditions that can block,
94 * everything will be done in a sort of continuation-passing style. 93 * everything will be done in a sort of continuation-passing style.
95 * 94 *
96 * When the execution has to block to delay processing, for example to 95 * When the execution has to block to delay processing, for example to
97 * allow system to recover because of memory shortage (via shared memory 96 * allow system to recover because of memory shortage (via shared memory
98 * callback), the return value of a continuation can be set to NULL. In that 97 * callback), the return value of a continuation can be set to NULL. In that
99 * case, the thread will go back to sleeping and wait for the proper 98 * case, the thread will go back to sleeping and wait for the proper
100 * condition before it starts processing requests again from where it left. 99 * condition before it starts processing requests again from where it left.
101 * Continuation state is "stored" in the xbdback instance (xbdi_cont), 100 * Continuation state is "stored" in the xbdback instance (xbdi_cont),
102 * and should only be manipulated by the instance thread. 101 * and should only be manipulated by the instance thread.
103 * 102 *
104 * As xbdback(4) has to handle different sort of asynchronous events (Xen 103 * As xbdback(4) has to handle different sort of asynchronous events (Xen
105 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock 104 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
106 * mutex is used to protect specific elements of the xbdback instance from 105 * mutex is used to protect specific elements of the xbdback instance from
107 * concurrent access: thread status and ring access (when pushing responses). 106 * concurrent access: thread status and ring access (when pushing responses).
108 *  107 *
109 * Here's how the call graph is supposed to be for a single I/O: 108 * Here's how the call graph is supposed to be for a single I/O:
110 * 109 *
111 * xbdback_co_main() 110 * xbdback_co_main()
112 * | --> xbdback_co_cache_flush() 111 * | --> xbdback_co_cache_flush()
113 * | | | 112 * | | |
114 * | | -> xbdback_co_cache_doflush() or NULL 113 * | | -> xbdback_co_cache_doflush() or NULL
115 * | | | 114 * | | |
116 * | | -> xbdback_co_do_io() 115 * | | -> xbdback_co_do_io()
117 * xbdback_co_main_loop()-| 116 * xbdback_co_main_loop()-|
118 * | |-> xbdback_co_main_done2() or NULL 117 * | |-> xbdback_co_main_done2() or NULL
119 * | | 118 * | |
120 * | --> xbdback_co_main_incr() -> xbdback_co_main_loop() 119 * | --> xbdback_co_main_incr() -> xbdback_co_main_loop()
121 * | 120 * |
122 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop() 121 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
123 * | 122 * |
124 * xbdback_co_io_gotio() -> xbdback_map_shm() 123 * xbdback_co_io_gotio() -> xbdback_map_shm()
125 * | | 124 * | |
126 * | xbdback_co_main_incr() -> xbdback_co_main_loop() 125 * | xbdback_co_main_incr() -> xbdback_co_main_loop()
127 * | 126 * |
128 * xbdback_co_do_io()  127 * xbdback_co_do_io()
129 * | 128 * |
130 * xbdback_co_main_incr() -> xbdback_co_main_loop() 129 * xbdback_co_main_incr() -> xbdback_co_main_loop()
131 */ 130 */
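/*
 * A minimal sketch of the trampoline driving the chain above (illustrative
 * only, not the verbatim xbdback_trampoline()): each continuation returns
 * the object to hand to the next step, and a NULL return suspends
 * processing until the blocking condition clears and the thread is woken
 * up again.
 *
 *	while (obj != NULL && xbdi->xbdi_cont != NULL) {
 *		xbdback_cont_t cont = xbdi->xbdi_cont;
 *		obj = (*cont)(xbdi, obj);
 *	}
 */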
132typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *); 131typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);
133 132
134enum xbdi_proto { 133enum xbdi_proto {
135 XBDIP_NATIVE, 134 XBDIP_NATIVE,
136 XBDIP_32, 135 XBDIP_32,
137 XBDIP_64 136 XBDIP_64
138}; 137};
139 138
140struct xbdback_va { 139struct xbdback_va {
141 SLIST_ENTRY(xbdback_va) xv_next; 140 SLIST_ENTRY(xbdback_va) xv_next;
142 vaddr_t xv_vaddr; 141 vaddr_t xv_vaddr;
143}; 142};
144 143
145/* 144/*
146 * For each I/O operation associated with one of those requests, an 145 * For each I/O operation associated with one of those requests, an
147 * xbdback_io is allocated from a pool. It may correspond to multiple 146 * xbdback_io is allocated from a pool. It may correspond to multiple
148 * Xen disk requests, or parts of them, if several arrive at once that 147 * Xen disk requests, or parts of them, if several arrive at once that
149 * can be coalesced. 148 * can be coalesced.
150 */ 149 */
151struct xbdback_io { 150struct xbdback_io {
152 SLIST_ENTRY(xbdback_io) xio_next; 151 SLIST_ENTRY(xbdback_io) xio_next;
153 /* The instance pointer is duplicated for convenience. */ 152 /* The instance pointer is duplicated for convenience. */
154 struct xbdback_instance *xio_xbdi; /* our xbd instance */ 153 struct xbdback_instance *xio_xbdi; /* our xbd instance */
155 uint8_t xio_operation; 154 uint8_t xio_operation;
156 uint64_t xio_id; 155 uint64_t xio_id;
157 union { 156 union {
158 struct { 157 struct {
159 struct buf xio_buf; /* our I/O */ 158 struct buf xio_buf; /* our I/O */
160 /* the virtual address to map the request at */ 159 /* the virtual address to map the request at */
161 vaddr_t xio_vaddr; 160 vaddr_t xio_vaddr;
162 struct xbdback_va *xio_xv; 161 struct xbdback_va *xio_xv;
163 vaddr_t xio_start_offset; /* I/O start offset */ 162 vaddr_t xio_start_offset; /* I/O start offset */
164 /* grants to map */ 163 /* grants to map */
165 grant_ref_t xio_gref[VBD_MAX_INDIRECT_SEGMENTS]; 164 grant_ref_t xio_gref[VBD_MAX_INDIRECT_SEGMENTS];
166 /* grants release */ 165 /* grants release */
167 grant_handle_t xio_gh[VBD_MAX_INDIRECT_SEGMENTS]; 166 grant_handle_t xio_gh[VBD_MAX_INDIRECT_SEGMENTS];
168 uint16_t xio_nrma; /* number of guest pages */ 167 uint16_t xio_nrma; /* number of guest pages */
169 } xio_rw; 168 } xio_rw;
170 } u; 169 } u;
171}; 170};
172#define xio_buf u.xio_rw.xio_buf 171#define xio_buf u.xio_rw.xio_buf
173#define xio_vaddr u.xio_rw.xio_vaddr 172#define xio_vaddr u.xio_rw.xio_vaddr
174#define xio_start_offset u.xio_rw.xio_start_offset 173#define xio_start_offset u.xio_rw.xio_start_offset
175#define xio_xv u.xio_rw.xio_xv 174#define xio_xv u.xio_rw.xio_xv
176#define xio_gref u.xio_rw.xio_gref 175#define xio_gref u.xio_rw.xio_gref
177#define xio_gh u.xio_rw.xio_gh 176#define xio_gh u.xio_rw.xio_gh
178#define xio_nrma u.xio_rw.xio_nrma 177#define xio_nrma u.xio_rw.xio_nrma
179 178
180/* we keep the xbdback instances in a linked list */ 179/* we keep the xbdback instances in a linked list */
181struct xbdback_instance { 180struct xbdback_instance {
182 SLIST_ENTRY(xbdback_instance) next; 181 SLIST_ENTRY(xbdback_instance) next;
183 struct xenbus_device *xbdi_xbusd; /* our xenstore entry */ 182 struct xenbus_device *xbdi_xbusd; /* our xenstore entry */
184 struct xenbus_watch xbdi_watch; /* to watch our store */ 183 struct xenbus_watch xbdi_watch; /* to watch our store */
185 domid_t xbdi_domid; /* attached to this domain */ 184 domid_t xbdi_domid; /* attached to this domain */
186 uint32_t xbdi_handle; /* domain-specific handle */ 185 uint32_t xbdi_handle; /* domain-specific handle */
187 char xbdi_name[16]; /* name of this instance */ 186 char xbdi_name[16]; /* name of this instance */
188 /* mutex that protects concurrent access to the xbdback instance */ 187 /* mutex that protects concurrent access to the xbdback instance */
189 kmutex_t xbdi_lock; 188 kmutex_t xbdi_lock;
190 kcondvar_t xbdi_cv; /* wait channel for thread work */ 189 kcondvar_t xbdi_cv; /* wait channel for thread work */
191 xbdback_state_t xbdi_status; /* thread's status */ 190 xbdback_state_t xbdi_status; /* thread's status */
192 /* context and KVA for mapping transfers */ 191 /* context and KVA for mapping transfers */
193 struct xbdback_io xbdi_io[BLKIF_RING_SIZE]; 192 struct xbdback_io xbdi_io[BLKIF_RING_SIZE];
194 SLIST_HEAD(, xbdback_io) xbdi_io_free; 193 SLIST_HEAD(, xbdback_io) xbdi_io_free;
195 struct xbdback_va xbdi_va[BLKIF_RING_SIZE]; 194 struct xbdback_va xbdi_va[BLKIF_RING_SIZE];
196 SLIST_HEAD(, xbdback_va) xbdi_va_free; 195 SLIST_HEAD(, xbdback_va) xbdi_va_free;
197 /* backing device parameters */ 196 /* backing device parameters */
198 dev_t xbdi_dev; 197 dev_t xbdi_dev;
199 const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */ 198 const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */
200 struct vnode *xbdi_vp; 199 struct vnode *xbdi_vp;
201 uint64_t xbdi_size; 200 uint64_t xbdi_size;
202 bool xbdi_ro; /* is device read-only ? */ 201 bool xbdi_ro; /* is device read-only ? */
203 /* parameters for the communication */ 202 /* parameters for the communication */
204 unsigned int xbdi_evtchn; 203 unsigned int xbdi_evtchn;
205 struct intrhand *xbdi_ih; 204 struct intrhand *xbdi_ih;
206 /* private parameters for communication */ 205 /* private parameters for communication */
207 blkif_back_ring_proto_t xbdi_ring; 206 blkif_back_ring_proto_t xbdi_ring;
208 enum xbdi_proto xbdi_proto; 207 enum xbdi_proto xbdi_proto;
209 grant_handle_t xbdi_ring_handle; /* to unmap the ring */ 208 grant_handle_t xbdi_ring_handle; /* to unmap the ring */
210 vaddr_t xbdi_ring_va; /* to unmap the ring */ 209 vaddr_t xbdi_ring_va; /* to unmap the ring */
211 /* disconnection must be postponed until all I/O is done */ 210 /* disconnection must be postponed until all I/O is done */
212 int xbdi_refcnt; 211 int xbdi_refcnt;
213 /*  212 /*
214 * State for I/O processing/coalescing follows; this has to 213 * State for I/O processing/coalescing follows; this has to
215 * live here instead of on the stack because of the 214 * live here instead of on the stack because of the
216 * continuation-ness (see above). 215 * continuation-ness (see above).
217 */ 216 */
218 RING_IDX xbdi_req_prod; /* limit on request indices */ 217 RING_IDX xbdi_req_prod; /* limit on request indices */
219 xbdback_cont_t xbdi_cont; 218 xbdback_cont_t xbdi_cont;
220 /* _request state: track requests fetched from ring */ 219 /* _request state: track requests fetched from ring */
221 blkif_request_t xbdi_xen_req; 220 blkif_request_t xbdi_xen_req;
222 struct blkif_request_segment xbdi_seg[VBD_MAX_INDIRECT_SEGMENTS]; 221 struct blkif_request_segment xbdi_seg[VBD_MAX_INDIRECT_SEGMENTS];
223 bus_dmamap_t xbdi_seg_dmamap; 222 bus_dmamap_t xbdi_seg_dmamap;
224 grant_ref_t xbdi_in_gntref; 223 grant_ref_t xbdi_in_gntref;
225 /* other state */ 224 /* other state */
226 uint xbdi_pendingreqs; /* number of I/O in fly */ 225 uint xbdi_pendingreqs; /* number of I/O in fly */
227 struct timeval xbdi_lasterr_time; /* error time tracking */ 226 struct timeval xbdi_lasterr_time; /* error time tracking */
228}; 227};
229/* Manipulation of the above reference count. */ 228/* Manipulation of the above reference count. */
230#define xbdi_get(xbdip) atomic_inc_uint(&(xbdip)->xbdi_refcnt) 229#define xbdi_get(xbdip) (xbdip)->xbdi_refcnt++
231#define xbdi_put(xbdip) \ 230#define xbdi_put(xbdip) \
232do { \ 231do { \
233 if (atomic_dec_uint_nv(&(xbdip)->xbdi_refcnt) == 0) \ 232 if (--((xbdip)->xbdi_refcnt) == 0) \
234 xbdback_finish_disconnect(xbdip); \ 233 xbdback_finish_disconnect(xbdip); \
235} while (/* CONSTCOND */ 0) 234} while (/* CONSTCOND */ 0)
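/*
 * Note (an assumption drawn from the commit message, not text from the
 * file): with the atomic ops removed, xbdi_get()/xbdi_put() rely on the
 * caller holding xbdi_lock; xbdback_finish_disconnect() already asserts
 * mutex_owned(&xbdi->xbdi_lock).
 */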
236 235
237static SLIST_HEAD(, xbdback_instance) xbdback_instances; 236static SLIST_HEAD(, xbdback_instance) xbdback_instances;
238static kmutex_t xbdback_lock; 237static kmutex_t xbdback_lock;
239 238
240/* Interval between reports of I/O errors from frontend */ 239/* Interval between reports of I/O errors from frontend */
241static const struct timeval xbdback_err_intvl = { 1, 0 }; 240static const struct timeval xbdback_err_intvl = { 1, 0 };
242 241
243 void xbdbackattach(int); 242 void xbdbackattach(int);
244static int xbdback_xenbus_create(struct xenbus_device *); 243static int xbdback_xenbus_create(struct xenbus_device *);
245static int xbdback_xenbus_destroy(void *); 244static int xbdback_xenbus_destroy(void *);
246static void xbdback_frontend_changed(void *, XenbusState); 245static void xbdback_frontend_changed(void *, XenbusState);
247static void xbdback_backend_changed(struct xenbus_watch *, 246static void xbdback_backend_changed(struct xenbus_watch *,
248 const char **, unsigned int); 247 const char **, unsigned int);
249static int xbdback_evthandler(void *); 248static int xbdback_evthandler(void *);
250 249
251static int xbdback_connect(struct xbdback_instance *); 250static int xbdback_connect(struct xbdback_instance *);
252static void xbdback_disconnect(struct xbdback_instance *); 251static void xbdback_disconnect(struct xbdback_instance *);
253static void xbdback_finish_disconnect(struct xbdback_instance *); 252static void xbdback_finish_disconnect(struct xbdback_instance *);
254 253
255static bool xbdif_lookup(domid_t, uint32_t); 254static bool xbdif_lookup(domid_t, uint32_t);
256 255
257static void *xbdback_co_main(struct xbdback_instance *, void *); 256static void *xbdback_co_main(struct xbdback_instance *, void *);
258static void *xbdback_co_main_loop(struct xbdback_instance *, void *); 257static void *xbdback_co_main_loop(struct xbdback_instance *, void *);
259static void *xbdback_co_main_incr(struct xbdback_instance *, void *); 258static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
260static void *xbdback_co_main_done2(struct xbdback_instance *, void *); 259static void *xbdback_co_main_done2(struct xbdback_instance *, void *);
261 260
262static void *xbdback_co_cache_flush(struct xbdback_instance *, void *); 261static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
263static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *); 262static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *);
264 263
265static void *xbdback_co_io(struct xbdback_instance *, void *); 264static void *xbdback_co_io(struct xbdback_instance *, void *);
266static void *xbdback_co_io_gotio(struct xbdback_instance *, void *); 265static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);
267 266
268static void *xbdback_co_do_io(struct xbdback_instance *, void *); 267static void *xbdback_co_do_io(struct xbdback_instance *, void *);
269 268
270static void xbdback_io_error(struct xbdback_io *, int); 269static void xbdback_io_error(struct xbdback_io *, int);
271static void xbdback_iodone(struct buf *); 270static void xbdback_iodone(struct buf *);
 271static void xbdback_iodone_locked(struct xbdback_instance *,
 272 struct xbdback_io *, struct buf *);
272static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int); 273static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int);
273 274
274static void *xbdback_map_shm(struct xbdback_io *); 275static void *xbdback_map_shm(struct xbdback_io *);
275static void xbdback_unmap_shm(struct xbdback_io *); 276static void xbdback_unmap_shm(struct xbdback_io *);
276 277
277static struct xbdback_io *xbdback_io_get(struct xbdback_instance *); 278static struct xbdback_io *xbdback_io_get(struct xbdback_instance *);
278static void xbdback_io_put(struct xbdback_instance *, struct xbdback_io *); 279static void xbdback_io_put(struct xbdback_instance *, struct xbdback_io *);
279static void xbdback_thread(void *); 280static void xbdback_thread(void *);
280static void xbdback_wakeup_thread(struct xbdback_instance *); 281static void xbdback_wakeup_thread(struct xbdback_instance *);
281static void xbdback_trampoline(struct xbdback_instance *, void *); 282static void xbdback_trampoline(struct xbdback_instance *, void *);
282 283
283static struct xenbus_backend_driver xbd_backend_driver = { 284static struct xenbus_backend_driver xbd_backend_driver = {
284 .xbakd_create = xbdback_xenbus_create, 285 .xbakd_create = xbdback_xenbus_create,
285 .xbakd_type = "vbd" 286 .xbakd_type = "vbd"
286}; 287};
287 288
288void 289void
289xbdbackattach(int n) 290xbdbackattach(int n)
290{ 291{
291 XENPRINTF(("xbdbackattach\n")); 292 XENPRINTF(("xbdbackattach\n"));
292 293
293 /* 294 /*
294 * initialize the backend driver, register the control message handler 295 * initialize the backend driver, register the control message handler
295 * and send driver up message. 296 * and send driver up message.
296 */ 297 */
297 SLIST_INIT(&xbdback_instances); 298 SLIST_INIT(&xbdback_instances);
298 mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE); 299 mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE);
299 300
300 xenbus_backend_register(&xbd_backend_driver); 301 xenbus_backend_register(&xbd_backend_driver);
301} 302}
302 303
303static int 304static int
304xbdback_xenbus_create(struct xenbus_device *xbusd) 305xbdback_xenbus_create(struct xenbus_device *xbusd)
305{ 306{
306 struct xbdback_instance *xbdi; 307 struct xbdback_instance *xbdi;
307 long domid, handle; 308 long domid, handle;
308 int error, i; 309 int error, i;
309 char *ep; 310 char *ep;
310 311
311 if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path, 312 if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path,
312 "frontend-id", &domid, 10)) != 0) { 313 "frontend-id", &domid, 10)) != 0) {
313 aprint_error("xbdback: can't read %s/frontend-id: %d\n", 314 aprint_error("xbdback: can't read %s/frontend-id: %d\n",
314 xbusd->xbusd_path, error); 315 xbusd->xbusd_path, error);
315 return error; 316 return error;
316 } 317 }
317 318
318 /* 319 /*
319 * get handle: this is the last component of the path; which is 320 * get handle: this is the last component of the path; which is
320 * a decimal number. $path/dev contains the device name, which is not 321 * a decimal number. $path/dev contains the device name, which is not
321 * appropriate. 322 * appropriate.
322 */ 323 */
323 for (i = strlen(xbusd->xbusd_path); i > 0; i--) { 324 for (i = strlen(xbusd->xbusd_path); i > 0; i--) {
324 if (xbusd->xbusd_path[i] == '/') 325 if (xbusd->xbusd_path[i] == '/')
325 break; 326 break;
326 } 327 }
327 if (i == 0) { 328 if (i == 0) {
328 aprint_error("xbdback: can't parse %s\n", 329 aprint_error("xbdback: can't parse %s\n",
329 xbusd->xbusd_path); 330 xbusd->xbusd_path);
330 return EFTYPE; 331 return EFTYPE;
331 } 332 }
332 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10); 333 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10);
333 if (*ep != '\0') { 334 if (*ep != '\0') {
334 aprint_error("xbdback: can't parse %s\n", 335 aprint_error("xbdback: can't parse %s\n",
335 xbusd->xbusd_path); 336 xbusd->xbusd_path);
336 return EFTYPE; 337 return EFTYPE;
337 } 338 }
338 339
339 /* XXXSMP unlocked search */ 
340 if (xbdif_lookup(domid, handle)) { 
341 return EEXIST; 
342 } 
343 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP); 340 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP);
344 341
345 xbdi->xbdi_domid = domid; 342 xbdi->xbdi_domid = domid;
346 xbdi->xbdi_handle = handle; 343 xbdi->xbdi_handle = handle;
347 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d", 344 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d",
348 xbdi->xbdi_domid, xbdi->xbdi_handle); 345 xbdi->xbdi_domid, xbdi->xbdi_handle);
349 346
 347 mutex_enter(&xbdback_lock);
 348 if (xbdif_lookup(domid, handle)) {
 349 mutex_exit(&xbdback_lock);
 350 kmem_free(xbdi, sizeof(*xbdi));
 351 return EEXIST;
 352 }
 353 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next);
 354 mutex_exit(&xbdback_lock);
 355
350 /* initialize status and reference counter */ 356 /* initialize status and reference counter */
351 xbdi->xbdi_status = DISCONNECTED; 357 xbdi->xbdi_status = DISCONNECTED;
352 xbdi_get(xbdi); 358 xbdi_get(xbdi);
353 359
354 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO); 360 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO);
355 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name); 361 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name);
356 mutex_enter(&xbdback_lock); 
357 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next); 
358 mutex_exit(&xbdback_lock); 
359 362
360 xbusd->xbusd_u.b.b_cookie = xbdi;  363 xbusd->xbusd_u.b.b_cookie = xbdi;
361 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy; 364 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy;
362 xbusd->xbusd_otherend_changed = xbdback_frontend_changed; 365 xbusd->xbusd_otherend_changed = xbdback_frontend_changed;
363 xbdi->xbdi_xbusd = xbusd; 366 xbdi->xbdi_xbusd = xbusd;
364 367
365 if (bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat, PAGE_SIZE, 368 if (bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat, PAGE_SIZE,
366 1, PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 369 1, PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
367 &xbdi->xbdi_seg_dmamap) != 0) { 370 &xbdi->xbdi_seg_dmamap) != 0) {
368 printf("%s: can't create dma map for indirect segments\n", 371 printf("%s: can't create dma map for indirect segments\n",
369 xbdi->xbdi_name); 372 xbdi->xbdi_name);
370 goto fail; 373 goto fail;
371 } 374 }
372 if (bus_dmamap_load(xbdi->xbdi_xbusd->xbusd_dmat, 375 if (bus_dmamap_load(xbdi->xbdi_xbusd->xbusd_dmat,
373 xbdi->xbdi_seg_dmamap, xbdi->xbdi_seg, 376 xbdi->xbdi_seg_dmamap, xbdi->xbdi_seg,
374 sizeof(xbdi->xbdi_seg), NULL, BUS_DMA_WAITOK) != 0) { 377 sizeof(xbdi->xbdi_seg), NULL, BUS_DMA_WAITOK) != 0) {
375 printf("%s: can't load dma map for indirect segments\n", 378 printf("%s: can't load dma map for indirect segments\n",
376 xbdi->xbdi_name); 379 xbdi->xbdi_name);
377 goto fail; 380 goto fail;
378 } 381 }
379 KASSERT(xbdi->xbdi_seg_dmamap->dm_nsegs == 1); 382 KASSERT(xbdi->xbdi_seg_dmamap->dm_nsegs == 1);
380 383
381 SLIST_INIT(&xbdi->xbdi_va_free); 384 SLIST_INIT(&xbdi->xbdi_va_free);
382 for (i = 0; i < BLKIF_RING_SIZE; i++) { 385 for (i = 0; i < BLKIF_RING_SIZE; i++) {
383 xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map, 386 xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map,
384 VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA); 387 VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA);
385 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i], 388 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i],
386 xv_next); 389 xv_next);
387 } 390 }
388 391
389 SLIST_INIT(&xbdi->xbdi_io_free); 392 SLIST_INIT(&xbdi->xbdi_io_free);
390 for (i = 0; i < BLKIF_RING_SIZE; i++) { 393 for (i = 0; i < BLKIF_RING_SIZE; i++) {
391 SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, &xbdi->xbdi_io[i], 394 SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, &xbdi->xbdi_io[i],
392 xio_next); 395 xio_next);
393 } 396 }
394 397
395 error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device", 398 error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device",
396 &xbdi->xbdi_watch, xbdback_backend_changed); 399 &xbdi->xbdi_watch, xbdback_backend_changed);
397 if (error) { 400 if (error) {
398 printf("failed to watch on %s/physical-device: %d\n", 401 printf("failed to watch on %s/physical-device: %d\n",
399 xbusd->xbusd_path, error); 402 xbusd->xbusd_path, error);
400 goto fail; 403 goto fail;
401 } 404 }
402 xbdi->xbdi_watch.xbw_dev = xbusd; 405 xbdi->xbdi_watch.xbw_dev = xbusd;
403 error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait); 406 error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
404 if (error) { 407 if (error) {
405 printf("failed to switch state on %s: %d\n", 408 printf("failed to switch state on %s: %d\n",
406 xbusd->xbusd_path, error); 409 xbusd->xbusd_path, error);
407 goto fail2; 410 goto fail2;
408 } 411 }
409 return 0; 412 return 0;
410fail2: 413fail2:
411 unregister_xenbus_watch(&xbdi->xbdi_watch); 414 unregister_xenbus_watch(&xbdi->xbdi_watch);
412fail: 415fail:
413 kmem_free(xbdi, sizeof(*xbdi)); 416 kmem_free(xbdi, sizeof(*xbdi));
414 return error; 417 return error;
415} 418}
416 419
417static int 420static int
418xbdback_xenbus_destroy(void *arg) 421xbdback_xenbus_destroy(void *arg)
419{ 422{
420 struct xbdback_instance *xbdi = arg; 423 struct xbdback_instance *xbdi = arg;
421 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 424 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
422 struct gnttab_unmap_grant_ref ungrop; 425 struct gnttab_unmap_grant_ref ungrop;
423 int err; 426 int err;
424 427
425 XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status)); 428 XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status));
426 429
427 xbdback_disconnect(xbdi); 430 xbdback_disconnect(xbdi);
428 431
429 /* unregister watch */ 432 /* unregister watch */
430 if (xbdi->xbdi_watch.node) 433 if (xbdi->xbdi_watch.node)
431 xenbus_unwatch_path(&xbdi->xbdi_watch); 434 xenbus_unwatch_path(&xbdi->xbdi_watch);
432 435
433 /* unmap ring */ 436 /* unmap ring */
434 if (xbdi->xbdi_ring_va != 0) { 437 if (xbdi->xbdi_ring_va != 0) {
435 ungrop.host_addr = xbdi->xbdi_ring_va; 438 ungrop.host_addr = xbdi->xbdi_ring_va;
436 ungrop.handle = xbdi->xbdi_ring_handle; 439 ungrop.handle = xbdi->xbdi_ring_handle;
437 ungrop.dev_bus_addr = 0; 440 ungrop.dev_bus_addr = 0;
438 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 441 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
439 &ungrop, 1); 442 &ungrop, 1);
440 if (err) 443 if (err)
441 printf("xbdback %s: unmap_grant_ref failed: %d\n", 444 printf("xbdback %s: unmap_grant_ref failed: %d\n",
442 xbusd->xbusd_otherend, err); 445 xbusd->xbusd_otherend, err);
443 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, 446 uvm_km_free(kernel_map, xbdi->xbdi_ring_va,
444 PAGE_SIZE, UVM_KMF_VAONLY); 447 PAGE_SIZE, UVM_KMF_VAONLY);
445 } 448 }
446 /* close device */ 449 /* close device */
447 if (xbdi->xbdi_size) { 450 if (xbdi->xbdi_size) {
448 const char *name; 451 const char *name;
449 struct dkwedge_info wi; 452 struct dkwedge_info wi;
450 if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0) 453 if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0)
451 name = wi.dkw_devname; 454 name = wi.dkw_devname;
452 else 455 else
453 name = "*unknown*"; 456 name = "*unknown*";
454 printf("xbd backend: detach device %s for domain %d\n", 457 printf("xbd backend: detach device %s for domain %d\n",
455 name, xbdi->xbdi_domid); 458 name, xbdi->xbdi_domid);
456 vn_close(xbdi->xbdi_vp, FREAD, NOCRED); 459 vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
457 } 460 }
458 mutex_enter(&xbdback_lock); 461 mutex_enter(&xbdback_lock);
459 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next); 462 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next);
460 mutex_exit(&xbdback_lock); 463 mutex_exit(&xbdback_lock);
461 464
462 for (int i = 0; i < BLKIF_RING_SIZE; i++) { 465 for (int i = 0; i < BLKIF_RING_SIZE; i++) {
463 if (xbdi->xbdi_va[i].xv_vaddr != 0) { 466 if (xbdi->xbdi_va[i].xv_vaddr != 0) {
464 uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr, 467 uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr,
465 VBD_VA_SIZE, UVM_KMF_VAONLY); 468 VBD_VA_SIZE, UVM_KMF_VAONLY);
466 xbdi->xbdi_va[i].xv_vaddr = 0; 469 xbdi->xbdi_va[i].xv_vaddr = 0;
467 } 470 }
468 } 471 }
469 472
470 bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat, xbdi->xbdi_seg_dmamap); 473 bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat, xbdi->xbdi_seg_dmamap);
471 bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat, xbdi->xbdi_seg_dmamap); 474 bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat, xbdi->xbdi_seg_dmamap);
472 475
473 mutex_destroy(&xbdi->xbdi_lock); 476 mutex_destroy(&xbdi->xbdi_lock);
474 cv_destroy(&xbdi->xbdi_cv); 477 cv_destroy(&xbdi->xbdi_cv);
475 kmem_free(xbdi, sizeof(*xbdi)); 478 kmem_free(xbdi, sizeof(*xbdi));
476 return 0; 479 return 0;
477} 480}
478 481
479static int 482static int
480xbdback_connect(struct xbdback_instance *xbdi) 483xbdback_connect(struct xbdback_instance *xbdi)
481{ 484{
482 int err; 485 int err;
483 struct gnttab_map_grant_ref grop; 486 struct gnttab_map_grant_ref grop;
484 struct gnttab_unmap_grant_ref ungrop; 487 struct gnttab_unmap_grant_ref ungrop;
485 evtchn_op_t evop; 488 evtchn_op_t evop;
486 u_long ring_ref, revtchn; 489 u_long ring_ref, revtchn;
487 char xsproto[32]; 490 char xsproto[32];
488 const char *proto; 491 const char *proto;
489 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 492 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
490 493
491 XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path)); 494 XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path));
492 /* read comunication informations */ 495 /* read comunication informations */
493 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend, 496 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
494 "ring-ref", &ring_ref, 10); 497 "ring-ref", &ring_ref, 10);
495 if (err) { 498 if (err) {
496 xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref", 499 xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref",
497 xbusd->xbusd_otherend); 500 xbusd->xbusd_otherend);
498 return -1; 501 return -1;
499 } 502 }
500 XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref)); 503 XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref));
501 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend, 504 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
502 "event-channel", &revtchn, 10); 505 "event-channel", &revtchn, 10);
503 if (err) { 506 if (err) {
504 xenbus_dev_fatal(xbusd, err, "reading %s/event-channel", 507 xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
505 xbusd->xbusd_otherend); 508 xbusd->xbusd_otherend);
506 return -1; 509 return -1;
507 } 510 }
508 XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn)); 511 XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn));
509 err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol", 512 err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol",
510 xsproto, sizeof(xsproto)); 513 xsproto, sizeof(xsproto));
511 if (err) { 514 if (err) {
512 xbdi->xbdi_proto = XBDIP_NATIVE; 515 xbdi->xbdi_proto = XBDIP_NATIVE;
513 proto = "unspecified"; 516 proto = "unspecified";
514 XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path)); 517 XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path));
515 } else { 518 } else {
516 XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto)); 519 XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto));
517 if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) { 520 if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) {
518 xbdi->xbdi_proto = XBDIP_NATIVE; 521 xbdi->xbdi_proto = XBDIP_NATIVE;
519 proto = XEN_IO_PROTO_ABI_NATIVE; 522 proto = XEN_IO_PROTO_ABI_NATIVE;
520 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) { 523 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) {
521 xbdi->xbdi_proto = XBDIP_32; 524 xbdi->xbdi_proto = XBDIP_32;
522 proto = XEN_IO_PROTO_ABI_X86_32; 525 proto = XEN_IO_PROTO_ABI_X86_32;
523 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) { 526 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) {
524 xbdi->xbdi_proto = XBDIP_64; 527 xbdi->xbdi_proto = XBDIP_64;
525 proto = XEN_IO_PROTO_ABI_X86_64; 528 proto = XEN_IO_PROTO_ABI_X86_64;
526 } else { 529 } else {
527 aprint_error("xbd domain %d: unknown proto %s\n", 530 aprint_error("xbd domain %d: unknown proto %s\n",
528 xbdi->xbdi_domid, xsproto); 531 xbdi->xbdi_domid, xsproto);
529 return -1; 532 return -1;
530 } 533 }
531 } 534 }
532 535
533 /* allocate VA space and map rings */ 536 /* allocate VA space and map rings */
534 xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 537 xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
535 UVM_KMF_VAONLY); 538 UVM_KMF_VAONLY);
536 if (xbdi->xbdi_ring_va == 0) { 539 if (xbdi->xbdi_ring_va == 0) {
537 xenbus_dev_fatal(xbusd, ENOMEM, 540 xenbus_dev_fatal(xbusd, ENOMEM,
538 "can't get VA for ring", xbusd->xbusd_otherend); 541 "can't get VA for ring", xbusd->xbusd_otherend);
539 return -1; 542 return -1;
540 } 543 }
541 XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va)); 544 XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va));
542 545
543 grop.host_addr = xbdi->xbdi_ring_va; 546 grop.host_addr = xbdi->xbdi_ring_va;
544 grop.flags = GNTMAP_host_map; 547 grop.flags = GNTMAP_host_map;
545 grop.ref = ring_ref; 548 grop.ref = ring_ref;
546 grop.dom = xbdi->xbdi_domid; 549 grop.dom = xbdi->xbdi_domid;
547 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 550 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
548 &grop, 1); 551 &grop, 1);
549 if (err || grop.status) { 552 if (err || grop.status) {
550 aprint_error("xbdback %s: can't map grant ref: %d/%d\n", 553 aprint_error("xbdback %s: can't map grant ref: %d/%d\n",
551 xbusd->xbusd_path, err, grop.status); 554 xbusd->xbusd_path, err, grop.status);
552 xenbus_dev_fatal(xbusd, EINVAL, 555 xenbus_dev_fatal(xbusd, EINVAL,
553 "can't map ring", xbusd->xbusd_otherend); 556 "can't map ring", xbusd->xbusd_otherend);
554 goto err; 557 goto err;
555 } 558 }
556 xbdi->xbdi_ring_handle = grop.handle; 559 xbdi->xbdi_ring_handle = grop.handle;
557 XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, grop.handle)); 560 XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, grop.handle));
558 561
559 switch(xbdi->xbdi_proto) { 562 switch(xbdi->xbdi_proto) {
560 case XBDIP_NATIVE: 563 case XBDIP_NATIVE:
561 { 564 {
562 blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va; 565 blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va;
563 BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE); 566 BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE);
564 break; 567 break;
565 } 568 }
566 case XBDIP_32: 569 case XBDIP_32:
567 { 570 {
568 blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va; 571 blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va;
569 BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE); 572 BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE);
570 break; 573 break;
571 } 574 }
572 case XBDIP_64: 575 case XBDIP_64:
573 { 576 {
574 blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va; 577 blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va;
575 BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE); 578 BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE);
576 break; 579 break;
577 } 580 }
578 } 581 }
579 582
580 evop.cmd = EVTCHNOP_bind_interdomain; 583 evop.cmd = EVTCHNOP_bind_interdomain;
581 evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid; 584 evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid;
582 evop.u.bind_interdomain.remote_port = revtchn; 585 evop.u.bind_interdomain.remote_port = revtchn;
583 err = HYPERVISOR_event_channel_op(&evop); 586 err = HYPERVISOR_event_channel_op(&evop);
584 if (err) { 587 if (err) {
585 aprint_error("blkback %s: " 588 aprint_error("blkback %s: "
586 "can't get event channel: %d\n", 589 "can't get event channel: %d\n",
587 xbusd->xbusd_otherend, err); 590 xbusd->xbusd_otherend, err);
588 xenbus_dev_fatal(xbusd, err, 591 xenbus_dev_fatal(xbusd, err,
589 "can't bind event channel", xbusd->xbusd_otherend); 592 "can't bind event channel", xbusd->xbusd_otherend);
590 goto err2; 593 goto err2;
591 } 594 }
592 XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn)); 595 XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn));
593 xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port; 596 xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port;
594 597
595 xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic, xbdi->xbdi_evtchn, 598 xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic, xbdi->xbdi_evtchn,
596 IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi, false, 599 IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi, false,
597 xbdi->xbdi_name); 600 xbdi->xbdi_name);
598 KASSERT(xbdi->xbdi_ih != NULL); 601 KASSERT(xbdi->xbdi_ih != NULL);
599 aprint_verbose("xbd backend domain %d handle %#x (%d) " 602 aprint_verbose("xbd backend domain %d handle %#x (%d) "
600 "using event channel %d, protocol %s\n", xbdi->xbdi_domid, 603 "using event channel %d, protocol %s\n", xbdi->xbdi_domid,
601 xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto); 604 xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto);
602 605
603 /* enable the xbdback event handler machinery */ 606 /* enable the xbdback event handler machinery */
604 xbdi->xbdi_status = WAITING; 607 xbdi->xbdi_status = WAITING;
605 hypervisor_unmask_event(xbdi->xbdi_evtchn); 608 hypervisor_unmask_event(xbdi->xbdi_evtchn);
606 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn); 609 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
607 610
608 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, 611 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
609 xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0) 612 xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0)
610 return 0; 613 return 0;
611 614
612err2: 615err2:
613 /* unmap ring */ 616 /* unmap ring */
614 ungrop.host_addr = xbdi->xbdi_ring_va; 617 ungrop.host_addr = xbdi->xbdi_ring_va;
615 ungrop.handle = xbdi->xbdi_ring_handle; 618 ungrop.handle = xbdi->xbdi_ring_handle;
616 ungrop.dev_bus_addr = 0; 619 ungrop.dev_bus_addr = 0;
617 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 620 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
618 &ungrop, 1); 621 &ungrop, 1);
619 if (err) 622 if (err)
620 aprint_error("xbdback %s: unmap_grant_ref failed: %d\n", 623 aprint_error("xbdback %s: unmap_grant_ref failed: %d\n",
621 xbusd->xbusd_path, err); 624 xbusd->xbusd_path, err);
622 625
623err: 626err:
624 /* free ring VA space */ 627 /* free ring VA space */
625 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY); 628 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY);
626 return -1; 629 return -1;
627} 630}
628 631
629/* 632/*
630 * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context. 633 * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context.
631 */ 634 */
632static void 635static void
633xbdback_disconnect(struct xbdback_instance *xbdi) 636xbdback_disconnect(struct xbdback_instance *xbdi)
634{ 637{
635  638
636 mutex_enter(&xbdi->xbdi_lock); 639 mutex_enter(&xbdi->xbdi_lock);
637 if (xbdi->xbdi_status == DISCONNECTED) { 640 if (xbdi->xbdi_status == DISCONNECTED) {
638 mutex_exit(&xbdi->xbdi_lock); 641 mutex_exit(&xbdi->xbdi_lock);
639 return; 642 return;
640 } 643 }
641 hypervisor_mask_event(xbdi->xbdi_evtchn); 644 hypervisor_mask_event(xbdi->xbdi_evtchn);
642 xen_intr_disestablish(xbdi->xbdi_ih); 645 xen_intr_disestablish(xbdi->xbdi_ih);
643 646
644 /* signal thread that we want to disconnect, then wait for it */ 647 /* signal thread that we want to disconnect, then wait for it */
645 xbdi->xbdi_status = DISCONNECTING; 648 xbdi->xbdi_status = DISCONNECTING;
646 cv_signal(&xbdi->xbdi_cv); 649 cv_signal(&xbdi->xbdi_cv);
647 650
648 while (xbdi->xbdi_status != DISCONNECTED) 651 while (xbdi->xbdi_status != DISCONNECTED)
649 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 652 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
650 653
651 mutex_exit(&xbdi->xbdi_lock); 654 mutex_exit(&xbdi->xbdi_lock);
652 655
653 xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing); 656 xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing);
654} 657}
655 658
656static void 659static void
657xbdback_frontend_changed(void *arg, XenbusState new_state) 660xbdback_frontend_changed(void *arg, XenbusState new_state)
658{ 661{
659 struct xbdback_instance *xbdi = arg; 662 struct xbdback_instance *xbdi = arg;
660 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 663 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
661 664
662 XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state)); 665 XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state));
663 switch(new_state) { 666 switch(new_state) {
664 case XenbusStateInitialising: 667 case XenbusStateInitialising:
665 break; 668 break;
666 case XenbusStateInitialised: 669 case XenbusStateInitialised:
667 case XenbusStateConnected: 670 case XenbusStateConnected:
668 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) 671 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN)
669 break; 672 break;
670 xbdback_connect(xbdi); 673 xbdback_connect(xbdi);
671 break; 674 break;
672 case XenbusStateClosing: 675 case XenbusStateClosing:
673 xbdback_disconnect(xbdi); 676 xbdback_disconnect(xbdi);
674 break; 677 break;
675 case XenbusStateClosed: 678 case XenbusStateClosed:
676 /* otherend_changed() should handle it for us */ 679 /* otherend_changed() should handle it for us */
677 panic("xbdback_frontend_changed: closed\n"); 680 panic("xbdback_frontend_changed: closed\n");
678 case XenbusStateUnknown: 681 case XenbusStateUnknown:
679 case XenbusStateInitWait: 682 case XenbusStateInitWait:
680 default: 683 default:
681 aprint_error("xbdback %s: invalid frontend state %d\n", 684 aprint_error("xbdback %s: invalid frontend state %d\n",
682 xbusd->xbusd_path, new_state); 685 xbusd->xbusd_path, new_state);
683 } 686 }
684 return; 687 return;
685} 688}
686 689
687static void 690static void
688xbdback_backend_changed(struct xenbus_watch *watch, 691xbdback_backend_changed(struct xenbus_watch *watch,
689 const char **vec, unsigned int len) 692 const char **vec, unsigned int len)
690{ 693{
691 struct xenbus_device *xbusd = watch->xbw_dev; 694 struct xenbus_device *xbusd = watch->xbw_dev;
692 struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie; 695 struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie;
693 int err; 696 int err;
694 long dev; 697 long dev;
695 char mode[32]; 698 char mode[32];
696 struct xenbus_transaction *xbt; 699 struct xenbus_transaction *xbt;
697 const char *devname; 700 const char *devname;
698 int major; 701 int major;
699 702
700 err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device", 703 err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device",
701 &dev, 10); 704 &dev, 10);
702 /* 705 /*
703 * An error can occur as the watch can fire up just after being 706 * An error can occur as the watch can fire up just after being
704 * registered. So we have to ignore error :( 707 * registered. So we have to ignore error :(
705 */ 708 */
706 if (err) 709 if (err)
707 return; 710 return;
708 /* 711 /*
709 * we can also fire up after having opened the device, don't try 712 * we can also fire up after having opened the device, don't try
710 * to do it twice. 713 * to do it twice.
711 */ 714 */
712 if (xbdi->xbdi_vp != NULL) { 715 if (xbdi->xbdi_vp != NULL) {
713 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) { 716 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) {
714 if (xbdi->xbdi_dev != dev) { 717 if (xbdi->xbdi_dev != dev) {
715 printf("xbdback %s: changing physical device " 718 printf("xbdback %s: changing physical device "
716 "from %#"PRIx64" to %#lx not supported\n", 719 "from %#"PRIx64" to %#lx not supported\n",
717 xbusd->xbusd_path, xbdi->xbdi_dev, dev); 720 xbusd->xbusd_path, xbdi->xbdi_dev, dev);
718 } 721 }
719 } 722 }
720 return; 723 return;
721 } 724 }
722 xbdi->xbdi_dev = dev; 725 xbdi->xbdi_dev = dev;
723 err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode)); 726 err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode));
724 if (err) { 727 if (err) {
725 printf("xbdback: failed to read %s/mode: %d\n", 728 printf("xbdback: failed to read %s/mode: %d\n",
726 xbusd->xbusd_path, err); 729 xbusd->xbusd_path, err);
727 return; 730 return;
728 } 731 }
729 if (mode[0] == 'w') 732 if (mode[0] == 'w')
730 xbdi->xbdi_ro = false; 733 xbdi->xbdi_ro = false;
731 else 734 else
732 xbdi->xbdi_ro = true; 735 xbdi->xbdi_ro = true;
733 major = major(xbdi->xbdi_dev); 736 major = major(xbdi->xbdi_dev);
734 devname = devsw_blk2name(major); 737 devname = devsw_blk2name(major);
735 if (devname == NULL) { 738 if (devname == NULL) {
736 printf("xbdback %s: unknown device 0x%"PRIx64"\n", 739 printf("xbdback %s: unknown device 0x%"PRIx64"\n",
737 xbusd->xbusd_path, xbdi->xbdi_dev); 740 xbusd->xbusd_path, xbdi->xbdi_dev);
738 return; 741 return;
739 } 742 }
740 xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev); 743 xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev);
741 if (xbdi->xbdi_bdevsw == NULL) { 744 if (xbdi->xbdi_bdevsw == NULL) {
742 printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n", 745 printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n",
743 xbusd->xbusd_path, xbdi->xbdi_dev); 746 xbusd->xbusd_path, xbdi->xbdi_dev);
744 return; 747 return;
745 } 748 }
746 err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp); 749 err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp);
747 if (err) { 750 if (err) {
748 printf("xbdback %s: can't open device 0x%"PRIx64": %d\n", 751 printf("xbdback %s: can't open device 0x%"PRIx64": %d\n",
749 xbusd->xbusd_path, xbdi->xbdi_dev, err); 752 xbusd->xbusd_path, xbdi->xbdi_dev, err);
750 return; 753 return;
751 } 754 }
752 err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY); 755 err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY);
753 if (err) { 756 if (err) {
754 printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n", 757 printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n",
755 xbusd->xbusd_path, xbdi->xbdi_dev, err); 758 xbusd->xbusd_path, xbdi->xbdi_dev, err);
756 vrele(xbdi->xbdi_vp); 759 vrele(xbdi->xbdi_vp);
757 return; 760 return;
758 } 761 }
759 err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED); 762 err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED);
760 if (err) { 763 if (err) {
761 printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n", 764 printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n",
762 xbusd->xbusd_path, xbdi->xbdi_dev, err); 765 xbusd->xbusd_path, xbdi->xbdi_dev, err);
763 vput(xbdi->xbdi_vp); 766 vput(xbdi->xbdi_vp);
764 return; 767 return;
765 } 768 }
766 VOP_UNLOCK(xbdi->xbdi_vp); 769 VOP_UNLOCK(xbdi->xbdi_vp);
767 770
768 /* dk device; get wedge data */ 771 /* dk device; get wedge data */
769 struct dkwedge_info wi; 772 struct dkwedge_info wi;
770 if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) { 773 if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) {
771 xbdi->xbdi_size = wi.dkw_size; 774 xbdi->xbdi_size = wi.dkw_size;
772 printf("xbd backend: attach device %s (size %" PRIu64 ") " 775 printf("xbd backend: attach device %s (size %" PRIu64 ") "
773 "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size, 776 "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size,
774 xbdi->xbdi_domid); 777 xbdi->xbdi_domid);
775 } else { 778 } else {
776 /* If both Ioctls failed set device size to 0 and return */ 779 /* If both Ioctls failed set device size to 0 and return */
777 printf("xbdback %s: can't DIOCGWEDGEINFO device " 780 printf("xbdback %s: can't DIOCGWEDGEINFO device "
778 "0x%"PRIx64": %d\n", xbusd->xbusd_path, 781 "0x%"PRIx64": %d\n", xbusd->xbusd_path,
779 xbdi->xbdi_dev, err);  782 xbdi->xbdi_dev, err);
780 xbdi->xbdi_size = xbdi->xbdi_dev = 0; 783 xbdi->xbdi_size = xbdi->xbdi_dev = 0;
781 vn_close(xbdi->xbdi_vp, FREAD, NOCRED); 784 vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
782 xbdi->xbdi_vp = NULL; 785 xbdi->xbdi_vp = NULL;
783 return; 786 return;
784 } 787 }
785again: 788again:
786 xbt = xenbus_transaction_start(); 789 xbt = xenbus_transaction_start();
787 if (xbt == NULL) { 790 if (xbt == NULL) {
788 printf("xbdback %s: can't start transaction\n", 791 printf("xbdback %s: can't start transaction\n",
789 xbusd->xbusd_path); 792 xbusd->xbusd_path);
790 return; 793 return;
791 } 794 }
792 err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 , 795 err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 ,
793 xbdi->xbdi_size); 796 xbdi->xbdi_size);
794 if (err) { 797 if (err) {
795 printf("xbdback: failed to write %s/sectors: %d\n", 798 printf("xbdback: failed to write %s/sectors: %d\n",
796 xbusd->xbusd_path, err); 799 xbusd->xbusd_path, err);
797 goto abort; 800 goto abort;
798 } 801 }
799 err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u", 802 err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u",
800 xbdi->xbdi_ro ? VDISK_READONLY : 0); 803 xbdi->xbdi_ro ? VDISK_READONLY : 0);
801 if (err) { 804 if (err) {
802 printf("xbdback: failed to write %s/info: %d\n", 805 printf("xbdback: failed to write %s/info: %d\n",
803 xbusd->xbusd_path, err); 806 xbusd->xbusd_path, err);
804 goto abort; 807 goto abort;
805 } 808 }
806 err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu", 809 err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu",
807 (u_long)DEV_BSIZE); 810 (u_long)DEV_BSIZE);
808 if (err) { 811 if (err) {
809 printf("xbdback: failed to write %s/sector-size: %d\n", 812 printf("xbdback: failed to write %s/sector-size: %d\n",
810 xbusd->xbusd_path, err); 813 xbusd->xbusd_path, err);
811 goto abort; 814 goto abort;
812 } 815 }
813 err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache", 816 err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache",
814 "%u", 1); 817 "%u", 1);
815 if (err) { 818 if (err) {
816 printf("xbdback: failed to write %s/feature-flush-cache: %d\n", 819 printf("xbdback: failed to write %s/feature-flush-cache: %d\n",
817 xbusd->xbusd_path, err); 820 xbusd->xbusd_path, err);
818 goto abort; 821 goto abort;
819 } 822 }
820 err = xenbus_printf(xbt, xbusd->xbusd_path, 823 err = xenbus_printf(xbt, xbusd->xbusd_path,
821 "feature-max-indirect-segments", "%u", VBD_MAX_INDIRECT_SEGMENTS); 824 "feature-max-indirect-segments", "%u", VBD_MAX_INDIRECT_SEGMENTS);
822 if (err) { 825 if (err) {
823 printf("xbdback: failed to write %s/feature-indirect: %d\n", 826 printf("xbdback: failed to write %s/feature-indirect: %d\n",
824 xbusd->xbusd_path, err); 827 xbusd->xbusd_path, err);
825 goto abort; 828 goto abort;
826 } 829 }
827 err = xenbus_transaction_end(xbt, 0); 830 err = xenbus_transaction_end(xbt, 0);
828 if (err == EAGAIN) 831 if (err == EAGAIN)
829 goto again; 832 goto again;
830 if (err) { 833 if (err) {
831 printf("xbdback %s: can't end transaction: %d\n", 834 printf("xbdback %s: can't end transaction: %d\n",
832 xbusd->xbusd_path, err); 835 xbusd->xbusd_path, err);
833 } 836 }
834 err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected); 837 err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
835 if (err) { 838 if (err) {
836 printf("xbdback %s: can't switch state: %d\n", 839 printf("xbdback %s: can't switch state: %d\n",
837 xbusd->xbusd_path, err); 840 xbusd->xbusd_path, err);
838 } 841 }
839 return; 842 return;
840abort: 843abort:
841 xenbus_transaction_end(xbt, 1); 844 xenbus_transaction_end(xbt, 1);
842} 845}
843 846
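The connect path above publishes the backend parameters (sectors, info, sector-size and the feature flags) inside a single xenbus transaction and restarts the whole transaction when xenbus_transaction_end() returns EAGAIN. A minimal sketch of that retry idiom follows; it is illustrative only, and write_backend_props() is a hypothetical stand-in for the sequence of xenbus_printf() calls shown in the diff.

/*
 * Sketch of the xenbus transaction-retry idiom.  Assumes <xen/xenbus.h>
 * and the usual kernel headers; write_backend_props() is hypothetical.
 */
static void
publish_backend_props(struct xenbus_device *xbusd)
{
	struct xenbus_transaction *xbt;
	int err;

again:
	xbt = xenbus_transaction_start();
	if (xbt == NULL)
		return;				/* could not even start */
	if (write_backend_props(xbt, xbusd) != 0) {
		xenbus_transaction_end(xbt, 1);	/* abort */
		return;
	}
	err = xenbus_transaction_end(xbt, 0);	/* try to commit */
	if (err == EAGAIN)
		goto again;			/* lost a race, redo all writes */
}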
844/* 847/*
845 * Used by a xbdi thread to signal that it is now disconnected. 848 * Used by a xbdi thread to signal that it is now disconnected.
846 */ 849 */
847static void 850static void
848xbdback_finish_disconnect(struct xbdback_instance *xbdi) 851xbdback_finish_disconnect(struct xbdback_instance *xbdi)
849{ 852{
850 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 853 KASSERT(mutex_owned(&xbdi->xbdi_lock));
851 KASSERT(xbdi->xbdi_status == DISCONNECTING); 854 KASSERT(xbdi->xbdi_status == DISCONNECTING);
852 855
853 xbdi->xbdi_status = DISCONNECTED; 856 xbdi->xbdi_status = DISCONNECTED;
854 857
855 cv_signal(&xbdi->xbdi_cv); 858 cv_broadcast(&xbdi->xbdi_cv);
856} 859}
857 860
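The switch from cv_signal() to cv_broadcast() here makes sure that every thread sleeping on xbdi_cv observes the DISCONNECTED transition, not just one arbitrary waiter. The waiting side of such a handshake usually looks like the sketch below; this is a generic pattern with hypothetical names, not the driver's exact code.

#include <sys/mutex.h>
#include <sys/condvar.h>

/* Hypothetical instance: status and condvar are guarded by one mutex,
 * initialized elsewhere with mutex_init()/cv_init(). */
struct inst {
	kmutex_t	lock;
	kcondvar_t	cv;
	int		status;		/* e.g. DISCONNECTING / DISCONNECTED */
};

/* The waiter re-checks the predicate after every wakeup; a broadcast on
 * the status change wakes all such sleepers at once. */
static void
wait_until(struct inst *xi, int wanted_status)
{
	mutex_enter(&xi->lock);
	while (xi->status != wanted_status)
		cv_wait(&xi->cv, &xi->lock);
	mutex_exit(&xi->lock);
}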
858static bool 861static bool
859xbdif_lookup(domid_t dom , uint32_t handle) 862xbdif_lookup(domid_t dom , uint32_t handle)
860{ 863{
861 struct xbdback_instance *xbdi; 864 struct xbdback_instance *xbdi;
862 bool found = false; 865 bool found = false;
863 866
864 mutex_enter(&xbdback_lock); 867 KASSERT(mutex_owned(&xbdback_lock));
 868
865 SLIST_FOREACH(xbdi, &xbdback_instances, next) { 869 SLIST_FOREACH(xbdi, &xbdback_instances, next) {
866 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) { 870 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) {
867 found = true; 871 found = true;
868 break; 872 break;
869 } 873 }
870 } 874 }
871 mutex_exit(&xbdback_lock); 
872 875
873 return found; 876 return found;
874} 877}
875 878
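xbdif_lookup() no longer takes xbdback_lock itself; it only asserts that the caller holds it. That way the lookup and whatever the caller does with the answer form one critical section, so another CPU cannot add or remove an instance in between. A small sketch of the pattern, with hypothetical names:

/* Sketch only: instances_lock, instance_exists() and attach_instance()
 * are hypothetical; assumes <sys/param.h>, <sys/systm.h>, <sys/mutex.h>.
 * instances_lock is mutex_init()ed elsewhere. */
static kmutex_t instances_lock;

static bool
instance_exists(int id)
{
	KASSERT(mutex_owned(&instances_lock));
	/* ... walk the instance list while the lock is held ... */
	return false;
}

static int
attach_instance(int id)
{
	mutex_enter(&instances_lock);
	if (instance_exists(id)) {	/* answer stays valid: lock still held */
		mutex_exit(&instances_lock);
		return EEXIST;
	}
	/* ... allocate and insert the new instance under the same lock ... */
	mutex_exit(&instances_lock);
	return 0;
}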
876static int 879static int
877xbdback_evthandler(void *arg) 880xbdback_evthandler(void *arg)
878{ 881{
879 struct xbdback_instance *xbdi = arg; 882 struct xbdback_instance *xbdi = arg;
880 883
881 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n", 884 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n",
882 xbdi->xbdi_domid, xbdi->xbdi_cont)); 885 xbdi->xbdi_domid, xbdi->xbdi_cont));
883 886
 887 mutex_enter(&xbdi->xbdi_lock);
884 xbdback_wakeup_thread(xbdi); 888 xbdback_wakeup_thread(xbdi);
 889 mutex_exit(&xbdi->xbdi_lock);
885 890
886 return 1; 891 return 1;
887} 892}
888 893
889/* 894/*
890 * Main thread routine for one xbdback instance. Woken up by 895 * Main thread routine for one xbdback instance. Woken up by
891 * xbdback_evthandler when a domain has I/O work scheduled in an I/O ring. 896 * xbdback_evthandler when a domain has I/O work scheduled in an I/O ring.
892 */ 897 */
893static void 898static void
894xbdback_thread(void *arg) 899xbdback_thread(void *arg)
895{ 900{
896 struct xbdback_instance *xbdi = arg; 901 struct xbdback_instance *xbdi = arg;
897 902
 903 mutex_enter(&xbdi->xbdi_lock);
898 for (;;) { 904 for (;;) {
899 mutex_enter(&xbdi->xbdi_lock); 
900 switch (xbdi->xbdi_status) { 905 switch (xbdi->xbdi_status) {
901 case WAITING: 906 case WAITING:
902 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 907 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
903 mutex_exit(&xbdi->xbdi_lock); 
904 break; 908 break;
905 case RUN: 909 case RUN:
906 xbdi->xbdi_status = WAITING; /* reset state */ 910 xbdi->xbdi_status = WAITING; /* reset state */
907 mutex_exit(&xbdi->xbdi_lock); 
908 911
909 if (xbdi->xbdi_cont == NULL) { 912 if (xbdi->xbdi_cont == NULL) {
910 xbdi->xbdi_cont = xbdback_co_main; 913 xbdi->xbdi_cont = xbdback_co_main;
911 } 914 }
912 915
913 xbdback_trampoline(xbdi, xbdi); 916 xbdback_trampoline(xbdi, xbdi);
914 break; 917 break;
915 case DISCONNECTING: 918 case DISCONNECTING:
916 if (xbdi->xbdi_pendingreqs > 0) { 919 if (xbdi->xbdi_pendingreqs > 0) {
917 /* there are pending I/Os. Wait for them. */ 920 /* there are pending I/Os. Wait for them. */
918 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 921 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
919 mutex_exit(&xbdi->xbdi_lock); 922 continue;
920 break; 
921 } 923 }
922  924
923 /* All I/Os should have been processed by now, 925 /* All I/Os should have been processed by now,
924 * xbdi_refcnt should drop to 0 */ 926 * xbdi_refcnt should drop to 0 */
925 xbdi_put(xbdi); 927 xbdi_put(xbdi);
926 KASSERT(xbdi->xbdi_refcnt == 0); 928 KASSERT(xbdi->xbdi_refcnt == 0);
927 mutex_exit(&xbdi->xbdi_lock); 929 goto out;
928 kthread_exit(0); 930 /* NOTREACHED */
929 break; 
930 default: 931 default:
931 panic("%s: invalid state %d", 932 panic("%s: invalid state %d",
932 xbdi->xbdi_name, xbdi->xbdi_status); 933 xbdi->xbdi_name, xbdi->xbdi_status);
933 } 934 }
934 } 935 }
 936out:
 937 mutex_exit(&xbdi->xbdi_lock);
 938
 939 kthread_exit(0);
935} 940}
936 941
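The main loop was restructured so that xbdi_lock is taken once before the for(;;) and released only on the way out; cv_wait() atomically drops and re-takes it while sleeping, so every status check in the switch now runs with the lock held. A condensed sketch of that shape, using hypothetical names and states and eliding the actual request processing:

#include <sys/mutex.h>
#include <sys/condvar.h>
#include <sys/kthread.h>

enum inst_status { WAITING, RUN, DISCONNECTING };

struct inst {				/* hypothetical instance */
	kmutex_t	 lock;
	kcondvar_t	 cv;
	enum inst_status status;
	unsigned	 pending;	/* outstanding I/Os */
};

static void
worker(void *arg)
{
	struct inst *xi = arg;

	mutex_enter(&xi->lock);
	for (;;) {
		if (xi->status == WAITING) {
			cv_wait(&xi->cv, &xi->lock);	/* drops + retakes lock */
			continue;
		}
		if (xi->status == DISCONNECTING) {
			if (xi->pending > 0) {
				cv_wait(&xi->cv, &xi->lock);
				continue;		/* wait for I/O to drain */
			}
			break;				/* drained, leave the loop */
		}
		/* RUN: process the ring, then go back to waiting */
		xi->status = WAITING;
	}
	mutex_exit(&xi->lock);
	kthread_exit(0);
}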
937static void * 942static void *
938xbdback_co_main(struct xbdback_instance *xbdi, void *obj) 943xbdback_co_main(struct xbdback_instance *xbdi, void *obj)
939{ 944{
940 (void)obj; 945 (void)obj;
941 946
942 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod; 947 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod;
943 xen_rmb(); /* ensure we see all requests up to req_prod */ 948 xen_rmb(); /* ensure we see all requests up to req_prod */
944 /* 949 /*
945 * note that we'll eventually get a full ring of requests. 950 * note that we'll eventually get a full ring of requests.
946 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod) 951 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod)
947 */ 952 */
948 xbdi->xbdi_cont = xbdback_co_main_loop; 953 xbdi->xbdi_cont = xbdback_co_main_loop;
949 return xbdi; 954 return xbdi;
950} 955}
951 956
952/* 957/*
953 * Fetch a blkif request from the ring, and pass control to the appropriate 958 * Fetch a blkif request from the ring, and pass control to the appropriate
954 * continuation. 959 * continuation.
955 * If someone asked for disconnection, do not fetch any more requests from 960 * If someone asked for disconnection, do not fetch any more requests from
956 * the ring. 961 * the ring.
957 */ 962 */
958static void * 963static void *
959xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj __unused)  964xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj __unused)
960{ 965{
961 blkif_request_t *req, *reqn; 966 blkif_request_t *req, *reqn;
962 blkif_x86_32_request_t *req32; 967 blkif_x86_32_request_t *req32;
963 blkif_x86_64_request_t *req64; 968 blkif_x86_64_request_t *req64;
964 blkif_request_indirect_t *rin; 969 blkif_request_indirect_t *rin;
965 970
966 if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) { 971 if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) {
967 req = &xbdi->xbdi_xen_req; 972 req = &xbdi->xbdi_xen_req;
968 memset(req, 0, sizeof(*req)); 973 memset(req, 0, sizeof(*req));
969 974
970 switch(xbdi->xbdi_proto) { 975 switch(xbdi->xbdi_proto) {
971 case XBDIP_NATIVE: 976 case XBDIP_NATIVE:
972 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n, 977 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
973 xbdi->xbdi_ring.ring_n.req_cons); 978 xbdi->xbdi_ring.ring_n.req_cons);
974 req->operation = reqn->operation; 979 req->operation = reqn->operation;
975 req->id = reqn->id; 980 req->id = reqn->id;
976 break; 981 break;
977 case XBDIP_32: 982 case XBDIP_32:
978 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32, 983 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
979 xbdi->xbdi_ring.ring_n.req_cons); 984 xbdi->xbdi_ring.ring_n.req_cons);
980 req->operation = req32->operation; 985 req->operation = req32->operation;
981 req->id = req32->id; 986 req->id = req32->id;
982 break; 987 break;
983 case XBDIP_64: 988 case XBDIP_64:
984 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64, 989 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
985 xbdi->xbdi_ring.ring_n.req_cons); 990 xbdi->xbdi_ring.ring_n.req_cons);
986 req->operation = req64->operation; 991 req->operation = req64->operation;
987 req->id = req64->id; 992 req->id = req64->id;
988 break; 993 break;
989 } 994 }
990 __insn_barrier(); 995 __insn_barrier();
991 XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x " 996 XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x "
992 "resp_prod 0x%x id %" PRIu64 "\n", req->operation, 997 "resp_prod 0x%x id %" PRIu64 "\n", req->operation,
993 xbdi->xbdi_ring.ring_n.req_cons, 998 xbdi->xbdi_ring.ring_n.req_cons,
994 xbdi->xbdi_req_prod, 999 xbdi->xbdi_req_prod,
995 xbdi->xbdi_ring.ring_n.rsp_prod_pvt, 1000 xbdi->xbdi_ring.ring_n.rsp_prod_pvt,
996 req->id)); 1001 req->id));
997 switch (req->operation) { 1002 switch (req->operation) {
998 case BLKIF_OP_INDIRECT: 1003 case BLKIF_OP_INDIRECT:
999 /* just check indirect_op, rest is handled later */ 1004 /* just check indirect_op, rest is handled later */
1000 rin = (blkif_request_indirect_t *) 1005 rin = (blkif_request_indirect_t *)
1001 RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n, 1006 RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
1002 xbdi->xbdi_ring.ring_n.req_cons); 1007 xbdi->xbdi_ring.ring_n.req_cons);
1003 if (rin->indirect_op != BLKIF_OP_READ && 1008 if (rin->indirect_op != BLKIF_OP_READ &&
1004 rin->indirect_op != BLKIF_OP_WRITE) { 1009 rin->indirect_op != BLKIF_OP_WRITE) {
1005 if (ratecheck(&xbdi->xbdi_lasterr_time, 1010 if (ratecheck(&xbdi->xbdi_lasterr_time,
1006 &xbdback_err_intvl)) { 1011 &xbdback_err_intvl)) {
1007 printf("%s: unknown ind operation %d\n", 1012 printf("%s: unknown ind operation %d\n",
1008 xbdi->xbdi_name, 1013 xbdi->xbdi_name,
1009 rin->indirect_op); 1014 rin->indirect_op);
1010 } 1015 }
1011 goto fail; 1016 goto fail;
1012 } 1017 }
1013 /* FALLTHROUGH */ 1018 /* FALLTHROUGH */
1014 case BLKIF_OP_READ: 1019 case BLKIF_OP_READ:
1015 case BLKIF_OP_WRITE: 1020 case BLKIF_OP_WRITE:
1016 xbdi->xbdi_cont = xbdback_co_io; 1021 xbdi->xbdi_cont = xbdback_co_io;
1017 break; 1022 break;
1018 case BLKIF_OP_FLUSH_DISKCACHE: 1023 case BLKIF_OP_FLUSH_DISKCACHE:
1019 xbdi_get(xbdi); 1024 xbdi_get(xbdi);
1020 xbdi->xbdi_cont = xbdback_co_cache_flush; 1025 xbdi->xbdi_cont = xbdback_co_cache_flush;
1021 break; 1026 break;
1022 default: 1027 default:
1023 if (ratecheck(&xbdi->xbdi_lasterr_time, 1028 if (ratecheck(&xbdi->xbdi_lasterr_time,
1024 &xbdback_err_intvl)) { 1029 &xbdback_err_intvl)) {
1025 printf("%s: unknown operation %d\n", 1030 printf("%s: unknown operation %d\n",
1026 xbdi->xbdi_name, req->operation); 1031 xbdi->xbdi_name, req->operation);
1027 } 1032 }
1028fail: 1033fail:
1029 xbdback_send_reply(xbdi, req->id, req->operation, 1034 xbdback_send_reply(xbdi, req->id, req->operation,
1030 BLKIF_RSP_ERROR); 1035 BLKIF_RSP_ERROR);
1031 xbdi->xbdi_cont = xbdback_co_main_incr; 1036 xbdi->xbdi_cont = xbdback_co_main_incr;
1032 break; 1037 break;
1033 } 1038 }
1034 } else { 1039 } else {
1035 xbdi->xbdi_cont = xbdback_co_main_done2; 1040 xbdi->xbdi_cont = xbdback_co_main_done2;
1036 } 1041 }
1037 return xbdi; 1042 return xbdi;
1038} 1043}
1039 1044
1040/* 1045/*
1041 * Increment consumer index and move on to the next request. In case 1046 * Increment consumer index and move on to the next request. In case
1042 * we want to disconnect, leave continuation now. 1047 * we want to disconnect, leave continuation now.
1043 */ 1048 */
1044static void * 1049static void *
1045xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj) 1050xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj __unused)
1046{ 1051{
1047 (void)obj; 1052 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1053
1048 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n; 1054 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n;
1049 1055
1050 ring->req_cons++; 1056 ring->req_cons++;
1051 1057
1052 /* 
1053 * Do not bother with locking here when checking for xbdi_status: if 
1054 * we get a transient state, we will get the right value at 
1055 * the next increment. 
1056 */ 
1057 if (xbdi->xbdi_status == DISCONNECTING) 1058 if (xbdi->xbdi_status == DISCONNECTING)
1058 xbdi->xbdi_cont = NULL; 1059 xbdi->xbdi_cont = NULL;
1059 else 1060 else
1060 xbdi->xbdi_cont = xbdback_co_main_loop; 1061 xbdi->xbdi_cont = xbdback_co_main_loop;
1061 1062
1062 /* 
1063 * Each time the thread processes a full ring of requests, give 
1064 * a chance to other threads to process I/Os too 
1065 */ 
1066 if ((ring->req_cons % BLKIF_RING_SIZE) == 0) 
1067 yield(); 
1068 
1069 return xbdi; 1063 return xbdi;
1070} 1064}
1071 1065
1072/* 1066/*
1073 * Check for requests in the instance's ring. In case there are, start again 1067 * Check for requests in the instance's ring. In case there are, start again
1074 * from the beginning. If not, stall. 1068 * from the beginning. If not, stall.
1075 */ 1069 */
1076static void * 1070static void *
1077xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj) 1071xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj)
1078{ 1072{
1079 int work_to_do; 1073 int work_to_do;
1080 1074
1081 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do); 1075 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
1082 if (work_to_do) 1076 if (work_to_do)
1083 xbdi->xbdi_cont = xbdback_co_main; 1077 xbdi->xbdi_cont = xbdback_co_main;
1084 else 1078 else
1085 xbdi->xbdi_cont = NULL; 1079 xbdi->xbdi_cont = NULL;
1086 1080
1087 return xbdi; 1081 return xbdi;
1088} 1082}
1089 1083
1090/* 1084/*
1091 * Frontend requested a cache flush operation. 1085 * Frontend requested a cache flush operation.
1092 */ 1086 */
1093static void * 1087static void *
1094xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj __unused) 1088xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj __unused)
1095{ 1089{
1096 if (xbdi->xbdi_pendingreqs > 0) { 1090 if (xbdi->xbdi_pendingreqs > 0) {
1097 /* 1091 /*
1098 * There are pending requests. 1092 * There are pending requests.
1099 * Event or iodone() will restart processing 1093 * Event or iodone() will restart processing
1100 */ 1094 */
1101 xbdi->xbdi_cont = NULL; 1095 xbdi->xbdi_cont = NULL;
1102 xbdi_put(xbdi); 1096 xbdi_put(xbdi);
1103 return NULL; 1097 return NULL;
1104 } 1098 }
1105 xbdi->xbdi_cont = xbdback_co_cache_doflush; 1099 xbdi->xbdi_cont = xbdback_co_cache_doflush;
1106 return xbdback_io_get(xbdi); 1100 return xbdback_io_get(xbdi);
1107} 1101}
1108 1102
1109/* Start the flush work */ 1103/* Start the flush work */
1110static void * 1104static void *
1111xbdback_co_cache_doflush(struct xbdback_instance *xbdi, void *obj) 1105xbdback_co_cache_doflush(struct xbdback_instance *xbdi, void *obj)
1112{ 1106{
1113 struct xbdback_io *xbd_io; 1107 struct xbdback_io *xbd_io;
1114 1108
1115 XENPRINTF(("xbdback_co_cache_doflush %p %p\n", xbdi, obj)); 1109 XENPRINTF(("xbdback_co_cache_doflush %p %p\n", xbdi, obj));
1116 xbd_io = obj; 1110 xbd_io = obj;
1117 xbd_io->xio_xbdi = xbdi; 1111 xbd_io->xio_xbdi = xbdi;
1118 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation; 1112 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation;
1119 xbd_io->xio_id = xbdi->xbdi_xen_req.id; 1113 xbd_io->xio_id = xbdi->xbdi_xen_req.id;
1120 xbdi->xbdi_cont = xbdback_co_do_io; 1114 xbdi->xbdi_cont = xbdback_co_do_io;
1121 return xbd_io; 1115 return xbd_io;
1122} 1116}
1123 1117
1124/* 1118/*
1125 * A read or write I/O request must be processed. Do some checks first, 1119 * A read or write I/O request must be processed. Do some checks first,
1126 * then get the segment information directly from the ring request. 1120 * then get the segment information directly from the ring request.
1127 */ 1121 */
1128static void * 1122static void *
1129xbdback_co_io(struct xbdback_instance *xbdi, void *obj __unused) 1123xbdback_co_io(struct xbdback_instance *xbdi, void *obj __unused)
1130{  1124{
1131 int i, error; 1125 int i, error;
1132 blkif_request_t *req, *reqn; 1126 blkif_request_t *req, *reqn;
1133 blkif_x86_32_request_t *req32; 1127 blkif_x86_32_request_t *req32;
1134 blkif_x86_64_request_t *req64; 1128 blkif_x86_64_request_t *req64;
1135 blkif_request_indirect_t *rinn; 1129 blkif_request_indirect_t *rinn;
1136 blkif_x86_32_request_indirect_t *rin32; 1130 blkif_x86_32_request_indirect_t *rin32;
1137 blkif_x86_64_request_indirect_t *rin64; 1131 blkif_x86_64_request_indirect_t *rin64;
1138 1132
1139 req = &xbdi->xbdi_xen_req; 1133 req = &xbdi->xbdi_xen_req;
1140 1134
1141 /* some sanity checks */ 1135 /* some sanity checks */
1142 KASSERT(req->operation == BLKIF_OP_READ || 1136 KASSERT(req->operation == BLKIF_OP_READ ||
1143 req->operation == BLKIF_OP_WRITE || 1137 req->operation == BLKIF_OP_WRITE ||
1144 req->operation == BLKIF_OP_INDIRECT); 1138 req->operation == BLKIF_OP_INDIRECT);
1145 if (req->operation == BLKIF_OP_WRITE) { 1139 if (req->operation == BLKIF_OP_WRITE) {
1146 if (xbdi->xbdi_ro) { 1140 if (xbdi->xbdi_ro) {
1147 error = EROFS; 1141 error = EROFS;
1148 goto end; 1142 goto end;
1149 } 1143 }
1150 } 1144 }
1151 1145
1152 /* copy request segments */ 1146 /* copy request segments */
1153 switch (xbdi->xbdi_proto) { 1147 switch (xbdi->xbdi_proto) {
1154 case XBDIP_NATIVE: 1148 case XBDIP_NATIVE:
1155 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n, 1149 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
1156 xbdi->xbdi_ring.ring_n.req_cons); 1150 xbdi->xbdi_ring.ring_n.req_cons);
1157 req->handle = reqn->handle; 1151 req->handle = reqn->handle;
1158 req->sector_number = reqn->sector_number; 1152 req->sector_number = reqn->sector_number;
1159 if (reqn->operation == BLKIF_OP_INDIRECT) { 1153 if (reqn->operation == BLKIF_OP_INDIRECT) {
1160 rinn = (blkif_request_indirect_t *)reqn; 1154 rinn = (blkif_request_indirect_t *)reqn;
1161 req->operation = rinn->indirect_op; 1155 req->operation = rinn->indirect_op;
1162 req->nr_segments = (uint8_t)rinn->nr_segments; 1156 req->nr_segments = (uint8_t)rinn->nr_segments;
1163 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) 1157 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS)
1164 goto bad_nr_segments; 1158 goto bad_nr_segments;
1165 xbdi->xbdi_in_gntref = rinn->indirect_grefs[0]; 1159 xbdi->xbdi_in_gntref = rinn->indirect_grefs[0];
1166 /* first_sect and segment grefs fetched later */ 1160 /* first_sect and segment grefs fetched later */
1167 } else { 1161 } else {
1168 req->nr_segments = reqn->nr_segments; 1162 req->nr_segments = reqn->nr_segments;
1169 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) 1163 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
1170 goto bad_nr_segments; 1164 goto bad_nr_segments;
1171 for (i = 0; i < req->nr_segments; i++) 1165 for (i = 0; i < req->nr_segments; i++)
1172 xbdi->xbdi_seg[i] = reqn->seg[i]; 1166 xbdi->xbdi_seg[i] = reqn->seg[i];
1173 xbdi->xbdi_in_gntref = 0; 1167 xbdi->xbdi_in_gntref = 0;
1174 } 1168 }
1175 break; 1169 break;
1176 case XBDIP_32: 1170 case XBDIP_32:
1177 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32, 1171 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
1178 xbdi->xbdi_ring.ring_n.req_cons); 1172 xbdi->xbdi_ring.ring_n.req_cons);
1179 req->handle = req32->handle; 1173 req->handle = req32->handle;
1180 req->sector_number = req32->sector_number; 1174 req->sector_number = req32->sector_number;
1181 if (req32->operation == BLKIF_OP_INDIRECT) { 1175 if (req32->operation == BLKIF_OP_INDIRECT) {
1182 rin32 = (blkif_x86_32_request_indirect_t *)req32; 1176 rin32 = (blkif_x86_32_request_indirect_t *)req32;
1183 req->operation = rin32->indirect_op; 1177 req->operation = rin32->indirect_op;
1184 req->nr_segments = (uint8_t)rin32->nr_segments; 1178 req->nr_segments = (uint8_t)rin32->nr_segments;
1185 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) 1179 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS)
1186 goto bad_nr_segments; 1180 goto bad_nr_segments;
1187 xbdi->xbdi_in_gntref = rin32->indirect_grefs[0]; 1181 xbdi->xbdi_in_gntref = rin32->indirect_grefs[0];
1188 /* first_sect and segment grefs fetched later */ 1182 /* first_sect and segment grefs fetched later */
1189 } else { 1183 } else {
1190 req->nr_segments = req32->nr_segments; 1184 req->nr_segments = req32->nr_segments;
1191 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) 1185 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
1192 goto bad_nr_segments; 1186 goto bad_nr_segments;
1193 for (i = 0; i < req->nr_segments; i++) 1187 for (i = 0; i < req->nr_segments; i++)
1194 xbdi->xbdi_seg[i] = req32->seg[i]; 1188 xbdi->xbdi_seg[i] = req32->seg[i];
1195 xbdi->xbdi_in_gntref = 0; 1189 xbdi->xbdi_in_gntref = 0;
1196 } 1190 }
1197 break; 1191 break;
1198 case XBDIP_64: 1192 case XBDIP_64:
1199 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64, 1193 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
1200 xbdi->xbdi_ring.ring_n.req_cons); 1194 xbdi->xbdi_ring.ring_n.req_cons);
1201 req->handle = req64->handle; 1195 req->handle = req64->handle;
1202 req->sector_number = req64->sector_number; 1196 req->sector_number = req64->sector_number;
1203 if (req64->operation == BLKIF_OP_INDIRECT) { 1197 if (req64->operation == BLKIF_OP_INDIRECT) {
1204 rin64 = (blkif_x86_64_request_indirect_t *)req64; 1198 rin64 = (blkif_x86_64_request_indirect_t *)req64;
1205 req->nr_segments = (uint8_t)rin64->nr_segments; 1199 req->nr_segments = (uint8_t)rin64->nr_segments;
1206 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) 1200 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS)
1207 goto bad_nr_segments; 1201 goto bad_nr_segments;
1208 xbdi->xbdi_in_gntref = rin64->indirect_grefs[0]; 1202 xbdi->xbdi_in_gntref = rin64->indirect_grefs[0];
1209 /* first_sect and segment grefs fetched later */ 1203 /* first_sect and segment grefs fetched later */
1210 } else { 1204 } else {
1211 req->nr_segments = req64->nr_segments; 1205 req->nr_segments = req64->nr_segments;
1212 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) 1206 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
1213 goto bad_nr_segments; 1207 goto bad_nr_segments;
1214 for (i = 0; i < req->nr_segments; i++) 1208 for (i = 0; i < req->nr_segments; i++)
1215 xbdi->xbdi_seg[i] = req64->seg[i]; 1209 xbdi->xbdi_seg[i] = req64->seg[i];
1216 xbdi->xbdi_in_gntref = 0; 1210 xbdi->xbdi_in_gntref = 0;
1217 } 1211 }
1218 break; 1212 break;
1219 } 1213 }
1220 1214
1221 /* Max value checked already earlier */ 1215 /* Max value checked already earlier */
1222 if (req->nr_segments < 1) 1216 if (req->nr_segments < 1)
1223 goto bad_nr_segments; 1217 goto bad_nr_segments;
1224 1218
1225 xbdi->xbdi_cont = xbdback_co_io_gotio; 1219 xbdi->xbdi_cont = xbdback_co_io_gotio;
1226 return xbdback_io_get(xbdi); 1220 return xbdback_io_get(xbdi);
1227 1221
1228 bad_nr_segments: 1222 bad_nr_segments:
1229 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) { 1223 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
1230 printf("%s: invalid number of segments: %d\n", 1224 printf("%s: invalid number of segments: %d\n",
1231 xbdi->xbdi_name, req->nr_segments); 1225 xbdi->xbdi_name, req->nr_segments);
1232 } 1226 }
1233 error = EINVAL; 1227 error = EINVAL;
1234 /* FALLTHROUGH */ 1228 /* FALLTHROUGH */
1235 1229
1236 end: 1230 end:
1237 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id, 1231 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id,
1238 xbdi->xbdi_xen_req.operation, 1232 xbdi->xbdi_xen_req.operation,
1239 (error == EROFS) ? BLKIF_RSP_EOPNOTSUPP : BLKIF_RSP_ERROR); 1233 (error == EROFS) ? BLKIF_RSP_EOPNOTSUPP : BLKIF_RSP_ERROR);
1240 xbdi->xbdi_cont = xbdback_co_main_incr; 1234 xbdi->xbdi_cont = xbdback_co_main_incr;
1241 return xbdi; 1235 return xbdi;
1242} 1236}
1243 1237
1244/* Prepare an I/O buffer for a xbdback instance */ 1238/* Prepare an I/O buffer for a xbdback instance */
1245static void * 1239static void *
1246xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj) 1240xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
1247{ 1241{
1248 struct xbdback_io *xbd_io; 1242 struct xbdback_io *xbd_io;
1249 int buf_flags; 1243 int buf_flags;
1250 size_t bcount; 1244 size_t bcount;
1251 blkif_request_t *req; 1245 blkif_request_t *req;
1252 1246
 1247 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1248
1253 xbdi_get(xbdi); 1249 xbdi_get(xbdi);
1254 atomic_inc_uint(&xbdi->xbdi_pendingreqs); 1250 xbdi->xbdi_pendingreqs++;
1255  1251
1256 req = &xbdi->xbdi_xen_req; 1252 req = &xbdi->xbdi_xen_req;
1257 xbd_io = obj; 1253 xbd_io = obj;
1258 memset(xbd_io, 0, sizeof(*xbd_io)); 1254 memset(xbd_io, 0, sizeof(*xbd_io));
1259 buf_init(&xbd_io->xio_buf); 1255 buf_init(&xbd_io->xio_buf);
1260 xbd_io->xio_xbdi = xbdi; 1256 xbd_io->xio_xbdi = xbdi;
1261 xbd_io->xio_operation = req->operation; 1257 xbd_io->xio_operation = req->operation;
1262 xbd_io->xio_id = req->id; 1258 xbd_io->xio_id = req->id;
1263 1259
1264 /* If segments are on an indirect page, copy them now */ 1260 /* If segments are on an indirect page, copy them now */
1265 if (xbdi->xbdi_in_gntref) { 1261 if (xbdi->xbdi_in_gntref) {
1266 gnttab_copy_t gop; 1262 gnttab_copy_t gop;
1267 paddr_t ma; 1263 paddr_t ma;
1268 1264
1269 gop.flags = GNTCOPY_source_gref; 1265 gop.flags = GNTCOPY_source_gref;
1270 gop.len = req->nr_segments 1266 gop.len = req->nr_segments
1271 * sizeof(struct blkif_request_segment); 1267 * sizeof(struct blkif_request_segment);
1272 1268
1273 gop.source.u.ref = xbdi->xbdi_in_gntref; 1269 gop.source.u.ref = xbdi->xbdi_in_gntref;
1274 gop.source.offset = 0; 1270 gop.source.offset = 0;
1275 gop.source.domid = xbdi->xbdi_domid; 1271 gop.source.domid = xbdi->xbdi_domid;
1276 1272
1277 ma = xbdi->xbdi_seg_dmamap->dm_segs[0].ds_addr; 1273 ma = xbdi->xbdi_seg_dmamap->dm_segs[0].ds_addr;
1278 gop.dest.offset = ma & PAGE_MASK; 1274 gop.dest.offset = ma & PAGE_MASK;
1279 gop.dest.domid = DOMID_SELF; 1275 gop.dest.domid = DOMID_SELF;
1280 gop.dest.u.gmfn = ma >> PAGE_SHIFT; 1276 gop.dest.u.gmfn = ma >> PAGE_SHIFT;
1281 1277
1282 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, &gop, 1) != 0) { 1278 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, &gop, 1) != 0) {
1283 printf("%s: GNTTABOP_copy failed\n", xbdi->xbdi_name); 1279 printf("%s: GNTTABOP_copy failed\n", xbdi->xbdi_name);
1284 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id, 1280 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id,
1285 xbdi->xbdi_xen_req.operation, 1281 xbdi->xbdi_xen_req.operation,
1286 BLKIF_RSP_ERROR); 1282 BLKIF_RSP_ERROR);
1287 xbdi->xbdi_cont = xbdback_co_main_incr; 1283 xbdi->xbdi_cont = xbdback_co_main_incr;
1288 return NULL; 1284 return NULL;
1289 } 1285 }
1290 } 1286 }
1291 1287
1292 /* Process segments */ 1288 /* Process segments */
1293 bcount = 0; 1289 bcount = 0;
1294 for (int i = 0; i < req->nr_segments; i++) { 1290 for (int i = 0; i < req->nr_segments; i++) {
1295 struct blkif_request_segment *seg = &xbdi->xbdi_seg[i]; 1291 struct blkif_request_segment *seg = &xbdi->xbdi_seg[i];
1296 xbd_io->xio_gref[i] = seg->gref; 1292 xbd_io->xio_gref[i] = seg->gref;
1297 bcount += (seg->last_sect - seg->first_sect + 1) 1293 bcount += (seg->last_sect - seg->first_sect + 1)
1298 * VBD_BSIZE; 1294 * VBD_BSIZE;
1299 } 1295 }
1300 xbd_io->xio_nrma = req->nr_segments; 1296 xbd_io->xio_nrma = req->nr_segments;
1301 xbd_io->xio_start_offset = xbdi->xbdi_seg[0].first_sect * VBD_BSIZE; 1297 xbd_io->xio_start_offset = xbdi->xbdi_seg[0].first_sect * VBD_BSIZE;
1302 1298
1303 KASSERT(bcount <= MAXPHYS); 1299 KASSERT(bcount <= MAXPHYS);
1304 KASSERT(xbd_io->xio_start_offset < PAGE_SIZE); 1300 KASSERT(xbd_io->xio_start_offset < PAGE_SIZE);
1305 KASSERT(bcount + xbd_io->xio_start_offset < VBD_VA_SIZE); 1301 KASSERT(bcount + xbd_io->xio_start_offset < VBD_VA_SIZE);
1306 1302
1307 /* Fill-in the buf */ 1303 /* Fill-in the buf */
1308 if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) { 1304 if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) {
1309 buf_flags = B_WRITE; 1305 buf_flags = B_WRITE;
1310 } else { 1306 } else {
1311 buf_flags = B_READ; 1307 buf_flags = B_READ;
1312 } 1308 }
1313 1309
1314 xbd_io->xio_buf.b_flags = buf_flags; 1310 xbd_io->xio_buf.b_flags = buf_flags;
1315 xbd_io->xio_buf.b_cflags = 0; 1311 xbd_io->xio_buf.b_cflags = 0;
1316 xbd_io->xio_buf.b_oflags = 0; 1312 xbd_io->xio_buf.b_oflags = 0;
1317 xbd_io->xio_buf.b_iodone = xbdback_iodone; 1313 xbd_io->xio_buf.b_iodone = xbdback_iodone;
1318 xbd_io->xio_buf.b_proc = NULL; 1314 xbd_io->xio_buf.b_proc = NULL;
1319 xbd_io->xio_buf.b_vp = xbdi->xbdi_vp; 1315 xbd_io->xio_buf.b_vp = xbdi->xbdi_vp;
1320 xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock; 1316 xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock;
1321 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev; 1317 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev;
1322 xbd_io->xio_buf.b_blkno = req->sector_number; 1318 xbd_io->xio_buf.b_blkno = req->sector_number;
1323 xbd_io->xio_buf.b_bcount = bcount; 1319 xbd_io->xio_buf.b_bcount = bcount;
1324 xbd_io->xio_buf.b_data = NULL; 1320 xbd_io->xio_buf.b_data = NULL;
1325 xbd_io->xio_buf.b_private = xbd_io; 1321 xbd_io->xio_buf.b_private = xbd_io;
1326 1322
1327 xbdi->xbdi_cont = xbdback_co_do_io; 1323 xbdi->xbdi_cont = xbdback_co_do_io;
1328 return xbdback_map_shm(xbd_io); 1324 return xbdback_map_shm(xbd_io);
1329} 1325}
1330 1326
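This is where the commit's "remove no longer necessary atomics" is visible: xbdi_pendingreqs used to be bumped with atomic_inc_uint() because the I/O paths did not share a lock, but now every update happens with xbdi_lock held, so a plain counter plus an assertion is enough. A hedged before/after sketch (the struct and field names are hypothetical):

#include <sys/atomic.h>
#include <sys/mutex.h>
/* KASSERT() comes from the usual <sys/param.h>/<sys/systm.h> includes. */

struct counted {			/* hypothetical */
	kmutex_t	lock;
	unsigned	pending;
};

/* Before: no common lock, so an atomic read-modify-write was required. */
static void
io_start_atomic(struct counted *xi)
{
	atomic_inc_uint(&xi->pending);
}

/* After: callers already hold xi->lock, so a plain increment suffices and
 * is cheaper; the assertion documents the new locking contract. */
static void
io_start_locked(struct counted *xi)
{
	KASSERT(mutex_owned(&xi->lock));
	xi->pending++;
}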
1331static void 1327static void
1332xbdback_io_error(struct xbdback_io *xbd_io, int error) 1328xbdback_io_error(struct xbdback_io *xbd_io, int error)
1333{ 1329{
1334 xbd_io->xio_buf.b_error = error; 1330 KASSERT(mutex_owned(&xbd_io->xio_xbdi->xbdi_lock));
1335 xbdback_iodone(&xbd_io->xio_buf); 1331
 1332 struct buf *bp = &xbd_io->xio_buf;
 1333
 1334 bp->b_error = error;
 1335 xbdback_iodone_locked(xbd_io->xio_xbdi, xbd_io, bp);
1336} 1336}
1337 1337
1338/* 1338/*
1339 * Main xbdback I/O routine. It can either perform a flush operation or 1339 * Main xbdback I/O routine. It can either perform a flush operation or
1340 * schedule a read/write operation. 1340 * schedule a read/write operation.
1341 */ 1341 */
1342static void * 1342static void *
1343xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj) 1343xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj)
1344{ 1344{
1345 struct xbdback_io *xbd_io = obj; 1345 struct xbdback_io *xbd_io = obj;
1346 1346
1347 switch (xbd_io->xio_operation) { 1347 switch (xbd_io->xio_operation) {
1348 case BLKIF_OP_FLUSH_DISKCACHE: 1348 case BLKIF_OP_FLUSH_DISKCACHE:
1349 { 1349 {
1350 int error; 1350 int error;
1351 int force = 1; 1351 int force = 1;
1352 1352
 1353 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1354 mutex_exit(&xbdi->xbdi_lock);
1353 error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE, 1355 error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE,
1354 kauth_cred_get()); 1356 kauth_cred_get());
 1357 mutex_enter(&xbdi->xbdi_lock);
1355 if (error) { 1358 if (error) {
1356 aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n", 1359 aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n",
1357 xbdi->xbdi_xbusd->xbusd_path, error); 1360 xbdi->xbdi_xbusd->xbusd_path, error);
1358 if (error == EOPNOTSUPP || error == ENOTTY) 1361 if (error == EOPNOTSUPP || error == ENOTTY)
1359 error = BLKIF_RSP_EOPNOTSUPP; 1362 error = BLKIF_RSP_EOPNOTSUPP;
1360 else 1363 else
1361 error = BLKIF_RSP_ERROR; 1364 error = BLKIF_RSP_ERROR;
1362 } else 1365 } else
1363 error = BLKIF_RSP_OKAY; 1366 error = BLKIF_RSP_OKAY;
1364 xbdback_send_reply(xbdi, xbd_io->xio_id, 1367 xbdback_send_reply(xbdi, xbd_io->xio_id,
1365 xbd_io->xio_operation, error); 1368 xbd_io->xio_operation, error);
1366 xbdback_io_put(xbdi, xbd_io); 1369 xbdback_io_put(xbdi, xbd_io);
1367 xbdi_put(xbdi); 1370 xbdi_put(xbdi);
1368 xbdi->xbdi_cont = xbdback_co_main_incr; 1371 xbdi->xbdi_cont = xbdback_co_main_incr;
1369 return xbdi; 1372 return xbdi;
1370 } 1373 }
1371 case BLKIF_OP_READ: 1374 case BLKIF_OP_READ:
1372 case BLKIF_OP_WRITE: 1375 case BLKIF_OP_WRITE:
1373 xbd_io->xio_buf.b_data = (void *) 1376 xbd_io->xio_buf.b_data = (void *)
1374 (xbd_io->xio_vaddr + xbd_io->xio_start_offset); 1377 (xbd_io->xio_vaddr + xbd_io->xio_start_offset);
1375 1378
1376 if ((xbd_io->xio_buf.b_flags & B_READ) == 0) { 1379 if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
1377 mutex_enter(xbd_io->xio_buf.b_vp->v_interlock); 1380 mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
1378 xbd_io->xio_buf.b_vp->v_numoutput++; 1381 xbd_io->xio_buf.b_vp->v_numoutput++;
1379 mutex_exit(xbd_io->xio_buf.b_vp->v_interlock); 1382 mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
1380 } 1383 }
1381 /* will call xbdback_iodone() asynchronously when done */ 1384 /* will call xbdback_iodone() asynchronously when done */
1382 bdev_strategy(&xbd_io->xio_buf); 1385 bdev_strategy(&xbd_io->xio_buf);
1383 xbdi->xbdi_cont = xbdback_co_main_incr; 1386 xbdi->xbdi_cont = xbdback_co_main_incr;
1384 return xbdi; 1387 return xbdi;
1385 default: 1388 default:
1386 /* Should never happen */ 1389 /* Should never happen */
1387 panic("xbdback_co_do_io: unsupported operation %d", 1390 panic("xbdback_co_do_io: unsupported operation %d",
1388 xbd_io->xio_operation); 1391 xbd_io->xio_operation);
1389 } 1392 }
1390} 1393}
1391 1394
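Note how the flush branch now drops xbdi_lock around VOP_IOCTL(DIOCCACHESYNC) and re-takes it afterwards: the ioctl may sleep waiting for the disk, and a mutex must not be held across a sleeping operation (holding it would also stall every other path that needs the lock). The general shape, as a sketch with a hypothetical instance type:

/* Sketch; assumes <sys/param.h>, <sys/systm.h>, <sys/mutex.h>,
 * <sys/vnode.h>, <sys/dkio.h>, <sys/fcntl.h> and <sys/kauth.h>. */
struct inst {				/* hypothetical */
	kmutex_t	 lock;
	struct vnode	*vp;
};

static int
flush_backend(struct inst *xi)
{
	int force = 1, error;

	KASSERT(mutex_owned(&xi->lock));
	mutex_exit(&xi->lock);		/* never hold a mutex while sleeping */
	error = VOP_IOCTL(xi->vp, DIOCCACHESYNC, &force, FWRITE,
	    kauth_cred_get());
	mutex_enter(&xi->lock);		/* re-enter before touching shared state */
	return error;
}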
1392/* 1395/*
1393 * Called from softint(9) context when an I/O is done: for each request, send 1396 * Called from softint(9) context when an I/O is done: for each request, send
1394 * back the associated reply to the domain. 1397 * back the associated reply to the domain.
1395 * 
1396 * This gets reused by xbdback_io_error to report errors from other sources. 
1397 */ 1398 */
1398static void 1399static void
1399xbdback_iodone(struct buf *bp) 1400xbdback_iodone(struct buf *bp)
1400{ 1401{
1401 struct xbdback_io *xbd_io; 1402 struct xbdback_io *xbd_io;
1402 struct xbdback_instance *xbdi; 1403 struct xbdback_instance *xbdi;
1403 int status; 
1404 
1405 KERNEL_LOCK(1, NULL); /* XXXSMP */ 
1406 1404
1407 xbd_io = bp->b_private; 1405 xbd_io = bp->b_private;
 1406 KASSERT(bp == &xbd_io->xio_buf);
1408 xbdi = xbd_io->xio_xbdi; 1407 xbdi = xbd_io->xio_xbdi;
1409 1408
 1409 mutex_enter(&xbdi->xbdi_lock);
 1410 xbdback_iodone_locked(xbdi, xbd_io, bp);
 1411 mutex_exit(&xbdi->xbdi_lock);
 1412}
 1413
 1414/*
 1415 * This gets reused by xbdback_io_error to report errors from other sources.
 1416 */
 1417static void
 1418xbdback_iodone_locked(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io,
 1419 struct buf *bp)
 1420{
 1421 int status;
 1422
1410 XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n", 1423 XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n",
1411 xbdi->xbdi_domid, (long)xbd_io)); 1424 xbdi->xbdi_domid, (long)xbd_io));
1412 1425
 1426 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1427
1413 KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL); 1428 KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL);
1414 if (xbd_io->xio_xv != NULL) 1429 if (xbd_io->xio_xv != NULL)
1415 xbdback_unmap_shm(xbd_io); 1430 xbdback_unmap_shm(xbd_io);
1416 1431
1417 if (bp->b_error != 0) { 1432 if (bp->b_error != 0) {
1418 printf("xbd IO domain %d: error %d\n", 1433 printf("xbd IO domain %d: error %d\n",
1419 xbdi->xbdi_domid, bp->b_error); 1434 xbdi->xbdi_domid, bp->b_error);
1420 status = BLKIF_RSP_ERROR; 1435 status = BLKIF_RSP_ERROR;
1421 } else 1436 } else
1422 status = BLKIF_RSP_OKAY; 1437 status = BLKIF_RSP_OKAY;
1423  1438
1424 xbdback_send_reply(xbdi, xbd_io->xio_id, xbd_io->xio_operation, status); 1439 xbdback_send_reply(xbdi, xbd_io->xio_id, xbd_io->xio_operation, status);
1425 1440
1426 xbdi_put(xbdi); 1441 xbdi_put(xbdi);
1427 atomic_dec_uint(&xbdi->xbdi_pendingreqs); 1442 KASSERT(xbdi->xbdi_pendingreqs > 0);
 1443 xbdi->xbdi_pendingreqs--;
1428 buf_destroy(&xbd_io->xio_buf); 1444 buf_destroy(&xbd_io->xio_buf);
1429 xbdback_io_put(xbdi, xbd_io); 1445 xbdback_io_put(xbdi, xbd_io);
1430 1446
1431 xbdback_wakeup_thread(xbdi); 1447 xbdback_wakeup_thread(xbdi);
1432 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 
1433} 1448}
1434 1449
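Splitting completion into xbdback_iodone(), which takes the lock, and xbdback_iodone_locked(), which only asserts it, is the usual foo()/foo_locked() idiom: the biodone(9) callback arrives without the lock, while xbdback_io_error() already runs with it held and calls the _locked variant directly, avoiding recursive locking. In outline:

/* Sketch of the foo()/foo_locked() split (struct inst is hypothetical,
 * with a kmutex_t lock field; KASSERT from the usual kernel headers). */
static void
complete_locked(struct inst *xi)
{
	KASSERT(mutex_owned(&xi->lock));
	/* ... send the reply, drop the refcount, wake the worker ... */
}

static void
complete(struct inst *xi)		/* entry point for lock-less callers */
{
	mutex_enter(&xi->lock);
	complete_locked(xi);
	mutex_exit(&xi->lock);
}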
1435/* 1450/*
1436 * Wake up the per xbdback instance thread. 1451 * Wake up the per xbdback instance thread.
1437 */ 1452 */
1438static void 1453static void
1439xbdback_wakeup_thread(struct xbdback_instance *xbdi) 1454xbdback_wakeup_thread(struct xbdback_instance *xbdi)
1440{ 1455{
 1456 KASSERT(mutex_owned(&xbdi->xbdi_lock));
1441 1457
1442 mutex_enter(&xbdi->xbdi_lock); 
1443 /* only set RUN state when we are WAITING for work */ 1458 /* only set RUN state when we are WAITING for work */
1444 if (xbdi->xbdi_status == WAITING) 1459 if (xbdi->xbdi_status == WAITING)
1445 xbdi->xbdi_status = RUN; 1460 xbdi->xbdi_status = RUN;
1446 cv_broadcast(&xbdi->xbdi_cv); 1461 cv_signal(&xbdi->xbdi_cv);
1447 mutex_exit(&xbdi->xbdi_lock); 
1448} 1462}
1449 1463
1450/* 1464/*
1451 * called once a request has completed. Place the reply in the ring and 1465 * called once a request has completed. Place the reply in the ring and
1452 * notify the guest OS. 1466 * notify the guest OS.
1453 */ 1467 */
1454static void 1468static void
1455xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id, 1469xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id,
1456 int op, int status) 1470 int op, int status)
1457{ 1471{
1458 blkif_response_t *resp_n; 1472 blkif_response_t *resp_n;
1459 blkif_x86_32_response_t *resp32; 1473 blkif_x86_32_response_t *resp32;
1460 blkif_x86_64_response_t *resp64; 1474 blkif_x86_64_response_t *resp64;
1461 int notify; 1475 int notify;
1462 1476
 1477 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1478
1463 /* 1479 /*
1464 * The ring can be accessed by the xbdback thread, xbdback_iodone() 1480 * The ring can be accessed by the xbdback thread, xbdback_iodone()
1465 * handler, or any handler that triggered the shm callback. So 1481 * handler, or any handler that triggered the shm callback. So
1466 * protect ring access via the xbdi_lock mutex. 1482 * protect ring access via the xbdi_lock mutex.
1467 */ 1483 */
1468 mutex_enter(&xbdi->xbdi_lock); 
1469 switch (xbdi->xbdi_proto) { 1484 switch (xbdi->xbdi_proto) {
1470 case XBDIP_NATIVE: 1485 case XBDIP_NATIVE:
1471 resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n, 1486 resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n,
1472 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1487 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1473 resp_n->id = id; 1488 resp_n->id = id;
1474 resp_n->operation = op; 1489 resp_n->operation = op;
1475 resp_n->status = status; 1490 resp_n->status = status;
1476 break; 1491 break;
1477 case XBDIP_32: 1492 case XBDIP_32:
1478 resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32, 1493 resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32,
1479 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1494 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1480 resp32->id = id; 1495 resp32->id = id;
1481 resp32->operation = op; 1496 resp32->operation = op;
1482 resp32->status = status; 1497 resp32->status = status;
1483 break; 1498 break;
1484 case XBDIP_64: 1499 case XBDIP_64:
1485 resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64, 1500 resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64,
1486 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1501 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1487 resp64->id = id; 1502 resp64->id = id;
1488 resp64->operation = op; 1503 resp64->operation = op;
1489 resp64->status = status; 1504 resp64->status = status;
1490 break; 1505 break;
1491 } 1506 }
1492 xbdi->xbdi_ring.ring_n.rsp_prod_pvt++; 1507 xbdi->xbdi_ring.ring_n.rsp_prod_pvt++;
1493 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify); 1508 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify);
1494 mutex_exit(&xbdi->xbdi_lock); 
1495 1509
1496 if (notify) { 1510 if (notify) {
1497 XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid)); 1511 XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid));
1498 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn); 1512 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
1499 } 1513 }
1500} 1514}
1501 1515
1502/* 1516/*
1503 * Map multiple entries of an I/O request into backend's VA space. 1517 * Map multiple entries of an I/O request into backend's VA space.
1504 * The xbd_io->xio_gref array has to be filled out by the caller. 1518 * The xbd_io->xio_gref array has to be filled out by the caller.
1505 */ 1519 */
1506static void * 1520static void *
1507xbdback_map_shm(struct xbdback_io *xbd_io) 1521xbdback_map_shm(struct xbdback_io *xbd_io)
1508{ 1522{
1509 struct xbdback_instance *xbdi = xbd_io->xio_xbdi; 1523 struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
1510 int error, s; 1524 int error;
1511 1525
1512#ifdef XENDEBUG_VBD 1526#ifdef XENDEBUG_VBD
1513 int i; 1527 int i;
1514 printf("xbdback_map_shm map grant "); 1528 printf("xbdback_map_shm map grant ");
1515 for (i = 0; i < xbd_io->xio_nrma; i++) { 1529 for (i = 0; i < xbd_io->xio_nrma; i++) {
1516 printf("%u ", (u_int)xbd_io->xio_gref[i]); 1530 printf("%u ", (u_int)xbd_io->xio_gref[i]);
1517 } 1531 }
1518#endif 1532#endif
1519 1533
1520 s = splvm(); /* XXXSMP */ 1534 KASSERT(mutex_owned(&xbdi->xbdi_lock));
 1535
1521 xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free); 1536 xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free);
1522 KASSERT(xbd_io->xio_xv != NULL); 1537 KASSERT(xbd_io->xio_xv != NULL);
1523 SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next); 1538 SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next);
1524 xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr; 1539 xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr;
1525 splx(s); 
1526 1540
1527 error = xen_shm_map(xbd_io->xio_nrma, xbdi->xbdi_domid, 1541 error = xen_shm_map(xbd_io->xio_nrma, xbdi->xbdi_domid,
1528 xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,  1542 xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,
1529 (xbd_io->xio_operation == BLKIF_OP_WRITE) ? XSHM_RO : 0); 1543 (xbd_io->xio_operation == BLKIF_OP_WRITE) ? XSHM_RO : 0);
1530 1544
1531 switch(error) { 1545 switch(error) {
1532 case 0: 1546 case 0:
1533#ifdef XENDEBUG_VBD 1547#ifdef XENDEBUG_VBD
1534 printf("handle "); 1548 printf("handle ");
1535 for (i = 0; i < xbd_io->xio_nrma; i++) { 1549 for (i = 0; i < xbd_io->xio_nrma; i++) {
1536 printf("%u ", (u_int)xbd_io->xio_gh[i]); 1550 printf("%u ", (u_int)xbd_io->xio_gh[i]);
1537 } 1551 }
1538 printf("\n"); 1552 printf("\n");
1539#endif 1553#endif
1540 return xbd_io; 1554 return xbd_io;
1541 default: 1555 default:
1542 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) { 1556 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
1543 printf("xbdback_map_shm: xen_shm error %d ", error); 1557 printf("xbdback_map_shm: xen_shm error %d ", error);
1544 } 1558 }
1545 /* this will also free xbd_io via xbdback_iodone() */ 1559 /* this will also free xbd_io via xbdback_iodone() */
1546 xbdback_io_error(xbd_io, error); 1560 xbdback_io_error(xbd_io, error);
1547 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next); 1561 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
1548 xbd_io->xio_xv = NULL; 1562 xbd_io->xio_xv = NULL;
1549 /* do not retry */ 1563 /* do not retry */
1550 xbdi->xbdi_cont = xbdback_co_main_incr; 1564 xbdi->xbdi_cont = xbdback_co_main_incr;
1551 return xbdi; 1565 return xbdi;
1552 } 1566 }
1553} 1567}
1554 1568
1555/* unmap a request from our virtual address space (request is done) */ 1569/* unmap a request from our virtual address space (request is done) */
1556static void 1570static void
1557xbdback_unmap_shm(struct xbdback_io *xbd_io) 1571xbdback_unmap_shm(struct xbdback_io *xbd_io)
1558{ 1572{
1559 struct xbdback_instance *xbdi = xbd_io->xio_xbdi; 1573 struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
1560 1574
1561#ifdef XENDEBUG_VBD 1575#ifdef XENDEBUG_VBD
1562 int i; 1576 int i;
1563 printf("xbdback_unmap_shm handle "); 1577 printf("xbdback_unmap_shm handle ");
1564 for (i = 0; i < xbd_io->xio_nrma; i++) { 1578 for (i = 0; i < xbd_io->xio_nrma; i++) {
1565 printf("%u ", (u_int)xbd_io->xio_gh[i]); 1579 printf("%u ", (u_int)xbd_io->xio_gh[i]);
1566 } 1580 }
1567 printf("\n"); 1581 printf("\n");
1568#endif 1582#endif
1569 1583
1570 KASSERT(xbd_io->xio_xv != NULL); 1584 KASSERT(xbd_io->xio_xv != NULL);
1571 xen_shm_unmap(xbd_io->xio_vaddr, xbd_io->xio_nrma, 1585 xen_shm_unmap(xbd_io->xio_vaddr, xbd_io->xio_nrma,
1572 xbd_io->xio_gh); 1586 xbd_io->xio_gh);
1573 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next); 1587 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
1574 xbd_io->xio_xv = NULL; 1588 xbd_io->xio_xv = NULL;
1575 xbd_io->xio_vaddr = -1; 1589 xbd_io->xio_vaddr = -1;
1576} 1590}
1577 1591
1578/* Obtain memory from a pool */ 1592/* Obtain memory from a pool */
1579static struct xbdback_io * 1593static struct xbdback_io *
1580xbdback_io_get(struct xbdback_instance *xbdi) 1594xbdback_io_get(struct xbdback_instance *xbdi)
1581{ 1595{
1582 struct xbdback_io *xbd_io = SLIST_FIRST(&xbdi->xbdi_io_free); 1596 struct xbdback_io *xbd_io = SLIST_FIRST(&xbdi->xbdi_io_free);
1583 KASSERT(xbd_io != NULL); 1597 KASSERT(xbd_io != NULL);
1584 SLIST_REMOVE_HEAD(&xbdi->xbdi_io_free, xio_next); 1598 SLIST_REMOVE_HEAD(&xbdi->xbdi_io_free, xio_next);
1585 return xbd_io; 1599 return xbd_io;
1586} 1600}
1587 1601
1588/* Restore memory to a pool */ 1602/* Restore memory to a pool */
1589static void 1603static void
1590xbdback_io_put(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io) 1604xbdback_io_put(struct xbdback_instance *xbdi, struct xbdback_io *xbd_io)
1591{ 1605{
1592 KASSERT(xbd_io != NULL); 1606 KASSERT(xbd_io != NULL);
1593 SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, xbd_io, xio_next); 1607 SLIST_INSERT_HEAD(&xbdi->xbdi_io_free, xbd_io, xio_next);
1594} 1608}
1595 1609
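xbdback_io_get()/xbdback_io_put() are a tiny per-instance free list built on SLIST(3); since every caller now holds xbdi_lock, the list needs no locking of its own. The pattern in isolation, as a sketch with hypothetical names (the pool is assumed to be pre-filled and sized for the worst case, and callers are assumed to hold the protecting lock):

#include <sys/queue.h>
/* KASSERT() from the usual kernel headers. */

struct item {
	SLIST_ENTRY(item) next;
};
static SLIST_HEAD(, item) free_items = SLIST_HEAD_INITIALIZER(free_items);

static struct item *
item_get(void)
{
	struct item *it = SLIST_FIRST(&free_items);

	KASSERT(it != NULL);		/* pool sized for the worst case */
	SLIST_REMOVE_HEAD(&free_items, next);
	return it;
}

static void
item_put(struct item *it)
{
	SLIST_INSERT_HEAD(&free_items, it, next);
}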
1596/* 1610/*
1597 * Trampoline routine. Calls continuations in a loop and only exits when 1611 * Trampoline routine. Calls continuations in a loop and only exits when
1598 * either the returned object or the next callback is NULL. 1612 * either the returned object or the next callback is NULL.
1599 */ 1613 */
1600static void 1614static void
1601xbdback_trampoline(struct xbdback_instance *xbdi, void *obj) 1615xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
1602{ 1616{
1603 xbdback_cont_t cont; 1617 xbdback_cont_t cont;
1604 1618
1605 while(obj != NULL && xbdi->xbdi_cont != NULL) { 1619 while(obj != NULL && xbdi->xbdi_cont != NULL) {
1606 cont = xbdi->xbdi_cont; 1620 cont = xbdi->xbdi_cont;
1607#ifdef DIAGNOSTIC 1621#ifdef DIAGNOSTIC
1608 xbdi->xbdi_cont = (xbdback_cont_t)0xDEADBEEF; 1622 xbdi->xbdi_cont = (xbdback_cont_t)0xDEADBEEF;
1609#endif 1623#endif
1610 obj = (*cont)(xbdi, obj); 1624 obj = (*cont)(xbdi, obj);
1611#ifdef DIAGNOSTIC 1625#ifdef DIAGNOSTIC
1612 if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) { 1626 if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) {
1613 printf("xbdback_trampoline: 0x%lx didn't set " 1627 printf("xbdback_trampoline: 0x%lx didn't set "
1614 "xbdi->xbdi_cont!\n", (long)cont); 1628 "xbdi->xbdi_cont!\n", (long)cont);
1615 panic("xbdback_trampoline: bad continuation"); 1629 panic("xbdback_trampoline: bad continuation");
1616 } 1630 }
1617#endif 1631#endif
1618 } 1632 }
1619} 1633}
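The trampoline above drives the whole request pipeline: each continuation performs one step, stores the next step in xbdi_cont and returns the object to hand to it, so a deep call chain becomes a flat loop that can pause (by returning NULL) and be resumed later, e.g. from iodone or the event handler. A stripped-down sketch of that control structure, with hypothetical names:

#include <stddef.h>

struct ctx;
typedef void *(*cont_t)(struct ctx *, void *);

struct ctx {
	cont_t	cont;			/* next step, set by each step */
};

static void *
step_two(struct ctx *c, void *obj)
{
	c->cont = NULL;			/* pipeline finished */
	return obj;
}

static void *
step_one(struct ctx *c, void *obj)
{
	c->cont = step_two;		/* hand off to the next stage */
	return obj;			/* returning NULL here would pause the loop */
}

static void
trampoline(struct ctx *c, void *obj)
{
	c->cont = step_one;
	while (obj != NULL && c->cont != NULL) {
		cont_t f = c->cont;
		obj = (*f)(c, obj);
	}
}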