Thu Apr 23 07:24:40 2020 UTC
g/c the no-longer-needed xbdi_io structure member; just pass it as a continuation parameter


(jdolecek)
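In the continuation-passing machinery described in the file's own comment block, every xbdback_cont_t already receives a void * object and returns the object for the next step, so the in-flight struct xbdback_io no longer has to be parked in a per-instance xbdi_io member between steps. The fragment below is only an illustrative sketch of that pattern, not the committed code: the xbdback_cont_t typedef and the xbdi_cont member are taken from the source, but the bodies of xbdback_trampoline() and xbdback_co_io_gotio() shown here are assumed for illustration.

/*
 * Illustrative sketch only (assumed, not the committed change): the I/O
 * object travels as the continuation argument instead of living in a
 * per-instance xbdi_io member.
 */
typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);

static void
xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
{
	/* Assumed driver loop: run continuations until one returns NULL. */
	while (obj != NULL && xbdi->xbdi_cont != NULL)
		obj = xbdi->xbdi_cont(xbdi, obj);
}

static void *
xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
{
	struct xbdback_io *xbd_io = obj;	/* was stashed in xbdi->xbdi_io */

	/* ... set up the I/O, map shared memory, etc. ... */
	xbdi->xbdi_cont = xbdback_co_do_io;
	return xbd_io;		/* handed to the next continuation as obj */
}

Callers that previously read xbdi->xbdi_io now simply use the obj argument, which is why both the forward declaration of struct xbdback_io and the "_io state" member disappear from the declarations in the diff below.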
diff -r1.86 -r1.87 src/sys/arch/xen/xen/xbdback_xenbus.c


--- src/sys/arch/xen/xen/xbdback_xenbus.c 2020/04/21 13:56:18 1.86
+++ src/sys/arch/xen/xen/xbdback_xenbus.c 2020/04/23 07:24:40 1.87
@@ -1,1632 +1,1624 @@
1/* $NetBSD: xbdback_xenbus.c,v 1.86 2020/04/21 13:56:18 jdolecek Exp $ */ 1/* $NetBSD: xbdback_xenbus.c,v 1.87 2020/04/23 07:24:40 jdolecek Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2006 Manuel Bouyer. 4 * Copyright (c) 2006 Manuel Bouyer.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * 25 *
26 */ 26 */
27 27
28#include <sys/cdefs.h> 28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.86 2020/04/21 13:56:18 jdolecek Exp $"); 29__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.87 2020/04/23 07:24:40 jdolecek Exp $");
30 30
31#include <sys/atomic.h> 31#include <sys/atomic.h>
32#include <sys/buf.h> 32#include <sys/buf.h>
33#include <sys/condvar.h> 33#include <sys/condvar.h>
34#include <sys/conf.h> 34#include <sys/conf.h>
35#include <sys/disk.h> 35#include <sys/disk.h>
36#include <sys/device.h> 36#include <sys/device.h>
37#include <sys/fcntl.h> 37#include <sys/fcntl.h>
38#include <sys/kauth.h> 38#include <sys/kauth.h>
39#include <sys/kernel.h> 39#include <sys/kernel.h>
40#include <sys/kmem.h> 40#include <sys/kmem.h>
41#include <sys/kthread.h> 41#include <sys/kthread.h>
42#include <sys/mutex.h> 42#include <sys/mutex.h>
43#include <sys/param.h> 43#include <sys/param.h>
44#include <sys/queue.h> 44#include <sys/queue.h>
45#include <sys/systm.h> 45#include <sys/systm.h>
46#include <sys/time.h> 46#include <sys/time.h>
47#include <sys/types.h> 47#include <sys/types.h>
48#include <sys/vnode.h> 48#include <sys/vnode.h>
49 49
50#include <xen/xen.h> 50#include <xen/xen.h>
51#include <xen/xen_shm.h> 51#include <xen/xen_shm.h>
52#include <xen/evtchn.h> 52#include <xen/evtchn.h>
53#include <xen/xenbus.h> 53#include <xen/xenbus.h>
54#include <xen/xenring.h> 54#include <xen/xenring.h>
55#include <xen/include/public/io/protocols.h> 55#include <xen/include/public/io/protocols.h>
56 56
57/* #define XENDEBUG_VBD */ 57/* #define XENDEBUG_VBD */
58#ifdef XENDEBUG_VBD 58#ifdef XENDEBUG_VBD
59#define XENPRINTF(x) printf x 59#define XENPRINTF(x) printf x
60#else 60#else
61#define XENPRINTF(x) 61#define XENPRINTF(x)
62#endif 62#endif
63 63
64#define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 64#define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
65 65
66/* 66/*
67 * Backend block device driver for Xen 67 * Backend block device driver for Xen
68 */ 68 */
69 69
70/* Values are expressed in 512-byte sectors */ 70/* Values are expressed in 512-byte sectors */
71#define VBD_BSIZE 512 71#define VBD_BSIZE 512
72#define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1) 72#define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1)
73 73
74/* Need to alloc one extra page to account for possible mapping offset */ 74/* Need to alloc one extra page to account for possible mapping offset */
75#define VBD_VA_SIZE (MAXPHYS + PAGE_SIZE) 75#define VBD_VA_SIZE (MAXPHYS + PAGE_SIZE)
76#define VBD_MAX_INDIRECT_SEGMENTS VBD_VA_SIZE >> PAGE_SHIFT 76#define VBD_MAX_INDIRECT_SEGMENTS VBD_VA_SIZE >> PAGE_SHIFT
77 77
78CTASSERT(XENSHM_MAX_PAGES_PER_REQUEST >= VBD_MAX_INDIRECT_SEGMENTS); 78CTASSERT(XENSHM_MAX_PAGES_PER_REQUEST >= VBD_MAX_INDIRECT_SEGMENTS);
79 79
80struct xbdback_io; 
81struct xbdback_instance; 80struct xbdback_instance;
82 81
83/* 82/*
84 * status of a xbdback instance: 83 * status of a xbdback instance:
85 * WAITING: xbdback instance is connected, waiting for requests 84 * WAITING: xbdback instance is connected, waiting for requests
86 * RUN: xbdi thread must be woken up, I/Os have to be processed 85 * RUN: xbdi thread must be woken up, I/Os have to be processed
87 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled 86 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled
88 * DISCONNECTED: no I/Os, no ring, the thread should terminate. 87 * DISCONNECTED: no I/Os, no ring, the thread should terminate.
89 */ 88 */
90typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t; 89typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t;
91 90
92/* 91/*
93 * Each xbdback instance is managed by a single thread that handles all 92 * Each xbdback instance is managed by a single thread that handles all
94 * the I/O processing. As there are a variety of conditions that can block, 93 * the I/O processing. As there are a variety of conditions that can block,
95 * everything will be done in a sort of continuation-passing style. 94 * everything will be done in a sort of continuation-passing style.
96 * 95 *
97 * When the execution has to block to delay processing, for example to 96 * When the execution has to block to delay processing, for example to
98 * allow system to recover because of memory shortage (via shared memory 97 * allow system to recover because of memory shortage (via shared memory
99 * callback), the return value of a continuation can be set to NULL. In that 98 * callback), the return value of a continuation can be set to NULL. In that
100 * case, the thread will go back to sleeping and wait for the proper 99 * case, the thread will go back to sleeping and wait for the proper
101 * condition before it starts processing requests again from where it left. 100 * condition before it starts processing requests again from where it left.
102 * Continuation state is "stored" in the xbdback instance (xbdi_cont), 101 * Continuation state is "stored" in the xbdback instance (xbdi_cont),
103 * and should only be manipulated by the instance thread. 102 * and should only be manipulated by the instance thread.
104 * 103 *
105 * As xbdback(4) has to handle different sort of asynchronous events (Xen 104 * As xbdback(4) has to handle different sort of asynchronous events (Xen
106 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock 105 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
107 * mutex is used to protect specific elements of the xbdback instance from 106 * mutex is used to protect specific elements of the xbdback instance from
108 * concurrent access: thread status and ring access (when pushing responses). 107 * concurrent access: thread status and ring access (when pushing responses).
109 *  108 *
110 * Here's how the call graph is supposed to be for a single I/O: 109 * Here's how the call graph is supposed to be for a single I/O:
111 * 110 *
112 * xbdback_co_main() 111 * xbdback_co_main()
113 * | --> xbdback_co_cache_flush() 112 * | --> xbdback_co_cache_flush()
114 * | | | 113 * | | |
115 * | | -> xbdback_co_cache_doflush() or NULL 114 * | | -> xbdback_co_cache_doflush() or NULL
116 * | | | 115 * | | |
117 * | | -> xbdback_co_do_io() 116 * | | -> xbdback_co_do_io()
118 * xbdback_co_main_loop()-| 117 * xbdback_co_main_loop()-|
119 * | |-> xbdback_co_main_done2() or NULL 118 * | |-> xbdback_co_main_done2() or NULL
120 * | | 119 * | |
121 * | --> xbdback_co_main_incr() -> xbdback_co_main_loop() 120 * | --> xbdback_co_main_incr() -> xbdback_co_main_loop()
122 * | 121 * |
123 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop() 122 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
124 * | 123 * |
125 * xbdback_co_io_gotio() -> xbdback_map_shm() 124 * xbdback_co_io_gotio() -> xbdback_map_shm()
126 * | | 125 * | |
127 * | xbdback_co_main_incr() -> xbdback_co_main_loop() 126 * | xbdback_co_main_incr() -> xbdback_co_main_loop()
128 * | 127 * |
129 * xbdback_co_do_io()  128 * xbdback_co_do_io()
130 * | 129 * |
131 * xbdback_co_main_incr() -> xbdback_co_main_loop() 130 * xbdback_co_main_incr() -> xbdback_co_main_loop()
132 */ 131 */
133typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *); 132typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);
134 133
135enum xbdi_proto { 134enum xbdi_proto {
136 XBDIP_NATIVE, 135 XBDIP_NATIVE,
137 XBDIP_32, 136 XBDIP_32,
138 XBDIP_64 137 XBDIP_64
139}; 138};
140 139
141struct xbdback_va { 140struct xbdback_va {
142 SLIST_ENTRY(xbdback_va) xv_next; 141 SLIST_ENTRY(xbdback_va) xv_next;
143 vaddr_t xv_vaddr; 142 vaddr_t xv_vaddr;
144}; 143};
145 144
146/* we keep the xbdback instances in a linked list */ 145/* we keep the xbdback instances in a linked list */
147struct xbdback_instance { 146struct xbdback_instance {
148 SLIST_ENTRY(xbdback_instance) next; 147 SLIST_ENTRY(xbdback_instance) next;
149 struct xenbus_device *xbdi_xbusd; /* our xenstore entry */ 148 struct xenbus_device *xbdi_xbusd; /* our xenstore entry */
150 struct xenbus_watch xbdi_watch; /* to watch our store */ 149 struct xenbus_watch xbdi_watch; /* to watch our store */
151 domid_t xbdi_domid; /* attached to this domain */ 150 domid_t xbdi_domid; /* attached to this domain */
152 uint32_t xbdi_handle; /* domain-specific handle */ 151 uint32_t xbdi_handle; /* domain-specific handle */
153 char xbdi_name[16]; /* name of this instance */ 152 char xbdi_name[16]; /* name of this instance */
154 /* mutex that protects concurrent access to the xbdback instance */ 153 /* mutex that protects concurrent access to the xbdback instance */
155 kmutex_t xbdi_lock; 154 kmutex_t xbdi_lock;
156 kcondvar_t xbdi_cv; /* wait channel for thread work */ 155 kcondvar_t xbdi_cv; /* wait channel for thread work */
157 xbdback_state_t xbdi_status; /* thread's status */ 156 xbdback_state_t xbdi_status; /* thread's status */
158 /* KVA for mapping transfers */ 157 /* KVA for mapping transfers */
159 struct xbdback_va xbdi_va[BLKIF_RING_SIZE]; 158 struct xbdback_va xbdi_va[BLKIF_RING_SIZE];
160 SLIST_HEAD(, xbdback_va) xbdi_va_free; 159 SLIST_HEAD(, xbdback_va) xbdi_va_free;
161 /* backing device parameters */ 160 /* backing device parameters */
162 dev_t xbdi_dev; 161 dev_t xbdi_dev;
163 const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */ 162 const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */
164 struct vnode *xbdi_vp; 163 struct vnode *xbdi_vp;
165 uint64_t xbdi_size; 164 uint64_t xbdi_size;
166 bool xbdi_ro; /* is device read-only ? */ 165 bool xbdi_ro; /* is device read-only ? */
167 /* parameters for the communication */ 166 /* parameters for the communication */
168 unsigned int xbdi_evtchn; 167 unsigned int xbdi_evtchn;
169 struct intrhand *xbdi_ih; 168 struct intrhand *xbdi_ih;
170 /* private parameters for communication */ 169 /* private parameters for communication */
171 blkif_back_ring_proto_t xbdi_ring; 170 blkif_back_ring_proto_t xbdi_ring;
172 enum xbdi_proto xbdi_proto; 171 enum xbdi_proto xbdi_proto;
173 grant_handle_t xbdi_ring_handle; /* to unmap the ring */ 172 grant_handle_t xbdi_ring_handle; /* to unmap the ring */
174 vaddr_t xbdi_ring_va; /* to unmap the ring */ 173 vaddr_t xbdi_ring_va; /* to unmap the ring */
175 /* disconnection must be postponed until all I/O is done */ 174 /* disconnection must be postponed until all I/O is done */
176 int xbdi_refcnt; 175 int xbdi_refcnt;
177 /*  176 /*
178 * State for I/O processing/coalescing follows; this has to 177 * State for I/O processing/coalescing follows; this has to
179 * live here instead of on the stack because of the 178 * live here instead of on the stack because of the
180 * continuation-ness (see above). 179 * continuation-ness (see above).
181 */ 180 */
182 RING_IDX xbdi_req_prod; /* limit on request indices */ 181 RING_IDX xbdi_req_prod; /* limit on request indices */
183 xbdback_cont_t xbdi_cont; 182 xbdback_cont_t xbdi_cont;
184 /* _request state: track requests fetched from ring */ 183 /* _request state: track requests fetched from ring */
185 blkif_request_t xbdi_xen_req; 184 blkif_request_t xbdi_xen_req;
186 struct blkif_request_segment xbdi_seg[VBD_MAX_INDIRECT_SEGMENTS]; 185 struct blkif_request_segment xbdi_seg[VBD_MAX_INDIRECT_SEGMENTS];
187 bus_dmamap_t xbdi_seg_dmamap; 186 bus_dmamap_t xbdi_seg_dmamap;
188 grant_ref_t xbdi_in_gntref; 187 grant_ref_t xbdi_in_gntref;
189 /* _io state: I/O associated to this instance */ 
190 struct xbdback_io *xbdi_io; 
191 /* other state */ 188 /* other state */
192 int xbdi_same_page; /* are we merging two segments on the same page? */ 189 int xbdi_same_page; /* are we merging two segments on the same page? */
193 uint xbdi_pendingreqs; /* number of I/O in fly */ 190 uint xbdi_pendingreqs; /* number of I/O in fly */
194 struct timeval xbdi_lasterr_time; /* error time tracking */ 191 struct timeval xbdi_lasterr_time; /* error time tracking */
195#ifdef DEBUG 192#ifdef DEBUG
196 struct timeval xbdi_lastfragio_time; /* fragmented I/O tracking */ 193 struct timeval xbdi_lastfragio_time; /* fragmented I/O tracking */
197#endif 194#endif
198}; 195};
199/* Manipulation of the above reference count. */ 196/* Manipulation of the above reference count. */
200#define xbdi_get(xbdip) atomic_inc_uint(&(xbdip)->xbdi_refcnt) 197#define xbdi_get(xbdip) atomic_inc_uint(&(xbdip)->xbdi_refcnt)
201#define xbdi_put(xbdip) \ 198#define xbdi_put(xbdip) \
202do { \ 199do { \
203 if (atomic_dec_uint_nv(&(xbdip)->xbdi_refcnt) == 0) \ 200 if (atomic_dec_uint_nv(&(xbdip)->xbdi_refcnt) == 0) \
204 xbdback_finish_disconnect(xbdip); \ 201 xbdback_finish_disconnect(xbdip); \
205} while (/* CONSTCOND */ 0) 202} while (/* CONSTCOND */ 0)
206 203
207static SLIST_HEAD(, xbdback_instance) xbdback_instances; 204static SLIST_HEAD(, xbdback_instance) xbdback_instances;
208static kmutex_t xbdback_lock; 205static kmutex_t xbdback_lock;
209 206
210/* 207/*
211 * For each I/O operation associated with one of those requests, an 208 * For each I/O operation associated with one of those requests, an
212 * xbdback_io is allocated from a pool. It may correspond to multiple 209 * xbdback_io is allocated from a pool. It may correspond to multiple
213 * Xen disk requests, or parts of them, if several arrive at once that 210 * Xen disk requests, or parts of them, if several arrive at once that
214 * can be coalesced. 211 * can be coalesced.
215 */ 212 */
216struct xbdback_io { 213struct xbdback_io {
217 /* The instance pointer is duplicated for convenience. */ 214 /* The instance pointer is duplicated for convenience. */
218 struct xbdback_instance *xio_xbdi; /* our xbd instance */ 215 struct xbdback_instance *xio_xbdi; /* our xbd instance */
219 uint8_t xio_operation; 216 uint8_t xio_operation;
220 uint64_t xio_id; 217 uint64_t xio_id;
221 union { 218 union {
222 struct { 219 struct {
223 struct buf xio_buf; /* our I/O */ 220 struct buf xio_buf; /* our I/O */
224 /* the virtual address to map the request at */ 221 /* the virtual address to map the request at */
225 vaddr_t xio_vaddr; 222 vaddr_t xio_vaddr;
226 struct xbdback_va *xio_xv; 223 struct xbdback_va *xio_xv;
227 vaddr_t xio_start_offset; /* I/O start offset */ 224 vaddr_t xio_start_offset; /* I/O start offset */
228 /* grants to map */ 225 /* grants to map */
229 grant_ref_t xio_gref[VBD_MAX_INDIRECT_SEGMENTS]; 226 grant_ref_t xio_gref[VBD_MAX_INDIRECT_SEGMENTS];
230 /* grants release */ 227 /* grants release */
231 grant_handle_t xio_gh[VBD_MAX_INDIRECT_SEGMENTS]; 228 grant_handle_t xio_gh[VBD_MAX_INDIRECT_SEGMENTS];
232 uint16_t xio_nrma; /* number of guest pages */ 229 uint16_t xio_nrma; /* number of guest pages */
233 } xio_rw; 230 } xio_rw;
234 } u; 231 } u;
235}; 232};
236#define xio_buf u.xio_rw.xio_buf 233#define xio_buf u.xio_rw.xio_buf
237#define xio_vaddr u.xio_rw.xio_vaddr 234#define xio_vaddr u.xio_rw.xio_vaddr
238#define xio_start_offset u.xio_rw.xio_start_offset 235#define xio_start_offset u.xio_rw.xio_start_offset
239#define xio_xv u.xio_rw.xio_xv 236#define xio_xv u.xio_rw.xio_xv
240#define xio_gref u.xio_rw.xio_gref 237#define xio_gref u.xio_rw.xio_gref
241#define xio_gh u.xio_rw.xio_gh 238#define xio_gh u.xio_rw.xio_gh
242#define xio_nrma u.xio_rw.xio_nrma 239#define xio_nrma u.xio_rw.xio_nrma
243 240
244/* 241/*
245 * Pools to manage the chain of block requests and I/Os fragments 242 * Pools to manage the chain of block requests and I/Os fragments
246 * submitted by frontend. 243 * submitted by frontend.
247 */ 244 */
248static struct pool_cache xbdback_io_pool; 245static struct pool_cache xbdback_io_pool;
249 246
250/* Interval between reports of I/O errors from frontend */ 247/* Interval between reports of I/O errors from frontend */
251static const struct timeval xbdback_err_intvl = { 1, 0 }; 248static const struct timeval xbdback_err_intvl = { 1, 0 };
252 249
253 void xbdbackattach(int); 250 void xbdbackattach(int);
254static int xbdback_xenbus_create(struct xenbus_device *); 251static int xbdback_xenbus_create(struct xenbus_device *);
255static int xbdback_xenbus_destroy(void *); 252static int xbdback_xenbus_destroy(void *);
256static void xbdback_frontend_changed(void *, XenbusState); 253static void xbdback_frontend_changed(void *, XenbusState);
257static void xbdback_backend_changed(struct xenbus_watch *, 254static void xbdback_backend_changed(struct xenbus_watch *,
258 const char **, unsigned int); 255 const char **, unsigned int);
259static int xbdback_evthandler(void *); 256static int xbdback_evthandler(void *);
260 257
261static int xbdback_connect(struct xbdback_instance *); 258static int xbdback_connect(struct xbdback_instance *);
262static void xbdback_disconnect(struct xbdback_instance *); 259static void xbdback_disconnect(struct xbdback_instance *);
263static void xbdback_finish_disconnect(struct xbdback_instance *); 260static void xbdback_finish_disconnect(struct xbdback_instance *);
264 261
265static bool xbdif_lookup(domid_t, uint32_t); 262static bool xbdif_lookup(domid_t, uint32_t);
266 263
267static void *xbdback_co_main(struct xbdback_instance *, void *); 264static void *xbdback_co_main(struct xbdback_instance *, void *);
268static void *xbdback_co_main_loop(struct xbdback_instance *, void *); 265static void *xbdback_co_main_loop(struct xbdback_instance *, void *);
269static void *xbdback_co_main_incr(struct xbdback_instance *, void *); 266static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
270static void *xbdback_co_main_done2(struct xbdback_instance *, void *); 267static void *xbdback_co_main_done2(struct xbdback_instance *, void *);
271 268
272static void *xbdback_co_cache_flush(struct xbdback_instance *, void *); 269static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
273static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *); 270static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *);
274 271
275static void *xbdback_co_io(struct xbdback_instance *, void *); 272static void *xbdback_co_io(struct xbdback_instance *, void *);
276static void *xbdback_co_io_gotio(struct xbdback_instance *, void *); 273static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);
277 274
278static void *xbdback_co_do_io(struct xbdback_instance *, void *); 275static void *xbdback_co_do_io(struct xbdback_instance *, void *);
279 276
280static void xbdback_io_error(struct xbdback_io *, int); 277static void xbdback_io_error(struct xbdback_io *, int);
281static void xbdback_iodone(struct buf *); 278static void xbdback_iodone(struct buf *);
282static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int); 279static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int);
283 280
284static void *xbdback_map_shm(struct xbdback_io *); 281static void *xbdback_map_shm(struct xbdback_io *);
285static void xbdback_unmap_shm(struct xbdback_io *); 282static void xbdback_unmap_shm(struct xbdback_io *);
286 283
287static void *xbdback_pool_get(struct pool_cache *, 284static void *xbdback_pool_get(struct pool_cache *,
288 struct xbdback_instance *); 285 struct xbdback_instance *);
289static void xbdback_pool_put(struct pool_cache *, void *); 286static void xbdback_pool_put(struct pool_cache *, void *);
290static void xbdback_thread(void *); 287static void xbdback_thread(void *);
291static void xbdback_wakeup_thread(struct xbdback_instance *); 288static void xbdback_wakeup_thread(struct xbdback_instance *);
292static void xbdback_trampoline(struct xbdback_instance *, void *); 289static void xbdback_trampoline(struct xbdback_instance *, void *);
293 290
294static struct xenbus_backend_driver xbd_backend_driver = { 291static struct xenbus_backend_driver xbd_backend_driver = {
295 .xbakd_create = xbdback_xenbus_create, 292 .xbakd_create = xbdback_xenbus_create,
296 .xbakd_type = "vbd" 293 .xbakd_type = "vbd"
297}; 294};
298 295
299void 296void
300xbdbackattach(int n) 297xbdbackattach(int n)
301{ 298{
302 XENPRINTF(("xbdbackattach\n")); 299 XENPRINTF(("xbdbackattach\n"));
303 300
304 /* 301 /*
305 * initialize the backend driver, register the control message handler 302 * initialize the backend driver, register the control message handler
306 * and send driver up message. 303 * and send driver up message.
307 */ 304 */
308 SLIST_INIT(&xbdback_instances); 305 SLIST_INIT(&xbdback_instances);
309 mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE); 306 mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE);
310 307
311 pool_cache_bootstrap(&xbdback_io_pool, 308 pool_cache_bootstrap(&xbdback_io_pool,
312 sizeof(struct xbdback_io), 0, 0, 0, "xbbip", NULL, 309 sizeof(struct xbdback_io), 0, 0, 0, "xbbip", NULL,
313 IPL_SOFTBIO, NULL, NULL, NULL); 310 IPL_SOFTBIO, NULL, NULL, NULL);
314 311
315 /* we allocate enough to handle a whole ring at once */ 312 /* we allocate enough to handle a whole ring at once */
316 pool_prime(&xbdback_io_pool.pc_pool, BLKIF_RING_SIZE); 313 pool_prime(&xbdback_io_pool.pc_pool, BLKIF_RING_SIZE);
317 314
318 xenbus_backend_register(&xbd_backend_driver); 315 xenbus_backend_register(&xbd_backend_driver);
319} 316}
320 317
321static int 318static int
322xbdback_xenbus_create(struct xenbus_device *xbusd) 319xbdback_xenbus_create(struct xenbus_device *xbusd)
323{ 320{
324 struct xbdback_instance *xbdi; 321 struct xbdback_instance *xbdi;
325 long domid, handle; 322 long domid, handle;
326 int error, i; 323 int error, i;
327 char *ep; 324 char *ep;
328 325
329 if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path, 326 if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path,
330 "frontend-id", &domid, 10)) != 0) { 327 "frontend-id", &domid, 10)) != 0) {
331 aprint_error("xbdback: can't read %s/frontend-id: %d\n", 328 aprint_error("xbdback: can't read %s/frontend-id: %d\n",
332 xbusd->xbusd_path, error); 329 xbusd->xbusd_path, error);
333 return error; 330 return error;
334 } 331 }
335 332
336 /* 333 /*
337 * get handle: this is the last component of the path; which is 334 * get handle: this is the last component of the path; which is
338 * a decimal number. $path/dev contains the device name, which is not 335 * a decimal number. $path/dev contains the device name, which is not
339 * appropriate. 336 * appropriate.
340 */ 337 */
341 for (i = strlen(xbusd->xbusd_path); i > 0; i--) { 338 for (i = strlen(xbusd->xbusd_path); i > 0; i--) {
342 if (xbusd->xbusd_path[i] == '/') 339 if (xbusd->xbusd_path[i] == '/')
343 break; 340 break;
344 } 341 }
345 if (i == 0) { 342 if (i == 0) {
346 aprint_error("xbdback: can't parse %s\n", 343 aprint_error("xbdback: can't parse %s\n",
347 xbusd->xbusd_path); 344 xbusd->xbusd_path);
348 return EFTYPE; 345 return EFTYPE;
349 } 346 }
350 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10); 347 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10);
351 if (*ep != '\0') { 348 if (*ep != '\0') {
352 aprint_error("xbdback: can't parse %s\n", 349 aprint_error("xbdback: can't parse %s\n",
353 xbusd->xbusd_path); 350 xbusd->xbusd_path);
354 return EFTYPE; 351 return EFTYPE;
355 } 352 }
356  353
357 if (xbdif_lookup(domid, handle)) { 354 if (xbdif_lookup(domid, handle)) {
358 return EEXIST; 355 return EEXIST;
359 } 356 }
360 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP); 357 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP);
361 358
362 xbdi->xbdi_domid = domid; 359 xbdi->xbdi_domid = domid;
363 xbdi->xbdi_handle = handle; 360 xbdi->xbdi_handle = handle;
364 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d", 361 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d",
365 xbdi->xbdi_domid, xbdi->xbdi_handle); 362 xbdi->xbdi_domid, xbdi->xbdi_handle);
366 363
367 /* initialize status and reference counter */ 364 /* initialize status and reference counter */
368 xbdi->xbdi_status = DISCONNECTED; 365 xbdi->xbdi_status = DISCONNECTED;
369 xbdi_get(xbdi); 366 xbdi_get(xbdi);
370 367
371 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO); 368 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO);
372 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name); 369 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name);
373 mutex_enter(&xbdback_lock); 370 mutex_enter(&xbdback_lock);
374 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next); 371 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next);
375 mutex_exit(&xbdback_lock); 372 mutex_exit(&xbdback_lock);
376 373
377 xbusd->xbusd_u.b.b_cookie = xbdi;  374 xbusd->xbusd_u.b.b_cookie = xbdi;
378 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy; 375 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy;
379 xbusd->xbusd_otherend_changed = xbdback_frontend_changed; 376 xbusd->xbusd_otherend_changed = xbdback_frontend_changed;
380 xbdi->xbdi_xbusd = xbusd; 377 xbdi->xbdi_xbusd = xbusd;
381 378
382 if (bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat, PAGE_SIZE, 379 if (bus_dmamap_create(xbdi->xbdi_xbusd->xbusd_dmat, PAGE_SIZE,
383 1, PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 380 1, PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
384 &xbdi->xbdi_seg_dmamap) != 0) { 381 &xbdi->xbdi_seg_dmamap) != 0) {
385 printf("%s: can't create dma map for indirect segments\n", 382 printf("%s: can't create dma map for indirect segments\n",
386 xbdi->xbdi_name); 383 xbdi->xbdi_name);
387 goto fail; 384 goto fail;
388 } 385 }
389 if (bus_dmamap_load(xbdi->xbdi_xbusd->xbusd_dmat, 386 if (bus_dmamap_load(xbdi->xbdi_xbusd->xbusd_dmat,
390 xbdi->xbdi_seg_dmamap, xbdi->xbdi_seg, 387 xbdi->xbdi_seg_dmamap, xbdi->xbdi_seg,
391 sizeof(xbdi->xbdi_seg), NULL, BUS_DMA_WAITOK) != 0) { 388 sizeof(xbdi->xbdi_seg), NULL, BUS_DMA_WAITOK) != 0) {
392 printf("%s: can't load dma map for indirect segments\n", 389 printf("%s: can't load dma map for indirect segments\n",
393 xbdi->xbdi_name); 390 xbdi->xbdi_name);
394 goto fail; 391 goto fail;
395 } 392 }
396 KASSERT(xbdi->xbdi_seg_dmamap->dm_nsegs == 1); 393 KASSERT(xbdi->xbdi_seg_dmamap->dm_nsegs == 1);
397 394
398 SLIST_INIT(&xbdi->xbdi_va_free); 395 SLIST_INIT(&xbdi->xbdi_va_free);
399 for (i = 0; i < BLKIF_RING_SIZE; i++) { 396 for (i = 0; i < BLKIF_RING_SIZE; i++) {
400 xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map, 397 xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map,
401 VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA); 398 VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA);
402 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i], 399 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i],
403 xv_next); 400 xv_next);
404 } 401 }
405 402
406 error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device", 403 error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device",
407 &xbdi->xbdi_watch, xbdback_backend_changed); 404 &xbdi->xbdi_watch, xbdback_backend_changed);
408 if (error) { 405 if (error) {
409 printf("failed to watch on %s/physical-device: %d\n", 406 printf("failed to watch on %s/physical-device: %d\n",
410 xbusd->xbusd_path, error); 407 xbusd->xbusd_path, error);
411 goto fail; 408 goto fail;
412 } 409 }
413 xbdi->xbdi_watch.xbw_dev = xbusd; 410 xbdi->xbdi_watch.xbw_dev = xbusd;
414 error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait); 411 error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
415 if (error) { 412 if (error) {
416 printf("failed to switch state on %s: %d\n", 413 printf("failed to switch state on %s: %d\n",
417 xbusd->xbusd_path, error); 414 xbusd->xbusd_path, error);
418 goto fail2; 415 goto fail2;
419 } 416 }
420 return 0; 417 return 0;
421fail2: 418fail2:
422 unregister_xenbus_watch(&xbdi->xbdi_watch); 419 unregister_xenbus_watch(&xbdi->xbdi_watch);
423fail: 420fail:
424 kmem_free(xbdi, sizeof(*xbdi)); 421 kmem_free(xbdi, sizeof(*xbdi));
425 return error; 422 return error;
426} 423}
427 424
428static int 425static int
429xbdback_xenbus_destroy(void *arg) 426xbdback_xenbus_destroy(void *arg)
430{ 427{
431 struct xbdback_instance *xbdi = arg; 428 struct xbdback_instance *xbdi = arg;
432 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 429 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
433 struct gnttab_unmap_grant_ref ungrop; 430 struct gnttab_unmap_grant_ref ungrop;
434 int err; 431 int err;
435 432
436 XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status)); 433 XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status));
437 434
438 xbdback_disconnect(xbdi); 435 xbdback_disconnect(xbdi);
439 436
440 /* unregister watch */ 437 /* unregister watch */
441 if (xbdi->xbdi_watch.node) 438 if (xbdi->xbdi_watch.node)
442 xenbus_unwatch_path(&xbdi->xbdi_watch); 439 xenbus_unwatch_path(&xbdi->xbdi_watch);
443 440
444 /* unmap ring */ 441 /* unmap ring */
445 if (xbdi->xbdi_ring_va != 0) { 442 if (xbdi->xbdi_ring_va != 0) {
446 ungrop.host_addr = xbdi->xbdi_ring_va; 443 ungrop.host_addr = xbdi->xbdi_ring_va;
447 ungrop.handle = xbdi->xbdi_ring_handle; 444 ungrop.handle = xbdi->xbdi_ring_handle;
448 ungrop.dev_bus_addr = 0; 445 ungrop.dev_bus_addr = 0;
449 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 446 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
450 &ungrop, 1); 447 &ungrop, 1);
451 if (err) 448 if (err)
452 printf("xbdback %s: unmap_grant_ref failed: %d\n", 449 printf("xbdback %s: unmap_grant_ref failed: %d\n",
453 xbusd->xbusd_otherend, err); 450 xbusd->xbusd_otherend, err);
454 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, 451 uvm_km_free(kernel_map, xbdi->xbdi_ring_va,
455 PAGE_SIZE, UVM_KMF_VAONLY); 452 PAGE_SIZE, UVM_KMF_VAONLY);
456 } 453 }
457 /* close device */ 454 /* close device */
458 if (xbdi->xbdi_size) { 455 if (xbdi->xbdi_size) {
459 const char *name; 456 const char *name;
460 struct dkwedge_info wi; 457 struct dkwedge_info wi;
461 if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0) 458 if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0)
462 name = wi.dkw_devname; 459 name = wi.dkw_devname;
463 else 460 else
464 name = "*unknown*"; 461 name = "*unknown*";
465 printf("xbd backend: detach device %s for domain %d\n", 462 printf("xbd backend: detach device %s for domain %d\n",
466 name, xbdi->xbdi_domid); 463 name, xbdi->xbdi_domid);
467 vn_close(xbdi->xbdi_vp, FREAD, NOCRED); 464 vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
468 } 465 }
469 mutex_enter(&xbdback_lock); 466 mutex_enter(&xbdback_lock);
470 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next); 467 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next);
471 mutex_exit(&xbdback_lock); 468 mutex_exit(&xbdback_lock);
472 469
473 for (int i = 0; i < BLKIF_RING_SIZE; i++) { 470 for (int i = 0; i < BLKIF_RING_SIZE; i++) {
474 if (xbdi->xbdi_va[i].xv_vaddr != 0) { 471 if (xbdi->xbdi_va[i].xv_vaddr != 0) {
475 uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr, 472 uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr,
476 VBD_VA_SIZE, UVM_KMF_VAONLY); 473 VBD_VA_SIZE, UVM_KMF_VAONLY);
477 xbdi->xbdi_va[i].xv_vaddr = 0; 474 xbdi->xbdi_va[i].xv_vaddr = 0;
478 } 475 }
479 } 476 }
480 477
481 bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat, xbdi->xbdi_seg_dmamap); 478 bus_dmamap_unload(xbdi->xbdi_xbusd->xbusd_dmat, xbdi->xbdi_seg_dmamap);
482 bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat, xbdi->xbdi_seg_dmamap); 479 bus_dmamap_destroy(xbdi->xbdi_xbusd->xbusd_dmat, xbdi->xbdi_seg_dmamap);
483 480
484 mutex_destroy(&xbdi->xbdi_lock); 481 mutex_destroy(&xbdi->xbdi_lock);
485 cv_destroy(&xbdi->xbdi_cv); 482 cv_destroy(&xbdi->xbdi_cv);
486 kmem_free(xbdi, sizeof(*xbdi)); 483 kmem_free(xbdi, sizeof(*xbdi));
487 return 0; 484 return 0;
488} 485}
489 486
490static int 487static int
491xbdback_connect(struct xbdback_instance *xbdi) 488xbdback_connect(struct xbdback_instance *xbdi)
492{ 489{
493 int err; 490 int err;
494 struct gnttab_map_grant_ref grop; 491 struct gnttab_map_grant_ref grop;
495 struct gnttab_unmap_grant_ref ungrop; 492 struct gnttab_unmap_grant_ref ungrop;
496 evtchn_op_t evop; 493 evtchn_op_t evop;
497 u_long ring_ref, revtchn; 494 u_long ring_ref, revtchn;
498 char xsproto[32]; 495 char xsproto[32];
499 const char *proto; 496 const char *proto;
500 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 497 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
501 498
502 XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path)); 499 XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path));
503 /* read comunication informations */ 500 /* read comunication informations */
504 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend, 501 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
505 "ring-ref", &ring_ref, 10); 502 "ring-ref", &ring_ref, 10);
506 if (err) { 503 if (err) {
507 xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref", 504 xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref",
508 xbusd->xbusd_otherend); 505 xbusd->xbusd_otherend);
509 return -1; 506 return -1;
510 } 507 }
511 XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref)); 508 XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref));
512 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend, 509 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
513 "event-channel", &revtchn, 10); 510 "event-channel", &revtchn, 10);
514 if (err) { 511 if (err) {
515 xenbus_dev_fatal(xbusd, err, "reading %s/event-channel", 512 xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
516 xbusd->xbusd_otherend); 513 xbusd->xbusd_otherend);
517 return -1; 514 return -1;
518 } 515 }
519 XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn)); 516 XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn));
520 err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol", 517 err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol",
521 xsproto, sizeof(xsproto)); 518 xsproto, sizeof(xsproto));
522 if (err) { 519 if (err) {
523 xbdi->xbdi_proto = XBDIP_NATIVE; 520 xbdi->xbdi_proto = XBDIP_NATIVE;
524 proto = "unspecified"; 521 proto = "unspecified";
525 XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path)); 522 XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path));
526 } else { 523 } else {
527 XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto)); 524 XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto));
528 if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) { 525 if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) {
529 xbdi->xbdi_proto = XBDIP_NATIVE; 526 xbdi->xbdi_proto = XBDIP_NATIVE;
530 proto = XEN_IO_PROTO_ABI_NATIVE; 527 proto = XEN_IO_PROTO_ABI_NATIVE;
531 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) { 528 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) {
532 xbdi->xbdi_proto = XBDIP_32; 529 xbdi->xbdi_proto = XBDIP_32;
533 proto = XEN_IO_PROTO_ABI_X86_32; 530 proto = XEN_IO_PROTO_ABI_X86_32;
534 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) { 531 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) {
535 xbdi->xbdi_proto = XBDIP_64; 532 xbdi->xbdi_proto = XBDIP_64;
536 proto = XEN_IO_PROTO_ABI_X86_64; 533 proto = XEN_IO_PROTO_ABI_X86_64;
537 } else { 534 } else {
538 aprint_error("xbd domain %d: unknown proto %s\n", 535 aprint_error("xbd domain %d: unknown proto %s\n",
539 xbdi->xbdi_domid, xsproto); 536 xbdi->xbdi_domid, xsproto);
540 return -1; 537 return -1;
541 } 538 }
542 } 539 }
543 540
544 /* allocate VA space and map rings */ 541 /* allocate VA space and map rings */
545 xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 542 xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
546 UVM_KMF_VAONLY); 543 UVM_KMF_VAONLY);
547 if (xbdi->xbdi_ring_va == 0) { 544 if (xbdi->xbdi_ring_va == 0) {
548 xenbus_dev_fatal(xbusd, ENOMEM, 545 xenbus_dev_fatal(xbusd, ENOMEM,
549 "can't get VA for ring", xbusd->xbusd_otherend); 546 "can't get VA for ring", xbusd->xbusd_otherend);
550 return -1; 547 return -1;
551 } 548 }
552 XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va)); 549 XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va));
553 550
554 grop.host_addr = xbdi->xbdi_ring_va; 551 grop.host_addr = xbdi->xbdi_ring_va;
555 grop.flags = GNTMAP_host_map; 552 grop.flags = GNTMAP_host_map;
556 grop.ref = ring_ref; 553 grop.ref = ring_ref;
557 grop.dom = xbdi->xbdi_domid; 554 grop.dom = xbdi->xbdi_domid;
558 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 555 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
559 &grop, 1); 556 &grop, 1);
560 if (err || grop.status) { 557 if (err || grop.status) {
561 aprint_error("xbdback %s: can't map grant ref: %d/%d\n", 558 aprint_error("xbdback %s: can't map grant ref: %d/%d\n",
562 xbusd->xbusd_path, err, grop.status); 559 xbusd->xbusd_path, err, grop.status);
563 xenbus_dev_fatal(xbusd, EINVAL, 560 xenbus_dev_fatal(xbusd, EINVAL,
564 "can't map ring", xbusd->xbusd_otherend); 561 "can't map ring", xbusd->xbusd_otherend);
565 goto err; 562 goto err;
566 } 563 }
567 xbdi->xbdi_ring_handle = grop.handle; 564 xbdi->xbdi_ring_handle = grop.handle;
568 XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, grop.handle)); 565 XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, grop.handle));
569 566
570 switch(xbdi->xbdi_proto) { 567 switch(xbdi->xbdi_proto) {
571 case XBDIP_NATIVE: 568 case XBDIP_NATIVE:
572 { 569 {
573 blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va; 570 blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va;
574 BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE); 571 BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE);
575 break; 572 break;
576 } 573 }
577 case XBDIP_32: 574 case XBDIP_32:
578 { 575 {
579 blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va; 576 blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va;
580 BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE); 577 BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE);
581 break; 578 break;
582 } 579 }
583 case XBDIP_64: 580 case XBDIP_64:
584 { 581 {
585 blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va; 582 blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va;
586 BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE); 583 BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE);
587 break; 584 break;
588 } 585 }
589 } 586 }
590 587
591 evop.cmd = EVTCHNOP_bind_interdomain; 588 evop.cmd = EVTCHNOP_bind_interdomain;
592 evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid; 589 evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid;
593 evop.u.bind_interdomain.remote_port = revtchn; 590 evop.u.bind_interdomain.remote_port = revtchn;
594 err = HYPERVISOR_event_channel_op(&evop); 591 err = HYPERVISOR_event_channel_op(&evop);
595 if (err) { 592 if (err) {
596 aprint_error("blkback %s: " 593 aprint_error("blkback %s: "
597 "can't get event channel: %d\n", 594 "can't get event channel: %d\n",
598 xbusd->xbusd_otherend, err); 595 xbusd->xbusd_otherend, err);
599 xenbus_dev_fatal(xbusd, err, 596 xenbus_dev_fatal(xbusd, err,
600 "can't bind event channel", xbusd->xbusd_otherend); 597 "can't bind event channel", xbusd->xbusd_otherend);
601 goto err2; 598 goto err2;
602 } 599 }
603 XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn)); 600 XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn));
604 xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port; 601 xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port;
605 602
606 xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic, xbdi->xbdi_evtchn, 603 xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic, xbdi->xbdi_evtchn,
607 IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi, false, 604 IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi, false,
608 xbdi->xbdi_name); 605 xbdi->xbdi_name);
609 KASSERT(xbdi->xbdi_ih != NULL); 606 KASSERT(xbdi->xbdi_ih != NULL);
610 aprint_verbose("xbd backend domain %d handle %#x (%d) " 607 aprint_verbose("xbd backend domain %d handle %#x (%d) "
611 "using event channel %d, protocol %s\n", xbdi->xbdi_domid, 608 "using event channel %d, protocol %s\n", xbdi->xbdi_domid,
612 xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto); 609 xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto);
613 610
614 /* enable the xbdback event handler machinery */ 611 /* enable the xbdback event handler machinery */
615 xbdi->xbdi_status = WAITING; 612 xbdi->xbdi_status = WAITING;
616 hypervisor_unmask_event(xbdi->xbdi_evtchn); 613 hypervisor_unmask_event(xbdi->xbdi_evtchn);
617 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn); 614 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
618 615
619 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, 616 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
620 xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0) 617 xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0)
621 return 0; 618 return 0;
622 619
623err2: 620err2:
624 /* unmap ring */ 621 /* unmap ring */
625 ungrop.host_addr = xbdi->xbdi_ring_va; 622 ungrop.host_addr = xbdi->xbdi_ring_va;
626 ungrop.handle = xbdi->xbdi_ring_handle; 623 ungrop.handle = xbdi->xbdi_ring_handle;
627 ungrop.dev_bus_addr = 0; 624 ungrop.dev_bus_addr = 0;
628 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 625 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
629 &ungrop, 1); 626 &ungrop, 1);
630 if (err) 627 if (err)
631 aprint_error("xbdback %s: unmap_grant_ref failed: %d\n", 628 aprint_error("xbdback %s: unmap_grant_ref failed: %d\n",
632 xbusd->xbusd_path, err); 629 xbusd->xbusd_path, err);
633 630
634err: 631err:
635 /* free ring VA space */ 632 /* free ring VA space */
636 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY); 633 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY);
637 return -1; 634 return -1;
638} 635}
639 636
640/* 637/*
641 * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context. 638 * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context.
642 */ 639 */
643static void 640static void
644xbdback_disconnect(struct xbdback_instance *xbdi) 641xbdback_disconnect(struct xbdback_instance *xbdi)
645{ 642{
646  643
647 mutex_enter(&xbdi->xbdi_lock); 644 mutex_enter(&xbdi->xbdi_lock);
648 if (xbdi->xbdi_status == DISCONNECTED) { 645 if (xbdi->xbdi_status == DISCONNECTED) {
649 mutex_exit(&xbdi->xbdi_lock); 646 mutex_exit(&xbdi->xbdi_lock);
650 return; 647 return;
651 } 648 }
652 hypervisor_mask_event(xbdi->xbdi_evtchn); 649 hypervisor_mask_event(xbdi->xbdi_evtchn);
653 xen_intr_disestablish(xbdi->xbdi_ih); 650 xen_intr_disestablish(xbdi->xbdi_ih);
654 651
655 /* signal thread that we want to disconnect, then wait for it */ 652 /* signal thread that we want to disconnect, then wait for it */
656 xbdi->xbdi_status = DISCONNECTING; 653 xbdi->xbdi_status = DISCONNECTING;
657 cv_signal(&xbdi->xbdi_cv); 654 cv_signal(&xbdi->xbdi_cv);
658 655
659 while (xbdi->xbdi_status != DISCONNECTED) 656 while (xbdi->xbdi_status != DISCONNECTED)
660 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 657 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
661 658
662 mutex_exit(&xbdi->xbdi_lock); 659 mutex_exit(&xbdi->xbdi_lock);
663 660
664 xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing); 661 xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing);
665} 662}
666 663
667static void 664static void
668xbdback_frontend_changed(void *arg, XenbusState new_state) 665xbdback_frontend_changed(void *arg, XenbusState new_state)
669{ 666{
670 struct xbdback_instance *xbdi = arg; 667 struct xbdback_instance *xbdi = arg;
671 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 668 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
672 669
673 XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state)); 670 XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state));
674 switch(new_state) { 671 switch(new_state) {
675 case XenbusStateInitialising: 672 case XenbusStateInitialising:
676 break; 673 break;
677 case XenbusStateInitialised: 674 case XenbusStateInitialised:
678 case XenbusStateConnected: 675 case XenbusStateConnected:
679 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) 676 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN)
680 break; 677 break;
681 xbdback_connect(xbdi); 678 xbdback_connect(xbdi);
682 break; 679 break;
683 case XenbusStateClosing: 680 case XenbusStateClosing:
684 xbdback_disconnect(xbdi); 681 xbdback_disconnect(xbdi);
685 break; 682 break;
686 case XenbusStateClosed: 683 case XenbusStateClosed:
687 /* otherend_changed() should handle it for us */ 684 /* otherend_changed() should handle it for us */
688 panic("xbdback_frontend_changed: closed\n"); 685 panic("xbdback_frontend_changed: closed\n");
689 case XenbusStateUnknown: 686 case XenbusStateUnknown:
690 case XenbusStateInitWait: 687 case XenbusStateInitWait:
691 default: 688 default:
692 aprint_error("xbdback %s: invalid frontend state %d\n", 689 aprint_error("xbdback %s: invalid frontend state %d\n",
693 xbusd->xbusd_path, new_state); 690 xbusd->xbusd_path, new_state);
694 } 691 }
695 return; 692 return;
696} 693}
697 694
698static void 695static void
699xbdback_backend_changed(struct xenbus_watch *watch, 696xbdback_backend_changed(struct xenbus_watch *watch,
700 const char **vec, unsigned int len) 697 const char **vec, unsigned int len)
701{ 698{
702 struct xenbus_device *xbusd = watch->xbw_dev; 699 struct xenbus_device *xbusd = watch->xbw_dev;
703 struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie; 700 struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie;
704 int err; 701 int err;
705 long dev; 702 long dev;
706 char mode[32]; 703 char mode[32];
707 struct xenbus_transaction *xbt; 704 struct xenbus_transaction *xbt;
708 const char *devname; 705 const char *devname;
709 int major; 706 int major;
710 707
711 err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device", 708 err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device",
712 &dev, 10); 709 &dev, 10);
713 /* 710 /*
714 * An error can occur as the watch can fire up just after being 711 * An error can occur as the watch can fire up just after being
715 * registered. So we have to ignore error :( 712 * registered. So we have to ignore error :(
716 */ 713 */
717 if (err) 714 if (err)
718 return; 715 return;
719 /* 716 /*
720 * we can also fire up after having opened the device, don't try 717 * we can also fire up after having opened the device, don't try
721 * to do it twice. 718 * to do it twice.
722 */ 719 */
723 if (xbdi->xbdi_vp != NULL) { 720 if (xbdi->xbdi_vp != NULL) {
724 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) { 721 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) {
725 if (xbdi->xbdi_dev != dev) { 722 if (xbdi->xbdi_dev != dev) {
726 printf("xbdback %s: changing physical device " 723 printf("xbdback %s: changing physical device "
727 "from %#"PRIx64" to %#lx not supported\n", 724 "from %#"PRIx64" to %#lx not supported\n",
728 xbusd->xbusd_path, xbdi->xbdi_dev, dev); 725 xbusd->xbusd_path, xbdi->xbdi_dev, dev);
729 } 726 }
730 } 727 }
731 return; 728 return;
732 } 729 }
733 xbdi->xbdi_dev = dev; 730 xbdi->xbdi_dev = dev;
734 err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode)); 731 err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode));
735 if (err) { 732 if (err) {
736 printf("xbdback: failed to read %s/mode: %d\n", 733 printf("xbdback: failed to read %s/mode: %d\n",
737 xbusd->xbusd_path, err); 734 xbusd->xbusd_path, err);
738 return; 735 return;
739 } 736 }
740 if (mode[0] == 'w') 737 if (mode[0] == 'w')
741 xbdi->xbdi_ro = false; 738 xbdi->xbdi_ro = false;
742 else 739 else
743 xbdi->xbdi_ro = true; 740 xbdi->xbdi_ro = true;
744 major = major(xbdi->xbdi_dev); 741 major = major(xbdi->xbdi_dev);
745 devname = devsw_blk2name(major); 742 devname = devsw_blk2name(major);
746 if (devname == NULL) { 743 if (devname == NULL) {
747 printf("xbdback %s: unknown device 0x%"PRIx64"\n", 744 printf("xbdback %s: unknown device 0x%"PRIx64"\n",
748 xbusd->xbusd_path, xbdi->xbdi_dev); 745 xbusd->xbusd_path, xbdi->xbdi_dev);
749 return; 746 return;
750 } 747 }
751 xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev); 748 xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev);
752 if (xbdi->xbdi_bdevsw == NULL) { 749 if (xbdi->xbdi_bdevsw == NULL) {
753 printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n", 750 printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n",
754 xbusd->xbusd_path, xbdi->xbdi_dev); 751 xbusd->xbusd_path, xbdi->xbdi_dev);
755 return; 752 return;
756 } 753 }
757 err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp); 754 err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp);
758 if (err) { 755 if (err) {
759 printf("xbdback %s: can't open device 0x%"PRIx64": %d\n", 756 printf("xbdback %s: can't open device 0x%"PRIx64": %d\n",
760 xbusd->xbusd_path, xbdi->xbdi_dev, err); 757 xbusd->xbusd_path, xbdi->xbdi_dev, err);
761 return; 758 return;
762 } 759 }
763 err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY); 760 err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY);
764 if (err) { 761 if (err) {
765 printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n", 762 printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n",
766 xbusd->xbusd_path, xbdi->xbdi_dev, err); 763 xbusd->xbusd_path, xbdi->xbdi_dev, err);
767 vrele(xbdi->xbdi_vp); 764 vrele(xbdi->xbdi_vp);
768 return; 765 return;
769 } 766 }
770 err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED); 767 err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED);
771 if (err) { 768 if (err) {
772 printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n", 769 printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n",
773 xbusd->xbusd_path, xbdi->xbdi_dev, err); 770 xbusd->xbusd_path, xbdi->xbdi_dev, err);
774 vput(xbdi->xbdi_vp); 771 vput(xbdi->xbdi_vp);
775 return; 772 return;
776 } 773 }
777 VOP_UNLOCK(xbdi->xbdi_vp); 774 VOP_UNLOCK(xbdi->xbdi_vp);
778 775
779 /* dk device; get wedge data */ 776 /* dk device; get wedge data */
780 struct dkwedge_info wi; 777 struct dkwedge_info wi;
781 if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) { 778 if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) {
782 xbdi->xbdi_size = wi.dkw_size; 779 xbdi->xbdi_size = wi.dkw_size;
783 printf("xbd backend: attach device %s (size %" PRIu64 ") " 780 printf("xbd backend: attach device %s (size %" PRIu64 ") "
784 "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size, 781 "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size,
785 xbdi->xbdi_domid); 782 xbdi->xbdi_domid);
786 } else { 783 } else {
787 /* If both Ioctls failed set device size to 0 and return */ 784 /* If both Ioctls failed set device size to 0 and return */
788 printf("xbdback %s: can't DIOCGWEDGEINFO device " 785 printf("xbdback %s: can't DIOCGWEDGEINFO device "
789 "0x%"PRIx64": %d\n", xbusd->xbusd_path, 786 "0x%"PRIx64": %d\n", xbusd->xbusd_path,
790 xbdi->xbdi_dev, err);  787 xbdi->xbdi_dev, err);
791 xbdi->xbdi_size = xbdi->xbdi_dev = 0; 788 xbdi->xbdi_size = xbdi->xbdi_dev = 0;
792 vn_close(xbdi->xbdi_vp, FREAD, NOCRED); 789 vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
793 xbdi->xbdi_vp = NULL; 790 xbdi->xbdi_vp = NULL;
794 return; 791 return;
795 } 792 }
796again: 793again:
797 xbt = xenbus_transaction_start(); 794 xbt = xenbus_transaction_start();
798 if (xbt == NULL) { 795 if (xbt == NULL) {
799 printf("xbdback %s: can't start transaction\n", 796 printf("xbdback %s: can't start transaction\n",
800 xbusd->xbusd_path); 797 xbusd->xbusd_path);
801 return; 798 return;
802 } 799 }
803 err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 , 800 err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 ,
804 xbdi->xbdi_size); 801 xbdi->xbdi_size);
805 if (err) { 802 if (err) {
806 printf("xbdback: failed to write %s/sectors: %d\n", 803 printf("xbdback: failed to write %s/sectors: %d\n",
807 xbusd->xbusd_path, err); 804 xbusd->xbusd_path, err);
808 goto abort; 805 goto abort;
809 } 806 }
810 err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u", 807 err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u",
811 xbdi->xbdi_ro ? VDISK_READONLY : 0); 808 xbdi->xbdi_ro ? VDISK_READONLY : 0);
812 if (err) { 809 if (err) {
813 printf("xbdback: failed to write %s/info: %d\n", 810 printf("xbdback: failed to write %s/info: %d\n",
814 xbusd->xbusd_path, err); 811 xbusd->xbusd_path, err);
815 goto abort; 812 goto abort;
816 } 813 }
817 err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu", 814 err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu",
818 (u_long)DEV_BSIZE); 815 (u_long)DEV_BSIZE);
819 if (err) { 816 if (err) {
820 printf("xbdback: failed to write %s/sector-size: %d\n", 817 printf("xbdback: failed to write %s/sector-size: %d\n",
821 xbusd->xbusd_path, err); 818 xbusd->xbusd_path, err);
822 goto abort; 819 goto abort;
823 } 820 }
824 err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache", 821 err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache",
825 "%u", 1); 822 "%u", 1);
826 if (err) { 823 if (err) {
827 printf("xbdback: failed to write %s/feature-flush-cache: %d\n", 824 printf("xbdback: failed to write %s/feature-flush-cache: %d\n",
828 xbusd->xbusd_path, err); 825 xbusd->xbusd_path, err);
829 goto abort; 826 goto abort;
830 } 827 }
831 err = xenbus_printf(xbt, xbusd->xbusd_path, 828 err = xenbus_printf(xbt, xbusd->xbusd_path,
832 "feature-max-indirect-segments", "%u", VBD_MAX_INDIRECT_SEGMENTS); 829 "feature-max-indirect-segments", "%u", VBD_MAX_INDIRECT_SEGMENTS);
833 if (err) { 830 if (err) {
834 printf("xbdback: failed to write %s/feature-indirect: %d\n", 831 printf("xbdback: failed to write %s/feature-indirect: %d\n",
835 xbusd->xbusd_path, err); 832 xbusd->xbusd_path, err);
836 goto abort; 833 goto abort;
837 } 834 }
838 err = xenbus_transaction_end(xbt, 0); 835 err = xenbus_transaction_end(xbt, 0);
839 if (err == EAGAIN) 836 if (err == EAGAIN)
840 goto again; 837 goto again;
841 if (err) { 838 if (err) {
842 printf("xbdback %s: can't end transaction: %d\n", 839 printf("xbdback %s: can't end transaction: %d\n",
843 xbusd->xbusd_path, err); 840 xbusd->xbusd_path, err);
844 } 841 }
845 err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected); 842 err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
846 if (err) { 843 if (err) {
847 printf("xbdback %s: can't switch state: %d\n", 844 printf("xbdback %s: can't switch state: %d\n",
848 xbusd->xbusd_path, err); 845 xbusd->xbusd_path, err);
849 } 846 }
850 return; 847 return;
851abort: 848abort:
852 xenbus_transaction_end(xbt, 1); 849 xenbus_transaction_end(xbt, 1);
853} 850}
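
The xenbus_printf() calls above all run inside one transaction that is simply restarted from "again:" when the commit returns EAGAIN. The following is a minimal userland sketch of that retry idiom, written with a do/while instead of the goto; the xs_* helpers are invented stubs that simulate a single EAGAIN and are not the real xenbus API.

/*
 * Retry-on-EAGAIN transaction sketch.  xs_* names are hypothetical stubs.
 */
#include <errno.h>
#include <stdio.h>

static int attempts;

static int xs_transaction_start(void) { return 1; }
static int xs_write(int t, const char *key, unsigned long val)
{
	printf("t%d: %s = %lu\n", t, key, val);
	return 0;
}
static int xs_transaction_end(int t, int abort)
{
	if (abort)
		return 0;
	return (attempts++ == 0) ? EAGAIN : 0;	/* fail the first commit */
}

int
main(void)
{
	int t, err;

	do {
		t = xs_transaction_start();
		if ((err = xs_write(t, "sectors", 204800)) != 0 ||
		    (err = xs_write(t, "sector-size", 512)) != 0) {
			xs_transaction_end(t, 1);	/* abort */
			return err;
		}
		err = xs_transaction_end(t, 0);		/* commit */
	} while (err == EAGAIN);			/* redo the whole batch */
	return err;
}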
854 851
855/* 852/*
856 * Used by an xbdi thread to signal that it is now disconnected. 853 * Used by an xbdi thread to signal that it is now disconnected.
857 */ 854 */
858static void 855static void
859xbdback_finish_disconnect(struct xbdback_instance *xbdi) 856xbdback_finish_disconnect(struct xbdback_instance *xbdi)
860{ 857{
861 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 858 KASSERT(mutex_owned(&xbdi->xbdi_lock));
862 KASSERT(xbdi->xbdi_status == DISCONNECTING); 859 KASSERT(xbdi->xbdi_status == DISCONNECTING);
863 860
864 xbdi->xbdi_status = DISCONNECTED; 861 xbdi->xbdi_status = DISCONNECTED;
865 862
866 cv_signal(&xbdi->xbdi_cv); 863 cv_signal(&xbdi->xbdi_cv);
867} 864}
868 865
869static bool 866static bool
870 xbdif_lookup(domid_t dom, uint32_t handle) 867 xbdif_lookup(domid_t dom, uint32_t handle)
871{ 868{
872 struct xbdback_instance *xbdi; 869 struct xbdback_instance *xbdi;
873 bool found = false; 870 bool found = false;
874 871
875 mutex_enter(&xbdback_lock); 872 mutex_enter(&xbdback_lock);
876 SLIST_FOREACH(xbdi, &xbdback_instances, next) { 873 SLIST_FOREACH(xbdi, &xbdback_instances, next) {
877 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) { 874 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) {
878 found = true; 875 found = true;
879 break; 876 break;
880 } 877 }
881 } 878 }
882 mutex_exit(&xbdback_lock); 879 mutex_exit(&xbdback_lock);
883 880
884 return found; 881 return found;
885} 882}
886 883
887static int 884static int
888xbdback_evthandler(void *arg) 885xbdback_evthandler(void *arg)
889{ 886{
890 struct xbdback_instance *xbdi = arg; 887 struct xbdback_instance *xbdi = arg;
891 888
892 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n", 889 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n",
893 xbdi->xbdi_domid, xbdi->xbdi_cont)); 890 xbdi->xbdi_domid, xbdi->xbdi_cont));
894 891
895 xbdback_wakeup_thread(xbdi); 892 xbdback_wakeup_thread(xbdi);
896 893
897 return 1; 894 return 1;
898} 895}
899 896
900/* 897/*
901 * Main thread routine for one xbdback instance. Woken up by 898 * Main thread routine for one xbdback instance. Woken up by
902 * xbdback_evthandler when a domain has I/O work scheduled in an I/O ring. 899 * xbdback_evthandler when a domain has I/O work scheduled in an I/O ring.
903 */ 900 */
904static void 901static void
905xbdback_thread(void *arg) 902xbdback_thread(void *arg)
906{ 903{
907 struct xbdback_instance *xbdi = arg; 904 struct xbdback_instance *xbdi = arg;
908 905
909 for (;;) { 906 for (;;) {
910 mutex_enter(&xbdi->xbdi_lock); 907 mutex_enter(&xbdi->xbdi_lock);
911 switch (xbdi->xbdi_status) { 908 switch (xbdi->xbdi_status) {
912 case WAITING: 909 case WAITING:
913 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 910 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
914 mutex_exit(&xbdi->xbdi_lock); 911 mutex_exit(&xbdi->xbdi_lock);
915 break; 912 break;
916 case RUN: 913 case RUN:
917 xbdi->xbdi_status = WAITING; /* reset state */ 914 xbdi->xbdi_status = WAITING; /* reset state */
918 mutex_exit(&xbdi->xbdi_lock); 915 mutex_exit(&xbdi->xbdi_lock);
919 916
920 if (xbdi->xbdi_cont == NULL) { 917 if (xbdi->xbdi_cont == NULL) {
921 xbdi->xbdi_cont = xbdback_co_main; 918 xbdi->xbdi_cont = xbdback_co_main;
922 } 919 }
923 920
924 xbdback_trampoline(xbdi, xbdi); 921 xbdback_trampoline(xbdi, xbdi);
925 break; 922 break;
926 case DISCONNECTING: 923 case DISCONNECTING:
927 if (xbdi->xbdi_pendingreqs > 0) { 924 if (xbdi->xbdi_pendingreqs > 0) {
928 /* there are pending I/Os. Wait for them. */ 925 /* there are pending I/Os. Wait for them. */
929 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 926 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
930 mutex_exit(&xbdi->xbdi_lock); 927 mutex_exit(&xbdi->xbdi_lock);
931 break; 928 break;
932 } 929 }
933  930
934 /* All I/Os should have been processed by now, 931 /* All I/Os should have been processed by now,
935 * so xbdi_refcnt should drop to 0 */ 932 * so xbdi_refcnt should drop to 0 */
936 xbdi_put(xbdi); 933 xbdi_put(xbdi);
937 KASSERT(xbdi->xbdi_refcnt == 0); 934 KASSERT(xbdi->xbdi_refcnt == 0);
938 mutex_exit(&xbdi->xbdi_lock); 935 mutex_exit(&xbdi->xbdi_lock);
939 kthread_exit(0); 936 kthread_exit(0);
940 break; 937 break;
941 default: 938 default:
942 panic("%s: invalid state %d", 939 panic("%s: invalid state %d",
943 xbdi->xbdi_name, xbdi->xbdi_status); 940 xbdi->xbdi_name, xbdi->xbdi_status);
944 } 941 }
945 } 942 }
946} 943}
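
As a rough userland analogue of the WAITING/RUN/DISCONNECTING loop above, the sketch below uses pthreads in place of the kernel's mutex(9)/condvar(9) primitives. All names are invented for the illustration; this is not the xbdback code and it omits the pending-I/O drain.

#include <pthread.h>
#include <stdio.h>

enum state { ST_WAITING, ST_RUN, ST_DISCONNECTING };

struct worker {
	pthread_mutex_t lock;
	pthread_cond_t cv;
	enum state status;
};

static void *
worker_thread(void *arg)
{
	struct worker *w = arg;

	for (;;) {
		pthread_mutex_lock(&w->lock);
		switch (w->status) {
		case ST_WAITING:
			pthread_cond_wait(&w->cv, &w->lock);
			pthread_mutex_unlock(&w->lock);
			break;
		case ST_RUN:
			w->status = ST_WAITING;	/* reset state before working */
			pthread_mutex_unlock(&w->lock);
			printf("processing ring\n");
			break;
		case ST_DISCONNECTING:
			pthread_mutex_unlock(&w->lock);
			return NULL;		/* kthread_exit() equivalent */
		}
	}
}

static void
wakeup(struct worker *w, enum state st)
{
	/* roughly xbdback_wakeup_thread(), but forcing the given state */
	pthread_mutex_lock(&w->lock);
	w->status = st;
	pthread_cond_signal(&w->cv);
	pthread_mutex_unlock(&w->lock);
}

int
main(void)
{
	struct worker w = { PTHREAD_MUTEX_INITIALIZER,
	    PTHREAD_COND_INITIALIZER, ST_WAITING };
	pthread_t t;

	pthread_create(&t, NULL, worker_thread, &w);
	wakeup(&w, ST_RUN);		/* schedule one pass over the ring */
	wakeup(&w, ST_DISCONNECTING);	/* then ask the worker to exit */
	pthread_join(t, NULL);
	return 0;
}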
947 944
948static void * 945static void *
949xbdback_co_main(struct xbdback_instance *xbdi, void *obj) 946xbdback_co_main(struct xbdback_instance *xbdi, void *obj)
950{ 947{
951 (void)obj; 948 (void)obj;
952 949
953 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod; 950 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod;
954 xen_rmb(); /* ensure we see all requests up to req_prod */ 951 xen_rmb(); /* ensure we see all requests up to req_prod */
955 /* 952 /*
956 * note that we'll eventually get a full ring of requests. 953 * note that we'll eventually get a full ring of requests.
957 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod) 954 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod)
958 */ 955 */
959 xbdi->xbdi_cont = xbdback_co_main_loop; 956 xbdi->xbdi_cont = xbdback_co_main_loop;
960 return xbdi; 957 return xbdi;
961} 958}
962 959
963/* 960/*
964 * Fetch a blkif request from the ring, and pass control to the appropriate 961 * Fetch a blkif request from the ring, and pass control to the appropriate
965 * continuation. 962 * continuation.
966 * If someone asked for disconnection, do not fetch any more request from 963 * If someone asked for disconnection, do not fetch any more request from
967 * the ring. 964 * the ring.
968 */ 965 */
969static void * 966static void *
970xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj __unused)  967xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj __unused)
971{ 968{
972 blkif_request_t *req, *reqn; 969 blkif_request_t *req, *reqn;
973 blkif_x86_32_request_t *req32; 970 blkif_x86_32_request_t *req32;
974 blkif_x86_64_request_t *req64; 971 blkif_x86_64_request_t *req64;
975 blkif_request_indirect_t *rin; 972 blkif_request_indirect_t *rin;
976 973
977 if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) { 974 if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) {
978 req = &xbdi->xbdi_xen_req; 975 req = &xbdi->xbdi_xen_req;
979 memset(req, 0, sizeof(*req)); 976 memset(req, 0, sizeof(*req));
980 977
981 switch(xbdi->xbdi_proto) { 978 switch(xbdi->xbdi_proto) {
982 case XBDIP_NATIVE: 979 case XBDIP_NATIVE:
983 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n, 980 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
984 xbdi->xbdi_ring.ring_n.req_cons); 981 xbdi->xbdi_ring.ring_n.req_cons);
985 req->operation = reqn->operation; 982 req->operation = reqn->operation;
986 req->id = reqn->id; 983 req->id = reqn->id;
987 break; 984 break;
988 case XBDIP_32: 985 case XBDIP_32:
989 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32, 986 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
990 xbdi->xbdi_ring.ring_n.req_cons); 987 xbdi->xbdi_ring.ring_n.req_cons);
991 req->operation = req32->operation; 988 req->operation = req32->operation;
992 req->id = req32->id; 989 req->id = req32->id;
993 break; 990 break;
994 case XBDIP_64: 991 case XBDIP_64:
995 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64, 992 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
996 xbdi->xbdi_ring.ring_n.req_cons); 993 xbdi->xbdi_ring.ring_n.req_cons);
997 req->operation = req64->operation; 994 req->operation = req64->operation;
998 req->id = req64->id; 995 req->id = req64->id;
999 break; 996 break;
1000 } 997 }
1001 __insn_barrier(); 998 __insn_barrier();
1002 XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x " 999 XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x "
1003 "resp_prod 0x%x id %" PRIu64 "\n", req->operation, 1000 "resp_prod 0x%x id %" PRIu64 "\n", req->operation,
1004 xbdi->xbdi_ring.ring_n.req_cons, 1001 xbdi->xbdi_ring.ring_n.req_cons,
1005 xbdi->xbdi_req_prod, 1002 xbdi->xbdi_req_prod,
1006 xbdi->xbdi_ring.ring_n.rsp_prod_pvt, 1003 xbdi->xbdi_ring.ring_n.rsp_prod_pvt,
1007 req->id)); 1004 req->id));
1008 switch (req->operation) { 1005 switch (req->operation) {
1009 case BLKIF_OP_INDIRECT: 1006 case BLKIF_OP_INDIRECT:
1010 /* just check indirect_op, rest is handled later */ 1007 /* just check indirect_op, rest is handled later */
1011 rin = (blkif_request_indirect_t *) 1008 rin = (blkif_request_indirect_t *)
1012 RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n, 1009 RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
1013 xbdi->xbdi_ring.ring_n.req_cons); 1010 xbdi->xbdi_ring.ring_n.req_cons);
1014 if (rin->indirect_op != BLKIF_OP_READ && 1011 if (rin->indirect_op != BLKIF_OP_READ &&
1015 rin->indirect_op != BLKIF_OP_WRITE) { 1012 rin->indirect_op != BLKIF_OP_WRITE) {
1016 if (ratecheck(&xbdi->xbdi_lasterr_time, 1013 if (ratecheck(&xbdi->xbdi_lasterr_time,
1017 &xbdback_err_intvl)) { 1014 &xbdback_err_intvl)) {
1018 printf("%s: unknown ind operation %d\n", 1015 printf("%s: unknown ind operation %d\n",
1019 xbdi->xbdi_name, 1016 xbdi->xbdi_name,
1020 rin->indirect_op); 1017 rin->indirect_op);
1021 } 1018 }
1022 goto fail; 1019 goto fail;
1023 } 1020 }
1024 /* FALLTHROUGH */ 1021 /* FALLTHROUGH */
1025 case BLKIF_OP_READ: 1022 case BLKIF_OP_READ:
1026 case BLKIF_OP_WRITE: 1023 case BLKIF_OP_WRITE:
1027 xbdi->xbdi_cont = xbdback_co_io; 1024 xbdi->xbdi_cont = xbdback_co_io;
1028 break; 1025 break;
1029 case BLKIF_OP_FLUSH_DISKCACHE: 1026 case BLKIF_OP_FLUSH_DISKCACHE:
1030 xbdi_get(xbdi); 1027 xbdi_get(xbdi);
1031 xbdi->xbdi_cont = xbdback_co_cache_flush; 1028 xbdi->xbdi_cont = xbdback_co_cache_flush;
1032 break; 1029 break;
1033 default: 1030 default:
1034 if (ratecheck(&xbdi->xbdi_lasterr_time, 1031 if (ratecheck(&xbdi->xbdi_lasterr_time,
1035 &xbdback_err_intvl)) { 1032 &xbdback_err_intvl)) {
1036 printf("%s: unknown operation %d\n", 1033 printf("%s: unknown operation %d\n",
1037 xbdi->xbdi_name, req->operation); 1034 xbdi->xbdi_name, req->operation);
1038 } 1035 }
1039fail: 1036fail:
1040 xbdback_send_reply(xbdi, req->id, req->operation, 1037 xbdback_send_reply(xbdi, req->id, req->operation,
1041 BLKIF_RSP_ERROR); 1038 BLKIF_RSP_ERROR);
1042 xbdi->xbdi_cont = xbdback_co_main_incr; 1039 xbdi->xbdi_cont = xbdback_co_main_incr;
1043 break; 1040 break;
1044 } 1041 }
1045 } else { 1042 } else {
1046 KASSERT(xbdi->xbdi_io == NULL); 
1047 xbdi->xbdi_cont = xbdback_co_main_done2; 1043 xbdi->xbdi_cont = xbdback_co_main_done2;
1048 } 1044 }
1049 return xbdi; 1045 return xbdi;
1050} 1046}
1051 1047
1052/* 1048/*
1053 * Increment the consumer index and move on to the next request. If a 1049 * Increment the consumer index and move on to the next request. If a
1054 * disconnect was requested, leave the continuation chain now. 1050 * disconnect was requested, leave the continuation chain now.
1055 */ 1051 */
1056static void * 1052static void *
1057xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj) 1053xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj)
1058{ 1054{
1059 (void)obj; 1055 (void)obj;
1060 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n; 1056 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n;
1061 1057
1062 ring->req_cons++; 1058 ring->req_cons++;
1063 1059
1064 /* 1060 /*
1065 * Do not bother with locking here when checking for xbdi_status: if 1061 * Do not bother with locking here when checking for xbdi_status: if
1066 * we get a transient state, we will get the right value at 1062 * we get a transient state, we will get the right value at
1067 * the next increment. 1063 * the next increment.
1068 */ 1064 */
1069 if (xbdi->xbdi_status == DISCONNECTING) 1065 if (xbdi->xbdi_status == DISCONNECTING)
1070 xbdi->xbdi_cont = NULL; 1066 xbdi->xbdi_cont = NULL;
1071 else 1067 else
1072 xbdi->xbdi_cont = xbdback_co_main_loop; 1068 xbdi->xbdi_cont = xbdback_co_main_loop;
1073 1069
1074 /* 1070 /*
1075 * Each time the thread processes a full ring of requests, give 1071 * Each time the thread processes a full ring of requests, give
1076 * other threads a chance to process I/Os too. 1072 * other threads a chance to process I/Os too.
1077 */ 1073 */
1078 if ((ring->req_cons % BLKIF_RING_SIZE) == 0) 1074 if ((ring->req_cons % BLKIF_RING_SIZE) == 0)
1079 yield(); 1075 yield();
1080 1076
1081 return xbdi; 1077 return xbdi;
1082} 1078}
1083 1079
1084/* 1080/*
1085 * Check for requests in the instance's ring. If there are any, start again 1081 * Check for requests in the instance's ring. If there are any, start again
1086 * from the beginning. If not, stall. 1082 * from the beginning. If not, stall.
1087 */ 1083 */
1088static void * 1084static void *
1089xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj) 1085xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj)
1090{ 1086{
1091 int work_to_do; 1087 int work_to_do;
1092 1088
1093 KASSERT(xbdi->xbdi_io == NULL); 
1094 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do); 1089 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
1095 if (work_to_do) 1090 if (work_to_do)
1096 xbdi->xbdi_cont = xbdback_co_main; 1091 xbdi->xbdi_cont = xbdback_co_main;
1097 else 1092 else
1098 xbdi->xbdi_cont = NULL; 1093 xbdi->xbdi_cont = NULL;
1099 1094
1100 return xbdi; 1095 return xbdi;
1101} 1096}
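
The three continuations above juggle two free-running counters, req_cons and the snapshot xbdi_req_prod, where only the low bits of an index select a ring slot, so a completely full ring is legal. The toy model below shows just that bookkeeping for a hypothetical 8-slot ring; it deliberately omits the xen_rmb() barrier and the Xen ring.h macros the real code relies on.

#include <stdio.h>

#define RING_SIZE	8			/* must be a power of two */
#define MASK(i)		((i) & (RING_SIZE - 1))

static int ring[RING_SIZE];

int
main(void)
{
	unsigned req_prod = 0, req_cons = 0;

	/* producer (normally the frontend) queues a few requests */
	for (int i = 0; i < 5; i++)
		ring[MASK(req_prod++)] = i;

	/* consumer (the backend thread) drains up to the observed prod */
	unsigned prod_snapshot = req_prod;	/* like xbdi_req_prod */
	while (req_cons != prod_snapshot) {
		printf("request %d\n", ring[MASK(req_cons)]);
		req_cons++;			/* xbdback_co_main_incr */
	}
	return 0;
}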
1102 1097
1103/* 1098/*
1104 * Frontend requested a cache flush operation. 1099 * Frontend requested a cache flush operation.
1105 */ 1100 */
1106static void * 1101static void *
1107xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj __unused) 1102xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj __unused)
1108{ 1103{
1109 if (xbdi->xbdi_pendingreqs > 0) { 1104 if (xbdi->xbdi_pendingreqs > 0) {
1110 /* 1105 /*
1111 * There are pending requests. 1106 * There are pending requests.
1112 * Event or iodone() will restart processing 1107 * Event or iodone() will restart processing
1113 */ 1108 */
1114 xbdi->xbdi_cont = NULL; 1109 xbdi->xbdi_cont = NULL;
1115 xbdi_put(xbdi); 1110 xbdi_put(xbdi);
1116 return NULL; 1111 return NULL;
1117 } 1112 }
1118 xbdi->xbdi_cont = xbdback_co_cache_doflush; 1113 xbdi->xbdi_cont = xbdback_co_cache_doflush;
1119 return xbdback_pool_get(&xbdback_io_pool, xbdi); 1114 return xbdback_pool_get(&xbdback_io_pool, xbdi);
1120} 1115}
1121 1116
1122/* Start the flush work */ 1117/* Start the flush work */
1123static void * 1118static void *
1124xbdback_co_cache_doflush(struct xbdback_instance *xbdi, void *obj) 1119xbdback_co_cache_doflush(struct xbdback_instance *xbdi, void *obj)
1125{ 1120{
1126 struct xbdback_io *xbd_io; 1121 struct xbdback_io *xbd_io;
1127 1122
1128 XENPRINTF(("xbdback_co_cache_doflush %p %p\n", xbdi, obj)); 1123 XENPRINTF(("xbdback_co_cache_doflush %p %p\n", xbdi, obj));
1129 xbd_io = xbdi->xbdi_io = obj; 1124 xbd_io = obj;
1130 xbd_io->xio_xbdi = xbdi; 1125 xbd_io->xio_xbdi = xbdi;
1131 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation; 1126 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation;
1132 xbd_io->xio_id = xbdi->xbdi_xen_req.id; 1127 xbd_io->xio_id = xbdi->xbdi_xen_req.id;
1133 xbdi->xbdi_cont = xbdback_co_do_io; 1128 xbdi->xbdi_cont = xbdback_co_do_io;
1134 return xbdi; 1129 return xbd_io;
1135} 1130}
1136 1131
1137/* 1132/*
1138 * A read or write I/O request must be processed. Do some checks first, 1133 * A read or write I/O request must be processed. Do some checks first,
1139 * then get the segment information directly from the ring request. 1134 * then get the segment information directly from the ring request.
1140 */ 1135 */
1141static void * 1136static void *
1142xbdback_co_io(struct xbdback_instance *xbdi, void *obj __unused) 1137xbdback_co_io(struct xbdback_instance *xbdi, void *obj __unused)
1143{  1138{
1144 int i, error; 1139 int i, error;
1145 blkif_request_t *req, *reqn; 1140 blkif_request_t *req, *reqn;
1146 blkif_x86_32_request_t *req32; 1141 blkif_x86_32_request_t *req32;
1147 blkif_x86_64_request_t *req64; 1142 blkif_x86_64_request_t *req64;
1148 blkif_request_indirect_t *rinn; 1143 blkif_request_indirect_t *rinn;
1149 blkif_x86_32_request_indirect_t *rin32; 1144 blkif_x86_32_request_indirect_t *rin32;
1150 blkif_x86_64_request_indirect_t *rin64; 1145 blkif_x86_64_request_indirect_t *rin64;
1151 1146
1152 req = &xbdi->xbdi_xen_req; 1147 req = &xbdi->xbdi_xen_req;
1153 1148
1154 /* some sanity checks */ 1149 /* some sanity checks */
1155 KASSERT(req->operation == BLKIF_OP_READ || 1150 KASSERT(req->operation == BLKIF_OP_READ ||
1156 req->operation == BLKIF_OP_WRITE || 1151 req->operation == BLKIF_OP_WRITE ||
1157 req->operation == BLKIF_OP_INDIRECT); 1152 req->operation == BLKIF_OP_INDIRECT);
1158 if (req->operation == BLKIF_OP_WRITE) { 1153 if (req->operation == BLKIF_OP_WRITE) {
1159 if (xbdi->xbdi_ro) { 1154 if (xbdi->xbdi_ro) {
1160 error = EROFS; 1155 error = EROFS;
1161 goto end; 1156 goto end;
1162 } 1157 }
1163 } 1158 }
1164 1159
1165 /* copy request segments */ 1160 /* copy request segments */
1166 switch (xbdi->xbdi_proto) { 1161 switch (xbdi->xbdi_proto) {
1167 case XBDIP_NATIVE: 1162 case XBDIP_NATIVE:
1168 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n, 1163 reqn = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
1169 xbdi->xbdi_ring.ring_n.req_cons); 1164 xbdi->xbdi_ring.ring_n.req_cons);
1170 req->handle = reqn->handle; 1165 req->handle = reqn->handle;
1171 req->sector_number = reqn->sector_number; 1166 req->sector_number = reqn->sector_number;
1172 if (reqn->operation == BLKIF_OP_INDIRECT) { 1167 if (reqn->operation == BLKIF_OP_INDIRECT) {
1173 rinn = (blkif_request_indirect_t *)reqn; 1168 rinn = (blkif_request_indirect_t *)reqn;
1174 req->operation = rinn->indirect_op; 1169 req->operation = rinn->indirect_op;
1175 req->nr_segments = (uint8_t)rinn->nr_segments; 1170 req->nr_segments = (uint8_t)rinn->nr_segments;
1176 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) 1171 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS)
1177 goto bad_nr_segments; 1172 goto bad_nr_segments;
1178 xbdi->xbdi_in_gntref = rinn->indirect_grefs[0]; 1173 xbdi->xbdi_in_gntref = rinn->indirect_grefs[0];
1179 /* first_sect and segment grefs fetched later */ 1174 /* first_sect and segment grefs fetched later */
1180 } else { 1175 } else {
1181 req->nr_segments = reqn->nr_segments; 1176 req->nr_segments = reqn->nr_segments;
1182 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) 1177 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
1183 goto bad_nr_segments; 1178 goto bad_nr_segments;
1184 for (i = 0; i < req->nr_segments; i++) 1179 for (i = 0; i < req->nr_segments; i++)
1185 xbdi->xbdi_seg[i] = reqn->seg[i]; 1180 xbdi->xbdi_seg[i] = reqn->seg[i];
1186 xbdi->xbdi_in_gntref = 0; 1181 xbdi->xbdi_in_gntref = 0;
1187 } 1182 }
1188 break; 1183 break;
1189 case XBDIP_32: 1184 case XBDIP_32:
1190 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32, 1185 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
1191 xbdi->xbdi_ring.ring_n.req_cons); 1186 xbdi->xbdi_ring.ring_n.req_cons);
1192 req->handle = req32->handle; 1187 req->handle = req32->handle;
1193 req->sector_number = req32->sector_number; 1188 req->sector_number = req32->sector_number;
1194 if (req32->operation == BLKIF_OP_INDIRECT) { 1189 if (req32->operation == BLKIF_OP_INDIRECT) {
1195 rin32 = (blkif_x86_32_request_indirect_t *)req32; 1190 rin32 = (blkif_x86_32_request_indirect_t *)req32;
1196 req->operation = rin32->indirect_op; 1191 req->operation = rin32->indirect_op;
1197 req->nr_segments = (uint8_t)rin32->nr_segments; 1192 req->nr_segments = (uint8_t)rin32->nr_segments;
1198 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) 1193 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS)
1199 goto bad_nr_segments; 1194 goto bad_nr_segments;
1200 xbdi->xbdi_in_gntref = rin32->indirect_grefs[0]; 1195 xbdi->xbdi_in_gntref = rin32->indirect_grefs[0];
1201 /* first_sect and segment grefs fetched later */ 1196 /* first_sect and segment grefs fetched later */
1202 } else { 1197 } else {
1203 req->nr_segments = req32->nr_segments; 1198 req->nr_segments = req32->nr_segments;
1204 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) 1199 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
1205 goto bad_nr_segments; 1200 goto bad_nr_segments;
1206 for (i = 0; i < req->nr_segments; i++) 1201 for (i = 0; i < req->nr_segments; i++)
1207 xbdi->xbdi_seg[i] = req32->seg[i]; 1202 xbdi->xbdi_seg[i] = req32->seg[i];
1208 xbdi->xbdi_in_gntref = 0; 1203 xbdi->xbdi_in_gntref = 0;
1209 } 1204 }
1210 break; 1205 break;
1211 case XBDIP_64: 1206 case XBDIP_64:
1212 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64, 1207 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
1213 xbdi->xbdi_ring.ring_n.req_cons); 1208 xbdi->xbdi_ring.ring_n.req_cons);
1214 req->handle = req64->handle; 1209 req->handle = req64->handle;
1215 req->sector_number = req64->sector_number; 1210 req->sector_number = req64->sector_number;
1216 if (req64->operation == BLKIF_OP_INDIRECT) { 1211 if (req64->operation == BLKIF_OP_INDIRECT) {
1217 rin64 = (blkif_x86_64_request_indirect_t *)req64; 1212 rin64 = (blkif_x86_64_request_indirect_t *)req64;
1218 req->nr_segments = (uint8_t)rin64->nr_segments; 1213 req->nr_segments = (uint8_t)rin64->nr_segments;
1219 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS) 1214 if (req->nr_segments > VBD_MAX_INDIRECT_SEGMENTS)
1220 goto bad_nr_segments; 1215 goto bad_nr_segments;
1221 xbdi->xbdi_in_gntref = rin64->indirect_grefs[0]; 1216 xbdi->xbdi_in_gntref = rin64->indirect_grefs[0];
1222 /* first_sect and segment grefs fetched later */ 1217 /* first_sect and segment grefs fetched later */
1223 } else { 1218 } else {
1224 req->nr_segments = req64->nr_segments; 1219 req->nr_segments = req64->nr_segments;
1225 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) 1220 if (req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST)
1226 goto bad_nr_segments; 1221 goto bad_nr_segments;
1227 for (i = 0; i < req->nr_segments; i++) 1222 for (i = 0; i < req->nr_segments; i++)
1228 xbdi->xbdi_seg[i] = req64->seg[i]; 1223 xbdi->xbdi_seg[i] = req64->seg[i];
1229 xbdi->xbdi_in_gntref = 0; 1224 xbdi->xbdi_in_gntref = 0;
1230 } 1225 }
1231 break; 1226 break;
1232 } 1227 }
1233 1228
1234 /* Maximum value was checked earlier */ 1229 /* Maximum value was checked earlier */
1235 if (req->nr_segments < 1) 1230 if (req->nr_segments < 1)
1236 goto bad_nr_segments; 1231 goto bad_nr_segments;
1237 1232
1238 KASSERT(xbdi->xbdi_io == NULL); 
1239 xbdi->xbdi_cont = xbdback_co_io_gotio; 1233 xbdi->xbdi_cont = xbdback_co_io_gotio;
1240 return xbdback_pool_get(&xbdback_io_pool, xbdi); 1234 return xbdback_pool_get(&xbdback_io_pool, xbdi);
1241 1235
1242 bad_nr_segments: 1236 bad_nr_segments:
1243 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) { 1237 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
1244 printf("%s: invalid number of segments: %d\n", 1238 printf("%s: invalid number of segments: %d\n",
1245 xbdi->xbdi_name, req->nr_segments); 1239 xbdi->xbdi_name, req->nr_segments);
1246 } 1240 }
1247 error = EINVAL; 1241 error = EINVAL;
1248 /* FALLTHROUGH */ 1242 /* FALLTHROUGH */
1249 1243
1250 end: 1244 end:
1251 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id, 1245 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id,
1252 xbdi->xbdi_xen_req.operation, 1246 xbdi->xbdi_xen_req.operation,
1253 (error == EROFS) ? BLKIF_RSP_EOPNOTSUPP : BLKIF_RSP_ERROR); 1247 (error == EROFS) ? BLKIF_RSP_EOPNOTSUPP : BLKIF_RSP_ERROR);
1254 xbdi->xbdi_cont = xbdback_co_main_incr; 1248 xbdi->xbdi_cont = xbdback_co_main_incr;
1255 return xbdi; 1249 return xbdi;
1256} 1250}
1257 1251
1258/* Prepare an I/O buffer for an xbdback instance */ 1252/* Prepare an I/O buffer for an xbdback instance */
1259static void * 1253static void *
1260xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj) 1254xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
1261{ 1255{
1262 struct xbdback_io *xbd_io; 1256 struct xbdback_io *xbd_io;
1263 int buf_flags; 1257 int buf_flags;
1264 size_t bcount; 1258 size_t bcount;
1265 blkif_request_t *req; 1259 blkif_request_t *req;
1266 1260
1267 xbdi_get(xbdi); 1261 xbdi_get(xbdi);
1268 atomic_inc_uint(&xbdi->xbdi_pendingreqs); 1262 atomic_inc_uint(&xbdi->xbdi_pendingreqs);
1269  1263
1270 req = &xbdi->xbdi_xen_req; 1264 req = &xbdi->xbdi_xen_req;
1271 xbd_io = xbdi->xbdi_io = obj; 1265 xbd_io = obj;
1272 memset(xbd_io, 0, sizeof(*xbd_io)); 1266 memset(xbd_io, 0, sizeof(*xbd_io));
1273 buf_init(&xbd_io->xio_buf); 1267 buf_init(&xbd_io->xio_buf);
1274 xbd_io->xio_xbdi = xbdi; 1268 xbd_io->xio_xbdi = xbdi;
1275 xbd_io->xio_operation = req->operation; 1269 xbd_io->xio_operation = req->operation;
1276 xbd_io->xio_id = req->id; 1270 xbd_io->xio_id = req->id;
1277 1271
1278 /* If segments are on an indirect page, copy them now */ 1272 /* If segments are on an indirect page, copy them now */
1279 if (xbdi->xbdi_in_gntref) { 1273 if (xbdi->xbdi_in_gntref) {
1280 gnttab_copy_t gop; 1274 gnttab_copy_t gop;
1281 paddr_t ma; 1275 paddr_t ma;
1282 1276
1283 gop.flags = GNTCOPY_source_gref; 1277 gop.flags = GNTCOPY_source_gref;
1284 gop.len = req->nr_segments 1278 gop.len = req->nr_segments
1285 * sizeof(struct blkif_request_segment); 1279 * sizeof(struct blkif_request_segment);
1286 1280
1287 gop.source.u.ref = xbdi->xbdi_in_gntref; 1281 gop.source.u.ref = xbdi->xbdi_in_gntref;
1288 gop.source.offset = 0; 1282 gop.source.offset = 0;
1289 gop.source.domid = xbdi->xbdi_domid; 1283 gop.source.domid = xbdi->xbdi_domid;
1290 1284
1291 ma = xbdi->xbdi_seg_dmamap->dm_segs[0].ds_addr; 1285 ma = xbdi->xbdi_seg_dmamap->dm_segs[0].ds_addr;
1292 gop.dest.offset = ma & PAGE_MASK; 1286 gop.dest.offset = ma & PAGE_MASK;
1293 gop.dest.domid = DOMID_SELF; 1287 gop.dest.domid = DOMID_SELF;
1294 gop.dest.u.gmfn = ma >> PAGE_SHIFT; 1288 gop.dest.u.gmfn = ma >> PAGE_SHIFT;
1295 1289
1296 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, &gop, 1) != 0) { 1290 if (HYPERVISOR_grant_table_op(GNTTABOP_copy, &gop, 1) != 0) {
1297 printf("%s: GNTTABOP_copy failed\n", xbdi->xbdi_name); 1291 printf("%s: GNTTABOP_copy failed\n", xbdi->xbdi_name);
1298 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id, 1292 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id,
1299 xbdi->xbdi_xen_req.operation, 1293 xbdi->xbdi_xen_req.operation,
1300 BLKIF_RSP_ERROR); 1294 BLKIF_RSP_ERROR);
1301 xbdi->xbdi_cont = xbdback_co_main_incr; 1295 xbdi->xbdi_cont = xbdback_co_main_incr;
1302 return NULL; 1296 return NULL;
1303 } 1297 }
1304 } 1298 }
1305 1299
1306 /* Process segments */ 1300 /* Process segments */
1307 bcount = 0; 1301 bcount = 0;
1308 for (int i = 0; i < req->nr_segments; i++) { 1302 for (int i = 0; i < req->nr_segments; i++) {
1309 struct blkif_request_segment *seg = &xbdi->xbdi_seg[i]; 1303 struct blkif_request_segment *seg = &xbdi->xbdi_seg[i];
1310 xbd_io->xio_gref[i] = seg->gref; 1304 xbd_io->xio_gref[i] = seg->gref;
1311 bcount += (seg->last_sect - seg->first_sect + 1) 1305 bcount += (seg->last_sect - seg->first_sect + 1)
1312 * VBD_BSIZE; 1306 * VBD_BSIZE;
1313 } 1307 }
1314 xbd_io->xio_nrma = req->nr_segments; 1308 xbd_io->xio_nrma = req->nr_segments;
1315 xbd_io->xio_start_offset = xbdi->xbdi_seg[0].first_sect * VBD_BSIZE; 1309 xbd_io->xio_start_offset = xbdi->xbdi_seg[0].first_sect * VBD_BSIZE;
1316 1310
1317 KASSERT(bcount <= MAXPHYS); 1311 KASSERT(bcount <= MAXPHYS);
1318 KASSERT(xbd_io->xio_start_offset < PAGE_SIZE); 1312 KASSERT(xbd_io->xio_start_offset < PAGE_SIZE);
1319 KASSERT(bcount + xbd_io->xio_start_offset < VBD_VA_SIZE); 1313 KASSERT(bcount + xbd_io->xio_start_offset < VBD_VA_SIZE);
1320 1314
1321 /* Fill in the buf */ 1315 /* Fill in the buf */
1322 if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) { 1316 if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) {
1323 buf_flags = B_WRITE; 1317 buf_flags = B_WRITE;
1324 } else { 1318 } else {
1325 buf_flags = B_READ; 1319 buf_flags = B_READ;
1326 } 1320 }
1327 1321
1328 xbd_io->xio_buf.b_flags = buf_flags; 1322 xbd_io->xio_buf.b_flags = buf_flags;
1329 xbd_io->xio_buf.b_cflags = 0; 1323 xbd_io->xio_buf.b_cflags = 0;
1330 xbd_io->xio_buf.b_oflags = 0; 1324 xbd_io->xio_buf.b_oflags = 0;
1331 xbd_io->xio_buf.b_iodone = xbdback_iodone; 1325 xbd_io->xio_buf.b_iodone = xbdback_iodone;
1332 xbd_io->xio_buf.b_proc = NULL; 1326 xbd_io->xio_buf.b_proc = NULL;
1333 xbd_io->xio_buf.b_vp = xbdi->xbdi_vp; 1327 xbd_io->xio_buf.b_vp = xbdi->xbdi_vp;
1334 xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock; 1328 xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock;
1335 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev; 1329 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev;
1336 xbd_io->xio_buf.b_blkno = req->sector_number; 1330 xbd_io->xio_buf.b_blkno = req->sector_number;
1337 xbd_io->xio_buf.b_bcount = bcount; 1331 xbd_io->xio_buf.b_bcount = bcount;
1338 xbd_io->xio_buf.b_data = NULL; 1332 xbd_io->xio_buf.b_data = NULL;
1339 xbd_io->xio_buf.b_private = xbd_io; 1333 xbd_io->xio_buf.b_private = xbd_io;
1340 1334
1341 xbdi->xbdi_cont = xbdback_co_do_io; 1335 xbdi->xbdi_cont = xbdback_co_do_io;
1342 return xbdback_map_shm(xbdi->xbdi_io); 1336 return xbdback_map_shm(xbd_io);
1343} 1337}
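
To make the segment arithmetic above concrete, the short example below computes b_bcount and xio_start_offset for two made-up segments, assuming VBD_BSIZE is the 512-byte Xen sector size: a segment covering sectors 0..7 contributes a full 4096-byte page, and a 2-sector segment adds 1024 bytes. The values are invented for illustration, not taken from a real request.

#include <stdio.h>

#define VBD_BSIZE	512		/* assumed Xen sector size */

struct seg { unsigned first_sect, last_sect; };

int
main(void)
{
	struct seg segs[] = { { 0, 7 }, { 0, 1 } };
	size_t bcount = 0;

	for (size_t i = 0; i < sizeof(segs) / sizeof(segs[0]); i++)
		bcount += (segs[i].last_sect - segs[i].first_sect + 1)
		    * VBD_BSIZE;

	/* the I/O starts at first_sect of the first segment within its page */
	size_t start_offset = segs[0].first_sect * VBD_BSIZE;
	printf("bcount=%zu start_offset=%zu\n", bcount, start_offset);
	return 0;
}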
1344 1338
1345static void 1339static void
1346xbdback_io_error(struct xbdback_io *xbd_io, int error) 1340xbdback_io_error(struct xbdback_io *xbd_io, int error)
1347{ 1341{
1348 xbd_io->xio_buf.b_error = error; 1342 xbd_io->xio_buf.b_error = error;
1349 xbdback_iodone(&xbd_io->xio_buf); 1343 xbdback_iodone(&xbd_io->xio_buf);
1350} 1344}
1351 1345
1352/* 1346/*
1353 * Main xbdback I/O routine. It can either perform a flush operation or 1347 * Main xbdback I/O routine. It can either perform a flush operation or
1354 * schedule a read/write operation. 1348 * schedule a read/write operation.
1355 */ 1349 */
1356static void * 1350static void *
1357xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj) 1351xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj)
1358{ 1352{
1359 struct xbdback_io *xbd_io = xbdi->xbdi_io; 1353 struct xbdback_io *xbd_io = obj;
1360 1354
1361 switch (xbd_io->xio_operation) { 1355 switch (xbd_io->xio_operation) {
1362 case BLKIF_OP_FLUSH_DISKCACHE: 1356 case BLKIF_OP_FLUSH_DISKCACHE:
1363 { 1357 {
1364 int error; 1358 int error;
1365 int force = 1; 1359 int force = 1;
1366 1360
1367 error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE, 1361 error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE,
1368 kauth_cred_get()); 1362 kauth_cred_get());
1369 if (error) { 1363 if (error) {
1370 aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n", 1364 aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n",
1371 xbdi->xbdi_xbusd->xbusd_path, error); 1365 xbdi->xbdi_xbusd->xbusd_path, error);
1372 if (error == EOPNOTSUPP || error == ENOTTY) 1366 if (error == EOPNOTSUPP || error == ENOTTY)
1373 error = BLKIF_RSP_EOPNOTSUPP; 1367 error = BLKIF_RSP_EOPNOTSUPP;
1374 else 1368 else
1375 error = BLKIF_RSP_ERROR; 1369 error = BLKIF_RSP_ERROR;
1376 } else 1370 } else
1377 error = BLKIF_RSP_OKAY; 1371 error = BLKIF_RSP_OKAY;
1378 xbdback_send_reply(xbdi, xbd_io->xio_id, 1372 xbdback_send_reply(xbdi, xbd_io->xio_id,
1379 xbd_io->xio_operation, error); 1373 xbd_io->xio_operation, error);
1380 xbdback_pool_put(&xbdback_io_pool, xbd_io); 1374 xbdback_pool_put(&xbdback_io_pool, xbd_io);
1381 xbdi_put(xbdi); 1375 xbdi_put(xbdi);
1382 xbdi->xbdi_io = NULL; 
1383 xbdi->xbdi_cont = xbdback_co_main_incr; 1376 xbdi->xbdi_cont = xbdback_co_main_incr;
1384 return xbdi; 1377 return xbdi;
1385 } 1378 }
1386 case BLKIF_OP_READ: 1379 case BLKIF_OP_READ:
1387 case BLKIF_OP_WRITE: 1380 case BLKIF_OP_WRITE:
1388 xbd_io->xio_buf.b_data = (void *) 1381 xbd_io->xio_buf.b_data = (void *)
1389 (xbd_io->xio_vaddr + xbd_io->xio_start_offset); 1382 (xbd_io->xio_vaddr + xbd_io->xio_start_offset);
1390 1383
1391 if ((xbd_io->xio_buf.b_flags & B_READ) == 0) { 1384 if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
1392 mutex_enter(xbd_io->xio_buf.b_vp->v_interlock); 1385 mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
1393 xbd_io->xio_buf.b_vp->v_numoutput++; 1386 xbd_io->xio_buf.b_vp->v_numoutput++;
1394 mutex_exit(xbd_io->xio_buf.b_vp->v_interlock); 1387 mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
1395 } 1388 }
1396 /* will call xbdback_iodone() asynchronously when done */ 1389 /* will call xbdback_iodone() asynchronously when done */
1397 bdev_strategy(&xbd_io->xio_buf); 1390 bdev_strategy(&xbd_io->xio_buf);
1398 xbdi->xbdi_io = NULL; 
1399 xbdi->xbdi_cont = xbdback_co_main_incr; 1391 xbdi->xbdi_cont = xbdback_co_main_incr;
1400 return xbdi; 1392 return xbdi;
1401 default: 1393 default:
1402 /* Should never happen */ 1394 /* Should never happen */
1403 panic("xbdback_co_do_io: unsupported operation %d", 1395 panic("xbdback_co_do_io: unsupported operation %d",
1404 xbd_io->xio_operation); 1396 xbd_io->xio_operation);
1405 } 1397 }
1406} 1398}
1407 1399
1408/* 1400/*
1409 * Called from softint(9) context when an I/O is done: for each request, send 1401 * Called from softint(9) context when an I/O is done: for each request, send
1410 * back the associated reply to the domain. 1402 * back the associated reply to the domain.
1411 * 1403 *
1412 * This gets reused by xbdback_io_error to report errors from other sources. 1404 * This gets reused by xbdback_io_error to report errors from other sources.
1413 */ 1405 */
1414static void 1406static void
1415xbdback_iodone(struct buf *bp) 1407xbdback_iodone(struct buf *bp)
1416{ 1408{
1417 struct xbdback_io *xbd_io; 1409 struct xbdback_io *xbd_io;
1418 struct xbdback_instance *xbdi; 1410 struct xbdback_instance *xbdi;
1419 int status; 1411 int status;
1420 1412
1421 KERNEL_LOCK(1, NULL); /* XXXSMP */ 1413 KERNEL_LOCK(1, NULL); /* XXXSMP */
1422 1414
1423 xbd_io = bp->b_private; 1415 xbd_io = bp->b_private;
1424 xbdi = xbd_io->xio_xbdi; 1416 xbdi = xbd_io->xio_xbdi;
1425 1417
1426 XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n", 1418 XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n",
1427 xbdi->xbdi_domid, (long)xbd_io)); 1419 xbdi->xbdi_domid, (long)xbd_io));
1428 1420
1429 KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL); 1421 KASSERT(bp->b_error != 0 || xbd_io->xio_xv != NULL);
1430 if (xbd_io->xio_xv != NULL) 1422 if (xbd_io->xio_xv != NULL)
1431 xbdback_unmap_shm(xbd_io); 1423 xbdback_unmap_shm(xbd_io);
1432 1424
1433 if (bp->b_error != 0) { 1425 if (bp->b_error != 0) {
1434 printf("xbd IO domain %d: error %d\n", 1426 printf("xbd IO domain %d: error %d\n",
1435 xbdi->xbdi_domid, bp->b_error); 1427 xbdi->xbdi_domid, bp->b_error);
1436 status = BLKIF_RSP_ERROR; 1428 status = BLKIF_RSP_ERROR;
1437 } else 1429 } else
1438 status = BLKIF_RSP_OKAY; 1430 status = BLKIF_RSP_OKAY;
1439  1431
1440 xbdback_send_reply(xbdi, xbd_io->xio_id, xbd_io->xio_operation, status); 1432 xbdback_send_reply(xbdi, xbd_io->xio_id, xbd_io->xio_operation, status);
1441 1433
1442 xbdi_put(xbdi); 1434 xbdi_put(xbdi);
1443 atomic_dec_uint(&xbdi->xbdi_pendingreqs); 1435 atomic_dec_uint(&xbdi->xbdi_pendingreqs);
1444 buf_destroy(&xbd_io->xio_buf); 1436 buf_destroy(&xbd_io->xio_buf);
1445 xbdback_pool_put(&xbdback_io_pool, xbd_io); 1437 xbdback_pool_put(&xbdback_io_pool, xbd_io);
1446 1438
1447 xbdback_wakeup_thread(xbdi); 1439 xbdback_wakeup_thread(xbdi);
1448 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 1440 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */
1449} 1441}
1450 1442
1451/* 1443/*
1452 * Wake up the per xbdback instance thread. 1444 * Wake up the per xbdback instance thread.
1453 */ 1445 */
1454static void 1446static void
1455xbdback_wakeup_thread(struct xbdback_instance *xbdi) 1447xbdback_wakeup_thread(struct xbdback_instance *xbdi)
1456{ 1448{
1457 1449
1458 mutex_enter(&xbdi->xbdi_lock); 1450 mutex_enter(&xbdi->xbdi_lock);
1459 /* only set RUN state when we are WAITING for work */ 1451 /* only set RUN state when we are WAITING for work */
1460 if (xbdi->xbdi_status == WAITING) 1452 if (xbdi->xbdi_status == WAITING)
1461 xbdi->xbdi_status = RUN; 1453 xbdi->xbdi_status = RUN;
1462 cv_broadcast(&xbdi->xbdi_cv); 1454 cv_broadcast(&xbdi->xbdi_cv);
1463 mutex_exit(&xbdi->xbdi_lock); 1455 mutex_exit(&xbdi->xbdi_lock);
1464} 1456}
1465 1457
1466/* 1458/*
1467 * Called once a request has completed. Place the reply in the ring and 1459 * Called once a request has completed. Place the reply in the ring and
1468 * notify the guest OS. 1460 * notify the guest OS.
1469 */ 1461 */
1470static void 1462static void
1471xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id, 1463xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id,
1472 int op, int status) 1464 int op, int status)
1473{ 1465{
1474 blkif_response_t *resp_n; 1466 blkif_response_t *resp_n;
1475 blkif_x86_32_response_t *resp32; 1467 blkif_x86_32_response_t *resp32;
1476 blkif_x86_64_response_t *resp64; 1468 blkif_x86_64_response_t *resp64;
1477 int notify; 1469 int notify;
1478 1470
1479 /* 1471 /*
1480 * The ring can be accessed by the xbdback thread, xbdback_iodone() 1472 * The ring can be accessed by the xbdback thread, xbdback_iodone()
1481 * handler, or any handler that triggered the shm callback. So 1473 * handler, or any handler that triggered the shm callback. So
1482 * protect ring access via the xbdi_lock mutex. 1474 * protect ring access via the xbdi_lock mutex.
1483 */ 1475 */
1484 mutex_enter(&xbdi->xbdi_lock); 1476 mutex_enter(&xbdi->xbdi_lock);
1485 switch (xbdi->xbdi_proto) { 1477 switch (xbdi->xbdi_proto) {
1486 case XBDIP_NATIVE: 1478 case XBDIP_NATIVE:
1487 resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n, 1479 resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n,
1488 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1480 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1489 resp_n->id = id; 1481 resp_n->id = id;
1490 resp_n->operation = op; 1482 resp_n->operation = op;
1491 resp_n->status = status; 1483 resp_n->status = status;
1492 break; 1484 break;
1493 case XBDIP_32: 1485 case XBDIP_32:
1494 resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32, 1486 resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32,
1495 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1487 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1496 resp32->id = id; 1488 resp32->id = id;
1497 resp32->operation = op; 1489 resp32->operation = op;
1498 resp32->status = status; 1490 resp32->status = status;
1499 break; 1491 break;
1500 case XBDIP_64: 1492 case XBDIP_64:
1501 resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64, 1493 resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64,
1502 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1494 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1503 resp64->id = id; 1495 resp64->id = id;
1504 resp64->operation = op; 1496 resp64->operation = op;
1505 resp64->status = status; 1497 resp64->status = status;
1506 break; 1498 break;
1507 } 1499 }
1508 xbdi->xbdi_ring.ring_n.rsp_prod_pvt++; 1500 xbdi->xbdi_ring.ring_n.rsp_prod_pvt++;
1509 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify); 1501 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify);
1510 mutex_exit(&xbdi->xbdi_lock); 1502 mutex_exit(&xbdi->xbdi_lock);
1511 1503
1512 if (notify) { 1504 if (notify) {
1513 XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid)); 1505 XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid));
1514 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn); 1506 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
1515 } 1507 }
1516} 1508}
1517 1509
1518/* 1510/*
1519 * Map multiple entries of an I/O request into the backend's VA space. 1511 * Map multiple entries of an I/O request into the backend's VA space.
1520 * The xbd_io->xio_gref array has to be filled out by the caller. 1512 * The xbd_io->xio_gref array has to be filled out by the caller.
1521 */ 1513 */
1522static void * 1514static void *
1523xbdback_map_shm(struct xbdback_io *xbd_io) 1515xbdback_map_shm(struct xbdback_io *xbd_io)
1524{ 1516{
1525 struct xbdback_instance *xbdi = xbd_io->xio_xbdi; 1517 struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
1526 int error, s; 1518 int error, s;
1527 1519
1528#ifdef XENDEBUG_VBD 1520#ifdef XENDEBUG_VBD
1529 int i; 1521 int i;
1530 printf("xbdback_map_shm map grant "); 1522 printf("xbdback_map_shm map grant ");
1531 for (i = 0; i < xbd_io->xio_nrma; i++) { 1523 for (i = 0; i < xbd_io->xio_nrma; i++) {
1532 printf("%u ", (u_int)xbd_io->xio_gref[i]); 1524 printf("%u ", (u_int)xbd_io->xio_gref[i]);
1533 } 1525 }
1534#endif 1526#endif
1535 1527
1536 s = splvm(); /* XXXSMP */ 1528 s = splvm(); /* XXXSMP */
1537 xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free); 1529 xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free);
1538 KASSERT(xbd_io->xio_xv != NULL); 1530 KASSERT(xbd_io->xio_xv != NULL);
1539 SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next); 1531 SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next);
1540 xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr; 1532 xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr;
1541 splx(s); 1533 splx(s);
1542 1534
1543 error = xen_shm_map(xbd_io->xio_nrma, xbdi->xbdi_domid, 1535 error = xen_shm_map(xbd_io->xio_nrma, xbdi->xbdi_domid,
1544 xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,  1536 xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,
1545 (xbd_io->xio_operation == BLKIF_OP_WRITE) ? XSHM_RO : 0); 1537 (xbd_io->xio_operation == BLKIF_OP_WRITE) ? XSHM_RO : 0);
1546 1538
1547 switch(error) { 1539 switch(error) {
1548 case 0: 1540 case 0:
1549#ifdef XENDEBUG_VBD 1541#ifdef XENDEBUG_VBD
1550 printf("handle "); 1542 printf("handle ");
1551 for (i = 0; i < xbd_io->xio_nrma; i++) { 1543 for (i = 0; i < xbd_io->xio_nrma; i++) {
1552 printf("%u ", (u_int)xbd_io->xio_gh[i]); 1544 printf("%u ", (u_int)xbd_io->xio_gh[i]);
1553 } 1545 }
1554 printf("\n"); 1546 printf("\n");
1555#endif 1547#endif
1556 return xbdi; 1548 return xbd_io;
1557 default: 1549 default:
1558 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) { 1550 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
1559 printf("xbdback_map_shm: xen_shm error %d ", error); 1551 printf("xbdback_map_shm: xen_shm error %d ", error);
1560 } 1552 }
1561 xbdback_io_error(xbdi->xbdi_io, error); 1553 /* this will also free xbd_io via xbdback_iodone() */
 1554 xbdback_io_error(xbd_io, error);
1562 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next); 1555 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
1563 xbd_io->xio_xv = NULL; 1556 xbd_io->xio_xv = NULL;
1564 xbdi->xbdi_io = NULL; 1557 /* do not retry */
1565 // do not retry 
1566 xbdi->xbdi_cont = xbdback_co_main_incr; 1558 xbdi->xbdi_cont = xbdback_co_main_incr;
1567 return xbdi; 1559 return xbdi;
1568 } 1560 }
1569} 1561}
1570 1562
1571/* unmap a request from our virtual address space (request is done) */ 1563/* unmap a request from our virtual address space (request is done) */
1572static void 1564static void
1573xbdback_unmap_shm(struct xbdback_io *xbd_io) 1565xbdback_unmap_shm(struct xbdback_io *xbd_io)
1574{ 1566{
1575 struct xbdback_instance *xbdi = xbd_io->xio_xbdi; 1567 struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
1576 1568
1577#ifdef XENDEBUG_VBD 1569#ifdef XENDEBUG_VBD
1578 int i; 1570 int i;
1579 printf("xbdback_unmap_shm handle "); 1571 printf("xbdback_unmap_shm handle ");
1580 for (i = 0; i < xbd_io->xio_nrma; i++) { 1572 for (i = 0; i < xbd_io->xio_nrma; i++) {
1581 printf("%u ", (u_int)xbd_io->xio_gh[i]); 1573 printf("%u ", (u_int)xbd_io->xio_gh[i]);
1582 } 1574 }
1583 printf("\n"); 1575 printf("\n");
1584#endif 1576#endif
1585 1577
1586 KASSERT(xbd_io->xio_xv != NULL); 1578 KASSERT(xbd_io->xio_xv != NULL);
1587 xen_shm_unmap(xbd_io->xio_vaddr, xbd_io->xio_nrma, 1579 xen_shm_unmap(xbd_io->xio_vaddr, xbd_io->xio_nrma,
1588 xbd_io->xio_gh); 1580 xbd_io->xio_gh);
1589 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next); 1581 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
1590 xbd_io->xio_xv = NULL; 1582 xbd_io->xio_xv = NULL;
1591 xbd_io->xio_vaddr = -1; 1583 xbd_io->xio_vaddr = -1;
1592} 1584}
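
xbdback_map_shm() and xbdback_unmap_shm() above recycle a fixed set of VA slots through the xbdi_va_free SLIST: pop a slot on map, push it back on unmap. The sketch below shows just that free-list pattern with <sys/queue.h> in userland; the slot addresses and count are made up for the illustration.

#include <sys/queue.h>
#include <stdio.h>

struct va_slot {
	unsigned long xv_vaddr;
	SLIST_ENTRY(va_slot) xv_next;
};

static SLIST_HEAD(, va_slot) va_free = SLIST_HEAD_INITIALIZER(va_free);
static struct va_slot slots[2] = { { 0x1000 }, { 0x2000 } };

int
main(void)
{
	/* populate the free list once, as instance setup would */
	for (int i = 0; i < 2; i++)
		SLIST_INSERT_HEAD(&va_free, &slots[i], xv_next);

	/* "map": take the first free slot */
	struct va_slot *xv = SLIST_FIRST(&va_free);
	SLIST_REMOVE_HEAD(&va_free, xv_next);
	printf("mapped at %#lx\n", xv->xv_vaddr);

	/* "unmap": return the slot to the free list */
	SLIST_INSERT_HEAD(&va_free, xv, xv_next);
	return 0;
}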
1593 1585
1594/* Obtain memory from a pool */ 1586/* Obtain memory from a pool */
1595static void * 1587static void *
1596xbdback_pool_get(struct pool_cache *pc, 1588xbdback_pool_get(struct pool_cache *pc,
1597 struct xbdback_instance *xbdi) 1589 struct xbdback_instance *xbdi)
1598{ 1590{
1599 return pool_cache_get(pc, PR_WAITOK); 1591 return pool_cache_get(pc, PR_WAITOK);
1600} 1592}
1601 1593
1602/* Restore memory to a pool */ 1594/* Restore memory to a pool */
1603static void 1595static void
1604xbdback_pool_put(struct pool_cache *pc, void *item) 1596xbdback_pool_put(struct pool_cache *pc, void *item)
1605{ 1597{
1606 pool_cache_put(pc, item); 1598 pool_cache_put(pc, item);
1607} 1599}
1608 1600
1609/* 1601/*
1610 * Trampoline routine. Calls continuations in a loop and only exits when 1602 * Trampoline routine. Calls continuations in a loop and only exits when
1611 * either the returned object or the next callback is NULL. 1603 * either the returned object or the next callback is NULL.
1612 */ 1604 */
1613static void 1605static void
1614xbdback_trampoline(struct xbdback_instance *xbdi, void *obj) 1606xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
1615{ 1607{
1616 xbdback_cont_t cont; 1608 xbdback_cont_t cont;
1617 1609
1618 while(obj != NULL && xbdi->xbdi_cont != NULL) { 1610 while(obj != NULL && xbdi->xbdi_cont != NULL) {
1619 cont = xbdi->xbdi_cont; 1611 cont = xbdi->xbdi_cont;
1620#ifdef DIAGNOSTIC 1612#ifdef DIAGNOSTIC
1621 xbdi->xbdi_cont = (xbdback_cont_t)0xDEADBEEF; 1613 xbdi->xbdi_cont = (xbdback_cont_t)0xDEADBEEF;
1622#endif 1614#endif
1623 obj = (*cont)(xbdi, obj); 1615 obj = (*cont)(xbdi, obj);
1624#ifdef DIAGNOSTIC 1616#ifdef DIAGNOSTIC
1625 if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) { 1617 if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) {
1626 printf("xbdback_trampoline: 0x%lx didn't set " 1618 printf("xbdback_trampoline: 0x%lx didn't set "
1627 "xbdi->xbdi_cont!\n", (long)cont); 1619 "xbdi->xbdi_cont!\n", (long)cont);
1628 panic("xbdback_trampoline: bad continuation"); 1620 panic("xbdback_trampoline: bad continuation");
1629 } 1621 }
1630#endif 1622#endif
1631 } 1623 }
1632} 1624}
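
The diff above threads the xbdback_io through the continuation chain itself: a step now returns the I/O object so xbdback_trampoline() hands it to the next step as obj, instead of parking it in the instance between steps. The stand-alone sketch below shows that continuation-passing shape with invented names (struct inst, struct io, step_*); it is only a model of the control flow, not the xbdback code.

#include <stdio.h>
#include <stdlib.h>

struct inst;
typedef void *(*cont_t)(struct inst *, void *);

struct inst {
	cont_t cont;			/* next continuation to run */
};

struct io {				/* stands in for struct xbdback_io */
	int id;
};

static void *
step_do_io(struct inst *in, void *obj)
{
	struct io *io = obj;		/* received as parameter, not via in */

	printf("doing io %d\n", io->id);
	free(io);
	in->cont = NULL;		/* chain ends here */
	return in;
}

static void *
step_get_io(struct inst *in, void *obj)
{
	struct io *io;

	(void)obj;
	if ((io = malloc(sizeof(*io))) == NULL) {
		in->cont = NULL;
		return NULL;
	}
	io->id = 42;
	in->cont = step_do_io;
	return io;			/* handed to step_do_io as obj */
}

int
main(void)
{
	struct inst in = { .cont = step_get_io };
	void *obj = &in;

	/* the trampoline loop, as in xbdback_trampoline() */
	while (obj != NULL && in.cont != NULL) {
		cont_t c = in.cont;
		obj = (*c)(&in, obj);
	}
	return 0;
}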