Mon Apr 20 14:11:05 2020 UTC
do not retry when xen_shm_map() fails after an error has already been returned
for the request; retrying corrupts the ring. Just go on to the next request.

should fix the failure cascade seen with ZFS when a request fails,
reported by Brian Buhrow on port-xen


(jdolecek)
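
The change is easiest to see as a control-flow question: once an error response has
been queued for a request whose grant pages could not be mapped, the worker must not
loop back and retry the same mapping, because the ring indexes have already moved past
that request. Below is a minimal, self-contained sketch of that behaviour, not the
actual driver code; ring slots, shm_map(), send_error_reply() and do_io() are
hypothetical stand-ins for the xbdback_co_* continuations, xen_shm_map() and
xbdback_send_reply() seen in the diff.

#include <stdio.h>

#define NREQS 4

struct req {
	int id;
	int map_fails;	/* simulate a grant-mapping failure for this slot */
};

static int
shm_map(struct req *r)
{
	/* stand-in for xen_shm_map(): 0 on success, nonzero on failure */
	return r->map_fails ? -1 : 0;
}

static void
send_error_reply(struct req *r)
{
	/* stand-in for pushing an error response on the ring */
	printf("req %d: error response pushed, ring stays consistent\n", r->id);
}

static void
do_io(struct req *r)
{
	/* stand-in for mapping the segments and issuing the actual I/O */
	printf("req %d: mapped and submitted\n", r->id);
}

int
main(void)
{
	struct req ring[NREQS] = { {0, 0}, {1, 1}, {2, 0}, {3, 0} };

	for (int i = 0; i < NREQS; i++) {
		if (shm_map(&ring[i]) != 0) {
			/*
			 * The fixed behaviour: report the error once and
			 * move on to the next request.  Retrying the map
			 * here, after an error was already returned for
			 * the request, is what corrupted the ring.
			 */
			send_error_reply(&ring[i]);
			continue;
		}
		do_io(&ring[i]);
	}
	return 0;
}

In the real driver this logic is spread over the continuation chain
(xbdback_co_map_io() -> xbdback_co_do_io()), but the invariant is the same:
report the failure once and continue with the next ring slot.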
diff -r1.81 -r1.82 src/sys/arch/xen/xen/xbdback_xenbus.c

cvs diff -r1.81 -r1.82 src/sys/arch/xen/xen/xbdback_xenbus.c

--- src/sys/arch/xen/xen/xbdback_xenbus.c 2020/04/20 03:00:33 1.81
+++ src/sys/arch/xen/xen/xbdback_xenbus.c 2020/04/20 14:11:04 1.82
@@ -1,1874 +1,1875 @@
1/* $NetBSD: xbdback_xenbus.c,v 1.81 2020/04/20 03:00:33 msaitoh Exp $ */ 1/* $NetBSD: xbdback_xenbus.c,v 1.82 2020/04/20 14:11:04 jdolecek Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2006 Manuel Bouyer. 4 * Copyright (c) 2006 Manuel Bouyer.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * 25 *
26 */ 26 */
27 27
28#include <sys/cdefs.h> 28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.81 2020/04/20 03:00:33 msaitoh Exp $"); 29__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.82 2020/04/20 14:11:04 jdolecek Exp $");
30 30
31#include <sys/atomic.h> 31#include <sys/atomic.h>
32#include <sys/buf.h> 32#include <sys/buf.h>
33#include <sys/condvar.h> 33#include <sys/condvar.h>
34#include <sys/conf.h> 34#include <sys/conf.h>
35#include <sys/disk.h> 35#include <sys/disk.h>
36#include <sys/device.h> 36#include <sys/device.h>
37#include <sys/fcntl.h> 37#include <sys/fcntl.h>
38#include <sys/kauth.h> 38#include <sys/kauth.h>
39#include <sys/kernel.h> 39#include <sys/kernel.h>
40#include <sys/kmem.h> 40#include <sys/kmem.h>
41#include <sys/kthread.h> 41#include <sys/kthread.h>
42#include <sys/mutex.h> 42#include <sys/mutex.h>
43#include <sys/param.h> 43#include <sys/param.h>
44#include <sys/queue.h> 44#include <sys/queue.h>
45#include <sys/systm.h> 45#include <sys/systm.h>
46#include <sys/time.h> 46#include <sys/time.h>
47#include <sys/types.h> 47#include <sys/types.h>
48#include <sys/vnode.h> 48#include <sys/vnode.h>
49 49
50#include <xen/xen.h> 50#include <xen/xen.h>
51#include <xen/xen_shm.h> 51#include <xen/xen_shm.h>
52#include <xen/evtchn.h> 52#include <xen/evtchn.h>
53#include <xen/xenbus.h> 53#include <xen/xenbus.h>
54#include <xen/xenring.h> 54#include <xen/xenring.h>
55#include <xen/include/public/io/protocols.h> 55#include <xen/include/public/io/protocols.h>
56 56
57/* #define XENDEBUG_VBD */ 57/* #define XENDEBUG_VBD */
58#ifdef XENDEBUG_VBD 58#ifdef XENDEBUG_VBD
59#define XENPRINTF(x) printf x 59#define XENPRINTF(x) printf x
60#else 60#else
61#define XENPRINTF(x) 61#define XENPRINTF(x)
62#endif 62#endif
63 63
64#define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 64#define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
65 65
66/* 66/*
67 * Backend block device driver for Xen 67 * Backend block device driver for Xen
68 */ 68 */
69 69
70/* Values are expressed in 512-byte sectors */ 70/* Values are expressed in 512-byte sectors */
71#define VBD_BSIZE 512 71#define VBD_BSIZE 512
72#define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1) 72#define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1)
73 73
74/* Need to alloc one extra page to account for possible mapping offset */ 74/* Need to alloc one extra page to account for possible mapping offset */
75#define VBD_VA_SIZE (MAXPHYS + PAGE_SIZE) 75#define VBD_VA_SIZE (MAXPHYS + PAGE_SIZE)
76 76
77struct xbdback_request; 77struct xbdback_request;
78struct xbdback_io; 78struct xbdback_io;
79struct xbdback_fragment; 79struct xbdback_fragment;
80struct xbdback_instance; 80struct xbdback_instance;
81 81
82/* 82/*
83 * status of a xbdback instance: 83 * status of a xbdback instance:
84 * WAITING: xbdback instance is connected, waiting for requests 84 * WAITING: xbdback instance is connected, waiting for requests
85 * RUN: xbdi thread must be woken up, I/Os have to be processed 85 * RUN: xbdi thread must be woken up, I/Os have to be processed
86 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled 86 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled
87 * DISCONNECTED: no I/Os, no ring, the thread should terminate. 87 * DISCONNECTED: no I/Os, no ring, the thread should terminate.
88 */ 88 */
89typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t; 89typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t;
90 90
91/* 91/*
92 * Each xbdback instance is managed by a single thread that handles all 92 * Each xbdback instance is managed by a single thread that handles all
93 * the I/O processing. As there are a variety of conditions that can block, 93 * the I/O processing. As there are a variety of conditions that can block,
94 * everything will be done in a sort of continuation-passing style. 94 * everything will be done in a sort of continuation-passing style.
95 * 95 *
96 * When the execution has to block to delay processing, for example to 96 * When the execution has to block to delay processing, for example to
97 * allow system to recover because of memory shortage (via shared memory 97 * allow system to recover because of memory shortage (via shared memory
98 * callback), the return value of a continuation can be set to NULL. In that 98 * callback), the return value of a continuation can be set to NULL. In that
99 * case, the thread will go back to sleeping and wait for the proper 99 * case, the thread will go back to sleeping and wait for the proper
100 * condition before it starts processing requests again from where it left. 100 * condition before it starts processing requests again from where it left.
101 * Continuation state is "stored" in the xbdback instance (xbdi_cont and 101 * Continuation state is "stored" in the xbdback instance (xbdi_cont and
102 * xbdi_cont_aux), and should only be manipulated by the instance thread. 102 * xbdi_cont_aux), and should only be manipulated by the instance thread.
103 * 103 *
104 * As xbdback(4) has to handle different sort of asynchronous events (Xen 104 * As xbdback(4) has to handle different sort of asynchronous events (Xen
105 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock 105 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
106 * mutex is used to protect specific elements of the xbdback instance from 106 * mutex is used to protect specific elements of the xbdback instance from
107 * concurrent access: thread status and ring access (when pushing responses). 107 * concurrent access: thread status and ring access (when pushing responses).
108 *  108 *
109 * Here's how the call graph is supposed to be for a single I/O: 109 * Here's how the call graph is supposed to be for a single I/O:
110 * 110 *
111 * xbdback_co_main() 111 * xbdback_co_main()
112 * | 112 * |
113 * | --> xbdback_co_cache_doflush() or NULL 113 * | --> xbdback_co_cache_doflush() or NULL
114 * | | 114 * | |
115 * | - xbdback_co_cache_flush2() <- xbdback_co_do_io() <- 115 * | - xbdback_co_cache_flush2() <- xbdback_co_do_io() <-
116 * | | | 116 * | | |
117 * | |-> xbdback_co_cache_flush() -> xbdback_co_map_io()- 117 * | |-> xbdback_co_cache_flush() -> xbdback_co_map_io()-
118 * xbdback_co_main_loop()-| 118 * xbdback_co_main_loop()-|
119 * | |-> xbdback_co_main_done() ---> xbdback_co_map_io()- 119 * | |-> xbdback_co_main_done() ---> xbdback_co_map_io()-
120 * | | | 120 * | | |
121 * | -- xbdback_co_main_done2() <-- xbdback_co_do_io() <- 121 * | -- xbdback_co_main_done2() <-- xbdback_co_do_io() <-
122 * | | 122 * | |
123 * | --> xbdback_co_main() or NULL 123 * | --> xbdback_co_main() or NULL
124 * | 124 * |
125 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop() 125 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
126 * | 126 * |
127 * xbdback_co_io_gotreq()--+--> xbdback_co_map_io() --- 127 * xbdback_co_io_gotreq()--+--> xbdback_co_map_io() ---
128 * | | | 128 * | | |
129 * -> xbdback_co_io_loop()----| <- xbdback_co_do_io() <-- 129 * -> xbdback_co_io_loop()----| <- xbdback_co_do_io() <--
130 * | | | | 130 * | | | |
131 * | | | |----------> xbdback_co_io_gotio() 131 * | | | |----------> xbdback_co_io_gotio()
132 * | | | | 132 * | | | |
133 * | | xbdback_co_main_incr() | 133 * | | xbdback_co_main_incr() |
134 * | | | | 134 * | | | |
135 * | | xbdback_co_main_loop() | 135 * | | xbdback_co_main_loop() |
136 * | | | 136 * | | |
137 * | xbdback_co_io_gotio2() <-----------| 137 * | xbdback_co_io_gotio2() <-----------|
138 * | | | 138 * | | |
139 * | | |----------> xbdback_co_io_gotfrag() 139 * | | |----------> xbdback_co_io_gotfrag()
140 * | | | 140 * | | |
141 * -- xbdback_co_io_gotfrag2() <---------| 141 * -- xbdback_co_io_gotfrag2() <---------|
142 * | 142 * |
143 * xbdback_co_main_incr() -> xbdback_co_main_loop() 143 * xbdback_co_main_incr() -> xbdback_co_main_loop()
144 */ 144 */
145typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *); 145typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);
146 146
147enum xbdi_proto { 147enum xbdi_proto {
148 XBDIP_NATIVE, 148 XBDIP_NATIVE,
149 XBDIP_32, 149 XBDIP_32,
150 XBDIP_64 150 XBDIP_64
151}; 151};
152 152
153struct xbdback_va { 153struct xbdback_va {
154 SLIST_ENTRY(xbdback_va) xv_next; 154 SLIST_ENTRY(xbdback_va) xv_next;
155 vaddr_t xv_vaddr; 155 vaddr_t xv_vaddr;
156}; 156};
157 157
158/* we keep the xbdback instances in a linked list */ 158/* we keep the xbdback instances in a linked list */
159struct xbdback_instance { 159struct xbdback_instance {
160 SLIST_ENTRY(xbdback_instance) next; 160 SLIST_ENTRY(xbdback_instance) next;
161 struct xenbus_device *xbdi_xbusd; /* our xenstore entry */ 161 struct xenbus_device *xbdi_xbusd; /* our xenstore entry */
162 struct xenbus_watch xbdi_watch; /* to watch our store */ 162 struct xenbus_watch xbdi_watch; /* to watch our store */
163 domid_t xbdi_domid; /* attached to this domain */ 163 domid_t xbdi_domid; /* attached to this domain */
164 uint32_t xbdi_handle; /* domain-specific handle */ 164 uint32_t xbdi_handle; /* domain-specific handle */
165 char xbdi_name[16]; /* name of this instance */ 165 char xbdi_name[16]; /* name of this instance */
166 /* mutex that protects concurrent access to the xbdback instance */ 166 /* mutex that protects concurrent access to the xbdback instance */
167 kmutex_t xbdi_lock; 167 kmutex_t xbdi_lock;
168 kcondvar_t xbdi_cv; /* wait channel for thread work */ 168 kcondvar_t xbdi_cv; /* wait channel for thread work */
169 xbdback_state_t xbdi_status; /* thread's status */ 169 xbdback_state_t xbdi_status; /* thread's status */
170 /* KVA for mapping transfers */ 170 /* KVA for mapping transfers */
171 struct xbdback_va xbdi_va[BLKIF_RING_SIZE]; 171 struct xbdback_va xbdi_va[BLKIF_RING_SIZE];
172 SLIST_HEAD(, xbdback_va) xbdi_va_free; 172 SLIST_HEAD(, xbdback_va) xbdi_va_free;
173 /* backing device parameters */ 173 /* backing device parameters */
174 dev_t xbdi_dev; 174 dev_t xbdi_dev;
175 const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */ 175 const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */
176 struct vnode *xbdi_vp; 176 struct vnode *xbdi_vp;
177 uint64_t xbdi_size; 177 uint64_t xbdi_size;
178 bool xbdi_ro; /* is device read-only ? */ 178 bool xbdi_ro; /* is device read-only ? */
179 /* parameters for the communication */ 179 /* parameters for the communication */
180 unsigned int xbdi_evtchn; 180 unsigned int xbdi_evtchn;
181 struct intrhand *xbdi_ih; 181 struct intrhand *xbdi_ih;
182 /* private parameters for communication */ 182 /* private parameters for communication */
183 blkif_back_ring_proto_t xbdi_ring; 183 blkif_back_ring_proto_t xbdi_ring;
184 enum xbdi_proto xbdi_proto; 184 enum xbdi_proto xbdi_proto;
185 grant_handle_t xbdi_ring_handle; /* to unmap the ring */ 185 grant_handle_t xbdi_ring_handle; /* to unmap the ring */
186 vaddr_t xbdi_ring_va; /* to unmap the ring */ 186 vaddr_t xbdi_ring_va; /* to unmap the ring */
187 /* disconnection must be postponed until all I/O is done */ 187 /* disconnection must be postponed until all I/O is done */
188 int xbdi_refcnt; 188 int xbdi_refcnt;
189 /*  189 /*
190 * State for I/O processing/coalescing follows; this has to 190 * State for I/O processing/coalescing follows; this has to
191 * live here instead of on the stack because of the 191 * live here instead of on the stack because of the
192 * continuation-ness (see above). 192 * continuation-ness (see above).
193 */ 193 */
194 RING_IDX xbdi_req_prod; /* limit on request indices */ 194 RING_IDX xbdi_req_prod; /* limit on request indices */
195 xbdback_cont_t xbdi_cont, xbdi_cont_aux; 195 xbdback_cont_t xbdi_cont, xbdi_cont_aux;
196 /* _request state: track requests fetched from ring */ 196 /* _request state: track requests fetched from ring */
197 struct xbdback_request *xbdi_req; /* if NULL, ignore following */ 197 struct xbdback_request *xbdi_req; /* if NULL, ignore following */
198 blkif_request_t xbdi_xen_req; 198 blkif_request_t xbdi_xen_req;
199 int xbdi_segno; 199 int xbdi_segno;
200 /* _io state: I/O associated to this instance */ 200 /* _io state: I/O associated to this instance */
201 struct xbdback_io *xbdi_io; /* if NULL, ignore next field */ 201 struct xbdback_io *xbdi_io; /* if NULL, ignore next field */
202 daddr_t xbdi_next_sector; 202 daddr_t xbdi_next_sector;
203 uint8_t xbdi_last_fs, xbdi_this_fs; /* first sectors */ 203 uint8_t xbdi_last_fs, xbdi_this_fs; /* first sectors */
204 uint8_t xbdi_last_ls, xbdi_this_ls; /* last sectors */ 204 uint8_t xbdi_last_ls, xbdi_this_ls; /* last sectors */
205 grant_ref_t xbdi_thisgrt, xbdi_lastgrt; /* grants */ 205 grant_ref_t xbdi_thisgrt, xbdi_lastgrt; /* grants */
206 /* other state */ 206 /* other state */
207 int xbdi_same_page; /* are we merging two segments on the same page? */ 207 int xbdi_same_page; /* are we merging two segments on the same page? */
208 uint xbdi_pendingreqs; /* number of I/O in fly */ 208 uint xbdi_pendingreqs; /* number of I/O in fly */
209 struct timeval xbdi_lasterr_time; /* error time tracking */ 209 struct timeval xbdi_lasterr_time; /* error time tracking */
210#ifdef DEBUG 210#ifdef DEBUG
211 struct timeval xbdi_lastfragio_time; /* fragmented I/O tracking */ 211 struct timeval xbdi_lastfragio_time; /* fragmented I/O tracking */
212#endif 212#endif
213}; 213};
214/* Manipulation of the above reference count. */ 214/* Manipulation of the above reference count. */
215#define xbdi_get(xbdip) atomic_inc_uint(&(xbdip)->xbdi_refcnt) 215#define xbdi_get(xbdip) atomic_inc_uint(&(xbdip)->xbdi_refcnt)
216#define xbdi_put(xbdip) \ 216#define xbdi_put(xbdip) \
217do { \ 217do { \
218 if (atomic_dec_uint_nv(&(xbdip)->xbdi_refcnt) == 0) \ 218 if (atomic_dec_uint_nv(&(xbdip)->xbdi_refcnt) == 0) \
219 xbdback_finish_disconnect(xbdip); \ 219 xbdback_finish_disconnect(xbdip); \
220} while (/* CONSTCOND */ 0) 220} while (/* CONSTCOND */ 0)
221 221
222static SLIST_HEAD(, xbdback_instance) xbdback_instances; 222static SLIST_HEAD(, xbdback_instance) xbdback_instances;
223static kmutex_t xbdback_lock; 223static kmutex_t xbdback_lock;
224 224
225/* 225/*
226 * For each request from a guest, a xbdback_request is allocated from 226 * For each request from a guest, a xbdback_request is allocated from
227 * a pool. This will describe the request until completion. The 227 * a pool. This will describe the request until completion. The
228 * request may require multiple IO operations to perform, so the 228 * request may require multiple IO operations to perform, so the
229 * per-IO information is not stored here. 229 * per-IO information is not stored here.
230 */ 230 */
231struct xbdback_request { 231struct xbdback_request {
232 struct xbdback_instance *rq_xbdi; /* our xbd instance */ 232 struct xbdback_instance *rq_xbdi; /* our xbd instance */
233 uint64_t rq_id; 233 uint64_t rq_id;
234 int rq_iocount; /* reference count; or, number of outstanding I/O's */ 234 int rq_iocount; /* reference count; or, number of outstanding I/O's */
235 int rq_ioerrs; 235 int rq_ioerrs;
236 uint8_t rq_operation; 236 uint8_t rq_operation;
237}; 237};
238 238
239/* 239/*
240 * For each I/O operation associated with one of those requests, an 240 * For each I/O operation associated with one of those requests, an
241 * xbdback_io is allocated from a pool. It may correspond to multiple 241 * xbdback_io is allocated from a pool. It may correspond to multiple
242 * Xen disk requests, or parts of them, if several arrive at once that 242 * Xen disk requests, or parts of them, if several arrive at once that
243 * can be coalesced. 243 * can be coalesced.
244 */ 244 */
245struct xbdback_io { 245struct xbdback_io {
246 /* The instance pointer is duplicated for convenience. */ 246 /* The instance pointer is duplicated for convenience. */
247 struct xbdback_instance *xio_xbdi; /* our xbd instance */ 247 struct xbdback_instance *xio_xbdi; /* our xbd instance */
248 uint8_t xio_operation; 248 uint8_t xio_operation;
249 union { 249 union {
250 struct { 250 struct {
251 struct buf xio_buf; /* our I/O */ 251 struct buf xio_buf; /* our I/O */
252 /* xbd requests involved */ 252 /* xbd requests involved */
253 SLIST_HEAD(, xbdback_fragment) xio_rq; 253 SLIST_HEAD(, xbdback_fragment) xio_rq;
254 /* the virtual address to map the request at */ 254 /* the virtual address to map the request at */
255 vaddr_t xio_vaddr; 255 vaddr_t xio_vaddr;
256 struct xbdback_va *xio_xv; 256 struct xbdback_va *xio_xv;
257 /* grants to map */ 257 /* grants to map */
258 grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST]; 258 grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST];
259 /* grants release */ 259 /* grants release */
260 grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST]; 260 grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];
261 uint16_t xio_nrma; /* number of guest pages */ 261 uint16_t xio_nrma; /* number of guest pages */
262 uint16_t xio_mapped; /* == 1: grants are mapped */ 262 uint16_t xio_mapped; /* == 1: grants are mapped */
263 } xio_rw; 263 } xio_rw;
264 uint64_t xio_flush_id; 264 uint64_t xio_flush_id;
265 } u; 265 } u;
266}; 266};
267#define xio_buf u.xio_rw.xio_buf 267#define xio_buf u.xio_rw.xio_buf
268#define xio_rq u.xio_rw.xio_rq 268#define xio_rq u.xio_rw.xio_rq
269#define xio_vaddr u.xio_rw.xio_vaddr 269#define xio_vaddr u.xio_rw.xio_vaddr
270#define xio_xv u.xio_rw.xio_xv 270#define xio_xv u.xio_rw.xio_xv
271#define xio_gref u.xio_rw.xio_gref 271#define xio_gref u.xio_rw.xio_gref
272#define xio_gh u.xio_rw.xio_gh 272#define xio_gh u.xio_rw.xio_gh
273#define xio_nrma u.xio_rw.xio_nrma 273#define xio_nrma u.xio_rw.xio_nrma
274#define xio_mapped u.xio_rw.xio_mapped 274#define xio_mapped u.xio_rw.xio_mapped
275 275
276#define xio_flush_id u.xio_flush_id 276#define xio_flush_id u.xio_flush_id
277 277
278/* 278/*
279 * Rather than having the xbdback_io keep an array of the 279 * Rather than having the xbdback_io keep an array of the
280 * xbdback_requests involved, since the actual number will probably be 280 * xbdback_requests involved, since the actual number will probably be
281 * small but might be as large as BLKIF_RING_SIZE, use a list. This 281 * small but might be as large as BLKIF_RING_SIZE, use a list. This
282 * would be threaded through xbdback_request, but one of them might be 282 * would be threaded through xbdback_request, but one of them might be
283 * part of multiple I/O's, alas. 283 * part of multiple I/O's, alas.
284 */ 284 */
285struct xbdback_fragment { 285struct xbdback_fragment {
286 struct xbdback_request *car; 286 struct xbdback_request *car;
287 SLIST_ENTRY(xbdback_fragment) cdr; 287 SLIST_ENTRY(xbdback_fragment) cdr;
288}; 288};
289 289
290/* 290/*
291 * Pools to manage the chain of block requests and I/Os fragments 291 * Pools to manage the chain of block requests and I/Os fragments
292 * submitted by frontend. 292 * submitted by frontend.
293 */ 293 */
294/* XXXSMP */ 294/* XXXSMP */
295static struct xbdback_pool { 295static struct xbdback_pool {
296 struct pool_cache pc; 296 struct pool_cache pc;
297 struct timeval last_warning; 297 struct timeval last_warning;
298} xbdback_request_pool, xbdback_io_pool, xbdback_fragment_pool; 298} xbdback_request_pool, xbdback_io_pool, xbdback_fragment_pool;
299 299
300/* Interval between reports of I/O errors from frontend */ 300/* Interval between reports of I/O errors from frontend */
301static const struct timeval xbdback_err_intvl = { 1, 0 }; 301static const struct timeval xbdback_err_intvl = { 1, 0 };
302 302
303#ifdef DEBUG 303#ifdef DEBUG
304static const struct timeval xbdback_fragio_intvl = { 60, 0 }; 304static const struct timeval xbdback_fragio_intvl = { 60, 0 };
305#endif 305#endif
306 void xbdbackattach(int); 306 void xbdbackattach(int);
307static int xbdback_xenbus_create(struct xenbus_device *); 307static int xbdback_xenbus_create(struct xenbus_device *);
308static int xbdback_xenbus_destroy(void *); 308static int xbdback_xenbus_destroy(void *);
309static void xbdback_frontend_changed(void *, XenbusState); 309static void xbdback_frontend_changed(void *, XenbusState);
310static void xbdback_backend_changed(struct xenbus_watch *, 310static void xbdback_backend_changed(struct xenbus_watch *,
311 const char **, unsigned int); 311 const char **, unsigned int);
312static int xbdback_evthandler(void *); 312static int xbdback_evthandler(void *);
313 313
314static int xbdback_connect(struct xbdback_instance *); 314static int xbdback_connect(struct xbdback_instance *);
315static void xbdback_disconnect(struct xbdback_instance *); 315static void xbdback_disconnect(struct xbdback_instance *);
316static void xbdback_finish_disconnect(struct xbdback_instance *); 316static void xbdback_finish_disconnect(struct xbdback_instance *);
317 317
318static bool xbdif_lookup(domid_t, uint32_t); 318static bool xbdif_lookup(domid_t, uint32_t);
319 319
320static void *xbdback_co_main(struct xbdback_instance *, void *); 320static void *xbdback_co_main(struct xbdback_instance *, void *);
321static void *xbdback_co_main_loop(struct xbdback_instance *, void *); 321static void *xbdback_co_main_loop(struct xbdback_instance *, void *);
322static void *xbdback_co_main_incr(struct xbdback_instance *, void *); 322static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
323static void *xbdback_co_main_done(struct xbdback_instance *, void *); 323static void *xbdback_co_main_done(struct xbdback_instance *, void *);
324static void *xbdback_co_main_done2(struct xbdback_instance *, void *); 324static void *xbdback_co_main_done2(struct xbdback_instance *, void *);
325 325
326static void *xbdback_co_cache_flush(struct xbdback_instance *, void *); 326static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
327static void *xbdback_co_cache_flush2(struct xbdback_instance *, void *); 327static void *xbdback_co_cache_flush2(struct xbdback_instance *, void *);
328static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *); 328static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *);
329 329
330static void *xbdback_co_io(struct xbdback_instance *, void *); 330static void *xbdback_co_io(struct xbdback_instance *, void *);
331static void *xbdback_co_io_gotreq(struct xbdback_instance *, void *); 331static void *xbdback_co_io_gotreq(struct xbdback_instance *, void *);
332static void *xbdback_co_io_loop(struct xbdback_instance *, void *); 332static void *xbdback_co_io_loop(struct xbdback_instance *, void *);
333static void *xbdback_co_io_gotio(struct xbdback_instance *, void *); 333static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);
334static void *xbdback_co_io_gotio2(struct xbdback_instance *, void *); 334static void *xbdback_co_io_gotio2(struct xbdback_instance *, void *);
335static void *xbdback_co_io_gotfrag(struct xbdback_instance *, void *); 335static void *xbdback_co_io_gotfrag(struct xbdback_instance *, void *);
336static void *xbdback_co_io_gotfrag2(struct xbdback_instance *, void *); 336static void *xbdback_co_io_gotfrag2(struct xbdback_instance *, void *);
337 337
338static void *xbdback_co_map_io(struct xbdback_instance *, void *); 338static void *xbdback_co_map_io(struct xbdback_instance *, void *);
339static void *xbdback_co_do_io(struct xbdback_instance *, void *); 339static void *xbdback_co_do_io(struct xbdback_instance *, void *);
340 340
341static void xbdback_io_error(struct xbdback_io *, int); 341static void xbdback_io_error(struct xbdback_io *, int);
342static void xbdback_iodone(struct buf *); 342static void xbdback_iodone(struct buf *);
343static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int); 343static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int);
344 344
345static void *xbdback_map_shm(struct xbdback_io *); 345static void *xbdback_map_shm(struct xbdback_io *);
346static void xbdback_unmap_shm(struct xbdback_io *); 346static void xbdback_unmap_shm(struct xbdback_io *);
347 347
348static void *xbdback_pool_get(struct xbdback_pool *, 348static void *xbdback_pool_get(struct xbdback_pool *,
349 struct xbdback_instance *); 349 struct xbdback_instance *);
350static void xbdback_pool_put(struct xbdback_pool *, void *); 350static void xbdback_pool_put(struct xbdback_pool *, void *);
351static void xbdback_thread(void *); 351static void xbdback_thread(void *);
352static void xbdback_wakeup_thread(struct xbdback_instance *); 352static void xbdback_wakeup_thread(struct xbdback_instance *);
353static void xbdback_trampoline(struct xbdback_instance *, void *); 353static void xbdback_trampoline(struct xbdback_instance *, void *);
354 354
355static struct xenbus_backend_driver xbd_backend_driver = { 355static struct xenbus_backend_driver xbd_backend_driver = {
356 .xbakd_create = xbdback_xenbus_create, 356 .xbakd_create = xbdback_xenbus_create,
357 .xbakd_type = "vbd" 357 .xbakd_type = "vbd"
358}; 358};
359 359
360void 360void
361xbdbackattach(int n) 361xbdbackattach(int n)
362{ 362{
363 XENPRINTF(("xbdbackattach\n")); 363 XENPRINTF(("xbdbackattach\n"));
364 364
365 /* 365 /*
366 * initialize the backend driver, register the control message handler 366 * initialize the backend driver, register the control message handler
367 * and send driver up message. 367 * and send driver up message.
368 */ 368 */
369 SLIST_INIT(&xbdback_instances); 369 SLIST_INIT(&xbdback_instances);
370 mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE); 370 mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE);
371 371
372 pool_cache_bootstrap(&xbdback_request_pool.pc, 372 pool_cache_bootstrap(&xbdback_request_pool.pc,
373 sizeof(struct xbdback_request), 0, 0, 0, "xbbrp", NULL, 373 sizeof(struct xbdback_request), 0, 0, 0, "xbbrp", NULL,
374 IPL_SOFTBIO, NULL, NULL, NULL); 374 IPL_SOFTBIO, NULL, NULL, NULL);
375 pool_cache_bootstrap(&xbdback_io_pool.pc, 375 pool_cache_bootstrap(&xbdback_io_pool.pc,
376 sizeof(struct xbdback_io), 0, 0, 0, "xbbip", NULL, 376 sizeof(struct xbdback_io), 0, 0, 0, "xbbip", NULL,
377 IPL_SOFTBIO, NULL, NULL, NULL); 377 IPL_SOFTBIO, NULL, NULL, NULL);
378 pool_cache_bootstrap(&xbdback_fragment_pool.pc, 378 pool_cache_bootstrap(&xbdback_fragment_pool.pc,
379 sizeof(struct xbdback_fragment), 0, 0, 0, "xbbfp", NULL, 379 sizeof(struct xbdback_fragment), 0, 0, 0, "xbbfp", NULL,
380 IPL_SOFTBIO, NULL, NULL, NULL); 380 IPL_SOFTBIO, NULL, NULL, NULL);
381 381
382 /* we allocate enough to handle a whole ring at once */ 382 /* we allocate enough to handle a whole ring at once */
383 pool_prime(&xbdback_request_pool.pc.pc_pool, BLKIF_RING_SIZE); 383 pool_prime(&xbdback_request_pool.pc.pc_pool, BLKIF_RING_SIZE);
384 pool_prime(&xbdback_io_pool.pc.pc_pool, BLKIF_RING_SIZE); 384 pool_prime(&xbdback_io_pool.pc.pc_pool, BLKIF_RING_SIZE);
385 pool_prime(&xbdback_fragment_pool.pc.pc_pool, 385 pool_prime(&xbdback_fragment_pool.pc.pc_pool,
386 BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE); 386 BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE);
387 387
388 xenbus_backend_register(&xbd_backend_driver); 388 xenbus_backend_register(&xbd_backend_driver);
389} 389}
390 390
391static int 391static int
392xbdback_xenbus_create(struct xenbus_device *xbusd) 392xbdback_xenbus_create(struct xenbus_device *xbusd)
393{ 393{
394 struct xbdback_instance *xbdi; 394 struct xbdback_instance *xbdi;
395 long domid, handle; 395 long domid, handle;
396 int error, i; 396 int error, i;
397 char *ep; 397 char *ep;
398 398
399 if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path, 399 if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path,
400 "frontend-id", &domid, 10)) != 0) { 400 "frontend-id", &domid, 10)) != 0) {
401 aprint_error("xbdback: can't read %s/frontend-id: %d\n", 401 aprint_error("xbdback: can't read %s/frontend-id: %d\n",
402 xbusd->xbusd_path, error); 402 xbusd->xbusd_path, error);
403 return error; 403 return error;
404 } 404 }
405 405
406 /* 406 /*
407 * get handle: this is the last component of the path; which is 407 * get handle: this is the last component of the path; which is
408 * a decimal number. $path/dev contains the device name, which is not 408 * a decimal number. $path/dev contains the device name, which is not
409 * appropriate. 409 * appropriate.
410 */ 410 */
411 for (i = strlen(xbusd->xbusd_path); i > 0; i--) { 411 for (i = strlen(xbusd->xbusd_path); i > 0; i--) {
412 if (xbusd->xbusd_path[i] == '/') 412 if (xbusd->xbusd_path[i] == '/')
413 break; 413 break;
414 } 414 }
415 if (i == 0) { 415 if (i == 0) {
416 aprint_error("xbdback: can't parse %s\n", 416 aprint_error("xbdback: can't parse %s\n",
417 xbusd->xbusd_path); 417 xbusd->xbusd_path);
418 return EFTYPE; 418 return EFTYPE;
419 } 419 }
420 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10); 420 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10);
421 if (*ep != '\0') { 421 if (*ep != '\0') {
422 aprint_error("xbdback: can't parse %s\n", 422 aprint_error("xbdback: can't parse %s\n",
423 xbusd->xbusd_path); 423 xbusd->xbusd_path);
424 return EFTYPE; 424 return EFTYPE;
425 } 425 }
426  426
427 if (xbdif_lookup(domid, handle)) { 427 if (xbdif_lookup(domid, handle)) {
428 return EEXIST; 428 return EEXIST;
429 } 429 }
430 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP); 430 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP);
431 431
432 xbdi->xbdi_domid = domid; 432 xbdi->xbdi_domid = domid;
433 xbdi->xbdi_handle = handle; 433 xbdi->xbdi_handle = handle;
434 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d", 434 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d",
435 xbdi->xbdi_domid, xbdi->xbdi_handle); 435 xbdi->xbdi_domid, xbdi->xbdi_handle);
436 436
437 /* initialize status and reference counter */ 437 /* initialize status and reference counter */
438 xbdi->xbdi_status = DISCONNECTED; 438 xbdi->xbdi_status = DISCONNECTED;
439 xbdi_get(xbdi); 439 xbdi_get(xbdi);
440 440
441 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO); 441 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO);
442 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name); 442 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name);
443 mutex_enter(&xbdback_lock); 443 mutex_enter(&xbdback_lock);
444 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next); 444 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next);
445 mutex_exit(&xbdback_lock); 445 mutex_exit(&xbdback_lock);
446 446
447 xbusd->xbusd_u.b.b_cookie = xbdi;  447 xbusd->xbusd_u.b.b_cookie = xbdi;
448 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy; 448 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy;
449 xbusd->xbusd_otherend_changed = xbdback_frontend_changed; 449 xbusd->xbusd_otherend_changed = xbdback_frontend_changed;
450 xbdi->xbdi_xbusd = xbusd; 450 xbdi->xbdi_xbusd = xbusd;
451 451
452 SLIST_INIT(&xbdi->xbdi_va_free); 452 SLIST_INIT(&xbdi->xbdi_va_free);
453 for (i = 0; i < BLKIF_RING_SIZE; i++) { 453 for (i = 0; i < BLKIF_RING_SIZE; i++) {
454 xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map, 454 xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map,
455 VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA); 455 VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA);
456 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i], 456 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i],
457 xv_next); 457 xv_next);
458 } 458 }
459 459
460 error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device", 460 error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device",
461 &xbdi->xbdi_watch, xbdback_backend_changed); 461 &xbdi->xbdi_watch, xbdback_backend_changed);
462 if (error) { 462 if (error) {
463 printf("failed to watch on %s/physical-device: %d\n", 463 printf("failed to watch on %s/physical-device: %d\n",
464 xbusd->xbusd_path, error); 464 xbusd->xbusd_path, error);
465 goto fail; 465 goto fail;
466 } 466 }
467 xbdi->xbdi_watch.xbw_dev = xbusd; 467 xbdi->xbdi_watch.xbw_dev = xbusd;
468 error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait); 468 error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
469 if (error) { 469 if (error) {
470 printf("failed to switch state on %s: %d\n", 470 printf("failed to switch state on %s: %d\n",
471 xbusd->xbusd_path, error); 471 xbusd->xbusd_path, error);
472 goto fail2; 472 goto fail2;
473 } 473 }
474 return 0; 474 return 0;
475fail2: 475fail2:
476 unregister_xenbus_watch(&xbdi->xbdi_watch); 476 unregister_xenbus_watch(&xbdi->xbdi_watch);
477fail: 477fail:
478 kmem_free(xbdi, sizeof(*xbdi)); 478 kmem_free(xbdi, sizeof(*xbdi));
479 return error; 479 return error;
480} 480}
481 481
482static int 482static int
483xbdback_xenbus_destroy(void *arg) 483xbdback_xenbus_destroy(void *arg)
484{ 484{
485 struct xbdback_instance *xbdi = arg; 485 struct xbdback_instance *xbdi = arg;
486 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 486 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
487 struct gnttab_unmap_grant_ref ungrop; 487 struct gnttab_unmap_grant_ref ungrop;
488 int err; 488 int err;
489 489
490 XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status)); 490 XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status));
491 491
492 xbdback_disconnect(xbdi); 492 xbdback_disconnect(xbdi);
493 493
494 /* unregister watch */ 494 /* unregister watch */
495 if (xbdi->xbdi_watch.node) 495 if (xbdi->xbdi_watch.node)
496 xenbus_unwatch_path(&xbdi->xbdi_watch); 496 xenbus_unwatch_path(&xbdi->xbdi_watch);
497 497
498 /* unmap ring */ 498 /* unmap ring */
499 if (xbdi->xbdi_ring_va != 0) { 499 if (xbdi->xbdi_ring_va != 0) {
500 ungrop.host_addr = xbdi->xbdi_ring_va; 500 ungrop.host_addr = xbdi->xbdi_ring_va;
501 ungrop.handle = xbdi->xbdi_ring_handle; 501 ungrop.handle = xbdi->xbdi_ring_handle;
502 ungrop.dev_bus_addr = 0; 502 ungrop.dev_bus_addr = 0;
503 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 503 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
504 &ungrop, 1); 504 &ungrop, 1);
505 if (err) 505 if (err)
506 printf("xbdback %s: unmap_grant_ref failed: %d\n", 506 printf("xbdback %s: unmap_grant_ref failed: %d\n",
507 xbusd->xbusd_otherend, err); 507 xbusd->xbusd_otherend, err);
508 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, 508 uvm_km_free(kernel_map, xbdi->xbdi_ring_va,
509 PAGE_SIZE, UVM_KMF_VAONLY); 509 PAGE_SIZE, UVM_KMF_VAONLY);
510 } 510 }
511 /* close device */ 511 /* close device */
512 if (xbdi->xbdi_size) { 512 if (xbdi->xbdi_size) {
513 const char *name; 513 const char *name;
514 struct dkwedge_info wi; 514 struct dkwedge_info wi;
515 if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0) 515 if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0)
516 name = wi.dkw_devname; 516 name = wi.dkw_devname;
517 else 517 else
518 name = "*unknown*"; 518 name = "*unknown*";
519 printf("xbd backend: detach device %s for domain %d\n", 519 printf("xbd backend: detach device %s for domain %d\n",
520 name, xbdi->xbdi_domid); 520 name, xbdi->xbdi_domid);
521 vn_close(xbdi->xbdi_vp, FREAD, NOCRED); 521 vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
522 } 522 }
523 mutex_enter(&xbdback_lock); 523 mutex_enter(&xbdback_lock);
524 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next); 524 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next);
525 mutex_exit(&xbdback_lock); 525 mutex_exit(&xbdback_lock);
526 526
527 for (int i = 0; i < BLKIF_RING_SIZE; i++) { 527 for (int i = 0; i < BLKIF_RING_SIZE; i++) {
528 if (xbdi->xbdi_va[i].xv_vaddr != 0) { 528 if (xbdi->xbdi_va[i].xv_vaddr != 0) {
529 uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr, 529 uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr,
530 VBD_VA_SIZE, UVM_KMF_VAONLY); 530 VBD_VA_SIZE, UVM_KMF_VAONLY);
531 xbdi->xbdi_va[i].xv_vaddr = 0; 531 xbdi->xbdi_va[i].xv_vaddr = 0;
532 } 532 }
533 } 533 }
534 534
535 mutex_destroy(&xbdi->xbdi_lock); 535 mutex_destroy(&xbdi->xbdi_lock);
536 cv_destroy(&xbdi->xbdi_cv); 536 cv_destroy(&xbdi->xbdi_cv);
537 kmem_free(xbdi, sizeof(*xbdi)); 537 kmem_free(xbdi, sizeof(*xbdi));
538 return 0; 538 return 0;
539} 539}
540 540
541static int 541static int
542xbdback_connect(struct xbdback_instance *xbdi) 542xbdback_connect(struct xbdback_instance *xbdi)
543{ 543{
544 int err; 544 int err;
545 struct gnttab_map_grant_ref grop; 545 struct gnttab_map_grant_ref grop;
546 struct gnttab_unmap_grant_ref ungrop; 546 struct gnttab_unmap_grant_ref ungrop;
547 evtchn_op_t evop; 547 evtchn_op_t evop;
548 u_long ring_ref, revtchn; 548 u_long ring_ref, revtchn;
549 char xsproto[32]; 549 char xsproto[32];
550 const char *proto; 550 const char *proto;
551 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 551 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
552 552
553 XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path)); 553 XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path));
554 /* read comunication informations */ 554 /* read comunication informations */
555 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend, 555 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
556 "ring-ref", &ring_ref, 10); 556 "ring-ref", &ring_ref, 10);
557 if (err) { 557 if (err) {
558 xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref", 558 xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref",
559 xbusd->xbusd_otherend); 559 xbusd->xbusd_otherend);
560 return -1; 560 return -1;
561 } 561 }
562 XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref)); 562 XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref));
563 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend, 563 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
564 "event-channel", &revtchn, 10); 564 "event-channel", &revtchn, 10);
565 if (err) { 565 if (err) {
566 xenbus_dev_fatal(xbusd, err, "reading %s/event-channel", 566 xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
567 xbusd->xbusd_otherend); 567 xbusd->xbusd_otherend);
568 return -1; 568 return -1;
569 } 569 }
570 XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn)); 570 XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn));
571 err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol", 571 err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol",
572 xsproto, sizeof(xsproto)); 572 xsproto, sizeof(xsproto));
573 if (err) { 573 if (err) {
574 xbdi->xbdi_proto = XBDIP_NATIVE; 574 xbdi->xbdi_proto = XBDIP_NATIVE;
575 proto = "unspecified"; 575 proto = "unspecified";
576 XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path)); 576 XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path));
577 } else { 577 } else {
578 XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto)); 578 XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto));
579 if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) { 579 if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) {
580 xbdi->xbdi_proto = XBDIP_NATIVE; 580 xbdi->xbdi_proto = XBDIP_NATIVE;
581 proto = XEN_IO_PROTO_ABI_NATIVE; 581 proto = XEN_IO_PROTO_ABI_NATIVE;
582 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) { 582 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) {
583 xbdi->xbdi_proto = XBDIP_32; 583 xbdi->xbdi_proto = XBDIP_32;
584 proto = XEN_IO_PROTO_ABI_X86_32; 584 proto = XEN_IO_PROTO_ABI_X86_32;
585 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) { 585 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) {
586 xbdi->xbdi_proto = XBDIP_64; 586 xbdi->xbdi_proto = XBDIP_64;
587 proto = XEN_IO_PROTO_ABI_X86_64; 587 proto = XEN_IO_PROTO_ABI_X86_64;
588 } else { 588 } else {
589 aprint_error("xbd domain %d: unknown proto %s\n", 589 aprint_error("xbd domain %d: unknown proto %s\n",
590 xbdi->xbdi_domid, xsproto); 590 xbdi->xbdi_domid, xsproto);
591 return -1; 591 return -1;
592 } 592 }
593 } 593 }
594 594
595 /* allocate VA space and map rings */ 595 /* allocate VA space and map rings */
596 xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 596 xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
597 UVM_KMF_VAONLY); 597 UVM_KMF_VAONLY);
598 if (xbdi->xbdi_ring_va == 0) { 598 if (xbdi->xbdi_ring_va == 0) {
599 xenbus_dev_fatal(xbusd, ENOMEM, 599 xenbus_dev_fatal(xbusd, ENOMEM,
600 "can't get VA for ring", xbusd->xbusd_otherend); 600 "can't get VA for ring", xbusd->xbusd_otherend);
601 return -1; 601 return -1;
602 } 602 }
603 XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va)); 603 XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va));
604 604
605 grop.host_addr = xbdi->xbdi_ring_va; 605 grop.host_addr = xbdi->xbdi_ring_va;
606 grop.flags = GNTMAP_host_map; 606 grop.flags = GNTMAP_host_map;
607 grop.ref = ring_ref; 607 grop.ref = ring_ref;
608 grop.dom = xbdi->xbdi_domid; 608 grop.dom = xbdi->xbdi_domid;
609 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 609 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
610 &grop, 1); 610 &grop, 1);
611 if (err || grop.status) { 611 if (err || grop.status) {
612 aprint_error("xbdback %s: can't map grant ref: %d/%d\n", 612 aprint_error("xbdback %s: can't map grant ref: %d/%d\n",
613 xbusd->xbusd_path, err, grop.status); 613 xbusd->xbusd_path, err, grop.status);
614 xenbus_dev_fatal(xbusd, EINVAL, 614 xenbus_dev_fatal(xbusd, EINVAL,
615 "can't map ring", xbusd->xbusd_otherend); 615 "can't map ring", xbusd->xbusd_otherend);
616 goto err; 616 goto err;
617 } 617 }
618 xbdi->xbdi_ring_handle = grop.handle; 618 xbdi->xbdi_ring_handle = grop.handle;
619 XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, grop.handle)); 619 XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, grop.handle));
620 620
621 switch(xbdi->xbdi_proto) { 621 switch(xbdi->xbdi_proto) {
622 case XBDIP_NATIVE: 622 case XBDIP_NATIVE:
623 { 623 {
624 blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va; 624 blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va;
625 BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE); 625 BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE);
626 break; 626 break;
627 } 627 }
628 case XBDIP_32: 628 case XBDIP_32:
629 { 629 {
630 blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va; 630 blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va;
631 BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE); 631 BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE);
632 break; 632 break;
633 } 633 }
634 case XBDIP_64: 634 case XBDIP_64:
635 { 635 {
636 blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va; 636 blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va;
637 BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE); 637 BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE);
638 break; 638 break;
639 } 639 }
640 } 640 }
641 641
642 evop.cmd = EVTCHNOP_bind_interdomain; 642 evop.cmd = EVTCHNOP_bind_interdomain;
643 evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid; 643 evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid;
644 evop.u.bind_interdomain.remote_port = revtchn; 644 evop.u.bind_interdomain.remote_port = revtchn;
645 err = HYPERVISOR_event_channel_op(&evop); 645 err = HYPERVISOR_event_channel_op(&evop);
646 if (err) { 646 if (err) {
647 aprint_error("blkback %s: " 647 aprint_error("blkback %s: "
648 "can't get event channel: %d\n", 648 "can't get event channel: %d\n",
649 xbusd->xbusd_otherend, err); 649 xbusd->xbusd_otherend, err);
650 xenbus_dev_fatal(xbusd, err, 650 xenbus_dev_fatal(xbusd, err,
651 "can't bind event channel", xbusd->xbusd_otherend); 651 "can't bind event channel", xbusd->xbusd_otherend);
652 goto err2; 652 goto err2;
653 } 653 }
654 XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn)); 654 XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn));
655 xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port; 655 xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port;
656 656
657 xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic, xbdi->xbdi_evtchn, 657 xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic, xbdi->xbdi_evtchn,
658 IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi, false, 658 IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi, false,
659 xbdi->xbdi_name); 659 xbdi->xbdi_name);
660 KASSERT(xbdi->xbdi_ih != NULL); 660 KASSERT(xbdi->xbdi_ih != NULL);
661 aprint_verbose("xbd backend domain %d handle %#x (%d) " 661 aprint_verbose("xbd backend domain %d handle %#x (%d) "
662 "using event channel %d, protocol %s\n", xbdi->xbdi_domid, 662 "using event channel %d, protocol %s\n", xbdi->xbdi_domid,
663 xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto); 663 xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto);
664 664
665 /* enable the xbdback event handler machinery */ 665 /* enable the xbdback event handler machinery */
666 xbdi->xbdi_status = WAITING; 666 xbdi->xbdi_status = WAITING;
667 hypervisor_unmask_event(xbdi->xbdi_evtchn); 667 hypervisor_unmask_event(xbdi->xbdi_evtchn);
668 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn); 668 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
669 669
670 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, 670 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
671 xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0) 671 xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0)
672 return 0; 672 return 0;
673 673
674err2: 674err2:
675 /* unmap ring */ 675 /* unmap ring */
676 ungrop.host_addr = xbdi->xbdi_ring_va; 676 ungrop.host_addr = xbdi->xbdi_ring_va;
677 ungrop.handle = xbdi->xbdi_ring_handle; 677 ungrop.handle = xbdi->xbdi_ring_handle;
678 ungrop.dev_bus_addr = 0; 678 ungrop.dev_bus_addr = 0;
679 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 679 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
680 &ungrop, 1); 680 &ungrop, 1);
681 if (err) 681 if (err)
682 aprint_error("xbdback %s: unmap_grant_ref failed: %d\n", 682 aprint_error("xbdback %s: unmap_grant_ref failed: %d\n",
683 xbusd->xbusd_path, err); 683 xbusd->xbusd_path, err);
684 684
685err: 685err:
686 /* free ring VA space */ 686 /* free ring VA space */
687 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY); 687 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY);
688 return -1; 688 return -1;
689} 689}
690 690
691/* 691/*
692 * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context. 692 * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context.
693 */ 693 */
694static void 694static void
695xbdback_disconnect(struct xbdback_instance *xbdi) 695xbdback_disconnect(struct xbdback_instance *xbdi)
696{ 696{
697  697
698 mutex_enter(&xbdi->xbdi_lock); 698 mutex_enter(&xbdi->xbdi_lock);
699 if (xbdi->xbdi_status == DISCONNECTED) { 699 if (xbdi->xbdi_status == DISCONNECTED) {
700 mutex_exit(&xbdi->xbdi_lock); 700 mutex_exit(&xbdi->xbdi_lock);
701 return; 701 return;
702 } 702 }
703 hypervisor_mask_event(xbdi->xbdi_evtchn); 703 hypervisor_mask_event(xbdi->xbdi_evtchn);
704 xen_intr_disestablish(xbdi->xbdi_ih); 704 xen_intr_disestablish(xbdi->xbdi_ih);
705 705
706 /* signal thread that we want to disconnect, then wait for it */ 706 /* signal thread that we want to disconnect, then wait for it */
707 xbdi->xbdi_status = DISCONNECTING; 707 xbdi->xbdi_status = DISCONNECTING;
708 cv_signal(&xbdi->xbdi_cv); 708 cv_signal(&xbdi->xbdi_cv);
709 709
710 while (xbdi->xbdi_status != DISCONNECTED) 710 while (xbdi->xbdi_status != DISCONNECTED)
711 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 711 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
712 712
713 mutex_exit(&xbdi->xbdi_lock); 713 mutex_exit(&xbdi->xbdi_lock);
714 714
715 xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing); 715 xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing);
716} 716}
717 717
718static void 718static void
719xbdback_frontend_changed(void *arg, XenbusState new_state) 719xbdback_frontend_changed(void *arg, XenbusState new_state)
720{ 720{
721 struct xbdback_instance *xbdi = arg; 721 struct xbdback_instance *xbdi = arg;
722 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 722 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
723 723
724 XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state)); 724 XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state));
725 switch(new_state) { 725 switch(new_state) {
726 case XenbusStateInitialising: 726 case XenbusStateInitialising:
727 break; 727 break;
728 case XenbusStateInitialised: 728 case XenbusStateInitialised:
729 case XenbusStateConnected: 729 case XenbusStateConnected:
730 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) 730 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN)
731 break; 731 break;
732 xbdback_connect(xbdi); 732 xbdback_connect(xbdi);
733 break; 733 break;
734 case XenbusStateClosing: 734 case XenbusStateClosing:
735 xbdback_disconnect(xbdi); 735 xbdback_disconnect(xbdi);
736 break; 736 break;
737 case XenbusStateClosed: 737 case XenbusStateClosed:
738 /* otherend_changed() should handle it for us */ 738 /* otherend_changed() should handle it for us */
739 panic("xbdback_frontend_changed: closed\n"); 739 panic("xbdback_frontend_changed: closed\n");
740 case XenbusStateUnknown: 740 case XenbusStateUnknown:
741 case XenbusStateInitWait: 741 case XenbusStateInitWait:
742 default: 742 default:
743 aprint_error("xbdback %s: invalid frontend state %d\n", 743 aprint_error("xbdback %s: invalid frontend state %d\n",
744 xbusd->xbusd_path, new_state); 744 xbusd->xbusd_path, new_state);
745 } 745 }
746 return; 746 return;
747} 747}
748 748
749static void 749static void
750xbdback_backend_changed(struct xenbus_watch *watch, 750xbdback_backend_changed(struct xenbus_watch *watch,
751 const char **vec, unsigned int len) 751 const char **vec, unsigned int len)
752{ 752{
753 struct xenbus_device *xbusd = watch->xbw_dev; 753 struct xenbus_device *xbusd = watch->xbw_dev;
754 struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie; 754 struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie;
755 int err; 755 int err;
756 long dev; 756 long dev;
757 char mode[32]; 757 char mode[32];
758 struct xenbus_transaction *xbt; 758 struct xenbus_transaction *xbt;
759 const char *devname; 759 const char *devname;
760 int major; 760 int major;
761 761
762 err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device", 762 err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device",
763 &dev, 10); 763 &dev, 10);
764 /* 764 /*
765 * An error can occur as the watch can fire up just after being 765 * An error can occur as the watch can fire up just after being
766 * registered. So we have to ignore error :( 766 * registered. So we have to ignore error :(
767 */ 767 */
768 if (err) 768 if (err)
769 return; 769 return;
770 /* 770 /*
771 * we can also fire up after having opened the device, don't try 771 * we can also fire up after having opened the device, don't try
772 * to do it twice. 772 * to do it twice.
773 */ 773 */
774 if (xbdi->xbdi_vp != NULL) { 774 if (xbdi->xbdi_vp != NULL) {
775 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) { 775 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) {
776 if (xbdi->xbdi_dev != dev) { 776 if (xbdi->xbdi_dev != dev) {
777 printf("xbdback %s: changing physical device " 777 printf("xbdback %s: changing physical device "
778 "from %#"PRIx64" to %#lx not supported\n", 778 "from %#"PRIx64" to %#lx not supported\n",
779 xbusd->xbusd_path, xbdi->xbdi_dev, dev); 779 xbusd->xbusd_path, xbdi->xbdi_dev, dev);
780 } 780 }
781 } 781 }
782 return; 782 return;
783 } 783 }
784 xbdi->xbdi_dev = dev; 784 xbdi->xbdi_dev = dev;
785 err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode)); 785 err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode));
786 if (err) { 786 if (err) {
787 printf("xbdback: failed to read %s/mode: %d\n", 787 printf("xbdback: failed to read %s/mode: %d\n",
788 xbusd->xbusd_path, err); 788 xbusd->xbusd_path, err);
789 return; 789 return;
790 } 790 }
791 if (mode[0] == 'w') 791 if (mode[0] == 'w')
792 xbdi->xbdi_ro = false; 792 xbdi->xbdi_ro = false;
793 else 793 else
794 xbdi->xbdi_ro = true; 794 xbdi->xbdi_ro = true;
795 major = major(xbdi->xbdi_dev); 795 major = major(xbdi->xbdi_dev);
796 devname = devsw_blk2name(major); 796 devname = devsw_blk2name(major);
797 if (devname == NULL) { 797 if (devname == NULL) {
798 printf("xbdback %s: unknown device 0x%"PRIx64"\n", 798 printf("xbdback %s: unknown device 0x%"PRIx64"\n",
799 xbusd->xbusd_path, xbdi->xbdi_dev); 799 xbusd->xbusd_path, xbdi->xbdi_dev);
800 return; 800 return;
801 } 801 }
802 xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev); 802 xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev);
803 if (xbdi->xbdi_bdevsw == NULL) { 803 if (xbdi->xbdi_bdevsw == NULL) {
804 printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n", 804 printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n",
805 xbusd->xbusd_path, xbdi->xbdi_dev); 805 xbusd->xbusd_path, xbdi->xbdi_dev);
806 return; 806 return;
807 } 807 }
808 err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp); 808 err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp);
809 if (err) { 809 if (err) {
810 printf("xbdback %s: can't open device 0x%"PRIx64": %d\n", 810 printf("xbdback %s: can't open device 0x%"PRIx64": %d\n",
811 xbusd->xbusd_path, xbdi->xbdi_dev, err); 811 xbusd->xbusd_path, xbdi->xbdi_dev, err);
812 return; 812 return;
813 } 813 }
814 err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY); 814 err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY);
815 if (err) { 815 if (err) {
816 printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n", 816 printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n",
817 xbusd->xbusd_path, xbdi->xbdi_dev, err); 817 xbusd->xbusd_path, xbdi->xbdi_dev, err);
818 vrele(xbdi->xbdi_vp); 818 vrele(xbdi->xbdi_vp);
819 return; 819 return;
820 } 820 }
821 err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED); 821 err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED);
822 if (err) { 822 if (err) {
823 printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n", 823 printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n",
824 xbusd->xbusd_path, xbdi->xbdi_dev, err); 824 xbusd->xbusd_path, xbdi->xbdi_dev, err);
825 vput(xbdi->xbdi_vp); 825 vput(xbdi->xbdi_vp);
826 return; 826 return;
827 } 827 }
828 VOP_UNLOCK(xbdi->xbdi_vp); 828 VOP_UNLOCK(xbdi->xbdi_vp);
829 829
830 /* dk device; get wedge data */ 830 /* dk device; get wedge data */
831 struct dkwedge_info wi; 831 struct dkwedge_info wi;
832 if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) { 832 if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) {
833 xbdi->xbdi_size = wi.dkw_size; 833 xbdi->xbdi_size = wi.dkw_size;
834 printf("xbd backend: attach device %s (size %" PRIu64 ") " 834 printf("xbd backend: attach device %s (size %" PRIu64 ") "
835 "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size, 835 "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size,
836 xbdi->xbdi_domid); 836 xbdi->xbdi_domid);
837 } else { 837 } else {
838 /* If both ioctls failed, set device size to 0 and return */ 838 /* If both ioctls failed, set device size to 0 and return */
839 printf("xbdback %s: can't DIOCGWEDGEINFO device " 839 printf("xbdback %s: can't DIOCGWEDGEINFO device "
840 "0x%"PRIx64": %d\n", xbusd->xbusd_path, 840 "0x%"PRIx64": %d\n", xbusd->xbusd_path,
841 xbdi->xbdi_dev, err);  841 xbdi->xbdi_dev, err);
842 xbdi->xbdi_size = xbdi->xbdi_dev = 0; 842 xbdi->xbdi_size = xbdi->xbdi_dev = 0;
843 vn_close(xbdi->xbdi_vp, FREAD, NOCRED); 843 vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
844 xbdi->xbdi_vp = NULL; 844 xbdi->xbdi_vp = NULL;
845 return; 845 return;
846 } 846 }
847again: 847again:
848 xbt = xenbus_transaction_start(); 848 xbt = xenbus_transaction_start();
849 if (xbt == NULL) { 849 if (xbt == NULL) {
850 printf("xbdback %s: can't start transaction\n", 850 printf("xbdback %s: can't start transaction\n",
851 xbusd->xbusd_path); 851 xbusd->xbusd_path);
852 return; 852 return;
853 } 853 }
854 err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 , 854 err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 ,
855 xbdi->xbdi_size); 855 xbdi->xbdi_size);
856 if (err) { 856 if (err) {
857 printf("xbdback: failed to write %s/sectors: %d\n", 857 printf("xbdback: failed to write %s/sectors: %d\n",
858 xbusd->xbusd_path, err); 858 xbusd->xbusd_path, err);
859 goto abort; 859 goto abort;
860 } 860 }
861 err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u", 861 err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u",
862 xbdi->xbdi_ro ? VDISK_READONLY : 0); 862 xbdi->xbdi_ro ? VDISK_READONLY : 0);
863 if (err) { 863 if (err) {
864 printf("xbdback: failed to write %s/info: %d\n", 864 printf("xbdback: failed to write %s/info: %d\n",
865 xbusd->xbusd_path, err); 865 xbusd->xbusd_path, err);
866 goto abort; 866 goto abort;
867 } 867 }
868 err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu", 868 err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu",
869 (u_long)DEV_BSIZE); 869 (u_long)DEV_BSIZE);
870 if (err) { 870 if (err) {
871 printf("xbdback: failed to write %s/sector-size: %d\n", 871 printf("xbdback: failed to write %s/sector-size: %d\n",
872 xbusd->xbusd_path, err); 872 xbusd->xbusd_path, err);
873 goto abort; 873 goto abort;
874 } 874 }
875 err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache", 875 err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache",
876 "%u", 1); 876 "%u", 1);
877 if (err) { 877 if (err) {
878 printf("xbdback: failed to write %s/feature-flush-cache: %d\n", 878 printf("xbdback: failed to write %s/feature-flush-cache: %d\n",
879 xbusd->xbusd_path, err); 879 xbusd->xbusd_path, err);
880 goto abort; 880 goto abort;
881 } 881 }
882 err = xenbus_transaction_end(xbt, 0); 882 err = xenbus_transaction_end(xbt, 0);
883 if (err == EAGAIN) 883 if (err == EAGAIN)
884 goto again; 884 goto again;
885 if (err) { 885 if (err) {
886 printf("xbdback %s: can't end transaction: %d\n", 886 printf("xbdback %s: can't end transaction: %d\n",
887 xbusd->xbusd_path, err); 887 xbusd->xbusd_path, err);
888 } 888 }
889 err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected); 889 err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
890 if (err) { 890 if (err) {
891 printf("xbdback %s: can't switch state: %d\n", 891 printf("xbdback %s: can't switch state: %d\n",
892 xbusd->xbusd_path, err); 892 xbusd->xbusd_path, err);
893 } 893 }
894 return; 894 return;
895abort: 895abort:
896 xenbus_transaction_end(xbt, 1); 896 xenbus_transaction_end(xbt, 1);
897} 897}
898 898
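For reference, the xenstore handshake completed above leaves the following keys under the backend path before the switch to XenbusStateConnected (summarised from the code above, values exactly as written there):

	sectors             -- xbdi->xbdi_size, the wedge size obtained via getdiskinfo()
	info                -- VDISK_READONLY if the "mode" node did not start with 'w', else 0
	sector-size         -- DEV_BSIZE
	feature-flush-cache -- 1 (always advertised)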
899/* 899/*
900 * Used by a xbdi thread to signal that it is now disconnected. 900 * Used by a xbdi thread to signal that it is now disconnected.
901 */ 901 */
902static void 902static void
903xbdback_finish_disconnect(struct xbdback_instance *xbdi) 903xbdback_finish_disconnect(struct xbdback_instance *xbdi)
904{ 904{
905 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 905 KASSERT(mutex_owned(&xbdi->xbdi_lock));
906 KASSERT(xbdi->xbdi_status == DISCONNECTING); 906 KASSERT(xbdi->xbdi_status == DISCONNECTING);
907 907
908 xbdi->xbdi_status = DISCONNECTED; 908 xbdi->xbdi_status = DISCONNECTED;
909 909
910 cv_signal(&xbdi->xbdi_cv); 910 cv_signal(&xbdi->xbdi_cv);
911} 911}
912 912
913static bool 913static bool
914xbdif_lookup(domid_t dom , uint32_t handle) 914xbdif_lookup(domid_t dom , uint32_t handle)
915{ 915{
916 struct xbdback_instance *xbdi; 916 struct xbdback_instance *xbdi;
917 bool found = false; 917 bool found = false;
918 918
919 mutex_enter(&xbdback_lock); 919 mutex_enter(&xbdback_lock);
920 SLIST_FOREACH(xbdi, &xbdback_instances, next) { 920 SLIST_FOREACH(xbdi, &xbdback_instances, next) {
921 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) { 921 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) {
922 found = true; 922 found = true;
923 break; 923 break;
924 } 924 }
925 } 925 }
926 mutex_exit(&xbdback_lock); 926 mutex_exit(&xbdback_lock);
927 927
928 return found; 928 return found;
929} 929}
930 930
931static int 931static int
932xbdback_evthandler(void *arg) 932xbdback_evthandler(void *arg)
933{ 933{
934 struct xbdback_instance *xbdi = arg; 934 struct xbdback_instance *xbdi = arg;
935 935
936 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n", 936 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n",
937 xbdi->xbdi_domid, xbdi->xbdi_cont)); 937 xbdi->xbdi_domid, xbdi->xbdi_cont));
938 938
939 xbdback_wakeup_thread(xbdi); 939 xbdback_wakeup_thread(xbdi);
940 940
941 return 1; 941 return 1;
942} 942}
943 943
944/* 944/*
945 * Main thread routine for one xbdback instance. Woken up by 945 * Main thread routine for one xbdback instance. Woken up by
946 * xbdback_evthandler when a domain has I/O work scheduled in an I/O ring. 946 * xbdback_evthandler when a domain has I/O work scheduled in an I/O ring.
947 */ 947 */
948static void 948static void
949xbdback_thread(void *arg) 949xbdback_thread(void *arg)
950{ 950{
951 struct xbdback_instance *xbdi = arg; 951 struct xbdback_instance *xbdi = arg;
952 952
953 for (;;) { 953 for (;;) {
954 mutex_enter(&xbdi->xbdi_lock); 954 mutex_enter(&xbdi->xbdi_lock);
955 switch (xbdi->xbdi_status) { 955 switch (xbdi->xbdi_status) {
956 case WAITING: 956 case WAITING:
957 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 957 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
958 mutex_exit(&xbdi->xbdi_lock); 958 mutex_exit(&xbdi->xbdi_lock);
959 break; 959 break;
960 case RUN: 960 case RUN:
961 xbdi->xbdi_status = WAITING; /* reset state */ 961 xbdi->xbdi_status = WAITING; /* reset state */
962 mutex_exit(&xbdi->xbdi_lock); 962 mutex_exit(&xbdi->xbdi_lock);
963 963
964 if (xbdi->xbdi_cont == NULL) { 964 if (xbdi->xbdi_cont == NULL) {
965 xbdi->xbdi_cont = xbdback_co_main; 965 xbdi->xbdi_cont = xbdback_co_main;
966 } 966 }
967 967
968 xbdback_trampoline(xbdi, xbdi); 968 xbdback_trampoline(xbdi, xbdi);
969 break; 969 break;
970 case DISCONNECTING: 970 case DISCONNECTING:
971 if (xbdi->xbdi_pendingreqs > 0) { 971 if (xbdi->xbdi_pendingreqs > 0) {
972 /* there are pending I/Os. Wait for them. */ 972 /* there are pending I/Os. Wait for them. */
973 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 973 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
974 mutex_exit(&xbdi->xbdi_lock); 974 mutex_exit(&xbdi->xbdi_lock);
975 break; 975 break;
976 } 976 }
977  977
978 /* All I/Os should have been processed by now, 978 /* All I/Os should have been processed by now,
979 * xbdi_refcnt should drop to 0 */ 979 * xbdi_refcnt should drop to 0 */
980 xbdi_put(xbdi); 980 xbdi_put(xbdi);
981 KASSERT(xbdi->xbdi_refcnt == 0); 981 KASSERT(xbdi->xbdi_refcnt == 0);
982 mutex_exit(&xbdi->xbdi_lock); 982 mutex_exit(&xbdi->xbdi_lock);
983 kthread_exit(0); 983 kthread_exit(0);
984 break; 984 break;
985 default: 985 default:
986 panic("%s: invalid state %d", 986 panic("%s: invalid state %d",
987 xbdi->xbdi_name, xbdi->xbdi_status); 987 xbdi->xbdi_name, xbdi->xbdi_status);
988 } 988 }
989 } 989 }
990} 990}
991 991
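The RUN case above hands the instance to xbdback_trampoline(), which drives the xbdback_co_*() continuations that follow. The trampoline itself is defined earlier in this file and is not part of this hunk; purely as an illustration of the convention the continuations rely on (each step stores its successor in xbdi_cont and returns the object to pass to it, a NULL return meaning "stop until an event or iodone restarts processing"), it behaves roughly like the sketch below, which is not the actual implementation:

	static void
	sketch_trampoline(struct xbdback_instance *xbdi, void *obj)
	{
		void *(*cont)(struct xbdback_instance *, void *);

		/* run each continuation until one asks to stop */
		while (obj != NULL && xbdi->xbdi_cont != NULL) {
			cont = xbdi->xbdi_cont;
			obj = (*cont)(xbdi, obj);
		}
	}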
992static void * 992static void *
993xbdback_co_main(struct xbdback_instance *xbdi, void *obj) 993xbdback_co_main(struct xbdback_instance *xbdi, void *obj)
994{ 994{
995 (void)obj; 995 (void)obj;
996 996
997 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod; 997 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod;
998 xen_rmb(); /* ensure we see all requests up to req_prod */ 998 xen_rmb(); /* ensure we see all requests up to req_prod */
999 /* 999 /*
1000 * Note that we'll eventually get a full ring of requests. 1000 * Note that we'll eventually get a full ring of requests.
1001 * In this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod). 1001 * In this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod).
1002 */ 1002 */
1003 xbdi->xbdi_cont = xbdback_co_main_loop; 1003 xbdi->xbdi_cont = xbdback_co_main_loop;
1004 return xbdi; 1004 return xbdi;
1005} 1005}
1006 1006
1007/* 1007/*
1008 * Fetch a blkif request from the ring, and pass control to the appropriate 1008 * Fetch a blkif request from the ring, and pass control to the appropriate
1009 * continuation. 1009 * continuation.
1010 * If someone asked for disconnection, do not fetch any more requests from 1010 * If someone asked for disconnection, do not fetch any more requests from
1011 * the ring. 1011 * the ring.
1012 */ 1012 */
1013static void * 1013static void *
1014xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj)  1014xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj)
1015{ 1015{
1016 blkif_request_t *req; 1016 blkif_request_t *req;
1017 blkif_x86_32_request_t *req32; 1017 blkif_x86_32_request_t *req32;
1018 blkif_x86_64_request_t *req64; 1018 blkif_x86_64_request_t *req64;
1019 1019
1020 (void)obj; 1020 (void)obj;
1021 req = &xbdi->xbdi_xen_req; 1021 req = &xbdi->xbdi_xen_req;
1022 if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) { 1022 if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) {
1023 switch(xbdi->xbdi_proto) { 1023 switch(xbdi->xbdi_proto) {
1024 case XBDIP_NATIVE: 1024 case XBDIP_NATIVE:
1025 memcpy(req, RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n, 1025 memcpy(req, RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
1026 xbdi->xbdi_ring.ring_n.req_cons), 1026 xbdi->xbdi_ring.ring_n.req_cons),
1027 sizeof(blkif_request_t)); 1027 sizeof(blkif_request_t));
1028 break; 1028 break;
1029 case XBDIP_32: 1029 case XBDIP_32:
1030 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32, 1030 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
1031 xbdi->xbdi_ring.ring_n.req_cons); 1031 xbdi->xbdi_ring.ring_n.req_cons);
1032 req->operation = req32->operation; 1032 req->operation = req32->operation;
1033 req->nr_segments = req32->nr_segments; 1033 req->nr_segments = req32->nr_segments;
1034 req->handle = req32->handle; 1034 req->handle = req32->handle;
1035 req->id = req32->id; 1035 req->id = req32->id;
1036 req->sector_number = req32->sector_number; 1036 req->sector_number = req32->sector_number;
1037 break; 1037 break;
1038  1038
1039 case XBDIP_64: 1039 case XBDIP_64:
1040 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64, 1040 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
1041 xbdi->xbdi_ring.ring_n.req_cons); 1041 xbdi->xbdi_ring.ring_n.req_cons);
1042 req->operation = req64->operation; 1042 req->operation = req64->operation;
1043 req->nr_segments = req64->nr_segments; 1043 req->nr_segments = req64->nr_segments;
1044 req->handle = req64->handle; 1044 req->handle = req64->handle;
1045 req->id = req64->id; 1045 req->id = req64->id;
1046 req->sector_number = req64->sector_number; 1046 req->sector_number = req64->sector_number;
1047 break; 1047 break;
1048 } 1048 }
1049 __insn_barrier(); 1049 __insn_barrier();
1050 XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x " 1050 XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x "
1051 "resp_prod 0x%x id %" PRIu64 "\n", req->operation, 1051 "resp_prod 0x%x id %" PRIu64 "\n", req->operation,
1052 xbdi->xbdi_ring.ring_n.req_cons, 1052 xbdi->xbdi_ring.ring_n.req_cons,
1053 xbdi->xbdi_req_prod, 1053 xbdi->xbdi_req_prod,
1054 xbdi->xbdi_ring.ring_n.rsp_prod_pvt, 1054 xbdi->xbdi_ring.ring_n.rsp_prod_pvt,
1055 req->id)); 1055 req->id));
1056 switch(req->operation) { 1056 switch(req->operation) {
1057 case BLKIF_OP_READ: 1057 case BLKIF_OP_READ:
1058 case BLKIF_OP_WRITE: 1058 case BLKIF_OP_WRITE:
1059 xbdi->xbdi_cont = xbdback_co_io; 1059 xbdi->xbdi_cont = xbdback_co_io;
1060 break; 1060 break;
1061 case BLKIF_OP_FLUSH_DISKCACHE: 1061 case BLKIF_OP_FLUSH_DISKCACHE:
1062 xbdi_get(xbdi); 1062 xbdi_get(xbdi);
1063 xbdi->xbdi_cont = xbdback_co_cache_flush; 1063 xbdi->xbdi_cont = xbdback_co_cache_flush;
1064 break; 1064 break;
1065 default: 1065 default:
1066 if (ratecheck(&xbdi->xbdi_lasterr_time, 1066 if (ratecheck(&xbdi->xbdi_lasterr_time,
1067 &xbdback_err_intvl)) { 1067 &xbdback_err_intvl)) {
1068 printf("%s: unknown operation %d\n", 1068 printf("%s: unknown operation %d\n",
1069 xbdi->xbdi_name, req->operation); 1069 xbdi->xbdi_name, req->operation);
1070 } 1070 }
1071 xbdback_send_reply(xbdi, req->id, req->operation, 1071 xbdback_send_reply(xbdi, req->id, req->operation,
1072 BLKIF_RSP_ERROR); 1072 BLKIF_RSP_ERROR);
1073 xbdi->xbdi_cont = xbdback_co_main_incr; 1073 xbdi->xbdi_cont = xbdback_co_main_incr;
1074 break; 1074 break;
1075 } 1075 }
1076 } else { 1076 } else {
1077 xbdi->xbdi_cont = xbdback_co_main_done; 1077 xbdi->xbdi_cont = xbdback_co_main_done;
1078 } 1078 }
1079 return xbdi; 1079 return xbdi;
1080} 1080}
1081 1081
1082/* 1082/*
1083 * Increment consumer index and move on to the next request. In case 1083 * Increment consumer index and move on to the next request. In case
1084 * we want to disconnect, leave continuation now. 1084 * we want to disconnect, leave continuation now.
1085 */ 1085 */
1086static void * 1086static void *
1087xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj) 1087xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj)
1088{ 1088{
1089 (void)obj; 1089 (void)obj;
1090 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n; 1090 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n;
1091 1091
1092 ring->req_cons++; 1092 ring->req_cons++;
1093 1093
1094 /* 1094 /*
1095 * Do not bother with locking here when checking for xbdi_status: if 1095 * Do not bother with locking here when checking for xbdi_status: if
1096 * we get a transient state, we will get the right value at 1096 * we get a transient state, we will get the right value at
1097 * the next increment. 1097 * the next increment.
1098 */ 1098 */
1099 if (xbdi->xbdi_status == DISCONNECTING) 1099 if (xbdi->xbdi_status == DISCONNECTING)
1100 xbdi->xbdi_cont = NULL; 1100 xbdi->xbdi_cont = NULL;
1101 else 1101 else
1102 xbdi->xbdi_cont = xbdback_co_main_loop; 1102 xbdi->xbdi_cont = xbdback_co_main_loop;
1103 1103
1104 /* 1104 /*
1105 * Each time the thread processes a full ring of requests, give 1105 * Each time the thread processes a full ring of requests, give
1106 * a chance to other threads to process I/Os too 1106 * a chance to other threads to process I/Os too
1107 */ 1107 */
1108 if ((ring->req_cons % BLKIF_RING_SIZE) == 0) 1108 if ((ring->req_cons % BLKIF_RING_SIZE) == 0)
1109 yield(); 1109 yield();
1110 1110
1111 return xbdi; 1111 return xbdi;
1112} 1112}
1113 1113
1114/* 1114/*
1115 * Ring processing is over. If there are any I/Os still present for this 1115 * Ring processing is over. If there are any I/Os still present for this
1116 * instance, handle them first. 1116 * instance, handle them first.
1117 */ 1117 */
1118static void * 1118static void *
1119xbdback_co_main_done(struct xbdback_instance *xbdi, void *obj) 1119xbdback_co_main_done(struct xbdback_instance *xbdi, void *obj)
1120{ 1120{
1121 (void)obj; 1121 (void)obj;
1122 if (xbdi->xbdi_io != NULL) { 1122 if (xbdi->xbdi_io != NULL) {
1123 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ || 1123 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
1124 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE); 1124 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
1125 xbdi->xbdi_cont = xbdback_co_map_io; 1125 xbdi->xbdi_cont = xbdback_co_map_io;
1126 xbdi->xbdi_cont_aux = xbdback_co_main_done2; 1126 xbdi->xbdi_cont_aux = xbdback_co_main_done2;
1127 } else { 1127 } else {
1128 xbdi->xbdi_cont = xbdback_co_main_done2; 1128 xbdi->xbdi_cont = xbdback_co_main_done2;
1129 } 1129 }
1130 return xbdi; 1130 return xbdi;
1131} 1131}
1132 1132
1133/* 1133/*
1134 * Check for requests in the instance's ring. In case there are, start again 1134 * Check for requests in the instance's ring. In case there are, start again
1135 * from the beginning. If not, stall. 1135 * from the beginning. If not, stall.
1136 */ 1136 */
1137static void * 1137static void *
1138xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj) 1138xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj)
1139{ 1139{
1140 int work_to_do; 1140 int work_to_do;
1141 1141
1142 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do); 1142 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
1143 if (work_to_do) 1143 if (work_to_do)
1144 xbdi->xbdi_cont = xbdback_co_main; 1144 xbdi->xbdi_cont = xbdback_co_main;
1145 else 1145 else
1146 xbdi->xbdi_cont = NULL; 1146 xbdi->xbdi_cont = NULL;
1147 1147
1148 return xbdi; 1148 return xbdi;
1149} 1149}
1150 1150
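RING_FINAL_CHECK_FOR_REQUESTS() is the standard macro from Xen's public io/ring.h: it checks for unconsumed requests and, if none are found, re-arms the frontend's request event before checking one last time, so a request queued while the backend was winding down is not missed. Its effect on the loop here, in comment form:

	/*
	 * work_to_do != 0 -> go back to xbdback_co_main and drain the
	 *                    newly queued requests;
	 * work_to_do == 0 -> leave xbdi_cont NULL, so the trampoline stops
	 *                    and the thread sleeps until the event channel
	 *                    (or an iodone) wakes it again.
	 */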
1151/* 1151/*
1152 * Frontend requested a cache flush operation. 1152 * Frontend requested a cache flush operation.
1153 */ 1153 */
1154static void * 1154static void *
1155xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj) 1155xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
1156{ 1156{
1157 (void)obj; 1157 (void)obj;
1158 1158
1159 XENPRINTF(("xbdback_co_cache_flush %p %p\n", xbdi, obj)); 1159 XENPRINTF(("xbdback_co_cache_flush %p %p\n", xbdi, obj));
1160 if (xbdi->xbdi_io != NULL) { 1160 if (xbdi->xbdi_io != NULL) {
1161 /* Some I/Os are required for this instance. Process them. */ 1161 /* Some I/Os are required for this instance. Process them. */
1162 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ || 1162 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
1163 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE); 1163 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
1164 KASSERT(xbdi->xbdi_pendingreqs > 0); 1164 KASSERT(xbdi->xbdi_pendingreqs > 0);
1165 xbdi->xbdi_cont = xbdback_co_map_io; 1165 xbdi->xbdi_cont = xbdback_co_map_io;
1166 xbdi->xbdi_cont_aux = xbdback_co_cache_flush2; 1166 xbdi->xbdi_cont_aux = xbdback_co_cache_flush2;
1167 } else { 1167 } else {
1168 xbdi->xbdi_cont = xbdback_co_cache_flush2; 1168 xbdi->xbdi_cont = xbdback_co_cache_flush2;
1169 } 1169 }
1170 return xbdi; 1170 return xbdi;
1171} 1171}
1172 1172
1173static void * 1173static void *
1174xbdback_co_cache_flush2(struct xbdback_instance *xbdi, void *obj) 1174xbdback_co_cache_flush2(struct xbdback_instance *xbdi, void *obj)
1175{ 1175{
1176 (void)obj; 1176 (void)obj;
1177 XENPRINTF(("xbdback_co_cache_flush2 %p %p\n", xbdi, obj)); 1177 XENPRINTF(("xbdback_co_cache_flush2 %p %p\n", xbdi, obj));
1178 if (xbdi->xbdi_pendingreqs > 0) { 1178 if (xbdi->xbdi_pendingreqs > 0) {
1179 /* 1179 /*
1180 * There are pending requests. 1180 * There are pending requests.
1181 * Event or iodone() will restart processing 1181 * Event or iodone() will restart processing
1182 */ 1182 */
1183 xbdi->xbdi_cont = NULL; 1183 xbdi->xbdi_cont = NULL;
1184 xbdi_put(xbdi); 1184 xbdi_put(xbdi);
1185 return NULL; 1185 return NULL;
1186 } 1186 }
1187 xbdi->xbdi_cont = xbdback_co_cache_doflush; 1187 xbdi->xbdi_cont = xbdback_co_cache_doflush;
1188 return xbdback_pool_get(&xbdback_io_pool, xbdi); 1188 return xbdback_pool_get(&xbdback_io_pool, xbdi);
1189} 1189}
1190 1190
1191/* Start the flush work */ 1191/* Start the flush work */
1192static void * 1192static void *
1193xbdback_co_cache_doflush(struct xbdback_instance *xbdi, void *obj) 1193xbdback_co_cache_doflush(struct xbdback_instance *xbdi, void *obj)
1194{ 1194{
1195 struct xbdback_io *xbd_io; 1195 struct xbdback_io *xbd_io;
1196 1196
1197 XENPRINTF(("xbdback_co_cache_doflush %p %p\n", xbdi, obj)); 1197 XENPRINTF(("xbdback_co_cache_doflush %p %p\n", xbdi, obj));
1198 xbd_io = xbdi->xbdi_io = obj; 1198 xbd_io = xbdi->xbdi_io = obj;
1199 xbd_io->xio_xbdi = xbdi; 1199 xbd_io->xio_xbdi = xbdi;
1200 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation; 1200 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation;
1201 xbd_io->xio_flush_id = xbdi->xbdi_xen_req.id; 1201 xbd_io->xio_flush_id = xbdi->xbdi_xen_req.id;
1202 xbdi->xbdi_cont = xbdback_co_do_io; 1202 xbdi->xbdi_cont = xbdback_co_do_io;
1203 return xbdi; 1203 return xbdi;
1204} 1204}
1205 1205
1206/* 1206/*
1207 * A read or write I/O request must be processed. Do some checks first, 1207 * A read or write I/O request must be processed. Do some checks first,
1208 * then get the segment information directly from the ring request. 1208 * then get the segment information directly from the ring request.
1209 */ 1209 */
1210static void * 1210static void *
1211xbdback_co_io(struct xbdback_instance *xbdi, void *obj) 1211xbdback_co_io(struct xbdback_instance *xbdi, void *obj)
1212{  1212{
1213 int i, error; 1213 int i, error;
1214 blkif_request_t *req; 1214 blkif_request_t *req;
1215 blkif_x86_32_request_t *req32; 1215 blkif_x86_32_request_t *req32;
1216 blkif_x86_64_request_t *req64; 1216 blkif_x86_64_request_t *req64;
1217 1217
1218 (void)obj; 1218 (void)obj;
1219 1219
1220 /* some sanity checks */ 1220 /* some sanity checks */
1221 req = &xbdi->xbdi_xen_req; 1221 req = &xbdi->xbdi_xen_req;
1222 if (req->nr_segments < 1 || 1222 if (req->nr_segments < 1 ||
1223 req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) { 1223 req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
1224 if (ratecheck(&xbdi->xbdi_lasterr_time, 1224 if (ratecheck(&xbdi->xbdi_lasterr_time,
1225 &xbdback_err_intvl)) { 1225 &xbdback_err_intvl)) {
1226 printf("%s: invalid number of segments: %d\n", 1226 printf("%s: invalid number of segments: %d\n",
1227 xbdi->xbdi_name, 1227 xbdi->xbdi_name,
1228 xbdi->xbdi_xen_req.nr_segments); 1228 xbdi->xbdi_xen_req.nr_segments);
1229 } 1229 }
1230 error = EINVAL; 1230 error = EINVAL;
1231 goto end; 1231 goto end;
1232 } 1232 }
1233 1233
1234 KASSERT(req->operation == BLKIF_OP_READ || 1234 KASSERT(req->operation == BLKIF_OP_READ ||
1235 req->operation == BLKIF_OP_WRITE); 1235 req->operation == BLKIF_OP_WRITE);
1236 if (req->operation == BLKIF_OP_WRITE) { 1236 if (req->operation == BLKIF_OP_WRITE) {
1237 if (xbdi->xbdi_ro) { 1237 if (xbdi->xbdi_ro) {
1238 error = EROFS; 1238 error = EROFS;
1239 goto end; 1239 goto end;
1240 } 1240 }
1241 } 1241 }
1242 1242
1243 xbdi->xbdi_segno = 0; 1243 xbdi->xbdi_segno = 0;
1244 1244
1245 /* copy request segments */ 1245 /* copy request segments */
1246 switch(xbdi->xbdi_proto) { 1246 switch(xbdi->xbdi_proto) {
1247 case XBDIP_NATIVE: 1247 case XBDIP_NATIVE:
1248 /* already copied in xbdback_co_main_loop */ 1248 /* already copied in xbdback_co_main_loop */
1249 break; 1249 break;
1250 case XBDIP_32: 1250 case XBDIP_32:
1251 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32, 1251 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
1252 xbdi->xbdi_ring.ring_n.req_cons); 1252 xbdi->xbdi_ring.ring_n.req_cons);
1253 for (i = 0; i < req->nr_segments; i++) 1253 for (i = 0; i < req->nr_segments; i++)
1254 req->seg[i] = req32->seg[i]; 1254 req->seg[i] = req32->seg[i];
1255 break; 1255 break;
1256 case XBDIP_64: 1256 case XBDIP_64:
1257 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64, 1257 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
1258 xbdi->xbdi_ring.ring_n.req_cons); 1258 xbdi->xbdi_ring.ring_n.req_cons);
1259 for (i = 0; i < req->nr_segments; i++) 1259 for (i = 0; i < req->nr_segments; i++)
1260 req->seg[i] = req64->seg[i]; 1260 req->seg[i] = req64->seg[i];
1261 break; 1261 break;
1262 } 1262 }
1263 1263
1264 xbdi->xbdi_cont = xbdback_co_io_gotreq; 1264 xbdi->xbdi_cont = xbdback_co_io_gotreq;
1265 return xbdback_pool_get(&xbdback_request_pool, xbdi); 1265 return xbdback_pool_get(&xbdback_request_pool, xbdi);
1266 1266
1267 end: 1267 end:
1268 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id, 1268 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id,
1269 xbdi->xbdi_xen_req.operation, error); 1269 xbdi->xbdi_xen_req.operation, error);
1270 xbdi->xbdi_cont = xbdback_co_main_incr; 1270 xbdi->xbdi_cont = xbdback_co_main_incr;
1271 return xbdi; 1271 return xbdi;
1272} 1272}
1273 1273
1274/* 1274/*
1275 * We have fetched segment requests from the ring. In case there are already 1275 * We have fetched segment requests from the ring. In case there are already
1276 * I/Os prepared for this instance, we can try coalescing the requests 1276 * I/Os prepared for this instance, we can try coalescing the requests
1277 * with these I/Os. 1277 * with these I/Os.
1278 */ 1278 */
1279static void * 1279static void *
1280xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj) 1280xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj)
1281{ 1281{
1282 struct xbdback_request *xrq; 1282 struct xbdback_request *xrq;
1283 1283
1284 xrq = xbdi->xbdi_req = obj; 1284 xrq = xbdi->xbdi_req = obj;
1285  1285
1286 xrq->rq_xbdi = xbdi; 1286 xrq->rq_xbdi = xbdi;
1287 xrq->rq_iocount = 0; 1287 xrq->rq_iocount = 0;
1288 xrq->rq_ioerrs = 0; 1288 xrq->rq_ioerrs = 0;
1289 xrq->rq_id = xbdi->xbdi_xen_req.id; 1289 xrq->rq_id = xbdi->xbdi_xen_req.id;
1290 xrq->rq_operation = xbdi->xbdi_xen_req.operation; 1290 xrq->rq_operation = xbdi->xbdi_xen_req.operation;
1291 KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ || 1291 KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ ||
1292 xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE); 1292 xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE);
1293 1293
1294 /*  1294 /*
1295 * Request-level reasons not to coalesce: different device, 1295 * Request-level reasons not to coalesce: different device,
1296 * different op, or noncontiguous disk sectors (vs. previous 1296 * different op, or noncontiguous disk sectors (vs. previous
1297 * request handed to us). 1297 * request handed to us).
1298 */ 1298 */
1299 xbdi->xbdi_cont = xbdback_co_io_loop; 1299 xbdi->xbdi_cont = xbdback_co_io_loop;
1300 if (xbdi->xbdi_io != NULL) { 1300 if (xbdi->xbdi_io != NULL) {
1301 struct xbdback_request *last_req; 1301 struct xbdback_request *last_req;
1302 last_req = SLIST_FIRST(&xbdi->xbdi_io->xio_rq)->car; 1302 last_req = SLIST_FIRST(&xbdi->xbdi_io->xio_rq)->car;
1303 XENPRINTF(("xbdback_io domain %d: hoping for sector %" PRIu64 1303 XENPRINTF(("xbdback_io domain %d: hoping for sector %" PRIu64
1304 "; got %" PRIu64 "\n", xbdi->xbdi_domid, 1304 "; got %" PRIu64 "\n", xbdi->xbdi_domid,
1305 xbdi->xbdi_next_sector, 1305 xbdi->xbdi_next_sector,
1306 xbdi->xbdi_xen_req.sector_number)); 1306 xbdi->xbdi_xen_req.sector_number));
1307 if ((xrq->rq_operation != last_req->rq_operation) 1307 if ((xrq->rq_operation != last_req->rq_operation)
1308 || (xbdi->xbdi_xen_req.sector_number != 1308 || (xbdi->xbdi_xen_req.sector_number !=
1309 xbdi->xbdi_next_sector)) { 1309 xbdi->xbdi_next_sector)) {
1310 XENPRINTF(("xbdback_io domain %d: segment break\n", 1310 XENPRINTF(("xbdback_io domain %d: segment break\n",
1311 xbdi->xbdi_domid)); 1311 xbdi->xbdi_domid));
1312 xbdi->xbdi_next_sector = 1312 xbdi->xbdi_next_sector =
1313 xbdi->xbdi_xen_req.sector_number; 1313 xbdi->xbdi_xen_req.sector_number;
1314 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ || 1314 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
1315 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE); 1315 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
1316 xbdi->xbdi_cont_aux = xbdback_co_io_loop; 1316 xbdi->xbdi_cont_aux = xbdback_co_io_loop;
1317 xbdi->xbdi_cont = xbdback_co_map_io; 1317 xbdi->xbdi_cont = xbdback_co_map_io;
1318 } 1318 }
1319 } else { 1319 } else {
1320 xbdi->xbdi_next_sector = xbdi->xbdi_xen_req.sector_number; 1320 xbdi->xbdi_next_sector = xbdi->xbdi_xen_req.sector_number;
1321 } 1321 }
1322 return xbdi; 1322 return xbdi;
1323} 1323}
1324 1324
1325/* Handle coalescing of multiple segment requests into one I/O work */ 1325/* Handle coalescing of multiple segment requests into one I/O work */
1326static void * 1326static void *
1327xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj) 1327xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj)
1328{ 1328{
1329 (void)obj; 1329 (void)obj;
1330 KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ || 1330 KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ ||
1331 xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE); 1331 xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE);
1332 if (xbdi->xbdi_segno < xbdi->xbdi_xen_req.nr_segments) { 1332 if (xbdi->xbdi_segno < xbdi->xbdi_xen_req.nr_segments) {
1333 uint8_t this_fs, this_ls, last_ls; 1333 uint8_t this_fs, this_ls, last_ls;
1334 grant_ref_t thisgrt; 1334 grant_ref_t thisgrt;
1335 /*  1335 /*
1336 * Segment-level reason to coalesce: handling full 1336 * Segment-level reason to coalesce: handling full
1337 * pages, or adjacent sector ranges from the same page 1337 * pages, or adjacent sector ranges from the same page
1338 * (and yes, this latter does happen). But not if the 1338 * (and yes, this latter does happen). But not if the
1339 * array of client pseudo-physical pages is full. 1339 * array of client pseudo-physical pages is full.
1340 */ 1340 */
1341 this_fs = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].first_sect; 1341 this_fs = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].first_sect;
1342 this_ls = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].last_sect; 1342 this_ls = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].last_sect;
1343 thisgrt = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].gref; 1343 thisgrt = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].gref;
1344 XENPRINTF(("xbdback_io domain %d: " 1344 XENPRINTF(("xbdback_io domain %d: "
1345 "first,last_sect[%d]=0%o,0%o\n", 1345 "first,last_sect[%d]=0%o,0%o\n",
1346 xbdi->xbdi_domid, xbdi->xbdi_segno, 1346 xbdi->xbdi_domid, xbdi->xbdi_segno,
1347 this_fs, this_ls)); 1347 this_fs, this_ls));
1348 last_ls = xbdi->xbdi_last_ls = xbdi->xbdi_this_ls; 1348 last_ls = xbdi->xbdi_last_ls = xbdi->xbdi_this_ls;
1349 xbdi->xbdi_this_fs = this_fs; 1349 xbdi->xbdi_this_fs = this_fs;
1350 xbdi->xbdi_this_ls = this_ls; 1350 xbdi->xbdi_this_ls = this_ls;
1351 xbdi->xbdi_thisgrt = thisgrt; 1351 xbdi->xbdi_thisgrt = thisgrt;
1352 if (xbdi->xbdi_io != NULL) { 1352 if (xbdi->xbdi_io != NULL) {
1353 if (last_ls == VBD_MAXSECT 1353 if (last_ls == VBD_MAXSECT
1354 && this_fs == 0 1354 && this_fs == 0
1355 && xbdi->xbdi_io->xio_nrma 1355 && xbdi->xbdi_io->xio_nrma
1356 < XENSHM_MAX_PAGES_PER_REQUEST) { 1356 < XENSHM_MAX_PAGES_PER_REQUEST) {
1357 xbdi->xbdi_same_page = 0; 1357 xbdi->xbdi_same_page = 0;
1358 } else if (last_ls + 1 1358 } else if (last_ls + 1
1359 == this_fs 1359 == this_fs
1360#ifdef notyet 1360#ifdef notyet
1361 && (last_fas & ~PAGE_MASK) 1361 && (last_fas & ~PAGE_MASK)
1362 == (this_fas & ~PAGE_MASK) 1362 == (this_fas & ~PAGE_MASK)
1363#else  1363#else
1364 && 0 /* can't know frame number yet */ 1364 && 0 /* can't know frame number yet */
1365#endif 1365#endif
1366 ) { 1366 ) {
1367#ifdef DEBUG 1367#ifdef DEBUG
1368 if (ratecheck(&xbdi->xbdi_lastfragio_time, 1368 if (ratecheck(&xbdi->xbdi_lastfragio_time,
1369 &xbdback_fragio_intvl)) 1369 &xbdback_fragio_intvl))
1370 printf("%s: domain is sending" 1370 printf("%s: domain is sending"
1371 " excessively fragmented I/O\n", 1371 " excessively fragmented I/O\n",
1372 xbdi->xbdi_name); 1372 xbdi->xbdi_name);
1373#endif 1373#endif
1374 printf("xbdback_io: would maybe glue " 1374 printf("xbdback_io: would maybe glue "
1375 "same page sec %d (%d->%d)\n", 1375 "same page sec %d (%d->%d)\n",
1376 xbdi->xbdi_segno, this_fs, this_ls); 1376 xbdi->xbdi_segno, this_fs, this_ls);
1377 XENPRINTF(("xbdback_io domain %d: glue same " 1377 XENPRINTF(("xbdback_io domain %d: glue same "
1378 "page", xbdi->xbdi_domid)); 1378 "page", xbdi->xbdi_domid));
1379 panic("notyet!"); 1379 panic("notyet!");
1380 xbdi->xbdi_same_page = 1; 1380 xbdi->xbdi_same_page = 1;
1381 } else { 1381 } else {
1382 KASSERT(xbdi->xbdi_io->xio_operation == 1382 KASSERT(xbdi->xbdi_io->xio_operation ==
1383 BLKIF_OP_READ || 1383 BLKIF_OP_READ ||
1384 xbdi->xbdi_io->xio_operation == 1384 xbdi->xbdi_io->xio_operation ==
1385 BLKIF_OP_WRITE); 1385 BLKIF_OP_WRITE);
1386 xbdi->xbdi_cont_aux = xbdback_co_io_loop; 1386 xbdi->xbdi_cont_aux = xbdback_co_io_loop;
1387 xbdi->xbdi_cont = xbdback_co_map_io; 1387 xbdi->xbdi_cont = xbdback_co_map_io;
1388 return xbdi; 1388 return xbdi;
1389 } 1389 }
1390 } else 1390 } else
1391 xbdi->xbdi_same_page = 0; 1391 xbdi->xbdi_same_page = 0;
1392 1392
1393 if (xbdi->xbdi_io == NULL) { 1393 if (xbdi->xbdi_io == NULL) {
1394 xbdi->xbdi_cont = xbdback_co_io_gotio; 1394 xbdi->xbdi_cont = xbdback_co_io_gotio;
1395 return xbdback_pool_get(&xbdback_io_pool, xbdi); 1395 return xbdback_pool_get(&xbdback_io_pool, xbdi);
1396 } else { 1396 } else {
1397 xbdi->xbdi_cont = xbdback_co_io_gotio2; 1397 xbdi->xbdi_cont = xbdback_co_io_gotio2;
1398 } 1398 }
1399 } else { 1399 } else {
1400 /* done with the loop over segments; get next request */ 1400 /* done with the loop over segments; get next request */
1401 xbdi->xbdi_cont = xbdback_co_main_incr; 1401 xbdi->xbdi_cont = xbdback_co_main_incr;
1402 } 1402 }
1403 return xbdi; 1403 return xbdi;
1404} 1404}
1405 1405
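To make the coalescing test above concrete, here is a worked example in comment form. It assumes VBD_BSIZE is 512 and PAGE_SIZE is 4096, so VBD_MAXSECT would be 7; both constants are defined elsewhere and may differ, and the segment values are hypothetical.

	/*
	 * Two consecutive segments of one hypothetical request:
	 *
	 *   seg N  : gref=12, first_sect=3, last_sect=7   (runs to end of its page)
	 *   seg N+1: gref=13, first_sect=0, last_sect=4   (starts a new page)
	 *
	 * When seg N+1 is examined, last_ls == VBD_MAXSECT and this_fs == 0,
	 * so it is folded into the current xbd_io: its grant is appended to
	 * xio_gref[] and b_bcount grows (xbdback_co_io_gotfrag2), provided
	 * xio_nrma is still below XENSHM_MAX_PAGES_PER_REQUEST.  Otherwise
	 * the current xbd_io is submitted via xbdback_co_map_io before a new
	 * one is started.
	 */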
1406/* Prepare an I/O buffer for a xbdback instance */ 1406/* Prepare an I/O buffer for a xbdback instance */
1407static void * 1407static void *
1408xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj) 1408xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
1409{ 1409{
1410 struct xbdback_io *xbd_io; 1410 struct xbdback_io *xbd_io;
1411 vaddr_t start_offset; /* start offset in vm area */ 1411 vaddr_t start_offset; /* start offset in vm area */
1412 int buf_flags; 1412 int buf_flags;
1413 1413
1414 xbdi_get(xbdi); 1414 xbdi_get(xbdi);
1415 atomic_inc_uint(&xbdi->xbdi_pendingreqs); 1415 atomic_inc_uint(&xbdi->xbdi_pendingreqs);
1416  1416
1417 xbd_io = xbdi->xbdi_io = obj; 1417 xbd_io = xbdi->xbdi_io = obj;
1418 buf_init(&xbd_io->xio_buf); 1418 buf_init(&xbd_io->xio_buf);
1419 xbd_io->xio_xbdi = xbdi; 1419 xbd_io->xio_xbdi = xbdi;
1420 SLIST_INIT(&xbd_io->xio_rq); 1420 SLIST_INIT(&xbd_io->xio_rq);
1421 xbd_io->xio_nrma = 0; 1421 xbd_io->xio_nrma = 0;
1422 xbd_io->xio_mapped = 0; 1422 xbd_io->xio_mapped = 0;
1423 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation; 1423 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation;
1424 1424
1425 start_offset = xbdi->xbdi_this_fs * VBD_BSIZE; 1425 start_offset = xbdi->xbdi_this_fs * VBD_BSIZE;
1426 KASSERT(start_offset < PAGE_SIZE); 1426 KASSERT(start_offset < PAGE_SIZE);
1427 1427
1428 if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) { 1428 if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) {
1429 buf_flags = B_WRITE; 1429 buf_flags = B_WRITE;
1430 } else { 1430 } else {
1431 buf_flags = B_READ; 1431 buf_flags = B_READ;
1432 } 1432 }
1433 1433
1434 xbd_io->xio_buf.b_flags = buf_flags; 1434 xbd_io->xio_buf.b_flags = buf_flags;
1435 xbd_io->xio_buf.b_cflags = 0; 1435 xbd_io->xio_buf.b_cflags = 0;
1436 xbd_io->xio_buf.b_oflags = 0; 1436 xbd_io->xio_buf.b_oflags = 0;
1437 xbd_io->xio_buf.b_iodone = xbdback_iodone; 1437 xbd_io->xio_buf.b_iodone = xbdback_iodone;
1438 xbd_io->xio_buf.b_proc = NULL; 1438 xbd_io->xio_buf.b_proc = NULL;
1439 xbd_io->xio_buf.b_vp = xbdi->xbdi_vp; 1439 xbd_io->xio_buf.b_vp = xbdi->xbdi_vp;
1440 xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock; 1440 xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock;
1441 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev; 1441 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev;
1442 xbd_io->xio_buf.b_blkno = xbdi->xbdi_next_sector; 1442 xbd_io->xio_buf.b_blkno = xbdi->xbdi_next_sector;
1443 xbd_io->xio_buf.b_bcount = 0; 1443 xbd_io->xio_buf.b_bcount = 0;
1444 xbd_io->xio_buf.b_data = (void *)start_offset; 1444 xbd_io->xio_buf.b_data = (void *)start_offset;
1445 xbd_io->xio_buf.b_private = xbd_io; 1445 xbd_io->xio_buf.b_private = xbd_io;
1446 1446
1447 xbdi->xbdi_cont = xbdback_co_io_gotio2; 1447 xbdi->xbdi_cont = xbdback_co_io_gotio2;
1448 return xbdi; 1448 return xbdi;
1449} 1449}
1450 1450
1451/* Manage fragments */ 1451/* Manage fragments */
1452static void * 1452static void *
1453xbdback_co_io_gotio2(struct xbdback_instance *xbdi, void *obj) 1453xbdback_co_io_gotio2(struct xbdback_instance *xbdi, void *obj)
1454{ 1454{
1455 (void)obj; 1455 (void)obj;
1456 if (xbdi->xbdi_segno == 0 || SLIST_EMPTY(&xbdi->xbdi_io->xio_rq)) { 1456 if (xbdi->xbdi_segno == 0 || SLIST_EMPTY(&xbdi->xbdi_io->xio_rq)) {
1457 /* if this is the first segment of a new request */ 1457 /* if this is the first segment of a new request */
1458 /* or if it's the first segment of the io */ 1458 /* or if it's the first segment of the io */
1459 xbdi->xbdi_cont = xbdback_co_io_gotfrag; 1459 xbdi->xbdi_cont = xbdback_co_io_gotfrag;
1460 return xbdback_pool_get(&xbdback_fragment_pool, xbdi); 1460 return xbdback_pool_get(&xbdback_fragment_pool, xbdi);
1461 } 1461 }
1462 xbdi->xbdi_cont = xbdback_co_io_gotfrag2; 1462 xbdi->xbdi_cont = xbdback_co_io_gotfrag2;
1463 return xbdi; 1463 return xbdi;
1464} 1464}
1465 1465
1466/* Prepare the instance for its first fragment */ 1466/* Prepare the instance for its first fragment */
1467static void * 1467static void *
1468xbdback_co_io_gotfrag(struct xbdback_instance *xbdi, void *obj) 1468xbdback_co_io_gotfrag(struct xbdback_instance *xbdi, void *obj)
1469{ 1469{
1470 struct xbdback_fragment *xbd_fr; 1470 struct xbdback_fragment *xbd_fr;
1471 1471
1472 xbd_fr = obj; 1472 xbd_fr = obj;
1473 xbd_fr->car = xbdi->xbdi_req; 1473 xbd_fr->car = xbdi->xbdi_req;
1474 SLIST_INSERT_HEAD(&xbdi->xbdi_io->xio_rq, xbd_fr, cdr); 1474 SLIST_INSERT_HEAD(&xbdi->xbdi_io->xio_rq, xbd_fr, cdr);
1475 ++xbdi->xbdi_req->rq_iocount; 1475 ++xbdi->xbdi_req->rq_iocount;
1476 1476
1477 xbdi->xbdi_cont = xbdback_co_io_gotfrag2; 1477 xbdi->xbdi_cont = xbdback_co_io_gotfrag2;
1478 return xbdi; 1478 return xbdi;
1479} 1479}
1480 1480
1481/* Last routine to manage segment fragments for one I/O */ 1481/* Last routine to manage segment fragments for one I/O */
1482static void * 1482static void *
1483xbdback_co_io_gotfrag2(struct xbdback_instance *xbdi, void *obj) 1483xbdback_co_io_gotfrag2(struct xbdback_instance *xbdi, void *obj)
1484{ 1484{
1485 struct xbdback_io *xbd_io; 1485 struct xbdback_io *xbd_io;
1486 int seg_size; 1486 int seg_size;
1487 uint8_t this_fs, this_ls; 1487 uint8_t this_fs, this_ls;
1488 1488
1489 this_fs = xbdi->xbdi_this_fs; 1489 this_fs = xbdi->xbdi_this_fs;
1490 this_ls = xbdi->xbdi_this_ls; 1490 this_ls = xbdi->xbdi_this_ls;
1491 xbd_io = xbdi->xbdi_io; 1491 xbd_io = xbdi->xbdi_io;
1492 seg_size = this_ls - this_fs + 1; 1492 seg_size = this_ls - this_fs + 1;
1493 1493
1494 if (seg_size < 0) { 1494 if (seg_size < 0) {
1495 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) { 1495 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
1496 printf("xbdback_io domain %d: negative-size request " 1496 printf("xbdback_io domain %d: negative-size request "
1497 "(%d %d)\n", 1497 "(%d %d)\n",
1498 xbdi->xbdi_domid, this_ls, this_fs); 1498 xbdi->xbdi_domid, this_ls, this_fs);
1499 } 1499 }
1500 xbdback_io_error(xbdi->xbdi_io, EINVAL); 1500 xbdback_io_error(xbdi->xbdi_io, EINVAL);
1501 xbdi->xbdi_io = NULL; 1501 xbdi->xbdi_io = NULL;
1502 xbdi->xbdi_cont = xbdback_co_main_incr; 1502 xbdi->xbdi_cont = xbdback_co_main_incr;
1503 return xbdi; 1503 return xbdi;
1504 } 1504 }
1505  1505
1506 if (!xbdi->xbdi_same_page) { 1506 if (!xbdi->xbdi_same_page) {
1507 XENPRINTF(("xbdback_io domain %d: appending grant %u\n", 1507 XENPRINTF(("xbdback_io domain %d: appending grant %u\n",
1508 xbdi->xbdi_domid, (u_int)xbdi->xbdi_thisgrt)); 1508 xbdi->xbdi_domid, (u_int)xbdi->xbdi_thisgrt));
1509 xbd_io->xio_gref[xbd_io->xio_nrma++] = xbdi->xbdi_thisgrt; 1509 xbd_io->xio_gref[xbd_io->xio_nrma++] = xbdi->xbdi_thisgrt;
1510 } 1510 }
1511 1511
1512 xbd_io->xio_buf.b_bcount += (daddr_t)(seg_size * VBD_BSIZE); 1512 xbd_io->xio_buf.b_bcount += (daddr_t)(seg_size * VBD_BSIZE);
1513 XENPRINTF(("xbdback_io domain %d: start sect %ld size %d\n", 1513 XENPRINTF(("xbdback_io domain %d: start sect %ld size %d\n",
1514 xbdi->xbdi_domid, (long)xbdi->xbdi_next_sector, seg_size)); 1514 xbdi->xbdi_domid, (long)xbdi->xbdi_next_sector, seg_size));
1515  1515
1516 /* Finally, the end of the segment loop! */ 1516 /* Finally, the end of the segment loop! */
1517 xbdi->xbdi_next_sector += seg_size; 1517 xbdi->xbdi_next_sector += seg_size;
1518 ++xbdi->xbdi_segno; 1518 ++xbdi->xbdi_segno;
1519 xbdi->xbdi_cont = xbdback_co_io_loop; 1519 xbdi->xbdi_cont = xbdback_co_io_loop;
1520 return xbdi; 1520 return xbdi;
1521} 1521}
1522 1522
1523/* 1523/*
1524 * Map the different I/O requests in backend's VA space. 1524 * Map the different I/O requests in backend's VA space.
1525 */ 1525 */
1526static void * 1526static void *
1527xbdback_co_map_io(struct xbdback_instance *xbdi, void *obj) 1527xbdback_co_map_io(struct xbdback_instance *xbdi, void *obj)
1528{ 1528{
1529 (void)obj; 1529 (void)obj;
1530 XENPRINTF(("xbdback_io domain %d: flush sect %ld size %d ptr 0x%lx\n", 1530 XENPRINTF(("xbdback_io domain %d: flush sect %ld size %d ptr 0x%lx\n",
1531 xbdi->xbdi_domid, (long)xbdi->xbdi_io->xio_buf.b_blkno, 1531 xbdi->xbdi_domid, (long)xbdi->xbdi_io->xio_buf.b_blkno,
1532 (int)xbdi->xbdi_io->xio_buf.b_bcount, (long)xbdi->xbdi_io)); 1532 (int)xbdi->xbdi_io->xio_buf.b_bcount, (long)xbdi->xbdi_io));
1533 xbdi->xbdi_cont = xbdback_co_do_io; 1533 xbdi->xbdi_cont = xbdback_co_do_io;
1534 return xbdback_map_shm(xbdi->xbdi_io); 1534 return xbdback_map_shm(xbdi->xbdi_io);
1535} 1535}
1536 1536
1537static void 1537static void
1538xbdback_io_error(struct xbdback_io *xbd_io, int error) 1538xbdback_io_error(struct xbdback_io *xbd_io, int error)
1539{ 1539{
1540 xbd_io->xio_buf.b_error = error; 1540 xbd_io->xio_buf.b_error = error;
1541 xbdback_iodone(&xbd_io->xio_buf); 1541 xbdback_iodone(&xbd_io->xio_buf);
1542} 1542}
1543 1543
1544/* 1544/*
1545 * Main xbdback I/O routine. It can either perform a flush operation or 1545 * Main xbdback I/O routine. It can either perform a flush operation or
1546 * schedule a read/write operation. 1546 * schedule a read/write operation.
1547 */ 1547 */
1548static void * 1548static void *
1549xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj) 1549xbdback_co_do_io(struct xbdback_instance *xbdi, void *obj)
1550{ 1550{
1551 struct xbdback_io *xbd_io = xbdi->xbdi_io; 1551 struct xbdback_io *xbd_io = xbdi->xbdi_io;
1552 vaddr_t start_offset; 1552 vaddr_t start_offset;
1553 int nsegs __diagused; 1553 int nsegs __diagused;
1554 1554
1555 switch (xbd_io->xio_operation) { 1555 switch (xbd_io->xio_operation) {
1556 case BLKIF_OP_FLUSH_DISKCACHE: 1556 case BLKIF_OP_FLUSH_DISKCACHE:
1557 { 1557 {
1558 int error; 1558 int error;
1559 int force = 1; 1559 int force = 1;
1560 1560
1561 error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE, 1561 error = VOP_IOCTL(xbdi->xbdi_vp, DIOCCACHESYNC, &force, FWRITE,
1562 kauth_cred_get()); 1562 kauth_cred_get());
1563 if (error) { 1563 if (error) {
1564 aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n", 1564 aprint_error("xbdback %s: DIOCCACHESYNC returned %d\n",
1565 xbdi->xbdi_xbusd->xbusd_path, error); 1565 xbdi->xbdi_xbusd->xbusd_path, error);
1566 if (error == EOPNOTSUPP || error == ENOTTY) 1566 if (error == EOPNOTSUPP || error == ENOTTY)
1567 error = BLKIF_RSP_EOPNOTSUPP; 1567 error = BLKIF_RSP_EOPNOTSUPP;
1568 else 1568 else
1569 error = BLKIF_RSP_ERROR; 1569 error = BLKIF_RSP_ERROR;
1570 } else 1570 } else
1571 error = BLKIF_RSP_OKAY; 1571 error = BLKIF_RSP_OKAY;
1572 xbdback_send_reply(xbdi, xbd_io->xio_flush_id, 1572 xbdback_send_reply(xbdi, xbd_io->xio_flush_id,
1573 xbd_io->xio_operation, error); 1573 xbd_io->xio_operation, error);
1574 xbdback_pool_put(&xbdback_io_pool, xbd_io); 1574 xbdback_pool_put(&xbdback_io_pool, xbd_io);
1575 xbdi_put(xbdi); 1575 xbdi_put(xbdi);
1576 xbdi->xbdi_io = NULL; 1576 xbdi->xbdi_io = NULL;
1577 xbdi->xbdi_cont = xbdback_co_main_incr; 1577 xbdi->xbdi_cont = xbdback_co_main_incr;
1578 return xbdi; 1578 return xbdi;
1579 } 1579 }
1580 case BLKIF_OP_READ: 1580 case BLKIF_OP_READ:
1581 case BLKIF_OP_WRITE: 1581 case BLKIF_OP_WRITE:
1582 start_offset = (vaddr_t)xbd_io->xio_buf.b_data; 1582 start_offset = (vaddr_t)xbd_io->xio_buf.b_data;
1583 KASSERT(xbd_io->xio_buf.b_bcount + start_offset < VBD_VA_SIZE); 1583 KASSERT(xbd_io->xio_buf.b_bcount + start_offset < VBD_VA_SIZE);
1584 xbd_io->xio_buf.b_data = (void *) 1584 xbd_io->xio_buf.b_data = (void *)
1585 (start_offset + xbd_io->xio_vaddr); 1585 (start_offset + xbd_io->xio_vaddr);
1586#ifdef DIAGNOSTIC 1586#ifdef DIAGNOSTIC
1587 nsegs = round_page(start_offset + xbd_io->xio_buf.b_bcount) 1587 nsegs = round_page(start_offset + xbd_io->xio_buf.b_bcount)
1588 >> PAGE_SHIFT; 1588 >> PAGE_SHIFT;
1589 if (nsegs > xbd_io->xio_nrma) { 1589 if (nsegs > xbd_io->xio_nrma) {
1590 printf("xbdback_co_do_io: vaddr %#" PRIxVADDR 1590 printf("xbdback_co_do_io: vaddr %#" PRIxVADDR
1591 " bcount %#x doesn't fit in %d pages\n", 1591 " bcount %#x doesn't fit in %d pages\n",
1592 start_offset, xbd_io->xio_buf.b_bcount, 1592 start_offset, xbd_io->xio_buf.b_bcount,
1593 xbd_io->xio_nrma); 1593 xbd_io->xio_nrma);
1594 panic("xbdback_co_do_io: not enough pages"); 1594 panic("xbdback_co_do_io: not enough pages");
1595 } 1595 }
1596#endif 1596#endif
1597 if ((xbd_io->xio_buf.b_flags & B_READ) == 0) { 1597 if ((xbd_io->xio_buf.b_flags & B_READ) == 0) {
1598 mutex_enter(xbd_io->xio_buf.b_vp->v_interlock); 1598 mutex_enter(xbd_io->xio_buf.b_vp->v_interlock);
1599 xbd_io->xio_buf.b_vp->v_numoutput++; 1599 xbd_io->xio_buf.b_vp->v_numoutput++;
1600 mutex_exit(xbd_io->xio_buf.b_vp->v_interlock); 1600 mutex_exit(xbd_io->xio_buf.b_vp->v_interlock);
1601 } 1601 }
1602 bdev_strategy(&xbd_io->xio_buf); 1602 bdev_strategy(&xbd_io->xio_buf);
1603 /* will call xbdback_iodone() asynchronously when done */ 1603 /* will call xbdback_iodone() asynchronously when done */
1604 xbdi->xbdi_io = NULL; 1604 xbdi->xbdi_io = NULL;
1605 xbdi->xbdi_cont = xbdi->xbdi_cont_aux; 1605 xbdi->xbdi_cont = xbdi->xbdi_cont_aux;
1606 return xbdi; 1606 return xbdi;
1607 default: 1607 default:
1608 /* Should never happen */ 1608 /* Should never happen */
1609 panic("xbdback_co_do_io: unsupported operation %d", 1609 panic("xbdback_co_do_io: unsupported operation %d",
1610 xbd_io->xio_operation); 1610 xbd_io->xio_operation);
1611 } 1611 }
1612} 1612}
1613 1613
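For the flush case above, the Xen request is served with a single DIOCCACHESYNC ioctl on the backing vnode, and the ioctl status is translated into a blkif status code before replying. A compact view of that mapping, taken directly from the code above:

	/*
	 *   VOP_IOCTL(DIOCCACHESYNC) returns 0   -> BLKIF_RSP_OKAY
	 *   error == EOPNOTSUPP or ENOTTY        -> BLKIF_RSP_EOPNOTSUPP
	 *   any other error                      -> BLKIF_RSP_ERROR
	 */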
1614/* 1614/*
1615 * Called from softint(9) context when an I/O is done: for each request, send 1615 * Called from softint(9) context when an I/O is done: for each request, send
1616 * back the associated reply to the domain. 1616 * back the associated reply to the domain.
1617 * 1617 *
1618 * This gets reused by xbdback_io_error to report errors from other sources. 1618 * This gets reused by xbdback_io_error to report errors from other sources.
1619 */ 1619 */
1620static void 1620static void
1621xbdback_iodone(struct buf *bp) 1621xbdback_iodone(struct buf *bp)
1622{ 1622{
1623 struct xbdback_io *xbd_io; 1623 struct xbdback_io *xbd_io;
1624 struct xbdback_instance *xbdi; 1624 struct xbdback_instance *xbdi;
1625 int errp; 1625 int errp;
1626 1626
1627 KERNEL_LOCK(1, NULL); /* XXXSMP */ 1627 KERNEL_LOCK(1, NULL); /* XXXSMP */
1628 1628
1629 xbd_io = bp->b_private; 1629 xbd_io = bp->b_private;
1630 xbdi = xbd_io->xio_xbdi; 1630 xbdi = xbd_io->xio_xbdi;
1631 1631
1632 XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n", 1632 XENPRINTF(("xbdback_io domain %d: iodone ptr 0x%lx\n",
1633 xbdi->xbdi_domid, (long)xbd_io)); 1633 xbdi->xbdi_domid, (long)xbd_io));
1634 1634
1635 if (xbd_io->xio_mapped == 1) 1635 if (xbd_io->xio_mapped == 1)
1636 xbdback_unmap_shm(xbd_io); 1636 xbdback_unmap_shm(xbd_io);
1637 1637
1638 if (bp->b_error != 0) { 1638 if (bp->b_error != 0) {
1639 printf("xbd IO domain %d: error %d\n", 1639 printf("xbd IO domain %d: error %d\n",
1640 xbdi->xbdi_domid, bp->b_error); 1640 xbdi->xbdi_domid, bp->b_error);
1641 errp = 1; 1641 errp = 1;
1642 } else 1642 } else
1643 errp = 0; 1643 errp = 0;
1644  1644
1645 /* for each constituent xbd request */ 1645 /* for each constituent xbd request */
1646 while(!SLIST_EMPTY(&xbd_io->xio_rq)) { 1646 while(!SLIST_EMPTY(&xbd_io->xio_rq)) {
1647 struct xbdback_fragment *xbd_fr; 1647 struct xbdback_fragment *xbd_fr;
1648 struct xbdback_request *xbd_req; 1648 struct xbdback_request *xbd_req;
1649 struct xbdback_instance *rxbdi __diagused; 1649 struct xbdback_instance *rxbdi __diagused;
1650 int error; 1650 int error;
1651  1651
1652 xbd_fr = SLIST_FIRST(&xbd_io->xio_rq); 1652 xbd_fr = SLIST_FIRST(&xbd_io->xio_rq);
1653 xbd_req = xbd_fr->car; 1653 xbd_req = xbd_fr->car;
1654 SLIST_REMOVE_HEAD(&xbd_io->xio_rq, cdr); 1654 SLIST_REMOVE_HEAD(&xbd_io->xio_rq, cdr);
1655 xbdback_pool_put(&xbdback_fragment_pool, xbd_fr); 1655 xbdback_pool_put(&xbdback_fragment_pool, xbd_fr);
1656  1656
1657 if (errp) 1657 if (errp)
1658 ++xbd_req->rq_ioerrs; 1658 ++xbd_req->rq_ioerrs;
1659  1659
1660 /* finalize it only if this was its last I/O */ 1660 /* finalize it only if this was its last I/O */
1661 if (--xbd_req->rq_iocount > 0) 1661 if (--xbd_req->rq_iocount > 0)
1662 continue; 1662 continue;
1663 1663
1664 rxbdi = xbd_req->rq_xbdi; 1664 rxbdi = xbd_req->rq_xbdi;
1665 KASSERT(xbdi == rxbdi); 1665 KASSERT(xbdi == rxbdi);
1666  1666
1667 error = xbd_req->rq_ioerrs > 0 1667 error = xbd_req->rq_ioerrs > 0
1668 ? BLKIF_RSP_ERROR 1668 ? BLKIF_RSP_ERROR
1669 : BLKIF_RSP_OKAY; 1669 : BLKIF_RSP_OKAY;
1670 1670
1671 XENPRINTF(("xbdback_io domain %d: end request %"PRIu64 1671 XENPRINTF(("xbdback_io domain %d: end request %"PRIu64
1672 " error=%d\n", 1672 " error=%d\n",
1673 xbdi->xbdi_domid, xbd_req->rq_id, error)); 1673 xbdi->xbdi_domid, xbd_req->rq_id, error));
1674 xbdback_send_reply(xbdi, xbd_req->rq_id, 1674 xbdback_send_reply(xbdi, xbd_req->rq_id,
1675 xbd_req->rq_operation, error); 1675 xbd_req->rq_operation, error);
1676 xbdback_pool_put(&xbdback_request_pool, xbd_req); 1676 xbdback_pool_put(&xbdback_request_pool, xbd_req);
1677 } 1677 }
1678 xbdi_put(xbdi); 1678 xbdi_put(xbdi);
1679 atomic_dec_uint(&xbdi->xbdi_pendingreqs); 1679 atomic_dec_uint(&xbdi->xbdi_pendingreqs);
1680 buf_destroy(&xbd_io->xio_buf); 1680 buf_destroy(&xbd_io->xio_buf);
1681 xbdback_pool_put(&xbdback_io_pool, xbd_io); 1681 xbdback_pool_put(&xbdback_io_pool, xbd_io);
1682 1682
1683 xbdback_wakeup_thread(xbdi); 1683 xbdback_wakeup_thread(xbdi);
1684 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */ 1684 KERNEL_UNLOCK_ONE(NULL); /* XXXSMP */
1685} 1685}
1686 1686
1687/* 1687/*
1688 * Wake up the per xbdback instance thread. 1688 * Wake up the per xbdback instance thread.
1689 */ 1689 */
1690static void 1690static void
1691xbdback_wakeup_thread(struct xbdback_instance *xbdi) 1691xbdback_wakeup_thread(struct xbdback_instance *xbdi)
1692{ 1692{
1693 1693
1694 mutex_enter(&xbdi->xbdi_lock); 1694 mutex_enter(&xbdi->xbdi_lock);
1695 /* only set RUN state when we are WAITING for work */ 1695 /* only set RUN state when we are WAITING for work */
1696 if (xbdi->xbdi_status == WAITING) 1696 if (xbdi->xbdi_status == WAITING)
1697 xbdi->xbdi_status = RUN; 1697 xbdi->xbdi_status = RUN;
1698 cv_broadcast(&xbdi->xbdi_cv); 1698 cv_broadcast(&xbdi->xbdi_cv);
1699 mutex_exit(&xbdi->xbdi_lock); 1699 mutex_exit(&xbdi->xbdi_lock);
1700} 1700}
1701 1701
1702/* 1702/*
1703 * Called once a request has completed. Place the reply in the ring and 1703 * Called once a request has completed. Place the reply in the ring and
1704 * notify the guest OS. 1704 * notify the guest OS.
1705 */ 1705 */
1706static void 1706static void
1707xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id, 1707xbdback_send_reply(struct xbdback_instance *xbdi, uint64_t id,
1708 int op, int status) 1708 int op, int status)
1709{ 1709{
1710 blkif_response_t *resp_n; 1710 blkif_response_t *resp_n;
1711 blkif_x86_32_response_t *resp32; 1711 blkif_x86_32_response_t *resp32;
1712 blkif_x86_64_response_t *resp64; 1712 blkif_x86_64_response_t *resp64;
1713 int notify; 1713 int notify;
1714 1714
1715 /* 1715 /*
1716 * The ring can be accessed by the xbdback thread, xbdback_iodone() 1716 * The ring can be accessed by the xbdback thread, xbdback_iodone()
1717 * handler, or any handler that triggered the shm callback. So 1717 * handler, or any handler that triggered the shm callback. So
1718 * protect ring access via the xbdi_lock mutex. 1718 * protect ring access via the xbdi_lock mutex.
1719 */ 1719 */
1720 mutex_enter(&xbdi->xbdi_lock); 1720 mutex_enter(&xbdi->xbdi_lock);
1721 switch (xbdi->xbdi_proto) { 1721 switch (xbdi->xbdi_proto) {
1722 case XBDIP_NATIVE: 1722 case XBDIP_NATIVE:
1723 resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n, 1723 resp_n = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_n,
1724 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1724 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1725 resp_n->id = id; 1725 resp_n->id = id;
1726 resp_n->operation = op; 1726 resp_n->operation = op;
1727 resp_n->status = status; 1727 resp_n->status = status;
1728 break; 1728 break;
1729 case XBDIP_32: 1729 case XBDIP_32:
1730 resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32, 1730 resp32 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_32,
1731 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1731 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1732 resp32->id = id; 1732 resp32->id = id;
1733 resp32->operation = op; 1733 resp32->operation = op;
1734 resp32->status = status; 1734 resp32->status = status;
1735 break; 1735 break;
1736 case XBDIP_64: 1736 case XBDIP_64:
1737 resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64, 1737 resp64 = RING_GET_RESPONSE(&xbdi->xbdi_ring.ring_64,
1738 xbdi->xbdi_ring.ring_n.rsp_prod_pvt); 1738 xbdi->xbdi_ring.ring_n.rsp_prod_pvt);
1739 resp64->id = id; 1739 resp64->id = id;
1740 resp64->operation = op; 1740 resp64->operation = op;
1741 resp64->status = status; 1741 resp64->status = status;
1742 break; 1742 break;
1743 } 1743 }
1744 xbdi->xbdi_ring.ring_n.rsp_prod_pvt++; 1744 xbdi->xbdi_ring.ring_n.rsp_prod_pvt++;
1745 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify); 1745 RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xbdi->xbdi_ring.ring_n, notify);
1746 mutex_exit(&xbdi->xbdi_lock); 1746 mutex_exit(&xbdi->xbdi_lock);
1747 1747
1748 if (notify) { 1748 if (notify) {
1749 XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid)); 1749 XENPRINTF(("xbdback_send_reply notify %d\n", xbdi->xbdi_domid));
1750 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn); 1750 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
1751 } 1751 }
1752} 1752}
1753 1753
1754/* 1754/*
1755 * Map multiple entries of an I/O request into backend's VA space. 1755 * Map multiple entries of an I/O request into backend's VA space.
1756 * The xbd_io->xio_gref array has to be filled out by the caller. 1756 * The xbd_io->xio_gref array has to be filled out by the caller.
1757 */ 1757 */
1758static void * 1758static void *
1759xbdback_map_shm(struct xbdback_io *xbd_io) 1759xbdback_map_shm(struct xbdback_io *xbd_io)
1760{ 1760{
1761 struct xbdback_instance *xbdi = xbd_io->xio_xbdi; 1761 struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
1762 struct xbdback_request *xbd_rq; 1762 struct xbdback_request *xbd_rq;
1763 int error, s; 1763 int error, s;
1764 1764
1765#ifdef XENDEBUG_VBD 1765#ifdef XENDEBUG_VBD
1766 int i; 1766 int i;
1767 printf("xbdback_map_shm map grant "); 1767 printf("xbdback_map_shm map grant ");
1768 for (i = 0; i < xbd_io->xio_nrma; i++) { 1768 for (i = 0; i < xbd_io->xio_nrma; i++) {
1769 printf("%u ", (u_int)xbd_io->xio_gref[i]); 1769 printf("%u ", (u_int)xbd_io->xio_gref[i]);
1770 } 1770 }
1771#endif 1771#endif
1772 1772
1773 KASSERT(xbd_io->xio_mapped == 0); 1773 KASSERT(xbd_io->xio_mapped == 0);
1774 1774
1775 s = splvm(); /* XXXSMP */ 1775 s = splvm(); /* XXXSMP */
1776 xbd_rq = SLIST_FIRST(&xbd_io->xio_rq)->car; 1776 xbd_rq = SLIST_FIRST(&xbd_io->xio_rq)->car;
1777 1777
1778 xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free); 1778 xbd_io->xio_xv = SLIST_FIRST(&xbdi->xbdi_va_free);
1779 KASSERT(xbd_io->xio_xv != NULL); 1779 KASSERT(xbd_io->xio_xv != NULL);
1780 SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next); 1780 SLIST_REMOVE_HEAD(&xbdi->xbdi_va_free, xv_next);
1781 xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr; 1781 xbd_io->xio_vaddr = xbd_io->xio_xv->xv_vaddr;
1782 splx(s); 1782 splx(s);
1783 1783
1784 error = xen_shm_map(xbd_io->xio_nrma, xbdi->xbdi_domid, 1784 error = xen_shm_map(xbd_io->xio_nrma, xbdi->xbdi_domid,
1785 xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,  1785 xbd_io->xio_gref, xbd_io->xio_vaddr, xbd_io->xio_gh,
1786 (xbd_rq->rq_operation == BLKIF_OP_WRITE) ? XSHM_RO : 0); 1786 (xbd_rq->rq_operation == BLKIF_OP_WRITE) ? XSHM_RO : 0);
1787 1787
1788 switch(error) { 1788 switch(error) {
1789 case 0: 1789 case 0:
1790#ifdef XENDEBUG_VBD 1790#ifdef XENDEBUG_VBD
1791 printf("handle "); 1791 printf("handle ");
1792 for (i = 0; i < xbd_io->xio_nrma; i++) { 1792 for (i = 0; i < xbd_io->xio_nrma; i++) {
1793 printf("%u ", (u_int)xbd_io->xio_gh[i]); 1793 printf("%u ", (u_int)xbd_io->xio_gh[i]);
1794 } 1794 }
1795 printf("\n"); 1795 printf("\n");
1796#endif 1796#endif
1797 xbd_io->xio_mapped = 1; 1797 xbd_io->xio_mapped = 1;
1798 return xbdi; 1798 return xbdi;
1799 default: 1799 default:
1800 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) { 1800 if (ratecheck(&xbdi->xbdi_lasterr_time, &xbdback_err_intvl)) {
1801 printf("xbdback_map_shm: xen_shm error %d ", error); 1801 printf("xbdback_map_shm: xen_shm error %d ", error);
1802 } 1802 }
1803 xbdback_io_error(xbdi->xbdi_io, error); 1803 xbdback_io_error(xbdi->xbdi_io, error);
1804 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next); 1804 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
1805 xbd_io->xio_xv = NULL; 1805 xbd_io->xio_xv = NULL;
1806 xbdi->xbdi_io = NULL; 1806 xbdi->xbdi_io = NULL;
1807 xbdi->xbdi_cont = xbdi->xbdi_cont_aux; 1807 // do not retry
 1808 xbdi->xbdi_cont = xbdback_co_main_incr;
1808 return xbdi; 1809 return xbdi;
1809 } 1810 }
1810} 1811}
1811 1812
1812/* unmap a request from our virtual address space (request is done) */ 1813/* unmap a request from our virtual address space (request is done) */
1813static void 1814static void
1814xbdback_unmap_shm(struct xbdback_io *xbd_io) 1815xbdback_unmap_shm(struct xbdback_io *xbd_io)
1815{ 1816{
1816 struct xbdback_instance *xbdi = xbd_io->xio_xbdi; 1817 struct xbdback_instance *xbdi = xbd_io->xio_xbdi;
1817 1818
1818#ifdef XENDEBUG_VBD 1819#ifdef XENDEBUG_VBD
1819 int i; 1820 int i;
1820 printf("xbdback_unmap_shm handle "); 1821 printf("xbdback_unmap_shm handle ");
1821 for (i = 0; i < xbd_io->xio_nrma; i++) { 1822 for (i = 0; i < xbd_io->xio_nrma; i++) {
1822 printf("%u ", (u_int)xbd_io->xio_gh[i]); 1823 printf("%u ", (u_int)xbd_io->xio_gh[i]);
1823 } 1824 }
1824 printf("\n"); 1825 printf("\n");
1825#endif 1826#endif
1826 1827
1827 KASSERT(xbd_io->xio_mapped == 1); 1828 KASSERT(xbd_io->xio_mapped == 1);
1828 xbd_io->xio_mapped = 0; 1829 xbd_io->xio_mapped = 0;
1829 xen_shm_unmap(xbd_io->xio_vaddr, xbd_io->xio_nrma, 1830 xen_shm_unmap(xbd_io->xio_vaddr, xbd_io->xio_nrma,
1830 xbd_io->xio_gh); 1831 xbd_io->xio_gh);
1831 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next); 1832 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, xbd_io->xio_xv, xv_next);
1832 xbd_io->xio_xv = NULL; 1833 xbd_io->xio_xv = NULL;
1833 xbd_io->xio_vaddr = -1; 1834 xbd_io->xio_vaddr = -1;
1834} 1835}
1835 1836
1836/* Obtain memory from a pool */ 1837/* Obtain memory from a pool */
1837static void * 1838static void *
1838xbdback_pool_get(struct xbdback_pool *pp, 1839xbdback_pool_get(struct xbdback_pool *pp,
1839 struct xbdback_instance *xbdi) 1840 struct xbdback_instance *xbdi)
1840{ 1841{
1841 return pool_cache_get(&pp->pc, PR_WAITOK); 1842 return pool_cache_get(&pp->pc, PR_WAITOK);
1842} 1843}
1843 1844
1844/* Restore memory to a pool */ 1845/* Restore memory to a pool */
1845static void 1846static void
1846xbdback_pool_put(struct xbdback_pool *pp, void *item) 1847xbdback_pool_put(struct xbdback_pool *pp, void *item)
1847{ 1848{
1848 pool_cache_put(&pp->pc, item); 1849 pool_cache_put(&pp->pc, item);
1849} 1850}
1850 1851
1851/* 1852/*
1852 * Trampoline routine. Calls continuations in a loop and only exits when 1853 * Trampoline routine. Calls continuations in a loop and only exits when
1853 * either the returned object or the next callback is NULL. 1854 * either the returned object or the next callback is NULL.
1854 */ 1855 */
1855static void 1856static void
1856xbdback_trampoline(struct xbdback_instance *xbdi, void *obj) 1857xbdback_trampoline(struct xbdback_instance *xbdi, void *obj)
1857{ 1858{
1858 xbdback_cont_t cont; 1859 xbdback_cont_t cont;
1859 1860
1860 while(obj != NULL && xbdi->xbdi_cont != NULL) { 1861 while(obj != NULL && xbdi->xbdi_cont != NULL) {
1861 cont = xbdi->xbdi_cont; 1862 cont = xbdi->xbdi_cont;
1862#ifdef DIAGNOSTIC 1863#ifdef DIAGNOSTIC
1863 xbdi->xbdi_cont = (xbdback_cont_t)0xDEADBEEF; 1864 xbdi->xbdi_cont = (xbdback_cont_t)0xDEADBEEF;
1864#endif 1865#endif
1865 obj = (*cont)(xbdi, obj); 1866 obj = (*cont)(xbdi, obj);
1866#ifdef DIAGNOSTIC 1867#ifdef DIAGNOSTIC
1867 if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) { 1868 if (xbdi->xbdi_cont == (xbdback_cont_t)0xDEADBEEF) {
1868 printf("xbdback_trampoline: 0x%lx didn't set " 1869 printf("xbdback_trampoline: 0x%lx didn't set "
1869 "xbdi->xbdi_cont!\n", (long)cont); 1870 "xbdi->xbdi_cont!\n", (long)cont);
1870 panic("xbdback_trampoline: bad continuation"); 1871 panic("xbdback_trampoline: bad continuation");
1871 } 1872 }
1872#endif 1873#endif
1873 } 1874 }
1874} 1875}