Sun Apr 19 20:53:20 2020 UTC
SLIST_INIT() xbdi->xbdi_va_free just for clarity


(jdolecek)
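For context, here is a minimal standalone sketch of the SLIST pattern this change touches; the structure and variable names below are made up for illustration and are not taken from the driver. In xbdback_xenbus_create() the instance is allocated with kmem_zalloc(), so the xbdi_va_free head is already zero-filled; SLIST_INIT() merely sets the head pointer to NULL, which is why the explicit call is "just for clarity" rather than a behavioural change.

/*
 * Illustrative userland sketch (not part of the commit): initialize an
 * SLIST head, then populate it as a free list, the way xbdi_va_free is
 * filled with xbdi_va[] slots in xbdback_xenbus_create().
 */
#include <sys/queue.h>
#include <stdio.h>

#define NENTRIES 4

struct entry {
	SLIST_ENTRY(entry) e_next;	/* list linkage */
	int e_id;
};

int
main(void)
{
	SLIST_HEAD(, entry) free_head;
	struct entry pool[NENTRIES];

	/*
	 * SLIST_INIT() just sets the head pointer to NULL.  Storage that
	 * is already zero-filled (as with kmem_zalloc() in the driver)
	 * is effectively in that state, so the call documents intent.
	 */
	SLIST_INIT(&free_head);

	for (int i = 0; i < NENTRIES; i++) {
		pool[i].e_id = i;
		SLIST_INSERT_HEAD(&free_head, &pool[i], e_next);
	}

	struct entry *e;
	SLIST_FOREACH(e, &free_head, e_next)
		printf("entry %d\n", e->e_id);

	return 0;
}

Built as an ordinary program this prints the entries in reverse insertion order, the usual LIFO behaviour of an SLIST-based free list where slots are taken from and returned to the head.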
cvs diff -r1.79 -r1.80 src/sys/arch/xen/xen/xbdback_xenbus.c

--- src/sys/arch/xen/xen/xbdback_xenbus.c 2020/04/19 18:47:40 1.79
+++ src/sys/arch/xen/xen/xbdback_xenbus.c 2020/04/19 20:53:20 1.80
@@ -1,1450 +1,1451 @@
1/* $NetBSD: xbdback_xenbus.c,v 1.79 2020/04/19 18:47:40 jdolecek Exp $ */ 1/* $NetBSD: xbdback_xenbus.c,v 1.80 2020/04/19 20:53:20 jdolecek Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2006 Manuel Bouyer. 4 * Copyright (c) 2006 Manuel Bouyer.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * 25 *
26 */ 26 */
27 27
28#include <sys/cdefs.h> 28#include <sys/cdefs.h>
29__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.79 2020/04/19 18:47:40 jdolecek Exp $"); 29__KERNEL_RCSID(0, "$NetBSD: xbdback_xenbus.c,v 1.80 2020/04/19 20:53:20 jdolecek Exp $");
30 30
31#include <sys/atomic.h> 31#include <sys/atomic.h>
32#include <sys/buf.h> 32#include <sys/buf.h>
33#include <sys/condvar.h> 33#include <sys/condvar.h>
34#include <sys/conf.h> 34#include <sys/conf.h>
35#include <sys/disk.h> 35#include <sys/disk.h>
36#include <sys/device.h> 36#include <sys/device.h>
37#include <sys/fcntl.h> 37#include <sys/fcntl.h>
38#include <sys/kauth.h> 38#include <sys/kauth.h>
39#include <sys/kernel.h> 39#include <sys/kernel.h>
40#include <sys/kmem.h> 40#include <sys/kmem.h>
41#include <sys/kthread.h> 41#include <sys/kthread.h>
42#include <sys/mutex.h> 42#include <sys/mutex.h>
43#include <sys/param.h> 43#include <sys/param.h>
44#include <sys/queue.h> 44#include <sys/queue.h>
45#include <sys/systm.h> 45#include <sys/systm.h>
46#include <sys/time.h> 46#include <sys/time.h>
47#include <sys/types.h> 47#include <sys/types.h>
48#include <sys/vnode.h> 48#include <sys/vnode.h>
49 49
50#include <xen/xen.h> 50#include <xen/xen.h>
51#include <xen/xen_shm.h> 51#include <xen/xen_shm.h>
52#include <xen/evtchn.h> 52#include <xen/evtchn.h>
53#include <xen/xenbus.h> 53#include <xen/xenbus.h>
54#include <xen/xenring.h> 54#include <xen/xenring.h>
55#include <xen/include/public/io/protocols.h> 55#include <xen/include/public/io/protocols.h>
56 56
57/* #define XENDEBUG_VBD */ 57/* #define XENDEBUG_VBD */
58#ifdef XENDEBUG_VBD 58#ifdef XENDEBUG_VBD
59#define XENPRINTF(x) printf x 59#define XENPRINTF(x) printf x
60#else 60#else
61#define XENPRINTF(x) 61#define XENPRINTF(x)
62#endif 62#endif
63 63
64#define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 64#define BLKIF_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
65 65
66/* 66/*
67 * Backend block device driver for Xen 67 * Backend block device driver for Xen
68 */ 68 */
69 69
70/* Values are expressed in 512-byte sectors */ 70/* Values are expressed in 512-byte sectors */
71#define VBD_BSIZE 512 71#define VBD_BSIZE 512
72#define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1) 72#define VBD_MAXSECT ((PAGE_SIZE / VBD_BSIZE) - 1)
73 73
74/* Need to alloc one extra page to account for possible mapping offset */ 74/* Need to alloc one extra page to account for possible mapping offset */
75#define VBD_VA_SIZE (MAXPHYS + PAGE_SIZE) 75#define VBD_VA_SIZE (MAXPHYS + PAGE_SIZE)
76 76
77struct xbdback_request; 77struct xbdback_request;
78struct xbdback_io; 78struct xbdback_io;
79struct xbdback_fragment; 79struct xbdback_fragment;
80struct xbdback_instance; 80struct xbdback_instance;
81 81
82/* 82/*
83 * status of a xbdback instance: 83 * status of a xbdback instance:
84 * WAITING: xbdback instance is connected, waiting for requests 84 * WAITING: xbdback instance is connected, waiting for requests
85 * RUN: xbdi thread must be woken up, I/Os have to be processed 85 * RUN: xbdi thread must be woken up, I/Os have to be processed
86 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled 86 * DISCONNECTING: the instance is closing, no more I/Os can be scheduled
87 * DISCONNECTED: no I/Os, no ring, the thread should terminate. 87 * DISCONNECTED: no I/Os, no ring, the thread should terminate.
88 */ 88 */
89typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t; 89typedef enum {WAITING, RUN, DISCONNECTING, DISCONNECTED} xbdback_state_t;
90 90
91/* 91/*
92 * Each xbdback instance is managed by a single thread that handles all 92 * Each xbdback instance is managed by a single thread that handles all
93 * the I/O processing. As there are a variety of conditions that can block, 93 * the I/O processing. As there are a variety of conditions that can block,
94 * everything will be done in a sort of continuation-passing style. 94 * everything will be done in a sort of continuation-passing style.
95 * 95 *
96 * When the execution has to block to delay processing, for example to 96 * When the execution has to block to delay processing, for example to
97 * allow system to recover because of memory shortage (via shared memory 97 * allow system to recover because of memory shortage (via shared memory
98 * callback), the return value of a continuation can be set to NULL. In that 98 * callback), the return value of a continuation can be set to NULL. In that
99 * case, the thread will go back to sleeping and wait for the proper 99 * case, the thread will go back to sleeping and wait for the proper
100 * condition before it starts processing requests again from where it left. 100 * condition before it starts processing requests again from where it left.
101 * Continuation state is "stored" in the xbdback instance (xbdi_cont and 101 * Continuation state is "stored" in the xbdback instance (xbdi_cont and
102 * xbdi_cont_aux), and should only be manipulated by the instance thread. 102 * xbdi_cont_aux), and should only be manipulated by the instance thread.
103 * 103 *
104 * As xbdback(4) has to handle different sort of asynchronous events (Xen 104 * As xbdback(4) has to handle different sort of asynchronous events (Xen
105 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock 105 * event channels, biointr() soft interrupts, xenbus commands), the xbdi_lock
106 * mutex is used to protect specific elements of the xbdback instance from 106 * mutex is used to protect specific elements of the xbdback instance from
107 * concurrent access: thread status and ring access (when pushing responses). 107 * concurrent access: thread status and ring access (when pushing responses).
108 *  108 *
109 * Here's how the call graph is supposed to be for a single I/O: 109 * Here's how the call graph is supposed to be for a single I/O:
110 * 110 *
111 * xbdback_co_main() 111 * xbdback_co_main()
112 * | 112 * |
113 * | --> xbdback_co_cache_doflush() or NULL 113 * | --> xbdback_co_cache_doflush() or NULL
114 * | | 114 * | |
115 * | - xbdback_co_cache_flush2() <- xbdback_co_do_io() <- 115 * | - xbdback_co_cache_flush2() <- xbdback_co_do_io() <-
116 * | | | 116 * | | |
117 * | |-> xbdback_co_cache_flush() -> xbdback_co_map_io()- 117 * | |-> xbdback_co_cache_flush() -> xbdback_co_map_io()-
118 * xbdback_co_main_loop()-| 118 * xbdback_co_main_loop()-|
119 * | |-> xbdback_co_main_done() ---> xbdback_co_map_io()- 119 * | |-> xbdback_co_main_done() ---> xbdback_co_map_io()-
120 * | | | 120 * | | |
121 * | -- xbdback_co_main_done2() <-- xbdback_co_do_io() <- 121 * | -- xbdback_co_main_done2() <-- xbdback_co_do_io() <-
122 * | | 122 * | |
123 * | --> xbdback_co_main() or NULL 123 * | --> xbdback_co_main() or NULL
124 * | 124 * |
125 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop() 125 * xbdback_co_io() -> xbdback_co_main_incr() -> xbdback_co_main_loop()
126 * | 126 * |
127 * xbdback_co_io_gotreq()--+--> xbdback_co_map_io() --- 127 * xbdback_co_io_gotreq()--+--> xbdback_co_map_io() ---
128 * | | | 128 * | | |
129 * -> xbdback_co_io_loop()----| <- xbdback_co_do_io() <-- 129 * -> xbdback_co_io_loop()----| <- xbdback_co_do_io() <--
130 * | | | | 130 * | | | |
131 * | | | |----------> xbdback_co_io_gotio() 131 * | | | |----------> xbdback_co_io_gotio()
132 * | | | | 132 * | | | |
133 * | | xbdback_co_main_incr() | 133 * | | xbdback_co_main_incr() |
134 * | | | | 134 * | | | |
135 * | | xbdback_co_main_loop() | 135 * | | xbdback_co_main_loop() |
136 * | | | 136 * | | |
137 * | xbdback_co_io_gotio2() <-----------| 137 * | xbdback_co_io_gotio2() <-----------|
138 * | | | 138 * | | |
139 * | | |----------> xbdback_co_io_gotfrag() 139 * | | |----------> xbdback_co_io_gotfrag()
140 * | | | 140 * | | |
141 * -- xbdback_co_io_gotfrag2() <---------| 141 * -- xbdback_co_io_gotfrag2() <---------|
142 * | 142 * |
143 * xbdback_co_main_incr() -> xbdback_co_main_loop() 143 * xbdback_co_main_incr() -> xbdback_co_main_loop()
144 */ 144 */
145typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *); 145typedef void *(* xbdback_cont_t)(struct xbdback_instance *, void *);
146 146
147enum xbdi_proto { 147enum xbdi_proto {
148 XBDIP_NATIVE, 148 XBDIP_NATIVE,
149 XBDIP_32, 149 XBDIP_32,
150 XBDIP_64 150 XBDIP_64
151}; 151};
152 152
153struct xbdback_va { 153struct xbdback_va {
154 SLIST_ENTRY(xbdback_va) xv_next; 154 SLIST_ENTRY(xbdback_va) xv_next;
155 vaddr_t xv_vaddr; 155 vaddr_t xv_vaddr;
156}; 156};
157 157
158/* we keep the xbdback instances in a linked list */ 158/* we keep the xbdback instances in a linked list */
159struct xbdback_instance { 159struct xbdback_instance {
160 SLIST_ENTRY(xbdback_instance) next; 160 SLIST_ENTRY(xbdback_instance) next;
161 struct xenbus_device *xbdi_xbusd; /* our xenstore entry */ 161 struct xenbus_device *xbdi_xbusd; /* our xenstore entry */
162 struct xenbus_watch xbdi_watch; /* to watch our store */ 162 struct xenbus_watch xbdi_watch; /* to watch our store */
163 domid_t xbdi_domid; /* attached to this domain */ 163 domid_t xbdi_domid; /* attached to this domain */
164 uint32_t xbdi_handle; /* domain-specific handle */ 164 uint32_t xbdi_handle; /* domain-specific handle */
165 char xbdi_name[16]; /* name of this instance */ 165 char xbdi_name[16]; /* name of this instance */
166 /* mutex that protects concurrent access to the xbdback instance */ 166 /* mutex that protects concurrent access to the xbdback instance */
167 kmutex_t xbdi_lock; 167 kmutex_t xbdi_lock;
168 kcondvar_t xbdi_cv; /* wait channel for thread work */ 168 kcondvar_t xbdi_cv; /* wait channel for thread work */
169 xbdback_state_t xbdi_status; /* thread's status */ 169 xbdback_state_t xbdi_status; /* thread's status */
170 /* KVA for mapping transfers */ 170 /* KVA for mapping transfers */
171 struct xbdback_va xbdi_va[BLKIF_RING_SIZE]; 171 struct xbdback_va xbdi_va[BLKIF_RING_SIZE];
172 SLIST_HEAD(, xbdback_va) xbdi_va_free; 172 SLIST_HEAD(, xbdback_va) xbdi_va_free;
173 /* backing device parameters */ 173 /* backing device parameters */
174 dev_t xbdi_dev; 174 dev_t xbdi_dev;
175 const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */ 175 const struct bdevsw *xbdi_bdevsw; /* pointer to the device's bdevsw */
176 struct vnode *xbdi_vp; 176 struct vnode *xbdi_vp;
177 uint64_t xbdi_size; 177 uint64_t xbdi_size;
178 bool xbdi_ro; /* is device read-only ? */ 178 bool xbdi_ro; /* is device read-only ? */
179 /* parameters for the communication */ 179 /* parameters for the communication */
180 unsigned int xbdi_evtchn; 180 unsigned int xbdi_evtchn;
181 struct intrhand *xbdi_ih; 181 struct intrhand *xbdi_ih;
182 /* private parameters for communication */ 182 /* private parameters for communication */
183 blkif_back_ring_proto_t xbdi_ring; 183 blkif_back_ring_proto_t xbdi_ring;
184 enum xbdi_proto xbdi_proto; 184 enum xbdi_proto xbdi_proto;
185 grant_handle_t xbdi_ring_handle; /* to unmap the ring */ 185 grant_handle_t xbdi_ring_handle; /* to unmap the ring */
186 vaddr_t xbdi_ring_va; /* to unmap the ring */ 186 vaddr_t xbdi_ring_va; /* to unmap the ring */
187 /* disconnection must be postponed until all I/O is done */ 187 /* disconnection must be postponed until all I/O is done */
188 int xbdi_refcnt; 188 int xbdi_refcnt;
189 /*  189 /*
190 * State for I/O processing/coalescing follows; this has to 190 * State for I/O processing/coalescing follows; this has to
191 * live here instead of on the stack because of the 191 * live here instead of on the stack because of the
192 * continuation-ness (see above). 192 * continuation-ness (see above).
193 */ 193 */
194 RING_IDX xbdi_req_prod; /* limit on request indices */ 194 RING_IDX xbdi_req_prod; /* limit on request indices */
195 xbdback_cont_t xbdi_cont, xbdi_cont_aux; 195 xbdback_cont_t xbdi_cont, xbdi_cont_aux;
196 /* _request state: track requests fetched from ring */ 196 /* _request state: track requests fetched from ring */
197 struct xbdback_request *xbdi_req; /* if NULL, ignore following */ 197 struct xbdback_request *xbdi_req; /* if NULL, ignore following */
198 blkif_request_t xbdi_xen_req; 198 blkif_request_t xbdi_xen_req;
199 int xbdi_segno; 199 int xbdi_segno;
200 /* _io state: I/O associated to this instance */ 200 /* _io state: I/O associated to this instance */
201 struct xbdback_io *xbdi_io; /* if NULL, ignore next field */ 201 struct xbdback_io *xbdi_io; /* if NULL, ignore next field */
202 daddr_t xbdi_next_sector; 202 daddr_t xbdi_next_sector;
203 uint8_t xbdi_last_fs, xbdi_this_fs; /* first sectors */ 203 uint8_t xbdi_last_fs, xbdi_this_fs; /* first sectors */
204 uint8_t xbdi_last_ls, xbdi_this_ls; /* last sectors */ 204 uint8_t xbdi_last_ls, xbdi_this_ls; /* last sectors */
205 grant_ref_t xbdi_thisgrt, xbdi_lastgrt; /* grants */ 205 grant_ref_t xbdi_thisgrt, xbdi_lastgrt; /* grants */
206 /* other state */ 206 /* other state */
207 int xbdi_same_page; /* are we merging two segments on the same page? */ 207 int xbdi_same_page; /* are we merging two segments on the same page? */
208 uint xbdi_pendingreqs; /* number of I/O in fly */ 208 uint xbdi_pendingreqs; /* number of I/O in fly */
209 struct timeval xbdi_lasterr_time; /* error time tracking */ 209 struct timeval xbdi_lasterr_time; /* error time tracking */
210#ifdef DEBUG 210#ifdef DEBUG
211 struct timeval xbdi_lastfragio_time; /* fragmented I/O tracking */ 211 struct timeval xbdi_lastfragio_time; /* fragmented I/O tracking */
212#endif 212#endif
213}; 213};
214/* Manipulation of the above reference count. */ 214/* Manipulation of the above reference count. */
215#define xbdi_get(xbdip) atomic_inc_uint(&(xbdip)->xbdi_refcnt) 215#define xbdi_get(xbdip) atomic_inc_uint(&(xbdip)->xbdi_refcnt)
216#define xbdi_put(xbdip) \ 216#define xbdi_put(xbdip) \
217do { \ 217do { \
218 if (atomic_dec_uint_nv(&(xbdip)->xbdi_refcnt) == 0) \ 218 if (atomic_dec_uint_nv(&(xbdip)->xbdi_refcnt) == 0) \
219 xbdback_finish_disconnect(xbdip); \ 219 xbdback_finish_disconnect(xbdip); \
220} while (/* CONSTCOND */ 0) 220} while (/* CONSTCOND */ 0)
221 221
222static SLIST_HEAD(, xbdback_instance) xbdback_instances; 222static SLIST_HEAD(, xbdback_instance) xbdback_instances;
223static kmutex_t xbdback_lock; 223static kmutex_t xbdback_lock;
224 224
225/* 225/*
226 * For each request from a guest, a xbdback_request is allocated from 226 * For each request from a guest, a xbdback_request is allocated from
227 * a pool. This will describe the request until completion. The 227 * a pool. This will describe the request until completion. The
228 * request may require multiple IO operations to perform, so the 228 * request may require multiple IO operations to perform, so the
229 * per-IO information is not stored here. 229 * per-IO information is not stored here.
230 */ 230 */
231struct xbdback_request { 231struct xbdback_request {
232 struct xbdback_instance *rq_xbdi; /* our xbd instance */ 232 struct xbdback_instance *rq_xbdi; /* our xbd instance */
233 uint64_t rq_id; 233 uint64_t rq_id;
234 int rq_iocount; /* reference count; or, number of outstanding I/O's */ 234 int rq_iocount; /* reference count; or, number of outstanding I/O's */
235 int rq_ioerrs; 235 int rq_ioerrs;
236 uint8_t rq_operation; 236 uint8_t rq_operation;
237}; 237};
238 238
239/* 239/*
240 * For each I/O operation associated with one of those requests, an 240 * For each I/O operation associated with one of those requests, an
241 * xbdback_io is allocated from a pool. It may correspond to multiple 241 * xbdback_io is allocated from a pool. It may correspond to multiple
242 * Xen disk requests, or parts of them, if several arrive at once that 242 * Xen disk requests, or parts of them, if several arrive at once that
243 * can be coalesced. 243 * can be coalesced.
244 */ 244 */
245struct xbdback_io { 245struct xbdback_io {
246 /* The instance pointer is duplicated for convenience. */ 246 /* The instance pointer is duplicated for convenience. */
247 struct xbdback_instance *xio_xbdi; /* our xbd instance */ 247 struct xbdback_instance *xio_xbdi; /* our xbd instance */
248 uint8_t xio_operation; 248 uint8_t xio_operation;
249 union { 249 union {
250 struct { 250 struct {
251 struct buf xio_buf; /* our I/O */ 251 struct buf xio_buf; /* our I/O */
252 /* xbd requests involved */ 252 /* xbd requests involved */
253 SLIST_HEAD(, xbdback_fragment) xio_rq; 253 SLIST_HEAD(, xbdback_fragment) xio_rq;
254 /* the virtual address to map the request at */ 254 /* the virtual address to map the request at */
255 vaddr_t xio_vaddr; 255 vaddr_t xio_vaddr;
256 struct xbdback_va *xio_xv; 256 struct xbdback_va *xio_xv;
257 /* grants to map */ 257 /* grants to map */
258 grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST]; 258 grant_ref_t xio_gref[XENSHM_MAX_PAGES_PER_REQUEST];
259 /* grants release */ 259 /* grants release */
260 grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST]; 260 grant_handle_t xio_gh[XENSHM_MAX_PAGES_PER_REQUEST];
261 uint16_t xio_nrma; /* number of guest pages */ 261 uint16_t xio_nrma; /* number of guest pages */
262 uint16_t xio_mapped; /* == 1: grants are mapped */ 262 uint16_t xio_mapped; /* == 1: grants are mapped */
263 } xio_rw; 263 } xio_rw;
264 uint64_t xio_flush_id; 264 uint64_t xio_flush_id;
265 } u; 265 } u;
266}; 266};
267#define xio_buf u.xio_rw.xio_buf 267#define xio_buf u.xio_rw.xio_buf
268#define xio_rq u.xio_rw.xio_rq 268#define xio_rq u.xio_rw.xio_rq
269#define xio_vaddr u.xio_rw.xio_vaddr 269#define xio_vaddr u.xio_rw.xio_vaddr
270#define xio_xv u.xio_rw.xio_xv 270#define xio_xv u.xio_rw.xio_xv
271#define xio_gref u.xio_rw.xio_gref 271#define xio_gref u.xio_rw.xio_gref
272#define xio_gh u.xio_rw.xio_gh 272#define xio_gh u.xio_rw.xio_gh
273#define xio_nrma u.xio_rw.xio_nrma 273#define xio_nrma u.xio_rw.xio_nrma
274#define xio_mapped u.xio_rw.xio_mapped 274#define xio_mapped u.xio_rw.xio_mapped
275 275
276#define xio_flush_id u.xio_flush_id 276#define xio_flush_id u.xio_flush_id
277 277
278/* 278/*
279 * Rather than having the xbdback_io keep an array of the 279 * Rather than having the xbdback_io keep an array of the
280 * xbdback_requests involved, since the actual number will probably be 280 * xbdback_requests involved, since the actual number will probably be
281 * small but might be as large as BLKIF_RING_SIZE, use a list. This 281 * small but might be as large as BLKIF_RING_SIZE, use a list. This
282 * would be threaded through xbdback_request, but one of them might be 282 * would be threaded through xbdback_request, but one of them might be
283 * part of multiple I/O's, alas. 283 * part of multiple I/O's, alas.
284 */ 284 */
285struct xbdback_fragment { 285struct xbdback_fragment {
286 struct xbdback_request *car; 286 struct xbdback_request *car;
287 SLIST_ENTRY(xbdback_fragment) cdr; 287 SLIST_ENTRY(xbdback_fragment) cdr;
288}; 288};
289 289
290/* 290/*
291 * Pools to manage the chain of block requests and I/Os fragments 291 * Pools to manage the chain of block requests and I/Os fragments
292 * submitted by frontend. 292 * submitted by frontend.
293 */ 293 */
294/* XXXSMP */ 294/* XXXSMP */
295static struct xbdback_pool { 295static struct xbdback_pool {
296 struct pool_cache pc; 296 struct pool_cache pc;
297 struct timeval last_warning; 297 struct timeval last_warning;
298} xbdback_request_pool, xbdback_io_pool, xbdback_fragment_pool; 298} xbdback_request_pool, xbdback_io_pool, xbdback_fragment_pool;
299 299
300/* Interval between reports of I/O errors from frontend */ 300/* Interval between reports of I/O errors from frontend */
301static const struct timeval xbdback_err_intvl = { 1, 0 }; 301static const struct timeval xbdback_err_intvl = { 1, 0 };
302 302
303#ifdef DEBUG 303#ifdef DEBUG
304static const struct timeval xbdback_fragio_intvl = { 60, 0 }; 304static const struct timeval xbdback_fragio_intvl = { 60, 0 };
305#endif 305#endif
306 void xbdbackattach(int); 306 void xbdbackattach(int);
307static int xbdback_xenbus_create(struct xenbus_device *); 307static int xbdback_xenbus_create(struct xenbus_device *);
308static int xbdback_xenbus_destroy(void *); 308static int xbdback_xenbus_destroy(void *);
309static void xbdback_frontend_changed(void *, XenbusState); 309static void xbdback_frontend_changed(void *, XenbusState);
310static void xbdback_backend_changed(struct xenbus_watch *, 310static void xbdback_backend_changed(struct xenbus_watch *,
311 const char **, unsigned int); 311 const char **, unsigned int);
312static int xbdback_evthandler(void *); 312static int xbdback_evthandler(void *);
313 313
314static int xbdback_connect(struct xbdback_instance *); 314static int xbdback_connect(struct xbdback_instance *);
315static void xbdback_disconnect(struct xbdback_instance *); 315static void xbdback_disconnect(struct xbdback_instance *);
316static void xbdback_finish_disconnect(struct xbdback_instance *); 316static void xbdback_finish_disconnect(struct xbdback_instance *);
317 317
318static bool xbdif_lookup(domid_t, uint32_t); 318static bool xbdif_lookup(domid_t, uint32_t);
319 319
320static void *xbdback_co_main(struct xbdback_instance *, void *); 320static void *xbdback_co_main(struct xbdback_instance *, void *);
321static void *xbdback_co_main_loop(struct xbdback_instance *, void *); 321static void *xbdback_co_main_loop(struct xbdback_instance *, void *);
322static void *xbdback_co_main_incr(struct xbdback_instance *, void *); 322static void *xbdback_co_main_incr(struct xbdback_instance *, void *);
323static void *xbdback_co_main_done(struct xbdback_instance *, void *); 323static void *xbdback_co_main_done(struct xbdback_instance *, void *);
324static void *xbdback_co_main_done2(struct xbdback_instance *, void *); 324static void *xbdback_co_main_done2(struct xbdback_instance *, void *);
325 325
326static void *xbdback_co_cache_flush(struct xbdback_instance *, void *); 326static void *xbdback_co_cache_flush(struct xbdback_instance *, void *);
327static void *xbdback_co_cache_flush2(struct xbdback_instance *, void *); 327static void *xbdback_co_cache_flush2(struct xbdback_instance *, void *);
328static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *); 328static void *xbdback_co_cache_doflush(struct xbdback_instance *, void *);
329 329
330static void *xbdback_co_io(struct xbdback_instance *, void *); 330static void *xbdback_co_io(struct xbdback_instance *, void *);
331static void *xbdback_co_io_gotreq(struct xbdback_instance *, void *); 331static void *xbdback_co_io_gotreq(struct xbdback_instance *, void *);
332static void *xbdback_co_io_loop(struct xbdback_instance *, void *); 332static void *xbdback_co_io_loop(struct xbdback_instance *, void *);
333static void *xbdback_co_io_gotio(struct xbdback_instance *, void *); 333static void *xbdback_co_io_gotio(struct xbdback_instance *, void *);
334static void *xbdback_co_io_gotio2(struct xbdback_instance *, void *); 334static void *xbdback_co_io_gotio2(struct xbdback_instance *, void *);
335static void *xbdback_co_io_gotfrag(struct xbdback_instance *, void *); 335static void *xbdback_co_io_gotfrag(struct xbdback_instance *, void *);
336static void *xbdback_co_io_gotfrag2(struct xbdback_instance *, void *); 336static void *xbdback_co_io_gotfrag2(struct xbdback_instance *, void *);
337 337
338static void *xbdback_co_map_io(struct xbdback_instance *, void *); 338static void *xbdback_co_map_io(struct xbdback_instance *, void *);
339static void *xbdback_co_do_io(struct xbdback_instance *, void *); 339static void *xbdback_co_do_io(struct xbdback_instance *, void *);
340 340
341static void xbdback_io_error(struct xbdback_io *, int); 341static void xbdback_io_error(struct xbdback_io *, int);
342static void xbdback_iodone(struct buf *); 342static void xbdback_iodone(struct buf *);
343static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int); 343static void xbdback_send_reply(struct xbdback_instance *, uint64_t , int , int);
344 344
345static void *xbdback_map_shm(struct xbdback_io *); 345static void *xbdback_map_shm(struct xbdback_io *);
346static void xbdback_unmap_shm(struct xbdback_io *); 346static void xbdback_unmap_shm(struct xbdback_io *);
347 347
348static void *xbdback_pool_get(struct xbdback_pool *, 348static void *xbdback_pool_get(struct xbdback_pool *,
349 struct xbdback_instance *); 349 struct xbdback_instance *);
350static void xbdback_pool_put(struct xbdback_pool *, void *); 350static void xbdback_pool_put(struct xbdback_pool *, void *);
351static void xbdback_thread(void *); 351static void xbdback_thread(void *);
352static void xbdback_wakeup_thread(struct xbdback_instance *); 352static void xbdback_wakeup_thread(struct xbdback_instance *);
353static void xbdback_trampoline(struct xbdback_instance *, void *); 353static void xbdback_trampoline(struct xbdback_instance *, void *);
354 354
355static struct xenbus_backend_driver xbd_backend_driver = { 355static struct xenbus_backend_driver xbd_backend_driver = {
356 .xbakd_create = xbdback_xenbus_create, 356 .xbakd_create = xbdback_xenbus_create,
357 .xbakd_type = "vbd" 357 .xbakd_type = "vbd"
358}; 358};
359 359
360void 360void
361xbdbackattach(int n) 361xbdbackattach(int n)
362{ 362{
363 XENPRINTF(("xbdbackattach\n")); 363 XENPRINTF(("xbdbackattach\n"));
364 364
365 /* 365 /*
366 * initialize the backend driver, register the control message handler 366 * initialize the backend driver, register the control message handler
367 * and send driver up message. 367 * and send driver up message.
368 */ 368 */
369 SLIST_INIT(&xbdback_instances); 369 SLIST_INIT(&xbdback_instances);
370 mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE); 370 mutex_init(&xbdback_lock, MUTEX_DEFAULT, IPL_NONE);
371 371
372 pool_cache_bootstrap(&xbdback_request_pool.pc, 372 pool_cache_bootstrap(&xbdback_request_pool.pc,
373 sizeof(struct xbdback_request), 0, 0, 0, "xbbrp", NULL, 373 sizeof(struct xbdback_request), 0, 0, 0, "xbbrp", NULL,
374 IPL_SOFTBIO, NULL, NULL, NULL); 374 IPL_SOFTBIO, NULL, NULL, NULL);
375 pool_cache_bootstrap(&xbdback_io_pool.pc, 375 pool_cache_bootstrap(&xbdback_io_pool.pc,
376 sizeof(struct xbdback_io), 0, 0, 0, "xbbip", NULL, 376 sizeof(struct xbdback_io), 0, 0, 0, "xbbip", NULL,
377 IPL_SOFTBIO, NULL, NULL, NULL); 377 IPL_SOFTBIO, NULL, NULL, NULL);
378 pool_cache_bootstrap(&xbdback_fragment_pool.pc, 378 pool_cache_bootstrap(&xbdback_fragment_pool.pc,
379 sizeof(struct xbdback_fragment), 0, 0, 0, "xbbfp", NULL, 379 sizeof(struct xbdback_fragment), 0, 0, 0, "xbbfp", NULL,
380 IPL_SOFTBIO, NULL, NULL, NULL); 380 IPL_SOFTBIO, NULL, NULL, NULL);
381 381
382 /* we allocate enough to handle a whole ring at once */ 382 /* we allocate enough to handle a whole ring at once */
383 pool_prime(&xbdback_request_pool.pc.pc_pool, BLKIF_RING_SIZE); 383 pool_prime(&xbdback_request_pool.pc.pc_pool, BLKIF_RING_SIZE);
384 pool_prime(&xbdback_io_pool.pc.pc_pool, BLKIF_RING_SIZE); 384 pool_prime(&xbdback_io_pool.pc.pc_pool, BLKIF_RING_SIZE);
385 pool_prime(&xbdback_fragment_pool.pc.pc_pool, 385 pool_prime(&xbdback_fragment_pool.pc.pc_pool,
386 BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE); 386 BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE);
387 387
388 xenbus_backend_register(&xbd_backend_driver); 388 xenbus_backend_register(&xbd_backend_driver);
389} 389}
390 390
391static int 391static int
392xbdback_xenbus_create(struct xenbus_device *xbusd) 392xbdback_xenbus_create(struct xenbus_device *xbusd)
393{ 393{
394 struct xbdback_instance *xbdi; 394 struct xbdback_instance *xbdi;
395 long domid, handle; 395 long domid, handle;
396 int error, i; 396 int error, i;
397 char *ep; 397 char *ep;
398 398
399 if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path, 399 if ((error = xenbus_read_ul(NULL, xbusd->xbusd_path,
400 "frontend-id", &domid, 10)) != 0) { 400 "frontend-id", &domid, 10)) != 0) {
401 aprint_error("xbdback: can't read %s/frontend-id: %d\n", 401 aprint_error("xbdback: can't read %s/frontend-id: %d\n",
402 xbusd->xbusd_path, error); 402 xbusd->xbusd_path, error);
403 return error; 403 return error;
404 } 404 }
405 405
406 /* 406 /*
407 * get handle: this is the last component of the path; which is 407 * get handle: this is the last component of the path; which is
408 * a decimal number. $path/dev contains the device name, which is not 408 * a decimal number. $path/dev contains the device name, which is not
409 * appropriate. 409 * appropriate.
410 */ 410 */
411 for (i = strlen(xbusd->xbusd_path); i > 0; i--) { 411 for (i = strlen(xbusd->xbusd_path); i > 0; i--) {
412 if (xbusd->xbusd_path[i] == '/') 412 if (xbusd->xbusd_path[i] == '/')
413 break; 413 break;
414 } 414 }
415 if (i == 0) { 415 if (i == 0) {
416 aprint_error("xbdback: can't parse %s\n", 416 aprint_error("xbdback: can't parse %s\n",
417 xbusd->xbusd_path); 417 xbusd->xbusd_path);
418 return EFTYPE; 418 return EFTYPE;
419 } 419 }
420 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10); 420 handle = strtoul(&xbusd->xbusd_path[i+1], &ep, 10);
421 if (*ep != '\0') { 421 if (*ep != '\0') {
422 aprint_error("xbdback: can't parse %s\n", 422 aprint_error("xbdback: can't parse %s\n",
423 xbusd->xbusd_path); 423 xbusd->xbusd_path);
424 return EFTYPE; 424 return EFTYPE;
425 } 425 }
426  426
427 if (xbdif_lookup(domid, handle)) { 427 if (xbdif_lookup(domid, handle)) {
428 return EEXIST; 428 return EEXIST;
429 } 429 }
430 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP); 430 xbdi = kmem_zalloc(sizeof(*xbdi), KM_SLEEP);
431 431
432 xbdi->xbdi_domid = domid; 432 xbdi->xbdi_domid = domid;
433 xbdi->xbdi_handle = handle; 433 xbdi->xbdi_handle = handle;
434 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d", 434 snprintf(xbdi->xbdi_name, sizeof(xbdi->xbdi_name), "xbdb%di%d",
435 xbdi->xbdi_domid, xbdi->xbdi_handle); 435 xbdi->xbdi_domid, xbdi->xbdi_handle);
436 436
437 /* initialize status and reference counter */ 437 /* initialize status and reference counter */
438 xbdi->xbdi_status = DISCONNECTED; 438 xbdi->xbdi_status = DISCONNECTED;
439 xbdi_get(xbdi); 439 xbdi_get(xbdi);
440 440
441 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO); 441 mutex_init(&xbdi->xbdi_lock, MUTEX_DEFAULT, IPL_BIO);
442 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name); 442 cv_init(&xbdi->xbdi_cv, xbdi->xbdi_name);
443 mutex_enter(&xbdback_lock); 443 mutex_enter(&xbdback_lock);
444 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next); 444 SLIST_INSERT_HEAD(&xbdback_instances, xbdi, next);
445 mutex_exit(&xbdback_lock); 445 mutex_exit(&xbdback_lock);
446 446
447 xbusd->xbusd_u.b.b_cookie = xbdi;  447 xbusd->xbusd_u.b.b_cookie = xbdi;
448 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy; 448 xbusd->xbusd_u.b.b_detach = xbdback_xenbus_destroy;
449 xbusd->xbusd_otherend_changed = xbdback_frontend_changed; 449 xbusd->xbusd_otherend_changed = xbdback_frontend_changed;
450 xbdi->xbdi_xbusd = xbusd; 450 xbdi->xbdi_xbusd = xbusd;
451 451
 452 SLIST_INIT(&xbdi->xbdi_va_free);
452 for (i = 0; i < BLKIF_RING_SIZE; i++) { 453 for (i = 0; i < BLKIF_RING_SIZE; i++) {
453 xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map, 454 xbdi->xbdi_va[i].xv_vaddr = uvm_km_alloc(kernel_map,
454 VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA); 455 VBD_VA_SIZE, 0, UVM_KMF_VAONLY|UVM_KMF_WAITVA);
455 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i], 456 SLIST_INSERT_HEAD(&xbdi->xbdi_va_free, &xbdi->xbdi_va[i],
456 xv_next); 457 xv_next);
457 } 458 }
458 459
459 error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device", 460 error = xenbus_watch_path2(xbusd, xbusd->xbusd_path, "physical-device",
460 &xbdi->xbdi_watch, xbdback_backend_changed); 461 &xbdi->xbdi_watch, xbdback_backend_changed);
461 if (error) { 462 if (error) {
462 printf("failed to watch on %s/physical-device: %d\n", 463 printf("failed to watch on %s/physical-device: %d\n",
463 xbusd->xbusd_path, error); 464 xbusd->xbusd_path, error);
464 goto fail; 465 goto fail;
465 } 466 }
466 xbdi->xbdi_watch.xbw_dev = xbusd; 467 xbdi->xbdi_watch.xbw_dev = xbusd;
467 error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait); 468 error = xenbus_switch_state(xbusd, NULL, XenbusStateInitWait);
468 if (error) { 469 if (error) {
469 printf("failed to switch state on %s: %d\n", 470 printf("failed to switch state on %s: %d\n",
470 xbusd->xbusd_path, error); 471 xbusd->xbusd_path, error);
471 goto fail2; 472 goto fail2;
472 } 473 }
473 return 0; 474 return 0;
474fail2: 475fail2:
475 unregister_xenbus_watch(&xbdi->xbdi_watch); 476 unregister_xenbus_watch(&xbdi->xbdi_watch);
476fail: 477fail:
477 kmem_free(xbdi, sizeof(*xbdi)); 478 kmem_free(xbdi, sizeof(*xbdi));
478 return error; 479 return error;
479} 480}
480 481
481static int 482static int
482xbdback_xenbus_destroy(void *arg) 483xbdback_xenbus_destroy(void *arg)
483{ 484{
484 struct xbdback_instance *xbdi = arg; 485 struct xbdback_instance *xbdi = arg;
485 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 486 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
486 struct gnttab_unmap_grant_ref ungrop; 487 struct gnttab_unmap_grant_ref ungrop;
487 int err; 488 int err;
488 489
489 XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status)); 490 XENPRINTF(("xbdback_xenbus_destroy state %d\n", xbdi->xbdi_status));
490 491
491 xbdback_disconnect(xbdi); 492 xbdback_disconnect(xbdi);
492 493
493 /* unregister watch */ 494 /* unregister watch */
494 if (xbdi->xbdi_watch.node) 495 if (xbdi->xbdi_watch.node)
495 xenbus_unwatch_path(&xbdi->xbdi_watch); 496 xenbus_unwatch_path(&xbdi->xbdi_watch);
496 497
497 /* unmap ring */ 498 /* unmap ring */
498 if (xbdi->xbdi_ring_va != 0) { 499 if (xbdi->xbdi_ring_va != 0) {
499 ungrop.host_addr = xbdi->xbdi_ring_va; 500 ungrop.host_addr = xbdi->xbdi_ring_va;
500 ungrop.handle = xbdi->xbdi_ring_handle; 501 ungrop.handle = xbdi->xbdi_ring_handle;
501 ungrop.dev_bus_addr = 0; 502 ungrop.dev_bus_addr = 0;
502 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 503 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
503 &ungrop, 1); 504 &ungrop, 1);
504 if (err) 505 if (err)
505 printf("xbdback %s: unmap_grant_ref failed: %d\n", 506 printf("xbdback %s: unmap_grant_ref failed: %d\n",
506 xbusd->xbusd_otherend, err); 507 xbusd->xbusd_otherend, err);
507 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, 508 uvm_km_free(kernel_map, xbdi->xbdi_ring_va,
508 PAGE_SIZE, UVM_KMF_VAONLY); 509 PAGE_SIZE, UVM_KMF_VAONLY);
509 } 510 }
510 /* close device */ 511 /* close device */
511 if (xbdi->xbdi_size) { 512 if (xbdi->xbdi_size) {
512 const char *name; 513 const char *name;
513 struct dkwedge_info wi; 514 struct dkwedge_info wi;
514 if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0) 515 if (getdiskinfo(xbdi->xbdi_vp, &wi) == 0)
515 name = wi.dkw_devname; 516 name = wi.dkw_devname;
516 else 517 else
517 name = "*unknown*"; 518 name = "*unknown*";
518 printf("xbd backend: detach device %s for domain %d\n", 519 printf("xbd backend: detach device %s for domain %d\n",
519 name, xbdi->xbdi_domid); 520 name, xbdi->xbdi_domid);
520 vn_close(xbdi->xbdi_vp, FREAD, NOCRED); 521 vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
521 } 522 }
522 mutex_enter(&xbdback_lock); 523 mutex_enter(&xbdback_lock);
523 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next); 524 SLIST_REMOVE(&xbdback_instances, xbdi, xbdback_instance, next);
524 mutex_exit(&xbdback_lock); 525 mutex_exit(&xbdback_lock);
525 526
526 for (int i = 0; i < BLKIF_RING_SIZE; i++) { 527 for (int i = 0; i < BLKIF_RING_SIZE; i++) {
527 if (xbdi->xbdi_va[i].xv_vaddr != 0) { 528 if (xbdi->xbdi_va[i].xv_vaddr != 0) {
528 uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr, 529 uvm_km_free(kernel_map, xbdi->xbdi_va[i].xv_vaddr,
529 VBD_VA_SIZE, UVM_KMF_VAONLY); 530 VBD_VA_SIZE, UVM_KMF_VAONLY);
530 xbdi->xbdi_va[i].xv_vaddr = 0; 531 xbdi->xbdi_va[i].xv_vaddr = 0;
531 } 532 }
532 } 533 }
533 534
534 mutex_destroy(&xbdi->xbdi_lock); 535 mutex_destroy(&xbdi->xbdi_lock);
535 cv_destroy(&xbdi->xbdi_cv); 536 cv_destroy(&xbdi->xbdi_cv);
536 kmem_free(xbdi, sizeof(*xbdi)); 537 kmem_free(xbdi, sizeof(*xbdi));
537 return 0; 538 return 0;
538} 539}
539 540
540static int 541static int
541xbdback_connect(struct xbdback_instance *xbdi) 542xbdback_connect(struct xbdback_instance *xbdi)
542{ 543{
543 int err; 544 int err;
544 struct gnttab_map_grant_ref grop; 545 struct gnttab_map_grant_ref grop;
545 struct gnttab_unmap_grant_ref ungrop; 546 struct gnttab_unmap_grant_ref ungrop;
546 evtchn_op_t evop; 547 evtchn_op_t evop;
547 u_long ring_ref, revtchn; 548 u_long ring_ref, revtchn;
548 char xsproto[32]; 549 char xsproto[32];
549 const char *proto; 550 const char *proto;
550 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 551 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
551 552
552 XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path)); 553 XENPRINTF(("xbdback %s: connect\n", xbusd->xbusd_path));
553 /* read comunication informations */ 554 /* read comunication informations */
554 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend, 555 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
555 "ring-ref", &ring_ref, 10); 556 "ring-ref", &ring_ref, 10);
556 if (err) { 557 if (err) {
557 xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref", 558 xenbus_dev_fatal(xbusd, err, "reading %s/ring-ref",
558 xbusd->xbusd_otherend); 559 xbusd->xbusd_otherend);
559 return -1; 560 return -1;
560 } 561 }
561 XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref)); 562 XENPRINTF(("xbdback %s: connect ring-ref %lu\n", xbusd->xbusd_path, ring_ref));
562 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend, 563 err = xenbus_read_ul(NULL, xbusd->xbusd_otherend,
563 "event-channel", &revtchn, 10); 564 "event-channel", &revtchn, 10);
564 if (err) { 565 if (err) {
565 xenbus_dev_fatal(xbusd, err, "reading %s/event-channel", 566 xenbus_dev_fatal(xbusd, err, "reading %s/event-channel",
566 xbusd->xbusd_otherend); 567 xbusd->xbusd_otherend);
567 return -1; 568 return -1;
568 } 569 }
569 XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn)); 570 XENPRINTF(("xbdback %s: connect revtchn %lu\n", xbusd->xbusd_path, revtchn));
570 err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol", 571 err = xenbus_read(NULL, xbusd->xbusd_otherend, "protocol",
571 xsproto, sizeof(xsproto)); 572 xsproto, sizeof(xsproto));
572 if (err) { 573 if (err) {
573 xbdi->xbdi_proto = XBDIP_NATIVE; 574 xbdi->xbdi_proto = XBDIP_NATIVE;
574 proto = "unspecified"; 575 proto = "unspecified";
575 XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path)); 576 XENPRINTF(("xbdback %s: connect no xsproto\n", xbusd->xbusd_path));
576 } else { 577 } else {
577 XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto)); 578 XENPRINTF(("xbdback %s: connect xsproto %s\n", xbusd->xbusd_path, xsproto));
578 if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) { 579 if (strcmp(xsproto, XEN_IO_PROTO_ABI_NATIVE) == 0) {
579 xbdi->xbdi_proto = XBDIP_NATIVE; 580 xbdi->xbdi_proto = XBDIP_NATIVE;
580 proto = XEN_IO_PROTO_ABI_NATIVE; 581 proto = XEN_IO_PROTO_ABI_NATIVE;
581 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) { 582 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_32) == 0) {
582 xbdi->xbdi_proto = XBDIP_32; 583 xbdi->xbdi_proto = XBDIP_32;
583 proto = XEN_IO_PROTO_ABI_X86_32; 584 proto = XEN_IO_PROTO_ABI_X86_32;
584 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) { 585 } else if (strcmp(xsproto, XEN_IO_PROTO_ABI_X86_64) == 0) {
585 xbdi->xbdi_proto = XBDIP_64; 586 xbdi->xbdi_proto = XBDIP_64;
586 proto = XEN_IO_PROTO_ABI_X86_64; 587 proto = XEN_IO_PROTO_ABI_X86_64;
587 } else { 588 } else {
588 aprint_error("xbd domain %d: unknown proto %s\n", 589 aprint_error("xbd domain %d: unknown proto %s\n",
589 xbdi->xbdi_domid, xsproto); 590 xbdi->xbdi_domid, xsproto);
590 return -1; 591 return -1;
591 } 592 }
592 } 593 }
593 594
594 /* allocate VA space and map rings */ 595 /* allocate VA space and map rings */
595 xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0, 596 xbdi->xbdi_ring_va = uvm_km_alloc(kernel_map, PAGE_SIZE, 0,
596 UVM_KMF_VAONLY); 597 UVM_KMF_VAONLY);
597 if (xbdi->xbdi_ring_va == 0) { 598 if (xbdi->xbdi_ring_va == 0) {
598 xenbus_dev_fatal(xbusd, ENOMEM, 599 xenbus_dev_fatal(xbusd, ENOMEM,
599 "can't get VA for ring", xbusd->xbusd_otherend); 600 "can't get VA for ring", xbusd->xbusd_otherend);
600 return -1; 601 return -1;
601 } 602 }
602 XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va)); 603 XENPRINTF(("xbdback %s: connect va 0x%" PRIxVADDR "\n", xbusd->xbusd_path, xbdi->xbdi_ring_va));
603 604
604 grop.host_addr = xbdi->xbdi_ring_va; 605 grop.host_addr = xbdi->xbdi_ring_va;
605 grop.flags = GNTMAP_host_map; 606 grop.flags = GNTMAP_host_map;
606 grop.ref = ring_ref; 607 grop.ref = ring_ref;
607 grop.dom = xbdi->xbdi_domid; 608 grop.dom = xbdi->xbdi_domid;
608 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, 609 err = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref,
609 &grop, 1); 610 &grop, 1);
610 if (err || grop.status) { 611 if (err || grop.status) {
611 aprint_error("xbdback %s: can't map grant ref: %d/%d\n", 612 aprint_error("xbdback %s: can't map grant ref: %d/%d\n",
612 xbusd->xbusd_path, err, grop.status); 613 xbusd->xbusd_path, err, grop.status);
613 xenbus_dev_fatal(xbusd, EINVAL, 614 xenbus_dev_fatal(xbusd, EINVAL,
614 "can't map ring", xbusd->xbusd_otherend); 615 "can't map ring", xbusd->xbusd_otherend);
615 goto err; 616 goto err;
616 } 617 }
617 xbdi->xbdi_ring_handle = grop.handle; 618 xbdi->xbdi_ring_handle = grop.handle;
618 XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, grop.handle)); 619 XENPRINTF(("xbdback %s: connect grhandle %d\n", xbusd->xbusd_path, grop.handle));
619 620
620 switch(xbdi->xbdi_proto) { 621 switch(xbdi->xbdi_proto) {
621 case XBDIP_NATIVE: 622 case XBDIP_NATIVE:
622 { 623 {
623 blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va; 624 blkif_sring_t *sring = (void *)xbdi->xbdi_ring_va;
624 BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE); 625 BACK_RING_INIT(&xbdi->xbdi_ring.ring_n, sring, PAGE_SIZE);
625 break; 626 break;
626 } 627 }
627 case XBDIP_32: 628 case XBDIP_32:
628 { 629 {
629 blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va; 630 blkif_x86_32_sring_t *sring = (void *)xbdi->xbdi_ring_va;
630 BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE); 631 BACK_RING_INIT(&xbdi->xbdi_ring.ring_32, sring, PAGE_SIZE);
631 break; 632 break;
632 } 633 }
633 case XBDIP_64: 634 case XBDIP_64:
634 { 635 {
635 blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va; 636 blkif_x86_64_sring_t *sring = (void *)xbdi->xbdi_ring_va;
636 BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE); 637 BACK_RING_INIT(&xbdi->xbdi_ring.ring_64, sring, PAGE_SIZE);
637 break; 638 break;
638 } 639 }
639 } 640 }
640 641
641 evop.cmd = EVTCHNOP_bind_interdomain; 642 evop.cmd = EVTCHNOP_bind_interdomain;
642 evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid; 643 evop.u.bind_interdomain.remote_dom = xbdi->xbdi_domid;
643 evop.u.bind_interdomain.remote_port = revtchn; 644 evop.u.bind_interdomain.remote_port = revtchn;
644 err = HYPERVISOR_event_channel_op(&evop); 645 err = HYPERVISOR_event_channel_op(&evop);
645 if (err) { 646 if (err) {
646 aprint_error("blkback %s: " 647 aprint_error("blkback %s: "
647 "can't get event channel: %d\n", 648 "can't get event channel: %d\n",
648 xbusd->xbusd_otherend, err); 649 xbusd->xbusd_otherend, err);
649 xenbus_dev_fatal(xbusd, err, 650 xenbus_dev_fatal(xbusd, err,
650 "can't bind event channel", xbusd->xbusd_otherend); 651 "can't bind event channel", xbusd->xbusd_otherend);
651 goto err2; 652 goto err2;
652 } 653 }
653 XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn)); 654 XENPRINTF(("xbdback %s: connect evchannel %d\n", xbusd->xbusd_path, xbdi->xbdi_evtchn));
654 xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port; 655 xbdi->xbdi_evtchn = evop.u.bind_interdomain.local_port;
655 656
656 xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic, xbdi->xbdi_evtchn, 657 xbdi->xbdi_ih = xen_intr_establish_xname(-1, &xen_pic, xbdi->xbdi_evtchn,
657 IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi, false, 658 IST_LEVEL, IPL_BIO, xbdback_evthandler, xbdi, false,
658 xbdi->xbdi_name); 659 xbdi->xbdi_name);
659 KASSERT(xbdi->xbdi_ih != NULL); 660 KASSERT(xbdi->xbdi_ih != NULL);
660 aprint_verbose("xbd backend domain %d handle %#x (%d) " 661 aprint_verbose("xbd backend domain %d handle %#x (%d) "
661 "using event channel %d, protocol %s\n", xbdi->xbdi_domid, 662 "using event channel %d, protocol %s\n", xbdi->xbdi_domid,
662 xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto); 663 xbdi->xbdi_handle, xbdi->xbdi_handle, xbdi->xbdi_evtchn, proto);
663 664
664 /* enable the xbdback event handler machinery */ 665 /* enable the xbdback event handler machinery */
665 xbdi->xbdi_status = WAITING; 666 xbdi->xbdi_status = WAITING;
666 hypervisor_unmask_event(xbdi->xbdi_evtchn); 667 hypervisor_unmask_event(xbdi->xbdi_evtchn);
667 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn); 668 hypervisor_notify_via_evtchn(xbdi->xbdi_evtchn);
668 669
669 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL, 670 if (kthread_create(PRI_NONE, KTHREAD_MPSAFE, NULL,
670 xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0) 671 xbdback_thread, xbdi, NULL, "%s", xbdi->xbdi_name) == 0)
671 return 0; 672 return 0;
672 673
673err2: 674err2:
674 /* unmap ring */ 675 /* unmap ring */
675 ungrop.host_addr = xbdi->xbdi_ring_va; 676 ungrop.host_addr = xbdi->xbdi_ring_va;
676 ungrop.handle = xbdi->xbdi_ring_handle; 677 ungrop.handle = xbdi->xbdi_ring_handle;
677 ungrop.dev_bus_addr = 0; 678 ungrop.dev_bus_addr = 0;
678 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 679 err = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
679 &ungrop, 1); 680 &ungrop, 1);
680 if (err) 681 if (err)
681 aprint_error("xbdback %s: unmap_grant_ref failed: %d\n", 682 aprint_error("xbdback %s: unmap_grant_ref failed: %d\n",
682 xbusd->xbusd_path, err); 683 xbusd->xbusd_path, err);
683 684
684err: 685err:
685 /* free ring VA space */ 686 /* free ring VA space */
686 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY); 687 uvm_km_free(kernel_map, xbdi->xbdi_ring_va, PAGE_SIZE, UVM_KMF_VAONLY);
687 return -1; 688 return -1;
688} 689}
689 690
690/* 691/*
691 * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context. 692 * Signal a xbdback thread to disconnect. Done in 'xenwatch' thread context.
692 */ 693 */
693static void 694static void
694xbdback_disconnect(struct xbdback_instance *xbdi) 695xbdback_disconnect(struct xbdback_instance *xbdi)
695{ 696{
696  697
697 mutex_enter(&xbdi->xbdi_lock); 698 mutex_enter(&xbdi->xbdi_lock);
698 if (xbdi->xbdi_status == DISCONNECTED) { 699 if (xbdi->xbdi_status == DISCONNECTED) {
699 mutex_exit(&xbdi->xbdi_lock); 700 mutex_exit(&xbdi->xbdi_lock);
700 return; 701 return;
701 } 702 }
702 hypervisor_mask_event(xbdi->xbdi_evtchn); 703 hypervisor_mask_event(xbdi->xbdi_evtchn);
703 xen_intr_disestablish(xbdi->xbdi_ih); 704 xen_intr_disestablish(xbdi->xbdi_ih);
704 705
705 /* signal thread that we want to disconnect, then wait for it */ 706 /* signal thread that we want to disconnect, then wait for it */
706 xbdi->xbdi_status = DISCONNECTING; 707 xbdi->xbdi_status = DISCONNECTING;
707 cv_signal(&xbdi->xbdi_cv); 708 cv_signal(&xbdi->xbdi_cv);
708 709
709 while (xbdi->xbdi_status != DISCONNECTED) 710 while (xbdi->xbdi_status != DISCONNECTED)
710 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 711 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
711 712
712 mutex_exit(&xbdi->xbdi_lock); 713 mutex_exit(&xbdi->xbdi_lock);
713 714
714 xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing); 715 xenbus_switch_state(xbdi->xbdi_xbusd, NULL, XenbusStateClosing);
715} 716}
716 717
717static void 718static void
718xbdback_frontend_changed(void *arg, XenbusState new_state) 719xbdback_frontend_changed(void *arg, XenbusState new_state)
719{ 720{
720 struct xbdback_instance *xbdi = arg; 721 struct xbdback_instance *xbdi = arg;
721 struct xenbus_device *xbusd = xbdi->xbdi_xbusd; 722 struct xenbus_device *xbusd = xbdi->xbdi_xbusd;
722 723
723 XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state)); 724 XENPRINTF(("xbdback %s: new state %d\n", xbusd->xbusd_path, new_state));
724 switch(new_state) { 725 switch(new_state) {
725 case XenbusStateInitialising: 726 case XenbusStateInitialising:
726 break; 727 break;
727 case XenbusStateInitialised: 728 case XenbusStateInitialised:
728 case XenbusStateConnected: 729 case XenbusStateConnected:
729 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) 730 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN)
730 break; 731 break;
731 xbdback_connect(xbdi); 732 xbdback_connect(xbdi);
732 break; 733 break;
733 case XenbusStateClosing: 734 case XenbusStateClosing:
734 xbdback_disconnect(xbdi); 735 xbdback_disconnect(xbdi);
735 break; 736 break;
736 case XenbusStateClosed: 737 case XenbusStateClosed:
737 /* otherend_changed() should handle it for us */ 738 /* otherend_changed() should handle it for us */
738 panic("xbdback_frontend_changed: closed\n"); 739 panic("xbdback_frontend_changed: closed\n");
739 case XenbusStateUnknown: 740 case XenbusStateUnknown:
740 case XenbusStateInitWait: 741 case XenbusStateInitWait:
741 default: 742 default:
742 aprint_error("xbdback %s: invalid frontend state %d\n", 743 aprint_error("xbdback %s: invalid frontend state %d\n",
743 xbusd->xbusd_path, new_state); 744 xbusd->xbusd_path, new_state);
744 } 745 }
745 return; 746 return;
746} 747}
747 748
748static void 749static void
749xbdback_backend_changed(struct xenbus_watch *watch, 750xbdback_backend_changed(struct xenbus_watch *watch,
750 const char **vec, unsigned int len) 751 const char **vec, unsigned int len)
751{ 752{
752 struct xenbus_device *xbusd = watch->xbw_dev; 753 struct xenbus_device *xbusd = watch->xbw_dev;
753 struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie; 754 struct xbdback_instance *xbdi = xbusd->xbusd_u.b.b_cookie;
754 int err; 755 int err;
755 long dev; 756 long dev;
756 char mode[32]; 757 char mode[32];
757 struct xenbus_transaction *xbt; 758 struct xenbus_transaction *xbt;
758 const char *devname; 759 const char *devname;
759 int major; 760 int major;
760 761
761 err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device", 762 err = xenbus_read_ul(NULL, xbusd->xbusd_path, "physical-device",
762 &dev, 10); 763 &dev, 10);
763 /* 764 /*
764 * An error can occur as the watch can fire up just after being 765 * An error can occur as the watch can fire up just after being
765 * registered. So we have to ignore error :( 766 * registered. So we have to ignore error :(
766 */ 767 */
767 if (err) 768 if (err)
768 return; 769 return;
769 /* 770 /*
770 * we can also fire up after having opened the device, don't try 771 * we can also fire up after having opened the device, don't try
771 * to do it twice. 772 * to do it twice.
772 */ 773 */
773 if (xbdi->xbdi_vp != NULL) { 774 if (xbdi->xbdi_vp != NULL) {
774 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) { 775 if (xbdi->xbdi_status == WAITING || xbdi->xbdi_status == RUN) {
775 if (xbdi->xbdi_dev != dev) { 776 if (xbdi->xbdi_dev != dev) {
776 printf("xbdback %s: changing physical device " 777 printf("xbdback %s: changing physical device "
777 "from %#"PRIx64" to %#lx not supported\n", 778 "from %#"PRIx64" to %#lx not supported\n",
778 xbusd->xbusd_path, xbdi->xbdi_dev, dev); 779 xbusd->xbusd_path, xbdi->xbdi_dev, dev);
779 } 780 }
780 } 781 }
781 return; 782 return;
782 } 783 }
783 xbdi->xbdi_dev = dev; 784 xbdi->xbdi_dev = dev;
784 err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode)); 785 err = xenbus_read(NULL, xbusd->xbusd_path, "mode", mode, sizeof(mode));
785 if (err) { 786 if (err) {
786 printf("xbdback: failed to read %s/mode: %d\n", 787 printf("xbdback: failed to read %s/mode: %d\n",
787 xbusd->xbusd_path, err); 788 xbusd->xbusd_path, err);
788 return; 789 return;
789 } 790 }
790 if (mode[0] == 'w') 791 if (mode[0] == 'w')
791 xbdi->xbdi_ro = false; 792 xbdi->xbdi_ro = false;
792 else 793 else
793 xbdi->xbdi_ro = true; 794 xbdi->xbdi_ro = true;
794 major = major(xbdi->xbdi_dev); 795 major = major(xbdi->xbdi_dev);
795 devname = devsw_blk2name(major); 796 devname = devsw_blk2name(major);
796 if (devname == NULL) { 797 if (devname == NULL) {
797 printf("xbdback %s: unknown device 0x%"PRIx64"\n", 798 printf("xbdback %s: unknown device 0x%"PRIx64"\n",
798 xbusd->xbusd_path, xbdi->xbdi_dev); 799 xbusd->xbusd_path, xbdi->xbdi_dev);
799 return; 800 return;
800 } 801 }
801 xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev); 802 xbdi->xbdi_bdevsw = bdevsw_lookup(xbdi->xbdi_dev);
802 if (xbdi->xbdi_bdevsw == NULL) { 803 if (xbdi->xbdi_bdevsw == NULL) {
803 printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n", 804 printf("xbdback %s: no bdevsw for device 0x%"PRIx64"\n",
804 xbusd->xbusd_path, xbdi->xbdi_dev); 805 xbusd->xbusd_path, xbdi->xbdi_dev);
805 return; 806 return;
806 } 807 }
807 err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp); 808 err = bdevvp(xbdi->xbdi_dev, &xbdi->xbdi_vp);
808 if (err) { 809 if (err) {
809 printf("xbdback %s: can't open device 0x%"PRIx64": %d\n", 810 printf("xbdback %s: can't open device 0x%"PRIx64": %d\n",
810 xbusd->xbusd_path, xbdi->xbdi_dev, err); 811 xbusd->xbusd_path, xbdi->xbdi_dev, err);
811 return; 812 return;
812 } 813 }
813 err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY); 814 err = vn_lock(xbdi->xbdi_vp, LK_EXCLUSIVE | LK_RETRY);
814 if (err) { 815 if (err) {
815 printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n", 816 printf("xbdback %s: can't vn_lock device 0x%"PRIx64": %d\n",
816 xbusd->xbusd_path, xbdi->xbdi_dev, err); 817 xbusd->xbusd_path, xbdi->xbdi_dev, err);
817 vrele(xbdi->xbdi_vp); 818 vrele(xbdi->xbdi_vp);
818 return; 819 return;
819 } 820 }
820 err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED); 821 err = VOP_OPEN(xbdi->xbdi_vp, FREAD, NOCRED);
821 if (err) { 822 if (err) {
822 printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n", 823 printf("xbdback %s: can't VOP_OPEN device 0x%"PRIx64": %d\n",
823 xbusd->xbusd_path, xbdi->xbdi_dev, err); 824 xbusd->xbusd_path, xbdi->xbdi_dev, err);
824 vput(xbdi->xbdi_vp); 825 vput(xbdi->xbdi_vp);
825 return; 826 return;
826 } 827 }
827 VOP_UNLOCK(xbdi->xbdi_vp); 828 VOP_UNLOCK(xbdi->xbdi_vp);
828 829
829 /* dk device; get wedge data */ 830 /* dk device; get wedge data */
830 struct dkwedge_info wi; 831 struct dkwedge_info wi;
831 if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) { 832 if ((err = getdiskinfo(xbdi->xbdi_vp, &wi)) == 0) {
832 xbdi->xbdi_size = wi.dkw_size; 833 xbdi->xbdi_size = wi.dkw_size;
833 printf("xbd backend: attach device %s (size %" PRIu64 ") " 834 printf("xbd backend: attach device %s (size %" PRIu64 ") "
834 "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size, 835 "for domain %d\n", wi.dkw_devname, xbdi->xbdi_size,
835 xbdi->xbdi_domid); 836 xbdi->xbdi_domid);
836 } else { 837 } else {
837 /* If both ioctls failed, set device size to 0 and return */ 838 /* If both ioctls failed, set device size to 0 and return */
838 printf("xbdback %s: can't DIOCGWEDGEINFO device " 839 printf("xbdback %s: can't DIOCGWEDGEINFO device "
839 "0x%"PRIx64": %d\n", xbusd->xbusd_path, 840 "0x%"PRIx64": %d\n", xbusd->xbusd_path,
840 xbdi->xbdi_dev, err);  841 xbdi->xbdi_dev, err);
841 xbdi->xbdi_size = xbdi->xbdi_dev = 0; 842 xbdi->xbdi_size = xbdi->xbdi_dev = 0;
842 vn_close(xbdi->xbdi_vp, FREAD, NOCRED); 843 vn_close(xbdi->xbdi_vp, FREAD, NOCRED);
843 xbdi->xbdi_vp = NULL; 844 xbdi->xbdi_vp = NULL;
844 return; 845 return;
845 } 846 }
846again: 847again:
847 xbt = xenbus_transaction_start(); 848 xbt = xenbus_transaction_start();
848 if (xbt == NULL) { 849 if (xbt == NULL) {
849 printf("xbdback %s: can't start transaction\n", 850 printf("xbdback %s: can't start transaction\n",
850 xbusd->xbusd_path); 851 xbusd->xbusd_path);
851 return; 852 return;
852 } 853 }
853 err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 , 854 err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64 ,
854 xbdi->xbdi_size); 855 xbdi->xbdi_size);
855 if (err) { 856 if (err) {
856 printf("xbdback: failed to write %s/sectors: %d\n", 857 printf("xbdback: failed to write %s/sectors: %d\n",
857 xbusd->xbusd_path, err); 858 xbusd->xbusd_path, err);
858 goto abort; 859 goto abort;
859 } 860 }
860 err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u", 861 err = xenbus_printf(xbt, xbusd->xbusd_path, "info", "%u",
861 xbdi->xbdi_ro ? VDISK_READONLY : 0); 862 xbdi->xbdi_ro ? VDISK_READONLY : 0);
862 if (err) { 863 if (err) {
863 printf("xbdback: failed to write %s/info: %d\n", 864 printf("xbdback: failed to write %s/info: %d\n",
864 xbusd->xbusd_path, err); 865 xbusd->xbusd_path, err);
865 goto abort; 866 goto abort;
866 } 867 }
867 err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu", 868 err = xenbus_printf(xbt, xbusd->xbusd_path, "sector-size", "%lu",
868 (u_long)DEV_BSIZE); 869 (u_long)DEV_BSIZE);
869 if (err) { 870 if (err) {
870 printf("xbdback: failed to write %s/sector-size: %d\n", 871 printf("xbdback: failed to write %s/sector-size: %d\n",
871 xbusd->xbusd_path, err); 872 xbusd->xbusd_path, err);
872 goto abort; 873 goto abort;
873 } 874 }
874 err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache", 875 err = xenbus_printf(xbt, xbusd->xbusd_path, "feature-flush-cache",
875 "%u", 1); 876 "%u", 1);
876 if (err) { 877 if (err) {
877 printf("xbdback: failed to write %s/feature-flush-cache: %d\n", 878 printf("xbdback: failed to write %s/feature-flush-cache: %d\n",
878 xbusd->xbusd_path, err); 879 xbusd->xbusd_path, err);
879 goto abort; 880 goto abort;
880 } 881 }
881 err = xenbus_transaction_end(xbt, 0); 882 err = xenbus_transaction_end(xbt, 0);
882 if (err == EAGAIN) 883 if (err == EAGAIN)
883 goto again; 884 goto again;
884 if (err) { 885 if (err) {
885 printf("xbdback %s: can't end transaction: %d\n", 886 printf("xbdback %s: can't end transaction: %d\n",
886 xbusd->xbusd_path, err); 887 xbusd->xbusd_path, err);
887 } 888 }
888 err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected); 889 err = xenbus_switch_state(xbusd, NULL, XenbusStateConnected);
889 if (err) { 890 if (err) {
890 printf("xbdback %s: can't switch state: %d\n", 891 printf("xbdback %s: can't switch state: %d\n",
891 xbusd->xbusd_path, err); 892 xbusd->xbusd_path, err);
892 } 893 }
893 return; 894 return;
894abort: 895abort:
895 xenbus_transaction_end(xbt, 1); 896 xenbus_transaction_end(xbt, 1);
896} 897}
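The again:/EAGAIN loop above is the standard xenstore transaction idiom: everything written between xenbus_transaction_start() and xenbus_transaction_end() commits atomically, and a concurrent conflicting transaction makes the commit fail with EAGAIN, after which all the writes must be redone from scratch. A minimal sketch of that shape, reusing the xenbus calls seen above; the helper name and the single "sectors" key are illustrative only:

/* Illustrative sketch, not part of this file. */
static void
example_publish_sectors(struct xenbus_device *xbusd, uint64_t sectors)
{
    struct xenbus_transaction *xbt;
    int err;

again:
    xbt = xenbus_transaction_start();
    if (xbt == NULL)
        return;                     /* cannot even start a transaction */
    err = xenbus_printf(xbt, xbusd->xbusd_path, "sectors", "%" PRIu64,
        sectors);
    if (err) {
        xenbus_transaction_end(xbt, 1);    /* abort: throw the writes away */
        return;
    }
    err = xenbus_transaction_end(xbt, 0);  /* try to commit */
    if (err == EAGAIN)
        goto again;                 /* raced with another writer; redo everything */
}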
897 898
898/* 899/*
899 * Used by a xbdi thread to signal that it is now disconnected. 900 * Used by a xbdi thread to signal that it is now disconnected.
900 */ 901 */
901static void 902static void
902xbdback_finish_disconnect(struct xbdback_instance *xbdi) 903xbdback_finish_disconnect(struct xbdback_instance *xbdi)
903{ 904{
904 KASSERT(mutex_owned(&xbdi->xbdi_lock)); 905 KASSERT(mutex_owned(&xbdi->xbdi_lock));
905 KASSERT(xbdi->xbdi_status == DISCONNECTING); 906 KASSERT(xbdi->xbdi_status == DISCONNECTING);
906 907
907 xbdi->xbdi_status = DISCONNECTED; 908 xbdi->xbdi_status = DISCONNECTED;
908 909
909 cv_signal(&xbdi->xbdi_cv); 910 cv_signal(&xbdi->xbdi_cv);
910} 911}
911 912
912static bool 913static bool
913xbdif_lookup(domid_t dom , uint32_t handle) 914xbdif_lookup(domid_t dom , uint32_t handle)
914{ 915{
915 struct xbdback_instance *xbdi; 916 struct xbdback_instance *xbdi;
916 bool found = false; 917 bool found = false;
917 918
918 mutex_enter(&xbdback_lock); 919 mutex_enter(&xbdback_lock);
919 SLIST_FOREACH(xbdi, &xbdback_instances, next) { 920 SLIST_FOREACH(xbdi, &xbdback_instances, next) {
920 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) { 921 if (xbdi->xbdi_domid == dom && xbdi->xbdi_handle == handle) {
921 found = true; 922 found = true;
922 break; 923 break;
923 } 924 }
924 } 925 }
925 mutex_exit(&xbdback_lock); 926 mutex_exit(&xbdback_lock);
926 927
927 return found; 928 return found;
928} 929}
929 930
930static int 931static int
931xbdback_evthandler(void *arg) 932xbdback_evthandler(void *arg)
932{ 933{
933 struct xbdback_instance *xbdi = arg; 934 struct xbdback_instance *xbdi = arg;
934 935
935 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n", 936 XENPRINTF(("xbdback_evthandler domain %d: cont %p\n",
936 xbdi->xbdi_domid, xbdi->xbdi_cont)); 937 xbdi->xbdi_domid, xbdi->xbdi_cont));
937 938
938 xbdback_wakeup_thread(xbdi); 939 xbdback_wakeup_thread(xbdi);
939 940
940 return 1; 941 return 1;
941} 942}
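xbdback_wakeup_thread() is defined elsewhere in this file; all the event handler needs from it is to move the instance from WAITING to RUN under xbdi_lock and signal the condition variable that the worker thread below sleeps on. A simplified sketch of that hand-off (field names reused from this file, logic reduced to the essentials):

/* Illustrative sketch, not part of this file. */
static void
example_wakeup(struct xbdback_instance *xbdi)
{
    mutex_enter(&xbdi->xbdi_lock);
    if (xbdi->xbdi_status == WAITING)
        xbdi->xbdi_status = RUN;    /* ask the thread to scan the ring again */
    cv_signal(&xbdi->xbdi_cv);
    mutex_exit(&xbdi->xbdi_lock);
}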
942 943
943/* 944/*
944 * Main thread routine for one xbdback instance. Woken up by 945 * Main thread routine for one xbdback instance. Woken up by
945 * xbdback_evthandler when a domain has I/O work scheduled in an I/O ring. 946 * xbdback_evthandler when a domain has I/O work scheduled in an I/O ring.
946 */ 947 */
947static void 948static void
948xbdback_thread(void *arg) 949xbdback_thread(void *arg)
949{ 950{
950 struct xbdback_instance *xbdi = arg; 951 struct xbdback_instance *xbdi = arg;
951 952
952 for (;;) { 953 for (;;) {
953 mutex_enter(&xbdi->xbdi_lock); 954 mutex_enter(&xbdi->xbdi_lock);
954 switch (xbdi->xbdi_status) { 955 switch (xbdi->xbdi_status) {
955 case WAITING: 956 case WAITING:
956 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 957 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
957 mutex_exit(&xbdi->xbdi_lock); 958 mutex_exit(&xbdi->xbdi_lock);
958 break; 959 break;
959 case RUN: 960 case RUN:
960 xbdi->xbdi_status = WAITING; /* reset state */ 961 xbdi->xbdi_status = WAITING; /* reset state */
961 mutex_exit(&xbdi->xbdi_lock); 962 mutex_exit(&xbdi->xbdi_lock);
962 963
963 if (xbdi->xbdi_cont == NULL) { 964 if (xbdi->xbdi_cont == NULL) {
964 xbdi->xbdi_cont = xbdback_co_main; 965 xbdi->xbdi_cont = xbdback_co_main;
965 } 966 }
966 967
967 xbdback_trampoline(xbdi, xbdi); 968 xbdback_trampoline(xbdi, xbdi);
968 break; 969 break;
969 case DISCONNECTING: 970 case DISCONNECTING:
970 if (xbdi->xbdi_pendingreqs > 0) { 971 if (xbdi->xbdi_pendingreqs > 0) {
971 /* there are pending I/Os. Wait for them. */ 972 /* there are pending I/Os. Wait for them. */
972 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock); 973 cv_wait(&xbdi->xbdi_cv, &xbdi->xbdi_lock);
973 mutex_exit(&xbdi->xbdi_lock); 974 mutex_exit(&xbdi->xbdi_lock);
974 break; 975 break;
975 } 976 }
976  977
977 /* All I/Os should have been processed by now, 978 /* All I/Os should have been processed by now,
978 * xbdi_refcnt should drop to 0 */ 979 * xbdi_refcnt should drop to 0 */
979 xbdi_put(xbdi); 980 xbdi_put(xbdi);
980 KASSERT(xbdi->xbdi_refcnt == 0); 981 KASSERT(xbdi->xbdi_refcnt == 0);
981 mutex_exit(&xbdi->xbdi_lock); 982 mutex_exit(&xbdi->xbdi_lock);
982 kthread_exit(0); 983 kthread_exit(0);
983 break; 984 break;
984 default: 985 default:
985 panic("%s: invalid state %d", 986 panic("%s: invalid state %d",
986 xbdi->xbdi_name, xbdi->xbdi_status); 987 xbdi->xbdi_name, xbdi->xbdi_status);
987 } 988 }
988 } 989 }
989} 990}
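The RUN case above ends in xbdback_trampoline(), the driver of the continuation style used by every xbdback_co_* routine in this file: each step does a bounded amount of work, stores the next step in xbdi_cont, and returns a non-NULL object to keep the loop going or NULL to stall until an event or iodone() restarts processing. A sketch of that driver loop (illustrative; the real xbdback_trampoline() appears later in the file and has additional sanity checks):

static void
example_trampoline(struct xbdback_instance *xbdi, void *obj)
{
    while (obj != NULL && xbdi->xbdi_cont != NULL) {
        void *(*cont)(struct xbdback_instance *, void *);

        cont = xbdi->xbdi_cont;
        obj = (*cont)(xbdi, obj);   /* run one step; it sets the next xbdi_cont */
    }
}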
990 991
991static void * 992static void *
992xbdback_co_main(struct xbdback_instance *xbdi, void *obj) 993xbdback_co_main(struct xbdback_instance *xbdi, void *obj)
993{ 994{
994 (void)obj; 995 (void)obj;
995 996
996 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod; 997 xbdi->xbdi_req_prod = xbdi->xbdi_ring.ring_n.sring->req_prod;
997 xen_rmb(); /* ensure we see all requests up to req_prod */ 998 xen_rmb(); /* ensure we see all requests up to req_prod */
998 /* 999 /*
999 * note that we'll eventually get a full ring of request. 1000 * note that we'll eventually get a full ring of request.
1000 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod) 1001 * in this case, MASK_BLKIF_IDX(req_cons) == MASK_BLKIF_IDX(req_prod)
1001 */ 1002 */
1002 xbdi->xbdi_cont = xbdback_co_main_loop; 1003 xbdi->xbdi_cont = xbdback_co_main_loop;
1003 return xbdi; 1004 return xbdi;
1004} 1005}
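The comment above refers to the free-running ring indices: req_cons and req_prod increase without wrapping and are only reduced modulo the ring size when a slot is accessed, so on a completely full ring the masked values collide even though a whole ring's worth of requests is outstanding. A stand-alone worked example, assuming a 32-slot ring:

#include <assert.h>

#define EX_RING_SIZE    32u
#define EX_MASK_IDX(i)  ((i) & (EX_RING_SIZE - 1))

int
main(void)
{
    unsigned req_cons = 32, req_prod = 64;  /* full ring: prod is one lap ahead */

    assert(EX_MASK_IDX(req_cons) == EX_MASK_IDX(req_prod)); /* masked indices collide */
    assert(req_prod - req_cons == EX_RING_SIZE);    /* yet 32 requests are pending */
    return 0;
}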
1005 1006
1006/* 1007/*
1007 * Fetch a blkif request from the ring, and pass control to the appropriate 1008 * Fetch a blkif request from the ring, and pass control to the appropriate
1008 * continuation. 1009 * continuation.
1009 * If someone asked for disconnection, do not fetch any more requests from 1010 * If someone asked for disconnection, do not fetch any more requests from
1010 * the ring. 1011 * the ring.
1011 */ 1012 */
1012static void * 1013static void *
1013xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj)  1014xbdback_co_main_loop(struct xbdback_instance *xbdi, void *obj)
1014{ 1015{
1015 blkif_request_t *req; 1016 blkif_request_t *req;
1016 blkif_x86_32_request_t *req32; 1017 blkif_x86_32_request_t *req32;
1017 blkif_x86_64_request_t *req64; 1018 blkif_x86_64_request_t *req64;
1018 1019
1019 (void)obj; 1020 (void)obj;
1020 req = &xbdi->xbdi_xen_req; 1021 req = &xbdi->xbdi_xen_req;
1021 if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) { 1022 if (xbdi->xbdi_ring.ring_n.req_cons != xbdi->xbdi_req_prod) {
1022 switch(xbdi->xbdi_proto) { 1023 switch(xbdi->xbdi_proto) {
1023 case XBDIP_NATIVE: 1024 case XBDIP_NATIVE:
1024 memcpy(req, RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n, 1025 memcpy(req, RING_GET_REQUEST(&xbdi->xbdi_ring.ring_n,
1025 xbdi->xbdi_ring.ring_n.req_cons), 1026 xbdi->xbdi_ring.ring_n.req_cons),
1026 sizeof(blkif_request_t)); 1027 sizeof(blkif_request_t));
1027 break; 1028 break;
1028 case XBDIP_32: 1029 case XBDIP_32:
1029 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32, 1030 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
1030 xbdi->xbdi_ring.ring_n.req_cons); 1031 xbdi->xbdi_ring.ring_n.req_cons);
1031 req->operation = req32->operation; 1032 req->operation = req32->operation;
1032 req->nr_segments = req32->nr_segments; 1033 req->nr_segments = req32->nr_segments;
1033 req->handle = req32->handle; 1034 req->handle = req32->handle;
1034 req->id = req32->id; 1035 req->id = req32->id;
1035 req->sector_number = req32->sector_number; 1036 req->sector_number = req32->sector_number;
1036 break; 1037 break;
1037  1038
1038 case XBDIP_64: 1039 case XBDIP_64:
1039 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64, 1040 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
1040 xbdi->xbdi_ring.ring_n.req_cons); 1041 xbdi->xbdi_ring.ring_n.req_cons);
1041 req->operation = req64->operation; 1042 req->operation = req64->operation;
1042 req->nr_segments = req64->nr_segments; 1043 req->nr_segments = req64->nr_segments;
1043 req->handle = req64->handle; 1044 req->handle = req64->handle;
1044 req->id = req64->id; 1045 req->id = req64->id;
1045 req->sector_number = req64->sector_number; 1046 req->sector_number = req64->sector_number;
1046 break; 1047 break;
1047 } 1048 }
1048 __insn_barrier(); 1049 __insn_barrier();
1049 XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x " 1050 XENPRINTF(("xbdback op %d req_cons 0x%x req_prod 0x%x "
1050 "resp_prod 0x%x id %" PRIu64 "\n", req->operation, 1051 "resp_prod 0x%x id %" PRIu64 "\n", req->operation,
1051 xbdi->xbdi_ring.ring_n.req_cons, 1052 xbdi->xbdi_ring.ring_n.req_cons,
1052 xbdi->xbdi_req_prod, 1053 xbdi->xbdi_req_prod,
1053 xbdi->xbdi_ring.ring_n.rsp_prod_pvt, 1054 xbdi->xbdi_ring.ring_n.rsp_prod_pvt,
1054 req->id)); 1055 req->id));
1055 switch(req->operation) { 1056 switch(req->operation) {
1056 case BLKIF_OP_READ: 1057 case BLKIF_OP_READ:
1057 case BLKIF_OP_WRITE: 1058 case BLKIF_OP_WRITE:
1058 xbdi->xbdi_cont = xbdback_co_io; 1059 xbdi->xbdi_cont = xbdback_co_io;
1059 break; 1060 break;
1060 case BLKIF_OP_FLUSH_DISKCACHE: 1061 case BLKIF_OP_FLUSH_DISKCACHE:
1061 xbdi_get(xbdi); 1062 xbdi_get(xbdi);
1062 xbdi->xbdi_cont = xbdback_co_cache_flush; 1063 xbdi->xbdi_cont = xbdback_co_cache_flush;
1063 break; 1064 break;
1064 default: 1065 default:
1065 if (ratecheck(&xbdi->xbdi_lasterr_time, 1066 if (ratecheck(&xbdi->xbdi_lasterr_time,
1066 &xbdback_err_intvl)) { 1067 &xbdback_err_intvl)) {
1067 printf("%s: unknown operation %d\n", 1068 printf("%s: unknown operation %d\n",
1068 xbdi->xbdi_name, req->operation); 1069 xbdi->xbdi_name, req->operation);
1069 } 1070 }
1070 xbdback_send_reply(xbdi, req->id, req->operation, 1071 xbdback_send_reply(xbdi, req->id, req->operation,
1071 BLKIF_RSP_ERROR); 1072 BLKIF_RSP_ERROR);
1072 xbdi->xbdi_cont = xbdback_co_main_incr; 1073 xbdi->xbdi_cont = xbdback_co_main_incr;
1073 break; 1074 break;
1074 } 1075 }
1075 } else { 1076 } else {
1076 xbdi->xbdi_cont = xbdback_co_main_done; 1077 xbdi->xbdi_cont = xbdback_co_main_done;
1077 } 1078 }
1078 return xbdi; 1079 return xbdi;
1079} 1080}
1080 1081
1081/* 1082/*
1082 * Increment the consumer index and move on to the next request. If 1083 * Increment the consumer index and move on to the next request. If
1083 * we want to disconnect, leave the continuation now. 1084 * we want to disconnect, leave the continuation now.
1084 */ 1085 */
1085static void * 1086static void *
1086xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj) 1087xbdback_co_main_incr(struct xbdback_instance *xbdi, void *obj)
1087{ 1088{
1088 (void)obj; 1089 (void)obj;
1089 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n; 1090 blkif_back_ring_t *ring = &xbdi->xbdi_ring.ring_n;
1090 1091
1091 ring->req_cons++; 1092 ring->req_cons++;
1092 1093
1093 /* 1094 /*
1094 * Do not bother with locking here when checking for xbdi_status: if 1095 * Do not bother with locking here when checking for xbdi_status: if
1095 * we get a transient state, we will get the right value at 1096 * we get a transient state, we will get the right value at
1096 * the next increment. 1097 * the next increment.
1097 */ 1098 */
1098 if (xbdi->xbdi_status == DISCONNECTING) 1099 if (xbdi->xbdi_status == DISCONNECTING)
1099 xbdi->xbdi_cont = NULL; 1100 xbdi->xbdi_cont = NULL;
1100 else 1101 else
1101 xbdi->xbdi_cont = xbdback_co_main_loop; 1102 xbdi->xbdi_cont = xbdback_co_main_loop;
1102 1103
1103 /* 1104 /*
1104 * Each time the thread processes a full ring of requests, give 1105 * Each time the thread processes a full ring of requests, give
1105 * other threads a chance to process I/Os too 1106 * other threads a chance to process I/Os too
1106 */ 1107 */
1107 if ((ring->req_cons % BLKIF_RING_SIZE) == 0) 1108 if ((ring->req_cons % BLKIF_RING_SIZE) == 0)
1108 yield(); 1109 yield();
1109 1110
1110 return xbdi; 1111 return xbdi;
1111} 1112}
1112 1113
1113/* 1114/*
1114 * Ring processing is over. If there are any I/Os still present for this 1115 * Ring processing is over. If there are any I/Os still present for this
1115 * instance, handle them first. 1116 * instance, handle them first.
1116 */ 1117 */
1117static void * 1118static void *
1118xbdback_co_main_done(struct xbdback_instance *xbdi, void *obj) 1119xbdback_co_main_done(struct xbdback_instance *xbdi, void *obj)
1119{ 1120{
1120 (void)obj; 1121 (void)obj;
1121 if (xbdi->xbdi_io != NULL) { 1122 if (xbdi->xbdi_io != NULL) {
1122 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ || 1123 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
1123 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE); 1124 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
1124 xbdi->xbdi_cont = xbdback_co_map_io; 1125 xbdi->xbdi_cont = xbdback_co_map_io;
1125 xbdi->xbdi_cont_aux = xbdback_co_main_done2; 1126 xbdi->xbdi_cont_aux = xbdback_co_main_done2;
1126 } else { 1127 } else {
1127 xbdi->xbdi_cont = xbdback_co_main_done2; 1128 xbdi->xbdi_cont = xbdback_co_main_done2;
1128 } 1129 }
1129 return xbdi; 1130 return xbdi;
1130} 1131}
1131 1132
1132/* 1133/*
1133 * Check for requests in the instance's ring. If there are any, start again 1134 * Check for requests in the instance's ring. If there are any, start again
1134 * from the beginning. If not, stall. 1135 * from the beginning. If not, stall.
1135 */ 1136 */
1136static void * 1137static void *
1137xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj) 1138xbdback_co_main_done2(struct xbdback_instance *xbdi, void *obj)
1138{ 1139{
1139 int work_to_do; 1140 int work_to_do;
1140 1141
1141 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do); 1142 RING_FINAL_CHECK_FOR_REQUESTS(&xbdi->xbdi_ring.ring_n, work_to_do);
1142 if (work_to_do) 1143 if (work_to_do)
1143 xbdi->xbdi_cont = xbdback_co_main; 1144 xbdi->xbdi_cont = xbdback_co_main;
1144 else 1145 else
1145 xbdi->xbdi_cont = NULL; 1146 xbdi->xbdi_cont = NULL;
1146 1147
1147 return xbdi; 1148 return xbdi;
1148} 1149}
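RING_FINAL_CHECK_FOR_REQUESTS() is the lost-wakeup guard from the Xen shared-ring headers: before the backend concludes the ring is empty and goes idle, it re-arms the frontend's req_event index and checks once more, so a request published between the two checks still generates an event. Roughly equivalent to the following simplified sketch (not the literal macro expansion):

static int
example_final_check(blkif_back_ring_t *ring)
{
    int work_to_do = RING_HAS_UNCONSUMED_REQUESTS(ring);

    if (!work_to_do) {
        /* re-arm the event index, then look one last time */
        ring->sring->req_event = ring->req_cons + 1;
        xen_mb();
        work_to_do = RING_HAS_UNCONSUMED_REQUESTS(ring);
    }
    return work_to_do;
}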
1149 1150
1150/* 1151/*
1151 * Frontend requested a cache flush operation. 1152 * Frontend requested a cache flush operation.
1152 */ 1153 */
1153static void * 1154static void *
1154xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj) 1155xbdback_co_cache_flush(struct xbdback_instance *xbdi, void *obj)
1155{ 1156{
1156 (void)obj; 1157 (void)obj;
1157 1158
1158 XENPRINTF(("xbdback_co_cache_flush %p %p\n", xbdi, obj)); 1159 XENPRINTF(("xbdback_co_cache_flush %p %p\n", xbdi, obj));
1159 if (xbdi->xbdi_io != NULL) { 1160 if (xbdi->xbdi_io != NULL) {
1160 /* Some I/Os are required for this instance. Process them. */ 1161 /* Some I/Os are required for this instance. Process them. */
1161 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ || 1162 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
1162 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE); 1163 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
1163 KASSERT(xbdi->xbdi_pendingreqs > 0); 1164 KASSERT(xbdi->xbdi_pendingreqs > 0);
1164 xbdi->xbdi_cont = xbdback_co_map_io; 1165 xbdi->xbdi_cont = xbdback_co_map_io;
1165 xbdi->xbdi_cont_aux = xbdback_co_cache_flush2; 1166 xbdi->xbdi_cont_aux = xbdback_co_cache_flush2;
1166 } else { 1167 } else {
1167 xbdi->xbdi_cont = xbdback_co_cache_flush2; 1168 xbdi->xbdi_cont = xbdback_co_cache_flush2;
1168 } 1169 }
1169 return xbdi; 1170 return xbdi;
1170} 1171}
1171 1172
1172static void * 1173static void *
1173xbdback_co_cache_flush2(struct xbdback_instance *xbdi, void *obj) 1174xbdback_co_cache_flush2(struct xbdback_instance *xbdi, void *obj)
1174{ 1175{
1175 (void)obj; 1176 (void)obj;
1176 XENPRINTF(("xbdback_co_cache_flush2 %p %p\n", xbdi, obj)); 1177 XENPRINTF(("xbdback_co_cache_flush2 %p %p\n", xbdi, obj));
1177 if (xbdi->xbdi_pendingreqs > 0) { 1178 if (xbdi->xbdi_pendingreqs > 0) {
1178 /* 1179 /*
1179 * There are pending requests. 1180 * There are pending requests.
1180 * Event or iodone() will restart processing 1181 * Event or iodone() will restart processing
1181 */ 1182 */
1182 xbdi->xbdi_cont = NULL; 1183 xbdi->xbdi_cont = NULL;
1183 xbdi_put(xbdi); 1184 xbdi_put(xbdi);
1184 return NULL; 1185 return NULL;
1185 } 1186 }
1186 xbdi->xbdi_cont = xbdback_co_cache_doflush; 1187 xbdi->xbdi_cont = xbdback_co_cache_doflush;
1187 return xbdback_pool_get(&xbdback_io_pool, xbdi); 1188 return xbdback_pool_get(&xbdback_io_pool, xbdi);
1188} 1189}
1189 1190
1190/* Start the flush work */ 1191/* Start the flush work */
1191static void * 1192static void *
1192xbdback_co_cache_doflush(struct xbdback_instance *xbdi, void *obj) 1193xbdback_co_cache_doflush(struct xbdback_instance *xbdi, void *obj)
1193{ 1194{
1194 struct xbdback_io *xbd_io; 1195 struct xbdback_io *xbd_io;
1195 1196
1196 XENPRINTF(("xbdback_co_cache_doflush %p %p\n", xbdi, obj)); 1197 XENPRINTF(("xbdback_co_cache_doflush %p %p\n", xbdi, obj));
1197 xbd_io = xbdi->xbdi_io = obj; 1198 xbd_io = xbdi->xbdi_io = obj;
1198 xbd_io->xio_xbdi = xbdi; 1199 xbd_io->xio_xbdi = xbdi;
1199 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation; 1200 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation;
1200 xbd_io->xio_flush_id = xbdi->xbdi_xen_req.id; 1201 xbd_io->xio_flush_id = xbdi->xbdi_xen_req.id;
1201 xbdi->xbdi_cont = xbdback_co_do_io; 1202 xbdi->xbdi_cont = xbdback_co_do_io;
1202 return xbdi; 1203 return xbdi;
1203} 1204}
1204 1205
1205/* 1206/*
1206 * A read or write I/O request must be processed. Do some checks first, 1207 * A read or write I/O request must be processed. Do some checks first,
1207 * then get the segment information directly from the ring request. 1208 * then get the segment information directly from the ring request.
1208 */ 1209 */
1209static void * 1210static void *
1210xbdback_co_io(struct xbdback_instance *xbdi, void *obj) 1211xbdback_co_io(struct xbdback_instance *xbdi, void *obj)
1211{  1212{
1212 int i, error; 1213 int i, error;
1213 blkif_request_t *req; 1214 blkif_request_t *req;
1214 blkif_x86_32_request_t *req32; 1215 blkif_x86_32_request_t *req32;
1215 blkif_x86_64_request_t *req64; 1216 blkif_x86_64_request_t *req64;
1216 1217
1217 (void)obj; 1218 (void)obj;
1218 1219
1219 /* some sanity checks */ 1220 /* some sanity checks */
1220 req = &xbdi->xbdi_xen_req; 1221 req = &xbdi->xbdi_xen_req;
1221 if (req->nr_segments < 1 || 1222 if (req->nr_segments < 1 ||
1222 req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) { 1223 req->nr_segments > BLKIF_MAX_SEGMENTS_PER_REQUEST) {
1223 if (ratecheck(&xbdi->xbdi_lasterr_time, 1224 if (ratecheck(&xbdi->xbdi_lasterr_time,
1224 &xbdback_err_intvl)) { 1225 &xbdback_err_intvl)) {
1225 printf("%s: invalid number of segments: %d\n", 1226 printf("%s: invalid number of segments: %d\n",
1226 xbdi->xbdi_name, 1227 xbdi->xbdi_name,
1227 xbdi->xbdi_xen_req.nr_segments); 1228 xbdi->xbdi_xen_req.nr_segments);
1228 } 1229 }
1229 error = EINVAL; 1230 error = EINVAL;
1230 goto end; 1231 goto end;
1231 } 1232 }
1232 1233
1233 KASSERT(req->operation == BLKIF_OP_READ || 1234 KASSERT(req->operation == BLKIF_OP_READ ||
1234 req->operation == BLKIF_OP_WRITE); 1235 req->operation == BLKIF_OP_WRITE);
1235 if (req->operation == BLKIF_OP_WRITE) { 1236 if (req->operation == BLKIF_OP_WRITE) {
1236 if (xbdi->xbdi_ro) { 1237 if (xbdi->xbdi_ro) {
1237 error = EROFS; 1238 error = EROFS;
1238 goto end; 1239 goto end;
1239 } 1240 }
1240 } 1241 }
1241 1242
1242 xbdi->xbdi_segno = 0; 1243 xbdi->xbdi_segno = 0;
1243 1244
1244 /* copy request segments */ 1245 /* copy request segments */
1245 switch(xbdi->xbdi_proto) { 1246 switch(xbdi->xbdi_proto) {
1246 case XBDIP_NATIVE: 1247 case XBDIP_NATIVE:
1247 /* already copied in xbdback_co_main_loop */ 1248 /* already copied in xbdback_co_main_loop */
1248 break; 1249 break;
1249 case XBDIP_32: 1250 case XBDIP_32:
1250 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32, 1251 req32 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_32,
1251 xbdi->xbdi_ring.ring_n.req_cons); 1252 xbdi->xbdi_ring.ring_n.req_cons);
1252 for (i = 0; i < req->nr_segments; i++) 1253 for (i = 0; i < req->nr_segments; i++)
1253 req->seg[i] = req32->seg[i]; 1254 req->seg[i] = req32->seg[i];
1254 break; 1255 break;
1255 case XBDIP_64: 1256 case XBDIP_64:
1256 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64, 1257 req64 = RING_GET_REQUEST(&xbdi->xbdi_ring.ring_64,
1257 xbdi->xbdi_ring.ring_n.req_cons); 1258 xbdi->xbdi_ring.ring_n.req_cons);
1258 for (i = 0; i < req->nr_segments; i++) 1259 for (i = 0; i < req->nr_segments; i++)
1259 req->seg[i] = req64->seg[i]; 1260 req->seg[i] = req64->seg[i];
1260 break; 1261 break;
1261 } 1262 }
1262 1263
1263 xbdi->xbdi_cont = xbdback_co_io_gotreq; 1264 xbdi->xbdi_cont = xbdback_co_io_gotreq;
1264 return xbdback_pool_get(&xbdback_request_pool, xbdi); 1265 return xbdback_pool_get(&xbdback_request_pool, xbdi);
1265 1266
1266 end: 1267 end:
1267 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id, 1268 xbdback_send_reply(xbdi, xbdi->xbdi_xen_req.id,
1268 xbdi->xbdi_xen_req.operation, error); 1269 xbdi->xbdi_xen_req.operation, error);
1269 xbdi->xbdi_cont = xbdback_co_main_incr; 1270 xbdi->xbdi_cont = xbdback_co_main_incr;
1270 return xbdi; 1271 return xbdi;
1271} 1272}
1272 1273
1273/* 1274/*
1274 * We have fetched segment requests from the ring. If there are already 1275 * We have fetched segment requests from the ring. If there are already
1275 * I/Os prepared for this instance, we can try coalescing the requests 1276 * I/Os prepared for this instance, we can try coalescing the requests
1276 * with these I/Os. 1277 * with these I/Os.
1277 */ 1278 */
1278static void * 1279static void *
1279xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj) 1280xbdback_co_io_gotreq(struct xbdback_instance *xbdi, void *obj)
1280{ 1281{
1281 struct xbdback_request *xrq; 1282 struct xbdback_request *xrq;
1282 1283
1283 xrq = xbdi->xbdi_req = obj; 1284 xrq = xbdi->xbdi_req = obj;
1284  1285
1285 xrq->rq_xbdi = xbdi; 1286 xrq->rq_xbdi = xbdi;
1286 xrq->rq_iocount = 0; 1287 xrq->rq_iocount = 0;
1287 xrq->rq_ioerrs = 0; 1288 xrq->rq_ioerrs = 0;
1288 xrq->rq_id = xbdi->xbdi_xen_req.id; 1289 xrq->rq_id = xbdi->xbdi_xen_req.id;
1289 xrq->rq_operation = xbdi->xbdi_xen_req.operation; 1290 xrq->rq_operation = xbdi->xbdi_xen_req.operation;
1290 KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ || 1291 KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ ||
1291 xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE); 1292 xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE);
1292 1293
1293 /*  1294 /*
1294 * Request-level reasons not to coalesce: different device, 1295 * Request-level reasons not to coalesce: different device,
1295 * different op, or noncontiguous disk sectors (vs. previous 1296 * different op, or noncontiguous disk sectors (vs. previous
1296 * request handed to us). 1297 * request handed to us).
1297 */ 1298 */
1298 xbdi->xbdi_cont = xbdback_co_io_loop; 1299 xbdi->xbdi_cont = xbdback_co_io_loop;
1299 if (xbdi->xbdi_io != NULL) { 1300 if (xbdi->xbdi_io != NULL) {
1300 struct xbdback_request *last_req; 1301 struct xbdback_request *last_req;
1301 last_req = SLIST_FIRST(&xbdi->xbdi_io->xio_rq)->car; 1302 last_req = SLIST_FIRST(&xbdi->xbdi_io->xio_rq)->car;
1302 XENPRINTF(("xbdback_io domain %d: hoping for sector %" PRIu64 1303 XENPRINTF(("xbdback_io domain %d: hoping for sector %" PRIu64
1303 "; got %" PRIu64 "\n", xbdi->xbdi_domid, 1304 "; got %" PRIu64 "\n", xbdi->xbdi_domid,
1304 xbdi->xbdi_next_sector, 1305 xbdi->xbdi_next_sector,
1305 xbdi->xbdi_xen_req.sector_number)); 1306 xbdi->xbdi_xen_req.sector_number));
1306 if ((xrq->rq_operation != last_req->rq_operation) 1307 if ((xrq->rq_operation != last_req->rq_operation)
1307 || (xbdi->xbdi_xen_req.sector_number != 1308 || (xbdi->xbdi_xen_req.sector_number !=
1308 xbdi->xbdi_next_sector)) { 1309 xbdi->xbdi_next_sector)) {
1309 XENPRINTF(("xbdback_io domain %d: segment break\n", 1310 XENPRINTF(("xbdback_io domain %d: segment break\n",
1310 xbdi->xbdi_domid)); 1311 xbdi->xbdi_domid));
1311 xbdi->xbdi_next_sector = 1312 xbdi->xbdi_next_sector =
1312 xbdi->xbdi_xen_req.sector_number; 1313 xbdi->xbdi_xen_req.sector_number;
1313 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ || 1314 KASSERT(xbdi->xbdi_io->xio_operation == BLKIF_OP_READ ||
1314 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE); 1315 xbdi->xbdi_io->xio_operation == BLKIF_OP_WRITE);
1315 xbdi->xbdi_cont_aux = xbdback_co_io_loop; 1316 xbdi->xbdi_cont_aux = xbdback_co_io_loop;
1316 xbdi->xbdi_cont = xbdback_co_map_io; 1317 xbdi->xbdi_cont = xbdback_co_map_io;
1317 } 1318 }
1318 } else { 1319 } else {
1319 xbdi->xbdi_next_sector = xbdi->xbdi_xen_req.sector_number; 1320 xbdi->xbdi_next_sector = xbdi->xbdi_xen_req.sector_number;
1320 } 1321 }
1321 return xbdi; 1322 return xbdi;
1322} 1323}
1323 1324
1324/* Handle coalescing of multiple segment requests into one I/O work */ 1325/* Handle coalescing of multiple segment requests into one I/O work */
1325static void * 1326static void *
1326xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj) 1327xbdback_co_io_loop(struct xbdback_instance *xbdi, void *obj)
1327{ 1328{
1328 (void)obj; 1329 (void)obj;
1329 KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ || 1330 KASSERT(xbdi->xbdi_req->rq_operation == BLKIF_OP_READ ||
1330 xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE); 1331 xbdi->xbdi_req->rq_operation == BLKIF_OP_WRITE);
1331 if (xbdi->xbdi_segno < xbdi->xbdi_xen_req.nr_segments) { 1332 if (xbdi->xbdi_segno < xbdi->xbdi_xen_req.nr_segments) {
1332 uint8_t this_fs, this_ls, last_ls; 1333 uint8_t this_fs, this_ls, last_ls;
1333 grant_ref_t thisgrt; 1334 grant_ref_t thisgrt;
1334 /*  1335 /*
1335 * Segment-level reason to coalesce: handling full 1336 * Segment-level reason to coalesce: handling full
1336 * pages, or adjacent sector ranges from the same page 1337 * pages, or adjacent sector ranges from the same page
1337 * (and yes, the latter does happen). But not if the 1338 * (and yes, the latter does happen). But not if the
1338 * array of client pseudo-physical pages is full. 1339 * array of client pseudo-physical pages is full.
1339 */ 1340 */
1340 this_fs = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].first_sect; 1341 this_fs = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].first_sect;
1341 this_ls = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].last_sect; 1342 this_ls = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].last_sect;
1342 thisgrt = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].gref; 1343 thisgrt = xbdi->xbdi_xen_req.seg[xbdi->xbdi_segno].gref;
1343 XENPRINTF(("xbdback_io domain %d: " 1344 XENPRINTF(("xbdback_io domain %d: "
1344 "first,last_sect[%d]=0%o,0%o\n", 1345 "first,last_sect[%d]=0%o,0%o\n",
1345 xbdi->xbdi_domid, xbdi->xbdi_segno, 1346 xbdi->xbdi_domid, xbdi->xbdi_segno,
1346 this_fs, this_ls)); 1347 this_fs, this_ls));
1347 last_ls = xbdi->xbdi_last_ls = xbdi->xbdi_this_ls; 1348 last_ls = xbdi->xbdi_last_ls = xbdi->xbdi_this_ls;
1348 xbdi->xbdi_this_fs = this_fs; 1349 xbdi->xbdi_this_fs = this_fs;
1349 xbdi->xbdi_this_ls = this_ls; 1350 xbdi->xbdi_this_ls = this_ls;
1350 xbdi->xbdi_thisgrt = thisgrt; 1351 xbdi->xbdi_thisgrt = thisgrt;
1351 if (xbdi->xbdi_io != NULL) { 1352 if (xbdi->xbdi_io != NULL) {
1352 if (last_ls == VBD_MAXSECT 1353 if (last_ls == VBD_MAXSECT
1353 && this_fs == 0 1354 && this_fs == 0
1354 && xbdi->xbdi_io->xio_nrma 1355 && xbdi->xbdi_io->xio_nrma
1355 < XENSHM_MAX_PAGES_PER_REQUEST) { 1356 < XENSHM_MAX_PAGES_PER_REQUEST) {
1356 xbdi->xbdi_same_page = 0; 1357 xbdi->xbdi_same_page = 0;
1357 } else if (last_ls + 1 1358 } else if (last_ls + 1
1358 == this_fs 1359 == this_fs
1359#ifdef notyet 1360#ifdef notyet
1360 && (last_fas & ~PAGE_MASK) 1361 && (last_fas & ~PAGE_MASK)
1361 == (this_fas & ~PAGE_MASK) 1362 == (this_fas & ~PAGE_MASK)
1362#else  1363#else
1363 && 0 /* can't know frame number yet */ 1364 && 0 /* can't know frame number yet */
1364#endif 1365#endif
1365 ) { 1366 ) {
1366#ifdef DEBUG 1367#ifdef DEBUG
1367 if (ratecheck(&xbdi->xbdi_lastfragio_time, 1368 if (ratecheck(&xbdi->xbdi_lastfragio_time,
1368 &xbdback_fragio_intvl)) 1369 &xbdback_fragio_intvl))
1369 printf("%s: domain is sending" 1370 printf("%s: domain is sending"
1370 " excessively fragmented I/O\n", 1371 " excessively fragmented I/O\n",
1371 xbdi->xbdi_name); 1372 xbdi->xbdi_name);
1372#endif 1373#endif
1373 printf("xbdback_io: would maybe glue " 1374 printf("xbdback_io: would maybe glue "
1374 "same page sec %d (%d->%d)\n", 1375 "same page sec %d (%d->%d)\n",
1375 xbdi->xbdi_segno, this_fs, this_ls); 1376 xbdi->xbdi_segno, this_fs, this_ls);
1376 XENPRINTF(("xbdback_io domain %d: glue same " 1377 XENPRINTF(("xbdback_io domain %d: glue same "
1377 "page", xbdi->xbdi_domid)); 1378 "page", xbdi->xbdi_domid));
1378 panic("notyet!"); 1379 panic("notyet!");
1379 xbdi->xbdi_same_page = 1; 1380 xbdi->xbdi_same_page = 1;
1380 } else { 1381 } else {
1381 KASSERT(xbdi->xbdi_io->xio_operation == 1382 KASSERT(xbdi->xbdi_io->xio_operation ==
1382 BLKIF_OP_READ || 1383 BLKIF_OP_READ ||
1383 xbdi->xbdi_io->xio_operation == 1384 xbdi->xbdi_io->xio_operation ==
1384 BLKIF_OP_WRITE); 1385 BLKIF_OP_WRITE);
1385 xbdi->xbdi_cont_aux = xbdback_co_io_loop; 1386 xbdi->xbdi_cont_aux = xbdback_co_io_loop;
1386 xbdi->xbdi_cont = xbdback_co_map_io; 1387 xbdi->xbdi_cont = xbdback_co_map_io;
1387 return xbdi; 1388 return xbdi;
1388 } 1389 }
1389 } else 1390 } else
1390 xbdi->xbdi_same_page = 0; 1391 xbdi->xbdi_same_page = 0;
1391 1392
1392 if (xbdi->xbdi_io == NULL) { 1393 if (xbdi->xbdi_io == NULL) {
1393 xbdi->xbdi_cont = xbdback_co_io_gotio; 1394 xbdi->xbdi_cont = xbdback_co_io_gotio;
1394 return xbdback_pool_get(&xbdback_io_pool, xbdi); 1395 return xbdback_pool_get(&xbdback_io_pool, xbdi);
1395 } else { 1396 } else {
1396 xbdi->xbdi_cont = xbdback_co_io_gotio2; 1397 xbdi->xbdi_cont = xbdback_co_io_gotio2;
1397 } 1398 }
1398 } else { 1399 } else {
1399 /* done with the loop over segments; get next request */ 1400 /* done with the loop over segments; get next request */
1400 xbdi->xbdi_cont = xbdback_co_main_incr; 1401 xbdi->xbdi_cont = xbdback_co_main_incr;
1401 } 1402 }
1402 return xbdi; 1403 return xbdi;
1403} 1404}
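Stripped of the continuation plumbing, the test that lets a new segment join the I/O currently being built is a three-part predicate: the previous segment must have ended on its page's last sector, the new one must start at sector 0 of a fresh page, and there must still be room in the array of client pages mapped into this I/O. Distilled into a hypothetical helper:

static bool
example_segments_chain(uint8_t last_ls, uint8_t this_fs, int nrma)
{
    return last_ls == VBD_MAXSECT &&            /* previous segment filled its page */
        this_fs == 0 &&                         /* new one starts a fresh page */
        nrma < XENSHM_MAX_PAGES_PER_REQUEST;    /* room to map another page */
}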
1404 1405
1405/* Prepare an I/O buffer for a xbdback instance */ 1406/* Prepare an I/O buffer for a xbdback instance */
1406static void * 1407static void *
1407xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj) 1408xbdback_co_io_gotio(struct xbdback_instance *xbdi, void *obj)
1408{ 1409{
1409 struct xbdback_io *xbd_io; 1410 struct xbdback_io *xbd_io;
1410 vaddr_t start_offset; /* start offset in vm area */ 1411 vaddr_t start_offset; /* start offset in vm area */
1411 int buf_flags; 1412 int buf_flags;
1412 1413
1413 xbdi_get(xbdi); 1414 xbdi_get(xbdi);
1414 atomic_inc_uint(&xbdi->xbdi_pendingreqs); 1415 atomic_inc_uint(&xbdi->xbdi_pendingreqs);
1415  1416
1416 xbd_io = xbdi->xbdi_io = obj; 1417 xbd_io = xbdi->xbdi_io = obj;
1417 buf_init(&xbd_io->xio_buf); 1418 buf_init(&xbd_io->xio_buf);
1418 xbd_io->xio_xbdi = xbdi; 1419 xbd_io->xio_xbdi = xbdi;
1419 SLIST_INIT(&xbd_io->xio_rq); 1420 SLIST_INIT(&xbd_io->xio_rq);
1420 xbd_io->xio_nrma = 0; 1421 xbd_io->xio_nrma = 0;
1421 xbd_io->xio_mapped = 0; 1422 xbd_io->xio_mapped = 0;
1422 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation; 1423 xbd_io->xio_operation = xbdi->xbdi_xen_req.operation;
1423 1424
1424 start_offset = xbdi->xbdi_this_fs * VBD_BSIZE; 1425 start_offset = xbdi->xbdi_this_fs * VBD_BSIZE;
1425 KASSERT(start_offset < PAGE_SIZE); 1426 KASSERT(start_offset < PAGE_SIZE);
1426 1427
1427 if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) { 1428 if (xbdi->xbdi_xen_req.operation == BLKIF_OP_WRITE) {
1428 buf_flags = B_WRITE; 1429 buf_flags = B_WRITE;
1429 } else { 1430 } else {
1430 buf_flags = B_READ; 1431 buf_flags = B_READ;
1431 } 1432 }
1432 1433
1433 xbd_io->xio_buf.b_flags = buf_flags; 1434 xbd_io->xio_buf.b_flags = buf_flags;
1434 xbd_io->xio_buf.b_cflags = 0; 1435 xbd_io->xio_buf.b_cflags = 0;
1435 xbd_io->xio_buf.b_oflags = 0; 1436 xbd_io->xio_buf.b_oflags = 0;
1436 xbd_io->xio_buf.b_iodone = xbdback_iodone; 1437 xbd_io->xio_buf.b_iodone = xbdback_iodone;
1437 xbd_io->xio_buf.b_proc = NULL; 1438 xbd_io->xio_buf.b_proc = NULL;
1438 xbd_io->xio_buf.b_vp = xbdi->xbdi_vp; 1439 xbd_io->xio_buf.b_vp = xbdi->xbdi_vp;
1439 xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock; 1440 xbd_io->xio_buf.b_objlock = xbdi->xbdi_vp->v_interlock;
1440 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev; 1441 xbd_io->xio_buf.b_dev = xbdi->xbdi_dev;
1441 xbd_io->xio_buf.b_blkno = xbdi->xbdi_next_sector; 1442 xbd_io->xio_buf.b_blkno = xbdi->xbdi_next_sector;
1442 xbd_io->xio_buf.b_bcount = 0; 1443 xbd_io->xio_buf.b_bcount = 0;
1443 xbd_io->xio_buf.b_data = (void *)start_offset; 1444 xbd_io->xio_buf.b_data = (void *)start_offset;
1444 xbd_io->xio_buf.b_private = xbd_io; 1445 xbd_io->xio_buf.b_private = xbd_io;
1445 1446
1446 xbdi->xbdi_cont = xbdback_co_io_gotio2; 1447 xbdi->xbdi_cont = xbdback_co_io_gotio2;
1447 return xbdi; 1448 return xbdi;
1448} 1449}
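start_offset above is a byte offset within the first client page of the I/O: with 512-byte VBD sectors and 4 KB pages, first_sect can be at most 7, so the product always stays below PAGE_SIZE, which is exactly what the KASSERT verifies. A stand-alone arithmetic check (sector and page sizes are assumed here, not taken from the kernel headers):

#include <assert.h>

int
main(void)
{
    const unsigned vbd_bsize = 512;     /* assumed VBD_BSIZE */
    const unsigned page_size = 4096;    /* assumed PAGE_SIZE */

    for (unsigned first_sect = 0; first_sect <= 7; first_sect++)
        assert(first_sect * vbd_bsize < page_size); /* always fits in one page */
    return 0;
}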
1449 1450
1450/* Manage fragments */ 1451/* Manage fragments */