Tue Apr 14 13:02:40 2020 UTC
add forgotten mutex_exit() in detach; its absence caused a panic in uvm_unmap1()
due to a held spin lock


(jdolecek)
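The change itself is small: in xbd_xenbus_detach(), the softc lock taken for the grant-release wait loop was never dropped, so the subsequent uvm_km_free() of the shared ring page ran with sc_lock (a spin mutex, initialized at IPL_BIO) still held and panicked in uvm_unmap1(). Below is a minimal sketch of the corrected teardown, condensed from the r1.110 code in the diff that follows (surrounding cleanup omitted); it is an illustration, not the verbatim source:

	/* Wait for the backend to release the grant on the shared ring. */
	mutex_enter(&sc->sc_lock);
	while (xengnt_status(sc->sc_ring_gntref))
		cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
	mutex_exit(&sc->sc_lock);	/* the forgotten unlock added in r1.110 */

	/* Now safe to tear down the ring with the spin lock released. */
	xengnt_revoke_access(sc->sc_ring_gntref);
	uvm_km_free(kernel_map, (vaddr_t)sc->sc_ring.sring,
	    PAGE_SIZE, UVM_KMF_WIRED);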
diff -r1.109 -r1.110 src/sys/arch/xen/xen/xbd_xenbus.c


--- src/sys/arch/xen/xen/xbd_xenbus.c 2020/04/14 09:27:28 1.109
+++ src/sys/arch/xen/xen/xbd_xenbus.c 2020/04/14 13:02:40 1.110
@@ -1,1116 +1,1117 @@
1/* $NetBSD: xbd_xenbus.c,v 1.109 2020/04/14 09:27:28 jdolecek Exp $ */ 1/* $NetBSD: xbd_xenbus.c,v 1.110 2020/04/14 13:02:40 jdolecek Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2006 Manuel Bouyer. 4 * Copyright (c) 2006 Manuel Bouyer.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * 25 *
26 */ 26 */
27 27
28/* 28/*
29 * The file contains the xbd frontend code required for block-level 29 * The file contains the xbd frontend code required for block-level
30 * communications (similar to hard disks) between two Xen domains. 30 * communications (similar to hard disks) between two Xen domains.
31 * 31 *
32 * We are not supposed to receive solicitations spontaneously from backend. The 32 * We are not supposed to receive solicitations spontaneously from backend. The
33 * protocol is therefore fairly simple and uses only one ring to communicate 33 * protocol is therefore fairly simple and uses only one ring to communicate
34 * with backend: frontend posts requests to the ring then wait for their 34 * with backend: frontend posts requests to the ring then wait for their
35 * replies asynchronously. 35 * replies asynchronously.
36 * 36 *
37 * xbd follows NetBSD's disk(9) convention. At any time, a LWP can schedule 37 * xbd follows NetBSD's disk(9) convention. At any time, a LWP can schedule
38 * an operation request for the device (be it open(), read(), write(), ...). 38 * an operation request for the device (be it open(), read(), write(), ...).
39 * Calls are typically processed that way: 39 * Calls are typically processed that way:
40 * - initiate request: xbdread/write/open/ioctl/.. 40 * - initiate request: xbdread/write/open/ioctl/..
41 * - depending on operation, it is handled directly by disk(9) subsystem or 41 * - depending on operation, it is handled directly by disk(9) subsystem or
42 * goes through physio(9) first. 42 * goes through physio(9) first.
43 * - the request is ultimately processed by xbd_diskstart() that prepares the 43 * - the request is ultimately processed by xbd_diskstart() that prepares the
44 * xbd requests, post them in the ring I/O queue, then signal the backend. 44 * xbd requests, post them in the ring I/O queue, then signal the backend.
45 * 45 *
46 * When a response is available in the queue, the backend signals the frontend 46 * When a response is available in the queue, the backend signals the frontend
47 * via its event channel. This triggers xbd_handler(), which will link back 47 * via its event channel. This triggers xbd_handler(), which will link back
48 * the response to its request through the request ID, and mark the I/O as 48 * the response to its request through the request ID, and mark the I/O as
49 * completed. 49 * completed.
50 */ 50 */
51 51
52#include <sys/cdefs.h> 52#include <sys/cdefs.h>
53__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.109 2020/04/14 09:27:28 jdolecek Exp $"); 53__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.110 2020/04/14 13:02:40 jdolecek Exp $");
54 54
55#include "opt_xen.h" 55#include "opt_xen.h"
56 56
57 57
58#include <sys/param.h> 58#include <sys/param.h>
59#include <sys/buf.h> 59#include <sys/buf.h>
60#include <sys/bufq.h> 60#include <sys/bufq.h>
61#include <sys/device.h> 61#include <sys/device.h>
62#include <sys/disk.h> 62#include <sys/disk.h>
63#include <sys/disklabel.h> 63#include <sys/disklabel.h>
64#include <sys/conf.h> 64#include <sys/conf.h>
65#include <sys/fcntl.h> 65#include <sys/fcntl.h>
66#include <sys/kernel.h> 66#include <sys/kernel.h>
67#include <sys/proc.h> 67#include <sys/proc.h>
68#include <sys/systm.h> 68#include <sys/systm.h>
69#include <sys/stat.h> 69#include <sys/stat.h>
70#include <sys/vnode.h> 70#include <sys/vnode.h>
71#include <sys/mutex.h> 71#include <sys/mutex.h>
72 72
73#include <dev/dkvar.h> 73#include <dev/dkvar.h>
74 74
75#include <uvm/uvm.h> 75#include <uvm/uvm.h>
76 76
77#include <xen/hypervisor.h> 77#include <xen/hypervisor.h>
78#include <xen/evtchn.h> 78#include <xen/evtchn.h>
79#include <xen/granttables.h> 79#include <xen/granttables.h>
80#include <xen/include/public/io/blkif.h> 80#include <xen/include/public/io/blkif.h>
81#include <xen/include/public/io/protocols.h> 81#include <xen/include/public/io/protocols.h>
82 82
83#include <xen/xenbus.h> 83#include <xen/xenbus.h>
84#include "locators.h" 84#include "locators.h"
85 85
86#undef XBD_DEBUG 86#undef XBD_DEBUG
87#ifdef XBD_DEBUG 87#ifdef XBD_DEBUG
88#define DPRINTF(x) printf x; 88#define DPRINTF(x) printf x;
89#else 89#else
90#define DPRINTF(x) 90#define DPRINTF(x)
91#endif 91#endif
92 92
93#define GRANT_INVALID_REF -1 93#define GRANT_INVALID_REF -1
94 94
95#define XBD_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 95#define XBD_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
96#define XBD_MAX_XFER (PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST) 96#define XBD_MAX_XFER (PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST)
97 97
98#define XEN_BSHIFT 9 /* log2(XEN_BSIZE) */ 98#define XEN_BSHIFT 9 /* log2(XEN_BSIZE) */
99#define XEN_BSIZE (1 << XEN_BSHIFT)  99#define XEN_BSIZE (1 << XEN_BSHIFT)
100 100
101struct xbd_req { 101struct xbd_req {
102 SLIST_ENTRY(xbd_req) req_next; 102 SLIST_ENTRY(xbd_req) req_next;
103 uint16_t req_id; /* ID passed to backend */ 103 uint16_t req_id; /* ID passed to backend */
104 bus_dmamap_t req_dmamap; 104 bus_dmamap_t req_dmamap;
105 union { 105 union {
106 struct { 106 struct {
107 grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 107 grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
108 struct buf *req_bp; /* buffer associated with this request */ 108 struct buf *req_bp; /* buffer associated with this request */
109 void *req_data; /* pointer to the data buffer */ 109 void *req_data; /* pointer to the data buffer */
110 } req_rw; 110 } req_rw;
111 struct { 111 struct {
112 int s_error; 112 int s_error;
113 int s_done; 113 int s_done;
114 } req_sync; 114 } req_sync;
115 } u; 115 } u;
116}; 116};
117#define req_gntref u.req_rw.req_gntref 117#define req_gntref u.req_rw.req_gntref
118#define req_bp u.req_rw.req_bp 118#define req_bp u.req_rw.req_bp
119#define req_data u.req_rw.req_data 119#define req_data u.req_rw.req_data
120#define req_sync u.req_sync 120#define req_sync u.req_sync
121 121
122struct xbd_xenbus_softc { 122struct xbd_xenbus_softc {
123 struct dk_softc sc_dksc; /* Must be first in this struct */ 123 struct dk_softc sc_dksc; /* Must be first in this struct */
124 struct xenbus_device *sc_xbusd; 124 struct xenbus_device *sc_xbusd;
125 unsigned int sc_evtchn; 125 unsigned int sc_evtchn;
126 126
127 struct intrhand *sc_ih; /* Interrupt handler for this instance. */ 127 struct intrhand *sc_ih; /* Interrupt handler for this instance. */
128 kmutex_t sc_lock; 128 kmutex_t sc_lock;
129 kcondvar_t sc_cache_flush_cv; 129 kcondvar_t sc_cache_flush_cv;
130 kcondvar_t sc_req_cv; 130 kcondvar_t sc_req_cv;
131 kcondvar_t sc_detach_cv; 131 kcondvar_t sc_detach_cv;
132 kcondvar_t sc_suspend_cv; 132 kcondvar_t sc_suspend_cv;
133 133
134 blkif_front_ring_t sc_ring; 134 blkif_front_ring_t sc_ring;
135 grant_ref_t sc_ring_gntref; 135 grant_ref_t sc_ring_gntref;
136 136
137 struct xbd_req sc_reqs[XBD_RING_SIZE]; 137 struct xbd_req sc_reqs[XBD_RING_SIZE];
138 SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */ 138 SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */
139 139
140 int sc_backend_status; /* our status with backend */ 140 int sc_backend_status; /* our status with backend */
141#define BLKIF_STATE_DISCONNECTED 0 141#define BLKIF_STATE_DISCONNECTED 0
142#define BLKIF_STATE_CONNECTED 1 142#define BLKIF_STATE_CONNECTED 1
143#define BLKIF_STATE_SUSPENDED 2 143#define BLKIF_STATE_SUSPENDED 2
144 144
145 int sc_shutdown; 145 int sc_shutdown;
146#define BLKIF_SHUTDOWN_RUN 0 /* no shutdown */ 146#define BLKIF_SHUTDOWN_RUN 0 /* no shutdown */
147#define BLKIF_SHUTDOWN_REMOTE 1 /* backend-initiated shutdown in progress */ 147#define BLKIF_SHUTDOWN_REMOTE 1 /* backend-initiated shutdown in progress */
148#define BLKIF_SHUTDOWN_LOCAL 2 /* locally-initiated shutdown in progress */ 148#define BLKIF_SHUTDOWN_LOCAL 2 /* locally-initiated shutdown in progress */
149 149
150 uint64_t sc_sectors; /* number of sectors for this device */ 150 uint64_t sc_sectors; /* number of sectors for this device */
151 u_long sc_secsize; /* sector size */ 151 u_long sc_secsize; /* sector size */
152 uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */ 152 uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */
153 u_long sc_info; /* VDISK_* */ 153 u_long sc_info; /* VDISK_* */
154 u_long sc_handle; /* from backend */ 154 u_long sc_handle; /* from backend */
155 int sc_features; 155 int sc_features;
156#define BLKIF_FEATURE_CACHE_FLUSH 0x1 156#define BLKIF_FEATURE_CACHE_FLUSH 0x1
157#define BLKIF_FEATURE_BARRIER 0x2 157#define BLKIF_FEATURE_BARRIER 0x2
158#define BLKIF_FEATURE_PERSISTENT 0x4 158#define BLKIF_FEATURE_PERSISTENT 0x4
159#define BLKIF_FEATURE_BITS \ 159#define BLKIF_FEATURE_BITS \
160 "\20\1CACHE-FLUSH\2BARRIER\3PERSISTENT" 160 "\20\1CACHE-FLUSH\2BARRIER\3PERSISTENT"
161 struct evcnt sc_cnt_map_unalign; 161 struct evcnt sc_cnt_map_unalign;
162}; 162};
163 163
164#if 0 164#if 0
165/* too big to be on stack */ 165/* too big to be on stack */
166static multicall_entry_t rq_mcl[XBD_RING_SIZE+1]; 166static multicall_entry_t rq_mcl[XBD_RING_SIZE+1];
167static paddr_t rq_pages[XBD_RING_SIZE]; 167static paddr_t rq_pages[XBD_RING_SIZE];
168#endif 168#endif
169 169
170static int xbd_xenbus_match(device_t, cfdata_t, void *); 170static int xbd_xenbus_match(device_t, cfdata_t, void *);
171static void xbd_xenbus_attach(device_t, device_t, void *); 171static void xbd_xenbus_attach(device_t, device_t, void *);
172static int xbd_xenbus_detach(device_t, int); 172static int xbd_xenbus_detach(device_t, int);
173 173
174static bool xbd_xenbus_suspend(device_t, const pmf_qual_t *); 174static bool xbd_xenbus_suspend(device_t, const pmf_qual_t *);
175static bool xbd_xenbus_resume(device_t, const pmf_qual_t *); 175static bool xbd_xenbus_resume(device_t, const pmf_qual_t *);
176 176
177static int xbd_handler(void *); 177static int xbd_handler(void *);
178static int xbd_diskstart(device_t, struct buf *); 178static int xbd_diskstart(device_t, struct buf *);
179static void xbd_iosize(device_t, int *); 179static void xbd_iosize(device_t, int *);
180static void xbd_backend_changed(void *, XenbusState); 180static void xbd_backend_changed(void *, XenbusState);
181static void xbd_connect(struct xbd_xenbus_softc *); 181static void xbd_connect(struct xbd_xenbus_softc *);
182 182
183static int xbd_map_align(struct xbd_req *); 183static int xbd_map_align(struct xbd_req *);
184static void xbd_unmap_align(struct xbd_req *); 184static void xbd_unmap_align(struct xbd_req *);
185 185
186static void xbdminphys(struct buf *); 186static void xbdminphys(struct buf *);
187 187
188CFATTACH_DECL3_NEW(xbd, sizeof(struct xbd_xenbus_softc), 188CFATTACH_DECL3_NEW(xbd, sizeof(struct xbd_xenbus_softc),
189 xbd_xenbus_match, xbd_xenbus_attach, xbd_xenbus_detach, NULL, NULL, NULL, 189 xbd_xenbus_match, xbd_xenbus_attach, xbd_xenbus_detach, NULL, NULL, NULL,
190 DVF_DETACH_SHUTDOWN); 190 DVF_DETACH_SHUTDOWN);
191 191
192dev_type_open(xbdopen); 192dev_type_open(xbdopen);
193dev_type_close(xbdclose); 193dev_type_close(xbdclose);
194dev_type_read(xbdread); 194dev_type_read(xbdread);
195dev_type_write(xbdwrite); 195dev_type_write(xbdwrite);
196dev_type_ioctl(xbdioctl); 196dev_type_ioctl(xbdioctl);
197dev_type_strategy(xbdstrategy); 197dev_type_strategy(xbdstrategy);
198dev_type_dump(xbddump); 198dev_type_dump(xbddump);
199dev_type_size(xbdsize); 199dev_type_size(xbdsize);
200 200
201const struct bdevsw xbd_bdevsw = { 201const struct bdevsw xbd_bdevsw = {
202 .d_open = xbdopen, 202 .d_open = xbdopen,
203 .d_close = xbdclose, 203 .d_close = xbdclose,
204 .d_strategy = xbdstrategy, 204 .d_strategy = xbdstrategy,
205 .d_ioctl = xbdioctl, 205 .d_ioctl = xbdioctl,
206 .d_dump = xbddump, 206 .d_dump = xbddump,
207 .d_psize = xbdsize, 207 .d_psize = xbdsize,
208 .d_discard = nodiscard, 208 .d_discard = nodiscard,
209 .d_flag = D_DISK | D_MPSAFE 209 .d_flag = D_DISK | D_MPSAFE
210}; 210};
211 211
212const struct cdevsw xbd_cdevsw = { 212const struct cdevsw xbd_cdevsw = {
213 .d_open = xbdopen, 213 .d_open = xbdopen,
214 .d_close = xbdclose, 214 .d_close = xbdclose,
215 .d_read = xbdread, 215 .d_read = xbdread,
216 .d_write = xbdwrite, 216 .d_write = xbdwrite,
217 .d_ioctl = xbdioctl, 217 .d_ioctl = xbdioctl,
218 .d_stop = nostop, 218 .d_stop = nostop,
219 .d_tty = notty, 219 .d_tty = notty,
220 .d_poll = nopoll, 220 .d_poll = nopoll,
221 .d_mmap = nommap, 221 .d_mmap = nommap,
222 .d_kqfilter = nokqfilter, 222 .d_kqfilter = nokqfilter,
223 .d_discard = nodiscard, 223 .d_discard = nodiscard,
224 .d_flag = D_DISK | D_MPSAFE 224 .d_flag = D_DISK | D_MPSAFE
225}; 225};
226 226
227extern struct cfdriver xbd_cd; 227extern struct cfdriver xbd_cd;
228 228
229static struct dkdriver xbddkdriver = { 229static struct dkdriver xbddkdriver = {
230 .d_strategy = xbdstrategy, 230 .d_strategy = xbdstrategy,
231 .d_minphys = xbdminphys, 231 .d_minphys = xbdminphys,
232 .d_open = xbdopen, 232 .d_open = xbdopen,
233 .d_close = xbdclose, 233 .d_close = xbdclose,
234 .d_diskstart = xbd_diskstart, 234 .d_diskstart = xbd_diskstart,
235 .d_iosize = xbd_iosize, 235 .d_iosize = xbd_iosize,
236}; 236};
237 237
238static int 238static int
239xbd_xenbus_match(device_t parent, cfdata_t match, void *aux) 239xbd_xenbus_match(device_t parent, cfdata_t match, void *aux)
240{ 240{
241 struct xenbusdev_attach_args *xa = aux; 241 struct xenbusdev_attach_args *xa = aux;
242 242
243 if (strcmp(xa->xa_type, "vbd") != 0) 243 if (strcmp(xa->xa_type, "vbd") != 0)
244 return 0; 244 return 0;
245 245
246 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT && 246 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
247 match->cf_loc[XENBUSCF_ID] != xa->xa_id) 247 match->cf_loc[XENBUSCF_ID] != xa->xa_id)
248 return 0; 248 return 0;
249 249
250 return 1; 250 return 1;
251} 251}
252 252
253static void 253static void
254xbd_xenbus_attach(device_t parent, device_t self, void *aux) 254xbd_xenbus_attach(device_t parent, device_t self, void *aux)
255{ 255{
256 struct xbd_xenbus_softc *sc = device_private(self); 256 struct xbd_xenbus_softc *sc = device_private(self);
257 struct xenbusdev_attach_args *xa = aux; 257 struct xenbusdev_attach_args *xa = aux;
258 blkif_sring_t *ring; 258 blkif_sring_t *ring;
259 RING_IDX i; 259 RING_IDX i;
260#ifdef XBD_DEBUG 260#ifdef XBD_DEBUG
261 char **dir, *val; 261 char **dir, *val;
262 int dir_n = 0; 262 int dir_n = 0;
263 char id_str[20]; 263 char id_str[20];
264 int err; 264 int err;
265#endif 265#endif
266 266
267 config_pending_incr(self); 267 config_pending_incr(self);
268 aprint_normal(": Xen Virtual Block Device Interface\n"); 268 aprint_normal(": Xen Virtual Block Device Interface\n");
269 269
270 dk_init(&sc->sc_dksc, self, DKTYPE_ESDI); 270 dk_init(&sc->sc_dksc, self, DKTYPE_ESDI);
271 disk_init(&sc->sc_dksc.sc_dkdev, device_xname(self), &xbddkdriver); 271 disk_init(&sc->sc_dksc.sc_dkdev, device_xname(self), &xbddkdriver);
272 272
273 sc->sc_xbusd = xa->xa_xbusd; 273 sc->sc_xbusd = xa->xa_xbusd;
274 sc->sc_xbusd->xbusd_otherend_changed = xbd_backend_changed; 274 sc->sc_xbusd->xbusd_otherend_changed = xbd_backend_changed;
275 275
276 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_BIO); 276 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_BIO);
277 cv_init(&sc->sc_cache_flush_cv, "xbdsync"); 277 cv_init(&sc->sc_cache_flush_cv, "xbdsync");
278 cv_init(&sc->sc_req_cv, "xbdreq"); 278 cv_init(&sc->sc_req_cv, "xbdreq");
279 cv_init(&sc->sc_detach_cv, "xbddetach"); 279 cv_init(&sc->sc_detach_cv, "xbddetach");
280 cv_init(&sc->sc_suspend_cv, "xbdsuspend"); 280 cv_init(&sc->sc_suspend_cv, "xbdsuspend");
281 281
282 /* initialize free requests list */ 282 /* initialize free requests list */
283 SLIST_INIT(&sc->sc_xbdreq_head); 283 SLIST_INIT(&sc->sc_xbdreq_head);
284 for (i = 0; i < XBD_RING_SIZE; i++) { 284 for (i = 0; i < XBD_RING_SIZE; i++) {
285 sc->sc_reqs[i].req_id = i; 285 sc->sc_reqs[i].req_id = i;
286 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, &sc->sc_reqs[i], 286 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, &sc->sc_reqs[i],
287 req_next); 287 req_next);
288 } 288 }
289 289
290 sc->sc_backend_status = BLKIF_STATE_DISCONNECTED; 290 sc->sc_backend_status = BLKIF_STATE_DISCONNECTED;
291 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE; 291 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;
292 292
293 ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED); 293 ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED);
294 if (ring == NULL) 294 if (ring == NULL)
295 panic("%s: can't alloc ring", device_xname(self)); 295 panic("%s: can't alloc ring", device_xname(self));
296 sc->sc_ring.sring = ring; 296 sc->sc_ring.sring = ring;
297 297
298 evcnt_attach_dynamic(&sc->sc_cnt_map_unalign, EVCNT_TYPE_MISC, 298 evcnt_attach_dynamic(&sc->sc_cnt_map_unalign, EVCNT_TYPE_MISC,
299 NULL, device_xname(self), "map unaligned"); 299 NULL, device_xname(self), "map unaligned");
300 300
301 for (i = 0; i < XBD_RING_SIZE; i++) { 301 for (i = 0; i < XBD_RING_SIZE; i++) {
302 if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, 302 if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat,
303 XBD_MAX_XFER, BLKIF_MAX_SEGMENTS_PER_REQUEST, 303 XBD_MAX_XFER, BLKIF_MAX_SEGMENTS_PER_REQUEST,
304 PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 304 PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
305 &sc->sc_reqs[i].req_dmamap) != 0) { 305 &sc->sc_reqs[i].req_dmamap) != 0) {
306 aprint_error_dev(self, "can't alloc dma maps\n"); 306 aprint_error_dev(self, "can't alloc dma maps\n");
307 return; 307 return;
308 } 308 }
309 } 309 }
310 310
311 /* resume shared structures and tell backend that we are ready */ 311 /* resume shared structures and tell backend that we are ready */
312 if (xbd_xenbus_resume(self, PMF_Q_NONE) == false) { 312 if (xbd_xenbus_resume(self, PMF_Q_NONE) == false) {
313 uvm_km_free(kernel_map, (vaddr_t)ring, PAGE_SIZE, 313 uvm_km_free(kernel_map, (vaddr_t)ring, PAGE_SIZE,
314 UVM_KMF_WIRED); 314 UVM_KMF_WIRED);
315 return; 315 return;
316 } 316 }
317 317
318 if (!pmf_device_register(self, xbd_xenbus_suspend, xbd_xenbus_resume)) 318 if (!pmf_device_register(self, xbd_xenbus_suspend, xbd_xenbus_resume))
319 aprint_error_dev(self, "couldn't establish power handler\n"); 319 aprint_error_dev(self, "couldn't establish power handler\n");
320 320
321} 321}
322 322
323static int 323static int
324xbd_xenbus_detach(device_t dev, int flags) 324xbd_xenbus_detach(device_t dev, int flags)
325{ 325{
326 struct xbd_xenbus_softc *sc = device_private(dev); 326 struct xbd_xenbus_softc *sc = device_private(dev);
327 int bmaj, cmaj, i, mn, rc; 327 int bmaj, cmaj, i, mn, rc;
328 328
329 DPRINTF(("%s: xbd_detach\n", device_xname(dev))); 329 DPRINTF(("%s: xbd_detach\n", device_xname(dev)));
330 330
331 rc = disk_begindetach(&sc->sc_dksc.sc_dkdev, NULL, dev, flags); 331 rc = disk_begindetach(&sc->sc_dksc.sc_dkdev, NULL, dev, flags);
332 if (rc != 0) 332 if (rc != 0)
333 return rc; 333 return rc;
334 334
335 mutex_enter(&sc->sc_lock); 335 mutex_enter(&sc->sc_lock);
336 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN) { 336 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN) {
337 sc->sc_shutdown = BLKIF_SHUTDOWN_LOCAL; 337 sc->sc_shutdown = BLKIF_SHUTDOWN_LOCAL;
338 338
339 /* wait for requests to complete */ 339 /* wait for requests to complete */
340 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED && 340 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
341 disk_isbusy(&sc->sc_dksc.sc_dkdev)) { 341 disk_isbusy(&sc->sc_dksc.sc_dkdev)) {
342 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 342 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
343 } 343 }
344 mutex_exit(&sc->sc_lock); 344 mutex_exit(&sc->sc_lock);
345 345
346 /* Trigger state transition with backend */ 346 /* Trigger state transition with backend */
347 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing); 347 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing);
348 348
349 mutex_enter(&sc->sc_lock); 349 mutex_enter(&sc->sc_lock);
350 } 350 }
351 if ((flags & DETACH_FORCE) == 0) { 351 if ((flags & DETACH_FORCE) == 0) {
352 /* xbd_xenbus_detach already in progress */ 352 /* xbd_xenbus_detach already in progress */
353 cv_broadcast(&sc->sc_detach_cv); 353 cv_broadcast(&sc->sc_detach_cv);
354 mutex_exit(&sc->sc_lock); 354 mutex_exit(&sc->sc_lock);
355 return EALREADY; 355 return EALREADY;
356 } 356 }
357 mutex_exit(&sc->sc_lock); 357 mutex_exit(&sc->sc_lock);
358 while (xenbus_read_driver_state(sc->sc_xbusd->xbusd_otherend) 358 while (xenbus_read_driver_state(sc->sc_xbusd->xbusd_otherend)
359 != XenbusStateClosed) { 359 != XenbusStateClosed) {
360 mutex_enter(&sc->sc_lock); 360 mutex_enter(&sc->sc_lock);
361 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 361 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
362 mutex_exit(&sc->sc_lock); 362 mutex_exit(&sc->sc_lock);
363 } 363 }
364 364
365 /* locate the major number */ 365 /* locate the major number */
366 bmaj = bdevsw_lookup_major(&xbd_bdevsw); 366 bmaj = bdevsw_lookup_major(&xbd_bdevsw);
367 cmaj = cdevsw_lookup_major(&xbd_cdevsw); 367 cmaj = cdevsw_lookup_major(&xbd_cdevsw);
368 368
369 /* Nuke the vnodes for any open instances. */ 369 /* Nuke the vnodes for any open instances. */
370 for (i = 0; i < MAXPARTITIONS; i++) { 370 for (i = 0; i < MAXPARTITIONS; i++) {
371 mn = DISKMINOR(device_unit(dev), i); 371 mn = DISKMINOR(device_unit(dev), i);
372 vdevgone(bmaj, mn, mn, VBLK); 372 vdevgone(bmaj, mn, mn, VBLK);
373 vdevgone(cmaj, mn, mn, VCHR); 373 vdevgone(cmaj, mn, mn, VCHR);
374 } 374 }
375 375
376 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED) { 376 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED) {
377 /* Delete all of our wedges. */ 377 /* Delete all of our wedges. */
378 dkwedge_delall(&sc->sc_dksc.sc_dkdev); 378 dkwedge_delall(&sc->sc_dksc.sc_dkdev);
379 379
380 /* Kill off any queued buffers. */ 380 /* Kill off any queued buffers. */
381 dk_drain(&sc->sc_dksc); 381 dk_drain(&sc->sc_dksc);
382 bufq_free(sc->sc_dksc.sc_bufq); 382 bufq_free(sc->sc_dksc.sc_bufq);
383 383
384 /* detach disk */ 384 /* detach disk */
385 disk_detach(&sc->sc_dksc.sc_dkdev); 385 disk_detach(&sc->sc_dksc.sc_dkdev);
386 disk_destroy(&sc->sc_dksc.sc_dkdev); 386 disk_destroy(&sc->sc_dksc.sc_dkdev);
387 dk_detach(&sc->sc_dksc); 387 dk_detach(&sc->sc_dksc);
388 } 388 }
389 389
390 hypervisor_mask_event(sc->sc_evtchn); 390 hypervisor_mask_event(sc->sc_evtchn);
391 xen_intr_disestablish(sc->sc_ih); 391 xen_intr_disestablish(sc->sc_ih);
392 392
393 mutex_enter(&sc->sc_lock); 393 mutex_enter(&sc->sc_lock);
394 while (xengnt_status(sc->sc_ring_gntref)) 394 while (xengnt_status(sc->sc_ring_gntref))
395 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 395 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
 396 mutex_exit(&sc->sc_lock);
396 397
397 xengnt_revoke_access(sc->sc_ring_gntref); 398 xengnt_revoke_access(sc->sc_ring_gntref);
398 uvm_km_free(kernel_map, (vaddr_t)sc->sc_ring.sring, 399 uvm_km_free(kernel_map, (vaddr_t)sc->sc_ring.sring,
399 PAGE_SIZE, UVM_KMF_WIRED); 400 PAGE_SIZE, UVM_KMF_WIRED);
400 401
401 for (i = 0; i < XBD_RING_SIZE; i++) { 402 for (i = 0; i < XBD_RING_SIZE; i++) {
402 if (sc->sc_reqs[i].req_dmamap != NULL) { 403 if (sc->sc_reqs[i].req_dmamap != NULL) {
403 bus_dmamap_destroy(sc->sc_xbusd->xbusd_dmat, 404 bus_dmamap_destroy(sc->sc_xbusd->xbusd_dmat,
404 sc->sc_reqs[i].req_dmamap); 405 sc->sc_reqs[i].req_dmamap);
405 sc->sc_reqs[i].req_dmamap = NULL; 406 sc->sc_reqs[i].req_dmamap = NULL;
406 } 407 }
407 } 408 }
408 409
409 mutex_destroy(&sc->sc_lock); 410 mutex_destroy(&sc->sc_lock);
410 411
411 evcnt_detach(&sc->sc_cnt_map_unalign); 412 evcnt_detach(&sc->sc_cnt_map_unalign);
412 413
413 pmf_device_deregister(dev); 414 pmf_device_deregister(dev);
414 415
415 return 0; 416 return 0;
416} 417}
417 418
418static bool 419static bool
419xbd_xenbus_suspend(device_t dev, const pmf_qual_t *qual) { 420xbd_xenbus_suspend(device_t dev, const pmf_qual_t *qual) {
420 421
421 struct xbd_xenbus_softc *sc; 422 struct xbd_xenbus_softc *sc;
422 423
423 sc = device_private(dev); 424 sc = device_private(dev);
424 425
425 mutex_enter(&sc->sc_lock); 426 mutex_enter(&sc->sc_lock);
426 /* wait for requests to complete, then suspend device */ 427 /* wait for requests to complete, then suspend device */
427 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED && 428 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
428 disk_isbusy(&sc->sc_dksc.sc_dkdev)) { 429 disk_isbusy(&sc->sc_dksc.sc_dkdev)) {
429 cv_timedwait(&sc->sc_suspend_cv, &sc->sc_lock, hz/2); 430 cv_timedwait(&sc->sc_suspend_cv, &sc->sc_lock, hz/2);
430 } 431 }
431 432
432 hypervisor_mask_event(sc->sc_evtchn); 433 hypervisor_mask_event(sc->sc_evtchn);
433 sc->sc_backend_status = BLKIF_STATE_SUSPENDED; 434 sc->sc_backend_status = BLKIF_STATE_SUSPENDED;
434 xen_intr_disestablish(sc->sc_ih); 435 xen_intr_disestablish(sc->sc_ih);
435 436
436 mutex_exit(&sc->sc_lock); 437 mutex_exit(&sc->sc_lock);
437 438
438 xenbus_device_suspend(sc->sc_xbusd); 439 xenbus_device_suspend(sc->sc_xbusd);
439 aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn); 440 aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
440 441
441 return true; 442 return true;
442} 443}
443 444
444static bool 445static bool
445xbd_xenbus_resume(device_t dev, const pmf_qual_t *qual) 446xbd_xenbus_resume(device_t dev, const pmf_qual_t *qual)
446{ 447{
447 struct xbd_xenbus_softc *sc; 448 struct xbd_xenbus_softc *sc;
448 struct xenbus_transaction *xbt; 449 struct xenbus_transaction *xbt;
449 int error; 450 int error;
450 blkif_sring_t *ring; 451 blkif_sring_t *ring;
451 paddr_t ma; 452 paddr_t ma;
452 const char *errmsg; 453 const char *errmsg;
453 454
454 sc = device_private(dev); 455 sc = device_private(dev);
455 456
456 if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) { 457 if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) {
457 /* 458 /*
458 * Device was suspended, so ensure that access associated to 459 * Device was suspended, so ensure that access associated to
459 * the block I/O ring is revoked. 460 * the block I/O ring is revoked.
460 */ 461 */
461 xengnt_revoke_access(sc->sc_ring_gntref); 462 xengnt_revoke_access(sc->sc_ring_gntref);
462 } 463 }
463 sc->sc_ring_gntref = GRANT_INVALID_REF; 464 sc->sc_ring_gntref = GRANT_INVALID_REF;
464 465
465 /* Initialize ring */ 466 /* Initialize ring */
466 ring = sc->sc_ring.sring; 467 ring = sc->sc_ring.sring;
467 memset(ring, 0, PAGE_SIZE); 468 memset(ring, 0, PAGE_SIZE);
468 SHARED_RING_INIT(ring); 469 SHARED_RING_INIT(ring);
469 FRONT_RING_INIT(&sc->sc_ring, ring, PAGE_SIZE); 470 FRONT_RING_INIT(&sc->sc_ring, ring, PAGE_SIZE);
470 471
471 /* 472 /*
472 * get MA address of the ring, and use it to set up the grant entry 473 * get MA address of the ring, and use it to set up the grant entry
473 * for the block device 474 * for the block device
474 */ 475 */
475 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)ring, &ma); 476 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)ring, &ma);
476 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_ring_gntref); 477 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_ring_gntref);
477 if (error) 478 if (error)
478 goto abort_resume; 479 goto abort_resume;
479 480
480 error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn); 481 error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
481 if (error) 482 if (error)
482 goto abort_resume; 483 goto abort_resume;
483 484
484 aprint_verbose_dev(dev, "using event channel %d\n", 485 aprint_verbose_dev(dev, "using event channel %d\n",
485 sc->sc_evtchn); 486 sc->sc_evtchn);
486 sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn, 487 sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn,
487 IST_LEVEL, IPL_BIO, &xbd_handler, sc, true, device_xname(dev)); 488 IST_LEVEL, IPL_BIO, &xbd_handler, sc, true, device_xname(dev));
488 KASSERT(sc->sc_ih != NULL); 489 KASSERT(sc->sc_ih != NULL);
489 490
490again: 491again:
491 xbt = xenbus_transaction_start(); 492 xbt = xenbus_transaction_start();
492 if (xbt == NULL) 493 if (xbt == NULL)
493 return false; 494 return false;
494 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 495 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
495 "ring-ref","%u", sc->sc_ring_gntref); 496 "ring-ref","%u", sc->sc_ring_gntref);
496 if (error) { 497 if (error) {
497 errmsg = "writing ring-ref"; 498 errmsg = "writing ring-ref";
498 goto abort_transaction; 499 goto abort_transaction;
499 } 500 }
500 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 501 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
501 "event-channel", "%u", sc->sc_evtchn); 502 "event-channel", "%u", sc->sc_evtchn);
502 if (error) { 503 if (error) {
503 errmsg = "writing event channel"; 504 errmsg = "writing event channel";
504 goto abort_transaction; 505 goto abort_transaction;
505 } 506 }
506 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 507 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
507 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); 508 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
508 if (error) { 509 if (error) {
509 errmsg = "writing protocol"; 510 errmsg = "writing protocol";
510 goto abort_transaction; 511 goto abort_transaction;
511 } 512 }
512 error = xenbus_transaction_end(xbt, 0); 513 error = xenbus_transaction_end(xbt, 0);
513 if (error == EAGAIN) 514 if (error == EAGAIN)
514 goto again; 515 goto again;
515 if (error != 0) { 516 if (error != 0) {
516 xenbus_dev_fatal(sc->sc_xbusd, error, 517 xenbus_dev_fatal(sc->sc_xbusd, error,
517 "completing transaction"); 518 "completing transaction");
518 return false; 519 return false;
519 } 520 }
520 521
521 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised); 522 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised);
522 523
523 if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) { 524 if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) {
524 /* 525 /*
525 * device was suspended, softc structures are 526 * device was suspended, softc structures are
526 * already initialized - we use a shortcut 527 * already initialized - we use a shortcut
527 */ 528 */
528 sc->sc_backend_status = BLKIF_STATE_CONNECTED; 529 sc->sc_backend_status = BLKIF_STATE_CONNECTED;
529 xenbus_device_resume(sc->sc_xbusd); 530 xenbus_device_resume(sc->sc_xbusd);
530 hypervisor_unmask_event(sc->sc_evtchn); 531 hypervisor_unmask_event(sc->sc_evtchn);
531 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected); 532 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
532 } 533 }
533 534
534 return true; 535 return true;
535 536
536abort_resume: 537abort_resume:
537 xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device"); 538 xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
538 return false; 539 return false;
539 540
540abort_transaction: 541abort_transaction:
541 xenbus_transaction_end(xbt, 1); 542 xenbus_transaction_end(xbt, 1);
542 xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg); 543 xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
543 return false; 544 return false;
544} 545}
545 546
546static void 547static void
547xbd_backend_changed(void *arg, XenbusState new_state) 548xbd_backend_changed(void *arg, XenbusState new_state)
548{ 549{
549 struct xbd_xenbus_softc *sc = device_private((device_t)arg); 550 struct xbd_xenbus_softc *sc = device_private((device_t)arg);
550 struct disk_geom *dg; 551 struct disk_geom *dg;
551 552
552 char buf[32]; 553 char buf[32];
553 DPRINTF(("%s: new backend state %d\n", 554 DPRINTF(("%s: new backend state %d\n",
554 device_xname(sc->sc_dksc.sc_dev), new_state)); 555 device_xname(sc->sc_dksc.sc_dev), new_state));
555 556
556 switch (new_state) { 557 switch (new_state) {
557 case XenbusStateUnknown: 558 case XenbusStateUnknown:
558 case XenbusStateInitialising: 559 case XenbusStateInitialising:
559 case XenbusStateInitWait: 560 case XenbusStateInitWait:
560 case XenbusStateInitialised: 561 case XenbusStateInitialised:
561 break; 562 break;
562 case XenbusStateClosing: 563 case XenbusStateClosing:
563 mutex_enter(&sc->sc_lock); 564 mutex_enter(&sc->sc_lock);
564 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN) 565 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN)
565 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE; 566 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;
566 /* wait for requests to complete */ 567 /* wait for requests to complete */
567 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED && 568 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
568 disk_isbusy(&sc->sc_dksc.sc_dkdev)) { 569 disk_isbusy(&sc->sc_dksc.sc_dkdev)) {
569 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 570 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
570 } 571 }
571 mutex_exit(&sc->sc_lock); 572 mutex_exit(&sc->sc_lock);
572 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed); 573 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
573 break; 574 break;
574 case XenbusStateConnected: 575 case XenbusStateConnected:
575 /* 576 /*
576 * note that xbd_backend_changed() can only be called by 577 * note that xbd_backend_changed() can only be called by
577 * the xenbus thread. 578 * the xenbus thread.
578 */ 579 */
579 580
580 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED || 581 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED ||
581 sc->sc_backend_status == BLKIF_STATE_SUSPENDED) 582 sc->sc_backend_status == BLKIF_STATE_SUSPENDED)
582 /* already connected */ 583 /* already connected */
583 return; 584 return;
584 585
585 xbd_connect(sc); 586 xbd_connect(sc);
586 sc->sc_shutdown = BLKIF_SHUTDOWN_RUN; 587 sc->sc_shutdown = BLKIF_SHUTDOWN_RUN;
587 sc->sc_xbdsize = 588 sc->sc_xbdsize =
588 sc->sc_sectors * (uint64_t)sc->sc_secsize / DEV_BSIZE; 589 sc->sc_sectors * (uint64_t)sc->sc_secsize / DEV_BSIZE;
589 dg = &sc->sc_dksc.sc_dkdev.dk_geom; 590 dg = &sc->sc_dksc.sc_dkdev.dk_geom;
590 memset(dg, 0, sizeof(*dg));  591 memset(dg, 0, sizeof(*dg));
591 592
592 dg->dg_secperunit = sc->sc_xbdsize; 593 dg->dg_secperunit = sc->sc_xbdsize;
593 dg->dg_secsize = DEV_BSIZE; 594 dg->dg_secsize = DEV_BSIZE;
594 dg->dg_ntracks = 1; 595 dg->dg_ntracks = 1;
595 // XXX: Ok to hard-code DEV_BSIZE? 596 // XXX: Ok to hard-code DEV_BSIZE?
596 dg->dg_nsectors = 1024 * (1024 / dg->dg_secsize); 597 dg->dg_nsectors = 1024 * (1024 / dg->dg_secsize);
597 dg->dg_ncylinders = dg->dg_secperunit / dg->dg_nsectors; 598 dg->dg_ncylinders = dg->dg_secperunit / dg->dg_nsectors;
598 599
599 bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0); 600 bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0);
600 dk_attach(&sc->sc_dksc); 601 dk_attach(&sc->sc_dksc);
601 disk_attach(&sc->sc_dksc.sc_dkdev); 602 disk_attach(&sc->sc_dksc.sc_dkdev);
602 603
603 sc->sc_backend_status = BLKIF_STATE_CONNECTED; 604 sc->sc_backend_status = BLKIF_STATE_CONNECTED;
604 hypervisor_unmask_event(sc->sc_evtchn); 605 hypervisor_unmask_event(sc->sc_evtchn);
605 606
606 format_bytes(buf, sizeof(buf), sc->sc_sectors * sc->sc_secsize); 607 format_bytes(buf, sizeof(buf), sc->sc_sectors * sc->sc_secsize);
607 aprint_normal_dev(sc->sc_dksc.sc_dev, 608 aprint_normal_dev(sc->sc_dksc.sc_dev,
608 "%s, %d bytes/sect x %" PRIu64 " sectors\n", 609 "%s, %d bytes/sect x %" PRIu64 " sectors\n",
609 buf, (int)dg->dg_secsize, sc->sc_xbdsize); 610 buf, (int)dg->dg_secsize, sc->sc_xbdsize);
610 snprintb(buf, sizeof(buf), BLKIF_FEATURE_BITS, 611 snprintb(buf, sizeof(buf), BLKIF_FEATURE_BITS,
611 sc->sc_features); 612 sc->sc_features);
612 aprint_normal_dev(sc->sc_dksc.sc_dev, 613 aprint_normal_dev(sc->sc_dksc.sc_dev,
613 "backend features %s\n", buf); 614 "backend features %s\n", buf);
614 615
615 /* Discover wedges on this disk. */ 616 /* Discover wedges on this disk. */
616 dkwedge_discover(&sc->sc_dksc.sc_dkdev); 617 dkwedge_discover(&sc->sc_dksc.sc_dkdev);
617 618
618 disk_set_info(sc->sc_dksc.sc_dev, &sc->sc_dksc.sc_dkdev, NULL); 619 disk_set_info(sc->sc_dksc.sc_dev, &sc->sc_dksc.sc_dkdev, NULL);
619 620
620 /* the disk should be working now */ 621 /* the disk should be working now */
621 config_pending_decr(sc->sc_dksc.sc_dev); 622 config_pending_decr(sc->sc_dksc.sc_dev);
622 break; 623 break;
623 default: 624 default:
624 panic("bad backend state %d", new_state); 625 panic("bad backend state %d", new_state);
625 } 626 }
626} 627}
627 628
628static void 629static void
629xbd_connect(struct xbd_xenbus_softc *sc) 630xbd_connect(struct xbd_xenbus_softc *sc)
630{ 631{
631 int err; 632 int err;
632 unsigned long long sectors; 633 unsigned long long sectors;
633 u_long val; 634 u_long val;
634 635
635 err = xenbus_read_ul(NULL, 636 err = xenbus_read_ul(NULL,
636 sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10); 637 sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10);
637 if (err) 638 if (err)
638 panic("%s: can't read number from %s/virtual-device\n",  639 panic("%s: can't read number from %s/virtual-device\n",
639 device_xname(sc->sc_dksc.sc_dev), 640 device_xname(sc->sc_dksc.sc_dev),
640 sc->sc_xbusd->xbusd_otherend); 641 sc->sc_xbusd->xbusd_otherend);
641 err = xenbus_read_ull(NULL, 642 err = xenbus_read_ull(NULL,
642 sc->sc_xbusd->xbusd_otherend, "sectors", &sectors, 10); 643 sc->sc_xbusd->xbusd_otherend, "sectors", &sectors, 10);
643 if (err) 644 if (err)
644 panic("%s: can't read number from %s/sectors\n",  645 panic("%s: can't read number from %s/sectors\n",
645 device_xname(sc->sc_dksc.sc_dev), 646 device_xname(sc->sc_dksc.sc_dev),
646 sc->sc_xbusd->xbusd_otherend); 647 sc->sc_xbusd->xbusd_otherend);
647 sc->sc_sectors = sectors; 648 sc->sc_sectors = sectors;
648 649
649 err = xenbus_read_ul(NULL, 650 err = xenbus_read_ul(NULL,
650 sc->sc_xbusd->xbusd_otherend, "info", &sc->sc_info, 10); 651 sc->sc_xbusd->xbusd_otherend, "info", &sc->sc_info, 10);
651 if (err) 652 if (err)
652 panic("%s: can't read number from %s/info\n",  653 panic("%s: can't read number from %s/info\n",
653 device_xname(sc->sc_dksc.sc_dev), 654 device_xname(sc->sc_dksc.sc_dev),
654 sc->sc_xbusd->xbusd_otherend); 655 sc->sc_xbusd->xbusd_otherend);
655 err = xenbus_read_ul(NULL, 656 err = xenbus_read_ul(NULL,
656 sc->sc_xbusd->xbusd_otherend, "sector-size", &sc->sc_secsize, 10); 657 sc->sc_xbusd->xbusd_otherend, "sector-size", &sc->sc_secsize, 10);
657 if (err) 658 if (err)
658 panic("%s: can't read number from %s/sector-size\n",  659 panic("%s: can't read number from %s/sector-size\n",
659 device_xname(sc->sc_dksc.sc_dev), 660 device_xname(sc->sc_dksc.sc_dev),
660 sc->sc_xbusd->xbusd_otherend); 661 sc->sc_xbusd->xbusd_otherend);
661 662
662 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 663 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
663 "feature-flush-cache", &val, 10); 664 "feature-flush-cache", &val, 10);
664 if (err) 665 if (err)
665 val = 0; 666 val = 0;
666 if (val > 0) 667 if (val > 0)
667 sc->sc_features |= BLKIF_FEATURE_CACHE_FLUSH; 668 sc->sc_features |= BLKIF_FEATURE_CACHE_FLUSH;
668 669
669 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 670 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
670 "feature-barrier", &val, 10); 671 "feature-barrier", &val, 10);
671 if (err) 672 if (err)
672 val = 0; 673 val = 0;
673 if (val > 0) 674 if (val > 0)
674 sc->sc_features |= BLKIF_FEATURE_BARRIER; 675 sc->sc_features |= BLKIF_FEATURE_BARRIER;
675 676
676 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 677 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
677 "feature-persistent", &val, 10); 678 "feature-persistent", &val, 10);
678 if (err) 679 if (err)
679 val = 0; 680 val = 0;
680 if (val > 0) 681 if (val > 0)
681 sc->sc_features |= BLKIF_FEATURE_PERSISTENT; 682 sc->sc_features |= BLKIF_FEATURE_PERSISTENT;
682 683
683 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected); 684 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
684} 685}
685 686
686static int 687static int
687xbd_handler(void *arg) 688xbd_handler(void *arg)
688{ 689{
689 struct xbd_xenbus_softc *sc = arg; 690 struct xbd_xenbus_softc *sc = arg;
690 struct buf *bp; 691 struct buf *bp;
691 RING_IDX resp_prod, i; 692 RING_IDX resp_prod, i;
692 int more_to_do; 693 int more_to_do;
693 int seg; 694 int seg;
694 695
695 DPRINTF(("xbd_handler(%s)\n", device_xname(sc->sc_dksc.sc_dev))); 696 DPRINTF(("xbd_handler(%s)\n", device_xname(sc->sc_dksc.sc_dev)));
696 697
697 if (__predict_false(sc->sc_backend_status != BLKIF_STATE_CONNECTED)) 698 if (__predict_false(sc->sc_backend_status != BLKIF_STATE_CONNECTED))
698 return 0; 699 return 0;
699 700
700 mutex_enter(&sc->sc_lock); 701 mutex_enter(&sc->sc_lock);
701again: 702again:
702 resp_prod = sc->sc_ring.sring->rsp_prod; 703 resp_prod = sc->sc_ring.sring->rsp_prod;
703 xen_rmb(); /* ensure we see replies up to resp_prod */ 704 xen_rmb(); /* ensure we see replies up to resp_prod */
704 for (i = sc->sc_ring.rsp_cons; i != resp_prod; i++) { 705 for (i = sc->sc_ring.rsp_cons; i != resp_prod; i++) {
705 blkif_response_t *rep = RING_GET_RESPONSE(&sc->sc_ring, i); 706 blkif_response_t *rep = RING_GET_RESPONSE(&sc->sc_ring, i);
706 struct xbd_req *xbdreq = &sc->sc_reqs[rep->id]; 707 struct xbd_req *xbdreq = &sc->sc_reqs[rep->id];
707 708
708 if (rep->operation == BLKIF_OP_FLUSH_DISKCACHE) { 709 if (rep->operation == BLKIF_OP_FLUSH_DISKCACHE) {
709 KASSERT(xbdreq->req_bp == NULL); 710 KASSERT(xbdreq->req_bp == NULL);
710 xbdreq->req_sync.s_error = rep->status; 711 xbdreq->req_sync.s_error = rep->status;
711 xbdreq->req_sync.s_done = 1; 712 xbdreq->req_sync.s_done = 1;
712 cv_broadcast(&sc->sc_cache_flush_cv); 713 cv_broadcast(&sc->sc_cache_flush_cv);
713 /* caller will free the req */ 714 /* caller will free the req */
714 continue; 715 continue;
715 } 716 }
716 717
717 if (rep->operation != BLKIF_OP_READ && 718 if (rep->operation != BLKIF_OP_READ &&
718 rep->operation != BLKIF_OP_WRITE) { 719 rep->operation != BLKIF_OP_WRITE) {
719 aprint_error_dev(sc->sc_dksc.sc_dev, 720 aprint_error_dev(sc->sc_dksc.sc_dev,
720 "bad operation %d from backend\n", rep->operation); 721 "bad operation %d from backend\n", rep->operation);
721 continue; 722 continue;
722 } 723 }
723 724
724 for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) { 725 for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) {
725 /* 726 /*
726 * We are not allowing persistent mappings, so 727 * We are not allowing persistent mappings, so
727 * expect the backend to release the grant 728 * expect the backend to release the grant
728 * immediately. 729 * immediately.
729 */ 730 */
730 KASSERT(xengnt_status(xbdreq->req_gntref[seg]) == 0); 731 KASSERT(xengnt_status(xbdreq->req_gntref[seg]) == 0);
731 xengnt_revoke_access(xbdreq->req_gntref[seg]); 732 xengnt_revoke_access(xbdreq->req_gntref[seg]);
732 } 733 }
733 734
734 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, xbdreq->req_dmamap); 735 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, xbdreq->req_dmamap);
735 736
736 bp = xbdreq->req_bp; 737 bp = xbdreq->req_bp;
737 KASSERT(bp != NULL && bp->b_data != NULL); 738 KASSERT(bp != NULL && bp->b_data != NULL);
738 DPRINTF(("%s(%p): b_bcount = %ld\n", __func__, 739 DPRINTF(("%s(%p): b_bcount = %ld\n", __func__,
739 bp, (long)bp->b_bcount)); 740 bp, (long)bp->b_bcount));
740 741
741 if (__predict_false(bp->b_data != xbdreq->req_data)) 742 if (__predict_false(bp->b_data != xbdreq->req_data))
742 xbd_unmap_align(xbdreq); 743 xbd_unmap_align(xbdreq);
743 xbdreq->req_bp = xbdreq->req_data = NULL; 744 xbdreq->req_bp = xbdreq->req_data = NULL;
744 745
745 /* b_resid was set in dk_start, only override on error */ 746 /* b_resid was set in dk_start, only override on error */
746 if (rep->status != BLKIF_RSP_OKAY) { 747 if (rep->status != BLKIF_RSP_OKAY) {
747 bp->b_error = EIO; 748 bp->b_error = EIO;
748 bp->b_resid = bp->b_bcount; 749 bp->b_resid = bp->b_bcount;
749 } 750 }
750 751
751 dk_done(&sc->sc_dksc, bp); 752 dk_done(&sc->sc_dksc, bp);
752 753
753 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next); 754 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
754 } 755 }
755 756
756 xen_rmb(); 757 xen_rmb();
757 sc->sc_ring.rsp_cons = i; 758 sc->sc_ring.rsp_cons = i;
758 759
759 RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_ring, more_to_do); 760 RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_ring, more_to_do);
760 if (more_to_do) 761 if (more_to_do)
761 goto again; 762 goto again;
762 763
763 cv_signal(&sc->sc_req_cv); 764 cv_signal(&sc->sc_req_cv);
764 mutex_exit(&sc->sc_lock); 765 mutex_exit(&sc->sc_lock);
765 766
766 dk_start(&sc->sc_dksc, NULL); 767 dk_start(&sc->sc_dksc, NULL);
767 768
768 return 1; 769 return 1;
769} 770}
770 771
771static void 772static void
772xbdminphys(struct buf *bp) 773xbdminphys(struct buf *bp)
773{ 774{
774 if (bp->b_bcount > XBD_MAX_XFER) { 775 if (bp->b_bcount > XBD_MAX_XFER) {
775 bp->b_bcount = XBD_MAX_XFER; 776 bp->b_bcount = XBD_MAX_XFER;
776 } 777 }
777 minphys(bp); 778 minphys(bp);
778} 779}
779 780
780static void 781static void
781xbd_iosize(device_t dev, int *maxxfer) 782xbd_iosize(device_t dev, int *maxxfer)
782{ 783{
783 /* 784 /*
784 * Always restrict dumps to XBD_MAX_XFER to avoid indirect segments, 785 * Always restrict dumps to XBD_MAX_XFER to avoid indirect segments,
785 * so that it uses as little memory as possible.  786 * so that it uses as little memory as possible.
786 */ 787 */
787 if (*maxxfer > XBD_MAX_XFER) 788 if (*maxxfer > XBD_MAX_XFER)
788 *maxxfer = XBD_MAX_XFER; 789 *maxxfer = XBD_MAX_XFER;
789} 790}
790 791
791int 792int
792xbdopen(dev_t dev, int flags, int fmt, struct lwp *l) 793xbdopen(dev_t dev, int flags, int fmt, struct lwp *l)
793{ 794{
794 struct xbd_xenbus_softc *sc; 795 struct xbd_xenbus_softc *sc;
795 796
796 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 797 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
797 if (sc == NULL) 798 if (sc == NULL)
798 return (ENXIO); 799 return (ENXIO);
799 if ((flags & FWRITE) && (sc->sc_info & VDISK_READONLY)) 800 if ((flags & FWRITE) && (sc->sc_info & VDISK_READONLY))
800 return EROFS; 801 return EROFS;
801 802
802 DPRINTF(("xbdopen(0x%04x, %d)\n", dev, flags)); 803 DPRINTF(("xbdopen(0x%04x, %d)\n", dev, flags));
803 return dk_open(&sc->sc_dksc, dev, flags, fmt, l); 804 return dk_open(&sc->sc_dksc, dev, flags, fmt, l);
804} 805}
805 806
806int 807int
807xbdclose(dev_t dev, int flags, int fmt, struct lwp *l) 808xbdclose(dev_t dev, int flags, int fmt, struct lwp *l)
808{ 809{
809 struct xbd_xenbus_softc *sc; 810 struct xbd_xenbus_softc *sc;
810 811
811 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 812 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
812 813
813 DPRINTF(("xbdclose(%d, %d)\n", dev, flags)); 814 DPRINTF(("xbdclose(%d, %d)\n", dev, flags));
814 return dk_close(&sc->sc_dksc, dev, flags, fmt, l); 815 return dk_close(&sc->sc_dksc, dev, flags, fmt, l);
815} 816}
816 817
817void 818void
818xbdstrategy(struct buf *bp) 819xbdstrategy(struct buf *bp)
819{ 820{
820 struct xbd_xenbus_softc *sc; 821 struct xbd_xenbus_softc *sc;
821 822
822 sc = device_lookup_private(&xbd_cd, DISKUNIT(bp->b_dev)); 823 sc = device_lookup_private(&xbd_cd, DISKUNIT(bp->b_dev));
823 824
824 DPRINTF(("xbdstrategy(%p): b_bcount = %ld\n", bp, 825 DPRINTF(("xbdstrategy(%p): b_bcount = %ld\n", bp,
825 (long)bp->b_bcount)); 826 (long)bp->b_bcount));
826 827
827 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) { 828 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
828 bp->b_error = EIO; 829 bp->b_error = EIO;
829 biodone(bp); 830 biodone(bp);
830 return; 831 return;
831 } 832 }
832 if (__predict_false((sc->sc_info & VDISK_READONLY) && 833 if (__predict_false((sc->sc_info & VDISK_READONLY) &&
833 (bp->b_flags & B_READ) == 0)) { 834 (bp->b_flags & B_READ) == 0)) {
834 bp->b_error = EROFS; 835 bp->b_error = EROFS;
835 biodone(bp); 836 biodone(bp);
836 return; 837 return;
837 } 838 }
838 839
839 dk_strategy(&sc->sc_dksc, bp); 840 dk_strategy(&sc->sc_dksc, bp);
840 return; 841 return;
841} 842}
842 843
843int 844int
844xbdsize(dev_t dev) 845xbdsize(dev_t dev)
845{ 846{
846 struct xbd_xenbus_softc *sc; 847 struct xbd_xenbus_softc *sc;
847 848
848 DPRINTF(("xbdsize(%d)\n", dev)); 849 DPRINTF(("xbdsize(%d)\n", dev));
849 850
850 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 851 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
851 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) 852 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN)
852 return -1; 853 return -1;
853 return dk_size(&sc->sc_dksc, dev); 854 return dk_size(&sc->sc_dksc, dev);
854} 855}
855 856
856int 857int
857xbdread(dev_t dev, struct uio *uio, int flags) 858xbdread(dev_t dev, struct uio *uio, int flags)
858{ 859{
859 struct xbd_xenbus_softc *sc =  860 struct xbd_xenbus_softc *sc =
860 device_lookup_private(&xbd_cd, DISKUNIT(dev)); 861 device_lookup_private(&xbd_cd, DISKUNIT(dev));
861 struct dk_softc *dksc = &sc->sc_dksc; 862 struct dk_softc *dksc = &sc->sc_dksc;
862 863
863 if (!DK_ATTACHED(dksc)) 864 if (!DK_ATTACHED(dksc))
864 return ENXIO; 865 return ENXIO;
865 return physio(xbdstrategy, NULL, dev, B_READ, xbdminphys, uio); 866 return physio(xbdstrategy, NULL, dev, B_READ, xbdminphys, uio);
866} 867}
867 868
868int 869int
869xbdwrite(dev_t dev, struct uio *uio, int flags) 870xbdwrite(dev_t dev, struct uio *uio, int flags)
870{ 871{
871 struct xbd_xenbus_softc *sc = 872 struct xbd_xenbus_softc *sc =
872 device_lookup_private(&xbd_cd, DISKUNIT(dev)); 873 device_lookup_private(&xbd_cd, DISKUNIT(dev));
873 struct dk_softc *dksc = &sc->sc_dksc; 874 struct dk_softc *dksc = &sc->sc_dksc;
874 875
875 if (!DK_ATTACHED(dksc)) 876 if (!DK_ATTACHED(dksc))
876 return ENXIO; 877 return ENXIO;
877 if (__predict_false(sc->sc_info & VDISK_READONLY)) 878 if (__predict_false(sc->sc_info & VDISK_READONLY))
878 return EROFS; 879 return EROFS;
879 return physio(xbdstrategy, NULL, dev, B_WRITE, xbdminphys, uio); 880 return physio(xbdstrategy, NULL, dev, B_WRITE, xbdminphys, uio);
880} 881}
881 882
882int 883int
883xbdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 884xbdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
884{ 885{
885 struct xbd_xenbus_softc *sc = 886 struct xbd_xenbus_softc *sc =
886 device_lookup_private(&xbd_cd, DISKUNIT(dev)); 887 device_lookup_private(&xbd_cd, DISKUNIT(dev));
	struct dk_softc *dksc;
	int error;
	struct xbd_req *xbdreq;
	blkif_request_t *req;
	int notify;

	DPRINTF(("xbdioctl(%d, %08lx, %p, %d, %p)\n",
	    dev, cmd, data, flag, l));
	dksc = &sc->sc_dksc;

	switch (cmd) {
	case DIOCGCACHE:
	    {
		/* Assume there is write cache if cache-flush is supported */
		int *bitsp = (int *)data;
		*bitsp = 0;
		if (sc->sc_features & BLKIF_FEATURE_CACHE_FLUSH)
			*bitsp |= DKCACHE_WRITE;
		error = 0;
		break;
	    }
	case DIOCCACHESYNC:
		if ((sc->sc_features & BLKIF_FEATURE_CACHE_FLUSH) == 0)
			return EOPNOTSUPP;

		mutex_enter(&sc->sc_lock);
		while ((xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head)) == NULL)
			cv_wait(&sc->sc_req_cv, &sc->sc_lock);

		SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
		req = RING_GET_REQUEST(&sc->sc_ring,
		    sc->sc_ring.req_prod_pvt);
		req->id = xbdreq->req_id;
		req->operation = BLKIF_OP_FLUSH_DISKCACHE;
		req->handle = sc->sc_handle;
		xbdreq->req_sync.s_done = 0;
		sc->sc_ring.req_prod_pvt++;
		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
		if (notify)
			hypervisor_notify_via_evtchn(sc->sc_evtchn);
		/* request sent, now wait for completion */
		while (xbdreq->req_sync.s_done == 0)
			cv_wait(&sc->sc_cache_flush_cv, &sc->sc_lock);

		if (xbdreq->req_sync.s_error == BLKIF_RSP_EOPNOTSUPP)
			error = EOPNOTSUPP;
		else if (xbdreq->req_sync.s_error == BLKIF_RSP_OKAY)
			error = 0;
		else
			error = EIO;
		SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
		cv_signal(&sc->sc_req_cv);
		mutex_exit(&sc->sc_lock);

		/* Restart I/O if it was waiting for req */
		dk_start(&sc->sc_dksc, NULL);
		break;

	default:
		error = dk_ioctl(dksc, dev, cmd, data, flag, l);
		break;
	}

	return error;
}

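For reference, a minimal userland sketch of how the two ioctls handled above might be exercised; it is an illustration only, and the device path /dev/rxbd0d is an assumption rather than something taken from this driver or commit.

/* Hypothetical example, not part of the driver: query the cache bits
 * and request a cache flush on an xbd device node. */
#include <sys/ioctl.h>
#include <sys/dkio.h>

#include <err.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int fd, bits, force = 0;

	fd = open("/dev/rxbd0d", O_RDWR);	/* assumed device node */
	if (fd == -1)
		err(1, "open");

	/* Served by the DIOCGCACHE case: DKCACHE_WRITE is reported when
	 * the backend advertises BLKIF_FEATURE_CACHE_FLUSH. */
	if (ioctl(fd, DIOCGCACHE, &bits) == -1)
		err(1, "DIOCGCACHE");
	printf("write cache %s\n",
	    (bits & DKCACHE_WRITE) ? "present" : "absent");

	/* Served by the DIOCCACHESYNC case, which issues
	 * BLKIF_OP_FLUSH_DISKCACHE and waits for its completion. */
	if (ioctl(fd, DIOCCACHESYNC, &force) == -1)
		warn("DIOCCACHESYNC");

	close(fd);
	return 0;
}

When the backend does not advertise the flush feature, the DIOCCACHESYNC call above simply fails with EOPNOTSUPP, matching the early return in xbdioctl().
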
int
xbddump(dev_t dev, daddr_t blkno, void *va, size_t size)
{
	struct xbd_xenbus_softc *sc;

	sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
	if (sc == NULL)
		return (ENXIO);

	DPRINTF(("xbddump(%d, %" PRId64 ", %p, %lu)\n", dev, blkno, va,
	    (unsigned long)size));
	return dk_dump(&sc->sc_dksc, dev, blkno, va, size, 0);
}

static int
xbd_diskstart(device_t self, struct buf *bp)
{
	struct xbd_xenbus_softc *sc = device_private(self);
	struct xbd_req *xbdreq;
	blkif_request_t *req;
	size_t off;
	paddr_t ma;
	int nsects, nbytes, seg;
	int notify, error = 0;

	DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n",
	    bp, (long)bp->b_bcount));

	KASSERT(bp->b_bcount <= XBD_MAX_XFER);

	mutex_enter(&sc->sc_lock);

	if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
		error = EIO;
		goto out;
	}

	if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) {
		/* invalid block number */
		error = EINVAL;
		goto out;
	}

	if (__predict_false(
	    sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
		/* device is suspended, do not consume buffer */
		DPRINTF(("%s: (xbd_diskstart) device suspended\n",
		    sc->sc_dksc.sc_xname));
		error = EAGAIN;
		goto out;
	}

	xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
	if (__predict_false(xbdreq == NULL)) {
		DPRINTF(("xbd_diskstart: no req\n"));
		error = EAGAIN;
		goto out;
	}

	xbdreq->req_bp = bp;
	xbdreq->req_data = bp->b_data;
	if (__predict_false((vaddr_t)bp->b_data & (XEN_BSIZE - 1))) {
		/* Only can get here if this is physio() request */
		KASSERT(bp->b_saveaddr != NULL);

		sc->sc_cnt_map_unalign.ev_count++;

		if (__predict_false(xbd_map_align(xbdreq) != 0)) {
			DPRINTF(("xbd_diskstart: no align\n"));
			error = EAGAIN;
			goto out;
		}
	}

	if (__predict_false(bus_dmamap_load(sc->sc_xbusd->xbusd_dmat,
	    xbdreq->req_dmamap, xbdreq->req_data, bp->b_bcount, NULL,
	    BUS_DMA_NOWAIT) != 0)) {
		printf("%s: %s: bus_dmamap_load failed",
		    device_xname(sc->sc_dksc.sc_dev), __func__);
		error = EINVAL;
		goto out;
	}

	/* We are now committed to the transfer */
	SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
	req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
	req->id = xbdreq->req_id;
	req->operation =
	    bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
	req->sector_number = bp->b_rawblkno;
	req->handle = sc->sc_handle;

	bp->b_resid = 0;
	for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) {
		bus_dma_segment_t *dmaseg = &xbdreq->req_dmamap->dm_segs[seg];

		ma = dmaseg->ds_addr;
		off = ma & PAGE_MASK;
		nbytes = dmaseg->ds_len;
		nsects = nbytes >> XEN_BSHIFT;

		req->seg[seg].first_sect = off >> XEN_BSHIFT;
		req->seg[seg].last_sect = (off >> XEN_BSHIFT) + nsects - 1;
		KASSERT(req->seg[seg].first_sect <= req->seg[seg].last_sect);
		KASSERT(req->seg[seg].last_sect < (PAGE_SIZE / XEN_BSIZE));

		if (__predict_false(xengnt_grant_access(
		    sc->sc_xbusd->xbusd_otherend_id,
		    (ma & ~PAGE_MASK), (bp->b_flags & B_READ) == 0,
		    &xbdreq->req_gntref[seg]))) {
			printf("%s: %s: xengnt_grant_access failed",
			    device_xname(sc->sc_dksc.sc_dev), __func__);
			if (seg > 0) {
				for (; --seg >= 0; ) {
					xengnt_revoke_access(
					    xbdreq->req_gntref[seg]);
				}
			}
			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
			    xbdreq->req_dmamap);
			SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq,
			    req_next);
			error = EFAULT;
			goto out;
		}

		req->seg[seg].gref = xbdreq->req_gntref[seg];
	}
	req->nr_segments = seg;
	sc->sc_ring.req_prod_pvt++;

	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
	if (notify)
		hypervisor_notify_via_evtchn(sc->sc_evtchn);

out:
	mutex_exit(&sc->sc_lock);
	return error;
}
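
As a worked example of the per-segment arithmetic in the loop above, the following standalone sketch (an illustration under assumed x86 constants, not driver code) computes first_sect/last_sect for one DMA segment the same way: from the segment's byte offset within its page, in 512-byte Xen block units.

/* Hypothetical sketch of the first_sect/last_sect computation. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define SKETCH_PAGE_SIZE	4096UL			/* assumed PAGE_SIZE */
#define SKETCH_PAGE_MASK	(SKETCH_PAGE_SIZE - 1)
#define SKETCH_XEN_BSHIFT	9			/* 512-byte Xen blocks */
#define SKETCH_XEN_BSIZE	(1UL << SKETCH_XEN_BSHIFT)

static void
segment_sectors(uint64_t ma, uint32_t len,
    unsigned int *first_sect, unsigned int *last_sect)
{
	uint64_t off = ma & SKETCH_PAGE_MASK;	/* byte offset inside the page */
	unsigned int nsects = len >> SKETCH_XEN_BSHIFT;

	*first_sect = off >> SKETCH_XEN_BSHIFT;
	*last_sect = *first_sect + nsects - 1;
	/* same invariants as the KASSERTs in xbd_diskstart() */
	assert(*first_sect <= *last_sect);
	assert(*last_sect < SKETCH_PAGE_SIZE / SKETCH_XEN_BSIZE);
}

int
main(void)
{
	unsigned int first, last;

	/* A 2048-byte DMA segment starting 1024 bytes into its page. */
	segment_sectors(0x12345400UL, 2048, &first, &last);
	printf("first_sect=%u last_sect=%u\n", first, last);	/* 2 and 5 */
	return 0;
}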

static int
xbd_map_align(struct xbd_req *req)
{
	int rc;

	rc = uvm_km_kmem_alloc(kmem_va_arena,
	    req->req_bp->b_bcount, (VM_NOSLEEP | VM_INSTANTFIT),
	    (vmem_addr_t *)&req->req_data);
	if (__predict_false(rc != 0))
		return ENOMEM;
	if ((req->req_bp->b_flags & B_READ) == 0)
		memcpy(req->req_data, req->req_bp->b_data,
		    req->req_bp->b_bcount);
	return 0;
}

static void
xbd_unmap_align(struct xbd_req *req)
{
	if (req->req_bp->b_flags & B_READ)
		memcpy(req->req_bp->b_data, req->req_data,
		    req->req_bp->b_bcount);
	uvm_km_kmem_free(kmem_va_arena, (vaddr_t)req->req_data,
	    req->req_bp->b_bcount);
}
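
xbd_map_align() and xbd_unmap_align() implement a bounce buffer for physio requests whose data is not XEN_BSIZE-aligned: a write is copied into the aligned buffer before the transfer and a read is copied back out afterwards. The sketch below restates that convention in plain C; posix_memalign() merely stands in for the kmem_va_arena allocation and every name in it is hypothetical.

/* Hypothetical illustration of the bounce-buffer convention above. */
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

struct bounce {
	void	*aligned;	/* plays the role of req->req_data */
	void	*orig;		/* plays the role of bp->b_data */
	size_t	len;		/* plays the role of bp->b_bcount */
	bool	is_read;
};

static int
bounce_map(struct bounce *b)
{
	if (posix_memalign(&b->aligned, 512, b->len) != 0)
		return -1;
	if (!b->is_read)
		memcpy(b->aligned, b->orig, b->len);	/* write: copy in */
	return 0;
}

static void
bounce_unmap(struct bounce *b)
{
	if (b->is_read)
		memcpy(b->orig, b->aligned, b->len);	/* read: copy out */
	free(b->aligned);
}

int
main(void)
{
	char data[1024] = "payload";
	struct bounce b = { NULL, data, sizeof(data), false };

	if (bounce_map(&b) == 0) {	/* write path: data copied in */
		/* ... the transfer would happen here ... */
		bounce_unmap(&b);
	}
	return 0;
}

Releasing the buffer unconditionally in bounce_unmap() mirrors xbd_unmap_align(), which always returns the temporary mapping to kmem_va_arena once any copy-out has been done.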