Tue Apr 14 13:10:43 2020 UTC
use a single pre-allocated buffer for unaligned I/O - it's rare and not
on the performance-critical path, so it's more important to ensure it
will eventually succeed; also return EAGAIN rather than ENOMEM, so the
I/O will be retried by dk_start() when the previous I/O finishes

fix yet another leak on the xengnt_grant_access() failure path in
xbd_diskstart() - this time the unaligned I/O buffer


(jdolecek)
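
A minimal sketch of the single-buffer scheme described above (illustrative
only - the actual helper bodies are not part of the excerpt below, and the
details here are paraphrased): xbd_map_align() hands out the one bounce
buffer pre-allocated at attach time (sc_unalign_buffer) and returns EAGAIN
while an earlier unaligned request still owns it, so dk_start() requeues
the buf and retries once xbd_unmap_align() releases the buffer on
completion.

	static int
	xbd_map_align(struct xbd_xenbus_softc *sc, struct xbd_req *req)
	{
		/* Only one pre-allocated bounce buffer exists; if a previous
		 * unaligned I/O still owns it, ask dk_start() to retry later. */
		if (!sc->sc_unalign_free) {
			sc->sc_cnt_unalign_busy.ev_count++;
			return EAGAIN;
		}
		sc->sc_unalign_free = false;
		req->req_data = (void *)sc->sc_unalign_buffer;
		/* For writes, copy the caller's data into the bounce buffer
		 * before the request is posted to the ring. */
		if ((req->req_bp->b_flags & B_READ) == 0)
			memcpy(req->req_data, req->req_bp->b_data,
			    req->req_bp->b_bcount);
		return 0;
	}

	static void
	xbd_unmap_align(struct xbd_xenbus_softc *sc, struct xbd_req *req,
	    bool sync)
	{
		/* For reads, copy the data back to the caller's buffer, then
		 * release the bounce buffer for the next unaligned I/O. */
		if (sync && (req->req_bp->b_flags & B_READ) != 0)
			memcpy(req->req_bp->b_data, req->req_data,
			    req->req_bp->b_bcount);
		sc->sc_unalign_free = true;
	}
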
diff -r1.110 -r1.111 src/sys/arch/xen/xen/xbd_xenbus.c

--- src/sys/arch/xen/xen/xbd_xenbus.c 2020/04/14 13:02:40 1.110
+++ src/sys/arch/xen/xen/xbd_xenbus.c 2020/04/14 13:10:43 1.111
@@ -1,1117 +1,1144 @@
1/* $NetBSD: xbd_xenbus.c,v 1.110 2020/04/14 13:02:40 jdolecek Exp $ */ 1/* $NetBSD: xbd_xenbus.c,v 1.111 2020/04/14 13:10:43 jdolecek Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2006 Manuel Bouyer. 4 * Copyright (c) 2006 Manuel Bouyer.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * 25 *
26 */ 26 */
27 27
28/* 28/*
29 * The file contains the xbd frontend code required for block-level 29 * The file contains the xbd frontend code required for block-level
30 * communications (similar to hard disks) between two Xen domains. 30 * communications (similar to hard disks) between two Xen domains.
31 * 31 *
32 * We are not supposed to receive solicitations spontaneously from backend. The 32 * We are not supposed to receive solicitations spontaneously from backend. The
33 * protocol is therefore fairly simple and uses only one ring to communicate 33 * protocol is therefore fairly simple and uses only one ring to communicate
34 * with backend: frontend posts requests to the ring then wait for their 34 * with backend: frontend posts requests to the ring then wait for their
35 * replies asynchronously. 35 * replies asynchronously.
36 * 36 *
37 * xbd follows NetBSD's disk(9) convention. At any time, a LWP can schedule 37 * xbd follows NetBSD's disk(9) convention. At any time, a LWP can schedule
38 * an operation request for the device (be it open(), read(), write(), ...). 38 * an operation request for the device (be it open(), read(), write(), ...).
39 * Calls are typically processed that way: 39 * Calls are typically processed that way:
40 * - initiate request: xbdread/write/open/ioctl/.. 40 * - initiate request: xbdread/write/open/ioctl/..
41 * - depending on operation, it is handled directly by disk(9) subsystem or 41 * - depending on operation, it is handled directly by disk(9) subsystem or
42 * goes through physio(9) first. 42 * goes through physio(9) first.
43 * - the request is ultimately processed by xbd_diskstart() that prepares the 43 * - the request is ultimately processed by xbd_diskstart() that prepares the
44 * xbd requests, post them in the ring I/O queue, then signal the backend. 44 * xbd requests, post them in the ring I/O queue, then signal the backend.
45 * 45 *
46 * When a response is available in the queue, the backend signals the frontend 46 * When a response is available in the queue, the backend signals the frontend
47 * via its event channel. This triggers xbd_handler(), which will link back 47 * via its event channel. This triggers xbd_handler(), which will link back
48 * the response to its request through the request ID, and mark the I/O as 48 * the response to its request through the request ID, and mark the I/O as
49 * completed. 49 * completed.
50 */ 50 */
51 51
52#include <sys/cdefs.h> 52#include <sys/cdefs.h>
53__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.110 2020/04/14 13:02:40 jdolecek Exp $"); 53__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.111 2020/04/14 13:10:43 jdolecek Exp $");
54 54
55#include "opt_xen.h" 55#include "opt_xen.h"
56 56
57 57
58#include <sys/param.h> 58#include <sys/param.h>
59#include <sys/buf.h> 59#include <sys/buf.h>
60#include <sys/bufq.h> 60#include <sys/bufq.h>
61#include <sys/device.h> 61#include <sys/device.h>
62#include <sys/disk.h> 62#include <sys/disk.h>
63#include <sys/disklabel.h> 63#include <sys/disklabel.h>
64#include <sys/conf.h> 64#include <sys/conf.h>
65#include <sys/fcntl.h> 65#include <sys/fcntl.h>
66#include <sys/kernel.h> 66#include <sys/kernel.h>
67#include <sys/proc.h> 67#include <sys/proc.h>
68#include <sys/systm.h> 68#include <sys/systm.h>
69#include <sys/stat.h> 69#include <sys/stat.h>
70#include <sys/vnode.h> 70#include <sys/vnode.h>
71#include <sys/mutex.h> 71#include <sys/mutex.h>
72 72
73#include <dev/dkvar.h> 73#include <dev/dkvar.h>
74 74
75#include <uvm/uvm.h> 75#include <uvm/uvm.h>
76 76
77#include <xen/hypervisor.h> 77#include <xen/hypervisor.h>
78#include <xen/evtchn.h> 78#include <xen/evtchn.h>
79#include <xen/granttables.h> 79#include <xen/granttables.h>
80#include <xen/include/public/io/blkif.h> 80#include <xen/include/public/io/blkif.h>
81#include <xen/include/public/io/protocols.h> 81#include <xen/include/public/io/protocols.h>
82 82
83#include <xen/xenbus.h> 83#include <xen/xenbus.h>
84#include "locators.h" 84#include "locators.h"
85 85
86#undef XBD_DEBUG 86#undef XBD_DEBUG
87#ifdef XBD_DEBUG 87#ifdef XBD_DEBUG
88#define DPRINTF(x) printf x; 88#define DPRINTF(x) printf x;
89#else 89#else
90#define DPRINTF(x) 90#define DPRINTF(x)
91#endif 91#endif
92 92
93#define GRANT_INVALID_REF -1 93#define GRANT_INVALID_REF -1
94 94
95#define XBD_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 95#define XBD_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
96#define XBD_MAX_XFER (PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST) 96#define XBD_MAX_XFER (PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST)
97 97
98#define XEN_BSHIFT 9 /* log2(XEN_BSIZE) */ 98#define XEN_BSHIFT 9 /* log2(XEN_BSIZE) */
99#define XEN_BSIZE (1 << XEN_BSHIFT)  99#define XEN_BSIZE (1 << XEN_BSHIFT)
100 100
101struct xbd_req { 101struct xbd_req {
102 SLIST_ENTRY(xbd_req) req_next; 102 SLIST_ENTRY(xbd_req) req_next;
103 uint16_t req_id; /* ID passed to backend */ 103 uint16_t req_id; /* ID passed to backend */
104 bus_dmamap_t req_dmamap; 104 bus_dmamap_t req_dmamap;
105 union { 105 union {
106 struct { 106 struct {
107 grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 107 grant_ref_t req_gntref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
108 struct buf *req_bp; /* buffer associated with this request */ 108 struct buf *req_bp; /* buffer associated with this request */
109 void *req_data; /* pointer to the data buffer */ 109 void *req_data; /* pointer to the data buffer */
110 } req_rw; 110 } req_rw;
111 struct { 111 struct {
112 int s_error; 112 int s_error;
113 int s_done; 113 int s_done;
114 } req_sync; 114 } req_sync;
115 } u; 115 } u;
116}; 116};
117#define req_gntref u.req_rw.req_gntref 117#define req_gntref u.req_rw.req_gntref
118#define req_bp u.req_rw.req_bp 118#define req_bp u.req_rw.req_bp
119#define req_data u.req_rw.req_data 119#define req_data u.req_rw.req_data
120#define req_sync u.req_sync 120#define req_sync u.req_sync
121 121
122struct xbd_xenbus_softc { 122struct xbd_xenbus_softc {
123 struct dk_softc sc_dksc; /* Must be first in this struct */ 123 struct dk_softc sc_dksc; /* Must be first in this struct */
124 struct xenbus_device *sc_xbusd; 124 struct xenbus_device *sc_xbusd;
125 unsigned int sc_evtchn; 125 unsigned int sc_evtchn;
126 126
127 struct intrhand *sc_ih; /* Interrupt handler for this instance. */ 127 struct intrhand *sc_ih; /* Interrupt handler for this instance. */
128 kmutex_t sc_lock; 128 kmutex_t sc_lock;
129 kcondvar_t sc_cache_flush_cv; 129 kcondvar_t sc_cache_flush_cv;
130 kcondvar_t sc_req_cv; 130 kcondvar_t sc_req_cv;
131 kcondvar_t sc_detach_cv; 131 kcondvar_t sc_detach_cv;
132 kcondvar_t sc_suspend_cv; 132 kcondvar_t sc_suspend_cv;
133 133
134 blkif_front_ring_t sc_ring; 134 blkif_front_ring_t sc_ring;
135 grant_ref_t sc_ring_gntref; 135 grant_ref_t sc_ring_gntref;
136 136
137 struct xbd_req sc_reqs[XBD_RING_SIZE]; 137 struct xbd_req sc_reqs[XBD_RING_SIZE];
138 SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */ 138 SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */
139 139
 140 vmem_addr_t sc_unalign_buffer;
 141 bool sc_unalign_free;
 142
140 int sc_backend_status; /* our status with backend */ 143 int sc_backend_status; /* our status with backend */
141#define BLKIF_STATE_DISCONNECTED 0 144#define BLKIF_STATE_DISCONNECTED 0
142#define BLKIF_STATE_CONNECTED 1 145#define BLKIF_STATE_CONNECTED 1
143#define BLKIF_STATE_SUSPENDED 2 146#define BLKIF_STATE_SUSPENDED 2
144 147
145 int sc_shutdown; 148 int sc_shutdown;
146#define BLKIF_SHUTDOWN_RUN 0 /* no shutdown */ 149#define BLKIF_SHUTDOWN_RUN 0 /* no shutdown */
147#define BLKIF_SHUTDOWN_REMOTE 1 /* backend-initiated shutdown in progress */ 150#define BLKIF_SHUTDOWN_REMOTE 1 /* backend-initiated shutdown in progress */
148#define BLKIF_SHUTDOWN_LOCAL 2 /* locally-initiated shutdown in progress */ 151#define BLKIF_SHUTDOWN_LOCAL 2 /* locally-initiated shutdown in progress */
149 152
150 uint64_t sc_sectors; /* number of sectors for this device */ 153 uint64_t sc_sectors; /* number of sectors for this device */
151 u_long sc_secsize; /* sector size */ 154 u_long sc_secsize; /* sector size */
152 uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */ 155 uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */
153 u_long sc_info; /* VDISK_* */ 156 u_long sc_info; /* VDISK_* */
154 u_long sc_handle; /* from backend */ 157 u_long sc_handle; /* from backend */
155 int sc_features; 158 int sc_features;
156#define BLKIF_FEATURE_CACHE_FLUSH 0x1 159#define BLKIF_FEATURE_CACHE_FLUSH 0x1
157#define BLKIF_FEATURE_BARRIER 0x2 160#define BLKIF_FEATURE_BARRIER 0x2
158#define BLKIF_FEATURE_PERSISTENT 0x4 161#define BLKIF_FEATURE_PERSISTENT 0x4
159#define BLKIF_FEATURE_BITS \ 162#define BLKIF_FEATURE_BITS \
160 "\20\1CACHE-FLUSH\2BARRIER\3PERSISTENT" 163 "\20\1CACHE-FLUSH\2BARRIER\3PERSISTENT"
161 struct evcnt sc_cnt_map_unalign; 164 struct evcnt sc_cnt_map_unalign;
 165 struct evcnt sc_cnt_unalign_busy;
 166 struct evcnt sc_cnt_queue_full;
162}; 167};
163 168
164#if 0 169#if 0
165/* too big to be on stack */ 170/* too big to be on stack */
166static multicall_entry_t rq_mcl[XBD_RING_SIZE+1]; 171static multicall_entry_t rq_mcl[XBD_RING_SIZE+1];
167static paddr_t rq_pages[XBD_RING_SIZE]; 172static paddr_t rq_pages[XBD_RING_SIZE];
168#endif 173#endif
169 174
170static int xbd_xenbus_match(device_t, cfdata_t, void *); 175static int xbd_xenbus_match(device_t, cfdata_t, void *);
171static void xbd_xenbus_attach(device_t, device_t, void *); 176static void xbd_xenbus_attach(device_t, device_t, void *);
172static int xbd_xenbus_detach(device_t, int); 177static int xbd_xenbus_detach(device_t, int);
173 178
174static bool xbd_xenbus_suspend(device_t, const pmf_qual_t *); 179static bool xbd_xenbus_suspend(device_t, const pmf_qual_t *);
175static bool xbd_xenbus_resume(device_t, const pmf_qual_t *); 180static bool xbd_xenbus_resume(device_t, const pmf_qual_t *);
176 181
177static int xbd_handler(void *); 182static int xbd_handler(void *);
178static int xbd_diskstart(device_t, struct buf *); 183static int xbd_diskstart(device_t, struct buf *);
179static void xbd_iosize(device_t, int *); 184static void xbd_iosize(device_t, int *);
180static void xbd_backend_changed(void *, XenbusState); 185static void xbd_backend_changed(void *, XenbusState);
181static void xbd_connect(struct xbd_xenbus_softc *); 186static void xbd_connect(struct xbd_xenbus_softc *);
182 187
183static int xbd_map_align(struct xbd_req *); 188static int xbd_map_align(struct xbd_xenbus_softc *, struct xbd_req *);
184static void xbd_unmap_align(struct xbd_req *); 189static void xbd_unmap_align(struct xbd_xenbus_softc *, struct xbd_req *, bool);
185 190
186static void xbdminphys(struct buf *); 191static void xbdminphys(struct buf *);
187 192
188CFATTACH_DECL3_NEW(xbd, sizeof(struct xbd_xenbus_softc), 193CFATTACH_DECL3_NEW(xbd, sizeof(struct xbd_xenbus_softc),
189 xbd_xenbus_match, xbd_xenbus_attach, xbd_xenbus_detach, NULL, NULL, NULL, 194 xbd_xenbus_match, xbd_xenbus_attach, xbd_xenbus_detach, NULL, NULL, NULL,
190 DVF_DETACH_SHUTDOWN); 195 DVF_DETACH_SHUTDOWN);
191 196
192dev_type_open(xbdopen); 197dev_type_open(xbdopen);
193dev_type_close(xbdclose); 198dev_type_close(xbdclose);
194dev_type_read(xbdread); 199dev_type_read(xbdread);
195dev_type_write(xbdwrite); 200dev_type_write(xbdwrite);
196dev_type_ioctl(xbdioctl); 201dev_type_ioctl(xbdioctl);
197dev_type_strategy(xbdstrategy); 202dev_type_strategy(xbdstrategy);
198dev_type_dump(xbddump); 203dev_type_dump(xbddump);
199dev_type_size(xbdsize); 204dev_type_size(xbdsize);
200 205
201const struct bdevsw xbd_bdevsw = { 206const struct bdevsw xbd_bdevsw = {
202 .d_open = xbdopen, 207 .d_open = xbdopen,
203 .d_close = xbdclose, 208 .d_close = xbdclose,
204 .d_strategy = xbdstrategy, 209 .d_strategy = xbdstrategy,
205 .d_ioctl = xbdioctl, 210 .d_ioctl = xbdioctl,
206 .d_dump = xbddump, 211 .d_dump = xbddump,
207 .d_psize = xbdsize, 212 .d_psize = xbdsize,
208 .d_discard = nodiscard, 213 .d_discard = nodiscard,
209 .d_flag = D_DISK | D_MPSAFE 214 .d_flag = D_DISK | D_MPSAFE
210}; 215};
211 216
212const struct cdevsw xbd_cdevsw = { 217const struct cdevsw xbd_cdevsw = {
213 .d_open = xbdopen, 218 .d_open = xbdopen,
214 .d_close = xbdclose, 219 .d_close = xbdclose,
215 .d_read = xbdread, 220 .d_read = xbdread,
216 .d_write = xbdwrite, 221 .d_write = xbdwrite,
217 .d_ioctl = xbdioctl, 222 .d_ioctl = xbdioctl,
218 .d_stop = nostop, 223 .d_stop = nostop,
219 .d_tty = notty, 224 .d_tty = notty,
220 .d_poll = nopoll, 225 .d_poll = nopoll,
221 .d_mmap = nommap, 226 .d_mmap = nommap,
222 .d_kqfilter = nokqfilter, 227 .d_kqfilter = nokqfilter,
223 .d_discard = nodiscard, 228 .d_discard = nodiscard,
224 .d_flag = D_DISK | D_MPSAFE 229 .d_flag = D_DISK | D_MPSAFE
225}; 230};
226 231
227extern struct cfdriver xbd_cd; 232extern struct cfdriver xbd_cd;
228 233
229static struct dkdriver xbddkdriver = { 234static struct dkdriver xbddkdriver = {
230 .d_strategy = xbdstrategy, 235 .d_strategy = xbdstrategy,
231 .d_minphys = xbdminphys, 236 .d_minphys = xbdminphys,
232 .d_open = xbdopen, 237 .d_open = xbdopen,
233 .d_close = xbdclose, 238 .d_close = xbdclose,
234 .d_diskstart = xbd_diskstart, 239 .d_diskstart = xbd_diskstart,
235 .d_iosize = xbd_iosize, 240 .d_iosize = xbd_iosize,
236}; 241};
237 242
238static int 243static int
239xbd_xenbus_match(device_t parent, cfdata_t match, void *aux) 244xbd_xenbus_match(device_t parent, cfdata_t match, void *aux)
240{ 245{
241 struct xenbusdev_attach_args *xa = aux; 246 struct xenbusdev_attach_args *xa = aux;
242 247
243 if (strcmp(xa->xa_type, "vbd") != 0) 248 if (strcmp(xa->xa_type, "vbd") != 0)
244 return 0; 249 return 0;
245 250
246 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT && 251 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
247 match->cf_loc[XENBUSCF_ID] != xa->xa_id) 252 match->cf_loc[XENBUSCF_ID] != xa->xa_id)
248 return 0; 253 return 0;
249 254
250 return 1; 255 return 1;
251} 256}
252 257
253static void 258static void
254xbd_xenbus_attach(device_t parent, device_t self, void *aux) 259xbd_xenbus_attach(device_t parent, device_t self, void *aux)
255{ 260{
256 struct xbd_xenbus_softc *sc = device_private(self); 261 struct xbd_xenbus_softc *sc = device_private(self);
257 struct xenbusdev_attach_args *xa = aux; 262 struct xenbusdev_attach_args *xa = aux;
258 blkif_sring_t *ring; 263 blkif_sring_t *ring;
259 RING_IDX i; 264 RING_IDX i;
260#ifdef XBD_DEBUG 265#ifdef XBD_DEBUG
261 char **dir, *val; 266 char **dir, *val;
262 int dir_n = 0; 267 int dir_n = 0;
263 char id_str[20]; 268 char id_str[20];
264 int err; 269 int err;
265#endif 270#endif
266 271
267 config_pending_incr(self); 272 config_pending_incr(self);
268 aprint_normal(": Xen Virtual Block Device Interface\n"); 273 aprint_normal(": Xen Virtual Block Device Interface\n");
269 274
270 dk_init(&sc->sc_dksc, self, DKTYPE_ESDI); 275 dk_init(&sc->sc_dksc, self, DKTYPE_ESDI);
271 disk_init(&sc->sc_dksc.sc_dkdev, device_xname(self), &xbddkdriver); 276 disk_init(&sc->sc_dksc.sc_dkdev, device_xname(self), &xbddkdriver);
272 277
273 sc->sc_xbusd = xa->xa_xbusd; 278 sc->sc_xbusd = xa->xa_xbusd;
274 sc->sc_xbusd->xbusd_otherend_changed = xbd_backend_changed; 279 sc->sc_xbusd->xbusd_otherend_changed = xbd_backend_changed;
275 280
276 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_BIO); 281 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_BIO);
277 cv_init(&sc->sc_cache_flush_cv, "xbdsync"); 282 cv_init(&sc->sc_cache_flush_cv, "xbdsync");
278 cv_init(&sc->sc_req_cv, "xbdreq"); 283 cv_init(&sc->sc_req_cv, "xbdreq");
279 cv_init(&sc->sc_detach_cv, "xbddetach"); 284 cv_init(&sc->sc_detach_cv, "xbddetach");
280 cv_init(&sc->sc_suspend_cv, "xbdsuspend"); 285 cv_init(&sc->sc_suspend_cv, "xbdsuspend");
281 286
282 /* initialize free requests list */ 287 /* initialize free requests list */
283 SLIST_INIT(&sc->sc_xbdreq_head); 288 SLIST_INIT(&sc->sc_xbdreq_head);
284 for (i = 0; i < XBD_RING_SIZE; i++) { 289 for (i = 0; i < XBD_RING_SIZE; i++) {
285 sc->sc_reqs[i].req_id = i; 290 sc->sc_reqs[i].req_id = i;
286 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, &sc->sc_reqs[i], 291 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, &sc->sc_reqs[i],
287 req_next); 292 req_next);
288 } 293 }
289 294
290 sc->sc_backend_status = BLKIF_STATE_DISCONNECTED; 295 sc->sc_backend_status = BLKIF_STATE_DISCONNECTED;
291 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE; 296 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;
292 297
293 ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED); 298 ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED);
294 if (ring == NULL) 299 if (ring == NULL)
295 panic("%s: can't alloc ring", device_xname(self)); 300 panic("%s: can't alloc ring", device_xname(self));
296 sc->sc_ring.sring = ring; 301 sc->sc_ring.sring = ring;
297 302
298 evcnt_attach_dynamic(&sc->sc_cnt_map_unalign, EVCNT_TYPE_MISC, 303 evcnt_attach_dynamic(&sc->sc_cnt_map_unalign, EVCNT_TYPE_MISC,
299 NULL, device_xname(self), "map unaligned"); 304 NULL, device_xname(self), "map unaligned");
 305 evcnt_attach_dynamic(&sc->sc_cnt_unalign_busy, EVCNT_TYPE_MISC,
 306 NULL, device_xname(self), "map unaligned");
 307 evcnt_attach_dynamic(&sc->sc_cnt_queue_full, EVCNT_TYPE_MISC,
 308 NULL, device_xname(self), "queue full");
300 309
301 for (i = 0; i < XBD_RING_SIZE; i++) { 310 for (i = 0; i < XBD_RING_SIZE; i++) {
302 if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, 311 if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat,
303 XBD_MAX_XFER, BLKIF_MAX_SEGMENTS_PER_REQUEST, 312 XBD_MAX_XFER, BLKIF_MAX_SEGMENTS_PER_REQUEST,
304 PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 313 PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
305 &sc->sc_reqs[i].req_dmamap) != 0) { 314 &sc->sc_reqs[i].req_dmamap) != 0) {
306 aprint_error_dev(self, "can't alloc dma maps\n"); 315 aprint_error_dev(self, "can't alloc dma maps\n");
307 return; 316 return;
308 } 317 }
309 } 318 }
310 319
 320 if (uvm_km_kmem_alloc(kmem_va_arena,
 321 MAXPHYS, VM_SLEEP | VM_INSTANTFIT, &sc->sc_unalign_buffer) != 0) {
 322 aprint_error_dev(self, "can't alloc align buffer\n");
 323 return;
 324 }
 325 sc->sc_unalign_free = true;
 326
311 /* resume shared structures and tell backend that we are ready */ 327 /* resume shared structures and tell backend that we are ready */
312 if (xbd_xenbus_resume(self, PMF_Q_NONE) == false) { 328 if (xbd_xenbus_resume(self, PMF_Q_NONE) == false) {
313 uvm_km_free(kernel_map, (vaddr_t)ring, PAGE_SIZE, 329 uvm_km_free(kernel_map, (vaddr_t)ring, PAGE_SIZE,
314 UVM_KMF_WIRED); 330 UVM_KMF_WIRED);
315 return; 331 return;
316 } 332 }
317 333
318 if (!pmf_device_register(self, xbd_xenbus_suspend, xbd_xenbus_resume)) 334 if (!pmf_device_register(self, xbd_xenbus_suspend, xbd_xenbus_resume))
319 aprint_error_dev(self, "couldn't establish power handler\n"); 335 aprint_error_dev(self, "couldn't establish power handler\n");
320 336
321} 337}
322 338
323static int 339static int
324xbd_xenbus_detach(device_t dev, int flags) 340xbd_xenbus_detach(device_t dev, int flags)
325{ 341{
326 struct xbd_xenbus_softc *sc = device_private(dev); 342 struct xbd_xenbus_softc *sc = device_private(dev);
327 int bmaj, cmaj, i, mn, rc; 343 int bmaj, cmaj, i, mn, rc;
328 344
329 DPRINTF(("%s: xbd_detach\n", device_xname(dev))); 345 DPRINTF(("%s: xbd_detach\n", device_xname(dev)));
330 346
331 rc = disk_begindetach(&sc->sc_dksc.sc_dkdev, NULL, dev, flags); 347 rc = disk_begindetach(&sc->sc_dksc.sc_dkdev, NULL, dev, flags);
332 if (rc != 0) 348 if (rc != 0)
333 return rc; 349 return rc;
334 350
335 mutex_enter(&sc->sc_lock); 351 mutex_enter(&sc->sc_lock);
336 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN) { 352 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN) {
337 sc->sc_shutdown = BLKIF_SHUTDOWN_LOCAL; 353 sc->sc_shutdown = BLKIF_SHUTDOWN_LOCAL;
338 354
339 /* wait for requests to complete */ 355 /* wait for requests to complete */
340 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED && 356 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
341 disk_isbusy(&sc->sc_dksc.sc_dkdev)) { 357 disk_isbusy(&sc->sc_dksc.sc_dkdev)) {
342 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 358 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
343 } 359 }
344 mutex_exit(&sc->sc_lock); 360 mutex_exit(&sc->sc_lock);
345 361
346 /* Trigger state transition with backend */ 362 /* Trigger state transition with backend */
347 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing); 363 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing);
348 364
349 mutex_enter(&sc->sc_lock); 365 mutex_enter(&sc->sc_lock);
350 } 366 }
351 if ((flags & DETACH_FORCE) == 0) { 367 if ((flags & DETACH_FORCE) == 0) {
352 /* xbd_xenbus_detach already in progress */ 368 /* xbd_xenbus_detach already in progress */
353 cv_broadcast(&sc->sc_detach_cv); 369 cv_broadcast(&sc->sc_detach_cv);
354 mutex_exit(&sc->sc_lock); 370 mutex_exit(&sc->sc_lock);
355 return EALREADY; 371 return EALREADY;
356 } 372 }
357 mutex_exit(&sc->sc_lock); 373 mutex_exit(&sc->sc_lock);
358 while (xenbus_read_driver_state(sc->sc_xbusd->xbusd_otherend) 374 while (xenbus_read_driver_state(sc->sc_xbusd->xbusd_otherend)
359 != XenbusStateClosed) { 375 != XenbusStateClosed) {
360 mutex_enter(&sc->sc_lock); 376 mutex_enter(&sc->sc_lock);
361 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 377 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
362 mutex_exit(&sc->sc_lock); 378 mutex_exit(&sc->sc_lock);
363 } 379 }
364 380
365 /* locate the major number */ 381 /* locate the major number */
366 bmaj = bdevsw_lookup_major(&xbd_bdevsw); 382 bmaj = bdevsw_lookup_major(&xbd_bdevsw);
367 cmaj = cdevsw_lookup_major(&xbd_cdevsw); 383 cmaj = cdevsw_lookup_major(&xbd_cdevsw);
368 384
369 /* Nuke the vnodes for any open instances. */ 385 /* Nuke the vnodes for any open instances. */
370 for (i = 0; i < MAXPARTITIONS; i++) { 386 for (i = 0; i < MAXPARTITIONS; i++) {
371 mn = DISKMINOR(device_unit(dev), i); 387 mn = DISKMINOR(device_unit(dev), i);
372 vdevgone(bmaj, mn, mn, VBLK); 388 vdevgone(bmaj, mn, mn, VBLK);
373 vdevgone(cmaj, mn, mn, VCHR); 389 vdevgone(cmaj, mn, mn, VCHR);
374 } 390 }
375 391
376 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED) { 392 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED) {
377 /* Delete all of our wedges. */ 393 /* Delete all of our wedges. */
378 dkwedge_delall(&sc->sc_dksc.sc_dkdev); 394 dkwedge_delall(&sc->sc_dksc.sc_dkdev);
379 395
380 /* Kill off any queued buffers. */ 396 /* Kill off any queued buffers. */
381 dk_drain(&sc->sc_dksc); 397 dk_drain(&sc->sc_dksc);
382 bufq_free(sc->sc_dksc.sc_bufq); 398 bufq_free(sc->sc_dksc.sc_bufq);
383 399
384 /* detach disk */ 400 /* detach disk */
385 disk_detach(&sc->sc_dksc.sc_dkdev); 401 disk_detach(&sc->sc_dksc.sc_dkdev);
386 disk_destroy(&sc->sc_dksc.sc_dkdev); 402 disk_destroy(&sc->sc_dksc.sc_dkdev);
387 dk_detach(&sc->sc_dksc); 403 dk_detach(&sc->sc_dksc);
388 } 404 }
389 405
390 hypervisor_mask_event(sc->sc_evtchn); 406 hypervisor_mask_event(sc->sc_evtchn);
391 xen_intr_disestablish(sc->sc_ih); 407 xen_intr_disestablish(sc->sc_ih);
392 408
393 mutex_enter(&sc->sc_lock); 409 mutex_enter(&sc->sc_lock);
394 while (xengnt_status(sc->sc_ring_gntref)) 410 while (xengnt_status(sc->sc_ring_gntref))
395 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 411 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
396 mutex_exit(&sc->sc_lock); 412 mutex_exit(&sc->sc_lock);
397 413
398 xengnt_revoke_access(sc->sc_ring_gntref); 414 xengnt_revoke_access(sc->sc_ring_gntref);
399 uvm_km_free(kernel_map, (vaddr_t)sc->sc_ring.sring, 415 uvm_km_free(kernel_map, (vaddr_t)sc->sc_ring.sring,
400 PAGE_SIZE, UVM_KMF_WIRED); 416 PAGE_SIZE, UVM_KMF_WIRED);
401 417
402 for (i = 0; i < XBD_RING_SIZE; i++) { 418 for (i = 0; i < XBD_RING_SIZE; i++) {
403 if (sc->sc_reqs[i].req_dmamap != NULL) { 419 if (sc->sc_reqs[i].req_dmamap != NULL) {
404 bus_dmamap_destroy(sc->sc_xbusd->xbusd_dmat, 420 bus_dmamap_destroy(sc->sc_xbusd->xbusd_dmat,
405 sc->sc_reqs[i].req_dmamap); 421 sc->sc_reqs[i].req_dmamap);
406 sc->sc_reqs[i].req_dmamap = NULL; 422 sc->sc_reqs[i].req_dmamap = NULL;
407 } 423 }
408 } 424 }
409 425
 426 if (sc->sc_unalign_buffer != 0) {
 427 uvm_km_kmem_free(kmem_va_arena, sc->sc_unalign_buffer, MAXPHYS);
 428 sc->sc_unalign_buffer = 0;
 429 }
 430
410 mutex_destroy(&sc->sc_lock); 431 mutex_destroy(&sc->sc_lock);
411 432
412 evcnt_detach(&sc->sc_cnt_map_unalign); 433 evcnt_detach(&sc->sc_cnt_map_unalign);
 434 evcnt_detach(&sc->sc_cnt_unalign_busy);
 435 evcnt_detach(&sc->sc_cnt_queue_full);
413 436
414 pmf_device_deregister(dev); 437 pmf_device_deregister(dev);
415 438
416 return 0; 439 return 0;
417} 440}
418 441
419static bool 442static bool
420xbd_xenbus_suspend(device_t dev, const pmf_qual_t *qual) { 443xbd_xenbus_suspend(device_t dev, const pmf_qual_t *qual) {
421 444
422 struct xbd_xenbus_softc *sc; 445 struct xbd_xenbus_softc *sc;
423 446
424 sc = device_private(dev); 447 sc = device_private(dev);
425 448
426 mutex_enter(&sc->sc_lock); 449 mutex_enter(&sc->sc_lock);
427 /* wait for requests to complete, then suspend device */ 450 /* wait for requests to complete, then suspend device */
428 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED && 451 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
429 disk_isbusy(&sc->sc_dksc.sc_dkdev)) { 452 disk_isbusy(&sc->sc_dksc.sc_dkdev)) {
430 cv_timedwait(&sc->sc_suspend_cv, &sc->sc_lock, hz/2); 453 cv_timedwait(&sc->sc_suspend_cv, &sc->sc_lock, hz/2);
431 } 454 }
432 455
433 hypervisor_mask_event(sc->sc_evtchn); 456 hypervisor_mask_event(sc->sc_evtchn);
434 sc->sc_backend_status = BLKIF_STATE_SUSPENDED; 457 sc->sc_backend_status = BLKIF_STATE_SUSPENDED;
435 xen_intr_disestablish(sc->sc_ih); 458 xen_intr_disestablish(sc->sc_ih);
436 459
437 mutex_exit(&sc->sc_lock); 460 mutex_exit(&sc->sc_lock);
438 461
439 xenbus_device_suspend(sc->sc_xbusd); 462 xenbus_device_suspend(sc->sc_xbusd);
440 aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn); 463 aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
441 464
442 return true; 465 return true;
443} 466}
444 467
445static bool 468static bool
446xbd_xenbus_resume(device_t dev, const pmf_qual_t *qual) 469xbd_xenbus_resume(device_t dev, const pmf_qual_t *qual)
447{ 470{
448 struct xbd_xenbus_softc *sc; 471 struct xbd_xenbus_softc *sc;
449 struct xenbus_transaction *xbt; 472 struct xenbus_transaction *xbt;
450 int error; 473 int error;
451 blkif_sring_t *ring; 474 blkif_sring_t *ring;
452 paddr_t ma; 475 paddr_t ma;
453 const char *errmsg; 476 const char *errmsg;
454 477
455 sc = device_private(dev); 478 sc = device_private(dev);
456 479
457 if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) { 480 if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) {
458 /* 481 /*
459 * Device was suspended, so ensure that access associated to 482 * Device was suspended, so ensure that access associated to
460 * the block I/O ring is revoked. 483 * the block I/O ring is revoked.
461 */ 484 */
462 xengnt_revoke_access(sc->sc_ring_gntref); 485 xengnt_revoke_access(sc->sc_ring_gntref);
463 } 486 }
464 sc->sc_ring_gntref = GRANT_INVALID_REF; 487 sc->sc_ring_gntref = GRANT_INVALID_REF;
465 488
466 /* Initialize ring */ 489 /* Initialize ring */
467 ring = sc->sc_ring.sring; 490 ring = sc->sc_ring.sring;
468 memset(ring, 0, PAGE_SIZE); 491 memset(ring, 0, PAGE_SIZE);
469 SHARED_RING_INIT(ring); 492 SHARED_RING_INIT(ring);
470 FRONT_RING_INIT(&sc->sc_ring, ring, PAGE_SIZE); 493 FRONT_RING_INIT(&sc->sc_ring, ring, PAGE_SIZE);
471 494
472 /* 495 /*
473 * get MA address of the ring, and use it to set up the grant entry 496 * get MA address of the ring, and use it to set up the grant entry
474 * for the block device 497 * for the block device
475 */ 498 */
476 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)ring, &ma); 499 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)ring, &ma);
477 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_ring_gntref); 500 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_ring_gntref);
478 if (error) 501 if (error)
479 goto abort_resume; 502 goto abort_resume;
480 503
481 error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn); 504 error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
482 if (error) 505 if (error)
483 goto abort_resume; 506 goto abort_resume;
484 507
485 aprint_verbose_dev(dev, "using event channel %d\n", 508 aprint_verbose_dev(dev, "using event channel %d\n",
486 sc->sc_evtchn); 509 sc->sc_evtchn);
487 sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn, 510 sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn,
488 IST_LEVEL, IPL_BIO, &xbd_handler, sc, true, device_xname(dev)); 511 IST_LEVEL, IPL_BIO, &xbd_handler, sc, true, device_xname(dev));
489 KASSERT(sc->sc_ih != NULL); 512 KASSERT(sc->sc_ih != NULL);
490 513
491again: 514again:
492 xbt = xenbus_transaction_start(); 515 xbt = xenbus_transaction_start();
493 if (xbt == NULL) 516 if (xbt == NULL)
494 return false; 517 return false;
495 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 518 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
496 "ring-ref","%u", sc->sc_ring_gntref); 519 "ring-ref","%u", sc->sc_ring_gntref);
497 if (error) { 520 if (error) {
498 errmsg = "writing ring-ref"; 521 errmsg = "writing ring-ref";
499 goto abort_transaction; 522 goto abort_transaction;
500 } 523 }
501 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 524 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
502 "event-channel", "%u", sc->sc_evtchn); 525 "event-channel", "%u", sc->sc_evtchn);
503 if (error) { 526 if (error) {
504 errmsg = "writing event channel"; 527 errmsg = "writing event channel";
505 goto abort_transaction; 528 goto abort_transaction;
506 } 529 }
507 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 530 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
508 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); 531 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
509 if (error) { 532 if (error) {
510 errmsg = "writing protocol"; 533 errmsg = "writing protocol";
511 goto abort_transaction; 534 goto abort_transaction;
512 } 535 }
513 error = xenbus_transaction_end(xbt, 0); 536 error = xenbus_transaction_end(xbt, 0);
514 if (error == EAGAIN) 537 if (error == EAGAIN)
515 goto again; 538 goto again;
516 if (error != 0) { 539 if (error != 0) {
517 xenbus_dev_fatal(sc->sc_xbusd, error, 540 xenbus_dev_fatal(sc->sc_xbusd, error,
518 "completing transaction"); 541 "completing transaction");
519 return false; 542 return false;
520 } 543 }
521 544
522 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised); 545 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised);
523 546
524 if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) { 547 if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) {
525 /* 548 /*
526 * device was suspended, softc structures are 549 * device was suspended, softc structures are
527 * already initialized - we use a shortcut 550 * already initialized - we use a shortcut
528 */ 551 */
529 sc->sc_backend_status = BLKIF_STATE_CONNECTED; 552 sc->sc_backend_status = BLKIF_STATE_CONNECTED;
530 xenbus_device_resume(sc->sc_xbusd); 553 xenbus_device_resume(sc->sc_xbusd);
531 hypervisor_unmask_event(sc->sc_evtchn); 554 hypervisor_unmask_event(sc->sc_evtchn);
532 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected); 555 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
533 } 556 }
534 557
535 return true; 558 return true;
536 559
537abort_resume: 560abort_resume:
538 xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device"); 561 xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
539 return false; 562 return false;
540 563
541abort_transaction: 564abort_transaction:
542 xenbus_transaction_end(xbt, 1); 565 xenbus_transaction_end(xbt, 1);
543 xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg); 566 xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
544 return false; 567 return false;
545} 568}
546 569
547static void 570static void
548xbd_backend_changed(void *arg, XenbusState new_state) 571xbd_backend_changed(void *arg, XenbusState new_state)
549{ 572{
550 struct xbd_xenbus_softc *sc = device_private((device_t)arg); 573 struct xbd_xenbus_softc *sc = device_private((device_t)arg);
551 struct disk_geom *dg; 574 struct disk_geom *dg;
552 575
553 char buf[32]; 576 char buf[32];
554 DPRINTF(("%s: new backend state %d\n", 577 DPRINTF(("%s: new backend state %d\n",
555 device_xname(sc->sc_dksc.sc_dev), new_state)); 578 device_xname(sc->sc_dksc.sc_dev), new_state));
556 579
557 switch (new_state) { 580 switch (new_state) {
558 case XenbusStateUnknown: 581 case XenbusStateUnknown:
559 case XenbusStateInitialising: 582 case XenbusStateInitialising:
560 case XenbusStateInitWait: 583 case XenbusStateInitWait:
561 case XenbusStateInitialised: 584 case XenbusStateInitialised:
562 break; 585 break;
563 case XenbusStateClosing: 586 case XenbusStateClosing:
564 mutex_enter(&sc->sc_lock); 587 mutex_enter(&sc->sc_lock);
565 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN) 588 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN)
566 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE; 589 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;
567 /* wait for requests to complete */ 590 /* wait for requests to complete */
568 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED && 591 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
569 disk_isbusy(&sc->sc_dksc.sc_dkdev)) { 592 disk_isbusy(&sc->sc_dksc.sc_dkdev)) {
570 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 593 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
571 } 594 }
572 mutex_exit(&sc->sc_lock); 595 mutex_exit(&sc->sc_lock);
573 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed); 596 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
574 break; 597 break;
575 case XenbusStateConnected: 598 case XenbusStateConnected:
576 /* 599 /*
577 * note that xbd_backend_changed() can only be called by 600 * note that xbd_backend_changed() can only be called by
578 * the xenbus thread. 601 * the xenbus thread.
579 */ 602 */
580 603
581 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED || 604 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED ||
582 sc->sc_backend_status == BLKIF_STATE_SUSPENDED) 605 sc->sc_backend_status == BLKIF_STATE_SUSPENDED)
583 /* already connected */ 606 /* already connected */
584 return; 607 return;
585 608
586 xbd_connect(sc); 609 xbd_connect(sc);
587 sc->sc_shutdown = BLKIF_SHUTDOWN_RUN; 610 sc->sc_shutdown = BLKIF_SHUTDOWN_RUN;
588 sc->sc_xbdsize = 611 sc->sc_xbdsize =
589 sc->sc_sectors * (uint64_t)sc->sc_secsize / DEV_BSIZE; 612 sc->sc_sectors * (uint64_t)sc->sc_secsize / DEV_BSIZE;
590 dg = &sc->sc_dksc.sc_dkdev.dk_geom; 613 dg = &sc->sc_dksc.sc_dkdev.dk_geom;
591 memset(dg, 0, sizeof(*dg));  614 memset(dg, 0, sizeof(*dg));
592 615
593 dg->dg_secperunit = sc->sc_xbdsize; 616 dg->dg_secperunit = sc->sc_xbdsize;
594 dg->dg_secsize = DEV_BSIZE; 617 dg->dg_secsize = DEV_BSIZE;
595 dg->dg_ntracks = 1; 618 dg->dg_ntracks = 1;
596 // XXX: Ok to hard-code DEV_BSIZE? 619 // XXX: Ok to hard-code DEV_BSIZE?
597 dg->dg_nsectors = 1024 * (1024 / dg->dg_secsize); 620 dg->dg_nsectors = 1024 * (1024 / dg->dg_secsize);
598 dg->dg_ncylinders = dg->dg_secperunit / dg->dg_nsectors; 621 dg->dg_ncylinders = dg->dg_secperunit / dg->dg_nsectors;
599 622
600 bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0); 623 bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0);
601 dk_attach(&sc->sc_dksc); 624 dk_attach(&sc->sc_dksc);
602 disk_attach(&sc->sc_dksc.sc_dkdev); 625 disk_attach(&sc->sc_dksc.sc_dkdev);
603 626
604 sc->sc_backend_status = BLKIF_STATE_CONNECTED; 627 sc->sc_backend_status = BLKIF_STATE_CONNECTED;
605 hypervisor_unmask_event(sc->sc_evtchn); 628 hypervisor_unmask_event(sc->sc_evtchn);
606 629
607 format_bytes(buf, sizeof(buf), sc->sc_sectors * sc->sc_secsize); 630 format_bytes(buf, sizeof(buf), sc->sc_sectors * sc->sc_secsize);
608 aprint_normal_dev(sc->sc_dksc.sc_dev, 631 aprint_normal_dev(sc->sc_dksc.sc_dev,
609 "%s, %d bytes/sect x %" PRIu64 " sectors\n", 632 "%s, %d bytes/sect x %" PRIu64 " sectors\n",
610 buf, (int)dg->dg_secsize, sc->sc_xbdsize); 633 buf, (int)dg->dg_secsize, sc->sc_xbdsize);
611 snprintb(buf, sizeof(buf), BLKIF_FEATURE_BITS, 634 snprintb(buf, sizeof(buf), BLKIF_FEATURE_BITS,
612 sc->sc_features); 635 sc->sc_features);
613 aprint_normal_dev(sc->sc_dksc.sc_dev, 636 aprint_normal_dev(sc->sc_dksc.sc_dev,
614 "backend features %s\n", buf); 637 "backend features %s\n", buf);
615 638
616 /* Discover wedges on this disk. */ 639 /* Discover wedges on this disk. */
617 dkwedge_discover(&sc->sc_dksc.sc_dkdev); 640 dkwedge_discover(&sc->sc_dksc.sc_dkdev);
618 641
619 disk_set_info(sc->sc_dksc.sc_dev, &sc->sc_dksc.sc_dkdev, NULL); 642 disk_set_info(sc->sc_dksc.sc_dev, &sc->sc_dksc.sc_dkdev, NULL);
620 643
621 /* the disk should be working now */ 644 /* the disk should be working now */
622 config_pending_decr(sc->sc_dksc.sc_dev); 645 config_pending_decr(sc->sc_dksc.sc_dev);
623 break; 646 break;
624 default: 647 default:
625 panic("bad backend state %d", new_state); 648 panic("bad backend state %d", new_state);
626 } 649 }
627} 650}
628 651
629static void 652static void
630xbd_connect(struct xbd_xenbus_softc *sc) 653xbd_connect(struct xbd_xenbus_softc *sc)
631{ 654{
632 int err; 655 int err;
633 unsigned long long sectors; 656 unsigned long long sectors;
634 u_long val; 657 u_long val;
635 658
636 err = xenbus_read_ul(NULL, 659 err = xenbus_read_ul(NULL,
637 sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10); 660 sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10);
638 if (err) 661 if (err)
639 panic("%s: can't read number from %s/virtual-device\n",  662 panic("%s: can't read number from %s/virtual-device\n",
640 device_xname(sc->sc_dksc.sc_dev), 663 device_xname(sc->sc_dksc.sc_dev),
641 sc->sc_xbusd->xbusd_otherend); 664 sc->sc_xbusd->xbusd_otherend);
642 err = xenbus_read_ull(NULL, 665 err = xenbus_read_ull(NULL,
643 sc->sc_xbusd->xbusd_otherend, "sectors", &sectors, 10); 666 sc->sc_xbusd->xbusd_otherend, "sectors", &sectors, 10);
644 if (err) 667 if (err)
645 panic("%s: can't read number from %s/sectors\n",  668 panic("%s: can't read number from %s/sectors\n",
646 device_xname(sc->sc_dksc.sc_dev), 669 device_xname(sc->sc_dksc.sc_dev),
647 sc->sc_xbusd->xbusd_otherend); 670 sc->sc_xbusd->xbusd_otherend);
648 sc->sc_sectors = sectors; 671 sc->sc_sectors = sectors;
649 672
650 err = xenbus_read_ul(NULL, 673 err = xenbus_read_ul(NULL,
651 sc->sc_xbusd->xbusd_otherend, "info", &sc->sc_info, 10); 674 sc->sc_xbusd->xbusd_otherend, "info", &sc->sc_info, 10);
652 if (err) 675 if (err)
653 panic("%s: can't read number from %s/info\n",  676 panic("%s: can't read number from %s/info\n",
654 device_xname(sc->sc_dksc.sc_dev), 677 device_xname(sc->sc_dksc.sc_dev),
655 sc->sc_xbusd->xbusd_otherend); 678 sc->sc_xbusd->xbusd_otherend);
656 err = xenbus_read_ul(NULL, 679 err = xenbus_read_ul(NULL,
657 sc->sc_xbusd->xbusd_otherend, "sector-size", &sc->sc_secsize, 10); 680 sc->sc_xbusd->xbusd_otherend, "sector-size", &sc->sc_secsize, 10);
658 if (err) 681 if (err)
659 panic("%s: can't read number from %s/sector-size\n",  682 panic("%s: can't read number from %s/sector-size\n",
660 device_xname(sc->sc_dksc.sc_dev), 683 device_xname(sc->sc_dksc.sc_dev),
661 sc->sc_xbusd->xbusd_otherend); 684 sc->sc_xbusd->xbusd_otherend);
662 685
663 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 686 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
664 "feature-flush-cache", &val, 10); 687 "feature-flush-cache", &val, 10);
665 if (err) 688 if (err)
666 val = 0; 689 val = 0;
667 if (val > 0) 690 if (val > 0)
668 sc->sc_features |= BLKIF_FEATURE_CACHE_FLUSH; 691 sc->sc_features |= BLKIF_FEATURE_CACHE_FLUSH;
669 692
670 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 693 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
671 "feature-barrier", &val, 10); 694 "feature-barrier", &val, 10);
672 if (err) 695 if (err)
673 val = 0; 696 val = 0;
674 if (val > 0) 697 if (val > 0)
675 sc->sc_features |= BLKIF_FEATURE_BARRIER; 698 sc->sc_features |= BLKIF_FEATURE_BARRIER;
676 699
677 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 700 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
678 "feature-persistent", &val, 10); 701 "feature-persistent", &val, 10);
679 if (err) 702 if (err)
680 val = 0; 703 val = 0;
681 if (val > 0) 704 if (val > 0)
682 sc->sc_features |= BLKIF_FEATURE_PERSISTENT; 705 sc->sc_features |= BLKIF_FEATURE_PERSISTENT;
683 706
684 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected); 707 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
685} 708}
686 709
687static int 710static int
688xbd_handler(void *arg) 711xbd_handler(void *arg)
689{ 712{
690 struct xbd_xenbus_softc *sc = arg; 713 struct xbd_xenbus_softc *sc = arg;
691 struct buf *bp; 714 struct buf *bp;
692 RING_IDX resp_prod, i; 715 RING_IDX resp_prod, i;
693 int more_to_do; 716 int more_to_do;
694 int seg; 717 int seg;
695 718
696 DPRINTF(("xbd_handler(%s)\n", device_xname(sc->sc_dksc.sc_dev))); 719 DPRINTF(("xbd_handler(%s)\n", device_xname(sc->sc_dksc.sc_dev)));
697 720
698 if (__predict_false(sc->sc_backend_status != BLKIF_STATE_CONNECTED)) 721 if (__predict_false(sc->sc_backend_status != BLKIF_STATE_CONNECTED))
699 return 0; 722 return 0;
700 723
701 mutex_enter(&sc->sc_lock); 724 mutex_enter(&sc->sc_lock);
702again: 725again:
703 resp_prod = sc->sc_ring.sring->rsp_prod; 726 resp_prod = sc->sc_ring.sring->rsp_prod;
704 xen_rmb(); /* ensure we see replies up to resp_prod */ 727 xen_rmb(); /* ensure we see replies up to resp_prod */
705 for (i = sc->sc_ring.rsp_cons; i != resp_prod; i++) { 728 for (i = sc->sc_ring.rsp_cons; i != resp_prod; i++) {
706 blkif_response_t *rep = RING_GET_RESPONSE(&sc->sc_ring, i); 729 blkif_response_t *rep = RING_GET_RESPONSE(&sc->sc_ring, i);
707 struct xbd_req *xbdreq = &sc->sc_reqs[rep->id]; 730 struct xbd_req *xbdreq = &sc->sc_reqs[rep->id];
708 731
709 if (rep->operation == BLKIF_OP_FLUSH_DISKCACHE) { 732 if (rep->operation == BLKIF_OP_FLUSH_DISKCACHE) {
710 KASSERT(xbdreq->req_bp == NULL); 733 KASSERT(xbdreq->req_bp == NULL);
711 xbdreq->req_sync.s_error = rep->status; 734 xbdreq->req_sync.s_error = rep->status;
712 xbdreq->req_sync.s_done = 1; 735 xbdreq->req_sync.s_done = 1;
713 cv_broadcast(&sc->sc_cache_flush_cv); 736 cv_broadcast(&sc->sc_cache_flush_cv);
714 /* caller will free the req */ 737 /* caller will free the req */
715 continue; 738 continue;
716 } 739 }
717 740
718 if (rep->operation != BLKIF_OP_READ && 741 if (rep->operation != BLKIF_OP_READ &&
719 rep->operation != BLKIF_OP_WRITE) { 742 rep->operation != BLKIF_OP_WRITE) {
720 aprint_error_dev(sc->sc_dksc.sc_dev, 743 aprint_error_dev(sc->sc_dksc.sc_dev,
721 "bad operation %d from backend\n", rep->operation); 744 "bad operation %d from backend\n", rep->operation);
722 continue; 745 continue;
723 } 746 }
724 747
725 for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) { 748 for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) {
726 /* 749 /*
727 * We are not allowing persistent mappings, so 750 * We are not allowing persistent mappings, so
728 * expect the backend to release the grant 751 * expect the backend to release the grant
729 * immediately. 752 * immediately.
730 */ 753 */
731 KASSERT(xengnt_status(xbdreq->req_gntref[seg]) == 0); 754 KASSERT(xengnt_status(xbdreq->req_gntref[seg]) == 0);
732 xengnt_revoke_access(xbdreq->req_gntref[seg]); 755 xengnt_revoke_access(xbdreq->req_gntref[seg]);
733 } 756 }
734 757
735 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, xbdreq->req_dmamap); 758 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, xbdreq->req_dmamap);
736 759
737 bp = xbdreq->req_bp; 760 bp = xbdreq->req_bp;
738 KASSERT(bp != NULL && bp->b_data != NULL); 761 KASSERT(bp != NULL && bp->b_data != NULL);
739 DPRINTF(("%s(%p): b_bcount = %ld\n", __func__, 762 DPRINTF(("%s(%p): b_bcount = %ld\n", __func__,
740 bp, (long)bp->b_bcount)); 763 bp, (long)bp->b_bcount));
741 764
742 if (__predict_false(bp->b_data != xbdreq->req_data)) 765 if (__predict_false(bp->b_data != xbdreq->req_data))
743 xbd_unmap_align(xbdreq); 766 xbd_unmap_align(sc, xbdreq, true);
744 xbdreq->req_bp = xbdreq->req_data = NULL; 767 xbdreq->req_bp = xbdreq->req_data = NULL;
745 768
746 /* b_resid was set in dk_start, only override on error */ 769 /* b_resid was set in dk_start, only override on error */
747 if (rep->status != BLKIF_RSP_OKAY) { 770 if (rep->status != BLKIF_RSP_OKAY) {
748 bp->b_error = EIO; 771 bp->b_error = EIO;
749 bp->b_resid = bp->b_bcount; 772 bp->b_resid = bp->b_bcount;
750 } 773 }
751 774
752 dk_done(&sc->sc_dksc, bp); 775 dk_done(&sc->sc_dksc, bp);
753 776
754 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next); 777 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
755 } 778 }
756 779
757 xen_rmb(); 780 xen_rmb();
758 sc->sc_ring.rsp_cons = i; 781 sc->sc_ring.rsp_cons = i;
759 782
760 RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_ring, more_to_do); 783 RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_ring, more_to_do);
761 if (more_to_do) 784 if (more_to_do)
762 goto again; 785 goto again;
763 786
764 cv_signal(&sc->sc_req_cv); 787 cv_signal(&sc->sc_req_cv);
765 mutex_exit(&sc->sc_lock); 788 mutex_exit(&sc->sc_lock);
766 789
767 dk_start(&sc->sc_dksc, NULL); 790 dk_start(&sc->sc_dksc, NULL);
768 791
769 return 1; 792 return 1;
770} 793}
771 794
772static void 795static void
773xbdminphys(struct buf *bp) 796xbdminphys(struct buf *bp)
774{ 797{
775 if (bp->b_bcount > XBD_MAX_XFER) { 798 if (bp->b_bcount > XBD_MAX_XFER) {
776 bp->b_bcount = XBD_MAX_XFER; 799 bp->b_bcount = XBD_MAX_XFER;
777 } 800 }
778 minphys(bp); 801 minphys(bp);
779} 802}
780 803
781static void 804static void
782xbd_iosize(device_t dev, int *maxxfer) 805xbd_iosize(device_t dev, int *maxxfer)
783{ 806{
784 /* 807 /*
785 * Always restrict dumps to XBD_MAX_XFER to avoid indirect segments, 808 * Always restrict dumps to XBD_MAX_XFER to avoid indirect segments,
786 * so that it uses as little memory as possible.  809 * so that it uses as little memory as possible.
787 */ 810 */
788 if (*maxxfer > XBD_MAX_XFER) 811 if (*maxxfer > XBD_MAX_XFER)
789 *maxxfer = XBD_MAX_XFER; 812 *maxxfer = XBD_MAX_XFER;
790} 813}
791 814
792int 815int
793xbdopen(dev_t dev, int flags, int fmt, struct lwp *l) 816xbdopen(dev_t dev, int flags, int fmt, struct lwp *l)
794{ 817{
795 struct xbd_xenbus_softc *sc; 818 struct xbd_xenbus_softc *sc;
796 819
797 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 820 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
798 if (sc == NULL) 821 if (sc == NULL)
799 return (ENXIO); 822 return (ENXIO);
800 if ((flags & FWRITE) && (sc->sc_info & VDISK_READONLY)) 823 if ((flags & FWRITE) && (sc->sc_info & VDISK_READONLY))
801 return EROFS; 824 return EROFS;
802 825
803 DPRINTF(("xbdopen(0x%04x, %d)\n", dev, flags)); 826 DPRINTF(("xbdopen(0x%04x, %d)\n", dev, flags));
804 return dk_open(&sc->sc_dksc, dev, flags, fmt, l); 827 return dk_open(&sc->sc_dksc, dev, flags, fmt, l);
805} 828}
806 829
807int 830int
808xbdclose(dev_t dev, int flags, int fmt, struct lwp *l) 831xbdclose(dev_t dev, int flags, int fmt, struct lwp *l)
809{ 832{
810 struct xbd_xenbus_softc *sc; 833 struct xbd_xenbus_softc *sc;
811 834
812 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 835 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
813 836
814 DPRINTF(("xbdclose(%d, %d)\n", dev, flags)); 837 DPRINTF(("xbdclose(%d, %d)\n", dev, flags));
815 return dk_close(&sc->sc_dksc, dev, flags, fmt, l); 838 return dk_close(&sc->sc_dksc, dev, flags, fmt, l);
816} 839}
817 840
818void 841void
819xbdstrategy(struct buf *bp) 842xbdstrategy(struct buf *bp)
820{ 843{
821 struct xbd_xenbus_softc *sc; 844 struct xbd_xenbus_softc *sc;
822 845
823 sc = device_lookup_private(&xbd_cd, DISKUNIT(bp->b_dev)); 846 sc = device_lookup_private(&xbd_cd, DISKUNIT(bp->b_dev));
824 847
825 DPRINTF(("xbdstrategy(%p): b_bcount = %ld\n", bp, 848 DPRINTF(("xbdstrategy(%p): b_bcount = %ld\n", bp,
826 (long)bp->b_bcount)); 849 (long)bp->b_bcount));
827 850
828 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) { 851 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
829 bp->b_error = EIO; 852 bp->b_error = EIO;
830 biodone(bp); 853 biodone(bp);
831 return; 854 return;
832 } 855 }
833 if (__predict_false((sc->sc_info & VDISK_READONLY) && 856 if (__predict_false((sc->sc_info & VDISK_READONLY) &&
834 (bp->b_flags & B_READ) == 0)) { 857 (bp->b_flags & B_READ) == 0)) {
835 bp->b_error = EROFS; 858 bp->b_error = EROFS;
836 biodone(bp); 859 biodone(bp);
837 return; 860 return;
838 } 861 }
839 862
840 dk_strategy(&sc->sc_dksc, bp); 863 dk_strategy(&sc->sc_dksc, bp);
841 return; 864 return;
842} 865}
843 866
844int 867int
845xbdsize(dev_t dev) 868xbdsize(dev_t dev)
846{ 869{
847 struct xbd_xenbus_softc *sc; 870 struct xbd_xenbus_softc *sc;
848 871
849 DPRINTF(("xbdsize(%d)\n", dev)); 872 DPRINTF(("xbdsize(%d)\n", dev));
850 873
851 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 874 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
852 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) 875 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN)
853 return -1; 876 return -1;
854 return dk_size(&sc->sc_dksc, dev); 877 return dk_size(&sc->sc_dksc, dev);
855} 878}
856 879
857int 880int
858xbdread(dev_t dev, struct uio *uio, int flags) 881xbdread(dev_t dev, struct uio *uio, int flags)
859{ 882{
860 struct xbd_xenbus_softc *sc =  883 struct xbd_xenbus_softc *sc =
861 device_lookup_private(&xbd_cd, DISKUNIT(dev)); 884 device_lookup_private(&xbd_cd, DISKUNIT(dev));
862 struct dk_softc *dksc = &sc->sc_dksc; 885 struct dk_softc *dksc = &sc->sc_dksc;
863 886
864 if (!DK_ATTACHED(dksc)) 887 if (!DK_ATTACHED(dksc))
865 return ENXIO; 888 return ENXIO;
866 return physio(xbdstrategy, NULL, dev, B_READ, xbdminphys, uio); 889 return physio(xbdstrategy, NULL, dev, B_READ, xbdminphys, uio);
867} 890}
868 891
869int 892int
870xbdwrite(dev_t dev, struct uio *uio, int flags) 893xbdwrite(dev_t dev, struct uio *uio, int flags)
871{ 894{
872 struct xbd_xenbus_softc *sc = 895 struct xbd_xenbus_softc *sc =
873 device_lookup_private(&xbd_cd, DISKUNIT(dev)); 896 device_lookup_private(&xbd_cd, DISKUNIT(dev));
874 struct dk_softc *dksc = &sc->sc_dksc; 897 struct dk_softc *dksc = &sc->sc_dksc;
875 898
876 if (!DK_ATTACHED(dksc)) 899 if (!DK_ATTACHED(dksc))
877 return ENXIO; 900 return ENXIO;
878 if (__predict_false(sc->sc_info & VDISK_READONLY)) 901 if (__predict_false(sc->sc_info & VDISK_READONLY))
879 return EROFS; 902 return EROFS;
880 return physio(xbdstrategy, NULL, dev, B_WRITE, xbdminphys, uio); 903 return physio(xbdstrategy, NULL, dev, B_WRITE, xbdminphys, uio);
881} 904}
882 905
883int 906int
884xbdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 907xbdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
885{ 908{
886 struct xbd_xenbus_softc *sc = 909 struct xbd_xenbus_softc *sc =
887 device_lookup_private(&xbd_cd, DISKUNIT(dev)); 910 device_lookup_private(&xbd_cd, DISKUNIT(dev));
888 struct dk_softc *dksc; 911 struct dk_softc *dksc;
889 int error; 912 int error;
890 struct xbd_req *xbdreq; 913 struct xbd_req *xbdreq;
891 blkif_request_t *req; 914 blkif_request_t *req;
892 int notify; 915 int notify;
893 916
894 DPRINTF(("xbdioctl(%d, %08lx, %p, %d, %p)\n", 917 DPRINTF(("xbdioctl(%d, %08lx, %p, %d, %p)\n",
895 dev, cmd, data, flag, l)); 918 dev, cmd, data, flag, l));
896 dksc = &sc->sc_dksc; 919 dksc = &sc->sc_dksc;
897 920
898 switch (cmd) { 921 switch (cmd) {
899 case DIOCGCACHE: 922 case DIOCGCACHE:
900 { 923 {
901 /* Assume there is write cache if cache-flush is supported */ 924 /* Assume there is write cache if cache-flush is supported */
902 int *bitsp = (int *)data; 925 int *bitsp = (int *)data;
903 *bitsp = 0; 926 *bitsp = 0;
904 if (sc->sc_features & BLKIF_FEATURE_CACHE_FLUSH) 927 if (sc->sc_features & BLKIF_FEATURE_CACHE_FLUSH)
905 *bitsp |= DKCACHE_WRITE; 928 *bitsp |= DKCACHE_WRITE;
906 error = 0; 929 error = 0;
907 break; 930 break;
908 } 931 }
909 case DIOCCACHESYNC: 932 case DIOCCACHESYNC:
910 if ((sc->sc_features & BLKIF_FEATURE_CACHE_FLUSH) == 0) 933 if ((sc->sc_features & BLKIF_FEATURE_CACHE_FLUSH) == 0)
911 return EOPNOTSUPP; 934 return EOPNOTSUPP;
912 935
913 mutex_enter(&sc->sc_lock); 936 mutex_enter(&sc->sc_lock);
914 while ((xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head)) == NULL) 937 while ((xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head)) == NULL)
915 cv_wait(&sc->sc_req_cv, &sc->sc_lock); 938 cv_wait(&sc->sc_req_cv, &sc->sc_lock);
916 939
917 SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next); 940 SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
918 req = RING_GET_REQUEST(&sc->sc_ring, 941 req = RING_GET_REQUEST(&sc->sc_ring,
919 sc->sc_ring.req_prod_pvt); 942 sc->sc_ring.req_prod_pvt);
920 req->id = xbdreq->req_id; 943 req->id = xbdreq->req_id;
921 req->operation = BLKIF_OP_FLUSH_DISKCACHE; 944 req->operation = BLKIF_OP_FLUSH_DISKCACHE;
922 req->handle = sc->sc_handle; 945 req->handle = sc->sc_handle;
923 xbdreq->req_sync.s_done = 0; 946 xbdreq->req_sync.s_done = 0;
924 sc->sc_ring.req_prod_pvt++; 947 sc->sc_ring.req_prod_pvt++;
925 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify); 948 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
926 if (notify) 949 if (notify)
927 hypervisor_notify_via_evtchn(sc->sc_evtchn); 950 hypervisor_notify_via_evtchn(sc->sc_evtchn);
928 /* request sent, now wait for completion */ 951 /* request sent, now wait for completion */
929 while (xbdreq->req_sync.s_done == 0) 952 while (xbdreq->req_sync.s_done == 0)
930 cv_wait(&sc->sc_cache_flush_cv, &sc->sc_lock); 953 cv_wait(&sc->sc_cache_flush_cv, &sc->sc_lock);
931 954
932 if (xbdreq->req_sync.s_error == BLKIF_RSP_EOPNOTSUPP) 955 if (xbdreq->req_sync.s_error == BLKIF_RSP_EOPNOTSUPP)
933 error = EOPNOTSUPP; 956 error = EOPNOTSUPP;
934 else if (xbdreq->req_sync.s_error == BLKIF_RSP_OKAY) 957 else if (xbdreq->req_sync.s_error == BLKIF_RSP_OKAY)
935 error = 0; 958 error = 0;
936 else 959 else
937 error = EIO; 960 error = EIO;
938 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next); 961 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
939 cv_signal(&sc->sc_req_cv); 962 cv_signal(&sc->sc_req_cv);
940 mutex_exit(&sc->sc_lock); 963 mutex_exit(&sc->sc_lock);
941 964
942 /* Restart I/O if it was waiting for req */ 965 /* Restart I/O if it was waiting for req */
943 dk_start(&sc->sc_dksc, NULL); 966 dk_start(&sc->sc_dksc, NULL);
944 break; 967 break;
945 968
946 default: 969 default:
947 error = dk_ioctl(dksc, dev, cmd, data, flag, l); 970 error = dk_ioctl(dksc, dev, cmd, data, flag, l);
948 break; 971 break;
949 } 972 }
950 973
951 return error; 974 return error;
952} 975}
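
The DIOCGCACHE and DIOCCACHESYNC branches handled above are reachable from userland through the standard disk ioctls declared in <sys/dkio.h>. The following is a minimal sketch of a caller, not part of this change; the device path is only an example.

	#include <sys/types.h>
	#include <sys/ioctl.h>
	#include <sys/dkio.h>
	#include <err.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int
	main(void)
	{
		int fd, bits, force = 0;

		/* Example path for the raw xbd device. */
		fd = open("/dev/rxbd0d", O_RDWR);
		if (fd == -1)
			err(1, "open");

		/*
		 * DIOCGCACHE: the driver reports DKCACHE_WRITE when the
		 * backend advertises BLKIF_FEATURE_CACHE_FLUSH.
		 */
		if (ioctl(fd, DIOCGCACHE, &bits) == -1)
			err(1, "DIOCGCACHE");
		printf("write cache %sabled\n",
		    (bits & DKCACHE_WRITE) ? "en" : "dis");

		/*
		 * DIOCCACHESYNC: fails with EOPNOTSUPP when the backend
		 * does not support cache flush, as in the handler above.
		 */
		if (ioctl(fd, DIOCCACHESYNC, &force) == -1)
			warn("DIOCCACHESYNC");

		close(fd);
		return 0;
	}
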
953 976
954int 977int
955xbddump(dev_t dev, daddr_t blkno, void *va, size_t size) 978xbddump(dev_t dev, daddr_t blkno, void *va, size_t size)
956{ 979{
957 struct xbd_xenbus_softc *sc; 980 struct xbd_xenbus_softc *sc;
958 981
959 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 982 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
960 if (sc == NULL) 983 if (sc == NULL)
961 return (ENXIO); 984 return (ENXIO);
962 985
963 DPRINTF(("xbddump(%d, %" PRId64 ", %p, %lu)\n", dev, blkno, va, 986 DPRINTF(("xbddump(%d, %" PRId64 ", %p, %lu)\n", dev, blkno, va,
964 (unsigned long)size)); 987 (unsigned long)size));
965 return dk_dump(&sc->sc_dksc, dev, blkno, va, size, 0); 988 return dk_dump(&sc->sc_dksc, dev, blkno, va, size, 0);
966} 989}
967 990
968static int 991static int
969xbd_diskstart(device_t self, struct buf *bp) 992xbd_diskstart(device_t self, struct buf *bp)
970{ 993{
971 struct xbd_xenbus_softc *sc = device_private(self); 994 struct xbd_xenbus_softc *sc = device_private(self);
972 struct xbd_req *xbdreq; 995 struct xbd_req *xbdreq;
973 blkif_request_t *req; 996 blkif_request_t *req;
974 size_t off; 997 size_t off;
975 paddr_t ma; 998 paddr_t ma;
976 int nsects, nbytes, seg; 999 int nsects, nbytes, seg;
977 int notify, error = 0; 1000 int notify, error = 0;
978 1001
979 DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n", 1002 DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n",
980 bp, (long)bp->b_bcount)); 1003 bp, (long)bp->b_bcount));
981 1004
982 KASSERT(bp->b_bcount <= XBD_MAX_XFER); 1005 KASSERT(bp->b_bcount <= XBD_MAX_XFER);
983 1006
984 mutex_enter(&sc->sc_lock); 1007 mutex_enter(&sc->sc_lock);
985 1008
986 if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) { 1009 if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
987 error = EIO; 1010 error = EIO;
988 goto out; 1011 goto out;
989 } 1012 }
990 1013
991 if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) { 1014 if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) {
992 /* invalid block number */ 1015 /* invalid block number */
993 error = EINVAL; 1016 error = EINVAL;
994 goto out; 1017 goto out;
995 } 1018 }
996 1019
997 if (__predict_false( 1020 if (__predict_false(
998 sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) { 1021 sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
999 /* device is suspended, do not consume buffer */ 1022 /* device is suspended, do not consume buffer */
1000 DPRINTF(("%s: (xbd_diskstart) device suspended\n", 1023 DPRINTF(("%s: (xbd_diskstart) device suspended\n",
1001 sc->sc_dksc.sc_xname)); 1024 sc->sc_dksc.sc_xname));
1002 error = EAGAIN; 1025 error = EAGAIN;
1003 goto out; 1026 goto out;
1004 } 1027 }
1005 1028
1006 xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head); 1029 xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
1007 if (__predict_false(xbdreq == NULL)) { 1030 if (__predict_false(xbdreq == NULL)) {
 1031 sc->sc_cnt_queue_full.ev_count++;
1008 DPRINTF(("xbd_diskstart: no req\n")); 1032 DPRINTF(("xbd_diskstart: no req\n"));
1009 error = EAGAIN; 1033 error = EAGAIN;
1010 goto out; 1034 goto out;
1011 } 1035 }
1012 1036
1013 xbdreq->req_bp = bp; 1037 xbdreq->req_bp = bp;
1014 xbdreq->req_data = bp->b_data; 1038 xbdreq->req_data = bp->b_data;
1015 if (__predict_false((vaddr_t)bp->b_data & (XEN_BSIZE - 1))) { 1039 if (__predict_false((vaddr_t)bp->b_data & (XEN_BSIZE - 1))) {
1016 /* Only can get here if this is physio() request */ 1040 /* Only can get here if this is physio() request */
1017 KASSERT(bp->b_saveaddr != NULL); 1041 KASSERT(bp->b_saveaddr != NULL);
1018 1042
1019 sc->sc_cnt_map_unalign.ev_count++; 1043 sc->sc_cnt_map_unalign.ev_count++;
1020 1044
1021 if (__predict_false(xbd_map_align(xbdreq) != 0)) { 1045 if (__predict_false(xbd_map_align(sc, xbdreq) != 0)) {
1022 DPRINTF(("xbd_diskstart: no align\n")); 1046 DPRINTF(("xbd_diskstart: no align\n"));
1023 error = EAGAIN; 1047 error = EAGAIN;
1024 goto out; 1048 goto out;
1025 } 1049 }
1026 } 1050 }
1027 1051
1028 if (__predict_false(bus_dmamap_load(sc->sc_xbusd->xbusd_dmat, 1052 if (__predict_false(bus_dmamap_load(sc->sc_xbusd->xbusd_dmat,
1029 xbdreq->req_dmamap, xbdreq->req_data, bp->b_bcount, NULL, 1053 xbdreq->req_dmamap, xbdreq->req_data, bp->b_bcount, NULL,
1030 BUS_DMA_NOWAIT) != 0)) { 1054 BUS_DMA_NOWAIT) != 0)) {
1031 printf("%s: %s: xengnt_grant_access failed", 1055 printf("%s: %s: bus_dmamap_load failed",
1032 device_xname(sc->sc_dksc.sc_dev), __func__); 1056 device_xname(sc->sc_dksc.sc_dev), __func__);
1033 error = EINVAL; 1057 error = EINVAL;
1034 goto out; 1058 goto out;
1035 } 1059 }
1036 1060
1037 /* We are now committed to the transfer */ 1061 /* We are now committed to the transfer */
1038 SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next); 1062 SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
1039 req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt); 1063 req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
1040 req->id = xbdreq->req_id; 1064 req->id = xbdreq->req_id;
1041 req->operation = 1065 req->operation =
1042 bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE; 1066 bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
1043 req->sector_number = bp->b_rawblkno; 1067 req->sector_number = bp->b_rawblkno;
1044 req->handle = sc->sc_handle; 1068 req->handle = sc->sc_handle;
1045 1069
1046 bp->b_resid = 0; 1070 bp->b_resid = 0;
1047 for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) { 1071 for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) {
1048 bus_dma_segment_t *dmaseg = &xbdreq->req_dmamap->dm_segs[seg]; 1072 bus_dma_segment_t *dmaseg = &xbdreq->req_dmamap->dm_segs[seg];
1049 1073
1050 ma = dmaseg->ds_addr; 1074 ma = dmaseg->ds_addr;
1051 off = ma & PAGE_MASK; 1075 off = ma & PAGE_MASK;
1052 nbytes = dmaseg->ds_len; 1076 nbytes = dmaseg->ds_len;
1053 nsects = nbytes >> XEN_BSHIFT; 1077 nsects = nbytes >> XEN_BSHIFT;
1054 1078
1055 req->seg[seg].first_sect = off >> XEN_BSHIFT; 1079 req->seg[seg].first_sect = off >> XEN_BSHIFT;
1056 req->seg[seg].last_sect = (off >> XEN_BSHIFT) + nsects - 1; 1080 req->seg[seg].last_sect = (off >> XEN_BSHIFT) + nsects - 1;
1057 KASSERT(req->seg[seg].first_sect <= req->seg[seg].last_sect); 1081 KASSERT(req->seg[seg].first_sect <= req->seg[seg].last_sect);
1058 KASSERT(req->seg[seg].last_sect < (PAGE_SIZE / XEN_BSIZE)); 1082 KASSERT(req->seg[seg].last_sect < (PAGE_SIZE / XEN_BSIZE));
1059 1083
1060 if (__predict_false(xengnt_grant_access( 1084 if (__predict_false(xengnt_grant_access(
1061 sc->sc_xbusd->xbusd_otherend_id, 1085 sc->sc_xbusd->xbusd_otherend_id,
1062 (ma & ~PAGE_MASK), (bp->b_flags & B_READ) == 0, 1086 (ma & ~PAGE_MASK), (bp->b_flags & B_READ) == 0,
1063 &xbdreq->req_gntref[seg]))) { 1087 &xbdreq->req_gntref[seg]))) {
1064 printf("%s: %s: xengnt_grant_access failed", 1088 printf("%s: %s: xengnt_grant_access failed",
1065 device_xname(sc->sc_dksc.sc_dev), __func__); 1089 device_xname(sc->sc_dksc.sc_dev), __func__);
1066 if (seg > 0) { 1090 if (seg > 0) {
1067 for (; --seg >= 0; ) { 1091 for (; --seg >= 0; ) {
1068 xengnt_revoke_access( 1092 xengnt_revoke_access(
1069 xbdreq->req_gntref[seg]); 1093 xbdreq->req_gntref[seg]);
1070 } 1094 }
1071 } 1095 }
1072 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, 1096 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
1073 xbdreq->req_dmamap); 1097 xbdreq->req_dmamap);
 1098 if (__predict_false(bp->b_data != xbdreq->req_data))
 1099 xbd_unmap_align(sc, xbdreq, false);
1074 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, 1100 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq,
1075 req_next); 1101 req_next);
1076 error = EFAULT; 1102 error = EFAULT;
1077 goto out; 1103 goto out;
1078 } 1104 }
1079 1105
1080 req->seg[seg].gref = xbdreq->req_gntref[seg]; 1106 req->seg[seg].gref = xbdreq->req_gntref[seg];
1081 } 1107 }
1082 req->nr_segments = seg; 1108 req->nr_segments = seg;
1083 sc->sc_ring.req_prod_pvt++; 1109 sc->sc_ring.req_prod_pvt++;
1084 1110
1085 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify); 1111 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
1086 if (notify) 1112 if (notify)
1087 hypervisor_notify_via_evtchn(sc->sc_evtchn); 1113 hypervisor_notify_via_evtchn(sc->sc_evtchn);
1088 1114
1089out: 1115out:
1090 mutex_exit(&sc->sc_lock); 1116 mutex_exit(&sc->sc_lock);
1091 return error; 1117 return error;
1092} 1118}
1093 1119
1094static int 1120static int
1095xbd_map_align(struct xbd_req *req) 1121xbd_map_align(struct xbd_xenbus_softc *sc, struct xbd_req *req)
1096{ 1122{
1097 int rc; 1123 if (!sc->sc_unalign_free) {
 1124 sc->sc_cnt_unalign_busy.ev_count++;
 1125 return EAGAIN;
 1126 }
 1127 sc->sc_unalign_free = false;
1098 1128
1099 rc = uvm_km_kmem_alloc(kmem_va_arena, 1129 KASSERT(req->req_bp->b_bcount < MAXPHYS);
1100 req->req_bp->b_bcount, (VM_NOSLEEP | VM_INSTANTFIT), 1130 req->req_data = (void *)sc->sc_unalign_buffer;
1101 (vmem_addr_t *)&req->req_data); 
1102 if (__predict_false(rc != 0)) 
1103 return ENOMEM; 
1104 if ((req->req_bp->b_flags & B_READ) == 0) 1131 if ((req->req_bp->b_flags & B_READ) == 0)
1105 memcpy(req->req_data, req->req_bp->b_data, 1132 memcpy(req->req_data, req->req_bp->b_data,
1106 req->req_bp->b_bcount); 1133 req->req_bp->b_bcount);
1107 return 0; 1134 return 0;
1108} 1135}
1109 1136
1110static void 1137static void
1111xbd_unmap_align(struct xbd_req *req) 1138xbd_unmap_align(struct xbd_xenbus_softc *sc, struct xbd_req *req, bool sync)
1112{ 1139{
1113 if (req->req_bp->b_flags & B_READ) 1140 if (sync && req->req_bp->b_flags & B_READ)
1114 memcpy(req->req_bp->b_data, req->req_data, 1141 memcpy(req->req_bp->b_data, req->req_data,
1115 req->req_bp->b_bcount); 1142 req->req_bp->b_bcount);
1116 uvm_km_kmem_free(kmem_va_arena, (vaddr_t)req->req_data, req->req_bp->b_bcount); 1143 sc->sc_unalign_free = true;
1117} 1144}
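
The rewritten xbd_map_align()/xbd_unmap_align() above rely on a single per-device bounce buffer (sc_unalign_buffer) guarded by the sc_unalign_free flag; the hunk that pre-allocates that buffer is outside this excerpt. Below is a minimal sketch, assuming a MAXPHYS-sized wired allocation made once at attach time and released at detach; the helper names are hypothetical, and only the sc_unalign_buffer and sc_unalign_free members actually appear in the diff.

	#include <sys/param.h>
	#include <uvm/uvm_extern.h>

	/*
	 * Hypothetical helpers -- the real allocation lives in the attach
	 * and detach paths of xbd_xenbus.c, which are not shown in this
	 * hunk.
	 */
	static void
	xbd_unalign_buffer_setup(struct xbd_xenbus_softc *sc)
	{
		/*
		 * One wired MAXPHYS-sized buffer per device, allocated
		 * once, so an unaligned physio() transfer can always be
		 * bounced through it eventually.
		 */
		sc->sc_unalign_buffer = uvm_km_alloc(kernel_map, MAXPHYS, 0,
		    UVM_KMF_WIRED);
		sc->sc_unalign_free = true;
	}

	static void
	xbd_unalign_buffer_teardown(struct xbd_xenbus_softc *sc)
	{
		if (sc->sc_unalign_buffer != 0) {
			uvm_km_free(kernel_map, sc->sc_unalign_buffer,
			    MAXPHYS, UVM_KMF_WIRED);
			sc->sc_unalign_buffer = 0;
		}
	}

With the buffer owned by the softc, the only failure left in xbd_map_align() is the transient EAGAIN taken while another unaligned transfer still holds the buffer, which is counted via sc_cnt_unalign_busy.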