Thu Jul 27 17:56:31 2023 UTC
Pull up following revision(s) (requested by bouyer in ticket #248):

	sys/arch/xen/xen/xbd_xenbus.c: revision 1.132 (patch)
	sys/arch/xen/xen/xbd_xenbus.c: revision 1.133 (patch)
	sys/arch/xen/xen/xbd_xenbus.c: revision 1.134 (patch)

The disk size reported in the xenstore is always in XEN_BSIZE units,
not in sector-size units. This should fix the issue reported by Christian
Kujau on netbsd-users and port-xen.

Also use XEN_BSIZE when computing the number of bytes for format_bytes().
While there, note in a comment that sc_sectors is in XEN_BSIZE units.
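
As a minimal illustration of the unit handling described above (the disk size
and variable names below are hypothetical; only XEN_BSIZE/XEN_BSHIFT match the
driver's definitions), the xenstore "sectors" value converts to a byte count
like this:

#include <stdio.h>

#define XEN_BSHIFT	9			/* log2(XEN_BSIZE) */
#define XEN_BSIZE	(1 << XEN_BSHIFT)	/* xenstore "sectors" are 512-byte units */

int
main(void)
{
	/* hypothetical backend exporting a 10 GiB disk with 4k sectors */
	unsigned long long sectors = 20971520ULL;	/* xenstore "sectors" */
	unsigned long secsize = 4096;			/* xenstore "sector-size" */

	/* the disk size is always sectors * XEN_BSIZE, not sectors * secsize */
	unsigned long long bytes = sectors << XEN_BSHIFT;

	printf("%llu bytes (backend sector size %lu)\n", bytes, secsize);
	return 0;
}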

Properly handle 4k sector size backends:
- Report the backend's sector size to upper layers, not DEV_BSIZE.
  Adjust the number of sectors accordingly.
- Use sc_secsize instead of XEN_BSIZE where appropriate. The sector numbers
  in I/O requests are still in XEN_BSIZE units, but must be a multiple
  of sc_secsize/XEN_BSIZE (see the sketch after this list).
- As a consequence of the previous point, the buffer has to be aligned to
  sc_secsize; alignment to XEN_BSIZE may not be enough. This means that we
  may have to xbd_map_align() more buffers, including some without B_PHYS set.
- Add some more DPRINTF lines related to I/O requests.
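
A rough, self-contained sketch of the conversions this implies (same
hypothetical 10 GiB / 4k backend as above; only the XEN_BSIZE constants and
the sc_sectors formula mirror the driver, the rest is illustrative):

#include <assert.h>
#include <inttypes.h>
#include <stdio.h>

#define XEN_BSHIFT	9
#define XEN_BSIZE	(1 << XEN_BSHIFT)

int
main(void)
{
	unsigned long long sectors = 20971520ULL;	/* xenstore "sectors", XEN_BSIZE units */
	unsigned long secsize = 4096;			/* xenstore "sector-size" */

	/* sc_sectors is kept in sc_secsize units and reported to upper layers */
	uint64_t sc_sectors = sectors * (uint64_t)XEN_BSIZE / secsize;

	/*
	 * Sector numbers in I/O requests stay in XEN_BSIZE units but must
	 * be multiples of secsize / XEN_BSIZE, and the data buffer must be
	 * aligned to secsize.
	 */
	uint64_t req_sector = 100 * (secsize / XEN_BSIZE);	/* the 100th 4k sector */
	assert(req_sector % (secsize / XEN_BSIZE) == 0);

	printf("%" PRIu64 " sectors of %lu bytes, request sector %" PRIu64 "\n",
	    sc_sectors, secsize, req_sector);
	return 0;
}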

Tested with a Linux dom0.

Thanks to Christian Kujau for providing access to his hardware for testing
and debugging.


(martin)
diff -r1.129 -r1.129.20.1 src/sys/arch/xen/xen/xbd_xenbus.c

cvs diff -r1.129 -r1.129.20.1 src/sys/arch/xen/xen/xbd_xenbus.c

--- src/sys/arch/xen/xen/xbd_xenbus.c 2020/07/13 21:21:56 1.129
+++ src/sys/arch/xen/xen/xbd_xenbus.c 2023/07/27 17:56:31 1.129.20.1
@@ -1,1401 +1,1409 @@
1/* $NetBSD: xbd_xenbus.c,v 1.129 2020/07/13 21:21:56 jdolecek Exp $ */ 1/* $NetBSD: xbd_xenbus.c,v 1.129.20.1 2023/07/27 17:56:31 martin Exp $ */
2 2
3/* 3/*
4 * Copyright (c) 2006 Manuel Bouyer. 4 * Copyright (c) 2006 Manuel Bouyer.
5 * 5 *
6 * Redistribution and use in source and binary forms, with or without 6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions 7 * modification, are permitted provided that the following conditions
8 * are met: 8 * are met:
9 * 1. Redistributions of source code must retain the above copyright 9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer. 10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright 11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the 12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution. 13 * documentation and/or other materials provided with the distribution.
14 * 14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 * 25 *
26 */ 26 */
27 27
28/* 28/*
29 * The file contains the xbd frontend code required for block-level 29 * The file contains the xbd frontend code required for block-level
30 * communications (similar to hard disks) between two Xen domains. 30 * communications (similar to hard disks) between two Xen domains.
31 * 31 *
32 * We are not supposed to receive solicitations spontaneously from backend. The 32 * We are not supposed to receive solicitations spontaneously from backend. The
33 * protocol is therefore fairly simple and uses only one ring to communicate 33 * protocol is therefore fairly simple and uses only one ring to communicate
34 * with backend: frontend posts requests to the ring then wait for their 34 * with backend: frontend posts requests to the ring then wait for their
35 * replies asynchronously. 35 * replies asynchronously.
36 * 36 *
37 * xbd follows NetBSD's disk(9) convention. At any time, a LWP can schedule 37 * xbd follows NetBSD's disk(9) convention. At any time, a LWP can schedule
38 * an operation request for the device (be it open(), read(), write(), ...). 38 * an operation request for the device (be it open(), read(), write(), ...).
39 * Calls are typically processed that way: 39 * Calls are typically processed that way:
40 * - initiate request: xbdread/write/open/ioctl/.. 40 * - initiate request: xbdread/write/open/ioctl/..
41 * - depending on operation, it is handled directly by disk(9) subsystem or 41 * - depending on operation, it is handled directly by disk(9) subsystem or
42 * goes through physio(9) first. 42 * goes through physio(9) first.
43 * - the request is ultimately processed by xbd_diskstart() that prepares the 43 * - the request is ultimately processed by xbd_diskstart() that prepares the
44 * xbd requests, post them in the ring I/O queue, then signal the backend. 44 * xbd requests, post them in the ring I/O queue, then signal the backend.
45 * 45 *
46 * When a response is available in the queue, the backend signals the frontend 46 * When a response is available in the queue, the backend signals the frontend
47 * via its event channel. This triggers xbd_handler(), which will link back 47 * via its event channel. This triggers xbd_handler(), which will link back
48 * the response to its request through the request ID, and mark the I/O as 48 * the response to its request through the request ID, and mark the I/O as
49 * completed. 49 * completed.
50 */ 50 */
51 51
52#include <sys/cdefs.h> 52#include <sys/cdefs.h>
53__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.129 2020/07/13 21:21:56 jdolecek Exp $"); 53__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.129.20.1 2023/07/27 17:56:31 martin Exp $");
54 54
55#include "opt_xen.h" 55#include "opt_xen.h"
56 56
57 57
58#include <sys/param.h> 58#include <sys/param.h>
59#include <sys/buf.h> 59#include <sys/buf.h>
60#include <sys/bufq.h> 60#include <sys/bufq.h>
61#include <sys/device.h> 61#include <sys/device.h>
62#include <sys/disk.h> 62#include <sys/disk.h>
63#include <sys/disklabel.h> 63#include <sys/disklabel.h>
64#include <sys/conf.h> 64#include <sys/conf.h>
65#include <sys/fcntl.h> 65#include <sys/fcntl.h>
66#include <sys/kernel.h> 66#include <sys/kernel.h>
67#include <sys/proc.h> 67#include <sys/proc.h>
68#include <sys/systm.h> 68#include <sys/systm.h>
69#include <sys/stat.h> 69#include <sys/stat.h>
70#include <sys/vnode.h> 70#include <sys/vnode.h>
71#include <sys/mutex.h> 71#include <sys/mutex.h>
72 72
73#include <dev/dkvar.h> 73#include <dev/dkvar.h>
74 74
75#include <uvm/uvm.h> 75#include <uvm/uvm.h>
76 76
77#include <xen/intr.h> 77#include <xen/intr.h>
78#include <xen/hypervisor.h> 78#include <xen/hypervisor.h>
79#include <xen/evtchn.h> 79#include <xen/evtchn.h>
80#include <xen/granttables.h> 80#include <xen/granttables.h>
81#include <xen/include/public/io/blkif.h> 81#include <xen/include/public/io/blkif.h>
82#include <xen/include/public/io/protocols.h> 82#include <xen/include/public/io/protocols.h>
83 83
84#include <xen/xenbus.h> 84#include <xen/xenbus.h>
85#include "locators.h" 85#include "locators.h"
86 86
87#undef XBD_DEBUG 87#undef XBD_DEBUG
88#ifdef XBD_DEBUG 88#ifdef XBD_DEBUG
89#define DPRINTF(x) printf x; 89#define DPRINTF(x) printf x;
90#else 90#else
91#define DPRINTF(x) 91#define DPRINTF(x)
92#endif 92#endif
93 93
94#define GRANT_INVALID_REF -1 94#define GRANT_INVALID_REF -1
95 95
96#define XBD_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) 96#define XBD_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE)
97#define XBD_MAX_XFER (PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST) 97#define XBD_MAX_XFER (PAGE_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST)
98#define XBD_MAX_CHUNK 32*1024 /* max I/O size we process in 1 req */ 98#define XBD_MAX_CHUNK 32*1024 /* max I/O size we process in 1 req */
99#define XBD_XFER_LIMIT (2*XBD_MAX_XFER) 99#define XBD_XFER_LIMIT (2*XBD_MAX_XFER)
100 100
101#define XEN_BSHIFT 9 /* log2(XEN_BSIZE) */ 101#define XEN_BSHIFT 9 /* log2(XEN_BSIZE) */
102#define XEN_BSIZE (1 << XEN_BSHIFT)  102#define XEN_BSIZE (1 << XEN_BSHIFT)
103 103
104CTASSERT((MAXPHYS <= 2*XBD_MAX_CHUNK)); 104CTASSERT((MAXPHYS <= 2*XBD_MAX_CHUNK));
105CTASSERT(XEN_BSIZE == DEV_BSIZE); 105CTASSERT(XEN_BSIZE == DEV_BSIZE);
106 106
107struct xbd_indirect { 107struct xbd_indirect {
108 SLIST_ENTRY(xbd_indirect) in_next; 108 SLIST_ENTRY(xbd_indirect) in_next;
109 struct blkif_request_segment *in_addr; 109 struct blkif_request_segment *in_addr;
110 grant_ref_t in_gntref; 110 grant_ref_t in_gntref;
111}; 111};
112 112
113struct xbd_req { 113struct xbd_req {
114 SLIST_ENTRY(xbd_req) req_next; 114 SLIST_ENTRY(xbd_req) req_next;
115 uint16_t req_id; /* ID passed to backend */ 115 uint16_t req_id; /* ID passed to backend */
116 bus_dmamap_t req_dmamap; 116 bus_dmamap_t req_dmamap;
117 struct xbd_req *req_parent, *req_child; 117 struct xbd_req *req_parent, *req_child;
118 bool req_parent_done; 118 bool req_parent_done;
119 union { 119 union {
120 struct { 120 struct {
121 grant_ref_t req_gntref[XBD_XFER_LIMIT >> PAGE_SHIFT]; 121 grant_ref_t req_gntref[XBD_XFER_LIMIT >> PAGE_SHIFT];
122 struct buf *req_bp; /* buffer associated with this request */ 122 struct buf *req_bp; /* buffer associated with this request */
123 void *req_data; /* pointer to the data buffer */ 123 void *req_data; /* pointer to the data buffer */
124 struct xbd_indirect *req_indirect; /* indirect page */ 124 struct xbd_indirect *req_indirect; /* indirect page */
125 } req_rw; 125 } req_rw;
126 struct { 126 struct {
127 int s_error; 127 int s_error;
128 int s_done; 128 int s_done;
129 } req_sync; 129 } req_sync;
130 } u; 130 } u;
131}; 131};
132#define req_gntref u.req_rw.req_gntref 132#define req_gntref u.req_rw.req_gntref
133#define req_bp u.req_rw.req_bp 133#define req_bp u.req_rw.req_bp
134#define req_data u.req_rw.req_data 134#define req_data u.req_rw.req_data
135#define req_indirect u.req_rw.req_indirect 135#define req_indirect u.req_rw.req_indirect
136#define req_sync u.req_sync 136#define req_sync u.req_sync
137 137
138struct xbd_xenbus_softc { 138struct xbd_xenbus_softc {
139 struct dk_softc sc_dksc; /* Must be first in this struct */ 139 struct dk_softc sc_dksc; /* Must be first in this struct */
140 struct xenbus_device *sc_xbusd; 140 struct xenbus_device *sc_xbusd;
141 unsigned int sc_evtchn; 141 unsigned int sc_evtchn;
142 142
143 struct intrhand *sc_ih; /* Interrupt handler for this instance. */ 143 struct intrhand *sc_ih; /* Interrupt handler for this instance. */
144 kmutex_t sc_lock; 144 kmutex_t sc_lock;
145 kcondvar_t sc_cache_flush_cv; 145 kcondvar_t sc_cache_flush_cv;
146 kcondvar_t sc_req_cv; 146 kcondvar_t sc_req_cv;
147 kcondvar_t sc_detach_cv; 147 kcondvar_t sc_detach_cv;
148 kcondvar_t sc_suspend_cv; 148 kcondvar_t sc_suspend_cv;
149 149
150 blkif_front_ring_t sc_ring; 150 blkif_front_ring_t sc_ring;
151 grant_ref_t sc_ring_gntref; 151 grant_ref_t sc_ring_gntref;
152 152
153 struct xbd_req sc_reqs[XBD_RING_SIZE]; 153 struct xbd_req sc_reqs[XBD_RING_SIZE];
154 SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */ 154 SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */
155 155
156 struct xbd_indirect sc_indirect[XBD_RING_SIZE]; 156 struct xbd_indirect sc_indirect[XBD_RING_SIZE];
157 SLIST_HEAD(,xbd_indirect) sc_indirect_head; 157 SLIST_HEAD(,xbd_indirect) sc_indirect_head;
158 158
159 vmem_addr_t sc_unalign_buffer; 159 vmem_addr_t sc_unalign_buffer;
160 void *sc_unalign_used; 160 void *sc_unalign_used;
161 161
162 int sc_backend_status; /* our status with backend */ 162 int sc_backend_status; /* our status with backend */
163#define BLKIF_STATE_DISCONNECTED 0 163#define BLKIF_STATE_DISCONNECTED 0
164#define BLKIF_STATE_CONNECTED 1 164#define BLKIF_STATE_CONNECTED 1
165#define BLKIF_STATE_SUSPENDED 2 165#define BLKIF_STATE_SUSPENDED 2
166 166
167 int sc_shutdown; 167 int sc_shutdown;
168#define BLKIF_SHUTDOWN_RUN 0 /* no shutdown */ 168#define BLKIF_SHUTDOWN_RUN 0 /* no shutdown */
169#define BLKIF_SHUTDOWN_REMOTE 1 /* backend-initiated shutdown in progress */ 169#define BLKIF_SHUTDOWN_REMOTE 1 /* backend-initiated shutdown in progress */
170#define BLKIF_SHUTDOWN_LOCAL 2 /* locally-initiated shutdown in progress */ 170#define BLKIF_SHUTDOWN_LOCAL 2 /* locally-initiated shutdown in progress */
171 171
172 uint64_t sc_sectors; /* number of sectors for this device */ 172 uint64_t sc_sectors; /* number of sc_secsize sectors for this device */
173 u_long sc_secsize; /* sector size */ 173 u_long sc_secsize; /* sector size */
174 uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */ 174 uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */
175 u_long sc_info; /* VDISK_* */ 175 u_long sc_info; /* VDISK_* */
176 u_long sc_handle; /* from backend */ 176 u_long sc_handle; /* from backend */
177 int sc_features; 177 int sc_features;
178#define BLKIF_FEATURE_CACHE_FLUSH 0x1 178#define BLKIF_FEATURE_CACHE_FLUSH 0x1
179#define BLKIF_FEATURE_BARRIER 0x2 179#define BLKIF_FEATURE_BARRIER 0x2
180#define BLKIF_FEATURE_PERSISTENT 0x4 180#define BLKIF_FEATURE_PERSISTENT 0x4
181#define BLKIF_FEATURE_INDIRECT 0x8 181#define BLKIF_FEATURE_INDIRECT 0x8
182#define BLKIF_FEATURE_BITS \ 182#define BLKIF_FEATURE_BITS \
183 "\20\1CACHE-FLUSH\2BARRIER\3PERSISTENT\4INDIRECT" 183 "\20\1CACHE-FLUSH\2BARRIER\3PERSISTENT\4INDIRECT"
184 struct evcnt sc_cnt_map_unalign; 184 struct evcnt sc_cnt_map_unalign;
185 struct evcnt sc_cnt_unalign_busy; 185 struct evcnt sc_cnt_unalign_busy;
186 struct evcnt sc_cnt_queue_full; 186 struct evcnt sc_cnt_queue_full;
187 struct evcnt sc_cnt_indirect; 187 struct evcnt sc_cnt_indirect;
188}; 188};
189 189
190static int xbd_xenbus_match(device_t, cfdata_t, void *); 190static int xbd_xenbus_match(device_t, cfdata_t, void *);
191static void xbd_xenbus_attach(device_t, device_t, void *); 191static void xbd_xenbus_attach(device_t, device_t, void *);
192static int xbd_xenbus_detach(device_t, int); 192static int xbd_xenbus_detach(device_t, int);
193 193
194static bool xbd_xenbus_suspend(device_t, const pmf_qual_t *); 194static bool xbd_xenbus_suspend(device_t, const pmf_qual_t *);
195static bool xbd_xenbus_resume(device_t, const pmf_qual_t *); 195static bool xbd_xenbus_resume(device_t, const pmf_qual_t *);
196 196
197static int xbd_handler(void *); 197static int xbd_handler(void *);
198static int xbd_diskstart(device_t, struct buf *); 198static int xbd_diskstart(device_t, struct buf *);
199static void xbd_iosize(device_t, int *); 199static void xbd_iosize(device_t, int *);
200static void xbd_backend_changed(void *, XenbusState); 200static void xbd_backend_changed(void *, XenbusState);
201static void xbd_connect(struct xbd_xenbus_softc *); 201static void xbd_connect(struct xbd_xenbus_softc *);
202static void xbd_features(struct xbd_xenbus_softc *); 202static void xbd_features(struct xbd_xenbus_softc *);
203 203
204static void xbd_diskstart_submit(struct xbd_xenbus_softc *, int, 204static void xbd_diskstart_submit(struct xbd_xenbus_softc *, int,
205 struct buf *bp, int, bus_dmamap_t, grant_ref_t *); 205 struct buf *bp, int, bus_dmamap_t, grant_ref_t *);
206static void xbd_diskstart_submit_indirect(struct xbd_xenbus_softc *, 206static void xbd_diskstart_submit_indirect(struct xbd_xenbus_softc *,
207 struct xbd_req *, struct buf *bp); 207 struct xbd_req *, struct buf *bp);
208static int xbd_map_align(struct xbd_xenbus_softc *, struct xbd_req *); 208static int xbd_map_align(struct xbd_xenbus_softc *, struct xbd_req *);
209static void xbd_unmap_align(struct xbd_xenbus_softc *, struct xbd_req *, 209static void xbd_unmap_align(struct xbd_xenbus_softc *, struct xbd_req *,
210 struct buf *); 210 struct buf *);
211 211
212static void xbdminphys(struct buf *); 212static void xbdminphys(struct buf *);
213 213
214CFATTACH_DECL3_NEW(xbd, sizeof(struct xbd_xenbus_softc), 214CFATTACH_DECL3_NEW(xbd, sizeof(struct xbd_xenbus_softc),
215 xbd_xenbus_match, xbd_xenbus_attach, xbd_xenbus_detach, NULL, NULL, NULL, 215 xbd_xenbus_match, xbd_xenbus_attach, xbd_xenbus_detach, NULL, NULL, NULL,
216 DVF_DETACH_SHUTDOWN); 216 DVF_DETACH_SHUTDOWN);
217 217
218static dev_type_open(xbdopen); 218static dev_type_open(xbdopen);
219static dev_type_close(xbdclose); 219static dev_type_close(xbdclose);
220static dev_type_read(xbdread); 220static dev_type_read(xbdread);
221static dev_type_write(xbdwrite); 221static dev_type_write(xbdwrite);
222static dev_type_ioctl(xbdioctl); 222static dev_type_ioctl(xbdioctl);
223static dev_type_strategy(xbdstrategy); 223static dev_type_strategy(xbdstrategy);
224static dev_type_dump(xbddump); 224static dev_type_dump(xbddump);
225static dev_type_size(xbdsize); 225static dev_type_size(xbdsize);
226 226
227const struct bdevsw xbd_bdevsw = { 227const struct bdevsw xbd_bdevsw = {
228 .d_open = xbdopen, 228 .d_open = xbdopen,
229 .d_close = xbdclose, 229 .d_close = xbdclose,
230 .d_strategy = xbdstrategy, 230 .d_strategy = xbdstrategy,
231 .d_ioctl = xbdioctl, 231 .d_ioctl = xbdioctl,
232 .d_dump = xbddump, 232 .d_dump = xbddump,
233 .d_psize = xbdsize, 233 .d_psize = xbdsize,
234 .d_discard = nodiscard, 234 .d_discard = nodiscard,
235 .d_flag = D_DISK | D_MPSAFE 235 .d_flag = D_DISK | D_MPSAFE
236}; 236};
237 237
238const struct cdevsw xbd_cdevsw = { 238const struct cdevsw xbd_cdevsw = {
239 .d_open = xbdopen, 239 .d_open = xbdopen,
240 .d_close = xbdclose, 240 .d_close = xbdclose,
241 .d_read = xbdread, 241 .d_read = xbdread,
242 .d_write = xbdwrite, 242 .d_write = xbdwrite,
243 .d_ioctl = xbdioctl, 243 .d_ioctl = xbdioctl,
244 .d_stop = nostop, 244 .d_stop = nostop,
245 .d_tty = notty, 245 .d_tty = notty,
246 .d_poll = nopoll, 246 .d_poll = nopoll,
247 .d_mmap = nommap, 247 .d_mmap = nommap,
248 .d_kqfilter = nokqfilter, 248 .d_kqfilter = nokqfilter,
249 .d_discard = nodiscard, 249 .d_discard = nodiscard,
250 .d_flag = D_DISK | D_MPSAFE 250 .d_flag = D_DISK | D_MPSAFE
251}; 251};
252 252
253extern struct cfdriver xbd_cd; 253extern struct cfdriver xbd_cd;
254 254
255static const struct dkdriver xbddkdriver = { 255static const struct dkdriver xbddkdriver = {
256 .d_strategy = xbdstrategy, 256 .d_strategy = xbdstrategy,
257 .d_minphys = xbdminphys, 257 .d_minphys = xbdminphys,
258 .d_open = xbdopen, 258 .d_open = xbdopen,
259 .d_close = xbdclose, 259 .d_close = xbdclose,
260 .d_diskstart = xbd_diskstart, 260 .d_diskstart = xbd_diskstart,
261 .d_iosize = xbd_iosize, 261 .d_iosize = xbd_iosize,
262}; 262};
263 263
264static int 264static int
265xbd_xenbus_match(device_t parent, cfdata_t match, void *aux) 265xbd_xenbus_match(device_t parent, cfdata_t match, void *aux)
266{ 266{
267 struct xenbusdev_attach_args *xa = aux; 267 struct xenbusdev_attach_args *xa = aux;
268 268
269 if (strcmp(xa->xa_type, "vbd") != 0) 269 if (strcmp(xa->xa_type, "vbd") != 0)
270 return 0; 270 return 0;
271 271
272 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT && 272 if (match->cf_loc[XENBUSCF_ID] != XENBUSCF_ID_DEFAULT &&
273 match->cf_loc[XENBUSCF_ID] != xa->xa_id) 273 match->cf_loc[XENBUSCF_ID] != xa->xa_id)
274 return 0; 274 return 0;
275 275
276 return 1; 276 return 1;
277} 277}
278 278
279static void 279static void
280xbd_xenbus_attach(device_t parent, device_t self, void *aux) 280xbd_xenbus_attach(device_t parent, device_t self, void *aux)
281{ 281{
282 struct xbd_xenbus_softc *sc = device_private(self); 282 struct xbd_xenbus_softc *sc = device_private(self);
283 struct xenbusdev_attach_args *xa = aux; 283 struct xenbusdev_attach_args *xa = aux;
284 blkif_sring_t *ring; 284 blkif_sring_t *ring;
285 RING_IDX i; 285 RING_IDX i;
286 286
287 config_pending_incr(self); 287 config_pending_incr(self);
288 aprint_normal(": Xen Virtual Block Device Interface\n"); 288 aprint_normal(": Xen Virtual Block Device Interface\n");
289 289
290 dk_init(&sc->sc_dksc, self, DKTYPE_ESDI); 290 dk_init(&sc->sc_dksc, self, DKTYPE_ESDI);
291 disk_init(&sc->sc_dksc.sc_dkdev, device_xname(self), &xbddkdriver); 291 disk_init(&sc->sc_dksc.sc_dkdev, device_xname(self), &xbddkdriver);
292 292
293 sc->sc_xbusd = xa->xa_xbusd; 293 sc->sc_xbusd = xa->xa_xbusd;
294 sc->sc_xbusd->xbusd_otherend_changed = xbd_backend_changed; 294 sc->sc_xbusd->xbusd_otherend_changed = xbd_backend_changed;
295 295
296 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_BIO); 296 mutex_init(&sc->sc_lock, MUTEX_DEFAULT, IPL_BIO);
297 cv_init(&sc->sc_cache_flush_cv, "xbdsync"); 297 cv_init(&sc->sc_cache_flush_cv, "xbdsync");
298 cv_init(&sc->sc_req_cv, "xbdreq"); 298 cv_init(&sc->sc_req_cv, "xbdreq");
299 cv_init(&sc->sc_detach_cv, "xbddetach"); 299 cv_init(&sc->sc_detach_cv, "xbddetach");
300 cv_init(&sc->sc_suspend_cv, "xbdsuspend"); 300 cv_init(&sc->sc_suspend_cv, "xbdsuspend");
301 301
302 xbd_features(sc); 302 xbd_features(sc);
303 303
304 /* initialize free requests list */ 304 /* initialize free requests list */
305 SLIST_INIT(&sc->sc_xbdreq_head); 305 SLIST_INIT(&sc->sc_xbdreq_head);
306 for (i = 0; i < XBD_RING_SIZE; i++) { 306 for (i = 0; i < XBD_RING_SIZE; i++) {
307 sc->sc_reqs[i].req_id = i; 307 sc->sc_reqs[i].req_id = i;
308 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, &sc->sc_reqs[i], 308 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, &sc->sc_reqs[i],
309 req_next); 309 req_next);
310 } 310 }
311 311
312 if (sc->sc_features & BLKIF_FEATURE_INDIRECT) { 312 if (sc->sc_features & BLKIF_FEATURE_INDIRECT) {
313 /* initialize indirect page list */ 313 /* initialize indirect page list */
314 for (i = 0; i < XBD_RING_SIZE; i++) { 314 for (i = 0; i < XBD_RING_SIZE; i++) {
315 vmem_addr_t va; 315 vmem_addr_t va;
316 if (uvm_km_kmem_alloc(kmem_va_arena, 316 if (uvm_km_kmem_alloc(kmem_va_arena,
317 PAGE_SIZE, VM_SLEEP | VM_INSTANTFIT, &va) != 0) { 317 PAGE_SIZE, VM_SLEEP | VM_INSTANTFIT, &va) != 0) {
318 aprint_error_dev(self, 318 aprint_error_dev(self,
319 "can't alloc indirect pages\n"); 319 "can't alloc indirect pages\n");
320 return; 320 return;
321 } 321 }
322 sc->sc_indirect[i].in_addr = (void *)va; 322 sc->sc_indirect[i].in_addr = (void *)va;
323 SLIST_INSERT_HEAD(&sc->sc_indirect_head, 323 SLIST_INSERT_HEAD(&sc->sc_indirect_head,
324 &sc->sc_indirect[i], in_next); 324 &sc->sc_indirect[i], in_next);
325 } 325 }
326 } 326 }
327 327
328 sc->sc_backend_status = BLKIF_STATE_DISCONNECTED; 328 sc->sc_backend_status = BLKIF_STATE_DISCONNECTED;
329 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE; 329 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;
330 330
331 ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED); 331 ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED);
332 if (ring == NULL) 332 if (ring == NULL)
333 panic("%s: can't alloc ring", device_xname(self)); 333 panic("%s: can't alloc ring", device_xname(self));
334 sc->sc_ring.sring = ring; 334 sc->sc_ring.sring = ring;
335 335
336 evcnt_attach_dynamic(&sc->sc_cnt_map_unalign, EVCNT_TYPE_MISC, 336 evcnt_attach_dynamic(&sc->sc_cnt_map_unalign, EVCNT_TYPE_MISC,
337 NULL, device_xname(self), "map unaligned"); 337 NULL, device_xname(self), "map unaligned");
338 evcnt_attach_dynamic(&sc->sc_cnt_unalign_busy, EVCNT_TYPE_MISC, 338 evcnt_attach_dynamic(&sc->sc_cnt_unalign_busy, EVCNT_TYPE_MISC,
339 NULL, device_xname(self), "map unaligned"); 339 NULL, device_xname(self), "map unaligned");
340 evcnt_attach_dynamic(&sc->sc_cnt_queue_full, EVCNT_TYPE_MISC, 340 evcnt_attach_dynamic(&sc->sc_cnt_queue_full, EVCNT_TYPE_MISC,
341 NULL, device_xname(self), "queue full"); 341 NULL, device_xname(self), "queue full");
342 evcnt_attach_dynamic(&sc->sc_cnt_indirect, EVCNT_TYPE_MISC, 342 evcnt_attach_dynamic(&sc->sc_cnt_indirect, EVCNT_TYPE_MISC,
343 NULL, device_xname(self), "indirect segment"); 343 NULL, device_xname(self), "indirect segment");
344 344
345 for (i = 0; i < XBD_RING_SIZE; i++) { 345 for (i = 0; i < XBD_RING_SIZE; i++) {
346 if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat, 346 if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat,
347 MAXPHYS, XBD_XFER_LIMIT >> PAGE_SHIFT, 347 MAXPHYS, XBD_XFER_LIMIT >> PAGE_SHIFT,
348 PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW, 348 PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
349 &sc->sc_reqs[i].req_dmamap) != 0) { 349 &sc->sc_reqs[i].req_dmamap) != 0) {
350 aprint_error_dev(self, "can't alloc dma maps\n"); 350 aprint_error_dev(self, "can't alloc dma maps\n");
351 return; 351 return;
352 } 352 }
353 } 353 }
354 354
355 if (uvm_km_kmem_alloc(kmem_va_arena, 355 if (uvm_km_kmem_alloc(kmem_va_arena,
356 MAXPHYS, VM_SLEEP | VM_INSTANTFIT, &sc->sc_unalign_buffer) != 0) { 356 MAXPHYS, VM_SLEEP | VM_INSTANTFIT, &sc->sc_unalign_buffer) != 0) {
357 aprint_error_dev(self, "can't alloc align buffer\n"); 357 aprint_error_dev(self, "can't alloc align buffer\n");
358 return; 358 return;
359 } 359 }
360 360
361 /* resume shared structures and tell backend that we are ready */ 361 /* resume shared structures and tell backend that we are ready */
362 if (xbd_xenbus_resume(self, PMF_Q_NONE) == false) { 362 if (xbd_xenbus_resume(self, PMF_Q_NONE) == false) {
363 uvm_km_free(kernel_map, (vaddr_t)ring, PAGE_SIZE, 363 uvm_km_free(kernel_map, (vaddr_t)ring, PAGE_SIZE,
364 UVM_KMF_WIRED); 364 UVM_KMF_WIRED);
365 return; 365 return;
366 } 366 }
367 367
368 if (!pmf_device_register(self, xbd_xenbus_suspend, xbd_xenbus_resume)) 368 if (!pmf_device_register(self, xbd_xenbus_suspend, xbd_xenbus_resume))
369 aprint_error_dev(self, "couldn't establish power handler\n"); 369 aprint_error_dev(self, "couldn't establish power handler\n");
370} 370}
371 371
372static int 372static int
373xbd_xenbus_detach(device_t dev, int flags) 373xbd_xenbus_detach(device_t dev, int flags)
374{ 374{
375 struct xbd_xenbus_softc *sc = device_private(dev); 375 struct xbd_xenbus_softc *sc = device_private(dev);
376 int bmaj, cmaj, i, mn, rc; 376 int bmaj, cmaj, i, mn, rc;
377 377
378 DPRINTF(("%s: xbd_detach\n", device_xname(dev))); 378 DPRINTF(("%s: xbd_detach\n", device_xname(dev)));
379 379
380 rc = disk_begindetach(&sc->sc_dksc.sc_dkdev, NULL, dev, flags); 380 rc = disk_begindetach(&sc->sc_dksc.sc_dkdev, NULL, dev, flags);
381 if (rc != 0) 381 if (rc != 0)
382 return rc; 382 return rc;
383 383
384 mutex_enter(&sc->sc_lock); 384 mutex_enter(&sc->sc_lock);
385 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN) { 385 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN) {
386 sc->sc_shutdown = BLKIF_SHUTDOWN_LOCAL; 386 sc->sc_shutdown = BLKIF_SHUTDOWN_LOCAL;
387 387
388 /* wait for requests to complete */ 388 /* wait for requests to complete */
389 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED && 389 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
390 disk_isbusy(&sc->sc_dksc.sc_dkdev)) { 390 disk_isbusy(&sc->sc_dksc.sc_dkdev)) {
391 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 391 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
392 } 392 }
393 mutex_exit(&sc->sc_lock); 393 mutex_exit(&sc->sc_lock);
394 394
395 /* Trigger state transition with backend */ 395 /* Trigger state transition with backend */
396 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing); 396 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosing);
397 397
398 mutex_enter(&sc->sc_lock); 398 mutex_enter(&sc->sc_lock);
399 } 399 }
400 if ((flags & DETACH_FORCE) == 0) { 400 if ((flags & DETACH_FORCE) == 0) {
401 /* xbd_xenbus_detach already in progress */ 401 /* xbd_xenbus_detach already in progress */
402 cv_broadcast(&sc->sc_detach_cv); 402 cv_broadcast(&sc->sc_detach_cv);
403 mutex_exit(&sc->sc_lock); 403 mutex_exit(&sc->sc_lock);
404 return EALREADY; 404 return EALREADY;
405 } 405 }
406 mutex_exit(&sc->sc_lock); 406 mutex_exit(&sc->sc_lock);
407 while (xenbus_read_driver_state(sc->sc_xbusd->xbusd_otherend) 407 while (xenbus_read_driver_state(sc->sc_xbusd->xbusd_otherend)
408 != XenbusStateClosed) { 408 != XenbusStateClosed) {
409 mutex_enter(&sc->sc_lock); 409 mutex_enter(&sc->sc_lock);
410 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 410 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
411 mutex_exit(&sc->sc_lock); 411 mutex_exit(&sc->sc_lock);
412 } 412 }
413 413
414 /* locate the major number */ 414 /* locate the major number */
415 bmaj = bdevsw_lookup_major(&xbd_bdevsw); 415 bmaj = bdevsw_lookup_major(&xbd_bdevsw);
416 cmaj = cdevsw_lookup_major(&xbd_cdevsw); 416 cmaj = cdevsw_lookup_major(&xbd_cdevsw);
417 417
418 /* Nuke the vnodes for any open instances. */ 418 /* Nuke the vnodes for any open instances. */
419 for (i = 0; i < MAXPARTITIONS; i++) { 419 for (i = 0; i < MAXPARTITIONS; i++) {
420 mn = DISKMINOR(device_unit(dev), i); 420 mn = DISKMINOR(device_unit(dev), i);
421 vdevgone(bmaj, mn, mn, VBLK); 421 vdevgone(bmaj, mn, mn, VBLK);
422 vdevgone(cmaj, mn, mn, VCHR); 422 vdevgone(cmaj, mn, mn, VCHR);
423 } 423 }
424 424
425 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED) { 425 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED) {
426 /* Delete all of our wedges. */ 426 /* Delete all of our wedges. */
427 dkwedge_delall(&sc->sc_dksc.sc_dkdev); 427 dkwedge_delall(&sc->sc_dksc.sc_dkdev);
428 428
429 /* Kill off any queued buffers. */ 429 /* Kill off any queued buffers. */
430 dk_drain(&sc->sc_dksc); 430 dk_drain(&sc->sc_dksc);
431 bufq_free(sc->sc_dksc.sc_bufq); 431 bufq_free(sc->sc_dksc.sc_bufq);
432 432
433 /* detach disk */ 433 /* detach disk */
434 disk_detach(&sc->sc_dksc.sc_dkdev); 434 disk_detach(&sc->sc_dksc.sc_dkdev);
435 disk_destroy(&sc->sc_dksc.sc_dkdev); 435 disk_destroy(&sc->sc_dksc.sc_dkdev);
436 dk_detach(&sc->sc_dksc); 436 dk_detach(&sc->sc_dksc);
437 } 437 }
438 438
439 hypervisor_mask_event(sc->sc_evtchn); 439 hypervisor_mask_event(sc->sc_evtchn);
440 if (sc->sc_ih != NULL) { 440 if (sc->sc_ih != NULL) {
441 xen_intr_disestablish(sc->sc_ih); 441 xen_intr_disestablish(sc->sc_ih);
442 sc->sc_ih = NULL; 442 sc->sc_ih = NULL;
443 } 443 }
444 444
445 mutex_enter(&sc->sc_lock); 445 mutex_enter(&sc->sc_lock);
446 while (xengnt_status(sc->sc_ring_gntref)) 446 while (xengnt_status(sc->sc_ring_gntref))
447 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 447 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
448 mutex_exit(&sc->sc_lock); 448 mutex_exit(&sc->sc_lock);
449 449
450 xengnt_revoke_access(sc->sc_ring_gntref); 450 xengnt_revoke_access(sc->sc_ring_gntref);
451 uvm_km_free(kernel_map, (vaddr_t)sc->sc_ring.sring, 451 uvm_km_free(kernel_map, (vaddr_t)sc->sc_ring.sring,
452 PAGE_SIZE, UVM_KMF_WIRED); 452 PAGE_SIZE, UVM_KMF_WIRED);
453 453
454 for (i = 0; i < XBD_RING_SIZE; i++) { 454 for (i = 0; i < XBD_RING_SIZE; i++) {
455 if (sc->sc_reqs[i].req_dmamap != NULL) { 455 if (sc->sc_reqs[i].req_dmamap != NULL) {
456 bus_dmamap_destroy(sc->sc_xbusd->xbusd_dmat, 456 bus_dmamap_destroy(sc->sc_xbusd->xbusd_dmat,
457 sc->sc_reqs[i].req_dmamap); 457 sc->sc_reqs[i].req_dmamap);
458 sc->sc_reqs[i].req_dmamap = NULL; 458 sc->sc_reqs[i].req_dmamap = NULL;
459 } 459 }
460 } 460 }
461 461
462 if (sc->sc_unalign_buffer != 0) { 462 if (sc->sc_unalign_buffer != 0) {
463 uvm_km_kmem_free(kmem_va_arena, sc->sc_unalign_buffer, MAXPHYS); 463 uvm_km_kmem_free(kmem_va_arena, sc->sc_unalign_buffer, MAXPHYS);
464 sc->sc_unalign_buffer = 0; 464 sc->sc_unalign_buffer = 0;
465 } 465 }
466 466
467 mutex_destroy(&sc->sc_lock); 467 mutex_destroy(&sc->sc_lock);
468 468
469 evcnt_detach(&sc->sc_cnt_map_unalign); 469 evcnt_detach(&sc->sc_cnt_map_unalign);
470 evcnt_detach(&sc->sc_cnt_unalign_busy); 470 evcnt_detach(&sc->sc_cnt_unalign_busy);
471 evcnt_detach(&sc->sc_cnt_queue_full); 471 evcnt_detach(&sc->sc_cnt_queue_full);
472 evcnt_detach(&sc->sc_cnt_indirect); 472 evcnt_detach(&sc->sc_cnt_indirect);
473 473
474 pmf_device_deregister(dev); 474 pmf_device_deregister(dev);
475 475
476 return 0; 476 return 0;
477} 477}
478 478
479static bool 479static bool
480xbd_xenbus_suspend(device_t dev, const pmf_qual_t *qual) { 480xbd_xenbus_suspend(device_t dev, const pmf_qual_t *qual) {
481 481
482 struct xbd_xenbus_softc *sc; 482 struct xbd_xenbus_softc *sc;
483 483
484 sc = device_private(dev); 484 sc = device_private(dev);
485 485
486 mutex_enter(&sc->sc_lock); 486 mutex_enter(&sc->sc_lock);
487 /* wait for requests to complete, then suspend device */ 487 /* wait for requests to complete, then suspend device */
488 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED && 488 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
489 disk_isbusy(&sc->sc_dksc.sc_dkdev)) { 489 disk_isbusy(&sc->sc_dksc.sc_dkdev)) {
490 cv_timedwait(&sc->sc_suspend_cv, &sc->sc_lock, hz/2); 490 cv_timedwait(&sc->sc_suspend_cv, &sc->sc_lock, hz/2);
491 } 491 }
492 492
493 hypervisor_mask_event(sc->sc_evtchn); 493 hypervisor_mask_event(sc->sc_evtchn);
494 sc->sc_backend_status = BLKIF_STATE_SUSPENDED; 494 sc->sc_backend_status = BLKIF_STATE_SUSPENDED;
495 495
496#ifdef DIAGNOSTIC 496#ifdef DIAGNOSTIC
497 /* Check that all requests are finished and device ready for resume */ 497 /* Check that all requests are finished and device ready for resume */
498 int reqcnt = 0; 498 int reqcnt = 0;
499 struct xbd_req *req; 499 struct xbd_req *req;
500 SLIST_FOREACH(req, &sc->sc_xbdreq_head, req_next) 500 SLIST_FOREACH(req, &sc->sc_xbdreq_head, req_next)
501 reqcnt++; 501 reqcnt++;
502 KASSERT(reqcnt == __arraycount(sc->sc_reqs)); 502 KASSERT(reqcnt == __arraycount(sc->sc_reqs));
503 503
504 int incnt = 0; 504 int incnt = 0;
505 struct xbd_indirect *in; 505 struct xbd_indirect *in;
506 SLIST_FOREACH(in, &sc->sc_indirect_head, in_next) 506 SLIST_FOREACH(in, &sc->sc_indirect_head, in_next)
507 incnt++; 507 incnt++;
508 KASSERT(incnt == __arraycount(sc->sc_indirect)); 508 KASSERT(incnt == __arraycount(sc->sc_indirect));
509#endif 509#endif
510 510
511 mutex_exit(&sc->sc_lock); 511 mutex_exit(&sc->sc_lock);
512 512
513 xenbus_device_suspend(sc->sc_xbusd); 513 xenbus_device_suspend(sc->sc_xbusd);
514 aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn); 514 aprint_verbose_dev(dev, "removed event channel %d\n", sc->sc_evtchn);
515 515
516 return true; 516 return true;
517} 517}
518 518
519static bool 519static bool
520xbd_xenbus_resume(device_t dev, const pmf_qual_t *qual) 520xbd_xenbus_resume(device_t dev, const pmf_qual_t *qual)
521{ 521{
522 struct xbd_xenbus_softc *sc; 522 struct xbd_xenbus_softc *sc;
523 struct xenbus_transaction *xbt; 523 struct xenbus_transaction *xbt;
524 int error; 524 int error;
525 blkif_sring_t *ring; 525 blkif_sring_t *ring;
526 paddr_t ma; 526 paddr_t ma;
527 const char *errmsg; 527 const char *errmsg;
528 528
529 sc = device_private(dev); 529 sc = device_private(dev);
530 530
531 /* All grants were removed during suspend */ 531 /* All grants were removed during suspend */
532 sc->sc_ring_gntref = GRANT_INVALID_REF; 532 sc->sc_ring_gntref = GRANT_INVALID_REF;
533 533
534 /* Initialize ring */ 534 /* Initialize ring */
535 ring = sc->sc_ring.sring; 535 ring = sc->sc_ring.sring;
536 memset(ring, 0, PAGE_SIZE); 536 memset(ring, 0, PAGE_SIZE);
537 SHARED_RING_INIT(ring); 537 SHARED_RING_INIT(ring);
538 FRONT_RING_INIT(&sc->sc_ring, ring, PAGE_SIZE); 538 FRONT_RING_INIT(&sc->sc_ring, ring, PAGE_SIZE);
539 539
540 /* 540 /*
541 * get MA address of the ring, and use it to set up the grant entry 541 * get MA address of the ring, and use it to set up the grant entry
542 * for the block device 542 * for the block device
543 */ 543 */
544 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)ring, &ma); 544 (void)pmap_extract_ma(pmap_kernel(), (vaddr_t)ring, &ma);
545 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_ring_gntref); 545 error = xenbus_grant_ring(sc->sc_xbusd, ma, &sc->sc_ring_gntref);
546 if (error) 546 if (error)
547 goto abort_resume; 547 goto abort_resume;
548 548
549 if (sc->sc_features & BLKIF_FEATURE_INDIRECT) { 549 if (sc->sc_features & BLKIF_FEATURE_INDIRECT) {
550 for (int i = 0; i < XBD_RING_SIZE; i++) { 550 for (int i = 0; i < XBD_RING_SIZE; i++) {
551 vaddr_t va = (vaddr_t)sc->sc_indirect[i].in_addr; 551 vaddr_t va = (vaddr_t)sc->sc_indirect[i].in_addr;
552 KASSERT(va != 0); 552 KASSERT(va != 0);
553 KASSERT((va & PAGE_MASK) == 0); 553 KASSERT((va & PAGE_MASK) == 0);
554 (void)pmap_extract_ma(pmap_kernel(), va, &ma); 554 (void)pmap_extract_ma(pmap_kernel(), va, &ma);
555 if (xengnt_grant_access( 555 if (xengnt_grant_access(
556 sc->sc_xbusd->xbusd_otherend_id, 556 sc->sc_xbusd->xbusd_otherend_id,
557 ma, true, &sc->sc_indirect[i].in_gntref)) { 557 ma, true, &sc->sc_indirect[i].in_gntref)) {
558 aprint_error_dev(dev, 558 aprint_error_dev(dev,
559 "indirect page grant failed\n"); 559 "indirect page grant failed\n");
560 goto abort_resume; 560 goto abort_resume;
561 } 561 }
562 } 562 }
563 } 563 }
564 564
565 error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn); 565 error = xenbus_alloc_evtchn(sc->sc_xbusd, &sc->sc_evtchn);
566 if (error) 566 if (error)
567 goto abort_resume; 567 goto abort_resume;
568 568
569 if (sc->sc_ih != NULL) { 569 if (sc->sc_ih != NULL) {
570 xen_intr_disestablish(sc->sc_ih); 570 xen_intr_disestablish(sc->sc_ih);
571 sc->sc_ih = NULL; 571 sc->sc_ih = NULL;
572 } 572 }
573 aprint_verbose_dev(dev, "using event channel %d\n", 573 aprint_verbose_dev(dev, "using event channel %d\n",
574 sc->sc_evtchn); 574 sc->sc_evtchn);
575 sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn, 575 sc->sc_ih = xen_intr_establish_xname(-1, &xen_pic, sc->sc_evtchn,
576 IST_LEVEL, IPL_BIO, &xbd_handler, sc, true, device_xname(dev)); 576 IST_LEVEL, IPL_BIO, &xbd_handler, sc, true, device_xname(dev));
577 KASSERT(sc->sc_ih != NULL); 577 KASSERT(sc->sc_ih != NULL);
578 578
579again: 579again:
580 xbt = xenbus_transaction_start(); 580 xbt = xenbus_transaction_start();
581 if (xbt == NULL) 581 if (xbt == NULL)
582 return false; 582 return false;
583 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 583 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
584 "ring-ref","%u", sc->sc_ring_gntref); 584 "ring-ref","%u", sc->sc_ring_gntref);
585 if (error) { 585 if (error) {
586 errmsg = "writing ring-ref"; 586 errmsg = "writing ring-ref";
587 goto abort_transaction; 587 goto abort_transaction;
588 } 588 }
589 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 589 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
590 "event-channel", "%u", sc->sc_evtchn); 590 "event-channel", "%u", sc->sc_evtchn);
591 if (error) { 591 if (error) {
592 errmsg = "writing event channel"; 592 errmsg = "writing event channel";
593 goto abort_transaction; 593 goto abort_transaction;
594 } 594 }
595 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path, 595 error = xenbus_printf(xbt, sc->sc_xbusd->xbusd_path,
596 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE); 596 "protocol", "%s", XEN_IO_PROTO_ABI_NATIVE);
597 if (error) { 597 if (error) {
598 errmsg = "writing protocol"; 598 errmsg = "writing protocol";
599 goto abort_transaction; 599 goto abort_transaction;
600 } 600 }
601 error = xenbus_transaction_end(xbt, 0); 601 error = xenbus_transaction_end(xbt, 0);
602 if (error == EAGAIN) 602 if (error == EAGAIN)
603 goto again; 603 goto again;
604 if (error != 0) { 604 if (error != 0) {
605 xenbus_dev_fatal(sc->sc_xbusd, error, 605 xenbus_dev_fatal(sc->sc_xbusd, error,
606 "completing transaction"); 606 "completing transaction");
607 return false; 607 return false;
608 } 608 }
609 609
610 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised); 610 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateInitialised);
611 611
612 if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) { 612 if (sc->sc_backend_status == BLKIF_STATE_SUSPENDED) {
613 /* 613 /*
614 * device was suspended, softc structures are 614 * device was suspended, softc structures are
615 * already initialized - we use a shortcut 615 * already initialized - we use a shortcut
616 */ 616 */
617 sc->sc_backend_status = BLKIF_STATE_CONNECTED; 617 sc->sc_backend_status = BLKIF_STATE_CONNECTED;
618 xenbus_device_resume(sc->sc_xbusd); 618 xenbus_device_resume(sc->sc_xbusd);
619 hypervisor_unmask_event(sc->sc_evtchn); 619 hypervisor_unmask_event(sc->sc_evtchn);
620 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected); 620 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
621 } 621 }
622 622
623 return true; 623 return true;
624 624
625abort_resume: 625abort_resume:
626 xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device"); 626 xenbus_dev_fatal(sc->sc_xbusd, error, "resuming device");
627 return false; 627 return false;
628 628
629abort_transaction: 629abort_transaction:
630 xenbus_transaction_end(xbt, 1); 630 xenbus_transaction_end(xbt, 1);
631 xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg); 631 xenbus_dev_fatal(sc->sc_xbusd, error, "%s", errmsg);
632 return false; 632 return false;
633} 633}
634 634
635static void 635static void
636xbd_backend_changed(void *arg, XenbusState new_state) 636xbd_backend_changed(void *arg, XenbusState new_state)
637{ 637{
638 struct xbd_xenbus_softc *sc = device_private((device_t)arg); 638 struct xbd_xenbus_softc *sc = device_private((device_t)arg);
639 struct disk_geom *dg; 639 struct disk_geom *dg;
640 640
641 char buf[64]; 641 char buf[64];
642 DPRINTF(("%s: new backend state %d\n", 642 DPRINTF(("%s: new backend state %d\n",
643 device_xname(sc->sc_dksc.sc_dev), new_state)); 643 device_xname(sc->sc_dksc.sc_dev), new_state));
644 644
645 switch (new_state) { 645 switch (new_state) {
646 case XenbusStateUnknown: 646 case XenbusStateUnknown:
647 case XenbusStateInitialising: 647 case XenbusStateInitialising:
648 case XenbusStateInitWait: 648 case XenbusStateInitWait:
649 case XenbusStateInitialised: 649 case XenbusStateInitialised:
650 break; 650 break;
651 case XenbusStateClosing: 651 case XenbusStateClosing:
652 mutex_enter(&sc->sc_lock); 652 mutex_enter(&sc->sc_lock);
653 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN) 653 if (sc->sc_shutdown == BLKIF_SHUTDOWN_RUN)
654 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE; 654 sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;
655 /* wait for requests to complete */ 655 /* wait for requests to complete */
656 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED && 656 while (sc->sc_backend_status == BLKIF_STATE_CONNECTED &&
657 disk_isbusy(&sc->sc_dksc.sc_dkdev)) { 657 disk_isbusy(&sc->sc_dksc.sc_dkdev)) {
658 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2); 658 cv_timedwait(&sc->sc_detach_cv, &sc->sc_lock, hz/2);
659 } 659 }
660 mutex_exit(&sc->sc_lock); 660 mutex_exit(&sc->sc_lock);
661 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed); 661 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateClosed);
662 break; 662 break;
663 case XenbusStateConnected: 663 case XenbusStateConnected:
664 /* 664 /*
665 * note that xbd_backend_changed() can only be called by 665 * note that xbd_backend_changed() can only be called by
666 * the xenbus thread. 666 * the xenbus thread.
667 */ 667 */
668 668
669 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED || 669 if (sc->sc_backend_status == BLKIF_STATE_CONNECTED ||
670 sc->sc_backend_status == BLKIF_STATE_SUSPENDED) 670 sc->sc_backend_status == BLKIF_STATE_SUSPENDED)
671 /* already connected */ 671 /* already connected */
672 return; 672 return;
673 673
674 xbd_connect(sc); 674 xbd_connect(sc);
675 sc->sc_shutdown = BLKIF_SHUTDOWN_RUN; 675 sc->sc_shutdown = BLKIF_SHUTDOWN_RUN;
676 sc->sc_xbdsize = 676 sc->sc_xbdsize =
677 sc->sc_sectors * (uint64_t)sc->sc_secsize / DEV_BSIZE; 677 sc->sc_sectors * (uint64_t)sc->sc_secsize / DEV_BSIZE;
678 dg = &sc->sc_dksc.sc_dkdev.dk_geom; 678 dg = &sc->sc_dksc.sc_dkdev.dk_geom;
679 memset(dg, 0, sizeof(*dg));  679 memset(dg, 0, sizeof(*dg));
680 680
681 dg->dg_secperunit = sc->sc_xbdsize; 681 dg->dg_secperunit = sc->sc_sectors;
682 dg->dg_secsize = DEV_BSIZE; 682 dg->dg_secsize = sc->sc_secsize;
683 dg->dg_ntracks = 1; 683 dg->dg_ntracks = 1;
684 // XXX: Ok to hard-code DEV_BSIZE? 684 dg->dg_nsectors = (1024 * 1024) / dg->dg_secsize;
685 dg->dg_nsectors = 1024 * (1024 / dg->dg_secsize); 
686 dg->dg_ncylinders = dg->dg_secperunit / dg->dg_nsectors; 685 dg->dg_ncylinders = dg->dg_secperunit / dg->dg_nsectors;
687 686
688 bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0); 687 bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0);
689 dk_attach(&sc->sc_dksc); 688 dk_attach(&sc->sc_dksc);
690 disk_attach(&sc->sc_dksc.sc_dkdev); 689 disk_attach(&sc->sc_dksc.sc_dkdev);
691 690
692 sc->sc_backend_status = BLKIF_STATE_CONNECTED; 691 sc->sc_backend_status = BLKIF_STATE_CONNECTED;
693 hypervisor_unmask_event(sc->sc_evtchn); 692 hypervisor_unmask_event(sc->sc_evtchn);
694 693
695 format_bytes(buf, uimin(9, sizeof(buf)), 694 format_bytes(buf, uimin(9, sizeof(buf)),
696 sc->sc_sectors * sc->sc_secsize); 695 sc->sc_sectors * dg->dg_secsize);
697 aprint_normal_dev(sc->sc_dksc.sc_dev, 696 aprint_normal_dev(sc->sc_dksc.sc_dev,
698 "%s, %d bytes/sect x %" PRIu64 " sectors\n", 697 "%s, %d bytes/sect x %" PRIu64 " sectors\n",
699 buf, (int)dg->dg_secsize, sc->sc_xbdsize); 698 buf, (int)dg->dg_secsize, sc->sc_sectors);
700 snprintb(buf, sizeof(buf), BLKIF_FEATURE_BITS, 699 snprintb(buf, sizeof(buf), BLKIF_FEATURE_BITS,
701 sc->sc_features); 700 sc->sc_features);
702 aprint_normal_dev(sc->sc_dksc.sc_dev, 701 aprint_normal_dev(sc->sc_dksc.sc_dev,
703 "backend features %s\n", buf); 702 "backend features %s\n", buf);
704 703
705 /* Discover wedges on this disk. */ 704 /* Discover wedges on this disk. */
706 dkwedge_discover(&sc->sc_dksc.sc_dkdev); 705 dkwedge_discover(&sc->sc_dksc.sc_dkdev);
707 706
708 disk_set_info(sc->sc_dksc.sc_dev, &sc->sc_dksc.sc_dkdev, NULL); 707 disk_set_info(sc->sc_dksc.sc_dev, &sc->sc_dksc.sc_dkdev, NULL);
709 708
710 /* the disk should be working now */ 709 /* the disk should be working now */
711 config_pending_decr(sc->sc_dksc.sc_dev); 710 config_pending_decr(sc->sc_dksc.sc_dev);
712 break; 711 break;
713 default: 712 default:
714 panic("bad backend state %d", new_state); 713 panic("bad backend state %d", new_state);
715 } 714 }
716} 715}
717 716
718static void 717static void
719xbd_connect(struct xbd_xenbus_softc *sc) 718xbd_connect(struct xbd_xenbus_softc *sc)
720{ 719{
721 int err; 720 int err;
722 unsigned long long sectors; 721 unsigned long long sectors;
723 u_long val; 722 u_long val;
724 723
725 /* 724 /*
726 * Must read feature-persistent later, e.g. Linux Dom0 writes 725 * Must read feature-persistent later, e.g. Linux Dom0 writes
727 * this together with the device info. 726 * this together with the device info.
728 */ 727 */
729 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 728 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
730 "feature-persistent", &val, 10); 729 "feature-persistent", &val, 10);
731 if (err) 730 if (err)
732 val = 0; 731 val = 0;
733 if (val > 0) 732 if (val > 0)
734 sc->sc_features |= BLKIF_FEATURE_PERSISTENT; 733 sc->sc_features |= BLKIF_FEATURE_PERSISTENT;
735 734
736 err = xenbus_read_ul(NULL, 735 err = xenbus_read_ul(NULL,
737 sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10); 736 sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10);
738 if (err) 737 if (err)
739 panic("%s: can't read number from %s/virtual-device\n",  738 panic("%s: can't read number from %s/virtual-device\n",
740 device_xname(sc->sc_dksc.sc_dev), 739 device_xname(sc->sc_dksc.sc_dev),
741 sc->sc_xbusd->xbusd_otherend); 740 sc->sc_xbusd->xbusd_otherend);
742 err = xenbus_read_ull(NULL, 
743 sc->sc_xbusd->xbusd_otherend, "sectors", &sectors, 10); 
744 if (err) 
745 panic("%s: can't read number from %s/sectors\n",  
746 device_xname(sc->sc_dksc.sc_dev), 
747 sc->sc_xbusd->xbusd_otherend); 
748 sc->sc_sectors = sectors; 
749 
750 err = xenbus_read_ul(NULL, 741 err = xenbus_read_ul(NULL,
751 sc->sc_xbusd->xbusd_otherend, "info", &sc->sc_info, 10); 742 sc->sc_xbusd->xbusd_otherend, "info", &sc->sc_info, 10);
752 if (err) 743 if (err)
753 panic("%s: can't read number from %s/info\n",  744 panic("%s: can't read number from %s/info\n",
754 device_xname(sc->sc_dksc.sc_dev), 745 device_xname(sc->sc_dksc.sc_dev),
755 sc->sc_xbusd->xbusd_otherend); 746 sc->sc_xbusd->xbusd_otherend);
756 err = xenbus_read_ul(NULL, 747 err = xenbus_read_ul(NULL,
757 sc->sc_xbusd->xbusd_otherend, "sector-size", &sc->sc_secsize, 10); 748 sc->sc_xbusd->xbusd_otherend, "sector-size", &sc->sc_secsize, 10);
758 if (err) 749 if (err)
759 panic("%s: can't read number from %s/sector-size\n",  750 panic("%s: can't read number from %s/sector-size\n",
760 device_xname(sc->sc_dksc.sc_dev), 751 device_xname(sc->sc_dksc.sc_dev),
761 sc->sc_xbusd->xbusd_otherend); 752 sc->sc_xbusd->xbusd_otherend);
 753 err = xenbus_read_ull(NULL,
 754 sc->sc_xbusd->xbusd_otherend, "sectors", &sectors, 10);
 755 if (err)
 756 panic("%s: can't read number from %s/sectors\n",
 757 device_xname(sc->sc_dksc.sc_dev),
 758 sc->sc_xbusd->xbusd_otherend);
 759 sc->sc_sectors = sectors * (uint64_t)XEN_BSIZE / sc->sc_secsize;
762 760
763 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected); 761 xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
764} 762}
765 763
766static void 764static void
767xbd_features(struct xbd_xenbus_softc *sc) 765xbd_features(struct xbd_xenbus_softc *sc)
768{ 766{
769 int err; 767 int err;
770 u_long val; 768 u_long val;
771 769
772 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 770 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
773 "feature-flush-cache", &val, 10); 771 "feature-flush-cache", &val, 10);
774 if (err) 772 if (err)
775 val = 0; 773 val = 0;
776 if (val > 0) 774 if (val > 0)
777 sc->sc_features |= BLKIF_FEATURE_CACHE_FLUSH; 775 sc->sc_features |= BLKIF_FEATURE_CACHE_FLUSH;
778 776
779 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 777 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
780 "feature-barrier", &val, 10); 778 "feature-barrier", &val, 10);
781 if (err) 779 if (err)
782 val = 0; 780 val = 0;
783 if (val > 0) 781 if (val > 0)
784 sc->sc_features |= BLKIF_FEATURE_BARRIER; 782 sc->sc_features |= BLKIF_FEATURE_BARRIER;
785 783
786 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend, 784 err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
787 "feature-max-indirect-segments", &val, 10); 785 "feature-max-indirect-segments", &val, 10);
788 if (err) 786 if (err)
789 val = 0; 787 val = 0;
790 if (val >= (MAXPHYS >> PAGE_SHIFT) + 1) { 788 if (val >= (MAXPHYS >> PAGE_SHIFT) + 1) {
791 /* We can use indirect segments, the limit is big enough */ 789 /* We can use indirect segments, the limit is big enough */
792 sc->sc_features |= BLKIF_FEATURE_INDIRECT; 790 sc->sc_features |= BLKIF_FEATURE_INDIRECT;
793 } 791 }
794} 792}
795 793
796static int 794static int
797xbd_handler(void *arg) 795xbd_handler(void *arg)
798{ 796{
799 struct xbd_xenbus_softc *sc = arg; 797 struct xbd_xenbus_softc *sc = arg;
800 struct buf *bp; 798 struct buf *bp;
801 RING_IDX resp_prod, i; 799 RING_IDX resp_prod, i;
802 int more_to_do; 800 int more_to_do;
803 int seg; 801 int seg;
804 grant_ref_t gntref; 802 grant_ref_t gntref;
805 803
806 DPRINTF(("xbd_handler(%s)\n", device_xname(sc->sc_dksc.sc_dev))); 804 DPRINTF(("xbd_handler(%s)\n", device_xname(sc->sc_dksc.sc_dev)));
807 805
808 if (__predict_false(sc->sc_backend_status != BLKIF_STATE_CONNECTED)) 806 if (__predict_false(sc->sc_backend_status != BLKIF_STATE_CONNECTED))
809 return 0; 807 return 0;
810 808
811 mutex_enter(&sc->sc_lock); 809 mutex_enter(&sc->sc_lock);
812again: 810again:
813 resp_prod = sc->sc_ring.sring->rsp_prod; 811 resp_prod = sc->sc_ring.sring->rsp_prod;
814 xen_rmb(); /* ensure we see replies up to resp_prod */ 812 xen_rmb(); /* ensure we see replies up to resp_prod */
815 for (i = sc->sc_ring.rsp_cons; i != resp_prod; i++) { 813 for (i = sc->sc_ring.rsp_cons; i != resp_prod; i++) {
816 blkif_response_t *rep = RING_GET_RESPONSE(&sc->sc_ring, i); 814 blkif_response_t *rep = RING_GET_RESPONSE(&sc->sc_ring, i);
817 struct xbd_req *xbdreq = &sc->sc_reqs[rep->id]; 815 struct xbd_req *xbdreq = &sc->sc_reqs[rep->id];
818 816
819 if (rep->operation == BLKIF_OP_FLUSH_DISKCACHE) { 817 if (rep->operation == BLKIF_OP_FLUSH_DISKCACHE) {
820 KASSERT(xbdreq->req_bp == NULL); 818 KASSERT(xbdreq->req_bp == NULL);
821 xbdreq->req_sync.s_error = rep->status; 819 xbdreq->req_sync.s_error = rep->status;
822 xbdreq->req_sync.s_done = 1; 820 xbdreq->req_sync.s_done = 1;
823 cv_broadcast(&sc->sc_cache_flush_cv); 821 cv_broadcast(&sc->sc_cache_flush_cv);
824 /* caller will free the req */ 822 /* caller will free the req */
825 continue; 823 continue;
826 } 824 }
827 825
828 if (rep->operation != BLKIF_OP_READ && 826 if (rep->operation != BLKIF_OP_READ &&
829 rep->operation != BLKIF_OP_WRITE) { 827 rep->operation != BLKIF_OP_WRITE) {
830 aprint_error_dev(sc->sc_dksc.sc_dev, 828 aprint_error_dev(sc->sc_dksc.sc_dev,
831 "bad operation %d from backend\n", rep->operation); 829 "bad operation %d from backend\n", rep->operation);
832 continue; 830 continue;
833 } 831 }
834 832
835 bp = xbdreq->req_bp; 833 bp = xbdreq->req_bp;
836 xbdreq->req_bp = NULL; 834 xbdreq->req_bp = NULL;
837 KASSERT(bp != NULL && bp->b_data != NULL); 835 KASSERT(bp != NULL && bp->b_data != NULL);
838 DPRINTF(("%s(%p): b_bcount = %ld\n", __func__, 836 DPRINTF(("%s(%p): b_bcount = %ld\n", __func__,
839 bp, (long)bp->b_bcount)); 837 bp, (long)bp->b_bcount));
840 838
841 if (bp->b_error != 0 || rep->status != BLKIF_RSP_OKAY) { 839 if (bp->b_error != 0 || rep->status != BLKIF_RSP_OKAY) {
 840 DPRINTF(("%s: error %d status %d\n", __func__,
 841 bp->b_error, rep->status));
842 bp->b_error = EIO; 842 bp->b_error = EIO;
843 bp->b_resid = bp->b_bcount; 843 bp->b_resid = bp->b_bcount;
844 } 844 }
845 845
846 if (xbdreq->req_parent) { 846 if (xbdreq->req_parent) {
847 struct xbd_req *req_parent = xbdreq->req_parent; 847 struct xbd_req *req_parent = xbdreq->req_parent;
848 848
849 /* Unhook and recycle child */ 849 /* Unhook and recycle child */
850 xbdreq->req_parent = NULL; 850 xbdreq->req_parent = NULL;
851 req_parent->req_child = NULL; 851 req_parent->req_child = NULL;
852 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, 852 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq,
853 req_next); 853 req_next);
854 854
855 if (!req_parent->req_parent_done) { 855 if (!req_parent->req_parent_done) {
856 /* Finished before parent, nothig else to do */ 856 /* Finished before parent, nothig else to do */
857 continue; 857 continue;
858 } 858 }
859 859
860 /* Must do the cleanup now */ 860 /* Must do the cleanup now */
861 xbdreq = req_parent; 861 xbdreq = req_parent;
862 } 862 }
863 if (xbdreq->req_child) { 863 if (xbdreq->req_child) {
864 /* Finished before child, child will cleanup */ 864 /* Finished before child, child will cleanup */
865 xbdreq->req_parent_done = true; 865 xbdreq->req_parent_done = true;
866 continue; 866 continue;
867 } 867 }
868 868
869 if (bp->b_error == 0) 869 if (bp->b_error == 0)
870 bp->b_resid = 0; 870 bp->b_resid = 0;
871 871
872 KASSERT(xbdreq->req_dmamap->dm_nsegs > 0); 872 KASSERT(xbdreq->req_dmamap->dm_nsegs > 0);
873 for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) { 873 for (seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) {
874 /* 874 /*
875 * We are not allowing persistent mappings, so 875 * We are not allowing persistent mappings, so
876 * expect the backend to release the grant 876 * expect the backend to release the grant
877 * immediately. 877 * immediately.
878 */ 878 */
879 if (xbdreq->req_indirect) { 879 if (xbdreq->req_indirect) {
880 gntref = 880 gntref =
881 xbdreq->req_indirect->in_addr[seg].gref; 881 xbdreq->req_indirect->in_addr[seg].gref;
882 } else 882 } else
883 gntref = xbdreq->req_gntref[seg]; 883 gntref = xbdreq->req_gntref[seg];
884 KASSERT(xengnt_status(gntref) == 0); 884 KASSERT(xengnt_status(gntref) == 0);
885 xengnt_revoke_access(gntref); 885 xengnt_revoke_access(gntref);
886 } 886 }
887 887
888 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, xbdreq->req_dmamap); 888 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, xbdreq->req_dmamap);
889 889
890 if (__predict_false(bp->b_data != xbdreq->req_data)) 890 if (__predict_false(bp->b_data != xbdreq->req_data))
891 xbd_unmap_align(sc, xbdreq, bp); 891 xbd_unmap_align(sc, xbdreq, bp);
892 xbdreq->req_data = NULL; 892 xbdreq->req_data = NULL;
893 893
894 dk_done(&sc->sc_dksc, bp); 894 dk_done(&sc->sc_dksc, bp);
895 895
896 if (xbdreq->req_indirect) { 896 if (xbdreq->req_indirect) {
897 /* No persistent mappings, so check that 897 /* No persistent mappings, so check that
898 * backend unmapped the indirect segment grant too. 898 * backend unmapped the indirect segment grant too.
899 */ 899 */
900 KASSERT(xengnt_status(xbdreq->req_indirect->in_gntref) 900 KASSERT(xengnt_status(xbdreq->req_indirect->in_gntref)
901 == 0); 901 == 0);
902 SLIST_INSERT_HEAD(&sc->sc_indirect_head, 902 SLIST_INSERT_HEAD(&sc->sc_indirect_head,
903 xbdreq->req_indirect, in_next); 903 xbdreq->req_indirect, in_next);
904 xbdreq->req_indirect = NULL; 904 xbdreq->req_indirect = NULL;
905 } 905 }
906 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next); 906 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
907 } 907 }
908 908
909 xen_rmb(); 909 xen_rmb();
910 sc->sc_ring.rsp_cons = i; 910 sc->sc_ring.rsp_cons = i;
911 911
912 RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_ring, more_to_do); 912 RING_FINAL_CHECK_FOR_RESPONSES(&sc->sc_ring, more_to_do);
913 if (more_to_do) 913 if (more_to_do)
914 goto again; 914 goto again;
915 915
916 cv_signal(&sc->sc_req_cv); 916 cv_signal(&sc->sc_req_cv);
917 mutex_exit(&sc->sc_lock); 917 mutex_exit(&sc->sc_lock);
918 918
919 dk_start(&sc->sc_dksc, NULL); 919 dk_start(&sc->sc_dksc, NULL);
920 920
921 return 1; 921 return 1;
922} 922}
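
The response loop above uses the req_parent/req_child links so that a transfer split into two ring requests is cleaned up exactly once, whichever half the backend completes first. Below is a minimal standalone sketch of that handshake; the field names (parent, child, parent_done) mirror the driver's xbd_req fields, but the struct, the complete() helper and the test scaffolding are invented for illustration and are not code from xbd_xenbus.c.

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Hypothetical stand-ins for the driver's xbd_req parent/child fields. */
struct req {
	struct req *parent;	/* set on the second (child) half */
	struct req *child;	/* set on the first (parent) half */
	bool parent_done;	/* parent's response already processed */
	int cleanups;		/* how many times cleanup ran */
};

/* Same decision tree as the response loop above, minus the real I/O work. */
static void
complete(struct req *r)
{
	if (r->parent != NULL) {
		struct req *parent = r->parent;

		/* Unhook and recycle child */
		r->parent = NULL;
		parent->child = NULL;
		if (!parent->parent_done) {
			/* Finished before parent, nothing else to do */
			return;
		}
		/* Must do the cleanup now */
		r = parent;
	}
	if (r->child != NULL) {
		/* Finished before child, child will clean up */
		r->parent_done = true;
		return;
	}
	r->cleanups++;		/* revoke grants, dk_done(), ... */
}

int
main(void)
{
	struct req parent = { 0 }, child = { 0 };

	/* Case 1: the parent's response arrives first. */
	parent.child = &child;
	child.parent = &parent;
	complete(&parent);
	complete(&child);
	assert(parent.cleanups == 1 && child.cleanups == 0);

	/* Case 2: the child's response arrives first. */
	parent = (struct req){ 0 };
	child = (struct req){ 0 };
	parent.child = &child;
	child.parent = &parent;
	complete(&child);
	complete(&parent);
	assert(parent.cleanups == 1 && child.cleanups == 0);

	printf("cleanup ran exactly once in both completion orders\n");
	return 0;
}

In both orderings exactly one of the two completions performs the grant revocation and dk_done() path, which is why the loop can safely recycle the child request as soon as it is unhooked.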
923 923
924static void 924static void
925xbdminphys(struct buf *bp) 925xbdminphys(struct buf *bp)
926{ 926{
927 if (bp->b_bcount > XBD_XFER_LIMIT) { 927 if (bp->b_bcount > XBD_XFER_LIMIT) {
928 bp->b_bcount = XBD_XFER_LIMIT; 928 bp->b_bcount = XBD_XFER_LIMIT;
929 } 929 }
930 minphys(bp); 930 minphys(bp);
931} 931}
932 932
933static void 933static void
934xbd_iosize(device_t dev, int *maxxfer) 934xbd_iosize(device_t dev, int *maxxfer)
935{ 935{
936 /* 936 /*
937 * Always restrict dumps to XBD_MAX_XFER to avoid indirect segments, 937 * Always restrict dumps to XBD_MAX_XFER to avoid indirect segments,
938 * so that it uses as little memory as possible.  938 * so that it uses as little memory as possible.
939 */ 939 */
940 if (*maxxfer > XBD_MAX_XFER) 940 if (*maxxfer > XBD_MAX_XFER)
941 *maxxfer = XBD_MAX_XFER; 941 *maxxfer = XBD_MAX_XFER;
942} 942}
943 943
944static int 944static int
945xbdopen(dev_t dev, int flags, int fmt, struct lwp *l) 945xbdopen(dev_t dev, int flags, int fmt, struct lwp *l)
946{ 946{
947 struct xbd_xenbus_softc *sc; 947 struct xbd_xenbus_softc *sc;
948 948
949 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 949 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
950 if (sc == NULL) 950 if (sc == NULL)
951 return (ENXIO); 951 return (ENXIO);
952 if ((flags & FWRITE) && (sc->sc_info & VDISK_READONLY)) 952 if ((flags & FWRITE) && (sc->sc_info & VDISK_READONLY))
953 return EROFS; 953 return EROFS;
954 954
955 DPRINTF(("xbdopen(%" PRIx64 ", %d)\n", dev, flags)); 955 DPRINTF(("xbdopen(%" PRIx64 ", %d)\n", dev, flags));
956 return dk_open(&sc->sc_dksc, dev, flags, fmt, l); 956 return dk_open(&sc->sc_dksc, dev, flags, fmt, l);
957} 957}
958 958
959static int 959static int
960xbdclose(dev_t dev, int flags, int fmt, struct lwp *l) 960xbdclose(dev_t dev, int flags, int fmt, struct lwp *l)
961{ 961{
962 struct xbd_xenbus_softc *sc; 962 struct xbd_xenbus_softc *sc;
963 963
964 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 964 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
965 965
966 DPRINTF(("xbdclose(%" PRIx64 ", %d)\n", dev, flags)); 966 DPRINTF(("xbdclose(%" PRIx64 ", %d)\n", dev, flags));
967 return dk_close(&sc->sc_dksc, dev, flags, fmt, l); 967 return dk_close(&sc->sc_dksc, dev, flags, fmt, l);
968} 968}
969 969
970static void 970static void
971xbdstrategy(struct buf *bp) 971xbdstrategy(struct buf *bp)
972{ 972{
973 struct xbd_xenbus_softc *sc; 973 struct xbd_xenbus_softc *sc;
974 974
975 sc = device_lookup_private(&xbd_cd, DISKUNIT(bp->b_dev)); 975 sc = device_lookup_private(&xbd_cd, DISKUNIT(bp->b_dev));
976 976
977 DPRINTF(("xbdstrategy(%p): b_bcount = %ld\n", bp, 977 DPRINTF(("xbdstrategy(%p): b_bcount = %ld\n", bp,
978 (long)bp->b_bcount)); 978 (long)bp->b_bcount));
979 979
980 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) { 980 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
981 bp->b_error = EIO; 981 bp->b_error = EIO;
982 biodone(bp); 982 biodone(bp);
983 return; 983 return;
984 } 984 }
985 if (__predict_false((sc->sc_info & VDISK_READONLY) && 985 if (__predict_false((sc->sc_info & VDISK_READONLY) &&
986 (bp->b_flags & B_READ) == 0)) { 986 (bp->b_flags & B_READ) == 0)) {
987 bp->b_error = EROFS; 987 bp->b_error = EROFS;
988 biodone(bp); 988 biodone(bp);
989 return; 989 return;
990 } 990 }
991 991
992 dk_strategy(&sc->sc_dksc, bp); 992 dk_strategy(&sc->sc_dksc, bp);
993 return; 993 return;
994} 994}
995 995
996static int 996static int
997xbdsize(dev_t dev) 997xbdsize(dev_t dev)
998{ 998{
999 struct xbd_xenbus_softc *sc; 999 struct xbd_xenbus_softc *sc;
1000 1000
1001 DPRINTF(("xbdsize(%" PRIx64 ")\n", dev)); 1001 DPRINTF(("xbdsize(%" PRIx64 ")\n", dev));
1002 1002
1003 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 1003 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
1004 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) 1004 if (sc == NULL || sc->sc_shutdown != BLKIF_SHUTDOWN_RUN)
1005 return -1; 1005 return -1;
1006 return dk_size(&sc->sc_dksc, dev); 1006 return dk_size(&sc->sc_dksc, dev);
1007} 1007}
1008 1008
1009static int 1009static int
1010xbdread(dev_t dev, struct uio *uio, int flags) 1010xbdread(dev_t dev, struct uio *uio, int flags)
1011{ 1011{
1012 struct xbd_xenbus_softc *sc =  1012 struct xbd_xenbus_softc *sc =
1013 device_lookup_private(&xbd_cd, DISKUNIT(dev)); 1013 device_lookup_private(&xbd_cd, DISKUNIT(dev));
1014 struct dk_softc *dksc = &sc->sc_dksc; 1014 struct dk_softc *dksc = &sc->sc_dksc;
1015 1015
1016 if (!DK_ATTACHED(dksc)) 1016 if (!DK_ATTACHED(dksc))
1017 return ENXIO; 1017 return ENXIO;
1018 return physio(xbdstrategy, NULL, dev, B_READ, xbdminphys, uio); 1018 return physio(xbdstrategy, NULL, dev, B_READ, xbdminphys, uio);
1019} 1019}
1020 1020
1021static int 1021static int
1022xbdwrite(dev_t dev, struct uio *uio, int flags) 1022xbdwrite(dev_t dev, struct uio *uio, int flags)
1023{ 1023{
1024 struct xbd_xenbus_softc *sc = 1024 struct xbd_xenbus_softc *sc =
1025 device_lookup_private(&xbd_cd, DISKUNIT(dev)); 1025 device_lookup_private(&xbd_cd, DISKUNIT(dev));
1026 struct dk_softc *dksc = &sc->sc_dksc; 1026 struct dk_softc *dksc = &sc->sc_dksc;
1027 1027
1028 if (!DK_ATTACHED(dksc)) 1028 if (!DK_ATTACHED(dksc))
1029 return ENXIO; 1029 return ENXIO;
1030 if (__predict_false(sc->sc_info & VDISK_READONLY)) 1030 if (__predict_false(sc->sc_info & VDISK_READONLY))
1031 return EROFS; 1031 return EROFS;
1032 return physio(xbdstrategy, NULL, dev, B_WRITE, xbdminphys, uio); 1032 return physio(xbdstrategy, NULL, dev, B_WRITE, xbdminphys, uio);
1033} 1033}
1034 1034
1035static int 1035static int
1036xbdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l) 1036xbdioctl(dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
1037{ 1037{
1038 struct xbd_xenbus_softc *sc = 1038 struct xbd_xenbus_softc *sc =
1039 device_lookup_private(&xbd_cd, DISKUNIT(dev)); 1039 device_lookup_private(&xbd_cd, DISKUNIT(dev));
1040 struct dk_softc *dksc; 1040 struct dk_softc *dksc;
1041 int error; 1041 int error;
1042 struct xbd_req *xbdreq; 1042 struct xbd_req *xbdreq;
1043 blkif_request_t *req; 1043 blkif_request_t *req;
1044 int notify; 1044 int notify;
1045 1045
1046 DPRINTF(("xbdioctl(%" PRIx64 ", %08lx, %p, %d, %p)\n", 1046 DPRINTF(("xbdioctl(%" PRIx64 ", %08lx, %p, %d, %p)\n",
1047 dev, cmd, data, flag, l)); 1047 dev, cmd, data, flag, l));
1048 dksc = &sc->sc_dksc; 1048 dksc = &sc->sc_dksc;
1049 1049
1050 switch (cmd) { 1050 switch (cmd) {
1051 case DIOCGCACHE: 1051 case DIOCGCACHE:
1052 { 1052 {
1053 /* Assume there is write cache if cache-flush is supported */ 1053 /* Assume there is write cache if cache-flush is supported */
1054 int *bitsp = (int *)data; 1054 int *bitsp = (int *)data;
1055 *bitsp = 0; 1055 *bitsp = 0;
1056 if (sc->sc_features & BLKIF_FEATURE_CACHE_FLUSH) 1056 if (sc->sc_features & BLKIF_FEATURE_CACHE_FLUSH)
1057 *bitsp |= DKCACHE_WRITE; 1057 *bitsp |= DKCACHE_WRITE;
1058 error = 0; 1058 error = 0;
1059 break; 1059 break;
1060 } 1060 }
1061 case DIOCCACHESYNC: 1061 case DIOCCACHESYNC:
1062 if ((sc->sc_features & BLKIF_FEATURE_CACHE_FLUSH) == 0) 1062 if ((sc->sc_features & BLKIF_FEATURE_CACHE_FLUSH) == 0)
1063 return EOPNOTSUPP; 1063 return EOPNOTSUPP;
1064 1064
1065 mutex_enter(&sc->sc_lock); 1065 mutex_enter(&sc->sc_lock);
1066 while ((xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head)) == NULL) 1066 while ((xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head)) == NULL)
1067 cv_wait(&sc->sc_req_cv, &sc->sc_lock); 1067 cv_wait(&sc->sc_req_cv, &sc->sc_lock);
1068 KASSERT(!RING_FULL(&sc->sc_ring)); 1068 KASSERT(!RING_FULL(&sc->sc_ring));
1069 1069
1070 SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next); 1070 SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
1071 req = RING_GET_REQUEST(&sc->sc_ring, 1071 req = RING_GET_REQUEST(&sc->sc_ring,
1072 sc->sc_ring.req_prod_pvt); 1072 sc->sc_ring.req_prod_pvt);
1073 req->id = xbdreq->req_id; 1073 req->id = xbdreq->req_id;
1074 req->operation = BLKIF_OP_FLUSH_DISKCACHE; 1074 req->operation = BLKIF_OP_FLUSH_DISKCACHE;
1075 req->handle = sc->sc_handle; 1075 req->handle = sc->sc_handle;
1076 xbdreq->req_sync.s_done = 0; 1076 xbdreq->req_sync.s_done = 0;
1077 sc->sc_ring.req_prod_pvt++; 1077 sc->sc_ring.req_prod_pvt++;
1078 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify); 1078 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
1079 if (notify) 1079 if (notify)
1080 hypervisor_notify_via_evtchn(sc->sc_evtchn); 1080 hypervisor_notify_via_evtchn(sc->sc_evtchn);
1081 /* request sent, now wait for completion */ 1081 /* request sent, now wait for completion */
1082 while (xbdreq->req_sync.s_done == 0) 1082 while (xbdreq->req_sync.s_done == 0)
1083 cv_wait(&sc->sc_cache_flush_cv, &sc->sc_lock); 1083 cv_wait(&sc->sc_cache_flush_cv, &sc->sc_lock);
1084 1084
1085 if (xbdreq->req_sync.s_error == BLKIF_RSP_EOPNOTSUPP) 1085 if (xbdreq->req_sync.s_error == BLKIF_RSP_EOPNOTSUPP)
1086 error = EOPNOTSUPP; 1086 error = EOPNOTSUPP;
1087 else if (xbdreq->req_sync.s_error == BLKIF_RSP_OKAY) 1087 else if (xbdreq->req_sync.s_error == BLKIF_RSP_OKAY)
1088 error = 0; 1088 error = 0;
1089 else 1089 else
1090 error = EIO; 1090 error = EIO;
1091 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next); 1091 SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
1092 cv_signal(&sc->sc_req_cv); 1092 cv_signal(&sc->sc_req_cv);
1093 mutex_exit(&sc->sc_lock); 1093 mutex_exit(&sc->sc_lock);
1094 1094
1095 /* Restart I/O if it was waiting for req */ 1095 /* Restart I/O if it was waiting for req */
1096 dk_start(&sc->sc_dksc, NULL); 1096 dk_start(&sc->sc_dksc, NULL);
1097 break; 1097 break;
1098 1098
1099 default: 1099 default:
1100 error = dk_ioctl(dksc, dev, cmd, data, flag, l); 1100 error = dk_ioctl(dksc, dev, cmd, data, flag, l);
1101 break; 1101 break;
1102 } 1102 }
1103 1103
1104 return error; 1104 return error;
1105} 1105}
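
The DIOCCACHESYNC case above follows a submit-then-sleep pattern: take a free request slot, push a BLKIF_OP_FLUSH_DISKCACHE onto the ring, notify the backend, and sleep on sc_cache_flush_cv until the interrupt handler sets req_sync.s_done. The fragment below is only a generic sketch of that pattern using POSIX threads in place of the kernel's mutex/condvar primitives; the names flush_done, flush_error and backend() are invented for the illustration and do not exist in the driver.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

/* Invented stand-ins for sc_lock, sc_cache_flush_cv and req_sync. */
static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t flush_cv = PTHREAD_COND_INITIALIZER;
static bool flush_done;
static int flush_error;

/* Plays the role of the interrupt handler completing the flush. */
static void *
backend(void *arg)
{
	(void)arg;
	usleep(10000);		/* pretend the device is flushing its cache */
	pthread_mutex_lock(&lock);
	flush_done = true;
	flush_error = 0;	/* BLKIF_RSP_OKAY in the driver */
	pthread_cond_signal(&flush_cv);
	pthread_mutex_unlock(&lock);
	return NULL;
}

int
main(void)
{
	pthread_t t;

	pthread_mutex_lock(&lock);
	/* "push the flush request and notify the event channel" goes here */
	pthread_create(&t, NULL, backend, NULL);

	/* request sent, now wait for completion */
	while (!flush_done)
		pthread_cond_wait(&flush_cv, &lock);
	pthread_mutex_unlock(&lock);

	pthread_join(t, NULL);
	printf("cache flush completed, error=%d\n", flush_error);
	return 0;
}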
1106 1106
1107static int 1107static int
1108xbddump(dev_t dev, daddr_t blkno, void *va, size_t size) 1108xbddump(dev_t dev, daddr_t blkno, void *va, size_t size)
1109{ 1109{
1110 struct xbd_xenbus_softc *sc; 1110 struct xbd_xenbus_softc *sc;
1111 1111
1112 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev)); 1112 sc = device_lookup_private(&xbd_cd, DISKUNIT(dev));
1113 if (sc == NULL) 1113 if (sc == NULL)
1114 return (ENXIO); 1114 return (ENXIO);
1115 1115
1116 DPRINTF(("xbddump(%" PRIx64 ", %" PRId64 ", %p, %lu)\n", dev, blkno, va, 1116 DPRINTF(("xbddump(%" PRIx64 ", %" PRId64 ", %p, %lu)\n", dev, blkno, va,
1117 (unsigned long)size)); 1117 (unsigned long)size));
1118 return dk_dump(&sc->sc_dksc, dev, blkno, va, size, 0); 1118 return dk_dump(&sc->sc_dksc, dev, blkno, va, size, 0);
1119} 1119}
1120 1120
1121static int 1121static int
1122xbd_diskstart(device_t self, struct buf *bp) 1122xbd_diskstart(device_t self, struct buf *bp)
1123{ 1123{
1124 struct xbd_xenbus_softc *sc = device_private(self); 1124 struct xbd_xenbus_softc *sc = device_private(self);
1125 struct xbd_req *xbdreq; 1125 struct xbd_req *xbdreq;
1126 int error = 0; 1126 int error = 0;
1127 int notify; 1127 int notify;
1128 1128
1129 KASSERT(bp->b_bcount <= MAXPHYS); 1129 KASSERT(bp->b_bcount <= MAXPHYS);
1130 1130
1131 DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n", 1131 DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n",
1132 bp, (long)bp->b_bcount)); 1132 bp, (long)bp->b_bcount));
1133 1133
1134 mutex_enter(&sc->sc_lock); 1134 mutex_enter(&sc->sc_lock);
1135 1135
1136 if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) { 1136 if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
1137 error = EIO; 1137 error = EIO;
1138 goto out; 1138 goto out;
1139 } 1139 }
1140 1140
1141 if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) { 1141 if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_sectors) {
1142 /* invalid block number */ 1142 /* invalid block number */
1143 error = EINVAL; 1143 error = EINVAL;
1144 goto out; 1144 goto out;
1145 } 1145 }
1146 1146
1147 if (__predict_false( 1147 if (__predict_false(
1148 sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) { 1148 sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
1149 /* device is suspended, do not consume buffer */ 1149 /* device is suspended, do not consume buffer */
1150 DPRINTF(("%s: (xbd_diskstart) device suspended\n", 1150 DPRINTF(("%s: (xbd_diskstart) device suspended\n",
1151 sc->sc_dksc.sc_xname)); 1151 sc->sc_dksc.sc_xname));
1152 error = EAGAIN; 1152 error = EAGAIN;
1153 goto out; 1153 goto out;
1154 } 1154 }
1155 1155
1156 xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head); 1156 xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
1157 if (__predict_false(xbdreq == NULL)) { 1157 if (__predict_false(xbdreq == NULL)) {
1158 sc->sc_cnt_queue_full.ev_count++; 1158 sc->sc_cnt_queue_full.ev_count++;
1159 DPRINTF(("xbd_diskstart: no req\n")); 1159 DPRINTF(("xbd_diskstart: no req\n"));
1160 error = EAGAIN; 1160 error = EAGAIN;
1161 goto out; 1161 goto out;
1162 } 1162 }
1163 KASSERT(!RING_FULL(&sc->sc_ring)); 1163 KASSERT(!RING_FULL(&sc->sc_ring));
1164 1164
1165 if ((sc->sc_features & BLKIF_FEATURE_INDIRECT) == 0 1165 if ((sc->sc_features & BLKIF_FEATURE_INDIRECT) == 0
1166 && bp->b_bcount > XBD_MAX_CHUNK) { 1166 && bp->b_bcount > XBD_MAX_CHUNK) {
1167 if (!SLIST_NEXT(xbdreq, req_next)) { 1167 if (!SLIST_NEXT(xbdreq, req_next)) {
1168 DPRINTF(("%s: need extra req\n", __func__)); 1168 DPRINTF(("%s: need extra req\n", __func__));
1169 error = EAGAIN; 1169 error = EAGAIN;
1170 goto out; 1170 goto out;
1171 } 1171 }
1172 } 1172 }
1173 1173
1174 bp->b_resid = bp->b_bcount; 1174 bp->b_resid = bp->b_bcount;
1175 xbdreq->req_bp = bp; 1175 xbdreq->req_bp = bp;
1176 xbdreq->req_data = bp->b_data; 1176 xbdreq->req_data = bp->b_data;
1177 if (__predict_false((vaddr_t)bp->b_data & (XEN_BSIZE - 1))) { 1177 if (__predict_false((vaddr_t)bp->b_data & (sc->sc_secsize - 1))) {
1178 if (__predict_false(xbd_map_align(sc, xbdreq) != 0)) { 1178 if (__predict_false(xbd_map_align(sc, xbdreq) != 0)) {
1179 DPRINTF(("xbd_diskstart: no align\n")); 1179 DPRINTF(("xbd_diskstart: no align\n"));
1180 error = EAGAIN; 1180 error = EAGAIN;
1181 goto out; 1181 goto out;
1182 } 1182 }
1183 } 1183 }
1184 1184
1185 if (__predict_false(bus_dmamap_load(sc->sc_xbusd->xbusd_dmat, 1185 if (__predict_false(bus_dmamap_load(sc->sc_xbusd->xbusd_dmat,
1186 xbdreq->req_dmamap, xbdreq->req_data, bp->b_bcount, NULL, 1186 xbdreq->req_dmamap, xbdreq->req_data, bp->b_bcount, NULL,
1187 BUS_DMA_NOWAIT) != 0)) { 1187 BUS_DMA_NOWAIT) != 0)) {
1188 printf("%s: %s: bus_dmamap_load failed\n", 1188 printf("%s: %s: bus_dmamap_load failed\n",
1189 device_xname(sc->sc_dksc.sc_dev), __func__); 1189 device_xname(sc->sc_dksc.sc_dev), __func__);
1190 if (__predict_false(bp->b_data != xbdreq->req_data)) 1190 if (__predict_false(bp->b_data != xbdreq->req_data))
1191 xbd_unmap_align(sc, xbdreq, NULL); 1191 xbd_unmap_align(sc, xbdreq, NULL);
1192 error = EINVAL; 1192 error = EINVAL;
1193 goto out; 1193 goto out;
1194 } 1194 }
1195 KASSERTMSG(xbdreq->req_dmamap->dm_nsegs > 0, 1195 KASSERTMSG(xbdreq->req_dmamap->dm_nsegs > 0,
1196 "dm_nsegs == 0 with bcount %d", bp->b_bcount); 1196 "dm_nsegs == 0 with bcount %d", bp->b_bcount);
1197 1197
1198 for (int seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) { 1198 for (int seg = 0; seg < xbdreq->req_dmamap->dm_nsegs; seg++) {
1199 KASSERT(seg < __arraycount(xbdreq->req_gntref)); 1199 KASSERT(seg < __arraycount(xbdreq->req_gntref));
1200 1200
1201 paddr_t ma = xbdreq->req_dmamap->dm_segs[seg].ds_addr; 1201 paddr_t ma = xbdreq->req_dmamap->dm_segs[seg].ds_addr;
1202 if (__predict_false(xengnt_grant_access( 1202 if (__predict_false(xengnt_grant_access(
1203 sc->sc_xbusd->xbusd_otherend_id, 1203 sc->sc_xbusd->xbusd_otherend_id,
1204 (ma & ~PAGE_MASK), (bp->b_flags & B_READ) == 0, 1204 (ma & ~PAGE_MASK), (bp->b_flags & B_READ) == 0,
1205 &xbdreq->req_gntref[seg]))) { 1205 &xbdreq->req_gntref[seg]))) {
1206 printf("%s: %s: xengnt_grant_access failed\n", 1206 printf("%s: %s: xengnt_grant_access failed\n",
1207 device_xname(sc->sc_dksc.sc_dev), __func__); 1207 device_xname(sc->sc_dksc.sc_dev), __func__);
1208 if (seg > 0) { 1208 if (seg > 0) {
1209 for (; --seg >= 0; ) { 1209 for (; --seg >= 0; ) {
1210 xengnt_revoke_access( 1210 xengnt_revoke_access(
1211 xbdreq->req_gntref[seg]); 1211 xbdreq->req_gntref[seg]);
1212 } 1212 }
1213 } 1213 }
1214 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, 1214 bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
1215 xbdreq->req_dmamap); 1215 xbdreq->req_dmamap);
1216 if (__predict_false(bp->b_data != xbdreq->req_data)) 1216 if (__predict_false(bp->b_data != xbdreq->req_data))
1217 xbd_unmap_align(sc, xbdreq, NULL); 1217 xbd_unmap_align(sc, xbdreq, NULL);
1218 error = EAGAIN; 1218 error = EAGAIN;
1219 goto out; 1219 goto out;
1220 } 1220 }
1221 } 1221 }
1222 1222
1223 KASSERT(xbdreq->req_parent == NULL); 1223 KASSERT(xbdreq->req_parent == NULL);
1224 KASSERT(xbdreq->req_child == NULL); 1224 KASSERT(xbdreq->req_child == NULL);
1225 1225
1226 /* We are now committed to the transfer */ 1226 /* We are now committed to the transfer */
1227 SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next); 1227 SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
1228 1228
1229 if ((sc->sc_features & BLKIF_FEATURE_INDIRECT) != 0 && 1229 if ((sc->sc_features & BLKIF_FEATURE_INDIRECT) != 0 &&
1230 bp->b_bcount > XBD_MAX_CHUNK) { 1230 bp->b_bcount > XBD_MAX_CHUNK) {
1231 xbd_diskstart_submit_indirect(sc, xbdreq, bp); 1231 xbd_diskstart_submit_indirect(sc, xbdreq, bp);
1232 goto push; 1232 goto push;
1233 } 1233 }
1234 1234
1235 xbd_diskstart_submit(sc, xbdreq->req_id, 1235 xbd_diskstart_submit(sc, xbdreq->req_id,
1236 bp, 0, xbdreq->req_dmamap, xbdreq->req_gntref); 1236 bp, 0, xbdreq->req_dmamap, xbdreq->req_gntref);
1237 1237
1238 if (bp->b_bcount > XBD_MAX_CHUNK) { 1238 if (bp->b_bcount > XBD_MAX_CHUNK) {
1239 KASSERT(!RING_FULL(&sc->sc_ring)); 1239 KASSERT(!RING_FULL(&sc->sc_ring));
1240 struct xbd_req *xbdreq2 = SLIST_FIRST(&sc->sc_xbdreq_head); 1240 struct xbd_req *xbdreq2 = SLIST_FIRST(&sc->sc_xbdreq_head);
1241 KASSERT(xbdreq2 != NULL); /* Checked earlier */ 1241 KASSERT(xbdreq2 != NULL); /* Checked earlier */
1242 SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next); 1242 SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
1243 xbdreq->req_child = xbdreq2; 1243 xbdreq->req_child = xbdreq2;
1244 xbdreq->req_parent_done = false; 1244 xbdreq->req_parent_done = false;
1245 xbdreq2->req_parent = xbdreq; 1245 xbdreq2->req_parent = xbdreq;
1246 xbdreq2->req_bp = bp; 1246 xbdreq2->req_bp = bp;
1247 xbdreq2->req_data = xbdreq->req_data; 1247 xbdreq2->req_data = xbdreq->req_data;
1248 xbd_diskstart_submit(sc, xbdreq2->req_id, 1248 xbd_diskstart_submit(sc, xbdreq2->req_id,
1249 bp, XBD_MAX_CHUNK, xbdreq->req_dmamap, 1249 bp, XBD_MAX_CHUNK, xbdreq->req_dmamap,
1250 xbdreq->req_gntref); 1250 xbdreq->req_gntref);
1251 } 1251 }
1252 1252
1253push: 1253push:
1254 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify); 1254 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
1255 if (notify) 1255 if (notify)
1256 hypervisor_notify_via_evtchn(sc->sc_evtchn); 1256 hypervisor_notify_via_evtchn(sc->sc_evtchn);
1257out: 1257out:
1258 mutex_exit(&sc->sc_lock); 1258 mutex_exit(&sc->sc_lock);
1259 return error; 1259 return error;
1260} 1260}
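
xbd_diskstart above only bounces a buffer when its virtual address is not aligned to the backend sector size; because sc_secsize is a power of two, the expression addr & (sc_secsize - 1) is exactly addr modulo sc_secsize. A minimal standalone check of that identity, assuming a 4096-byte sector size (the driver learns the real value from the backend rather than hard-coding it):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Assumed backend sector size for this sketch only. */
#define SECSIZE	4096u

/* Same test as (vaddr_t)bp->b_data & (sc->sc_secsize - 1) above. */
static int
is_sector_aligned(uintptr_t addr, uint32_t secsize)
{
	/* The mask trick is valid only for power-of-two sector sizes. */
	assert((secsize & (secsize - 1)) == 0);
	return (addr & (secsize - 1)) == 0;
}

int
main(void)
{
	assert(is_sector_aligned(0x10000, SECSIZE));	/* page aligned */
	assert(!is_sector_aligned(0x10200, SECSIZE));	/* only 512 B aligned */
	assert((0x10200u & (SECSIZE - 1)) == 0x10200u % SECSIZE);
	printf("mask equals modulo for power-of-two sector sizes\n");
	return 0;
}

A buffer that is 512-byte aligned but not 4096-byte aligned therefore takes the xbd_map_align() bounce path with a 4 KiB backend, which is why the check uses sc_secsize rather than XEN_BSIZE.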
1261 1261
1262static void 1262static void
1263xbd_diskstart_submit(struct xbd_xenbus_softc *sc, 1263xbd_diskstart_submit(struct xbd_xenbus_softc *sc,
1264 int req_id, struct buf *bp, int start, bus_dmamap_t dmamap, 1264 int req_id, struct buf *bp, int start, bus_dmamap_t dmamap,
1265 grant_ref_t *gntref) 1265 grant_ref_t *gntref)
1266{ 1266{
1267 blkif_request_t *req; 1267 blkif_request_t *req;
1268 paddr_t ma; 1268 paddr_t ma;
1269 int nsects, nbytes, dmaseg, first_sect, size, segidx = 0; 1269 int nsects, nbytes, dmaseg, first_sect, size, segidx = 0;
1270 struct blkif_request_segment *reqseg; 1270 struct blkif_request_segment *reqseg;
1271 1271
1272 KASSERT(mutex_owned(&sc->sc_lock)); 1272 KASSERT(mutex_owned(&sc->sc_lock));
1273 1273
1274 req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt); 1274 req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
1275 req->id = req_id; 1275 req->id = req_id;
1276 req->operation = 1276 req->operation =
1277 bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE; 1277 bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
1278 req->sector_number = bp->b_rawblkno + (start >> XEN_BSHIFT); 1278 req->sector_number = (bp->b_rawblkno * sc->sc_secsize / XEN_BSIZE) +
 1279 (start >> XEN_BSHIFT);
1279 req->handle = sc->sc_handle; 1280 req->handle = sc->sc_handle;
 1281 DPRINTF(("%s: id %" PRIu64 " op %d sn %" PRIu64 " handle %d\n",
 1282 __func__, req->id, req->operation, req->sector_number,
 1283 req->handle));
1280 1284
1281 size = uimin(bp->b_bcount - start, XBD_MAX_CHUNK);  1285 size = uimin(bp->b_bcount - start, XBD_MAX_CHUNK);
1282 for (dmaseg = 0; dmaseg < dmamap->dm_nsegs && size > 0; dmaseg++) { 1286 for (dmaseg = 0; dmaseg < dmamap->dm_nsegs && size > 0; dmaseg++) {
1283 bus_dma_segment_t *ds = &dmamap->dm_segs[dmaseg]; 1287 bus_dma_segment_t *ds = &dmamap->dm_segs[dmaseg];
1284 1288
1285 ma = ds->ds_addr; 1289 ma = ds->ds_addr;
1286 nbytes = ds->ds_len; 1290 nbytes = ds->ds_len;
1287 1291
1288 if (start > 0) { 1292 if (start > 0) {
1289 if (start >= nbytes) { 1293 if (start >= nbytes) {
1290 start -= nbytes; 1294 start -= nbytes;
1291 continue; 1295 continue;
1292 } 1296 }
1293 ma += start; 1297 ma += start;
1294 nbytes -= start; 1298 nbytes -= start;
1295 start = 0; 1299 start = 0;
1296 } 1300 }
1297 size -= nbytes; 1301 size -= nbytes;
1298 1302
1299 KASSERT(((ma & PAGE_MASK) & (XEN_BSIZE - 1)) == 0); 1303 KASSERT(((ma & PAGE_MASK) & (sc->sc_secsize - 1)) == 0);
1300 KASSERT((nbytes & (XEN_BSIZE - 1)) == 0); 1304 KASSERT((nbytes & (sc->sc_secsize - 1)) == 0);
1301 KASSERT((size & (XEN_BSIZE - 1)) == 0); 1305 KASSERT((size & (sc->sc_secsize - 1)) == 0);
1302 first_sect = (ma & PAGE_MASK) >> XEN_BSHIFT; 1306 first_sect = (ma & PAGE_MASK) >> XEN_BSHIFT;
1303 nsects = nbytes >> XEN_BSHIFT; 1307 nsects = nbytes >> XEN_BSHIFT;
1304 1308
1305 reqseg = &req->seg[segidx++]; 1309 reqseg = &req->seg[segidx++];
1306 reqseg->first_sect = first_sect; 1310 reqseg->first_sect = first_sect;
1307 reqseg->last_sect = first_sect + nsects - 1; 1311 reqseg->last_sect = first_sect + nsects - 1;
1308 KASSERT(reqseg->first_sect <= reqseg->last_sect); 1312 KASSERT(reqseg->first_sect <= reqseg->last_sect);
1309 KASSERT(reqseg->last_sect < (PAGE_SIZE / XEN_BSIZE)); 1313 KASSERT(reqseg->last_sect < (PAGE_SIZE / XEN_BSIZE));
 1314 DPRINTF(("%s: seg %d fs %d ls %d\n", __func__, segidx,
 1315 reqseg->first_sect, reqseg->last_sect));
1310 1316
1311 reqseg->gref = gntref[dmaseg]; 1317 reqseg->gref = gntref[dmaseg];
1312 } 1318 }
1313 KASSERT(segidx > 0); 1319 KASSERT(segidx > 0);
1314 req->nr_segments = segidx; 1320 req->nr_segments = segidx;
1315 sc->sc_ring.req_prod_pvt++; 1321 sc->sc_ring.req_prod_pvt++;
1316} 1322}
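
The submit path above rescales bp->b_rawblkno, which upper layers now express in sc_secsize units, into the 512-byte XEN_BSIZE units used on the ring (multiply by sc_secsize / XEN_BSIZE), and first_sect/last_sect index 512-byte sectors within a single page, as the KASSERT against PAGE_SIZE / XEN_BSIZE shows. Below is a worked standalone example of that arithmetic; the 4096-byte sector size and 4 KiB page size are assumptions of the sketch, not values taken from the driver.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define XEN_BSIZE	512u		/* ring protocol sector size */
#define XEN_BSHIFT	9
#define PAGE_SIZE	4096u		/* assumed page size */
#define PAGE_MASK	(PAGE_SIZE - 1)
#define SECSIZE		4096u		/* assumed backend sector size */

int
main(void)
{
	uint64_t rawblkno = 100;	/* block 100 in SECSIZE units */
	uint32_t start = 0;		/* offset of this chunk into the buf */

	/* req->sector_number, always in XEN_BSIZE units on the ring. */
	uint64_t sector_number =
	    (rawblkno * SECSIZE / XEN_BSIZE) + (start >> XEN_BSHIFT);
	assert(sector_number == 800);	/* 100 * 4096 / 512 */

	/* One dma segment: a full page at a page-aligned machine address. */
	uint64_t ma = 0x12345000;
	uint32_t nbytes = PAGE_SIZE;

	uint32_t first_sect = (ma & PAGE_MASK) >> XEN_BSHIFT;
	uint32_t nsects = nbytes >> XEN_BSHIFT;
	uint32_t last_sect = first_sect + nsects - 1;

	assert(first_sect == 0 && last_sect == 7);	/* 8 x 512 B = 4 KiB */
	assert(last_sect < PAGE_SIZE / XEN_BSIZE);

	printf("sector_number=%llu first_sect=%u last_sect=%u\n",
	    (unsigned long long)sector_number, first_sect, last_sect);
	return 0;
}

With a 512-byte backend the scaling factor is 1 and the arithmetic degenerates to the pre-patch behaviour; with a 4 KiB backend every request starts on a multiple of 8 XEN_BSIZE sectors, matching the sc_secsize alignment enforced in xbd_diskstart.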
1317 1323
1318static void 1324static void
1319xbd_diskstart_submit_indirect(struct xbd_xenbus_softc *sc, 1325xbd_diskstart_submit_indirect(struct xbd_xenbus_softc *sc,
1320 struct xbd_req *xbdreq, struct buf *bp) 1326 struct xbd_req *xbdreq, struct buf *bp)
1321{ 1327{
1322 blkif_request_indirect_t *req; 1328 blkif_request_indirect_t *req;
1323 paddr_t ma; 1329 paddr_t ma;
1324 int nsects, nbytes, dmaseg, first_sect; 1330 int nsects, nbytes, dmaseg, first_sect;
1325 struct blkif_request_segment *reqseg; 1331 struct blkif_request_segment *reqseg;
1326 1332
1327 KASSERT(mutex_owned(&sc->sc_lock)); 1333 KASSERT(mutex_owned(&sc->sc_lock));
1328 1334
1329 req = (blkif_request_indirect_t *)RING_GET_REQUEST(&sc->sc_ring, 1335 req = (blkif_request_indirect_t *)RING_GET_REQUEST(&sc->sc_ring,
1330 sc->sc_ring.req_prod_pvt); 1336 sc->sc_ring.req_prod_pvt);
1331 req->id = xbdreq->req_id; 1337 req->id = xbdreq->req_id;
1332 req->operation = BLKIF_OP_INDIRECT; 1338 req->operation = BLKIF_OP_INDIRECT;
1333 req->indirect_op = 1339 req->indirect_op =
1334 bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE; 1340 bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
1335 req->sector_number = bp->b_rawblkno; 1341 req->sector_number = bp->b_rawblkno * sc->sc_secsize / XEN_BSIZE;
1336 req->handle = sc->sc_handle; 1342 req->handle = sc->sc_handle;
 1343 DPRINTF(("%s: id %" PRIu64 " op %d sn %" PRIu64 " handle %d\n",
 1344 __func__, req->id, req->indirect_op, req->sector_number,
 1345 req->handle));
1337 1346
1338 xbdreq->req_indirect = SLIST_FIRST(&sc->sc_indirect_head); 1347 xbdreq->req_indirect = SLIST_FIRST(&sc->sc_indirect_head);
1339 KASSERT(xbdreq->req_indirect != NULL); /* always as many as reqs */ 1348 KASSERT(xbdreq->req_indirect != NULL); /* always as many as reqs */
1340 SLIST_REMOVE_HEAD(&sc->sc_indirect_head, in_next); 1349 SLIST_REMOVE_HEAD(&sc->sc_indirect_head, in_next);
1341 req->indirect_grefs[0] = xbdreq->req_indirect->in_gntref; 1350 req->indirect_grefs[0] = xbdreq->req_indirect->in_gntref;
1342 1351
1343 reqseg = xbdreq->req_indirect->in_addr; 1352 reqseg = xbdreq->req_indirect->in_addr;
1344 for (dmaseg = 0; dmaseg < xbdreq->req_dmamap->dm_nsegs; dmaseg++) { 1353 for (dmaseg = 0; dmaseg < xbdreq->req_dmamap->dm_nsegs; dmaseg++) {
1345 bus_dma_segment_t *ds = &xbdreq->req_dmamap->dm_segs[dmaseg]; 1354 bus_dma_segment_t *ds = &xbdreq->req_dmamap->dm_segs[dmaseg];
1346 1355
1347 ma = ds->ds_addr; 1356 ma = ds->ds_addr;
1348 nbytes = ds->ds_len; 1357 nbytes = ds->ds_len;
1349 1358
 1359 KASSERT(((ma & PAGE_MASK) & (sc->sc_secsize - 1)) == 0);
 1360 KASSERT((nbytes & (sc->sc_secsize - 1)) == 0);
 1361
1350 first_sect = (ma & PAGE_MASK) >> XEN_BSHIFT; 1362 first_sect = (ma & PAGE_MASK) >> XEN_BSHIFT;
1351 nsects = nbytes >> XEN_BSHIFT; 1363 nsects = nbytes >> XEN_BSHIFT;
1352 1364
1353 reqseg->first_sect = first_sect; 1365 reqseg->first_sect = first_sect;
1354 reqseg->last_sect = first_sect + nsects - 1; 1366 reqseg->last_sect = first_sect + nsects - 1;
1355 reqseg->gref = xbdreq->req_gntref[dmaseg]; 1367 reqseg->gref = xbdreq->req_gntref[dmaseg];
 1368 DPRINTF(("%s: seg %d fs %d ls %d\n", __func__, dmaseg,
 1369 reqseg->first_sect, reqseg->last_sect));
1356 1370
1357 KASSERT(reqseg->first_sect <= reqseg->last_sect); 1371 KASSERT(reqseg->first_sect <= reqseg->last_sect);
1358 KASSERT(reqseg->last_sect < (PAGE_SIZE / XEN_BSIZE)); 1372 KASSERT(reqseg->last_sect < (PAGE_SIZE / XEN_BSIZE));
1359 1373
1360 reqseg++; 1374 reqseg++;
1361 } 1375 }
1362 req->nr_segments = dmaseg; 1376 req->nr_segments = dmaseg;
1363 sc->sc_ring.req_prod_pvt++; 1377 sc->sc_ring.req_prod_pvt++;
1364 1378
1365 sc->sc_cnt_indirect.ev_count++; 1379 sc->sc_cnt_indirect.ev_count++;
1366} 1380}
1367 1381
1368static int 1382static int
1369xbd_map_align(struct xbd_xenbus_softc *sc, struct xbd_req *req) 1383xbd_map_align(struct xbd_xenbus_softc *sc, struct xbd_req *req)
1370{ 1384{
1371 /* 
1372 * Only can get here if this is physio() request, block I/O 
1373 * uses DEV_BSIZE-aligned buffers. 
1374 */ 
1375 KASSERT((req->req_bp->b_flags & B_PHYS) != 0); 
1376 
1377 sc->sc_cnt_map_unalign.ev_count++; 1385 sc->sc_cnt_map_unalign.ev_count++;
1378 1386
1379 if (sc->sc_unalign_used) { 1387 if (sc->sc_unalign_used) {
1380 sc->sc_cnt_unalign_busy.ev_count++; 1388 sc->sc_cnt_unalign_busy.ev_count++;
1381 return EAGAIN; 1389 return EAGAIN;
1382 } 1390 }
1383 sc->sc_unalign_used = req->req_bp; 1391 sc->sc_unalign_used = req->req_bp;
1384 1392
1385 KASSERT(req->req_bp->b_bcount <= MAXPHYS); 1393 KASSERT(req->req_bp->b_bcount <= MAXPHYS);
1386 req->req_data = (void *)sc->sc_unalign_buffer; 1394 req->req_data = (void *)sc->sc_unalign_buffer;
1387 if ((req->req_bp->b_flags & B_READ) == 0) 1395 if ((req->req_bp->b_flags & B_READ) == 0)
1388 memcpy(req->req_data, req->req_bp->b_data, 1396 memcpy(req->req_data, req->req_bp->b_data,
1389 req->req_bp->b_bcount); 1397 req->req_bp->b_bcount);
1390 return 0; 1398 return 0;
1391} 1399}
1392 1400
1393static void 1401static void
1394xbd_unmap_align(struct xbd_xenbus_softc *sc, struct xbd_req *req, 1402xbd_unmap_align(struct xbd_xenbus_softc *sc, struct xbd_req *req,
1395 struct buf *bp) 1403 struct buf *bp)
1396{ 1404{
1397 KASSERT(!bp || sc->sc_unalign_used == bp); 1405 KASSERT(!bp || sc->sc_unalign_used == bp);
1398 if (bp && bp->b_flags & B_READ) 1406 if (bp && bp->b_flags & B_READ)
1399 memcpy(bp->b_data, req->req_data, bp->b_bcount); 1407 memcpy(bp->b_data, req->req_data, bp->b_bcount);
1400 sc->sc_unalign_used = NULL; 1408 sc->sc_unalign_used = NULL;
1401} 1409}
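
xbd_map_align and xbd_unmap_align above implement a single bounce buffer: writes are copied into the aligned buffer before the transfer, reads are copied back out afterwards, and only one unaligned request can be in flight at a time (a second one gets EAGAIN). Note also that the removed B_PHYS assertion means block I/O buffers, not just physio() ones, may now take this path. The following is a minimal standalone sketch of that copy-in/copy-out discipline; the bounce array and busy flag are local stand-ins for sc_unalign_buffer and sc_unalign_used, invented for this illustration.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define BOUNCE_SIZE	65536		/* MAXPHYS-sized in the driver */

/* Local stand-ins for sc_unalign_buffer / sc_unalign_used. */
static char bounce[BOUNCE_SIZE];
static bool bounce_busy;

/* Copy-in for writes; fails if the single bounce slot is already taken. */
static int
map_align(const char *data, size_t len, bool is_write)
{
	if (bounce_busy)
		return -1;		/* EAGAIN in the driver */
	bounce_busy = true;
	if (is_write)
		memcpy(bounce, data, len);
	return 0;
}

/* Copy-out for reads, then release the slot. */
static void
unmap_align(char *data, size_t len, bool is_read)
{
	if (is_read)
		memcpy(data, bounce, len);
	bounce_busy = false;
}

int
main(void)
{
	char buf[16] = "hello, backend";

	assert(map_align(buf, sizeof(buf), true) == 0);	/* write path */
	assert(map_align(buf, sizeof(buf), true) != 0);	/* slot is busy */
	unmap_align(buf, sizeof(buf), false);

	assert(map_align(buf, sizeof(buf), false) == 0);	/* read path */
	memset(bounce, 'x', sizeof(buf));	/* pretend the device filled it */
	unmap_align(buf, sizeof(buf), true);
	assert(buf[0] == 'x');
	printf("bounce buffer copy-in/copy-out ok\n");
	return 0;
}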