Tue Apr 14 13:10:43 2020 UTC
use a single pre-allocated buffer for unaligned I/O - it is rare and not a
performance-critical path, so it is more important to ensure the I/O will
eventually succeed; also return EAGAIN rather than ENOMEM, so that the I/O
is retried by dk_start() when the previous I/O finishes

fix yet another leak on the xengnt_grant_access() failure path in
xbd_diskstart() - this time the unaligned buffer


(jdolecek)
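The change boils down to the following pattern; this is a rough, hypothetical
userland sketch (bounce_init/bounce_map/bounce_unmap and BOUNCE_SIZE are
made-up names, not the driver's API): allocate one buffer up front, guard it
with a busy flag, and hand back EAGAIN while it is in use so the caller can
simply requeue and retry the request later.

/*
 * Hypothetical sketch only -- names and sizes are made up for
 * illustration and are not the xbd driver API.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

#define BOUNCE_SIZE	(64 * 1024)	/* stands in for MAXPHYS */

static void *bounce_buf;		/* allocated once, at attach time */
static bool bounce_free = true;

static int
bounce_init(void)
{
	bounce_buf = malloc(BOUNCE_SIZE);
	return bounce_buf == NULL ? ENOMEM : 0;
}

/* Claim the buffer for one unaligned transfer; copy data in for writes. */
static int
bounce_map(const void *data, size_t len, bool is_write, void **alignedp)
{
	if (len > BOUNCE_SIZE)
		return EINVAL;
	if (!bounce_free)
		return EAGAIN;	/* busy: caller requeues and retries later */
	bounce_free = false;
	if (is_write)
		memcpy(bounce_buf, data, len);
	*alignedp = bounce_buf;
	return 0;
}

/* Release the buffer; copy data out only for reads that completed. */
static void
bounce_unmap(void *data, size_t len, bool is_read, bool completed)
{
	if (completed && is_read)
		memcpy(data, bounce_buf, len);
	bounce_free = true;
}

int
main(void)
{
	char src[512] = "payload", dst[512];
	void *aligned;

	if (bounce_init() != 0)
		return 1;
	/* First claim succeeds; a second claim while busy gets EAGAIN. */
	if (bounce_map(src, sizeof(src), true, &aligned) != 0)
		return 1;
	if (bounce_map(src, sizeof(src), true, &aligned) != EAGAIN)
		return 1;
	bounce_unmap(dst, sizeof(dst), false, true);
	return 0;
}

In the diff below the same idea appears as sc_unalign_buffer/sc_unalign_free in
xbd_map_align() and xbd_unmap_align(), with the buffer sized to MAXPHYS and
released from the I/O completion path.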
diff -r1.110 -r1.111 src/sys/arch/xen/xen/xbd_xenbus.c


--- src/sys/arch/xen/xen/xbd_xenbus.c 2020/04/14 13:02:40 1.110
+++ src/sys/arch/xen/xen/xbd_xenbus.c 2020/04/14 13:10:43 1.111
@@ -1,14 +1,14 @@
-/* $NetBSD: xbd_xenbus.c,v 1.110 2020/04/14 13:02:40 jdolecek Exp $ */
+/* $NetBSD: xbd_xenbus.c,v 1.111 2020/04/14 13:10:43 jdolecek Exp $ */
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
@@ -40,27 +40,27 @@
  * - initiate request: xbdread/write/open/ioctl/..
  * - depending on operation, it is handled directly by disk(9) subsystem or
  *   goes through physio(9) first.
  * - the request is ultimately processed by xbd_diskstart() that prepares the
  *   xbd requests, post them in the ring I/O queue, then signal the backend.
  *
  * When a response is available in the queue, the backend signals the frontend
  * via its event channel. This triggers xbd_handler(), which will link back
  * the response to its request through the request ID, and mark the I/O as
  * completed.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.110 2020/04/14 13:02:40 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.111 2020/04/14 13:10:43 jdolecek Exp $");
 
 #include "opt_xen.h"
 
 
 #include <sys/param.h>
 #include <sys/buf.h>
 #include <sys/bufq.h>
 #include <sys/device.h>
 #include <sys/disk.h>
 #include <sys/disklabel.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
@@ -127,71 +127,76 @@ struct xbd_xenbus_softc {
 	struct intrhand *sc_ih; /* Interrupt handler for this instance. */
 	kmutex_t sc_lock;
 	kcondvar_t sc_cache_flush_cv;
 	kcondvar_t sc_req_cv;
 	kcondvar_t sc_detach_cv;
 	kcondvar_t sc_suspend_cv;
 
 	blkif_front_ring_t sc_ring;
 	grant_ref_t sc_ring_gntref;
 
 	struct xbd_req sc_reqs[XBD_RING_SIZE];
 	SLIST_HEAD(,xbd_req) sc_xbdreq_head; /* list of free requests */
 
+	vmem_addr_t sc_unalign_buffer;
+	bool sc_unalign_free;
+
 	int sc_backend_status; /* our status with backend */
 #define BLKIF_STATE_DISCONNECTED 0
 #define BLKIF_STATE_CONNECTED    1
 #define BLKIF_STATE_SUSPENDED    2
 
 	int sc_shutdown;
 #define BLKIF_SHUTDOWN_RUN    0 /* no shutdown */
 #define BLKIF_SHUTDOWN_REMOTE 1 /* backend-initiated shutdown in progress */
 #define BLKIF_SHUTDOWN_LOCAL  2 /* locally-initiated shutdown in progress */
 
 	uint64_t sc_sectors; /* number of sectors for this device */
 	u_long sc_secsize; /* sector size */
 	uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */
 	u_long sc_info; /* VDISK_* */
 	u_long sc_handle; /* from backend */
 	int sc_features;
 #define BLKIF_FEATURE_CACHE_FLUSH	0x1
 #define BLKIF_FEATURE_BARRIER		0x2
 #define BLKIF_FEATURE_PERSISTENT	0x4
 #define BLKIF_FEATURE_BITS		\
 	"\20\1CACHE-FLUSH\2BARRIER\3PERSISTENT"
 	struct evcnt sc_cnt_map_unalign;
+	struct evcnt sc_cnt_unalign_busy;
+	struct evcnt sc_cnt_queue_full;
 };
 
 #if 0
 /* too big to be on stack */
 static multicall_entry_t rq_mcl[XBD_RING_SIZE+1];
 static paddr_t rq_pages[XBD_RING_SIZE];
 #endif
 
 static int  xbd_xenbus_match(device_t, cfdata_t, void *);
 static void xbd_xenbus_attach(device_t, device_t, void *);
 static int  xbd_xenbus_detach(device_t, int);
 
 static bool xbd_xenbus_suspend(device_t, const pmf_qual_t *);
 static bool xbd_xenbus_resume(device_t, const pmf_qual_t *);
 
 static int  xbd_handler(void *);
 static int  xbd_diskstart(device_t, struct buf *);
 static void xbd_iosize(device_t, int *);
 static void xbd_backend_changed(void *, XenbusState);
 static void xbd_connect(struct xbd_xenbus_softc *);
 
-static int  xbd_map_align(struct xbd_req *);
-static void xbd_unmap_align(struct xbd_req *);
+static int  xbd_map_align(struct xbd_xenbus_softc *, struct xbd_req *);
+static void xbd_unmap_align(struct xbd_xenbus_softc *, struct xbd_req *, bool);
 
 static void xbdminphys(struct buf *);
 
 CFATTACH_DECL3_NEW(xbd, sizeof(struct xbd_xenbus_softc),
     xbd_xenbus_match, xbd_xenbus_attach, xbd_xenbus_detach, NULL, NULL, NULL,
     DVF_DETACH_SHUTDOWN);
 
 dev_type_open(xbdopen);
 dev_type_close(xbdclose);
 dev_type_read(xbdread);
 dev_type_write(xbdwrite);
 dev_type_ioctl(xbdioctl);
 dev_type_strategy(xbdstrategy);
@@ -287,37 +292,48 @@ xbd_xenbus_attach(device_t parent, devic
 		    req_next);
 	}
 
 	sc->sc_backend_status = BLKIF_STATE_DISCONNECTED;
 	sc->sc_shutdown = BLKIF_SHUTDOWN_REMOTE;
 
 	ring = (void *)uvm_km_alloc(kernel_map, PAGE_SIZE, 0, UVM_KMF_WIRED);
 	if (ring == NULL)
 		panic("%s: can't alloc ring", device_xname(self));
 	sc->sc_ring.sring = ring;
 
 	evcnt_attach_dynamic(&sc->sc_cnt_map_unalign, EVCNT_TYPE_MISC,
 	    NULL, device_xname(self), "map unaligned");
+	evcnt_attach_dynamic(&sc->sc_cnt_unalign_busy, EVCNT_TYPE_MISC,
+	    NULL, device_xname(self), "map unaligned");
+	evcnt_attach_dynamic(&sc->sc_cnt_queue_full, EVCNT_TYPE_MISC,
+	    NULL, device_xname(self), "queue full");
 
 	for (i = 0; i < XBD_RING_SIZE; i++) {
 		if (bus_dmamap_create(sc->sc_xbusd->xbusd_dmat,
 		    XBD_MAX_XFER, BLKIF_MAX_SEGMENTS_PER_REQUEST,
 		    PAGE_SIZE, PAGE_SIZE, BUS_DMA_WAITOK | BUS_DMA_ALLOCNOW,
 		    &sc->sc_reqs[i].req_dmamap) != 0) {
 			aprint_error_dev(self, "can't alloc dma maps\n");
 			return;
 		}
 	}
 
+	if (uvm_km_kmem_alloc(kmem_va_arena,
+	    MAXPHYS, VM_SLEEP | VM_INSTANTFIT, &sc->sc_unalign_buffer) != 0) {
+		aprint_error_dev(self, "can't alloc align buffer\n");
+		return;
+	}
+	sc->sc_unalign_free = true;
+
 	/* resume shared structures and tell backend that we are ready */
 	if (xbd_xenbus_resume(self, PMF_Q_NONE) == false) {
 		uvm_km_free(kernel_map, (vaddr_t)ring, PAGE_SIZE,
 		    UVM_KMF_WIRED);
 		return;
 	}
 
 	if (!pmf_device_register(self, xbd_xenbus_suspend, xbd_xenbus_resume))
 		aprint_error_dev(self, "couldn't establish power handler\n");
 
 }
 
 static int
@@ -397,29 +413,36 @@ xbd_xenbus_detach(device_t dev, int flag
 
 	xengnt_revoke_access(sc->sc_ring_gntref);
 	uvm_km_free(kernel_map, (vaddr_t)sc->sc_ring.sring,
 	    PAGE_SIZE, UVM_KMF_WIRED);
 
 	for (i = 0; i < XBD_RING_SIZE; i++) {
 		if (sc->sc_reqs[i].req_dmamap != NULL) {
 			bus_dmamap_destroy(sc->sc_xbusd->xbusd_dmat,
 			    sc->sc_reqs[i].req_dmamap);
 			sc->sc_reqs[i].req_dmamap = NULL;
 		}
 	}
 
+	if (sc->sc_unalign_buffer != 0) {
+		uvm_km_kmem_free(kmem_va_arena, sc->sc_unalign_buffer, MAXPHYS);
+		sc->sc_unalign_buffer = 0;
+	}
+
 	mutex_destroy(&sc->sc_lock);
 
 	evcnt_detach(&sc->sc_cnt_map_unalign);
+	evcnt_detach(&sc->sc_cnt_unalign_busy);
+	evcnt_detach(&sc->sc_cnt_queue_full);
 
 	pmf_device_deregister(dev);
 
 	return 0;
 }
 
 static bool
 xbd_xenbus_suspend(device_t dev, const pmf_qual_t *qual) {
 
 	struct xbd_xenbus_softc *sc;
 
 	sc = device_private(dev);
 
@@ -730,27 +753,27 @@ again:
 		 */
 		KASSERT(xengnt_status(xbdreq->req_gntref[seg]) == 0);
 		xengnt_revoke_access(xbdreq->req_gntref[seg]);
 	}
 
 	bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat, xbdreq->req_dmamap);
 
 	bp = xbdreq->req_bp;
 	KASSERT(bp != NULL && bp->b_data != NULL);
 	DPRINTF(("%s(%p): b_bcount = %ld\n", __func__,
 	    bp, (long)bp->b_bcount));
 
 	if (__predict_false(bp->b_data != xbdreq->req_data))
-		xbd_unmap_align(xbdreq);
+		xbd_unmap_align(sc, xbdreq, true);
 	xbdreq->req_bp = xbdreq->req_data = NULL;
 
 	/* b_resid was set in dk_start, only override on error */
 	if (rep->status != BLKIF_RSP_OKAY) {
 		bp->b_error = EIO;
 		bp->b_resid = bp->b_bcount;
 	}
 
 	dk_done(&sc->sc_dksc, bp);
 
 	SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq, req_next);
 }
 
@@ -995,50 +1018,51 @@ xbd_diskstart(device_t self, struct buf
 	}
 
 	if (__predict_false(
 	    sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
 		/* device is suspended, do not consume buffer */
 		DPRINTF(("%s: (xbd_diskstart) device suspended\n",
 		    sc->sc_dksc.sc_xname));
 		error = EAGAIN;
 		goto out;
 	}
 
 	xbdreq = SLIST_FIRST(&sc->sc_xbdreq_head);
 	if (__predict_false(xbdreq == NULL)) {
+		sc->sc_cnt_queue_full.ev_count++;
 		DPRINTF(("xbd_diskstart: no req\n"));
 		error = EAGAIN;
 		goto out;
 	}
 
 	xbdreq->req_bp = bp;
 	xbdreq->req_data = bp->b_data;
 	if (__predict_false((vaddr_t)bp->b_data & (XEN_BSIZE - 1))) {
 		/* Only can get here if this is physio() request */
 		KASSERT(bp->b_saveaddr != NULL);
 
 		sc->sc_cnt_map_unalign.ev_count++;
 
-		if (__predict_false(xbd_map_align(xbdreq) != 0)) {
+		if (__predict_false(xbd_map_align(sc, xbdreq) != 0)) {
 			DPRINTF(("xbd_diskstart: no align\n"));
 			error = EAGAIN;
 			goto out;
 		}
 	}
 
 	if (__predict_false(bus_dmamap_load(sc->sc_xbusd->xbusd_dmat,
 	    xbdreq->req_dmamap, xbdreq->req_data, bp->b_bcount, NULL,
 	    BUS_DMA_NOWAIT) != 0)) {
-		printf("%s: %s: xengnt_grant_access failed",
+		printf("%s: %s: bus_dmamap_load failed",
 		    device_xname(sc->sc_dksc.sc_dev), __func__);
 		error = EINVAL;
 		goto out;
 	}
 
 	/* We are now committed to the transfer */
 	SLIST_REMOVE_HEAD(&sc->sc_xbdreq_head, req_next);
 	req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
 	req->id = xbdreq->req_id;
 	req->operation =
 	    bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
 	req->sector_number = bp->b_rawblkno;
 	req->handle = sc->sc_handle;
@@ -1061,57 +1085,60 @@ xbd_diskstart(device_t self, struct buf
 		    sc->sc_xbusd->xbusd_otherend_id,
 		    (ma & ~PAGE_MASK), (bp->b_flags & B_READ) == 0,
 		    &xbdreq->req_gntref[seg]))) {
 			printf("%s: %s: xengnt_grant_access failed",
 			    device_xname(sc->sc_dksc.sc_dev), __func__);
 			if (seg > 0) {
 				for (; --seg >= 0; ) {
 					xengnt_revoke_access(
 					    xbdreq->req_gntref[seg]);
 				}
 			}
 			bus_dmamap_unload(sc->sc_xbusd->xbusd_dmat,
 			    xbdreq->req_dmamap);
+			if (__predict_false(bp->b_data != xbdreq->req_data))
+				xbd_unmap_align(sc, xbdreq, false);
 			SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq,
 			    req_next);
 			error = EFAULT;
 			goto out;
 		}
 
 		req->seg[seg].gref = xbdreq->req_gntref[seg];
 	}
 	req->nr_segments = seg;
 	sc->sc_ring.req_prod_pvt++;
 
 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&sc->sc_ring, notify);
 	if (notify)
 		hypervisor_notify_via_evtchn(sc->sc_evtchn);
 
 out:
 	mutex_exit(&sc->sc_lock);
 	return error;
 }
 
 static int
-xbd_map_align(struct xbd_req *req)
+xbd_map_align(struct xbd_xenbus_softc *sc, struct xbd_req *req)
 {
-	int rc;
+	if (!sc->sc_unalign_free) {
+		sc->sc_cnt_unalign_busy.ev_count++;
+		return EAGAIN;
+	}
+	sc->sc_unalign_free = false;
 
-	rc = uvm_km_kmem_alloc(kmem_va_arena,
-	    req->req_bp->b_bcount, (VM_NOSLEEP | VM_INSTANTFIT),
-	    (vmem_addr_t *)&req->req_data);
-	if (__predict_false(rc != 0))
-		return ENOMEM;
+	KASSERT(req->req_bp->b_bcount < MAXPHYS);
+	req->req_data = (void *)sc->sc_unalign_buffer;
 	if ((req->req_bp->b_flags & B_READ) == 0)
 		memcpy(req->req_data, req->req_bp->b_data,
 		    req->req_bp->b_bcount);
 	return 0;
 }
 
 static void
-xbd_unmap_align(struct xbd_req *req)
+xbd_unmap_align(struct xbd_xenbus_softc *sc, struct xbd_req *req, bool sync)
 {
-	if (req->req_bp->b_flags & B_READ)
+	if (sync && req->req_bp->b_flags & B_READ)
 		memcpy(req->req_bp->b_data, req->req_data,
 		    req->req_bp->b_bcount);
-	uvm_km_kmem_free(kmem_va_arena, (vaddr_t)req->req_data, req->req_bp->b_bcount);
+	sc->sc_unalign_free = true;
 }