Thu Jul 27 17:56:31 2023 UTC ()
Pull up following revision(s) (requested by bouyer in ticket #248):

	sys/arch/xen/xen/xbd_xenbus.c: revision 1.132 (patch)
	sys/arch/xen/xen/xbd_xenbus.c: revision 1.133 (patch)
	sys/arch/xen/xen/xbd_xenbus.c: revision 1.134 (patch)

The disk size reported in the xenstore is always in XEN_BSIZE units,
not in the backend's sector size. This should fix the issue reported by
Christian Kujau on netbsd-users and port-xen.

Also use XEN_BSIZE when computing the number of bytes for format_bytes().
While there, note in a comment that sc_sectors is in XEN_BSIZE units.
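In the cumulative patch below this reduces to a single connect-time
conversion (a sketch using the driver's own names; "sectors" is the raw
value read from the xenstore):

	/* The xenstore "sectors" node counts XEN_BSIZE (512-byte) blocks,
	 * whatever sector-size the backend advertises, so convert it to
	 * sc_secsize units before storing: */
	sc->sc_sectors = sectors * (uint64_t)XEN_BSIZE / sc->sc_secsize;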

Properly handle 4k sector size backends:
- report the backend's sector size to upper layers, not DEV_BSIZE.
  Adjust the number of sectors accordingly.
- Use sc_secsize instead of XEN_BSIZE where appropriate. The sector numbers
  in I/O requests are still in XEN_BSIZE units, but must be a multiple
  of sc_secsize/XEN_BSIZE (see the sketch after this list).
- As a consequence of the previous point, the buffer has to be aligned to
  sc_secsize; alignment to XEN_BSIZE may not be enough. This means that we
  may have to xbd_map_align() more buffers, including some without B_PHYS
  set.
- Add some more DPRINTF lines related to I/O requests.
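The sketch below shows the request-side conversion as it appears in the
patch; "start" is the byte offset of the current chunk within the buffer:

	/* Ring requests still address the disk in XEN_BSIZE units, so
	 * scale b_rawblkno (in sc_secsize units) back up; start is a
	 * byte offset, hence the XEN_BSHIFT conversion: */
	req->sector_number = (bp->b_rawblkno * sc->sc_secsize / XEN_BSIZE) +
	    (start >> XEN_BSHIFT);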

Tested with a Linux dom0.

Thanks to Christian Kujau for providing access to his hardware for testing
and debugging.


(martin)
cvs diff -r1.129 -r1.129.20.1 src/sys/arch/xen/xen/xbd_xenbus.c

--- src/sys/arch/xen/xen/xbd_xenbus.c 2020/07/13 21:21:56 1.129
+++ src/sys/arch/xen/xen/xbd_xenbus.c 2023/07/27 17:56:31 1.129.20.1
@@ -1,14 +1,14 @@
-/*	$NetBSD: xbd_xenbus.c,v 1.129 2020/07/13 21:21:56 jdolecek Exp $	*/
+/*	$NetBSD: xbd_xenbus.c,v 1.129.20.1 2023/07/27 17:56:31 martin Exp $	*/
 
 /*
  * Copyright (c) 2006 Manuel Bouyer.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
@@ -40,27 +40,27 @@
  * - initiate request: xbdread/write/open/ioctl/..
  * - depending on operation, it is handled directly by disk(9) subsystem or
  *   goes through physio(9) first.
  * - the request is ultimately processed by xbd_diskstart() that prepares the
  *   xbd requests, post them in the ring I/O queue, then signal the backend.
  *
  * When a response is available in the queue, the backend signals the frontend
  * via its event channel. This triggers xbd_handler(), which will link back
  * the response to its request through the request ID, and mark the I/O as
  * completed.
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.129 2020/07/13 21:21:56 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: xbd_xenbus.c,v 1.129.20.1 2023/07/27 17:56:31 martin Exp $");
 
 #include "opt_xen.h"
 
 
 #include <sys/param.h>
 #include <sys/buf.h>
 #include <sys/bufq.h>
 #include <sys/device.h>
 #include <sys/disk.h>
 #include <sys/disklabel.h>
 #include <sys/conf.h>
 #include <sys/fcntl.h>
 #include <sys/kernel.h>
@@ -159,27 +159,27 @@ struct xbd_xenbus_softc {
 	vmem_addr_t sc_unalign_buffer;
 	void *sc_unalign_used;
 
 	int sc_backend_status; /* our status with backend */
 #define BLKIF_STATE_DISCONNECTED 0
 #define BLKIF_STATE_CONNECTED 1
 #define BLKIF_STATE_SUSPENDED 2
 
 	int sc_shutdown;
 #define BLKIF_SHUTDOWN_RUN 0 /* no shutdown */
 #define BLKIF_SHUTDOWN_REMOTE 1 /* backend-initiated shutdown in progress */
 #define BLKIF_SHUTDOWN_LOCAL 2 /* locally-initiated shutdown in progress */
 
-	uint64_t sc_sectors; /* number of sectors for this device */
+	uint64_t sc_sectors; /* number of sc_secsize sectors for this device */
 	u_long sc_secsize; /* sector size */
 	uint64_t sc_xbdsize; /* size of disk in DEV_BSIZE */
 	u_long sc_info; /* VDISK_* */
 	u_long sc_handle; /* from backend */
 	int sc_features;
 #define BLKIF_FEATURE_CACHE_FLUSH 0x1
 #define BLKIF_FEATURE_BARRIER 0x2
 #define BLKIF_FEATURE_PERSISTENT 0x4
 #define BLKIF_FEATURE_INDIRECT 0x8
 #define BLKIF_FEATURE_BITS \
 	"\20\1CACHE-FLUSH\2BARRIER\3PERSISTENT\4INDIRECT"
 	struct evcnt sc_cnt_map_unalign;
 	struct evcnt sc_cnt_unalign_busy;
@@ -668,45 +668,44 @@ xbd_backend_changed(void *arg, XenbusSta
 
 		if (sc->sc_backend_status == BLKIF_STATE_CONNECTED ||
 		    sc->sc_backend_status == BLKIF_STATE_SUSPENDED)
 			/* already connected */
 			return;
 
 		xbd_connect(sc);
 		sc->sc_shutdown = BLKIF_SHUTDOWN_RUN;
 		sc->sc_xbdsize =
 		    sc->sc_sectors * (uint64_t)sc->sc_secsize / DEV_BSIZE;
 		dg = &sc->sc_dksc.sc_dkdev.dk_geom;
 		memset(dg, 0, sizeof(*dg));
 
-		dg->dg_secperunit = sc->sc_xbdsize;
-		dg->dg_secsize = DEV_BSIZE;
+		dg->dg_secperunit = sc->sc_sectors;
+		dg->dg_secsize = sc->sc_secsize;
 		dg->dg_ntracks = 1;
-		// XXX: Ok to hard-code DEV_BSIZE?
-		dg->dg_nsectors = 1024 * (1024 / dg->dg_secsize);
+		dg->dg_nsectors = (1024 * 1024) / dg->dg_secsize;
 		dg->dg_ncylinders = dg->dg_secperunit / dg->dg_nsectors;
 
 		bufq_alloc(&sc->sc_dksc.sc_bufq, "fcfs", 0);
 		dk_attach(&sc->sc_dksc);
 		disk_attach(&sc->sc_dksc.sc_dkdev);
 
 		sc->sc_backend_status = BLKIF_STATE_CONNECTED;
 		hypervisor_unmask_event(sc->sc_evtchn);
 
 		format_bytes(buf, uimin(9, sizeof(buf)),
-		    sc->sc_sectors * sc->sc_secsize);
+		    sc->sc_sectors * dg->dg_secsize);
 		aprint_normal_dev(sc->sc_dksc.sc_dev,
 		    "%s, %d bytes/sect x %" PRIu64 " sectors\n",
-		    buf, (int)dg->dg_secsize, sc->sc_xbdsize);
+		    buf, (int)dg->dg_secsize, sc->sc_sectors);
 		snprintb(buf, sizeof(buf), BLKIF_FEATURE_BITS,
 		    sc->sc_features);
 		aprint_normal_dev(sc->sc_dksc.sc_dev,
 		    "backend features %s\n", buf);
 
 		/* Discover wedges on this disk. */
 		dkwedge_discover(&sc->sc_dksc.sc_dkdev);
 
 		disk_set_info(sc->sc_dksc.sc_dev, &sc->sc_dksc.sc_dkdev, NULL);
 
 		/* the disk should be working now */
 		config_pending_decr(sc->sc_dksc.sc_dev);
 		break;
@@ -729,46 +728,45 @@ xbd_connect(struct xbd_xenbus_softc *sc)
 	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
 	    "feature-persistent", &val, 10);
 	if (err)
 		val = 0;
 	if (val > 0)
 		sc->sc_features |= BLKIF_FEATURE_PERSISTENT;
 
 	err = xenbus_read_ul(NULL,
 	    sc->sc_xbusd->xbusd_path, "virtual-device", &sc->sc_handle, 10);
 	if (err)
 		panic("%s: can't read number from %s/virtual-device\n",
 		    device_xname(sc->sc_dksc.sc_dev),
 		    sc->sc_xbusd->xbusd_otherend);
-	err = xenbus_read_ull(NULL,
-	    sc->sc_xbusd->xbusd_otherend, "sectors", &sectors, 10);
-	if (err)
-		panic("%s: can't read number from %s/sectors\n",
-		    device_xname(sc->sc_dksc.sc_dev),
-		    sc->sc_xbusd->xbusd_otherend);
-	sc->sc_sectors = sectors;
-
 	err = xenbus_read_ul(NULL,
 	    sc->sc_xbusd->xbusd_otherend, "info", &sc->sc_info, 10);
 	if (err)
 		panic("%s: can't read number from %s/info\n",
 		    device_xname(sc->sc_dksc.sc_dev),
 		    sc->sc_xbusd->xbusd_otherend);
 	err = xenbus_read_ul(NULL,
 	    sc->sc_xbusd->xbusd_otherend, "sector-size", &sc->sc_secsize, 10);
 	if (err)
 		panic("%s: can't read number from %s/sector-size\n",
 		    device_xname(sc->sc_dksc.sc_dev),
 		    sc->sc_xbusd->xbusd_otherend);
+	err = xenbus_read_ull(NULL,
+	    sc->sc_xbusd->xbusd_otherend, "sectors", &sectors, 10);
+	if (err)
+		panic("%s: can't read number from %s/sectors\n",
+		    device_xname(sc->sc_dksc.sc_dev),
+		    sc->sc_xbusd->xbusd_otherend);
+	sc->sc_sectors = sectors * (uint64_t)XEN_BSIZE / sc->sc_secsize;
 
 	xenbus_switch_state(sc->sc_xbusd, NULL, XenbusStateConnected);
 }
 
 static void
 xbd_features(struct xbd_xenbus_softc *sc)
 {
 	int err;
 	u_long val;
 
 	err = xenbus_read_ul(NULL, sc->sc_xbusd->xbusd_otherend,
 	    "feature-flush-cache", &val, 10);
 	if (err)
@@ -829,26 +827,28 @@ again:
 		    rep->operation != BLKIF_OP_WRITE) {
 			aprint_error_dev(sc->sc_dksc.sc_dev,
 			    "bad operation %d from backend\n", rep->operation);
 			continue;
 		}
 
 		bp = xbdreq->req_bp;
 		xbdreq->req_bp = NULL;
 		KASSERT(bp != NULL && bp->b_data != NULL);
 		DPRINTF(("%s(%p): b_bcount = %ld\n", __func__,
 		    bp, (long)bp->b_bcount));
 
 		if (bp->b_error != 0 || rep->status != BLKIF_RSP_OKAY) {
+			DPRINTF(("%s: error %d status %d\n", __func__,
+			    bp->b_error, rep->status));
 			bp->b_error = EIO;
 			bp->b_resid = bp->b_bcount;
 		}
 
 		if (xbdreq->req_parent) {
 			struct xbd_req *req_parent = xbdreq->req_parent;
 
 			/* Unhook and recycle child */
 			xbdreq->req_parent = NULL;
 			req_parent->req_child = NULL;
 			SLIST_INSERT_HEAD(&sc->sc_xbdreq_head, xbdreq,
 			    req_next);
 
@@ -1128,27 +1128,27 @@ xbd_diskstart(device_t self, struct buf
 
 	KASSERT(bp->b_bcount <= MAXPHYS);
 
 	DPRINTF(("xbd_diskstart(%p): b_bcount = %ld\n",
 	    bp, (long)bp->b_bcount));
 
 	mutex_enter(&sc->sc_lock);
 
 	if (sc->sc_shutdown != BLKIF_SHUTDOWN_RUN) {
 		error = EIO;
 		goto out;
 	}
 
-	if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_xbdsize) {
+	if (bp->b_rawblkno < 0 || bp->b_rawblkno > sc->sc_sectors) {
 		/* invalid block number */
 		error = EINVAL;
 		goto out;
 	}
 
 	if (__predict_false(
 	    sc->sc_backend_status == BLKIF_STATE_SUSPENDED)) {
 		/* device is suspended, do not consume buffer */
 		DPRINTF(("%s: (xbd_diskstart) device suspended\n",
 		    sc->sc_dksc.sc_xname));
 		error = EAGAIN;
 		goto out;
 	}
@@ -1164,27 +1164,27 @@ xbd_diskstart(device_t self, struct buf
 
 	if ((sc->sc_features & BLKIF_FEATURE_INDIRECT) == 0
 	    && bp->b_bcount > XBD_MAX_CHUNK) {
 		if (!SLIST_NEXT(xbdreq, req_next)) {
 			DPRINTF(("%s: need extra req\n", __func__));
 			error = EAGAIN;
 			goto out;
 		}
 	}
 
 	bp->b_resid = bp->b_bcount;
 	xbdreq->req_bp = bp;
 	xbdreq->req_data = bp->b_data;
-	if (__predict_false((vaddr_t)bp->b_data & (XEN_BSIZE - 1))) {
+	if (__predict_false((vaddr_t)bp->b_data & (sc->sc_secsize - 1))) {
 		if (__predict_false(xbd_map_align(sc, xbdreq) != 0)) {
 			DPRINTF(("xbd_diskstart: no align\n"));
 			error = EAGAIN;
 			goto out;
 		}
 	}
 
 	if (__predict_false(bus_dmamap_load(sc->sc_xbusd->xbusd_dmat,
 	    xbdreq->req_dmamap, xbdreq->req_data, bp->b_bcount, NULL,
 	    BUS_DMA_NOWAIT) != 0)) {
 		printf("%s: %s: bus_dmamap_load failed\n",
 		    device_xname(sc->sc_dksc.sc_dev), __func__);
 		if (__predict_false(bp->b_data != xbdreq->req_data))
@@ -1265,125 +1265,133 @@ xbd_diskstart_submit(struct xbd_xenbus_s
     grant_ref_t *gntref)
 {
 	blkif_request_t *req;
 	paddr_t ma;
 	int nsects, nbytes, dmaseg, first_sect, size, segidx = 0;
 	struct blkif_request_segment *reqseg;
 
 	KASSERT(mutex_owned(&sc->sc_lock));
 
 	req = RING_GET_REQUEST(&sc->sc_ring, sc->sc_ring.req_prod_pvt);
 	req->id = req_id;
 	req->operation =
 	    bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
-	req->sector_number = bp->b_rawblkno + (start >> XEN_BSHIFT);
+	req->sector_number = (bp->b_rawblkno * sc->sc_secsize / XEN_BSIZE) +
+	    (start >> XEN_BSHIFT);
 	req->handle = sc->sc_handle;
+	DPRINTF(("%s: id %" PRIu64 " op %d sn %" PRIu64 " handle %d\n",
+	    __func__, req->id, req->operation, req->sector_number,
+	    req->handle));
 
 	size = uimin(bp->b_bcount - start, XBD_MAX_CHUNK);
 	for (dmaseg = 0; dmaseg < dmamap->dm_nsegs && size > 0; dmaseg++) {
 		bus_dma_segment_t *ds = &dmamap->dm_segs[dmaseg];
 
 		ma = ds->ds_addr;
 		nbytes = ds->ds_len;
 
 		if (start > 0) {
 			if (start >= nbytes) {
 				start -= nbytes;
 				continue;
 			}
 			ma += start;
 			nbytes -= start;
 			start = 0;
 		}
 		size -= nbytes;
 
-		KASSERT(((ma & PAGE_MASK) & (XEN_BSIZE - 1)) == 0);
-		KASSERT((nbytes & (XEN_BSIZE - 1)) == 0);
-		KASSERT((size & (XEN_BSIZE - 1)) == 0);
+		KASSERT(((ma & PAGE_MASK) & (sc->sc_secsize - 1)) == 0);
+		KASSERT((nbytes & (sc->sc_secsize - 1)) == 0);
+		KASSERT((size & (sc->sc_secsize - 1)) == 0);
 		first_sect = (ma & PAGE_MASK) >> XEN_BSHIFT;
 		nsects = nbytes >> XEN_BSHIFT;
 
 		reqseg = &req->seg[segidx++];
 		reqseg->first_sect = first_sect;
 		reqseg->last_sect = first_sect + nsects - 1;
 		KASSERT(reqseg->first_sect <= reqseg->last_sect);
 		KASSERT(reqseg->last_sect < (PAGE_SIZE / XEN_BSIZE));
+		DPRINTF(("%s: seg %d fs %d ls %d\n", __func__, segidx,
+		    reqseg->first_sect, reqseg->last_sect));
 
 		reqseg->gref = gntref[dmaseg];
 	}
 	KASSERT(segidx > 0);
 	req->nr_segments = segidx;
 	sc->sc_ring.req_prod_pvt++;
 }
 
 static void
 xbd_diskstart_submit_indirect(struct xbd_xenbus_softc *sc,
     struct xbd_req *xbdreq, struct buf *bp)
 {
 	blkif_request_indirect_t *req;
 	paddr_t ma;
 	int nsects, nbytes, dmaseg, first_sect;
 	struct blkif_request_segment *reqseg;
 
 	KASSERT(mutex_owned(&sc->sc_lock));
 
 	req = (blkif_request_indirect_t *)RING_GET_REQUEST(&sc->sc_ring,
 	    sc->sc_ring.req_prod_pvt);
 	req->id = xbdreq->req_id;
 	req->operation = BLKIF_OP_INDIRECT;
 	req->indirect_op =
 	    bp->b_flags & B_READ ? BLKIF_OP_READ : BLKIF_OP_WRITE;
-	req->sector_number = bp->b_rawblkno;
+	req->sector_number = bp->b_rawblkno * sc->sc_secsize / XEN_BSIZE;
 	req->handle = sc->sc_handle;
+	DPRINTF(("%s: id %" PRIu64 " op %d sn %" PRIu64 " handle %d\n",
+	    __func__, req->id, req->indirect_op, req->sector_number,
+	    req->handle));
 
 	xbdreq->req_indirect = SLIST_FIRST(&sc->sc_indirect_head);
 	KASSERT(xbdreq->req_indirect != NULL);	/* always as many as reqs */
 	SLIST_REMOVE_HEAD(&sc->sc_indirect_head, in_next);
 	req->indirect_grefs[0] = xbdreq->req_indirect->in_gntref;
 
 	reqseg = xbdreq->req_indirect->in_addr;
 	for (dmaseg = 0; dmaseg < xbdreq->req_dmamap->dm_nsegs; dmaseg++) {
 		bus_dma_segment_t *ds = &xbdreq->req_dmamap->dm_segs[dmaseg];
 
 		ma = ds->ds_addr;
 		nbytes = ds->ds_len;
 
+		KASSERT(((ma & PAGE_MASK) & (sc->sc_secsize - 1)) == 0);
+		KASSERT((nbytes & (sc->sc_secsize - 1)) == 0);
+
 		first_sect = (ma & PAGE_MASK) >> XEN_BSHIFT;
 		nsects = nbytes >> XEN_BSHIFT;
 
 		reqseg->first_sect = first_sect;
 		reqseg->last_sect = first_sect + nsects - 1;
 		reqseg->gref = xbdreq->req_gntref[dmaseg];
+		DPRINTF(("%s: seg %d fs %d ls %d\n", __func__, dmaseg,
+		    reqseg->first_sect, reqseg->last_sect));
 
 		KASSERT(reqseg->first_sect <= reqseg->last_sect);
 		KASSERT(reqseg->last_sect < (PAGE_SIZE / XEN_BSIZE));
 
 		reqseg++;
 	}
 	req->nr_segments = dmaseg;
 	sc->sc_ring.req_prod_pvt++;
 
 	sc->sc_cnt_indirect.ev_count++;
 }
 
 static int
 xbd_map_align(struct xbd_xenbus_softc *sc, struct xbd_req *req)
 {
-	/*
-	 * Only can get here if this is physio() request, block I/O
-	 * uses DEV_BSIZE-aligned buffers.
-	 */
-	KASSERT((req->req_bp->b_flags & B_PHYS) != 0);
-
 	sc->sc_cnt_map_unalign.ev_count++;
 
 	if (sc->sc_unalign_used) {
 		sc->sc_cnt_unalign_busy.ev_count++;
 		return EAGAIN;
 	}
 	sc->sc_unalign_used = req->req_bp;
 
 	KASSERT(req->req_bp->b_bcount <= MAXPHYS);
 	req->req_data = (void *)sc->sc_unalign_buffer;
 	if ((req->req_bp->b_flags & B_READ) == 0)
 		memcpy(req->req_data, req->req_bp->b_data,
 		    req->req_bp->b_bcount);