| @@ -1,410 +1,421 @@ | | | @@ -1,410 +1,421 @@ |
1 | /* $NetBSD: rf_diskqueue.c,v 1.58 2020/06/19 19:32:03 jdolecek Exp $ */ | | 1 | /* $NetBSD: rf_diskqueue.c,v 1.59 2021/07/23 00:26:19 oster Exp $ */ |
2 | /* | | 2 | /* |
3 | * Copyright (c) 1995 Carnegie-Mellon University. | | 3 | * Copyright (c) 1995 Carnegie-Mellon University. |
4 | * All rights reserved. | | 4 | * All rights reserved. |
5 | * | | 5 | * |
6 | * Author: Mark Holland | | 6 | * Author: Mark Holland |
7 | * | | 7 | * |
8 | * Permission to use, copy, modify and distribute this software and | | 8 | * Permission to use, copy, modify and distribute this software and |
9 | * its documentation is hereby granted, provided that both the copyright | | 9 | * its documentation is hereby granted, provided that both the copyright |
10 | * notice and this permission notice appear in all copies of the | | 10 | * notice and this permission notice appear in all copies of the |
11 | * software, derivative works or modified versions, and any portions | | 11 | * software, derivative works or modified versions, and any portions |
12 | * thereof, and that both notices appear in supporting documentation. | | 12 | * thereof, and that both notices appear in supporting documentation. |
13 | * | | 13 | * |
14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | | 14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND | | 15 | * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND |
16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. | | 16 | * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. |
17 | * | | 17 | * |
18 | * Carnegie Mellon requests users of this software to return to | | 18 | * Carnegie Mellon requests users of this software to return to |
19 | * | | 19 | * |
20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU | | 20 | * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU |
21 | * School of Computer Science | | 21 | * School of Computer Science |
22 | * Carnegie Mellon University | | 22 | * Carnegie Mellon University |
23 | * Pittsburgh PA 15213-3890 | | 23 | * Pittsburgh PA 15213-3890 |
24 | * | | 24 | * |
25 | * any improvements or extensions that they make and grant Carnegie the | | 25 | * any improvements or extensions that they make and grant Carnegie the |
26 | * rights to redistribute these changes. | | 26 | * rights to redistribute these changes. |
27 | */ | | 27 | */ |
28 | | | 28 | |
29 | /**************************************************************************** | | 29 | /**************************************************************************** |
30 | * | | 30 | * |
31 | * rf_diskqueue.c -- higher-level disk queue code | | 31 | * rf_diskqueue.c -- higher-level disk queue code |
32 | * | | 32 | * |
33 | * the routines here are a generic wrapper around the actual queueing | | 33 | * the routines here are a generic wrapper around the actual queueing |
34 | * routines. The code here implements thread scheduling, synchronization, | | 34 | * routines. The code here implements thread scheduling, synchronization, |
35 | * and locking ops (see below) on top of the lower-level queueing code. | | 35 | * and locking ops (see below) on top of the lower-level queueing code. |
36 | * | | 36 | * |
37 | * to support atomic RMW, we implement "locking operations". When a | | 37 | * to support atomic RMW, we implement "locking operations". When a |
38 | * locking op is dispatched to the lower levels of the driver, the | | 38 | * locking op is dispatched to the lower levels of the driver, the |
39 | * queue is locked, and no further I/Os are dispatched until the queue | | 39 | * queue is locked, and no further I/Os are dispatched until the queue |
40 | * receives & completes a corresponding "unlocking operation". This | | 40 | * receives & completes a corresponding "unlocking operation". This |
41 | * code relies on the higher layers to guarantee that a locking op | | 41 | * code relies on the higher layers to guarantee that a locking op |
42 | * will always be eventually followed by an unlocking op. The model | | 42 | * will always be eventually followed by an unlocking op. The model |
43 | * is that the higher layers are structured so locking and unlocking | | 43 | * is that the higher layers are structured so locking and unlocking |
44 | * ops occur in pairs, i.e. an unlocking op cannot be generated until | | 44 | * ops occur in pairs, i.e. an unlocking op cannot be generated until |
45 | * after a locking op reports completion. There is no good way to | | 45 | * after a locking op reports completion. There is no good way to |
46 | * check to see that an unlocking op "corresponds" to the op that | | 46 | * check to see that an unlocking op "corresponds" to the op that |
47 | * currently has the queue locked, so we make no such attempt. Since | | 47 | * currently has the queue locked, so we make no such attempt. Since |
48 | * by definition there can be only one locking op outstanding on a | | 48 | * by definition there can be only one locking op outstanding on a |
49 | * disk, this should not be a problem. | | 49 | * disk, this should not be a problem. |
50 | * | | 50 | * |
51 | * In the kernel, we allow multiple I/Os to be concurrently dispatched | | 51 | * In the kernel, we allow multiple I/Os to be concurrently dispatched |
52 | * to the disk driver. In order to support locking ops in this | | 52 | * to the disk driver. In order to support locking ops in this |
53 | * environment, when we decide to do a locking op, we stop dispatching | | 53 | * environment, when we decide to do a locking op, we stop dispatching |
54 | * new I/Os and wait until all dispatched I/Os have completed before | | 54 | * new I/Os and wait until all dispatched I/Os have completed before |
55 | * dispatching the locking op. | | 55 | * dispatching the locking op. |
56 | * | | 56 | * |
57 | * Unfortunately, the code is different in the 3 different operating | | 57 | * Unfortunately, the code is different in the 3 different operating |
58 | * states (user level, kernel, simulator). In the kernel, I/O is | | 58 | * states (user level, kernel, simulator). In the kernel, I/O is |
59 | * non-blocking, and we have no disk threads to dispatch for us. | | 59 | * non-blocking, and we have no disk threads to dispatch for us. |
60 | * Therefore, we have to dispatch new I/Os to the scsi driver at the | | 60 | * Therefore, we have to dispatch new I/Os to the scsi driver at the |
61 | * time of enqueue, and also at the time of completion. At user | | 61 | * time of enqueue, and also at the time of completion. At user |
62 | * level, I/O is blocking, and so only the disk threads may dispatch | | 62 | * level, I/O is blocking, and so only the disk threads may dispatch |
63 | * I/Os. Thus at user level, all we can do at enqueue time is enqueue | | 63 | * I/Os. Thus at user level, all we can do at enqueue time is enqueue |
64 | * and wake up the disk thread to do the dispatch. | | 64 | * and wake up the disk thread to do the dispatch. |
65 | * | | 65 | * |
66 | ****************************************************************************/ | | 66 | ****************************************************************************/ |
67 | | | 67 | |
68 | #include <sys/cdefs.h> | | 68 | #include <sys/cdefs.h> |
69 | __KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.58 2020/06/19 19:32:03 jdolecek Exp $"); | | 69 | __KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.59 2021/07/23 00:26:19 oster Exp $"); |
70 | | | 70 | |
71 | #include <dev/raidframe/raidframevar.h> | | 71 | #include <dev/raidframe/raidframevar.h> |
72 | | | 72 | |
73 | #include "rf_threadstuff.h" | | 73 | #include "rf_threadstuff.h" |
74 | #include "rf_raid.h" | | 74 | #include "rf_raid.h" |
75 | #include "rf_diskqueue.h" | | 75 | #include "rf_diskqueue.h" |
76 | #include "rf_alloclist.h" | | 76 | #include "rf_alloclist.h" |
77 | #include "rf_acctrace.h" | | 77 | #include "rf_acctrace.h" |
78 | #include "rf_etimer.h" | | 78 | #include "rf_etimer.h" |
79 | #include "rf_general.h" | | 79 | #include "rf_general.h" |
80 | #include "rf_debugprint.h" | | 80 | #include "rf_debugprint.h" |
81 | #include "rf_shutdown.h" | | 81 | #include "rf_shutdown.h" |
82 | #include "rf_cvscan.h" | | 82 | #include "rf_cvscan.h" |
83 | #include "rf_sstf.h" | | 83 | #include "rf_sstf.h" |
84 | #include "rf_fifo.h" | | 84 | #include "rf_fifo.h" |
85 | #include "rf_kintf.h" | | 85 | #include "rf_kintf.h" |
86 | | | 86 | |
| | | 87 | #include <sys/buf.h> |
| | | 88 | |
87 | static void rf_ShutdownDiskQueueSystem(void *); | | 89 | static void rf_ShutdownDiskQueueSystem(void *); |
88 | | | 90 | |
/*
 * Debug tracing for the disk queue code.  Tracing is compiled in only
 * when RF_DEBUG_DISKQUEUE is non-zero (it defaults to 0 here) and, when
 * compiled in, is further gated at run time by rf_queueDebug.
 *
 * Every variant expands to a single do { } while (0) statement so that
 * a use such as
 *	if (x) Dprintf1(...); else ...
 * cannot capture the caller's "else", and so that the enabled and
 * disabled builds parse identically.  rf_debug_printf() takes a format
 * plus eight pointer-sized arguments; unused slots are padded with NULL.
 */
#ifndef RF_DEBUG_DISKQUEUE
#define RF_DEBUG_DISKQUEUE 0
#endif

#if RF_DEBUG_DISKQUEUE
#define Dprintf1(s,a) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, \
			    (void *)((unsigned long)(a)), \
			    NULL, NULL, NULL, NULL, NULL, NULL, NULL); \
	} while (0)
#define Dprintf2(s,a,b) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, \
			    (void *)((unsigned long)(a)), \
			    (void *)((unsigned long)(b)), \
			    NULL, NULL, NULL, NULL, NULL, NULL); \
	} while (0)
#define Dprintf3(s,a,b,c) \
	do { \
		if (rf_queueDebug) \
			rf_debug_printf(s, \
			    (void *)((unsigned long)(a)), \
			    (void *)((unsigned long)(b)), \
			    (void *)((unsigned long)(c)), \
			    NULL, NULL, NULL, NULL, NULL); \
	} while (0)
#else
#define Dprintf1(s,a)		do { } while (0)
#define Dprintf2(s,a,b)		do { } while (0)
#define Dprintf3(s,a,b,c)	do { } while (0)
#endif
102 | | | 104 | |
103 | /***************************************************************************** | | 105 | /***************************************************************************** |
104 | * | | 106 | * |
105 | * the disk queue switch defines all the functions used in the | | 107 | * the disk queue switch defines all the functions used in the |
106 | * different queueing disciplines queue ID, init routine, enqueue | | 108 | * different queueing disciplines queue ID, init routine, enqueue |
107 | * routine, dequeue routine | | 109 | * routine, dequeue routine |
108 | * | | 110 | * |
109 | ****************************************************************************/ | | 111 | ****************************************************************************/ |
110 | | | 112 | |
111 | static const RF_DiskQueueSW_t diskqueuesw[] = { | | 113 | static const RF_DiskQueueSW_t diskqueuesw[] = { |
112 | {"fifo", /* FIFO */ | | 114 | {"fifo", /* FIFO */ |
113 | rf_FifoCreate, | | 115 | rf_FifoCreate, |
114 | rf_FifoEnqueue, | | 116 | rf_FifoEnqueue, |
115 | rf_FifoDequeue, | | 117 | rf_FifoDequeue, |
116 | rf_FifoPeek, | | 118 | rf_FifoPeek, |
117 | rf_FifoPromote}, | | 119 | rf_FifoPromote}, |
118 | | | 120 | |
119 | {"cvscan", /* cvscan */ | | 121 | {"cvscan", /* cvscan */ |
120 | rf_CvscanCreate, | | 122 | rf_CvscanCreate, |
121 | rf_CvscanEnqueue, | | 123 | rf_CvscanEnqueue, |
122 | rf_CvscanDequeue, | | 124 | rf_CvscanDequeue, |
123 | rf_CvscanPeek, | | 125 | rf_CvscanPeek, |
124 | rf_CvscanPromote}, | | 126 | rf_CvscanPromote}, |
125 | | | 127 | |
126 | {"sstf", /* shortest seek time first */ | | 128 | {"sstf", /* shortest seek time first */ |
127 | rf_SstfCreate, | | 129 | rf_SstfCreate, |
128 | rf_SstfEnqueue, | | 130 | rf_SstfEnqueue, |
129 | rf_SstfDequeue, | | 131 | rf_SstfDequeue, |
130 | rf_SstfPeek, | | 132 | rf_SstfPeek, |
131 | rf_SstfPromote}, | | 133 | rf_SstfPromote}, |
132 | | | 134 | |
133 | {"scan", /* SCAN (two-way elevator) */ | | 135 | {"scan", /* SCAN (two-way elevator) */ |
134 | rf_ScanCreate, | | 136 | rf_ScanCreate, |
135 | rf_SstfEnqueue, | | 137 | rf_SstfEnqueue, |
136 | rf_ScanDequeue, | | 138 | rf_ScanDequeue, |
137 | rf_ScanPeek, | | 139 | rf_ScanPeek, |
138 | rf_SstfPromote}, | | 140 | rf_SstfPromote}, |
139 | | | 141 | |
140 | {"cscan", /* CSCAN (one-way elevator) */ | | 142 | {"cscan", /* CSCAN (one-way elevator) */ |
141 | rf_CscanCreate, | | 143 | rf_CscanCreate, |
142 | rf_SstfEnqueue, | | 144 | rf_SstfEnqueue, |
143 | rf_CscanDequeue, | | 145 | rf_CscanDequeue, |
144 | rf_CscanPeek, | | 146 | rf_CscanPeek, |
145 | rf_SstfPromote}, | | 147 | rf_SstfPromote}, |
146 | | | 148 | |
147 | }; | | 149 | }; |
148 | #define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t)) | | 150 | #define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t)) |
149 | | | 151 | |
| | | 152 | |
/* sizing arguments passed to rf_pool_init() for the "rf_dqd_pl" pool */
#define RF_MIN_FREE_DQD		64
#define RF_MAX_FREE_DQD		256

/*
 * Sizing arguments passed to rf_pool_init() for the "rf_bufio_pl" pool.
 * XXX: scale these...
 */
#define RF_MIN_FREE_BUFIO	64
#define RF_MAX_FREE_BUFIO	256
| | | 159 | |
| | | 160 | |
154 | | | 161 | |
155 | /* configures a single disk queue */ | | 162 | /* configures a single disk queue */ |
156 | | | 163 | |
157 | static void | | 164 | static void |
158 | rf_ShutdownDiskQueue(void *arg) | | 165 | rf_ShutdownDiskQueue(void *arg) |
159 | { | | 166 | { |
160 | RF_DiskQueue_t *diskqueue = arg; | | 167 | RF_DiskQueue_t *diskqueue = arg; |
161 | | | 168 | |
162 | rf_destroy_mutex2(diskqueue->mutex); | | 169 | rf_destroy_mutex2(diskqueue->mutex); |
163 | } | | 170 | } |
164 | | | 171 | |
165 | int | | 172 | int |
166 | rf_ConfigureDiskQueue(RF_Raid_t *raidPtr, RF_DiskQueue_t *diskqueue, | | 173 | rf_ConfigureDiskQueue(RF_Raid_t *raidPtr, RF_DiskQueue_t *diskqueue, |
167 | RF_RowCol_t c, const RF_DiskQueueSW_t *p, | | 174 | RF_RowCol_t c, const RF_DiskQueueSW_t *p, |
168 | RF_SectorCount_t sectPerDisk, dev_t dev, | | 175 | RF_SectorCount_t sectPerDisk, dev_t dev, |
169 | int maxOutstanding, RF_ShutdownList_t **listp, | | 176 | int maxOutstanding, RF_ShutdownList_t **listp, |
170 | RF_AllocListElem_t *clList) | | 177 | RF_AllocListElem_t *clList) |
171 | { | | 178 | { |
172 | diskqueue->col = c; | | 179 | diskqueue->col = c; |
173 | diskqueue->qPtr = p; | | 180 | diskqueue->qPtr = p; |
174 | diskqueue->qHdr = (p->Create) (sectPerDisk, clList, listp); | | 181 | diskqueue->qHdr = (p->Create) (sectPerDisk, clList, listp); |
175 | diskqueue->dev = dev; | | 182 | diskqueue->dev = dev; |
176 | diskqueue->numOutstanding = 0; | | 183 | diskqueue->numOutstanding = 0; |
177 | diskqueue->queueLength = 0; | | 184 | diskqueue->queueLength = 0; |
178 | diskqueue->maxOutstanding = maxOutstanding; | | 185 | diskqueue->maxOutstanding = maxOutstanding; |
179 | diskqueue->curPriority = RF_IO_NORMAL_PRIORITY; | | 186 | diskqueue->curPriority = RF_IO_NORMAL_PRIORITY; |
180 | diskqueue->flags = 0; | | 187 | diskqueue->flags = 0; |
181 | diskqueue->raidPtr = raidPtr; | | 188 | diskqueue->raidPtr = raidPtr; |
182 | diskqueue->rf_cinfo = &raidPtr->raid_cinfo[c]; | | 189 | diskqueue->rf_cinfo = &raidPtr->raid_cinfo[c]; |
183 | rf_init_mutex2(diskqueue->mutex, IPL_VM); | | 190 | rf_init_mutex2(diskqueue->mutex, IPL_VM); |
184 | rf_ShutdownCreate(listp, rf_ShutdownDiskQueue, diskqueue); | | 191 | rf_ShutdownCreate(listp, rf_ShutdownDiskQueue, diskqueue); |
185 | return (0); | | 192 | return (0); |
186 | } | | 193 | } |
187 | | | 194 | |
188 | static void | | 195 | static void |
189 | rf_ShutdownDiskQueueSystem(void *ignored) | | 196 | rf_ShutdownDiskQueueSystem(void *ignored) |
190 | { | | 197 | { |
191 | pool_destroy(&rf_pools.dqd); | | 198 | pool_destroy(&rf_pools.dqd); |
| | | 199 | pool_destroy(&rf_pools.bufio); |
192 | } | | 200 | } |
193 | | | 201 | |
194 | int | | 202 | int |
195 | rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp) | | 203 | rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp) |
196 | { | | 204 | { |
197 | | | 205 | |
198 | rf_pool_init(&rf_pools.dqd, sizeof(RF_DiskQueueData_t), | | 206 | rf_pool_init(&rf_pools.dqd, sizeof(RF_DiskQueueData_t), |
199 | "rf_dqd_pl", RF_MIN_FREE_DQD, RF_MAX_FREE_DQD); | | 207 | "rf_dqd_pl", RF_MIN_FREE_DQD, RF_MAX_FREE_DQD); |
| | | 208 | rf_pool_init(&rf_pools.bufio, sizeof(buf_t), |
| | | 209 | "rf_bufio_pl", RF_MIN_FREE_BUFIO, RF_MAX_FREE_BUFIO); |
200 | rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL); | | 210 | rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL); |
201 | | | 211 | |
202 | return (0); | | 212 | return (0); |
203 | } | | 213 | } |
204 | | | 214 | |
205 | int | | 215 | int |
206 | rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, | | 216 | rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, |
207 | RF_Config_t *cfgPtr) | | 217 | RF_Config_t *cfgPtr) |
208 | { | | 218 | { |
209 | RF_DiskQueue_t *diskQueues, *spareQueues; | | 219 | RF_DiskQueue_t *diskQueues, *spareQueues; |
210 | const RF_DiskQueueSW_t *p; | | 220 | const RF_DiskQueueSW_t *p; |
211 | RF_RowCol_t r,c; | | 221 | RF_RowCol_t r,c; |
212 | int rc, i; | | 222 | int rc, i; |
213 | | | 223 | |
214 | raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs; | | 224 | raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs; |
215 | | | 225 | |
216 | for (p = NULL, i = 0; i < NUM_DISK_QUEUE_TYPES; i++) { | | 226 | for (p = NULL, i = 0; i < NUM_DISK_QUEUE_TYPES; i++) { |
217 | if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) { | | 227 | if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) { |
218 | p = &diskqueuesw[i]; | | 228 | p = &diskqueuesw[i]; |
219 | break; | | 229 | break; |
220 | } | | 230 | } |
221 | } | | 231 | } |
222 | if (p == NULL) { | | 232 | if (p == NULL) { |
223 | RF_ERRORMSG2("Unknown queue type \"%s\". Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType); | | 233 | RF_ERRORMSG2("Unknown queue type \"%s\". Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType); |
224 | p = &diskqueuesw[0]; | | 234 | p = &diskqueuesw[0]; |
225 | } | | 235 | } |
226 | raidPtr->qType = p; | | 236 | raidPtr->qType = p; |
227 | | | 237 | |
228 | diskQueues = RF_MallocAndAdd( | | 238 | diskQueues = RF_MallocAndAdd( |
229 | (raidPtr->numCol + RF_MAXSPARE) * sizeof(*diskQueues), | | 239 | (raidPtr->numCol + RF_MAXSPARE) * sizeof(*diskQueues), |
230 | raidPtr->cleanupList); | | 240 | raidPtr->cleanupList); |
231 | if (diskQueues == NULL) | | 241 | if (diskQueues == NULL) |
232 | return (ENOMEM); | | 242 | return (ENOMEM); |
233 | raidPtr->Queues = diskQueues; | | 243 | raidPtr->Queues = diskQueues; |
234 | | | 244 | |
235 | for (c = 0; c < raidPtr->numCol; c++) { | | 245 | for (c = 0; c < raidPtr->numCol; c++) { |
236 | rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[c], | | 246 | rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[c], |
237 | c, p, | | 247 | c, p, |
238 | raidPtr->sectorsPerDisk, | | 248 | raidPtr->sectorsPerDisk, |
239 | raidPtr->Disks[c].dev, | | 249 | raidPtr->Disks[c].dev, |
240 | cfgPtr->maxOutstandingDiskReqs, | | 250 | cfgPtr->maxOutstandingDiskReqs, |
241 | listp, raidPtr->cleanupList); | | 251 | listp, raidPtr->cleanupList); |
242 | if (rc) | | 252 | if (rc) |
243 | return (rc); | | 253 | return (rc); |
244 | } | | 254 | } |
245 | | | 255 | |
246 | spareQueues = &raidPtr->Queues[raidPtr->numCol]; | | 256 | spareQueues = &raidPtr->Queues[raidPtr->numCol]; |
247 | for (r = 0; r < raidPtr->numSpare; r++) { | | 257 | for (r = 0; r < raidPtr->numSpare; r++) { |
248 | rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r], | | 258 | rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r], |
249 | raidPtr->numCol + r, p, | | 259 | raidPtr->numCol + r, p, |
250 | raidPtr->sectorsPerDisk, | | 260 | raidPtr->sectorsPerDisk, |
251 | raidPtr->Disks[raidPtr->numCol + r].dev, | | 261 | raidPtr->Disks[raidPtr->numCol + r].dev, |
252 | cfgPtr->maxOutstandingDiskReqs, listp, | | 262 | cfgPtr->maxOutstandingDiskReqs, listp, |
253 | raidPtr->cleanupList); | | 263 | raidPtr->cleanupList); |
254 | if (rc) | | 264 | if (rc) |
255 | return (rc); | | 265 | return (rc); |
256 | } | | 266 | } |
257 | return (0); | | 267 | return (0); |
258 | } | | 268 | } |
259 | /* Enqueue a disk I/O | | 269 | /* Enqueue a disk I/O |
260 | * | | 270 | * |
261 | * In the kernel, I/O is non-blocking and so we'd like to have multiple | | 271 | * In the kernel, I/O is non-blocking and so we'd like to have multiple |
262 | * I/Os outstanding on the physical disks when possible. | | 272 | * I/Os outstanding on the physical disks when possible. |
263 | * | | 273 | * |
264 | * when any request arrives at a queue, we have two choices: | | 274 | * when any request arrives at a queue, we have two choices: |
265 | * dispatch it to the lower levels | | 275 | * dispatch it to the lower levels |
266 | * queue it up | | 276 | * queue it up |
267 | * | | 277 | * |
268 | * kernel rules for when to do what: | | 278 | * kernel rules for when to do what: |
269 | * unlocking req : always dispatch it | | 279 | * unlocking req : always dispatch it |
270 | * normal req : queue empty => dispatch it & set priority | | 280 | * normal req : queue empty => dispatch it & set priority |
271 | * queue not full & priority is ok => dispatch it | | 281 | * queue not full & priority is ok => dispatch it |
272 | * else queue it | | 282 | * else queue it |
273 | */ | | 283 | */ |
274 | void | | 284 | void |
275 | rf_DiskIOEnqueue(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int pri) | | 285 | rf_DiskIOEnqueue(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int pri) |
276 | { | | 286 | { |
277 | RF_ETIMER_START(req->qtime); | | 287 | RF_ETIMER_START(req->qtime); |
278 | RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector); | | 288 | RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector); |
279 | req->priority = pri; | | 289 | req->priority = pri; |
280 | | | 290 | |
281 | #if RF_DEBUG_DISKQUEUE | | 291 | #if RF_DEBUG_DISKQUEUE |
282 | if (rf_queueDebug && (req->numSector == 0)) { | | 292 | if (rf_queueDebug && (req->numSector == 0)) { |
283 | printf("Warning: Enqueueing zero-sector access\n"); | | 293 | printf("Warning: Enqueueing zero-sector access\n"); |
284 | } | | 294 | } |
285 | #endif | | 295 | #endif |
286 | RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); | | 296 | RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); |
287 | if (RF_OK_TO_DISPATCH(queue, req)) { | | 297 | if (RF_OK_TO_DISPATCH(queue, req)) { |
288 | Dprintf2("Dispatching pri %d regular op to c %d (ok to dispatch)\n", pri, queue->col); | | 298 | Dprintf2("Dispatching pri %d regular op to c %d (ok to dispatch)\n", pri, queue->col); |
289 | rf_DispatchKernelIO(queue, req); | | 299 | rf_DispatchKernelIO(queue, req); |
290 | } else { | | 300 | } else { |
291 | queue->queueLength++; /* increment count of number of requests waiting in this queue */ | | 301 | queue->queueLength++; /* increment count of number of requests waiting in this queue */ |
292 | Dprintf2("Enqueueing pri %d regular op to c %d (not ok to dispatch)\n", pri, queue->col); | | 302 | Dprintf2("Enqueueing pri %d regular op to c %d (not ok to dispatch)\n", pri, queue->col); |
293 | req->queue = (void *) queue; | | 303 | req->queue = (void *) queue; |
294 | (queue->qPtr->Enqueue) (queue->qHdr, req, pri); | | 304 | (queue->qPtr->Enqueue) (queue->qHdr, req, pri); |
295 | } | | 305 | } |
296 | RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); | | 306 | RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue"); |
297 | } | | 307 | } |
298 | | | 308 | |
299 | | | 309 | |
300 | /* get the next set of I/Os started */ | | 310 | /* get the next set of I/Os started */ |
301 | void | | 311 | void |
302 | rf_DiskIOComplete(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int status) | | 312 | rf_DiskIOComplete(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int status) |
303 | { | | 313 | { |
304 | int done = 0; | | 314 | int done = 0; |
305 | | | 315 | |
306 | RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); | | 316 | RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); |
307 | queue->numOutstanding--; | | 317 | queue->numOutstanding--; |
308 | RF_ASSERT(queue->numOutstanding >= 0); | | 318 | RF_ASSERT(queue->numOutstanding >= 0); |
309 | | | 319 | |
310 | /* dispatch requests to the disk until we find one that we can't. */ | | 320 | /* dispatch requests to the disk until we find one that we can't. */ |
311 | /* no reason to continue once we've filled up the queue */ | | 321 | /* no reason to continue once we've filled up the queue */ |
312 | /* no reason to even start if the queue is locked */ | | 322 | /* no reason to even start if the queue is locked */ |
313 | | | 323 | |
314 | while (!done && !RF_QUEUE_FULL(queue)) { | | 324 | while (!done && !RF_QUEUE_FULL(queue)) { |
315 | req = (queue->qPtr->Dequeue) (queue->qHdr); | | 325 | req = (queue->qPtr->Dequeue) (queue->qHdr); |
316 | if (req) { | | 326 | if (req) { |
317 | Dprintf2("DiskIOComplete: extracting pri %d req from queue at c %d\n", req->priority, queue->col); | | 327 | Dprintf2("DiskIOComplete: extracting pri %d req from queue at c %d\n", req->priority, queue->col); |
318 | queue->queueLength--; /* decrement count of number of requests waiting in this queue */ | | 328 | queue->queueLength--; /* decrement count of number of requests waiting in this queue */ |
319 | RF_ASSERT(queue->queueLength >= 0); | | 329 | RF_ASSERT(queue->queueLength >= 0); |
320 | if (RF_OK_TO_DISPATCH(queue, req)) { | | 330 | if (RF_OK_TO_DISPATCH(queue, req)) { |
321 | Dprintf2("DiskIOComplete: dispatching pri %d regular req to c %d (ok to dispatch)\n", req->priority, queue->col); | | 331 | Dprintf2("DiskIOComplete: dispatching pri %d regular req to c %d (ok to dispatch)\n", req->priority, queue->col); |
322 | rf_DispatchKernelIO(queue, req); | | 332 | rf_DispatchKernelIO(queue, req); |
323 | } else { | | 333 | } else { |
324 | /* we can't dispatch it, so just re-enqueue it. | | 334 | /* we can't dispatch it, so just re-enqueue it. |
325 | potential trouble here if disk queues batch reqs */ | | 335 | potential trouble here if disk queues batch reqs */ |
326 | Dprintf2("DiskIOComplete: re-enqueueing pri %d regular req to c %d\n", req->priority, queue->col); | | 336 | Dprintf2("DiskIOComplete: re-enqueueing pri %d regular req to c %d\n", req->priority, queue->col); |
327 | queue->queueLength++; | | 337 | queue->queueLength++; |
328 | (queue->qPtr->Enqueue) (queue->qHdr, req, req->priority); | | 338 | (queue->qPtr->Enqueue) (queue->qHdr, req, req->priority); |
329 | done = 1; | | 339 | done = 1; |
330 | } | | 340 | } |
331 | } else { | | 341 | } else { |
332 | Dprintf1("DiskIOComplete: no more requests to extract.\n", ""); | | 342 | Dprintf1("DiskIOComplete: no more requests to extract.\n", ""); |
333 | done = 1; | | 343 | done = 1; |
334 | } | | 344 | } |
335 | } | | 345 | } |
336 | | | 346 | |
337 | RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); | | 347 | RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete"); |
338 | } | | 348 | } |
339 | /* promotes accesses tagged with the given parityStripeID from low priority | | 349 | /* promotes accesses tagged with the given parityStripeID from low priority |
340 | * to normal priority. This promotion is optional, meaning that a queue | | 350 | * to normal priority. This promotion is optional, meaning that a queue |
341 | * need not implement it. If there is no promotion routine associated with | | 351 | * need not implement it. If there is no promotion routine associated with |
342 | * a queue, this routine does nothing and returns -1. | | 352 | * a queue, this routine does nothing and returns -1. |
343 | */ | | 353 | */ |
344 | int | | 354 | int |
345 | rf_DiskIOPromote(RF_DiskQueue_t *queue, RF_StripeNum_t parityStripeID, | | 355 | rf_DiskIOPromote(RF_DiskQueue_t *queue, RF_StripeNum_t parityStripeID, |
346 | RF_ReconUnitNum_t which_ru) | | 356 | RF_ReconUnitNum_t which_ru) |
347 | { | | 357 | { |
348 | int retval; | | 358 | int retval; |
349 | | | 359 | |
350 | if (!queue->qPtr->Promote) | | 360 | if (!queue->qPtr->Promote) |
351 | return (-1); | | 361 | return (-1); |
352 | RF_LOCK_QUEUE_MUTEX(queue, "DiskIOPromote"); | | 362 | RF_LOCK_QUEUE_MUTEX(queue, "DiskIOPromote"); |
353 | retval = (queue->qPtr->Promote) (queue->qHdr, parityStripeID, which_ru); | | 363 | retval = (queue->qPtr->Promote) (queue->qHdr, parityStripeID, which_ru); |
354 | RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOPromote"); | | 364 | RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOPromote"); |
355 | return (retval); | | 365 | return (retval); |
356 | } | | 366 | } |
357 | | | 367 | |
358 | RF_DiskQueueData_t * | | 368 | RF_DiskQueueData_t * |
359 | rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect, | | 369 | rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect, |
360 | RF_SectorCount_t nsect, void *bf, | | 370 | RF_SectorCount_t nsect, void *bf, |
361 | RF_StripeNum_t parityStripeID, | | 371 | RF_StripeNum_t parityStripeID, |
362 | RF_ReconUnitNum_t which_ru, | | 372 | RF_ReconUnitNum_t which_ru, |
363 | void (*wakeF) (void *, int), void *arg, | | 373 | void (*wakeF) (void *, int), void *arg, |
364 | RF_AccTraceEntry_t *tracerec, RF_Raid_t *raidPtr, | | 374 | RF_AccTraceEntry_t *tracerec, RF_Raid_t *raidPtr, |
365 | RF_DiskQueueDataFlags_t flags, const struct buf *mbp, | | 375 | RF_DiskQueueDataFlags_t flags, const struct buf *mbp, |
366 | int waitflag) | | 376 | int waitflag) |
367 | { | | 377 | { |
368 | RF_DiskQueueData_t *p; | | 378 | RF_DiskQueueData_t *p; |
369 | | | 379 | |
370 | p = pool_get(&rf_pools.dqd, waitflag | PR_ZERO); | | 380 | p = pool_get(&rf_pools.dqd, PR_WAITOK | PR_ZERO); |
371 | if (p == NULL) | | 381 | KASSERT(p != NULL); |
372 | return (NULL); | | | |
373 | | | 382 | |
374 | if (waitflag == PR_WAITOK) { | | 383 | /* Obtain a buffer from our own pool. It is possible for the |
375 | p->bp = getiobuf(NULL, true); | | 384 | regular getiobuf() to run out of memory and return NULL. |
376 | } else { | | 385 | We need to guarantee that never happens, as RAIDframe |
377 | p->bp = getiobuf(NULL, false); | | 386 | doesn't have a good way to recover if memory allocation |
378 | } | | 387 | fails here. |
379 | if (p->bp == NULL) { | | 388 | */ |
380 | pool_put(&rf_pools.dqd, p); | | 389 | p->bp = pool_get(&rf_pools.bufio, PR_WAITOK | PR_ZERO); |
381 | return (NULL); | | 390 | KASSERT(p->bp != NULL); |
382 | } | | 391 | |
| | | 392 | buf_init(p->bp); |
| | | 393 | |
383 | SET(p->bp->b_cflags, BC_BUSY); /* mark buffer busy */ | | 394 | SET(p->bp->b_cflags, BC_BUSY); /* mark buffer busy */ |
384 | if (mbp) { | | 395 | if (mbp) { |
385 | SET(p->bp->b_flags, mbp->b_flags & rf_b_pass); | | 396 | SET(p->bp->b_flags, mbp->b_flags & rf_b_pass); |
386 | p->bp->b_proc = mbp->b_proc; | | 397 | p->bp->b_proc = mbp->b_proc; |
387 | } | | 398 | } |
388 | | | 399 | |
389 | p->sectorOffset = ssect + rf_protectedSectors; | | 400 | p->sectorOffset = ssect + rf_protectedSectors; |
390 | p->numSector = nsect; | | 401 | p->numSector = nsect; |
391 | p->type = typ; | | 402 | p->type = typ; |
392 | p->buf = bf; | | 403 | p->buf = bf; |
393 | p->parityStripeID = parityStripeID; | | 404 | p->parityStripeID = parityStripeID; |
394 | p->which_ru = which_ru; | | 405 | p->which_ru = which_ru; |
395 | p->CompleteFunc = wakeF; | | 406 | p->CompleteFunc = wakeF; |
396 | p->argument = arg; | | 407 | p->argument = arg; |
397 | p->next = NULL; | | 408 | p->next = NULL; |
398 | p->tracerec = tracerec; | | 409 | p->tracerec = tracerec; |
399 | p->priority = RF_IO_NORMAL_PRIORITY; | | 410 | p->priority = RF_IO_NORMAL_PRIORITY; |
400 | p->raidPtr = raidPtr; | | 411 | p->raidPtr = raidPtr; |
401 | p->flags = flags; | | 412 | p->flags = flags; |
402 | return (p); | | 413 | return (p); |
403 | } | | 414 | } |
404 | | | 415 | |
405 | void | | 416 | void |
406 | rf_FreeDiskQueueData(RF_DiskQueueData_t *p) | | 417 | rf_FreeDiskQueueData(RF_DiskQueueData_t *p) |
407 | { | | 418 | { |
408 | putiobuf(p->bp); | | 419 | pool_put(&rf_pools.bufio, p->bp); |
409 | pool_put(&rf_pools.dqd, p); | | 420 | pool_put(&rf_pools.dqd, p); |
410 | } | | 421 | } |