Fri Jul 23 00:26:20 2021 UTC
getiobuf() can return NULL if there are no IO buffers available.
RAIDframe can't deal with that, so create a dedicated pool of buffers
to use for IO.  PR_WAITOK is fine here, as we pre-allocate more
buffers than we need, which guarantees that IO can always make
progress.  Tuning of the pool is still to come.


(oster)
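
[Editorial aid, not part of the commit: the heart of the change is the
allocation pattern sketched below, condensed from the new
rf_CreateDiskQueueData()/rf_FreeDiskQueueData() bodies in the diff that
follows.  The identifiers all come from the diff itself; only the
surrounding skeleton is illustrative.]

	struct buf *bp;

	/* The rf_bufio_pl pool is pre-allocated at configure time (see
	   rf_ConfigureDiskQueueSystem() below), so pool_get() with
	   PR_WAITOK may sleep but does not return NULL the way
	   getiobuf() can. */
	bp = pool_get(&rf_pools.bufio, PR_WAITOK | PR_ZERO);
	KASSERT(bp != NULL);
	buf_init(bp);			/* initialize the freshly zeroed buf */

	/* ... dispatch the component I/O using bp ... */

	pool_put(&rf_pools.bufio, bp);	/* return the buf on completion */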
diff -r1.58 -r1.59 src/sys/dev/raidframe/rf_diskqueue.c
diff -r1.35 -r1.36 src/sys/dev/raidframe/rf_netbsd.h

cvs diff -r1.58 -r1.59 src/sys/dev/raidframe/rf_diskqueue.c

--- src/sys/dev/raidframe/rf_diskqueue.c 2020/06/19 19:32:03 1.58
+++ src/sys/dev/raidframe/rf_diskqueue.c 2021/07/23 00:26:19 1.59
@@ -1,410 +1,421 @@
-/*	$NetBSD: rf_diskqueue.c,v 1.58 2020/06/19 19:32:03 jdolecek Exp $	*/
+/*	$NetBSD: rf_diskqueue.c,v 1.59 2021/07/23 00:26:19 oster Exp $	*/
 /*
  * Copyright (c) 1995 Carnegie-Mellon University.
  * All rights reserved.
  *
  * Author: Mark Holland
  *
  * Permission to use, copy, modify and distribute this software and
  * its documentation is hereby granted, provided that both the copyright
  * notice and this permission notice appear in all copies of the
  * software, derivative works or modified versions, and any portions
  * thereof, and that both notices appear in supporting documentation.
  *
  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
  *
  * Carnegie Mellon requests users of this software to return to
  *
  *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
  *  School of Computer Science
  *  Carnegie Mellon University
  *  Pittsburgh PA 15213-3890
  *
  * any improvements or extensions that they make and grant Carnegie the
  * rights to redistribute these changes.
  */
 
 /****************************************************************************
  *
  * rf_diskqueue.c -- higher-level disk queue code
  *
  * the routines here are a generic wrapper around the actual queueing
  * routines.  The code here implements thread scheduling, synchronization,
  * and locking ops (see below) on top of the lower-level queueing code.
  *
  * to support atomic RMW, we implement "locking operations".  When a
  * locking op is dispatched to the lower levels of the driver, the
  * queue is locked, and no further I/Os are dispatched until the queue
  * receives & completes a corresponding "unlocking operation".  This
  * code relies on the higher layers to guarantee that a locking op
  * will always be eventually followed by an unlocking op.  The model
  * is that the higher layers are structured so locking and unlocking
  * ops occur in pairs, i.e. an unlocking op cannot be generated until
  * after a locking op reports completion.  There is no good way to
  * check to see that an unlocking op "corresponds" to the op that
  * currently has the queue locked, so we make no such attempt.  Since
  * by definition there can be only one locking op outstanding on a
  * disk, this should not be a problem.
  *
  * In the kernel, we allow multiple I/Os to be concurrently dispatched
  * to the disk driver.  In order to support locking ops in this
  * environment, when we decide to do a locking op, we stop dispatching
  * new I/Os and wait until all dispatched I/Os have completed before
  * dispatching the locking op.
  *
  * Unfortunately, the code is different in the 3 different operating
  * states (user level, kernel, simulator).  In the kernel, I/O is
  * non-blocking, and we have no disk threads to dispatch for us.
  * Therefore, we have to dispatch new I/Os to the scsi driver at the
  * time of enqueue, and also at the time of completion.  At user
  * level, I/O is blocking, and so only the disk threads may dispatch
  * I/Os.  Thus at user level, all we can do at enqueue time is enqueue
  * and wake up the disk thread to do the dispatch.
  *
  ****************************************************************************/
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.58 2020/06/19 19:32:03 jdolecek Exp $");
+__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.59 2021/07/23 00:26:19 oster Exp $");
 
 #include <dev/raidframe/raidframevar.h>
 
 #include "rf_threadstuff.h"
 #include "rf_raid.h"
 #include "rf_diskqueue.h"
 #include "rf_alloclist.h"
 #include "rf_acctrace.h"
 #include "rf_etimer.h"
 #include "rf_general.h"
 #include "rf_debugprint.h"
 #include "rf_shutdown.h"
 #include "rf_cvscan.h"
 #include "rf_sstf.h"
 #include "rf_fifo.h"
 #include "rf_kintf.h"
 
+#include <sys/buf.h>
+
 static void rf_ShutdownDiskQueueSystem(void *);
 
 #ifndef RF_DEBUG_DISKQUEUE
 #define RF_DEBUG_DISKQUEUE 0
 #endif
 
 #if RF_DEBUG_DISKQUEUE
 #define Dprintf1(s,a) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
 #define Dprintf2(s,a,b) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
 #define Dprintf3(s,a,b,c) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL)
 #else
 #define Dprintf1(s,a)
 #define Dprintf2(s,a,b)
 #define Dprintf3(s,a,b,c)
 #endif
 
 /*****************************************************************************
  *
  * the disk queue switch defines all the functions used in the
  * different queueing disciplines queue ID, init routine, enqueue
  * routine, dequeue routine
  *
  ****************************************************************************/
 
 static const RF_DiskQueueSW_t diskqueuesw[] = {
 	{"fifo",	/* FIFO */
 	 rf_FifoCreate,
 	 rf_FifoEnqueue,
 	 rf_FifoDequeue,
 	 rf_FifoPeek,
 	 rf_FifoPromote},
 
 	{"cvscan",	/* cvscan */
 	 rf_CvscanCreate,
 	 rf_CvscanEnqueue,
 	 rf_CvscanDequeue,
 	 rf_CvscanPeek,
 	 rf_CvscanPromote},
 
 	{"sstf",	/* shortest seek time first */
 	 rf_SstfCreate,
 	 rf_SstfEnqueue,
 	 rf_SstfDequeue,
 	 rf_SstfPeek,
 	 rf_SstfPromote},
 
 	{"scan",	/* SCAN (two-way elevator) */
 	 rf_ScanCreate,
 	 rf_SstfEnqueue,
 	 rf_ScanDequeue,
 	 rf_ScanPeek,
 	 rf_SstfPromote},
 
 	{"cscan",	/* CSCAN (one-way elevator) */
 	 rf_CscanCreate,
 	 rf_SstfEnqueue,
 	 rf_CscanDequeue,
 	 rf_CscanPeek,
 	 rf_SstfPromote},
 
 };
 #define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t))
 
+
 #define RF_MAX_FREE_DQD 256
 #define RF_MIN_FREE_DQD  64
 
-#include <sys/buf.h>
+/* XXX: scale these... */
+#define RF_MAX_FREE_BUFIO 256
+#define RF_MIN_FREE_BUFIO  64
+
+
 
 /* configures a single disk queue */
 
 static void
 rf_ShutdownDiskQueue(void *arg)
 {
 	RF_DiskQueue_t *diskqueue = arg;
 
 	rf_destroy_mutex2(diskqueue->mutex);
 }
 
 int
 rf_ConfigureDiskQueue(RF_Raid_t *raidPtr, RF_DiskQueue_t *diskqueue,
 		      RF_RowCol_t c, const RF_DiskQueueSW_t *p,
 		      RF_SectorCount_t sectPerDisk, dev_t dev,
 		      int maxOutstanding, RF_ShutdownList_t **listp,
 		      RF_AllocListElem_t *clList)
 {
 	diskqueue->col = c;
 	diskqueue->qPtr = p;
 	diskqueue->qHdr = (p->Create) (sectPerDisk, clList, listp);
 	diskqueue->dev = dev;
 	diskqueue->numOutstanding = 0;
 	diskqueue->queueLength = 0;
 	diskqueue->maxOutstanding = maxOutstanding;
 	diskqueue->curPriority = RF_IO_NORMAL_PRIORITY;
 	diskqueue->flags = 0;
 	diskqueue->raidPtr = raidPtr;
 	diskqueue->rf_cinfo = &raidPtr->raid_cinfo[c];
 	rf_init_mutex2(diskqueue->mutex, IPL_VM);
 	rf_ShutdownCreate(listp, rf_ShutdownDiskQueue, diskqueue);
 	return (0);
 }
 
 static void
 rf_ShutdownDiskQueueSystem(void *ignored)
 {
 	pool_destroy(&rf_pools.dqd);
+	pool_destroy(&rf_pools.bufio);
 }
 
 int
 rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp)
 {
 
 	rf_pool_init(&rf_pools.dqd, sizeof(RF_DiskQueueData_t),
 		     "rf_dqd_pl", RF_MIN_FREE_DQD, RF_MAX_FREE_DQD);
+	rf_pool_init(&rf_pools.bufio, sizeof(buf_t),
+		     "rf_bufio_pl", RF_MIN_FREE_BUFIO, RF_MAX_FREE_BUFIO);
 	rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL);
 
 	return (0);
 }
 
 int
 rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
 		       RF_Config_t *cfgPtr)
 {
 	RF_DiskQueue_t *diskQueues, *spareQueues;
 	const RF_DiskQueueSW_t *p;
 	RF_RowCol_t r,c;
 	int rc, i;
 
 	raidPtr->maxQueueDepth = cfgPtr->maxOutstandingDiskReqs;
 
 	for (p = NULL, i = 0; i < NUM_DISK_QUEUE_TYPES; i++) {
 		if (!strcmp(diskqueuesw[i].queueType, cfgPtr->diskQueueType)) {
 			p = &diskqueuesw[i];
 			break;
 		}
 	}
 	if (p == NULL) {
 		RF_ERRORMSG2("Unknown queue type \"%s\".  Using %s\n", cfgPtr->diskQueueType, diskqueuesw[0].queueType);
 		p = &diskqueuesw[0];
 	}
 	raidPtr->qType = p;
 
 	diskQueues = RF_MallocAndAdd(
 	    (raidPtr->numCol + RF_MAXSPARE) * sizeof(*diskQueues),
 	    raidPtr->cleanupList);
 	if (diskQueues == NULL)
 		return (ENOMEM);
 	raidPtr->Queues = diskQueues;
 
 	for (c = 0; c < raidPtr->numCol; c++) {
 		rc = rf_ConfigureDiskQueue(raidPtr, &diskQueues[c],
 					   c, p,
 					   raidPtr->sectorsPerDisk,
 					   raidPtr->Disks[c].dev,
 					   cfgPtr->maxOutstandingDiskReqs,
 					   listp, raidPtr->cleanupList);
 		if (rc)
 			return (rc);
 	}
 
 	spareQueues = &raidPtr->Queues[raidPtr->numCol];
 	for (r = 0; r < raidPtr->numSpare; r++) {
 		rc = rf_ConfigureDiskQueue(raidPtr, &spareQueues[r],
 					   raidPtr->numCol + r, p,
 					   raidPtr->sectorsPerDisk,
 					   raidPtr->Disks[raidPtr->numCol + r].dev,
 					   cfgPtr->maxOutstandingDiskReqs, listp,
 					   raidPtr->cleanupList);
 		if (rc)
 			return (rc);
 	}
 	return (0);
 }
 /* Enqueue a disk I/O
  *
  * In the kernel, I/O is non-blocking and so we'd like to have multiple
  * I/Os outstanding on the physical disks when possible.
  *
  * when any request arrives at a queue, we have two choices:
  *    dispatch it to the lower levels
  *    queue it up
  *
  * kernel rules for when to do what:
  *    unlocking req  :  always dispatch it
  *    normal req     :  queue empty  =>  dispatch it & set priority
  *                      queue not full & priority is ok  =>  dispatch it
  *                      else queue it
  */
 void
 rf_DiskIOEnqueue(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int pri)
 {
 	RF_ETIMER_START(req->qtime);
 	RF_ASSERT(req->type == RF_IO_TYPE_NOP || req->numSector);
 	req->priority = pri;
 
 #if RF_DEBUG_DISKQUEUE
 	if (rf_queueDebug && (req->numSector == 0)) {
 		printf("Warning: Enqueueing zero-sector access\n");
 	}
 #endif
 	RF_LOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue");
 	if (RF_OK_TO_DISPATCH(queue, req)) {
 		Dprintf2("Dispatching pri %d regular op to c %d (ok to dispatch)\n", pri, queue->col);
 		rf_DispatchKernelIO(queue, req);
 	} else {
 		queue->queueLength++;	/* increment count of number of requests waiting in this queue */
 		Dprintf2("Enqueueing pri %d regular op to c %d (not ok to dispatch)\n", pri, queue->col);
 		req->queue = (void *) queue;
 		(queue->qPtr->Enqueue) (queue->qHdr, req, pri);
 	}
 	RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOEnqueue");
 }
 
 
 /* get the next set of I/Os started */
 void
 rf_DiskIOComplete(RF_DiskQueue_t *queue, RF_DiskQueueData_t *req, int status)
 {
 	int done = 0;
 
 	RF_LOCK_QUEUE_MUTEX(queue, "DiskIOComplete");
 	queue->numOutstanding--;
 	RF_ASSERT(queue->numOutstanding >= 0);
 
 	/* dispatch requests to the disk until we find one that we can't. */
 	/* no reason to continue once we've filled up the queue */
 	/* no reason to even start if the queue is locked */
 
 	while (!done && !RF_QUEUE_FULL(queue)) {
 		req = (queue->qPtr->Dequeue) (queue->qHdr);
 		if (req) {
 			Dprintf2("DiskIOComplete: extracting pri %d req from queue at c %d\n", req->priority, queue->col);
 			queue->queueLength--;	/* decrement count of number of requests waiting in this queue */
 			RF_ASSERT(queue->queueLength >= 0);
 			if (RF_OK_TO_DISPATCH(queue, req)) {
 				Dprintf2("DiskIOComplete: dispatching pri %d regular req to c %d (ok to dispatch)\n", req->priority, queue->col);
 				rf_DispatchKernelIO(queue, req);
 			} else {
 				/* we can't dispatch it, so just re-enqueue it.
 				   potential trouble here if disk queues batch reqs */
 				Dprintf2("DiskIOComplete: re-enqueueing pri %d regular req to c %d\n", req->priority, queue->col);
 				queue->queueLength++;
 				(queue->qPtr->Enqueue) (queue->qHdr, req, req->priority);
 				done = 1;
 			}
 		} else {
 			Dprintf1("DiskIOComplete: no more requests to extract.\n", "");
 			done = 1;
 		}
 	}
 
 	RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOComplete");
 }
 /* promotes accesses tagged with the given parityStripeID from low priority
  * to normal priority.  This promotion is optional, meaning that a queue
  * need not implement it.  If there is no promotion routine associated with
  * a queue, this routine does nothing and returns -1.
  */
 int
 rf_DiskIOPromote(RF_DiskQueue_t *queue, RF_StripeNum_t parityStripeID,
 		 RF_ReconUnitNum_t which_ru)
 {
 	int retval;
 
 	if (!queue->qPtr->Promote)
 		return (-1);
 	RF_LOCK_QUEUE_MUTEX(queue, "DiskIOPromote");
 	retval = (queue->qPtr->Promote) (queue->qHdr, parityStripeID, which_ru);
 	RF_UNLOCK_QUEUE_MUTEX(queue, "DiskIOPromote");
 	return (retval);
 }
 
 RF_DiskQueueData_t *
 rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect,
 		       RF_SectorCount_t nsect, void *bf,
 		       RF_StripeNum_t parityStripeID,
 		       RF_ReconUnitNum_t which_ru,
 		       void (*wakeF) (void *, int), void *arg,
 		       RF_AccTraceEntry_t *tracerec, RF_Raid_t *raidPtr,
 		       RF_DiskQueueDataFlags_t flags, const struct buf *mbp,
 		       int waitflag)
 {
 	RF_DiskQueueData_t *p;
 
-	p = pool_get(&rf_pools.dqd, waitflag | PR_ZERO);
-	if (p == NULL)
-		return (NULL);
+	p = pool_get(&rf_pools.dqd, PR_WAITOK | PR_ZERO);
+	KASSERT(p != NULL);
 
-	if (waitflag == PR_WAITOK) {
-		p->bp = getiobuf(NULL, true);
-	} else {
-		p->bp = getiobuf(NULL, false);
-	}
-	if (p->bp == NULL) {
-		pool_put(&rf_pools.dqd, p);
-		return (NULL);
-	}
+	/* Obtain a buffer from our own pool.  It is possible for the
+	   regular getiobuf() to run out of memory and return NULL.
+	   We need to guarantee that never happens, as RAIDframe
+	   doesn't have a good way to recover if memory allocation
+	   fails here.
+	*/
+	p->bp = pool_get(&rf_pools.bufio, PR_WAITOK | PR_ZERO);
+	KASSERT(p->bp != NULL);
+
+	buf_init(p->bp);
+
 	SET(p->bp->b_cflags, BC_BUSY);	/* mark buffer busy */
 	if (mbp) {
 		SET(p->bp->b_flags, mbp->b_flags & rf_b_pass);
 		p->bp->b_proc = mbp->b_proc;
 	}
 
 	p->sectorOffset = ssect + rf_protectedSectors;
 	p->numSector = nsect;
 	p->type = typ;
 	p->buf = bf;
 	p->parityStripeID = parityStripeID;
 	p->which_ru = which_ru;
 	p->CompleteFunc = wakeF;
 	p->argument = arg;
 	p->next = NULL;
 	p->tracerec = tracerec;
 	p->priority = RF_IO_NORMAL_PRIORITY;
 	p->raidPtr = raidPtr;
 	p->flags = flags;
 	return (p);
 }
 
 void
 rf_FreeDiskQueueData(RF_DiskQueueData_t *p)
 {
-	putiobuf(p->bp);
+	pool_put(&rf_pools.bufio, p->bp);
 	pool_put(&rf_pools.dqd, p);
 }
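
[Editorial note: the no-fail guarantee above rests on the pool being set
up ahead of time.  The one-time setup, restated from
rf_ConfigureDiskQueueSystem() in the diff above, and assuming
rf_pool_init() primes the pool with its minimum-count argument, as the
pre-allocation described in the log message implies:]

	/* Create the dedicated buf pool and pre-allocate
	   RF_MIN_FREE_BUFIO entries so that later PR_WAITOK
	   allocations have stock to draw on; RF_MAX_FREE_BUFIO bounds
	   how many idle bufs the pool keeps around. */
	rf_pool_init(&rf_pools.bufio, sizeof(buf_t),
	    "rf_bufio_pl", RF_MIN_FREE_BUFIO, RF_MAX_FREE_BUFIO);

[The "XXX: scale these..." comment flags that 64/256 are placeholder
bounds; per the log message, tuning is still to come.]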

cvs diff -r1.35 -r1.36 src/sys/dev/raidframe/rf_netbsd.h

--- src/sys/dev/raidframe/rf_netbsd.h 2020/06/19 19:29:39 1.35
+++ src/sys/dev/raidframe/rf_netbsd.h 2021/07/23 00:26:19 1.36
@@ -1,109 +1,110 @@
-/*	$NetBSD: rf_netbsd.h,v 1.35 2020/06/19 19:29:39 jdolecek Exp $	*/
+/*	$NetBSD: rf_netbsd.h,v 1.36 2021/07/23 00:26:19 oster Exp $	*/
 
 /*-
  * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
  * All rights reserved.
  *
  * This code is derived from software contributed to The NetBSD Foundation
  * by Greg Oster; Jason R. Thorpe.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
  * 1. Redistributions of source code must retain the above copyright
  *    notice, this list of conditions and the following disclaimer.
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in the
  *    documentation and/or other materials provided with the distribution.
  *
  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  * POSSIBILITY OF SUCH DAMAGE.
  */
 
 #ifndef _RF__RF_NETBSDSTUFF_H_
 #define _RF__RF_NETBSDSTUFF_H_
 
 #include <sys/fcntl.h>
 #include <sys/systm.h>
 #include <sys/vnode.h>
 #include <sys/pool.h>
 #include <sys/disk.h>
 
 #include <dev/dkvar.h>
 #include <dev/raidframe/raidframevar.h>
 
 struct raidcinfo {
 	struct vnode *ci_vp;	/* component device's vnode */
 	dev_t ci_dev;		/* component device's dev_t */
 	RF_ComponentLabel_t ci_label; /* components RAIDframe label */
 #if 0
 	size_t ci_size;		/* size */
 	char *ci_path;		/* path to component */
 	size_t ci_pathlen;	/* length of component path */
 #endif
 };
 
 
 /* a little structure to serve as a container for all the various
    global pools used in RAIDframe */
 
 struct RF_Pools_s {
 	struct pool alloclist;	/* AllocList */
 	struct pool asm_hdr;	/* Access Stripe Map Header */
 	struct pool asmap;	/* Access Stripe Map */
 	struct pool asmhle;	/* Access Stripe Map Header List Elements */
+	struct pool bufio;	/* Buffer IO Pool */
 	struct pool callbackf;	/* Callback function descriptors */
 	struct pool callbackv;	/* Callback value descriptors */
 	struct pool dagh;	/* DAG headers */
 	struct pool dagnode;	/* DAG nodes */
 	struct pool daglist;	/* DAG lists */
 	struct pool dagpcache;	/* DAG pointer/param cache */
 	struct pool dqd;	/* Disk Queue Data */
 	struct pool fss;	/* Failed Stripe Structures */
 	struct pool funclist;	/* Function Lists */
 	struct pool mcpair;	/* Mutex/Cond Pairs */
 	struct pool pda;	/* Physical Disk Access structures */
 	struct pool pss;	/* Parity Stripe Status */
 	struct pool pss_issued;	/* Parity Stripe Status Issued */
 	struct pool rad;	/* Raid Access Descriptors */
 	struct pool reconbuffer; /* reconstruction buffer (header) pool */
 	struct pool revent;	/* reconstruct events */
 	struct pool stripelock;	/* StripeLock */
 	struct pool vfple;	/* VoidFunctionPtr List Elements */
 	struct pool vple;	/* VoidPointer List Elements */
 };
 
 extern struct RF_Pools_s rf_pools;
 void rf_pool_init(struct pool *, size_t, const char *, size_t, size_t);
 int rf_buf_queue_check(RF_Raid_t *);
 
 /* XXX probably belongs in a different .h file. */
 typedef struct RF_AutoConfig_s {
 	char devname[56];	/* the name of this component */
 	int flag;		/* a general-purpose flag */
 	dev_t dev;		/* the device for this component */
 	struct vnode *vp;	/* Mr. Vnode Pointer */
 	RF_ComponentLabel_t *clabel; /* the label */
 	struct RF_AutoConfig_s *next; /* the next autoconfig structure
 					 in this set. */
 } RF_AutoConfig_t;
 
 typedef struct RF_ConfigSet_s {
 	struct RF_AutoConfig_s *ac;	/* all of the autoconfig structures for
 					   this config set. */
 	int rootable;	/* Set to 1 if this set can be root */
 	struct RF_ConfigSet_s *next;
 } RF_ConfigSet_t;
 
 extern const int rf_b_pass;
 
 #endif /* _RF__RF_NETBSDSTUFF_H_ */