Fri Jul 23 00:26:20 2021 UTC ()
getiobuf() can return NULL if there are no IO buffers available.
RAIDframe can't deal with that, so create a dedicated pool of buffers
to use for IO.  PR_WAITOK is fine here, as we pre-allocate more buffers
than we need, to guarantee that IO can make progress.  Tuning of the pool is still to
come.


(oster)
diff -r1.58 -r1.59 src/sys/dev/raidframe/rf_diskqueue.c
diff -r1.35 -r1.36 src/sys/dev/raidframe/rf_netbsd.h

cvs diff -r1.58 -r1.59 src/sys/dev/raidframe/rf_diskqueue.c (expand / switch to unified diff)

--- src/sys/dev/raidframe/rf_diskqueue.c 2020/06/19 19:32:03 1.58
+++ src/sys/dev/raidframe/rf_diskqueue.c 2021/07/23 00:26:19 1.59
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: rf_diskqueue.c,v 1.58 2020/06/19 19:32:03 jdolecek Exp $ */ 1/* $NetBSD: rf_diskqueue.c,v 1.59 2021/07/23 00:26:19 oster Exp $ */
2/* 2/*
3 * Copyright (c) 1995 Carnegie-Mellon University. 3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved. 4 * All rights reserved.
5 * 5 *
6 * Author: Mark Holland 6 * Author: Mark Holland
7 * 7 *
8 * Permission to use, copy, modify and distribute this software and 8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright 9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the 10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions 11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation. 12 * thereof, and that both notices appear in supporting documentation.
13 * 13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
@@ -56,44 +56,46 @@ @@ -56,44 +56,46 @@
56 * 56 *
57 * Unfortunately, the code is different in the 3 different operating 57 * Unfortunately, the code is different in the 3 different operating
58 * states (user level, kernel, simulator). In the kernel, I/O is 58 * states (user level, kernel, simulator). In the kernel, I/O is
59 * non-blocking, and we have no disk threads to dispatch for us. 59 * non-blocking, and we have no disk threads to dispatch for us.
60 * Therefore, we have to dispatch new I/Os to the scsi driver at the 60 * Therefore, we have to dispatch new I/Os to the scsi driver at the
61 * time of enqueue, and also at the time of completion. At user 61 * time of enqueue, and also at the time of completion. At user
62 * level, I/O is blocking, and so only the disk threads may dispatch 62 * level, I/O is blocking, and so only the disk threads may dispatch
63 * I/Os. Thus at user level, all we can do at enqueue time is enqueue 63 * I/Os. Thus at user level, all we can do at enqueue time is enqueue
64 * and wake up the disk thread to do the dispatch. 64 * and wake up the disk thread to do the dispatch.
65 * 65 *
66 ****************************************************************************/ 66 ****************************************************************************/
67 67
68#include <sys/cdefs.h> 68#include <sys/cdefs.h>
69__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.58 2020/06/19 19:32:03 jdolecek Exp $"); 69__KERNEL_RCSID(0, "$NetBSD: rf_diskqueue.c,v 1.59 2021/07/23 00:26:19 oster Exp $");
70 70
71#include <dev/raidframe/raidframevar.h> 71#include <dev/raidframe/raidframevar.h>
72 72
73#include "rf_threadstuff.h" 73#include "rf_threadstuff.h"
74#include "rf_raid.h" 74#include "rf_raid.h"
75#include "rf_diskqueue.h" 75#include "rf_diskqueue.h"
76#include "rf_alloclist.h" 76#include "rf_alloclist.h"
77#include "rf_acctrace.h" 77#include "rf_acctrace.h"
78#include "rf_etimer.h" 78#include "rf_etimer.h"
79#include "rf_general.h" 79#include "rf_general.h"
80#include "rf_debugprint.h" 80#include "rf_debugprint.h"
81#include "rf_shutdown.h" 81#include "rf_shutdown.h"
82#include "rf_cvscan.h" 82#include "rf_cvscan.h"
83#include "rf_sstf.h" 83#include "rf_sstf.h"
84#include "rf_fifo.h" 84#include "rf_fifo.h"
85#include "rf_kintf.h" 85#include "rf_kintf.h"
86 86
 87#include <sys/buf.h>
 88
87static void rf_ShutdownDiskQueueSystem(void *); 89static void rf_ShutdownDiskQueueSystem(void *);
88 90
89#ifndef RF_DEBUG_DISKQUEUE 91#ifndef RF_DEBUG_DISKQUEUE
90#define RF_DEBUG_DISKQUEUE 0 92#define RF_DEBUG_DISKQUEUE 0
91#endif 93#endif
92 94
93#if RF_DEBUG_DISKQUEUE 95#if RF_DEBUG_DISKQUEUE
94#define Dprintf1(s,a) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL) 96#define Dprintf1(s,a) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),NULL,NULL,NULL,NULL,NULL,NULL,NULL)
95#define Dprintf2(s,a,b) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL) 97#define Dprintf2(s,a,b) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),NULL,NULL,NULL,NULL,NULL,NULL)
96#define Dprintf3(s,a,b,c) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL) 98#define Dprintf3(s,a,b,c) if (rf_queueDebug) rf_debug_printf(s,(void *)((unsigned long)a),(void *)((unsigned long)b),(void *)((unsigned long)c),NULL,NULL,NULL,NULL,NULL)
97#else 99#else
98#define Dprintf1(s,a) 100#define Dprintf1(s,a)
99#define Dprintf2(s,a,b) 101#define Dprintf2(s,a,b)
@@ -137,30 +139,35 @@ static const RF_DiskQueueSW_t diskqueues @@ -137,30 +139,35 @@ static const RF_DiskQueueSW_t diskqueues
137 rf_ScanPeek, 139 rf_ScanPeek,
138 rf_SstfPromote}, 140 rf_SstfPromote},
139 141
140 {"cscan", /* CSCAN (one-way elevator) */ 142 {"cscan", /* CSCAN (one-way elevator) */
141 rf_CscanCreate, 143 rf_CscanCreate,
142 rf_SstfEnqueue, 144 rf_SstfEnqueue,
143 rf_CscanDequeue, 145 rf_CscanDequeue,
144 rf_CscanPeek, 146 rf_CscanPeek,
145 rf_SstfPromote}, 147 rf_SstfPromote},
146 148
147}; 149};
148#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t)) 150#define NUM_DISK_QUEUE_TYPES (sizeof(diskqueuesw)/sizeof(RF_DiskQueueSW_t))
149 151
 152
150#define RF_MAX_FREE_DQD 256 153#define RF_MAX_FREE_DQD 256
151#define RF_MIN_FREE_DQD 64 154#define RF_MIN_FREE_DQD 64
152 155
153#include <sys/buf.h> 156/* XXX: scale these... */
 157#define RF_MAX_FREE_BUFIO 256
 158#define RF_MIN_FREE_BUFIO 64
 159
 160
154 161
155/* configures a single disk queue */ 162/* configures a single disk queue */
156 163
157static void 164static void
158rf_ShutdownDiskQueue(void *arg) 165rf_ShutdownDiskQueue(void *arg)
159{ 166{
160 RF_DiskQueue_t *diskqueue = arg; 167 RF_DiskQueue_t *diskqueue = arg;
161 168
162 rf_destroy_mutex2(diskqueue->mutex); 169 rf_destroy_mutex2(diskqueue->mutex);
163} 170}
164 171
165int 172int
166rf_ConfigureDiskQueue(RF_Raid_t *raidPtr, RF_DiskQueue_t *diskqueue, 173rf_ConfigureDiskQueue(RF_Raid_t *raidPtr, RF_DiskQueue_t *diskqueue,
@@ -179,34 +186,37 @@ rf_ConfigureDiskQueue(RF_Raid_t *raidPtr @@ -179,34 +186,37 @@ rf_ConfigureDiskQueue(RF_Raid_t *raidPtr
179 diskqueue->curPriority = RF_IO_NORMAL_PRIORITY; 186 diskqueue->curPriority = RF_IO_NORMAL_PRIORITY;
180 diskqueue->flags = 0; 187 diskqueue->flags = 0;
181 diskqueue->raidPtr = raidPtr; 188 diskqueue->raidPtr = raidPtr;
182 diskqueue->rf_cinfo = &raidPtr->raid_cinfo[c]; 189 diskqueue->rf_cinfo = &raidPtr->raid_cinfo[c];
183 rf_init_mutex2(diskqueue->mutex, IPL_VM); 190 rf_init_mutex2(diskqueue->mutex, IPL_VM);
184 rf_ShutdownCreate(listp, rf_ShutdownDiskQueue, diskqueue); 191 rf_ShutdownCreate(listp, rf_ShutdownDiskQueue, diskqueue);
185 return (0); 192 return (0);
186} 193}
187 194
188static void 195static void
189rf_ShutdownDiskQueueSystem(void *ignored) 196rf_ShutdownDiskQueueSystem(void *ignored)
190{ 197{
191 pool_destroy(&rf_pools.dqd); 198 pool_destroy(&rf_pools.dqd);
 199 pool_destroy(&rf_pools.bufio);
192} 200}
193 201
194int 202int
195rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp) 203rf_ConfigureDiskQueueSystem(RF_ShutdownList_t **listp)
196{ 204{
197 205
198 rf_pool_init(&rf_pools.dqd, sizeof(RF_DiskQueueData_t), 206 rf_pool_init(&rf_pools.dqd, sizeof(RF_DiskQueueData_t),
199 "rf_dqd_pl", RF_MIN_FREE_DQD, RF_MAX_FREE_DQD); 207 "rf_dqd_pl", RF_MIN_FREE_DQD, RF_MAX_FREE_DQD);
 208 rf_pool_init(&rf_pools.bufio, sizeof(buf_t),
 209 "rf_bufio_pl", RF_MIN_FREE_BUFIO, RF_MAX_FREE_BUFIO);
200 rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL); 210 rf_ShutdownCreate(listp, rf_ShutdownDiskQueueSystem, NULL);
201 211
202 return (0); 212 return (0);
203} 213}
204 214
205int 215int
206rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr, 216rf_ConfigureDiskQueues(RF_ShutdownList_t **listp, RF_Raid_t *raidPtr,
207 RF_Config_t *cfgPtr) 217 RF_Config_t *cfgPtr)
208{ 218{
209 RF_DiskQueue_t *diskQueues, *spareQueues; 219 RF_DiskQueue_t *diskQueues, *spareQueues;
210 const RF_DiskQueueSW_t *p; 220 const RF_DiskQueueSW_t *p;
211 RF_RowCol_t r,c; 221 RF_RowCol_t r,c;
212 int rc, i; 222 int rc, i;
@@ -357,54 +367,55 @@ rf_DiskIOPromote(RF_DiskQueue_t *queue,  @@ -357,54 +367,55 @@ rf_DiskIOPromote(RF_DiskQueue_t *queue,
357 367
358RF_DiskQueueData_t * 368RF_DiskQueueData_t *
359rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect, 369rf_CreateDiskQueueData(RF_IoType_t typ, RF_SectorNum_t ssect,
360 RF_SectorCount_t nsect, void *bf, 370 RF_SectorCount_t nsect, void *bf,
361 RF_StripeNum_t parityStripeID, 371 RF_StripeNum_t parityStripeID,
362 RF_ReconUnitNum_t which_ru, 372 RF_ReconUnitNum_t which_ru,
363 void (*wakeF) (void *, int), void *arg, 373 void (*wakeF) (void *, int), void *arg,
364 RF_AccTraceEntry_t *tracerec, RF_Raid_t *raidPtr, 374 RF_AccTraceEntry_t *tracerec, RF_Raid_t *raidPtr,
365 RF_DiskQueueDataFlags_t flags, const struct buf *mbp, 375 RF_DiskQueueDataFlags_t flags, const struct buf *mbp,
366 int waitflag) 376 int waitflag)
367{ 377{
368 RF_DiskQueueData_t *p; 378 RF_DiskQueueData_t *p;
369 379
370 p = pool_get(&rf_pools.dqd, waitflag | PR_ZERO); 380 p = pool_get(&rf_pools.dqd, PR_WAITOK | PR_ZERO);
371 if (p == NULL) 381 KASSERT(p != NULL);
372 return (NULL); 
373 382
374 if (waitflag == PR_WAITOK) { 383 /* Obtain a buffer from our own pool. It is possible for the
375 p->bp = getiobuf(NULL, true); 384 regular getiobuf() to run out of memory and return NULL.
376 } else { 385 We need to guarantee that never happens, as RAIDframe
377 p->bp = getiobuf(NULL, false); 386 doesn't have a good way to recover if memory allocation
378 } 387 fails here.
379 if (p->bp == NULL) { 388 */
380 pool_put(&rf_pools.dqd, p); 389 p->bp = pool_get(&rf_pools.bufio, PR_WAITOK | PR_ZERO);
381 return (NULL); 390 KASSERT(p->bp != NULL);
382 } 391
 392 buf_init(p->bp);
 393
383 SET(p->bp->b_cflags, BC_BUSY); /* mark buffer busy */ 394 SET(p->bp->b_cflags, BC_BUSY); /* mark buffer busy */
384 if (mbp) { 395 if (mbp) {
385 SET(p->bp->b_flags, mbp->b_flags & rf_b_pass); 396 SET(p->bp->b_flags, mbp->b_flags & rf_b_pass);
386 p->bp->b_proc = mbp->b_proc; 397 p->bp->b_proc = mbp->b_proc;
387 } 398 }
388 399
389 p->sectorOffset = ssect + rf_protectedSectors; 400 p->sectorOffset = ssect + rf_protectedSectors;
390 p->numSector = nsect; 401 p->numSector = nsect;
391 p->type = typ; 402 p->type = typ;
392 p->buf = bf; 403 p->buf = bf;
393 p->parityStripeID = parityStripeID; 404 p->parityStripeID = parityStripeID;
394 p->which_ru = which_ru; 405 p->which_ru = which_ru;
395 p->CompleteFunc = wakeF; 406 p->CompleteFunc = wakeF;
396 p->argument = arg; 407 p->argument = arg;
397 p->next = NULL; 408 p->next = NULL;
398 p->tracerec = tracerec; 409 p->tracerec = tracerec;
399 p->priority = RF_IO_NORMAL_PRIORITY; 410 p->priority = RF_IO_NORMAL_PRIORITY;
400 p->raidPtr = raidPtr; 411 p->raidPtr = raidPtr;
401 p->flags = flags; 412 p->flags = flags;
402 return (p); 413 return (p);
403} 414}
404 415
405void 416void
406rf_FreeDiskQueueData(RF_DiskQueueData_t *p) 417rf_FreeDiskQueueData(RF_DiskQueueData_t *p)
407{ 418{
408 putiobuf(p->bp); 419 pool_put(&rf_pools.bufio, p->bp);
409 pool_put(&rf_pools.dqd, p); 420 pool_put(&rf_pools.dqd, p);
410} 421}

cvs diff -r1.35 -r1.36 src/sys/dev/raidframe/rf_netbsd.h (expand / switch to unified diff)

--- src/sys/dev/raidframe/rf_netbsd.h 2020/06/19 19:29:39 1.35
+++ src/sys/dev/raidframe/rf_netbsd.h 2021/07/23 00:26:19 1.36
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: rf_netbsd.h,v 1.35 2020/06/19 19:29:39 jdolecek Exp $ */ 1/* $NetBSD: rf_netbsd.h,v 1.36 2021/07/23 00:26:19 oster Exp $ */
2 2
3/*- 3/*-
4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc. 4 * Copyright (c) 1996, 1997, 1998 The NetBSD Foundation, Inc.
5 * All rights reserved. 5 * All rights reserved.
6 * 6 *
7 * This code is derived from software contributed to The NetBSD Foundation 7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Greg Oster; Jason R. Thorpe. 8 * by Greg Oster; Jason R. Thorpe.
9 * 9 *
10 * Redistribution and use in source and binary forms, with or without 10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions 11 * modification, are permitted provided that the following conditions
12 * are met: 12 * are met:
13 * 1. Redistributions of source code must retain the above copyright 13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer. 14 * notice, this list of conditions and the following disclaimer.
@@ -51,26 +51,27 @@ struct raidcinfo { @@ -51,26 +51,27 @@ struct raidcinfo {
51 size_t ci_pathlen; /* length of component path */ 51 size_t ci_pathlen; /* length of component path */
52#endif 52#endif
53}; 53};
54 54
55 55
56/* a little structure to serve as a container for all the various 56/* a little structure to serve as a container for all the various
57 global pools used in RAIDframe */ 57 global pools used in RAIDframe */
58 58
59struct RF_Pools_s { 59struct RF_Pools_s {
60 struct pool alloclist; /* AllocList */ 60 struct pool alloclist; /* AllocList */
61 struct pool asm_hdr; /* Access Stripe Map Header */ 61 struct pool asm_hdr; /* Access Stripe Map Header */
62 struct pool asmap; /* Access Stripe Map */ 62 struct pool asmap; /* Access Stripe Map */
63 struct pool asmhle; /* Access Stripe Map Header List Elements */ 63 struct pool asmhle; /* Access Stripe Map Header List Elements */
 64 struct pool bufio; /* Buffer IO Pool */
64 struct pool callbackf; /* Callback function descriptors */ 65 struct pool callbackf; /* Callback function descriptors */
65 struct pool callbackv; /* Callback value descriptors */ 66 struct pool callbackv; /* Callback value descriptors */
66 struct pool dagh; /* DAG headers */ 67 struct pool dagh; /* DAG headers */
67 struct pool dagnode; /* DAG nodes */ 68 struct pool dagnode; /* DAG nodes */
68 struct pool daglist; /* DAG lists */ 69 struct pool daglist; /* DAG lists */
69 struct pool dagpcache; /* DAG pointer/param cache */ 70 struct pool dagpcache; /* DAG pointer/param cache */
70 struct pool dqd; /* Disk Queue Data */ 71 struct pool dqd; /* Disk Queue Data */
71 struct pool fss; /* Failed Stripe Structures */ 72 struct pool fss; /* Failed Stripe Structures */
72 struct pool funclist; /* Function Lists */ 73 struct pool funclist; /* Function Lists */
73 struct pool mcpair; /* Mutex/Cond Pairs */ 74 struct pool mcpair; /* Mutex/Cond Pairs */
74 struct pool pda; /* Physical Disk Access structures */ 75 struct pool pda; /* Physical Disk Access structures */
75 struct pool pss; /* Parity Stripe Status */ 76 struct pool pss; /* Parity Stripe Status */
76 struct pool pss_issued; /* Parity Stripe Status Issued */ 77 struct pool pss_issued; /* Parity Stripe Status Issued */