Wed Feb 11 23:54:11 2009 UTC ()
If we see an RF_RECON_WRITE_ERROR event, we know a write has finished, and
we need to account for it.  Failure to do so means we can end up
waiting forever for writes that we think are outstanding but that have
already completed.

Addresses the RAIDframe part of PR#40569.  Thanks to Matthias Scheler
for reporting the issue and verifying the fix.


(oster)
diff -r1.106 -r1.107 src/sys/dev/raidframe/rf_reconstruct.c

cvs diff -r1.106 -r1.107 src/sys/dev/raidframe/rf_reconstruct.c (expand / switch to unified diff)

--- src/sys/dev/raidframe/rf_reconstruct.c 2008/12/20 17:04:51 1.106
+++ src/sys/dev/raidframe/rf_reconstruct.c 2009/02/11 23:54:10 1.107
@@ -1,14 +1,14 @@ @@ -1,14 +1,14 @@
1/* $NetBSD: rf_reconstruct.c,v 1.106 2008/12/20 17:04:51 oster Exp $ */ 1/* $NetBSD: rf_reconstruct.c,v 1.107 2009/02/11 23:54:10 oster Exp $ */
2/* 2/*
3 * Copyright (c) 1995 Carnegie-Mellon University. 3 * Copyright (c) 1995 Carnegie-Mellon University.
4 * All rights reserved. 4 * All rights reserved.
5 * 5 *
6 * Author: Mark Holland 6 * Author: Mark Holland
7 * 7 *
8 * Permission to use, copy, modify and distribute this software and 8 * Permission to use, copy, modify and distribute this software and
9 * its documentation is hereby granted, provided that both the copyright 9 * its documentation is hereby granted, provided that both the copyright
10 * notice and this permission notice appear in all copies of the 10 * notice and this permission notice appear in all copies of the
11 * software, derivative works or modified versions, and any portions 11 * software, derivative works or modified versions, and any portions
12 * thereof, and that both notices appear in supporting documentation. 12 * thereof, and that both notices appear in supporting documentation.
13 * 13 *
14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
@@ -23,27 +23,27 @@ @@ -23,27 +23,27 @@
23 * Pittsburgh PA 15213-3890 23 * Pittsburgh PA 15213-3890
24 * 24 *
25 * any improvements or extensions that they make and grant Carnegie the 25 * any improvements or extensions that they make and grant Carnegie the
26 * rights to redistribute these changes. 26 * rights to redistribute these changes.
27 */ 27 */
28 28
29/************************************************************ 29/************************************************************
30 * 30 *
31 * rf_reconstruct.c -- code to perform on-line reconstruction 31 * rf_reconstruct.c -- code to perform on-line reconstruction
32 * 32 *
33 ************************************************************/ 33 ************************************************************/
34 34
35#include <sys/cdefs.h> 35#include <sys/cdefs.h>
36__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.106 2008/12/20 17:04:51 oster Exp $"); 36__KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.107 2009/02/11 23:54:10 oster Exp $");
37 37
38#include <sys/param.h> 38#include <sys/param.h>
39#include <sys/time.h> 39#include <sys/time.h>
40#include <sys/buf.h> 40#include <sys/buf.h>
41#include <sys/errno.h> 41#include <sys/errno.h>
42#include <sys/systm.h> 42#include <sys/systm.h>
43#include <sys/proc.h> 43#include <sys/proc.h>
44#include <sys/ioctl.h> 44#include <sys/ioctl.h>
45#include <sys/fcntl.h> 45#include <sys/fcntl.h>
46#include <sys/vnode.h> 46#include <sys/vnode.h>
47#include <dev/raidframe/raidframevar.h> 47#include <dev/raidframe/raidframevar.h>
48 48
49#include "rf_raid.h" 49#include "rf_raid.h"
@@ -666,28 +666,30 @@ rf_ContinueReconstructFailedDisk(RF_Raid @@ -666,28 +666,30 @@ rf_ContinueReconstructFailedDisk(RF_Raid
666 */ 666 */
667 recon_error = 1; 667 recon_error = 1;
668 raidPtr->reconControl->error = 1; 668 raidPtr->reconControl->error = 1;
669  669
670 /* bump the numDisksDone count for reads, 670 /* bump the numDisksDone count for reads,
671 but not for writes */ 671 but not for writes */
672 if (status == RF_RECON_READ_ERROR) 672 if (status == RF_RECON_READ_ERROR)
673 reconDesc->numDisksDone++; 673 reconDesc->numDisksDone++;
674  674
675 /* write errors are special -- when we are 675 /* write errors are special -- when we are
676 done dealing with the reads that are 676 done dealing with the reads that are
677 finished, we don't want to wait for any 677 finished, we don't want to wait for any
678 writes */ 678 writes */
679 if (status == RF_RECON_WRITE_ERROR) 679 if (status == RF_RECON_WRITE_ERROR) {
680 write_error = 1; 680 write_error = 1;
 681 num_writes++;
 682 }
681  683
682 } else if (status == RF_RECON_READ_STOPPED) { 684 } else if (status == RF_RECON_READ_STOPPED) {
683 /* count this component as being "done" */ 685 /* count this component as being "done" */
684 reconDesc->numDisksDone++; 686 reconDesc->numDisksDone++;
685 } else if (status == RF_RECON_WRITE_DONE) { 687 } else if (status == RF_RECON_WRITE_DONE) {
686 num_writes++; 688 num_writes++;
687 }  689 }
688  690
689 if (recon_error) { 691 if (recon_error) {
690 /* make sure any stragglers are woken up so that 692 /* make sure any stragglers are woken up so that
691 their theads will complete, and we can get out 693 their theads will complete, and we can get out
692 of here with all IO processed */ 694 of here with all IO processed */
693 695
@@ -708,32 +710,33 @@ rf_ContinueReconstructFailedDisk(RF_Raid @@ -708,32 +710,33 @@ rf_ContinueReconstructFailedDisk(RF_Raid
708 } 710 }
709#endif 711#endif
710 } 712 }
711 713
712 /* reads done, wakup any waiters, and then wait for writes */ 714 /* reads done, wakup any waiters, and then wait for writes */
713 715
714 rf_WakeupHeadSepCBWaiters(raidPtr); 716 rf_WakeupHeadSepCBWaiters(raidPtr);
715 717
716 while (!recon_error && (num_writes < pending_writes)) { 718 while (!recon_error && (num_writes < pending_writes)) {
717 event = rf_GetNextReconEvent(reconDesc); 719 event = rf_GetNextReconEvent(reconDesc);
718 status = ProcessReconEvent(raidPtr, event); 720 status = ProcessReconEvent(raidPtr, event);
719  721
720 if (status == RF_RECON_WRITE_ERROR) { 722 if (status == RF_RECON_WRITE_ERROR) {
 723 num_writes++;
721 recon_error = 1; 724 recon_error = 1;
722 raidPtr->reconControl->error = 1; 725 raidPtr->reconControl->error = 1;
723 /* an error was encountered at the very end... bail */ 726 /* an error was encountered at the very end... bail */
724 } else if (status == RF_RECON_WRITE_DONE) { 727 } else if (status == RF_RECON_WRITE_DONE) {
725 num_writes++; 728 num_writes++;
726 } 729 } /* else it's something else, and we don't care */
727 } 730 }
728 if (recon_error ||  731 if (recon_error ||
729 (raidPtr->reconControl->lastPSID == lastPSID)) { 732 (raidPtr->reconControl->lastPSID == lastPSID)) {
730 done = 1; 733 done = 1;
731 break; 734 break;
732 } 735 }
733 736
734 prev = raidPtr->reconControl->lastPSID; 737 prev = raidPtr->reconControl->lastPSID;
735 raidPtr->reconControl->lastPSID += incPSID; 738 raidPtr->reconControl->lastPSID += incPSID;
736 739
737 if (raidPtr->reconControl->lastPSID > lastPSID) { 740 if (raidPtr->reconControl->lastPSID > lastPSID) {
738 pending_writes = lastPSID - prev; 741 pending_writes = lastPSID - prev;
739 raidPtr->reconControl->lastPSID = lastPSID; 742 raidPtr->reconControl->lastPSID = lastPSID;
@@ -1044,26 +1047,32 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF @@ -1044,26 +1047,32 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF
1044 retcode = 0; 1047 retcode = 0;
1045 } 1048 }
1046 break; 1049 break;
1047 1050
1048 /* A read I/O failed to complete */ 1051 /* A read I/O failed to complete */
1049 case RF_REVENT_READ_FAILED: 1052 case RF_REVENT_READ_FAILED:
1050 retcode = RF_RECON_READ_ERROR; 1053 retcode = RF_RECON_READ_ERROR;
1051 break; 1054 break;
1052 1055
1053 /* A write I/O failed to complete */ 1056 /* A write I/O failed to complete */
1054 case RF_REVENT_WRITE_FAILED: 1057 case RF_REVENT_WRITE_FAILED:
1055 retcode = RF_RECON_WRITE_ERROR; 1058 retcode = RF_RECON_WRITE_ERROR;
1056 1059
 1060 /* This is an error, but it was a pending write.
 1061 Account for it. */
 1062 RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex);
 1063 raidPtr->reconControl->pending_writes--;
 1064 RF_UNLOCK_MUTEX(raidPtr->reconControl->rb_mutex);
 1065
1057 rbuf = (RF_ReconBuffer_t *) event->arg; 1066 rbuf = (RF_ReconBuffer_t *) event->arg;
1058 1067
1059 /* cleanup the disk queue data */ 1068 /* cleanup the disk queue data */
1060 rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); 1069 rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg);
1061 1070
1062 /* At this point we're erroring out, badly, and floatingRbufs 1071 /* At this point we're erroring out, badly, and floatingRbufs
1063 may not even be valid. Rather than putting this back onto 1072 may not even be valid. Rather than putting this back onto
1064 the floatingRbufs list, just arrange for its immediate 1073 the floatingRbufs list, just arrange for its immediate
1065 destruction. 1074 destruction.
1066 */ 1075 */
1067 rf_FreeReconBuffer(rbuf); 1076 rf_FreeReconBuffer(rbuf);
1068 break; 1077 break;
1069 1078