| @@ -1,14 +1,14 @@ | | | @@ -1,14 +1,14 @@ |
1 | /* $NetBSD: rf_reconstruct.c,v 1.106 2008/12/20 17:04:51 oster Exp $ */ | | 1 | /* $NetBSD: rf_reconstruct.c,v 1.107 2009/02/11 23:54:10 oster Exp $ */ |
2 | /* | | 2 | /* |
3 | * Copyright (c) 1995 Carnegie-Mellon University. | | 3 | * Copyright (c) 1995 Carnegie-Mellon University. |
4 | * All rights reserved. | | 4 | * All rights reserved. |
5 | * | | 5 | * |
6 | * Author: Mark Holland | | 6 | * Author: Mark Holland |
7 | * | | 7 | * |
8 | * Permission to use, copy, modify and distribute this software and | | 8 | * Permission to use, copy, modify and distribute this software and |
9 | * its documentation is hereby granted, provided that both the copyright | | 9 | * its documentation is hereby granted, provided that both the copyright |
10 | * notice and this permission notice appear in all copies of the | | 10 | * notice and this permission notice appear in all copies of the |
11 | * software, derivative works or modified versions, and any portions | | 11 | * software, derivative works or modified versions, and any portions |
12 | * thereof, and that both notices appear in supporting documentation. | | 12 | * thereof, and that both notices appear in supporting documentation. |
13 | * | | 13 | * |
14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" | | 14 | * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" |
| @@ -23,27 +23,27 @@ | | | @@ -23,27 +23,27 @@ |
23 | * Pittsburgh PA 15213-3890 | | 23 | * Pittsburgh PA 15213-3890 |
24 | * | | 24 | * |
25 | * any improvements or extensions that they make and grant Carnegie the | | 25 | * any improvements or extensions that they make and grant Carnegie the |
26 | * rights to redistribute these changes. | | 26 | * rights to redistribute these changes. |
27 | */ | | 27 | */ |
28 | | | 28 | |
29 | /************************************************************ | | 29 | /************************************************************ |
30 | * | | 30 | * |
31 | * rf_reconstruct.c -- code to perform on-line reconstruction | | 31 | * rf_reconstruct.c -- code to perform on-line reconstruction |
32 | * | | 32 | * |
33 | ************************************************************/ | | 33 | ************************************************************/ |
34 | | | 34 | |
35 | #include <sys/cdefs.h> | | 35 | #include <sys/cdefs.h> |
36 | __KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.106 2008/12/20 17:04:51 oster Exp $"); | | 36 | __KERNEL_RCSID(0, "$NetBSD: rf_reconstruct.c,v 1.107 2009/02/11 23:54:10 oster Exp $"); |
37 | | | 37 | |
38 | #include <sys/param.h> | | 38 | #include <sys/param.h> |
39 | #include <sys/time.h> | | 39 | #include <sys/time.h> |
40 | #include <sys/buf.h> | | 40 | #include <sys/buf.h> |
41 | #include <sys/errno.h> | | 41 | #include <sys/errno.h> |
42 | #include <sys/systm.h> | | 42 | #include <sys/systm.h> |
43 | #include <sys/proc.h> | | 43 | #include <sys/proc.h> |
44 | #include <sys/ioctl.h> | | 44 | #include <sys/ioctl.h> |
45 | #include <sys/fcntl.h> | | 45 | #include <sys/fcntl.h> |
46 | #include <sys/vnode.h> | | 46 | #include <sys/vnode.h> |
47 | #include <dev/raidframe/raidframevar.h> | | 47 | #include <dev/raidframe/raidframevar.h> |
48 | | | 48 | |
49 | #include "rf_raid.h" | | 49 | #include "rf_raid.h" |
| @@ -666,28 +666,30 @@ rf_ContinueReconstructFailedDisk(RF_Raid | | | @@ -666,28 +666,30 @@ rf_ContinueReconstructFailedDisk(RF_Raid |
666 | */ | | 666 | */ |
667 | recon_error = 1; | | 667 | recon_error = 1; |
668 | raidPtr->reconControl->error = 1; | | 668 | raidPtr->reconControl->error = 1; |
669 | | | 669 | |
670 | /* bump the numDisksDone count for reads, | | 670 | /* bump the numDisksDone count for reads, |
671 | but not for writes */ | | 671 | but not for writes */ |
672 | if (status == RF_RECON_READ_ERROR) | | 672 | if (status == RF_RECON_READ_ERROR) |
673 | reconDesc->numDisksDone++; | | 673 | reconDesc->numDisksDone++; |
674 | | | 674 | |
675 | /* write errors are special -- when we are | | 675 | /* write errors are special -- when we are |
676 | done dealing with the reads that are | | 676 | done dealing with the reads that are |
677 | finished, we don't want to wait for any | | 677 | finished, we don't want to wait for any |
678 | writes */ | | 678 | writes */ |
679 | if (status == RF_RECON_WRITE_ERROR) | | 679 | if (status == RF_RECON_WRITE_ERROR) { |
680 | write_error = 1; | | 680 | write_error = 1; |
| | | 681 | num_writes++; |
| | | 682 | } |
681 | | | 683 | |
682 | } else if (status == RF_RECON_READ_STOPPED) { | | 684 | } else if (status == RF_RECON_READ_STOPPED) { |
683 | /* count this component as being "done" */ | | 685 | /* count this component as being "done" */ |
684 | reconDesc->numDisksDone++; | | 686 | reconDesc->numDisksDone++; |
685 | } else if (status == RF_RECON_WRITE_DONE) { | | 687 | } else if (status == RF_RECON_WRITE_DONE) { |
686 | num_writes++; | | 688 | num_writes++; |
687 | } | | 689 | } |
688 | | | 690 | |
689 | if (recon_error) { | | 691 | if (recon_error) { |
690 | /* make sure any stragglers are woken up so that | | 692 | /* make sure any stragglers are woken up so that |
691 | their threads will complete, and we can get out | | 693 | their threads will complete, and we can get out |
692 | of here with all IO processed */ | | 694 | of here with all IO processed */ |
693 | | | 695 | |
| @@ -708,32 +710,33 @@ rf_ContinueReconstructFailedDisk(RF_Raid | | | @@ -708,32 +710,33 @@ rf_ContinueReconstructFailedDisk(RF_Raid |
708 | } | | 710 | } |
709 | #endif | | 711 | #endif |
710 | } | | 712 | } |
711 | | | 713 | |
712 | /* reads done, wake up any waiters, and then wait for writes */ | | 714 | /* reads done, wake up any waiters, and then wait for writes */ |
713 | | | 715 | |
714 | rf_WakeupHeadSepCBWaiters(raidPtr); | | 716 | rf_WakeupHeadSepCBWaiters(raidPtr); |
715 | | | 717 | |
716 | while (!recon_error && (num_writes < pending_writes)) { | | 718 | while (!recon_error && (num_writes < pending_writes)) { |
717 | event = rf_GetNextReconEvent(reconDesc); | | 719 | event = rf_GetNextReconEvent(reconDesc); |
718 | status = ProcessReconEvent(raidPtr, event); | | 720 | status = ProcessReconEvent(raidPtr, event); |
719 | | | 721 | |
720 | if (status == RF_RECON_WRITE_ERROR) { | | 722 | if (status == RF_RECON_WRITE_ERROR) { |
| | | 723 | num_writes++; |
721 | recon_error = 1; | | 724 | recon_error = 1; |
722 | raidPtr->reconControl->error = 1; | | 725 | raidPtr->reconControl->error = 1; |
723 | /* an error was encountered at the very end... bail */ | | 726 | /* an error was encountered at the very end... bail */ |
724 | } else if (status == RF_RECON_WRITE_DONE) { | | 727 | } else if (status == RF_RECON_WRITE_DONE) { |
725 | num_writes++; | | 728 | num_writes++; |
726 | } | | 729 | } /* else it's something else, and we don't care */ |
727 | } | | 730 | } |
728 | if (recon_error || | | 731 | if (recon_error || |
729 | (raidPtr->reconControl->lastPSID == lastPSID)) { | | 732 | (raidPtr->reconControl->lastPSID == lastPSID)) { |
730 | done = 1; | | 733 | done = 1; |
731 | break; | | 734 | break; |
732 | } | | 735 | } |
733 | | | 736 | |
734 | prev = raidPtr->reconControl->lastPSID; | | 737 | prev = raidPtr->reconControl->lastPSID; |
735 | raidPtr->reconControl->lastPSID += incPSID; | | 738 | raidPtr->reconControl->lastPSID += incPSID; |
736 | | | 739 | |
737 | if (raidPtr->reconControl->lastPSID > lastPSID) { | | 740 | if (raidPtr->reconControl->lastPSID > lastPSID) { |
738 | pending_writes = lastPSID - prev; | | 741 | pending_writes = lastPSID - prev; |
739 | raidPtr->reconControl->lastPSID = lastPSID; | | 742 | raidPtr->reconControl->lastPSID = lastPSID; |
| @@ -1044,26 +1047,32 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF | | | @@ -1044,26 +1047,32 @@ ProcessReconEvent(RF_Raid_t *raidPtr, RF |
1044 | retcode = 0; | | 1047 | retcode = 0; |
1045 | } | | 1048 | } |
1046 | break; | | 1049 | break; |
1047 | | | 1050 | |
1048 | /* A read I/O failed to complete */ | | 1051 | /* A read I/O failed to complete */ |
1049 | case RF_REVENT_READ_FAILED: | | 1052 | case RF_REVENT_READ_FAILED: |
1050 | retcode = RF_RECON_READ_ERROR; | | 1053 | retcode = RF_RECON_READ_ERROR; |
1051 | break; | | 1054 | break; |
1052 | | | 1055 | |
1053 | /* A write I/O failed to complete */ | | 1056 | /* A write I/O failed to complete */ |
1054 | case RF_REVENT_WRITE_FAILED: | | 1057 | case RF_REVENT_WRITE_FAILED: |
1055 | retcode = RF_RECON_WRITE_ERROR; | | 1058 | retcode = RF_RECON_WRITE_ERROR; |
1056 | | | 1059 | |
| | | 1060 | /* This is an error, but it was a pending write. |
| | | 1061 | Account for it. */ |
| | | 1062 | RF_LOCK_MUTEX(raidPtr->reconControl->rb_mutex); |
| | | 1063 | raidPtr->reconControl->pending_writes--; |
| | | 1064 | RF_UNLOCK_MUTEX(raidPtr->reconControl->rb_mutex); |
| | | 1065 | |
1057 | rbuf = (RF_ReconBuffer_t *) event->arg; | | 1066 | rbuf = (RF_ReconBuffer_t *) event->arg; |
1058 | | | 1067 | |
1059 | /* cleanup the disk queue data */ | | 1068 | /* cleanup the disk queue data */ |
1060 | rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); | | 1069 | rf_FreeDiskQueueData((RF_DiskQueueData_t *) rbuf->arg); |
1061 | | | 1070 | |
1062 | /* At this point we're erroring out, badly, and floatingRbufs | | 1071 | /* At this point we're erroring out, badly, and floatingRbufs |
1063 | may not even be valid. Rather than putting this back onto | | 1072 | may not even be valid. Rather than putting this back onto |
1064 | the floatingRbufs list, just arrange for its immediate | | 1073 | the floatingRbufs list, just arrange for its immediate |
1065 | destruction. | | 1074 | destruction. |
1066 | */ | | 1075 | */ |
1067 | rf_FreeReconBuffer(rbuf); | | 1076 | rf_FreeReconBuffer(rbuf); |
1068 | break; | | 1077 | break; |
1069 | | | 1078 | |