<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">
From: NeilBrown &lt;neilb@cse.unsw.edu.au&gt;

When an array is degraded, bits in the intent-bitmap are never cleared.  So if
a recently failed drive is re-added, we only need to reconstruct the blocks
that are still reflected in the bitmap.

This patch adds support for this re-adding.

Signed-off-by: Neil Brown &lt;neilb@cse.unsw.edu.au&gt;
Signed-off-by: Andrew Morton &lt;akpm@osdl.org&gt;
---

 25-akpm/drivers/md/md.c           |   71 +++++++++++++++++++++++++++++---------
 25-akpm/drivers/md/raid1.c        |    7 +++
 25-akpm/include/linux/raid/md_k.h |    4 ++
 3 files changed, 65 insertions(+), 17 deletions(-)

diff -puN drivers/md/md.c~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive drivers/md/md.c
--- 25/drivers/md/md.c~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive	2005-03-10 19:10:05.000000000 -0800
+++ 25-akpm/drivers/md/md.c	2005-03-10 19:10:05.000000000 -0800
@@ -577,6 +577,8 @@ static int super_90_validate(mddev_t *md
 	mdp_disk_t *desc;
 	mdp_super_t *sb = (mdp_super_t *)page_address(rdev-&gt;sb_page);
 
+	rdev-&gt;raid_disk = -1;
+	rdev-&gt;in_sync = 0;
 	if (mddev-&gt;raid_disks == 0) {
 		mddev-&gt;major_version = 0;
 		mddev-&gt;minor_version = sb-&gt;minor_version;
@@ -607,16 +609,24 @@ static int super_90_validate(mddev_t *md
 		memcpy(mddev-&gt;uuid+12,&amp;sb-&gt;set_uuid3, 4);
 
 		mddev-&gt;max_disks = MD_SB_DISKS;
-	} else {
-		__u64 ev1;
-		ev1 = md_event(sb);
+	} else if (mddev-&gt;pers == NULL) {
+		/* Insist on good event counter while assembling */
+		__u64 ev1 = md_event(sb);
 		++ev1;
 		if (ev1 &lt; mddev-&gt;events) 
 			return -EINVAL;
-	}
+	} else if (mddev-&gt;bitmap) {
+		/* if adding to array with a bitmap, then we can accept an
+		 * older device ... but not too old.
+		 */
+		__u64 ev1 = md_event(sb);
+		if (ev1 &lt; mddev-&gt;bitmap-&gt;events_cleared)
+			return 0;
+	} else /* just a hot-add of a new device, leave raid_disk at -1 */
+		return 0;
+
 	if (mddev-&gt;level != LEVEL_MULTIPATH) {
-		rdev-&gt;raid_disk = -1;
-		rdev-&gt;in_sync = rdev-&gt;faulty = 0;
+		rdev-&gt;faulty = 0;
 		desc = sb-&gt;disks + rdev-&gt;desc_nr;
 
 		if (desc-&gt;state &amp; (1&lt;&lt;MD_DISK_FAULTY))
@@ -626,7 +636,8 @@ static int super_90_validate(mddev_t *md
 			rdev-&gt;in_sync = 1;
 			rdev-&gt;raid_disk = desc-&gt;raid_disk;
 		}
-	}
+	} else /* MULTIPATH are always insync */
+		rdev-&gt;in_sync = 1;
 	return 0;
 }
 
@@ -868,6 +879,8 @@ static int super_1_validate(mddev_t *mdd
 {
 	struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev-&gt;sb_page);
 
+	rdev-&gt;raid_disk = -1;
+	rdev-&gt;in_sync = 0;
 	if (mddev-&gt;raid_disks == 0) {
 		mddev-&gt;major_version = 1;
 		mddev-&gt;patch_version = 0;
@@ -885,13 +898,21 @@ static int super_1_validate(mddev_t *mdd
 		memcpy(mddev-&gt;uuid, sb-&gt;set_uuid, 16);
 
 		mddev-&gt;max_disks =  (4096-256)/2;
-	} else {
-		__u64 ev1;
-		ev1 = le64_to_cpu(sb-&gt;events);
+	} else if (mddev-&gt;pers == NULL) {
+		/* Insist on good event counter while assembling */
+		__u64 ev1 = le64_to_cpu(sb-&gt;events);
 		++ev1;
 		if (ev1 &lt; mddev-&gt;events)
 			return -EINVAL;
-	}
+	} else if (mddev-&gt;bitmap) {
+		/* If adding to array with a bitmap, then we can accept an
+		 * older device, but not too old.
+		 */
+		__u64 ev1 = le64_to_cpu(sb-&gt;events);
+		if (ev1 &lt; mddev-&gt;bitmap-&gt;events_cleared)
+			return 0;
+	} else /* just a hot-add of a new device, leave raid_disk at -1 */
+		return 0;
 
 	if (mddev-&gt;level != LEVEL_MULTIPATH) {
 		int role;
@@ -899,14 +920,10 @@ static int super_1_validate(mddev_t *mdd
 		role = le16_to_cpu(sb-&gt;dev_roles[rdev-&gt;desc_nr]);
 		switch(role) {
 		case 0xffff: /* spare */
-			rdev-&gt;in_sync = 0;
 			rdev-&gt;faulty = 0;
-			rdev-&gt;raid_disk = -1;
 			break;
 		case 0xfffe: /* faulty */
-			rdev-&gt;in_sync = 0;
 			rdev-&gt;faulty = 1;
-			rdev-&gt;raid_disk = -1;
 			break;
 		default:
 			rdev-&gt;in_sync = 1;
@@ -914,7 +931,9 @@ static int super_1_validate(mddev_t *mdd
 			rdev-&gt;raid_disk = role;
 			break;
 		}
-	}
+	} else /* MULTIPATH are always insync */
+		rdev-&gt;in_sync = 1;
+
 	return 0;
 }
 
@@ -2166,6 +2185,18 @@ static int add_new_disk(mddev_t * mddev,
 				PTR_ERR(rdev));
 			return PTR_ERR(rdev);
 		}
+		/* set save_raid_disk if appropriate */
+		if (!mddev-&gt;persistent) {
+			if (info-&gt;state &amp; (1&lt;&lt;MD_DISK_SYNC)  &amp;&amp;
+			    info-&gt;raid_disk &lt; mddev-&gt;raid_disks)
+				rdev-&gt;raid_disk = info-&gt;raid_disk;
+			else
+				rdev-&gt;raid_disk = -1;
+		} else
+			super_types[mddev-&gt;major_version].
+				validate_super(mddev, rdev);
+		rdev-&gt;saved_raid_disk = rdev-&gt;raid_disk;
+
 		rdev-&gt;in_sync = 0; /* just to be sure */
 		rdev-&gt;raid_disk = -1;
 		err = bind_rdev_to_array(rdev, mddev);
@@ -3722,6 +3753,14 @@ void md_check_recovery(mddev_t *mddev)
 				mddev-&gt;pers-&gt;spare_active(mddev);
 			}
 			md_update_sb(mddev);
+
+			/* if array is no-longer degraded, then any saved_raid_disk
+			 * information must be scrapped
+			 */
+			if (!mddev-&gt;degraded)
+				ITERATE_RDEV(mddev,rdev,rtmp)
+					rdev-&gt;saved_raid_disk = -1;
+
 			mddev-&gt;recovery = 0;
 			/* flag recovery needed just to double check */
 			set_bit(MD_RECOVERY_NEEDED, &amp;mddev-&gt;recovery);
diff -puN drivers/md/raid1.c~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive drivers/md/raid1.c
--- 25/drivers/md/raid1.c~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive	2005-03-10 19:10:05.000000000 -0800
+++ 25-akpm/drivers/md/raid1.c	2005-03-10 19:10:05.000000000 -0800
@@ -811,9 +811,12 @@ static int raid1_add_disk(mddev_t *mddev
 {
 	conf_t *conf = mddev-&gt;private;
 	int found = 0;
-	int mirror;
+	int mirror = 0;
 	mirror_info_t *p;
 
+	if (rdev-&gt;saved_raid_disk &gt;= 0 &amp;&amp;
+	    conf-&gt;mirrors[rdev-&gt;saved_raid_disk].rdev == NULL)
+		mirror = rdev-&gt;saved_raid_disk;
 	for (mirror=0; mirror &lt; mddev-&gt;raid_disks; mirror++)
 		if ( !(p=conf-&gt;mirrors+mirror)-&gt;rdev) {
 
@@ -830,6 +833,8 @@ static int raid1_add_disk(mddev_t *mddev
 			p-&gt;head_position = 0;
 			rdev-&gt;raid_disk = mirror;
 			found = 1;
+			if (rdev-&gt;saved_raid_disk != mirror)
+				conf-&gt;fullsync = 1;
 			p-&gt;rdev = rdev;
 			break;
 		}
diff -puN include/linux/raid/md_k.h~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive include/linux/raid/md_k.h
--- 25/include/linux/raid/md_k.h~md-optimise-reconstruction-when-re-adding-a-recently-failed-drive	2005-03-10 19:10:05.000000000 -0800
+++ 25-akpm/include/linux/raid/md_k.h	2005-03-10 19:10:05.000000000 -0800
@@ -183,6 +183,10 @@ struct mdk_rdev_s
 
 	int desc_nr;			/* descriptor index in the superblock */
 	int raid_disk;			/* role of device in array */
+	int saved_raid_disk;		/* role that device used to have in the
+					 * array and could again if we did a partial
+					 * resync from the bitmap
+					 */
 
 	atomic_t	nr_pending;	/* number of pending requests.
 					 * only maintained for arrays that
_
</pre></body></html>