md/raid1: Fix data corruption for degraded array with slow disk

YuKuai-huawei · liu-song-6 · commit c916ca35308d · 2024-08-15T13:38:17.000-07:00
read_balance() will avoid reading from slow disks as much as possible, however, if valid data only lands in slow disks, and a new normal disk is still in recovery, unrecovered data can be read: raid1_read_request read_balance raid1_should_read_first -> return false choose_best_rdev -> normal disk is not recovered, return -1 choose_bb_rdev -> missing the checking of recovery, return the normal disk -> read unrecovered data Root cause is that the checking of recovery is missing in choose_bb_rdev(). Hence add such checking to fix the problem. Also fix similar problem in choose_slow_rdev(). Cc: stable@vger.kernel.org Fixes: 9f3ced7 ("md/raid1: factor out choose_bb_rdev() from read_balance()") Fixes: dfa8ecd ("md/raid1: factor out choose_slow_rdev() from read_balance()") Reported-and-tested-by: Mateusz Jończyk <mat.jonczyk@o2.pl> Closes: https://lore.kernel.org/all/9952f532-2554-44bf-b906-4880b2e88e3a@o2.pl/ Signed-off-by: Yu Kuai <yukuai3@huawei.com> Link: https://lore.kernel.org/r/20240803091137.3197008-1-yukuai1@huaweicloud.com Signed-off-by: Song Liu <song@kernel.org>
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
@@ -617,6 +617,12 @@ static int choose_first_rdev(struct r1conf *conf, struct r1bio *r1_bio,
 	return -1;
 }
 
+static bool rdev_in_recovery(struct md_rdev *rdev, struct r1bio *r1_bio)
+{
+	return !test_bit(In_sync, &rdev->flags) &&
+	       rdev->recovery_offset < r1_bio->sector + r1_bio->sectors;
+}
+
 static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio,
 			  int *max_sectors)
 {
@@ -635,6 +641,7 @@ static int choose_bb_rdev(struct r1conf *conf, struct r1bio *r1_bio,
 
 		rdev = conf->mirrors[disk].rdev;
 		if (!rdev || test_bit(Faulty, &rdev->flags) ||
+		    rdev_in_recovery(rdev, r1_bio) ||
 		    test_bit(WriteMostly, &rdev->flags))
 			continue;
 
@@ -673,7 +680,8 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
 
 		rdev = conf->mirrors[disk].rdev;
 		if (!rdev || test_bit(Faulty, &rdev->flags) ||
-		    !test_bit(WriteMostly, &rdev->flags))
+		    !test_bit(WriteMostly, &rdev->flags) ||
+		    rdev_in_recovery(rdev, r1_bio))
 			continue;
 
 		/* there are no bad blocks, we can use this disk */
@@ -733,9 +741,7 @@ static bool rdev_readable(struct md_rdev *rdev, struct r1bio *r1_bio)
 	if (!rdev || test_bit(Faulty, &rdev->flags))
 		return false;
 
-	/* still in recovery */
-	if (!test_bit(In_sync, &rdev->flags) &&
-	    rdev->recovery_offset < r1_bio->sector + r1_bio->sectors)
+	if (rdev_in_recovery(rdev, r1_bio))
 		return false;
 
 	/* don't read from slow disk unless have to */