
Commit 4e2c656

Revert "md/raid10: pull codes that wait for blocked dev into one function"
This reverts commit f046f5d. Matthew Ruffell reported data corruption in
raid10 due to the changes in discard handling [1]. Revert these changes
until we find a proper fix.

[1] https://bugs.launchpad.net/ubuntu/+source/linux/+bug/1907262/

Cc: Matthew Ruffell <[email protected]>
Cc: Xiao Ni <[email protected]>
Signed-off-by: Song Liu <[email protected]>
1 parent d7cb6be commit 4e2c656

File tree

1 file changed (+51, -67 lines)


drivers/md/raid10.c

Lines changed: 51 additions & 67 deletions
@@ -1275,75 +1275,12 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
 	}
 }
 
-static void wait_blocked_dev(struct mddev *mddev, struct r10bio *r10_bio)
-{
-	int i;
-	struct r10conf *conf = mddev->private;
-	struct md_rdev *blocked_rdev;
-
-retry_wait:
-	blocked_rdev = NULL;
-	rcu_read_lock();
-	for (i = 0; i < conf->copies; i++) {
-		struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
-		struct md_rdev *rrdev = rcu_dereference(
-			conf->mirrors[i].replacement);
-		if (rdev == rrdev)
-			rrdev = NULL;
-		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
-			atomic_inc(&rdev->nr_pending);
-			blocked_rdev = rdev;
-			break;
-		}
-		if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
-			atomic_inc(&rrdev->nr_pending);
-			blocked_rdev = rrdev;
-			break;
-		}
-
-		if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
-			sector_t first_bad;
-			sector_t dev_sector = r10_bio->devs[i].addr;
-			int bad_sectors;
-			int is_bad;
-
-			/* Discard request doesn't care the write result
-			 * so it doesn't need to wait blocked disk here.
-			 */
-			if (!r10_bio->sectors)
-				continue;
-
-			is_bad = is_badblock(rdev, dev_sector, r10_bio->sectors,
-					     &first_bad, &bad_sectors);
-			if (is_bad < 0) {
-				/* Mustn't write here until the bad block
-				 * is acknowledged
-				 */
-				atomic_inc(&rdev->nr_pending);
-				set_bit(BlockedBadBlocks, &rdev->flags);
-				blocked_rdev = rdev;
-				break;
-			}
-		}
-	}
-	rcu_read_unlock();
-
-	if (unlikely(blocked_rdev)) {
-		/* Have to wait for this device to get unblocked, then retry */
-		allow_barrier(conf);
-		raid10_log(conf->mddev, "%s wait rdev %d blocked",
-				__func__, blocked_rdev->raid_disk);
-		md_wait_for_blocked_rdev(blocked_rdev, mddev);
-		wait_barrier(conf);
-		goto retry_wait;
-	}
-}
-
 static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 				 struct r10bio *r10_bio)
 {
 	struct r10conf *conf = mddev->private;
 	int i;
+	struct md_rdev *blocked_rdev;
 	sector_t sectors;
 	int max_sectors;
 
@@ -1401,9 +1338,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
 	r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
 	raid10_find_phys(conf, r10_bio);
-
-	wait_blocked_dev(mddev, r10_bio);
-
+retry_write:
+	blocked_rdev = NULL;
 	rcu_read_lock();
 	max_sectors = r10_bio->sectors;
 
@@ -1414,6 +1350,16 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 					     conf->mirrors[d].replacement);
 		if (rdev == rrdev)
 			rrdev = NULL;
+		if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+			atomic_inc(&rdev->nr_pending);
+			blocked_rdev = rdev;
+			break;
+		}
+		if (rrdev && unlikely(test_bit(Blocked, &rrdev->flags))) {
+			atomic_inc(&rrdev->nr_pending);
+			blocked_rdev = rrdev;
+			break;
+		}
 		if (rdev && (test_bit(Faulty, &rdev->flags)))
 			rdev = NULL;
 		if (rrdev && (test_bit(Faulty, &rrdev->flags)))
@@ -1434,6 +1380,15 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 
 			is_bad = is_badblock(rdev, dev_sector, max_sectors,
 					     &first_bad, &bad_sectors);
+			if (is_bad < 0) {
+				/* Mustn't write here until the bad block
+				 * is acknowledged
+				 */
+				atomic_inc(&rdev->nr_pending);
+				set_bit(BlockedBadBlocks, &rdev->flags);
+				blocked_rdev = rdev;
+				break;
+			}
 			if (is_bad && first_bad <= dev_sector) {
 				/* Cannot write here at all */
 				bad_sectors -= (dev_sector - first_bad);
@@ -1469,6 +1424,35 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
 	}
 	rcu_read_unlock();
 
+	if (unlikely(blocked_rdev)) {
+		/* Have to wait for this device to get unblocked, then retry */
+		int j;
+		int d;
+
+		for (j = 0; j < i; j++) {
+			if (r10_bio->devs[j].bio) {
+				d = r10_bio->devs[j].devnum;
+				rdev_dec_pending(conf->mirrors[d].rdev, mddev);
+			}
+			if (r10_bio->devs[j].repl_bio) {
+				struct md_rdev *rdev;
+				d = r10_bio->devs[j].devnum;
+				rdev = conf->mirrors[d].replacement;
+				if (!rdev) {
+					/* Race with remove_disk */
+					smp_mb();
+					rdev = conf->mirrors[d].rdev;
+				}
+				rdev_dec_pending(rdev, mddev);
+			}
+		}
+		allow_barrier(conf);
+		raid10_log(conf->mddev, "wait rdev %d blocked", blocked_rdev->raid_disk);
+		md_wait_for_blocked_rdev(blocked_rdev, mddev);
+		wait_barrier(conf);
+		goto retry_write;
+	}
+
 	if (max_sectors < r10_bio->sectors)
 		r10_bio->sectors = max_sectors;
 
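The hunks above restore the open-coded retry loop in raid10_write_request(): scan the copies under rcu_read_lock(), take an nr_pending reference on any rdev found Blocked (or carrying an unacknowledged bad block), drop the references already taken, release the barrier, sleep in md_wait_for_blocked_rdev(), re-take the barrier, and jump back to retry_write to rescan. The standalone sketch below only illustrates that detect / back-out / wait / retry shape under simplified assumptions; struct device and the helpers device_get_ref(), device_put_ref(), device_wait_unblocked(), barrier_allow() and barrier_wait() are hypothetical stand-ins for the md primitives (nr_pending, rdev_dec_pending(), md_wait_for_blocked_rdev(), allow_barrier(), wait_barrier()), not the kernel API.

#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-in for an md rdev: a Blocked flag plus a pending-reference count. */
struct device {
	bool blocked;	/* plays the role of test_bit(Blocked, &rdev->flags) */
	int  refs;	/* plays the role of rdev->nr_pending */
};

static void device_get_ref(struct device *dev) { dev->refs++; }	/* ~ atomic_inc(&rdev->nr_pending) */
static void device_put_ref(struct device *dev) { dev->refs--; }	/* ~ rdev_dec_pending() */
static void barrier_allow(void) { /* ~ allow_barrier(conf) */ }
static void barrier_wait(void)  { /* ~ wait_barrier(conf) */ }

static void device_wait_unblocked(struct device *dev)
{
	/* The real md_wait_for_blocked_rdev() sleeps until the flag clears and
	 * drops the pinning reference itself; the sketch just clears the flag
	 * so the example terminates. */
	dev->blocked = false;
	device_put_ref(dev);
}

/* The "detect blocked device, back out, wait, retry" loop that this revert
 * moves back inline into the write path. */
static void write_to_copies(struct device **copies, int ncopies)
{
	struct device *blocked;
	int i, j;

retry:
	blocked = NULL;

	for (i = 0; i < ncopies; i++) {
		if (copies[i]->blocked) {
			/* Pin the blocked device so it cannot vanish while we sleep. */
			device_get_ref(copies[i]);
			blocked = copies[i];
			break;
		}
		device_get_ref(copies[i]);	/* reference for the pending write */
	}

	if (blocked) {
		/* Drop the references taken for the copies scanned so far. */
		for (j = 0; j < i; j++)
			device_put_ref(copies[j]);

		barrier_allow();		/* let other I/O make progress */
		device_wait_unblocked(blocked);	/* also releases the pinning reference */
		barrier_wait();
		goto retry;			/* rescan from the top */
	}

	/* ... issue the writes, dropping each reference on completion ... */
	for (i = 0; i < ncopies; i++)
		device_put_ref(copies[i]);
}

int main(void)
{
	struct device a = { .blocked = false, .refs = 0 };
	struct device b = { .blocked = true,  .refs = 0 };
	struct device *copies[] = { &a, &b };

	write_to_copies(copies, 2);
	printf("refs after writes: a=%d b=%d\n", a.refs, b.refs);	/* expect 0 0 */
	return 0;
}

One design point the sketch mirrors: the references taken during the scan must be released before sleeping, otherwise the blocked device (or a device being removed) could be held up indefinitely by a writer that is itself asleep waiting on it.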