Skip to content

Commit 16c4770

Browse files
YuKuai-huaweiliu-song-6
authored andcommitted
dm-raid: really frozen sync_thread during suspend
1) commit f52f5c7 ("md: fix stopping sync thread") remove MD_RECOVERY_FROZEN from __md_stop_writes() and doesn't realize that dm-raid relies on __md_stop_writes() to frozen sync_thread indirectly. Fix this problem by adding MD_RECOVERY_FROZEN in md_stop_writes(), and since stop_sync_thread() is only used for dm-raid in this case, also move stop_sync_thread() to md_stop_writes(). 2) The flag MD_RECOVERY_FROZEN doesn't mean that sync thread is frozen, it only prevent new sync_thread to start, and it can't stop the running sync thread; In order to frozen sync_thread, after seting the flag, stop_sync_thread() should be used. 3) The flag MD_RECOVERY_FROZEN doesn't mean that writes are stopped, use it as condition for md_stop_writes() in raid_postsuspend() doesn't look correct. Consider that reentrant stop_sync_thread() do nothing, always call md_stop_writes() in raid_postsuspend(). 4) raid_message can set/clear the flag MD_RECOVERY_FROZEN at anytime, and if MD_RECOVERY_FROZEN is cleared while the array is suspended, new sync_thread can start unexpected. Fix this by disallow raid_message() to change sync_thread status during suspend. Note that after commit f52f5c7 ("md: fix stopping sync thread"), the test shell/lvconvert-raid-reshape.sh start to hang in stop_sync_thread(), and with previous fixes, the test won't hang there anymore, however, the test will still fail and complain that ext4 is corrupted. And with this patch, the test won't hang due to stop_sync_thread() or fail due to ext4 is corrupted anymore. However, there is still a deadlock related to dm-raid456 that will be fixed in following patches. Reported-by: Mikulas Patocka <[email protected]> Closes: https://lore.kernel.org/all/[email protected]/ Fixes: 1af2048 ("dm raid: fix deadlock caused by premature md_stop_writes()") Fixes: 9dbd1aa ("dm raid: add reshaping support to the target") Fixes: f52f5c7 ("md: fix stopping sync thread") Cc: [email protected] # v6.7+ Signed-off-by: Yu Kuai <[email protected]> Signed-off-by: Xiao Ni <[email protected]> Acked-by: Mike Snitzer <[email protected]> Signed-off-by: Song Liu <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 503f9d4 commit 16c4770

File tree

2 files changed

+17
-11
lines changed

2 files changed

+17
-11
lines changed

drivers/md/dm-raid.c

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3240,11 +3240,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
32403240
rs->md.ro = 1;
32413241
rs->md.in_sync = 1;
32423242

3243-
/* Keep array frozen until resume. */
3244-
set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
3245-
32463243
/* Has to be held on running the array */
32473244
mddev_suspend_and_lock_nointr(&rs->md);
3245+
3246+
/* Keep array frozen until resume. */
3247+
md_frozen_sync_thread(&rs->md);
3248+
32483249
r = md_run(&rs->md);
32493250
rs->md.in_sync = 0; /* Assume already marked dirty */
32503251
if (r) {
@@ -3722,6 +3723,9 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
37223723
if (!mddev->pers || !mddev->pers->sync_request)
37233724
return -EINVAL;
37243725

3726+
if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags))
3727+
return -EBUSY;
3728+
37253729
if (!strcasecmp(argv[0], "frozen"))
37263730
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
37273731
else
@@ -3796,10 +3800,11 @@ static void raid_postsuspend(struct dm_target *ti)
37963800
struct raid_set *rs = ti->private;
37973801

37983802
if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
3799-
/* Writes have to be stopped before suspending to avoid deadlocks. */
3800-
if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
3801-
md_stop_writes(&rs->md);
3802-
3803+
/*
3804+
* sync_thread must be stopped during suspend, and writes have
3805+
* to be stopped before suspending to avoid deadlocks.
3806+
*/
3807+
md_stop_writes(&rs->md);
38033808
mddev_suspend(&rs->md, false);
38043809
}
38053810
}
@@ -4012,8 +4017,6 @@ static int raid_preresume(struct dm_target *ti)
40124017
}
40134018

40144019
/* Check for any resize/reshape on @rs and adjust/initiate */
4015-
/* Be prepared for mddev_resume() in raid_resume() */
4016-
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
40174020
if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
40184021
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
40194022
mddev->resync_min = mddev->recovery_cp;
@@ -4055,10 +4058,12 @@ static void raid_resume(struct dm_target *ti)
40554058
if (mddev->delta_disks < 0)
40564059
rs_set_capacity(rs);
40574060

4061+
WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery));
4062+
WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
40584063
mddev_lock_nointr(mddev);
4059-
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
40604064
mddev->ro = 0;
40614065
mddev->in_sync = 0;
4066+
md_unfrozen_sync_thread(mddev);
40624067
mddev_unlock_and_resume(mddev);
40634068
}
40644069
}

drivers/md/md.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6364,7 +6364,6 @@ static void md_clean(struct mddev *mddev)
63646364

63656365
static void __md_stop_writes(struct mddev *mddev)
63666366
{
6367-
stop_sync_thread(mddev, true, false);
63686367
del_timer_sync(&mddev->safemode_timer);
63696368

63706369
if (mddev->pers && mddev->pers->quiesce) {
@@ -6389,6 +6388,8 @@ static void __md_stop_writes(struct mddev *mddev)
63896388
void md_stop_writes(struct mddev *mddev)
63906389
{
63916390
mddev_lock_nointr(mddev);
6391+
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6392+
stop_sync_thread(mddev, true, false);
63926393
__md_stop_writes(mddev);
63936394
mddev_unlock(mddev);
63946395
}

0 commit comments

Comments
 (0)