Skip to content

Commit 5ce10a3

Browse files
YuKuai-huaweiliu-song-6
authored andcommitted
md: don't fail action_store() if sync_thread is not registered
MD_RECOVERY_RUNNING will always be set when trying to register a new sync_thread, however, if md_start_sync() turns out to do nothing, MD_RECOVERY_RUNNING will be cleared in this case. And during the race window, action_store() will return -EBUSY, which will cause some mdadm tests to fail. For example: The test 07reshape5intr will add a new disk to array, then start reshape: mdadm /dev/md0 --add /dev/xxx mdadm --grow /dev/md0 -n 3 And add_bound_rdev() from mdadm --add will set MD_RECOVERY_NEEDED, then during the race windown, mdadm --grow will fail. Fix the problem by waiting in action_store() during the race window, fail only if sync_thread is registered. Signed-off-by: Yu Kuai <[email protected]> Signed-off-by: Song Liu <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent df79234 commit 5ce10a3

File tree

2 files changed

+33
-54
lines changed

2 files changed

+33
-54
lines changed

drivers/md/md.c

Lines changed: 33 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -753,7 +753,6 @@ int mddev_init(struct mddev *mddev)
753753

754754
mutex_init(&mddev->open_mutex);
755755
mutex_init(&mddev->reconfig_mutex);
756-
mutex_init(&mddev->sync_mutex);
757756
mutex_init(&mddev->suspend_mutex);
758757
mutex_init(&mddev->bitmap_info.mutex);
759758
INIT_LIST_HEAD(&mddev->disks);
@@ -5021,59 +5020,20 @@ void md_unfrozen_sync_thread(struct mddev *mddev)
50215020
}
50225021
EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
50235022

5024-
static void idle_sync_thread(struct mddev *mddev)
5025-
{
5026-
mutex_lock(&mddev->sync_mutex);
5027-
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5028-
5029-
if (mddev_lock(mddev)) {
5030-
mutex_unlock(&mddev->sync_mutex);
5031-
return;
5032-
}
5033-
5034-
stop_sync_thread(mddev, false);
5035-
mutex_unlock(&mddev->sync_mutex);
5036-
}
5037-
5038-
static void frozen_sync_thread(struct mddev *mddev)
5039-
{
5040-
mutex_lock(&mddev->sync_mutex);
5041-
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5042-
5043-
if (mddev_lock(mddev)) {
5044-
mutex_unlock(&mddev->sync_mutex);
5045-
return;
5046-
}
5047-
5048-
stop_sync_thread(mddev, false);
5049-
mutex_unlock(&mddev->sync_mutex);
5050-
}
5051-
50525023
static int mddev_start_reshape(struct mddev *mddev)
50535024
{
50545025
int ret;
50555026

50565027
if (mddev->pers->start_reshape == NULL)
50575028
return -EINVAL;
50585029

5059-
ret = mddev_lock(mddev);
5060-
if (ret)
5061-
return ret;
5062-
5063-
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
5064-
mddev_unlock(mddev);
5065-
return -EBUSY;
5066-
}
5067-
50685030
if (mddev->reshape_position == MaxSector ||
50695031
mddev->pers->check_reshape == NULL ||
50705032
mddev->pers->check_reshape(mddev)) {
50715033
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
50725034
ret = mddev->pers->start_reshape(mddev);
5073-
if (ret) {
5074-
mddev_unlock(mddev);
5035+
if (ret)
50755036
return ret;
5076-
}
50775037
} else {
50785038
/*
50795039
* If reshape is still in progress, and md_check_recovery() can
@@ -5083,7 +5043,6 @@ static int mddev_start_reshape(struct mddev *mddev)
50835043
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
50845044
}
50855045

5086-
mddev_unlock(mddev);
50875046
sysfs_notify_dirent_safe(mddev->sysfs_degraded);
50885047
return 0;
50895048
}
@@ -5097,36 +5056,53 @@ action_store(struct mddev *mddev, const char *page, size_t len)
50975056
if (!mddev->pers || !mddev->pers->sync_request)
50985057
return -EINVAL;
50995058

5059+
retry:
5060+
if (work_busy(&mddev->sync_work))
5061+
flush_work(&mddev->sync_work);
5062+
5063+
ret = mddev_lock(mddev);
5064+
if (ret)
5065+
return ret;
5066+
5067+
if (work_busy(&mddev->sync_work)) {
5068+
mddev_unlock(mddev);
5069+
goto retry;
5070+
}
5071+
51005072
action = md_sync_action_by_name(page);
51015073

51025074
/* TODO: mdadm rely on "idle" to start sync_thread. */
51035075
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) {
51045076
switch (action) {
51055077
case ACTION_FROZEN:
5106-
frozen_sync_thread(mddev);
5107-
return len;
5078+
md_frozen_sync_thread(mddev);
5079+
ret = len;
5080+
goto out;
51085081
case ACTION_IDLE:
5109-
idle_sync_thread(mddev);
5082+
md_idle_sync_thread(mddev);
51105083
break;
51115084
case ACTION_RESHAPE:
51125085
case ACTION_RECOVER:
51135086
case ACTION_CHECK:
51145087
case ACTION_REPAIR:
51155088
case ACTION_RESYNC:
5116-
return -EBUSY;
5089+
ret = -EBUSY;
5090+
goto out;
51175091
default:
5118-
return -EINVAL;
5092+
ret = -EINVAL;
5093+
goto out;
51195094
}
51205095
} else {
51215096
switch (action) {
51225097
case ACTION_FROZEN:
51235098
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
5124-
return len;
5099+
ret = len;
5100+
goto out;
51255101
case ACTION_RESHAPE:
51265102
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
51275103
ret = mddev_start_reshape(mddev);
51285104
if (ret)
5129-
return ret;
5105+
goto out;
51305106
break;
51315107
case ACTION_RECOVER:
51325108
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -5144,22 +5120,27 @@ action_store(struct mddev *mddev, const char *page, size_t len)
51445120
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
51455121
break;
51465122
default:
5147-
return -EINVAL;
5123+
ret = -EINVAL;
5124+
goto out;
51485125
}
51495126
}
51505127

51515128
if (mddev->ro == MD_AUTO_READ) {
51525129
/* A write to sync_action is enough to justify
51535130
* canceling read-auto mode
51545131
*/
5155-
flush_work(&mddev->sync_work);
51565132
mddev->ro = MD_RDWR;
51575133
md_wakeup_thread(mddev->sync_thread);
51585134
}
5135+
51595136
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
51605137
md_wakeup_thread(mddev->thread);
51615138
sysfs_notify_dirent_safe(mddev->sysfs_action);
5162-
return len;
5139+
ret = len;
5140+
5141+
out:
5142+
mddev_unlock(mddev);
5143+
return ret;
51635144
}
51645145

51655146
static struct md_sysfs_entry md_scan_mode =

drivers/md/md.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -595,8 +595,6 @@ struct mddev {
595595
*/
596596
struct list_head deleting;
597597

598-
/* Used to synchronize idle and frozen for action_store() */
599-
struct mutex sync_mutex;
600598
/* The sequence number for sync thread */
601599
atomic_t sync_seq;
602600

0 commit comments

Comments
 (0)