Skip to content

Commit 3a889fd

Browse files
committed
Merge branch 'dmraid-fix-6.9' into md-6.9
This is the second half of fixes for dmraid. The first half is available at [1].

This set contains fixes:
- reshape can start unexpectedly and cause data corruption (patches 1, 5, 6);
- deadlocks when reshape runs concurrently with IO (patch 8);
- a lockdep warning (patch 9).

For all the dmraid-related tests in the lvm2 suite, there are no new regressions compared against 6.6 kernels (which is a good baseline before the recent regressions).

[1] https://lore.kernel.org/all/CAPhsuW7u1UKHCDOBDhD7DzOVtkGemDz_QnJ4DUq_kSN-Q3G66Q@mail.gmail.com/

* dmraid-fix-6.9:
  dm-raid: fix lockdep warning in "pers->hot_add_disk"
  dm-raid456, md/raid456: fix a deadlock for dm-raid456 while io concurrent with reshape
  dm-raid: add a new helper prepare_suspend() in md_personality
  md/dm-raid: don't call md_reap_sync_thread() directly
  dm-raid: really frozen sync_thread during suspend
  md: add a new helper reshape_interrupted()
  md: export helper md_is_rdwr()
  md: export helpers to stop sync_thread
  md: don't clear MD_RECOVERY_FROZEN for new dm-raid until resume
2 parents 3445139 + 95009ae commit 3a889fd

File tree

4 files changed

+196
-40
lines changed

4 files changed

+196
-40
lines changed

drivers/md/dm-raid.c

Lines changed: 72 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ struct raid_dev {
213213
#define RT_FLAG_RS_IN_SYNC 6
214214
#define RT_FLAG_RS_RESYNCING 7
215215
#define RT_FLAG_RS_GROW 8
216+
#define RT_FLAG_RS_FROZEN 9
216217

217218
/* Array elements of 64 bit needed for rebuild/failed disk bits */
218219
#define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8)
@@ -3240,11 +3241,12 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
32403241
rs->md.ro = 1;
32413242
rs->md.in_sync = 1;
32423243

3243-
/* Keep array frozen until resume. */
3244-
set_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
3245-
32463244
/* Has to be held on running the array */
32473245
mddev_suspend_and_lock_nointr(&rs->md);
3246+
3247+
/* Keep array frozen until resume. */
3248+
md_frozen_sync_thread(&rs->md);
3249+
32483250
r = md_run(&rs->md);
32493251
rs->md.in_sync = 0; /* Assume already marked dirty */
32503252
if (r) {
@@ -3339,7 +3341,8 @@ static int raid_map(struct dm_target *ti, struct bio *bio)
33393341
if (unlikely(bio_end_sector(bio) > mddev->array_sectors))
33403342
return DM_MAPIO_REQUEUE;
33413343

3342-
md_handle_request(mddev, bio);
3344+
if (unlikely(!md_handle_request(mddev, bio)))
3345+
return DM_MAPIO_REQUEUE;
33433346

33443347
return DM_MAPIO_SUBMITTED;
33453348
}
@@ -3718,21 +3721,33 @@ static int raid_message(struct dm_target *ti, unsigned int argc, char **argv,
37183721
{
37193722
struct raid_set *rs = ti->private;
37203723
struct mddev *mddev = &rs->md;
3724+
int ret = 0;
37213725

37223726
if (!mddev->pers || !mddev->pers->sync_request)
37233727
return -EINVAL;
37243728

3725-
if (!strcasecmp(argv[0], "frozen"))
3726-
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3727-
else
3728-
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3729+
if (test_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags) ||
3730+
test_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags))
3731+
return -EBUSY;
37293732

3730-
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
3731-
if (mddev->sync_thread) {
3732-
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
3733-
md_reap_sync_thread(mddev);
3734-
}
3735-
} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
3733+
if (!strcasecmp(argv[0], "frozen")) {
3734+
ret = mddev_lock(mddev);
3735+
if (ret)
3736+
return ret;
3737+
3738+
md_frozen_sync_thread(mddev);
3739+
mddev_unlock(mddev);
3740+
} else if (!strcasecmp(argv[0], "idle")) {
3741+
ret = mddev_lock(mddev);
3742+
if (ret)
3743+
return ret;
3744+
3745+
md_idle_sync_thread(mddev);
3746+
mddev_unlock(mddev);
3747+
}
3748+
3749+
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
3750+
if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
37363751
return -EBUSY;
37373752
else if (!strcasecmp(argv[0], "resync"))
37383753
; /* MD_RECOVERY_NEEDED set below */
@@ -3791,15 +3806,46 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
37913806
blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
37923807
}
37933808

3809+
static void raid_presuspend(struct dm_target *ti)
3810+
{
3811+
struct raid_set *rs = ti->private;
3812+
struct mddev *mddev = &rs->md;
3813+
3814+
/*
3815+
* From now on, disallow raid_message() to change sync_thread until
3816+
* resume, raid_postsuspend() is too late.
3817+
*/
3818+
set_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
3819+
3820+
if (!reshape_interrupted(mddev))
3821+
return;
3822+
3823+
/*
3824+
* For raid456, if reshape is interrupted, IO across reshape position
3825+
* will never make progress, while caller will wait for IO to be done.
3826+
* Inform raid456 to handle those IO to prevent deadlock.
3827+
*/
3828+
if (mddev->pers && mddev->pers->prepare_suspend)
3829+
mddev->pers->prepare_suspend(mddev);
3830+
}
3831+
3832+
static void raid_presuspend_undo(struct dm_target *ti)
3833+
{
3834+
struct raid_set *rs = ti->private;
3835+
3836+
clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
3837+
}
3838+
37943839
static void raid_postsuspend(struct dm_target *ti)
37953840
{
37963841
struct raid_set *rs = ti->private;
37973842

37983843
if (!test_and_set_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
3799-
/* Writes have to be stopped before suspending to avoid deadlocks. */
3800-
if (!test_bit(MD_RECOVERY_FROZEN, &rs->md.recovery))
3801-
md_stop_writes(&rs->md);
3802-
3844+
/*
3845+
* sync_thread must be stopped during suspend, and writes have
3846+
* to be stopped before suspending to avoid deadlocks.
3847+
*/
3848+
md_stop_writes(&rs->md);
38033849
mddev_suspend(&rs->md, false);
38043850
}
38053851
}
@@ -4012,8 +4058,6 @@ static int raid_preresume(struct dm_target *ti)
40124058
}
40134059

40144060
/* Check for any resize/reshape on @rs and adjust/initiate */
4015-
/* Be prepared for mddev_resume() in raid_resume() */
4016-
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
40174061
if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) {
40184062
set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
40194063
mddev->resync_min = mddev->recovery_cp;
@@ -4047,18 +4091,23 @@ static void raid_resume(struct dm_target *ti)
40474091
* Take this opportunity to check whether any failed
40484092
* devices are reachable again.
40494093
*/
4094+
mddev_lock_nointr(mddev);
40504095
attempt_restore_of_faulty_devices(rs);
4096+
mddev_unlock(mddev);
40514097
}
40524098

40534099
if (test_and_clear_bit(RT_FLAG_RS_SUSPENDED, &rs->runtime_flags)) {
40544100
/* Only reduce raid set size before running a disk removing reshape. */
40554101
if (mddev->delta_disks < 0)
40564102
rs_set_capacity(rs);
40574103

4104+
WARN_ON_ONCE(!test_bit(MD_RECOVERY_FROZEN, &mddev->recovery));
4105+
WARN_ON_ONCE(test_bit(MD_RECOVERY_RUNNING, &mddev->recovery));
4106+
clear_bit(RT_FLAG_RS_FROZEN, &rs->runtime_flags);
40584107
mddev_lock_nointr(mddev);
4059-
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
40604108
mddev->ro = 0;
40614109
mddev->in_sync = 0;
4110+
md_unfrozen_sync_thread(mddev);
40624111
mddev_unlock_and_resume(mddev);
40634112
}
40644113
}
@@ -4074,6 +4123,8 @@ static struct target_type raid_target = {
40744123
.message = raid_message,
40754124
.iterate_devices = raid_iterate_devices,
40764125
.io_hints = raid_io_hints,
4126+
.presuspend = raid_presuspend,
4127+
.presuspend_undo = raid_presuspend_undo,
40774128
.postsuspend = raid_postsuspend,
40784129
.preresume = raid_preresume,
40794130
.resume = raid_resume,

drivers/md/md.c

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -99,18 +99,6 @@ static void mddev_detach(struct mddev *mddev);
9999
static void export_rdev(struct md_rdev *rdev, struct mddev *mddev);
100100
static void md_wakeup_thread_directly(struct md_thread __rcu *thread);
101101

102-
enum md_ro_state {
103-
MD_RDWR,
104-
MD_RDONLY,
105-
MD_AUTO_READ,
106-
MD_MAX_STATE
107-
};
108-
109-
static bool md_is_rdwr(struct mddev *mddev)
110-
{
111-
return (mddev->ro == MD_RDWR);
112-
}
113-
114102
/*
115103
* Default number of read corrections we'll attempt on an rdev
116104
* before ejecting it from the array. We divide the read error
@@ -378,15 +366,15 @@ static bool is_suspended(struct mddev *mddev, struct bio *bio)
378366
return true;
379367
}
380368

381-
void md_handle_request(struct mddev *mddev, struct bio *bio)
369+
bool md_handle_request(struct mddev *mddev, struct bio *bio)
382370
{
383371
check_suspended:
384372
if (is_suspended(mddev, bio)) {
385373
DEFINE_WAIT(__wait);
386374
/* Bail out if REQ_NOWAIT is set for the bio */
387375
if (bio->bi_opf & REQ_NOWAIT) {
388376
bio_wouldblock_error(bio);
389-
return;
377+
return true;
390378
}
391379
for (;;) {
392380
prepare_to_wait(&mddev->sb_wait, &__wait,
@@ -402,10 +390,13 @@ void md_handle_request(struct mddev *mddev, struct bio *bio)
402390

403391
if (!mddev->pers->make_request(mddev, bio)) {
404392
percpu_ref_put(&mddev->active_io);
393+
if (!mddev->gendisk && mddev->pers->prepare_suspend)
394+
return false;
405395
goto check_suspended;
406396
}
407397

408398
percpu_ref_put(&mddev->active_io);
399+
return true;
409400
}
410401
EXPORT_SYMBOL(md_handle_request);
411402

@@ -4942,6 +4933,35 @@ static void stop_sync_thread(struct mddev *mddev, bool locked, bool check_seq)
49424933
mddev_lock_nointr(mddev);
49434934
}
49444935

4936+
void md_idle_sync_thread(struct mddev *mddev)
4937+
{
4938+
lockdep_assert_held(&mddev->reconfig_mutex);
4939+
4940+
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4941+
stop_sync_thread(mddev, true, true);
4942+
}
4943+
EXPORT_SYMBOL_GPL(md_idle_sync_thread);
4944+
4945+
void md_frozen_sync_thread(struct mddev *mddev)
4946+
{
4947+
lockdep_assert_held(&mddev->reconfig_mutex);
4948+
4949+
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4950+
stop_sync_thread(mddev, true, false);
4951+
}
4952+
EXPORT_SYMBOL_GPL(md_frozen_sync_thread);
4953+
4954+
void md_unfrozen_sync_thread(struct mddev *mddev)
4955+
{
4956+
lockdep_assert_held(&mddev->reconfig_mutex);
4957+
4958+
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
4959+
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
4960+
md_wakeup_thread(mddev->thread);
4961+
sysfs_notify_dirent_safe(mddev->sysfs_action);
4962+
}
4963+
EXPORT_SYMBOL_GPL(md_unfrozen_sync_thread);
4964+
49454965
static void idle_sync_thread(struct mddev *mddev)
49464966
{
49474967
mutex_lock(&mddev->sync_mutex);
@@ -6062,7 +6082,10 @@ int md_run(struct mddev *mddev)
60626082
pr_warn("True protection against single-disk failure might be compromised.\n");
60636083
}
60646084

6065-
mddev->recovery = 0;
6085+
/* dm-raid expect sync_thread to be frozen until resume */
6086+
if (mddev->gendisk)
6087+
mddev->recovery = 0;
6088+
60666089
/* may be over-ridden by personality */
60676090
mddev->resync_max_sectors = mddev->dev_sectors;
60686091

@@ -6344,7 +6367,6 @@ static void md_clean(struct mddev *mddev)
63446367

63456368
static void __md_stop_writes(struct mddev *mddev)
63466369
{
6347-
stop_sync_thread(mddev, true, false);
63486370
del_timer_sync(&mddev->safemode_timer);
63496371

63506372
if (mddev->pers && mddev->pers->quiesce) {
@@ -6369,6 +6391,8 @@ static void __md_stop_writes(struct mddev *mddev)
63696391
void md_stop_writes(struct mddev *mddev)
63706392
{
63716393
mddev_lock_nointr(mddev);
6394+
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
6395+
stop_sync_thread(mddev, true, false);
63726396
__md_stop_writes(mddev);
63736397
mddev_unlock(mddev);
63746398
}
@@ -8712,6 +8736,23 @@ void md_account_bio(struct mddev *mddev, struct bio **bio)
87128736
}
87138737
EXPORT_SYMBOL_GPL(md_account_bio);
87148738

8739+
void md_free_cloned_bio(struct bio *bio)
8740+
{
8741+
struct md_io_clone *md_io_clone = bio->bi_private;
8742+
struct bio *orig_bio = md_io_clone->orig_bio;
8743+
struct mddev *mddev = md_io_clone->mddev;
8744+
8745+
if (bio->bi_status && !orig_bio->bi_status)
8746+
orig_bio->bi_status = bio->bi_status;
8747+
8748+
if (md_io_clone->start_time)
8749+
bio_end_io_acct(orig_bio, md_io_clone->start_time);
8750+
8751+
bio_put(bio);
8752+
percpu_ref_put(&mddev->active_io);
8753+
}
8754+
EXPORT_SYMBOL_GPL(md_free_cloned_bio);
8755+
87158756
/* md_allow_write(mddev)
87168757
* Calling this ensures that the array is marked 'active' so that writes
87178758
* may proceed without blocking. It is important to call this before

drivers/md/md.h

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,37 @@ enum recovery_flags {
569569
MD_RESYNCING_REMOTE, /* remote node is running resync thread */
570570
};
571571

572+
enum md_ro_state {
573+
MD_RDWR,
574+
MD_RDONLY,
575+
MD_AUTO_READ,
576+
MD_MAX_STATE
577+
};
578+
579+
static inline bool md_is_rdwr(struct mddev *mddev)
580+
{
581+
return (mddev->ro == MD_RDWR);
582+
}
583+
584+
static inline bool reshape_interrupted(struct mddev *mddev)
585+
{
586+
/* reshape never start */
587+
if (mddev->reshape_position == MaxSector)
588+
return false;
589+
590+
/* interrupted */
591+
if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
592+
return true;
593+
594+
/* running reshape will be interrupted soon. */
595+
if (test_bit(MD_RECOVERY_WAIT, &mddev->recovery) ||
596+
test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
597+
test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
598+
return true;
599+
600+
return false;
601+
}
602+
572603
static inline int __must_check mddev_lock(struct mddev *mddev)
573604
{
574605
return mutex_lock_interruptible(&mddev->reconfig_mutex);
@@ -628,6 +659,7 @@ struct md_personality
628659
int (*start_reshape) (struct mddev *mddev);
629660
void (*finish_reshape) (struct mddev *mddev);
630661
void (*update_reshape_pos) (struct mddev *mddev);
662+
void (*prepare_suspend) (struct mddev *mddev);
631663
/* quiesce suspends or resumes internal processing.
632664
* 1 - stop new actions and wait for action io to complete
633665
* 0 - return to normal behaviour
@@ -761,6 +793,7 @@ extern void md_finish_reshape(struct mddev *mddev);
761793
void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
762794
struct bio *bio, sector_t start, sector_t size);
763795
void md_account_bio(struct mddev *mddev, struct bio **bio);
796+
void md_free_cloned_bio(struct bio *bio);
764797

765798
extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio);
766799
extern void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
@@ -789,9 +822,12 @@ extern void md_stop_writes(struct mddev *mddev);
789822
extern int md_rdev_init(struct md_rdev *rdev);
790823
extern void md_rdev_clear(struct md_rdev *rdev);
791824

792-
extern void md_handle_request(struct mddev *mddev, struct bio *bio);
825+
extern bool md_handle_request(struct mddev *mddev, struct bio *bio);
793826
extern int mddev_suspend(struct mddev *mddev, bool interruptible);
794827
extern void mddev_resume(struct mddev *mddev);
828+
extern void md_idle_sync_thread(struct mddev *mddev);
829+
extern void md_frozen_sync_thread(struct mddev *mddev);
830+
extern void md_unfrozen_sync_thread(struct mddev *mddev);
795831

796832
extern void md_reload_sb(struct mddev *mddev, int raid_disk);
797833
extern void md_update_sb(struct mddev *mddev, int force);

0 commit comments

Comments
 (0)