Skip to content

Commit f63f173

Browse files
Christoph Hellwig authored and liu-song-6 committed
md/raid5: use the atomic queue limit update APIs
Build the queue limits outside the queue and apply them using queue_limits_set. To make the code more obvious also split the queue limits handling into separate helpers. Signed-off-by: Christoph Hellwig <[email protected]> Reviewed--by: Song Liu <[email protected]> Tested-by: Song Liu <[email protected]> Signed-off-by: Song Liu <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 97894f7 commit f63f173

File tree

1 file changed

+65
-65
lines changed

1 file changed

+65
-65
lines changed

drivers/md/raid5.c

Lines changed: 65 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -7691,10 +7691,65 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
76917691
return 0;
76927692
}
76937693

7694-
static void raid5_set_io_opt(struct r5conf *conf)
7694+
static int raid5_set_limits(struct mddev *mddev)
76957695
{
7696-
blk_queue_io_opt(conf->mddev->queue, (conf->chunk_sectors << 9) *
7697-
(conf->raid_disks - conf->max_degraded));
7696+
struct r5conf *conf = mddev->private;
7697+
struct queue_limits lim;
7698+
int data_disks, stripe;
7699+
struct md_rdev *rdev;
7700+
7701+
/*
7702+
* The read-ahead size must cover two whole stripes, which is
7703+
* 2 * (datadisks) * chunksize where 'n' is the number of raid devices.
7704+
*/
7705+
data_disks = conf->previous_raid_disks - conf->max_degraded;
7706+
7707+
/*
7708+
* We can only discard a whole stripe. It doesn't make sense to
7709+
* discard data disk but write parity disk
7710+
*/
7711+
stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));
7712+
7713+
blk_set_stacking_limits(&lim);
7714+
lim.io_min = mddev->chunk_sectors << 9;
7715+
lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
7716+
lim.raid_partial_stripes_expensive = 1;
7717+
lim.discard_granularity = stripe;
7718+
lim.max_write_zeroes_sectors = 0;
7719+
mddev_stack_rdev_limits(mddev, &lim);
7720+
rdev_for_each(rdev, mddev)
7721+
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->new_data_offset,
7722+
mddev->gendisk->disk_name);
7723+
7724+
/*
7725+
* Zeroing is required for discard, otherwise data could be lost.
7726+
*
7727+
* Consider a scenario: discard a stripe (the stripe could be
7728+
* inconsistent if discard_zeroes_data is 0); write one disk of the
7729+
* stripe (the stripe could be inconsistent again depending on which
7730+
* disks are used to calculate parity); the disk is broken; The stripe
7731+
* data of this disk is lost.
7732+
*
7733+
* We only allow DISCARD if the sysadmin has confirmed that only safe
7734+
* devices are in use by setting a module parameter. A better idea
7735+
* might be to turn DISCARD into WRITE_ZEROES requests, as that is
7736+
* required to be safe.
7737+
*/
7738+
if (!devices_handle_discard_safely ||
7739+
lim.max_discard_sectors < (stripe >> 9) ||
7740+
lim.discard_granularity < stripe)
7741+
lim.max_hw_discard_sectors = 0;
7742+
7743+
/*
7744+
* Requests require having a bitmap for each stripe.
7745+
* Limit the max sectors based on this.
7746+
*/
7747+
lim.max_hw_sectors = RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf);
7748+
7749+
/* No restrictions on the number of segments in the request */
7750+
lim.max_segments = USHRT_MAX;
7751+
7752+
return queue_limits_set(mddev->queue, &lim);
76987753
}
76997754

77007755
static int raid5_run(struct mddev *mddev)
@@ -7707,6 +7762,7 @@ static int raid5_run(struct mddev *mddev)
77077762
int i;
77087763
long long min_offset_diff = 0;
77097764
int first = 1;
7765+
int ret = -EIO;
77107766

77117767
if (mddev->recovery_cp != MaxSector)
77127768
pr_notice("md/raid:%s: not clean -- starting background reconstruction\n",
@@ -7960,65 +8016,9 @@ static int raid5_run(struct mddev *mddev)
79608016
md_set_array_sectors(mddev, raid5_size(mddev, 0, 0));
79618017

79628018
if (!mddev_is_dm(mddev)) {
7963-
int chunk_size;
7964-
/* read-ahead size must cover two whole stripes, which
7965-
* is 2 * (datadisks) * chunksize where 'n' is the
7966-
* number of raid devices
7967-
*/
7968-
int data_disks = conf->previous_raid_disks - conf->max_degraded;
7969-
int stripe = data_disks *
7970-
((mddev->chunk_sectors << 9) / PAGE_SIZE);
7971-
7972-
chunk_size = mddev->chunk_sectors << 9;
7973-
blk_queue_io_min(mddev->queue, chunk_size);
7974-
raid5_set_io_opt(conf);
7975-
mddev->queue->limits.raid_partial_stripes_expensive = 1;
7976-
/*
7977-
* We can only discard a whole stripe. It doesn't make sense to
7978-
* discard data disk but write parity disk
7979-
*/
7980-
stripe = stripe * PAGE_SIZE;
7981-
stripe = roundup_pow_of_two(stripe);
7982-
mddev->queue->limits.discard_granularity = stripe;
7983-
7984-
blk_queue_max_write_zeroes_sectors(mddev->queue, 0);
7985-
7986-
rdev_for_each(rdev, mddev) {
7987-
disk_stack_limits(mddev->gendisk, rdev->bdev,
7988-
rdev->data_offset << 9);
7989-
disk_stack_limits(mddev->gendisk, rdev->bdev,
7990-
rdev->new_data_offset << 9);
7991-
}
7992-
7993-
/*
7994-
* zeroing is required, otherwise data
7995-
* could be lost. Consider a scenario: discard a stripe
7996-
* (the stripe could be inconsistent if
7997-
* discard_zeroes_data is 0); write one disk of the
7998-
* stripe (the stripe could be inconsistent again
7999-
* depending on which disks are used to calculate
8000-
* parity); the disk is broken; The stripe data of this
8001-
* disk is lost.
8002-
*
8003-
* We only allow DISCARD if the sysadmin has confirmed that
8004-
* only safe devices are in use by setting a module parameter.
8005-
* A better idea might be to turn DISCARD into WRITE_ZEROES
8006-
* requests, as that is required to be safe.
8007-
*/
8008-
if (!devices_handle_discard_safely ||
8009-
mddev->queue->limits.max_discard_sectors < (stripe >> 9) ||
8010-
mddev->queue->limits.discard_granularity < stripe)
8011-
blk_queue_max_discard_sectors(mddev->queue, 0);
8012-
8013-
/*
8014-
* Requests require having a bitmap for each stripe.
8015-
* Limit the max sectors based on this.
8016-
*/
8017-
blk_queue_max_hw_sectors(mddev->queue,
8018-
RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT(conf));
8019-
8020-
/* No restrictions on the number of segments in the request */
8021-
blk_queue_max_segments(mddev->queue, USHRT_MAX);
8019+
ret = raid5_set_limits(mddev);
8020+
if (ret)
8021+
goto abort;
80228022
}
80238023

80248024
if (log_init(conf, journal_dev, raid5_has_ppl(conf)))
@@ -8031,7 +8031,7 @@ static int raid5_run(struct mddev *mddev)
80318031
free_conf(conf);
80328032
mddev->private = NULL;
80338033
pr_warn("md/raid:%s: failed to run raid set.\n", mdname(mddev));
8034-
return -EIO;
8034+
return ret;
80358035
}
80368036

80378037
static void raid5_free(struct mddev *mddev, void *priv)
@@ -8563,8 +8563,8 @@ static void end_reshape(struct r5conf *conf)
85638563
spin_unlock_irq(&conf->device_lock);
85648564
wake_up(&conf->wait_for_overlap);
85658565

8566-
if (!mddev_is_dm(conf->mddev))
8567-
raid5_set_io_opt(conf);
8566+
mddev_update_io_opt(conf->mddev,
8567+
conf->raid_disks - conf->max_degraded);
85688568
}
85698569
}
85708570

0 commit comments

Comments
 (0)