@@ -7691,10 +7691,65 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
7691
7691
return 0 ;
7692
7692
}
7693
7693
7694
- static void raid5_set_io_opt (struct r5conf * conf )
7694
+ static int raid5_set_limits (struct mddev * mddev )
7695
7695
{
7696
- blk_queue_io_opt (conf -> mddev -> queue , (conf -> chunk_sectors << 9 ) *
7697
- (conf -> raid_disks - conf -> max_degraded ));
7696
+ struct r5conf * conf = mddev -> private ;
7697
+ struct queue_limits lim ;
7698
+ int data_disks , stripe ;
7699
+ struct md_rdev * rdev ;
7700
+
7701
+ /*
7702
+ * The read-ahead size must cover two whole stripes, which is
7703
+ * 2 * data_disks * chunksize (data_disks = raid disks - max_degraded).
7704
+ */
7705
+ data_disks = conf -> previous_raid_disks - conf -> max_degraded ;
7706
+
7707
+ /*
7708
+ * We can only discard a whole stripe. It doesn't make sense to
7709
+ * discard the data disks but still write the parity disk.
7710
+ */
7711
+ stripe = roundup_pow_of_two (data_disks * (mddev -> chunk_sectors << 9 ));
7712
+
7713
+ blk_set_stacking_limits (& lim );
7714
+ lim .io_min = mddev -> chunk_sectors << 9 ;
7715
+ lim .io_opt = lim .io_min * (conf -> raid_disks - conf -> max_degraded );
7716
+ lim .raid_partial_stripes_expensive = 1 ;
7717
+ lim .discard_granularity = stripe ;
7718
+ lim .max_write_zeroes_sectors = 0 ;
7719
+ mddev_stack_rdev_limits (mddev , & lim );
7720
+ rdev_for_each (rdev , mddev )
7721
+ queue_limits_stack_bdev (& lim , rdev -> bdev , rdev -> new_data_offset ,
7722
+ mddev -> gendisk -> disk_name );
7723
+
7724
+ /*
7725
+ * Zeroing is required for discard, otherwise data could be lost.
7726
+ *
7727
+ * Consider a scenario: discard a stripe (the stripe could be
7728
+ * inconsistent if discard_zeroes_data is 0); write one disk of the
7729
+ * stripe (the stripe could be inconsistent again depending on which
7730
+ * disks are used to calculate parity); the disk is broken; the stripe
7731
+ * data of this disk is lost.
7732
+ *
7733
+ * We only allow DISCARD if the sysadmin has confirmed that only safe
7734
+ * devices are in use by setting a module parameter. A better idea
7735
+ * might be to turn DISCARD into WRITE_ZEROES requests, as that is
7736
+ * required to be safe.
7737
+ */
7738
+ if (!devices_handle_discard_safely ||
7739
+ lim .max_discard_sectors < (stripe >> 9 ) ||
7740
+ lim .discard_granularity < stripe )
7741
+ lim .max_hw_discard_sectors = 0 ;
7742
+
7743
+ /*
7744
+ * Requests require having a bitmap for each stripe.
7745
+ * Limit the max sectors based on this.
7746
+ */
7747
+ lim .max_hw_sectors = RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT (conf );
7748
+
7749
+ /* No restrictions on the number of segments in the request */
7750
+ lim .max_segments = USHRT_MAX ;
7751
+
7752
+ return queue_limits_set (mddev -> queue , & lim );
7698
7753
}
7699
7754
7700
7755
static int raid5_run (struct mddev * mddev )
@@ -7707,6 +7762,7 @@ static int raid5_run(struct mddev *mddev)
7707
7762
int i ;
7708
7763
long long min_offset_diff = 0 ;
7709
7764
int first = 1 ;
7765
+ int ret = - EIO ;
7710
7766
7711
7767
if (mddev -> recovery_cp != MaxSector )
7712
7768
pr_notice ("md/raid:%s: not clean -- starting background reconstruction\n" ,
@@ -7960,65 +8016,9 @@ static int raid5_run(struct mddev *mddev)
7960
8016
md_set_array_sectors (mddev , raid5_size (mddev , 0 , 0 ));
7961
8017
7962
8018
if (!mddev_is_dm (mddev )) {
7963
- int chunk_size ;
7964
- /* read-ahead size must cover two whole stripes, which
7965
- * is 2 * (datadisks) * chunksize where 'n' is the
7966
- * number of raid devices
7967
- */
7968
- int data_disks = conf -> previous_raid_disks - conf -> max_degraded ;
7969
- int stripe = data_disks *
7970
- ((mddev -> chunk_sectors << 9 ) / PAGE_SIZE );
7971
-
7972
- chunk_size = mddev -> chunk_sectors << 9 ;
7973
- blk_queue_io_min (mddev -> queue , chunk_size );
7974
- raid5_set_io_opt (conf );
7975
- mddev -> queue -> limits .raid_partial_stripes_expensive = 1 ;
7976
- /*
7977
- * We can only discard a whole stripe. It doesn't make sense to
7978
- * discard data disk but write parity disk
7979
- */
7980
- stripe = stripe * PAGE_SIZE ;
7981
- stripe = roundup_pow_of_two (stripe );
7982
- mddev -> queue -> limits .discard_granularity = stripe ;
7983
-
7984
- blk_queue_max_write_zeroes_sectors (mddev -> queue , 0 );
7985
-
7986
- rdev_for_each (rdev , mddev ) {
7987
- disk_stack_limits (mddev -> gendisk , rdev -> bdev ,
7988
- rdev -> data_offset << 9 );
7989
- disk_stack_limits (mddev -> gendisk , rdev -> bdev ,
7990
- rdev -> new_data_offset << 9 );
7991
- }
7992
-
7993
- /*
7994
- * zeroing is required, otherwise data
7995
- * could be lost. Consider a scenario: discard a stripe
7996
- * (the stripe could be inconsistent if
7997
- * discard_zeroes_data is 0); write one disk of the
7998
- * stripe (the stripe could be inconsistent again
7999
- * depending on which disks are used to calculate
8000
- * parity); the disk is broken; The stripe data of this
8001
- * disk is lost.
8002
- *
8003
- * We only allow DISCARD if the sysadmin has confirmed that
8004
- * only safe devices are in use by setting a module parameter.
8005
- * A better idea might be to turn DISCARD into WRITE_ZEROES
8006
- * requests, as that is required to be safe.
8007
- */
8008
- if (!devices_handle_discard_safely ||
8009
- mddev -> queue -> limits .max_discard_sectors < (stripe >> 9 ) ||
8010
- mddev -> queue -> limits .discard_granularity < stripe )
8011
- blk_queue_max_discard_sectors (mddev -> queue , 0 );
8012
-
8013
- /*
8014
- * Requests require having a bitmap for each stripe.
8015
- * Limit the max sectors based on this.
8016
- */
8017
- blk_queue_max_hw_sectors (mddev -> queue ,
8018
- RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT (conf ));
8019
-
8020
- /* No restrictions on the number of segments in the request */
8021
- blk_queue_max_segments (mddev -> queue , USHRT_MAX );
8019
+ ret = raid5_set_limits (mddev );
8020
+ if (ret )
8021
+ goto abort ;
8022
8022
}
8023
8023
8024
8024
if (log_init (conf , journal_dev , raid5_has_ppl (conf )))
@@ -8031,7 +8031,7 @@ static int raid5_run(struct mddev *mddev)
8031
8031
free_conf (conf );
8032
8032
mddev -> private = NULL ;
8033
8033
pr_warn ("md/raid:%s: failed to run raid set.\n" , mdname (mddev ));
8034
- return - EIO ;
8034
+ return ret ;
8035
8035
}
8036
8036
8037
8037
static void raid5_free (struct mddev * mddev , void * priv )
@@ -8563,8 +8563,8 @@ static void end_reshape(struct r5conf *conf)
8563
8563
spin_unlock_irq (& conf -> device_lock );
8564
8564
wake_up (& conf -> wait_for_overlap );
8565
8565
8566
- if (! mddev_is_dm ( conf -> mddev ))
8567
- raid5_set_io_opt ( conf );
8566
+ mddev_update_io_opt ( conf -> mddev ,
8567
+ conf -> raid_disks - conf -> max_degraded );
8568
8568
}
8569
8569
}
8570
8570
0 commit comments