@@ -7691,10 +7691,65 @@ static int only_parity(int raid_disk, int algo, int raid_disks, int max_degraded
76917691 return 0 ;
76927692}
76937693
7694- static void raid5_set_io_opt (struct r5conf * conf )
7694+ static int raid5_set_limits (struct mddev * mddev )
76957695{
7696- blk_queue_io_opt (conf -> mddev -> queue , (conf -> chunk_sectors << 9 ) *
7697- (conf -> raid_disks - conf -> max_degraded ));
7696+ struct r5conf * conf = mddev -> private ;
7697+ struct queue_limits lim ;
7698+ int data_disks , stripe ;
7699+ struct md_rdev * rdev ;
7700+
7701+ /*
7702+ * Number of data-bearing members: previous_raid_disks minus max_degraded.
7703+ * It is used below only to size one full stripe for the discard limits.
7704+ */
7705+ data_disks = conf -> previous_raid_disks - conf -> max_degraded ;
7706+
7707+ /*
7708+ * Only whole stripes can be discarded: discarding a data disk while still
7709+ * writing the parity disk makes no sense. Stripe bytes, rounded up to 2^k.
7710+ */
7711+ stripe = roundup_pow_of_two (data_disks * (mddev -> chunk_sectors << 9 ));
7712+
7713+ blk_set_stacking_limits (& lim );
7714+ lim .io_min = mddev -> chunk_sectors << 9 ;
7715+ lim .io_opt = lim .io_min * (conf -> raid_disks - conf -> max_degraded );
7716+ lim .raid_partial_stripes_expensive = 1 ;
7717+ lim .discard_granularity = stripe ;
7718+ lim .max_write_zeroes_sectors = 0 ;
7719+ mddev_stack_rdev_limits (mddev , & lim );
7720+ rdev_for_each (rdev , mddev )
7721+ queue_limits_stack_bdev (& lim , rdev -> bdev , rdev -> new_data_offset ,
7722+ mddev -> gendisk -> disk_name );
7723+
7724+ /*
7725+ * Zeroing is required for discard, otherwise data could be lost.
7726+ *
7727+ * Consider a scenario: discard a stripe (the stripe could be
7728+ * inconsistent if discard_zeroes_data is 0); write one disk of the
7729+ * stripe (the stripe could be inconsistent again depending on which
7730+ * disks are used to calculate parity); the disk is broken; the stripe
7731+ * data of this disk is lost.
7732+ *
7733+ * We only allow DISCARD if the sysadmin has confirmed that only safe
7734+ * devices are in use by setting a module parameter. A better idea
7735+ * might be to turn DISCARD into WRITE_ZEROES requests, as that is
7736+ * required to be safe.
7737+ */
7738+ if (!devices_handle_discard_safely ||
7739+ lim .max_discard_sectors < (stripe >> 9 ) ||
7740+ lim .discard_granularity < stripe )
7741+ lim .max_hw_discard_sectors = 0 ;
7742+
7743+ /*
7744+ * Requests require having a bitmap for each stripe.
7745+ * Limit the max sectors based on this.
7746+ */
7747+ lim .max_hw_sectors = RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT (conf );
7748+
7749+ /* No restrictions on the number of segments in the request */
7750+ lim .max_segments = USHRT_MAX ;
7751+
7752+ return queue_limits_set (mddev -> queue , & lim ); /* caller gets queue_limits_set()'s result */
76987753}
76997754
77007755static int raid5_run (struct mddev * mddev )
@@ -7707,6 +7762,7 @@ static int raid5_run(struct mddev *mddev)
77077762 int i ;
77087763 long long min_offset_diff = 0 ;
77097764 int first = 1 ;
7765+ int ret = - EIO ;
77107766
77117767 if (mddev -> recovery_cp != MaxSector )
77127768 pr_notice ("md/raid:%s: not clean -- starting background reconstruction\n" ,
@@ -7960,65 +8016,9 @@ static int raid5_run(struct mddev *mddev)
79608016 md_set_array_sectors (mddev , raid5_size (mddev , 0 , 0 ));
79618017
79628018 if (!mddev_is_dm (mddev )) {
7963- int chunk_size ;
7964- /* read-ahead size must cover two whole stripes, which
7965- * is 2 * (datadisks) * chunksize where 'n' is the
7966- * number of raid devices
7967- */
7968- int data_disks = conf -> previous_raid_disks - conf -> max_degraded ;
7969- int stripe = data_disks *
7970- ((mddev -> chunk_sectors << 9 ) / PAGE_SIZE );
7971-
7972- chunk_size = mddev -> chunk_sectors << 9 ;
7973- blk_queue_io_min (mddev -> queue , chunk_size );
7974- raid5_set_io_opt (conf );
7975- mddev -> queue -> limits .raid_partial_stripes_expensive = 1 ;
7976- /*
7977- * We can only discard a whole stripe. It doesn't make sense to
7978- * discard data disk but write parity disk
7979- */
7980- stripe = stripe * PAGE_SIZE ;
7981- stripe = roundup_pow_of_two (stripe );
7982- mddev -> queue -> limits .discard_granularity = stripe ;
7983-
7984- blk_queue_max_write_zeroes_sectors (mddev -> queue , 0 );
7985-
7986- rdev_for_each (rdev , mddev ) {
7987- disk_stack_limits (mddev -> gendisk , rdev -> bdev ,
7988- rdev -> data_offset << 9 );
7989- disk_stack_limits (mddev -> gendisk , rdev -> bdev ,
7990- rdev -> new_data_offset << 9 );
7991- }
7992-
7993- /*
7994- * zeroing is required, otherwise data
7995- * could be lost. Consider a scenario: discard a stripe
7996- * (the stripe could be inconsistent if
7997- * discard_zeroes_data is 0); write one disk of the
7998- * stripe (the stripe could be inconsistent again
7999- * depending on which disks are used to calculate
8000- * parity); the disk is broken; The stripe data of this
8001- * disk is lost.
8002- *
8003- * We only allow DISCARD if the sysadmin has confirmed that
8004- * only safe devices are in use by setting a module parameter.
8005- * A better idea might be to turn DISCARD into WRITE_ZEROES
8006- * requests, as that is required to be safe.
8007- */
8008- if (!devices_handle_discard_safely ||
8009- mddev -> queue -> limits .max_discard_sectors < (stripe >> 9 ) ||
8010- mddev -> queue -> limits .discard_granularity < stripe )
8011- blk_queue_max_discard_sectors (mddev -> queue , 0 );
8012-
8013- /*
8014- * Requests require having a bitmap for each stripe.
8015- * Limit the max sectors based on this.
8016- */
8017- blk_queue_max_hw_sectors (mddev -> queue ,
8018- RAID5_MAX_REQ_STRIPES << RAID5_STRIPE_SHIFT (conf ));
8019-
8020- /* No restrictions on the number of segments in the request */
8021- blk_queue_max_segments (mddev -> queue , USHRT_MAX );
8019+ ret = raid5_set_limits (mddev );
8020+ if (ret )
8021+ goto abort ;
80228022 }
80238023
80248024 if (log_init (conf , journal_dev , raid5_has_ppl (conf )))
@@ -8031,7 +8031,7 @@ static int raid5_run(struct mddev *mddev)
80318031 free_conf (conf );
80328032 mddev -> private = NULL ;
80338033 pr_warn ("md/raid:%s: failed to run raid set.\n" , mdname (mddev ));
8034- return - EIO ;
8034+ return ret ;
80358035}
80368036
80378037static void raid5_free (struct mddev * mddev , void * priv )
@@ -8563,8 +8563,8 @@ static void end_reshape(struct r5conf *conf)
85638563 spin_unlock_irq (& conf -> device_lock );
85648564 wake_up (& conf -> wait_for_overlap );
85658565
8566- if (! mddev_is_dm ( conf -> mddev ))
8567- raid5_set_io_opt ( conf );
8566+ mddev_update_io_opt ( conf -> mddev ,
8567+ conf -> raid_disks - conf -> max_degraded );
85688568 }
85698569}
85708570
0 commit comments