@@ -705,6 +705,31 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
705
705
return bb_disk ;
706
706
}
707
707
708
+ static bool is_sequential (struct r1conf * conf , int disk , struct r1bio * r1_bio )
709
+ {
710
+ /* TODO: address issues with this check and concurrency. */
711
+ return conf -> mirrors [disk ].next_seq_sect == r1_bio -> sector ||
712
+ conf -> mirrors [disk ].head_position == r1_bio -> sector ;
713
+ }
714
+
715
+ /*
716
+ * If buffered sequential IO size exceeds optimal iosize, check if there is idle
717
+ * disk. If yes, choose the idle disk.
718
+ */
719
+ static bool should_choose_next (struct r1conf * conf , int disk )
720
+ {
721
+ struct raid1_info * mirror = & conf -> mirrors [disk ];
722
+ int opt_iosize ;
723
+
724
+ if (!test_bit (Nonrot , & mirror -> rdev -> flags ))
725
+ return false;
726
+
727
+ opt_iosize = bdev_io_opt (mirror -> rdev -> bdev ) >> 9 ;
728
+ return opt_iosize > 0 && mirror -> seq_start != MaxSector &&
729
+ mirror -> next_seq_sect > opt_iosize &&
730
+ mirror -> next_seq_sect - opt_iosize >= mirror -> seq_start ;
731
+ }
732
+
708
733
/*
709
734
* This routine returns the disk from which the requested read should
710
735
* be done. There is a per-array 'next expected sequential IO' sector
@@ -768,43 +793,21 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
768
793
pending = atomic_read (& rdev -> nr_pending );
769
794
dist = abs (this_sector - conf -> mirrors [disk ].head_position );
770
795
/* Don't change to another disk for sequential reads */
771
- if (conf -> mirrors [disk ].next_seq_sect == this_sector
772
- || dist == 0 ) {
773
- int opt_iosize = bdev_io_opt (rdev -> bdev ) >> 9 ;
774
- struct raid1_info * mirror = & conf -> mirrors [disk ];
775
-
776
- /*
777
- * If buffered sequential IO size exceeds optimal
778
- * iosize, check if there is idle disk. If yes, choose
779
- * the idle disk. read_balance could already choose an
780
- * idle disk before noticing it's a sequential IO in
781
- * this disk. This doesn't matter because this disk
782
- * will idle, next time it will be utilized after the
783
- * first disk has IO size exceeds optimal iosize. In
784
- * this way, iosize of the first disk will be optimal
785
- * iosize at least. iosize of the second disk might be
786
- * small, but not a big deal since when the second disk
787
- * starts IO, the first disk is likely still busy.
788
- */
789
- if (test_bit (Nonrot , & rdev -> flags ) && opt_iosize > 0 &&
790
- mirror -> seq_start != MaxSector &&
791
- mirror -> next_seq_sect > opt_iosize &&
792
- mirror -> next_seq_sect - opt_iosize >=
793
- mirror -> seq_start ) {
794
- /*
795
- * Add 'pending' to avoid choosing this disk if
796
- * there is other idle disk.
797
- */
798
- pending ++ ;
799
- /*
800
- * If there is no other idle disk, this disk
801
- * will be chosen.
802
- */
803
- sequential_disk = disk ;
804
- } else {
796
+ if (is_sequential (conf , disk , r1_bio )) {
797
+ if (!should_choose_next (conf , disk )) {
805
798
best_disk = disk ;
806
799
break ;
807
800
}
801
+ /*
802
+ * Add 'pending' to avoid choosing this disk if
803
+ * there is another idle disk.
804
+ */
805
+ pending ++ ;
806
+ /*
807
+ * If there is no other idle disk, this disk
808
+ * will be chosen.
809
+ */
810
+ sequential_disk = disk ;
808
811
}
809
812
810
813
if (min_pending > pending ) {
0 commit comments