Skip to content

Commit ba58f57

Browse files
YuKuai-huawei and liu-song-6
authored and committed
md/raid1: factor out the code to manage sequential IO
There is no functional change for now; this makes read_balance() cleaner and prepares for fixing problems in, and refactoring, the handler of sequential IO. Co-developed-by: Paul Luse <[email protected]> Signed-off-by: Paul Luse <[email protected]> Signed-off-by: Yu Kuai <[email protected]> Reviewed-by: Xiao Ni <[email protected]> Signed-off-by: Song Liu <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 9f3ced7 commit ba58f57

File tree

1 file changed

+37
-34
lines changed

1 file changed

+37
-34
lines changed

drivers/md/raid1.c

Lines changed: 37 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,31 @@ static int choose_slow_rdev(struct r1conf *conf, struct r1bio *r1_bio,
705705
return bb_disk;
706706
}
707707

708+
/*
 * Report whether the read described by r1_bio looks sequential on mirror
 * 'disk': true when r1_bio->sector equals either that mirror's recorded
 * next_seq_sect or its recorded head_position.
 */
static bool is_sequential(struct r1conf *conf, int disk, struct r1bio *r1_bio)
709+
{
710+
/* TODO: address issues with this check and concurrency. */
711+
return conf->mirrors[disk].next_seq_sect == r1_bio->sector ||
712+
conf->mirrors[disk].head_position == r1_bio->sector;
713+
}
714+
715+
/*
716+
* If buffered sequential IO size exceeds optimal iosize, check if there is idle
717+
* disk. If yes, choose the idle disk.
718+
*/
719+
static bool should_choose_next(struct r1conf *conf, int disk)
720+
{
721+
struct raid1_info *mirror = &conf->mirrors[disk];
722+
int opt_iosize;
723+
/* Without the Nonrot flag on this rdev, never ask for another disk. */
724+
if (!test_bit(Nonrot, &mirror->rdev->flags))
725+
return false;
726+
/* NOTE(review): ">> 9" presumably converts bytes to 512-byte sectors - confirm. */
727+
opt_iosize = bdev_io_opt(mirror->rdev->bdev) >> 9;
/*
 * True only when opt_iosize is positive, seq_start is not MaxSector
 * (presumably "no sequential run recorded" - confirm), and next_seq_sect
 * is at least opt_iosize beyond seq_start. The "next_seq_sect >
 * opt_iosize" test comes first so the subtraction cannot go below zero.
 */
728+
return opt_iosize > 0 && mirror->seq_start != MaxSector &&
729+
mirror->next_seq_sect > opt_iosize &&
730+
mirror->next_seq_sect - opt_iosize >= mirror->seq_start;
731+
}
732+
708733
/*
709734
* This routine returns the disk from which the requested read should
710735
* be done. There is a per-array 'next expected sequential IO' sector
@@ -768,43 +793,21 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
768793
pending = atomic_read(&rdev->nr_pending);
769794
dist = abs(this_sector - conf->mirrors[disk].head_position);
770795
/* Don't change to another disk for sequential reads */
771-
if (conf->mirrors[disk].next_seq_sect == this_sector
772-
|| dist == 0) {
773-
int opt_iosize = bdev_io_opt(rdev->bdev) >> 9;
774-
struct raid1_info *mirror = &conf->mirrors[disk];
775-
776-
/*
777-
* If buffered sequential IO size exceeds optimal
778-
* iosize, check if there is idle disk. If yes, choose
779-
* the idle disk. read_balance could already choose an
780-
* idle disk before noticing it's a sequential IO in
781-
* this disk. This doesn't matter because this disk
782-
* will idle, next time it will be utilized after the
783-
* first disk has IO size exceeds optimal iosize. In
784-
* this way, iosize of the first disk will be optimal
785-
* iosize at least. iosize of the second disk might be
786-
* small, but not a big deal since when the second disk
787-
* starts IO, the first disk is likely still busy.
788-
*/
789-
if (test_bit(Nonrot, &rdev->flags) && opt_iosize > 0 &&
790-
mirror->seq_start != MaxSector &&
791-
mirror->next_seq_sect > opt_iosize &&
792-
mirror->next_seq_sect - opt_iosize >=
793-
mirror->seq_start) {
794-
/*
795-
* Add 'pending' to avoid choosing this disk if
796-
* there is other idle disk.
797-
*/
798-
pending++;
799-
/*
800-
* If there is no other idle disk, this disk
801-
* will be chosen.
802-
*/
803-
sequential_disk = disk;
804-
} else {
796+
if (is_sequential(conf, disk, r1_bio)) {
797+
if (!should_choose_next(conf, disk)) {
805798
best_disk = disk;
806799
break;
807800
}
801+
/*
802+
* Add 'pending' to avoid choosing this disk if
803+
* there is other idle disk.
804+
*/
805+
pending++;
806+
/*
807+
* If there is no other idle disk, this disk
808+
* will be chosen.
809+
*/
810+
sequential_disk = disk;
808811
}
809812

810813
if (min_pending > pending) {

0 commit comments

Comments
 (0)