Skip to content

Commit 71f2c82

Browse files
chaseyuJaegeuk Kim
authored andcommitted
f2fs: multidevice: support direct IO
Commit 3c62be1 ("f2fs: support multiple devices") did not add direct IO support for the multiple-device feature; this patch adds the missing part. In addition, for a multiple-device image, we should be aware of any issued direct write IO, not just buffered write IO, so that fsync and syncfs can issue a preflush command to the device that the direct write IO went to, persisting user data for POSIX compliance. Signed-off-by: Chao Yu <[email protected]> Signed-off-by: Jaegeuk Kim <[email protected]>
1 parent 6691d94 commit 71f2c82

File tree

5 files changed

+120
-25
lines changed

5 files changed

+120
-25
lines changed

fs/f2fs/data.c

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,10 +1465,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
14651465
struct extent_info ei = {0, };
14661466
block_t blkaddr;
14671467
unsigned int start_pgofs;
1468+
int bidx = 0;
14681469

14691470
if (!maxblocks)
14701471
return 0;
14711472

1473+
map->m_bdev = inode->i_sb->s_bdev;
1474+
map->m_multidev_dio =
1475+
f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
1476+
14721477
map->m_len = 0;
14731478
map->m_flags = 0;
14741479

@@ -1491,6 +1496,21 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
14911496
if (flag == F2FS_GET_BLOCK_DIO)
14921497
f2fs_wait_on_block_writeback_range(inode,
14931498
map->m_pblk, map->m_len);
1499+
1500+
if (map->m_multidev_dio) {
1501+
block_t blk_addr = map->m_pblk;
1502+
1503+
bidx = f2fs_target_device_index(sbi, map->m_pblk);
1504+
1505+
map->m_bdev = FDEV(bidx).bdev;
1506+
map->m_pblk -= FDEV(bidx).start_blk;
1507+
map->m_len = min(map->m_len,
1508+
FDEV(bidx).end_blk + 1 - map->m_pblk);
1509+
1510+
if (map->m_may_create)
1511+
f2fs_update_device_state(sbi, inode->i_ino,
1512+
blk_addr, map->m_len);
1513+
}
14941514
goto out;
14951515
}
14961516

@@ -1609,6 +1629,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
16091629
if (flag == F2FS_GET_BLOCK_PRE_AIO)
16101630
goto skip;
16111631

1632+
if (map->m_multidev_dio)
1633+
bidx = f2fs_target_device_index(sbi, blkaddr);
1634+
16121635
if (map->m_len == 0) {
16131636
/* preallocated unwritten block should be mapped for fiemap. */
16141637
if (blkaddr == NEW_ADDR)
@@ -1617,10 +1640,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
16171640

16181641
map->m_pblk = blkaddr;
16191642
map->m_len = 1;
1643+
1644+
if (map->m_multidev_dio)
1645+
map->m_bdev = FDEV(bidx).bdev;
16201646
} else if ((map->m_pblk != NEW_ADDR &&
16211647
blkaddr == (map->m_pblk + ofs)) ||
16221648
(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
16231649
flag == F2FS_GET_BLOCK_PRE_DIO) {
1650+
if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
1651+
goto sync_out;
16241652
ofs++;
16251653
map->m_len++;
16261654
} else {
@@ -1673,11 +1701,31 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
16731701

16741702
sync_out:
16751703

1676-
/* for hardware encryption, but to avoid potential issue in future */
1677-
if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
1704+
if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
1705+
/*
1706+
* for hardware encryption, but to avoid potential issue
1707+
* in future
1708+
*/
16781709
f2fs_wait_on_block_writeback_range(inode,
16791710
map->m_pblk, map->m_len);
16801711

1712+
if (map->m_multidev_dio) {
1713+
block_t blk_addr = map->m_pblk;
1714+
1715+
bidx = f2fs_target_device_index(sbi, map->m_pblk);
1716+
1717+
map->m_bdev = FDEV(bidx).bdev;
1718+
map->m_pblk -= FDEV(bidx).start_blk;
1719+
1720+
if (map->m_may_create)
1721+
f2fs_update_device_state(sbi, inode->i_ino,
1722+
blk_addr, map->m_len);
1723+
1724+
f2fs_bug_on(sbi, blk_addr + map->m_len >
1725+
FDEV(bidx).end_blk + 1);
1726+
}
1727+
}
1728+
16811729
if (flag == F2FS_GET_BLOCK_PRECACHE) {
16821730
if (map->m_flags & F2FS_MAP_MAPPED) {
16831731
unsigned int ofs = start_pgofs - map->m_lblk;
@@ -1696,7 +1744,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
16961744
f2fs_balance_fs(sbi, dn.node_changed);
16971745
}
16981746
out:
1699-
trace_f2fs_map_blocks(inode, map, err);
1747+
trace_f2fs_map_blocks(inode, map, create, flag, err);
17001748
return err;
17011749
}
17021750

@@ -1755,6 +1803,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
17551803
map_bh(bh, inode->i_sb, map.m_pblk);
17561804
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
17571805
bh->b_size = blks_to_bytes(inode, map.m_len);
1806+
1807+
if (map.m_multidev_dio)
1808+
bh->b_bdev = map.m_bdev;
17581809
}
17591810
return err;
17601811
}

fs/f2fs/f2fs.h

Lines changed: 22 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,7 @@ struct extent_tree {
620620
F2FS_MAP_UNWRITTEN)
621621

622622
struct f2fs_map_blocks {
623+
struct block_device *m_bdev; /* for multi-device dio */
623624
block_t m_pblk;
624625
block_t m_lblk;
625626
unsigned int m_len;
@@ -628,6 +629,7 @@ struct f2fs_map_blocks {
628629
pgoff_t *m_next_extent; /* point to next possible extent */
629630
int m_seg_type;
630631
bool m_may_create; /* indicate it is from write path */
632+
bool m_multidev_dio; /* indicate it allows multi-device dio */
631633
};
632634

633635
/* for flag in get_data_block */
@@ -1733,12 +1735,15 @@ struct f2fs_sb_info {
17331735

17341736
/* For shrinker support */
17351737
struct list_head s_list;
1738+
struct mutex umount_mutex;
1739+
unsigned int shrinker_run_no;
1740+
1741+
/* For multi devices */
17361742
int s_ndevs; /* number of devices */
17371743
struct f2fs_dev_info *devs; /* for device list */
17381744
unsigned int dirty_device; /* for checkpoint data flush */
17391745
spinlock_t dev_lock; /* protect dirty_device */
1740-
struct mutex umount_mutex;
1741-
unsigned int shrinker_run_no;
1746+
bool aligned_blksize; /* all devices has the same logical blksize */
17421747

17431748
/* For write statistics */
17441749
u64 sectors_written_start;
@@ -3500,6 +3505,8 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
35003505
block_t old_blkaddr, block_t *new_blkaddr,
35013506
struct f2fs_summary *sum, int type,
35023507
struct f2fs_io_info *fio);
3508+
void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
3509+
block_t blkaddr, unsigned int blkcnt);
35033510
void f2fs_wait_on_page_writeback(struct page *page,
35043511
enum page_type type, bool ordered, bool locked);
35053512
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr);
@@ -4320,6 +4327,16 @@ static inline int block_unaligned_IO(struct inode *inode,
43204327
return align & blocksize_mask;
43214328
}
43224329

4330+
static inline bool f2fs_allow_multi_device_dio(struct f2fs_sb_info *sbi,
4331+
int flag)
4332+
{
4333+
if (!f2fs_is_multi_device(sbi))
4334+
return false;
4335+
if (flag != F2FS_GET_BLOCK_DIO)
4336+
return false;
4337+
return sbi->aligned_blksize;
4338+
}
4339+
43234340
static inline bool f2fs_force_buffered_io(struct inode *inode,
43244341
struct kiocb *iocb, struct iov_iter *iter)
43254342
{
@@ -4328,7 +4345,9 @@ static inline bool f2fs_force_buffered_io(struct inode *inode,
43284345

43294346
if (f2fs_post_read_required(inode))
43304347
return true;
4331-
if (f2fs_is_multi_device(sbi))
4348+
4349+
/* disallow direct IO if any of devices has unaligned blksize */
4350+
if (f2fs_is_multi_device(sbi) && !sbi->aligned_blksize)
43324351
return true;
43334352
/*
43344353
* for blkzoned device, fallback direct IO to buffered IO, so

fs/f2fs/segment.c

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3520,24 +3520,30 @@ void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
35203520
up_read(&SM_I(sbi)->curseg_lock);
35213521
}
35223522

3523-
static void update_device_state(struct f2fs_io_info *fio)
3523+
void f2fs_update_device_state(struct f2fs_sb_info *sbi, nid_t ino,
3524+
block_t blkaddr, unsigned int blkcnt)
35243525
{
3525-
struct f2fs_sb_info *sbi = fio->sbi;
3526-
unsigned int devidx;
3527-
35283526
if (!f2fs_is_multi_device(sbi))
35293527
return;
35303528

3531-
devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);
3529+
while (1) {
3530+
unsigned int devidx = f2fs_target_device_index(sbi, blkaddr);
3531+
unsigned int blks = FDEV(devidx).end_blk - blkaddr + 1;
35323532

3533-
/* update device state for fsync */
3534-
f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3533+
/* update device state for fsync */
3534+
f2fs_set_dirty_device(sbi, ino, devidx, FLUSH_INO);
35353535

3536-
/* update device state for checkpoint */
3537-
if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3538-
spin_lock(&sbi->dev_lock);
3539-
f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3540-
spin_unlock(&sbi->dev_lock);
3536+
/* update device state for checkpoint */
3537+
if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
3538+
spin_lock(&sbi->dev_lock);
3539+
f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
3540+
spin_unlock(&sbi->dev_lock);
3541+
}
3542+
3543+
if (blkcnt <= blks)
3544+
break;
3545+
blkcnt -= blks;
3546+
blkaddr += blks;
35413547
}
35423548
}
35433549

@@ -3564,7 +3570,7 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
35643570
goto reallocate;
35653571
}
35663572

3567-
update_device_state(fio);
3573+
f2fs_update_device_state(fio->sbi, fio->ino, fio->new_blkaddr, 1);
35683574

35693575
if (keep_order)
35703576
up_read(&fio->sbi->io_order_lock);
@@ -3653,7 +3659,8 @@ int f2fs_inplace_write_data(struct f2fs_io_info *fio)
36533659
else
36543660
err = f2fs_submit_page_bio(fio);
36553661
if (!err) {
3656-
update_device_state(fio);
3662+
f2fs_update_device_state(fio->sbi, fio->ino,
3663+
fio->new_blkaddr, 1);
36573664
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
36583665
}
36593666

fs/f2fs/super.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3757,6 +3757,7 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
37573757
{
37583758
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
37593759
unsigned int max_devices = MAX_DEVICES;
3760+
unsigned int logical_blksize;
37603761
int i;
37613762

37623763
/* Initialize single device information */
@@ -3777,6 +3778,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
37773778
if (!sbi->devs)
37783779
return -ENOMEM;
37793780

3781+
logical_blksize = bdev_logical_block_size(sbi->sb->s_bdev);
3782+
sbi->aligned_blksize = true;
3783+
37803784
for (i = 0; i < max_devices; i++) {
37813785

37823786
if (i > 0 && !RDEV(i).path[0])
@@ -3813,6 +3817,9 @@ static int f2fs_scan_devices(struct f2fs_sb_info *sbi)
38133817
/* to release errored devices */
38143818
sbi->s_ndevs = i + 1;
38153819

3820+
if (logical_blksize != bdev_logical_block_size(FDEV(i).bdev))
3821+
sbi->aligned_blksize = false;
3822+
38163823
#ifdef CONFIG_BLK_DEV_ZONED
38173824
if (bdev_zoned_model(FDEV(i).bdev) == BLK_ZONED_HM &&
38183825
!f2fs_sb_has_blkzoned(sbi)) {

include/trace/events/f2fs.h

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -570,9 +570,10 @@ TRACE_EVENT(f2fs_file_write_iter,
570570
);
571571

572572
TRACE_EVENT(f2fs_map_blocks,
573-
TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map, int ret),
573+
TP_PROTO(struct inode *inode, struct f2fs_map_blocks *map,
574+
int create, int flag, int ret),
574575

575-
TP_ARGS(inode, map, ret),
576+
TP_ARGS(inode, map, create, flag, ret),
576577

577578
TP_STRUCT__entry(
578579
__field(dev_t, dev)
@@ -583,31 +584,41 @@ TRACE_EVENT(f2fs_map_blocks,
583584
__field(unsigned int, m_flags)
584585
__field(int, m_seg_type)
585586
__field(bool, m_may_create)
587+
__field(bool, m_multidev_dio)
588+
__field(int, create)
589+
__field(int, flag)
586590
__field(int, ret)
587591
),
588592

589593
TP_fast_assign(
590-
__entry->dev = inode->i_sb->s_dev;
594+
__entry->dev = map->m_bdev->bd_dev;
591595
__entry->ino = inode->i_ino;
592596
__entry->m_lblk = map->m_lblk;
593597
__entry->m_pblk = map->m_pblk;
594598
__entry->m_len = map->m_len;
595599
__entry->m_flags = map->m_flags;
596600
__entry->m_seg_type = map->m_seg_type;
597601
__entry->m_may_create = map->m_may_create;
602+
__entry->m_multidev_dio = map->m_multidev_dio;
603+
__entry->create = create;
604+
__entry->flag = flag;
598605
__entry->ret = ret;
599606
),
600607

601608
TP_printk("dev = (%d,%d), ino = %lu, file offset = %llu, "
602-
"start blkaddr = 0x%llx, len = 0x%llx, flags = %u,"
603-
"seg_type = %d, may_create = %d, err = %d",
609+
"start blkaddr = 0x%llx, len = 0x%llx, flags = %u, "
610+
"seg_type = %d, may_create = %d, multidevice = %d, "
611+
"create = %d, flag = %d, err = %d",
604612
show_dev_ino(__entry),
605613
(unsigned long long)__entry->m_lblk,
606614
(unsigned long long)__entry->m_pblk,
607615
(unsigned long long)__entry->m_len,
608616
__entry->m_flags,
609617
__entry->m_seg_type,
610618
__entry->m_may_create,
619+
__entry->m_multidev_dio,
620+
__entry->create,
621+
__entry->flag,
611622
__entry->ret)
612623
);
613624

0 commit comments

Comments
 (0)