Skip to content

Commit 5664896

Browse files
committed
Merge tag 'f2fs-for-5.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim: "In this cycle, we've applied a relatively small number of patches which mainly fix subtle corner cases, while introducing a new mount option to be able to fragment the disk intentionally for performance tests. Enhancements: - add a mount option to fragment the on-disk layout to understand the performance - support direct IO for multi-partitions - add a fault injection of dquot_initialize Bug fixes: - address some lockdep complaints - fix a deadlock issue with quota - fix a memory tuning condition - fix compression condition to improve the ratio - fix disabling compression on the non-empty compressed file - invalidate cached pages before IPU/DIO writes And, we've added some minor clean-ups as usual" * tag 'f2fs-for-5.16-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: f2fs: fix UAF in f2fs_available_free_memory f2fs: invalidate META_MAPPING before IPU/DIO write f2fs: support fault injection for dquot_initialize() f2fs: fix incorrect return value in f2fs_sanity_check_ckpt() f2fs: compress: disallow disabling compress on non-empty compressed file f2fs: compress: fix overwrite may reduce compress ratio unproperly f2fs: multidevice: support direct IO f2fs: introduce fragment allocation mode mount option f2fs: replace snprintf in show functions with sysfs_emit f2fs: include non-compressed blocks in compr_written_block f2fs: fix wrong condition to trigger background checkpoint correctly f2fs: fix to use WHINT_MODE f2fs: fix up f2fs_lookup tracepoints f2fs: set SBI_NEED_FSCK flag when inconsistent node block found f2fs: introduce excess_dirty_threshold() f2fs: avoid attaching SB_ACTIVE flag during mount f2fs: quota: fix potential deadlock f2fs: should use GFP_NOFS for directory inodes
2 parents 0f7ddea + 5429c9d commit 5664896

File tree

21 files changed

+359
-106
lines changed

21 files changed

+359
-106
lines changed

Documentation/ABI/testing/sysfs-fs-f2fs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,3 +512,19 @@ Date: July 2021
512512
Contact: "Daeho Jeong" <daehojeong@google.com>
513513
Description: You can control the multiplier value of bdi device readahead window size
514514
between 2 (default) and 256 for POSIX_FADV_SEQUENTIAL advise option.
515+
516+
What: /sys/fs/f2fs/<disk>/max_fragment_chunk
517+
Date: August 2021
518+
Contact: "Daeho Jeong" <daehojeong@google.com>
519+
Description: With "mode=fragment:block" mount options, we can scatter block allocation.
520+
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
521+
in the length of 1..<max_fragment_hole> by turns. This value can be set
522+
between 1..512 and the default value is 4.
523+
524+
What: /sys/fs/f2fs/<disk>/max_fragment_hole
525+
Date: August 2021
526+
Contact: "Daeho Jeong" <daehojeong@google.com>
527+
Description: With "mode=fragment:block" mount options, we can scatter block allocation.
528+
f2fs will allocate 1..<max_fragment_chunk> blocks in a chunk and make a hole
529+
in the length of 1..<max_fragment_hole> by turns. This value can be set
530+
between 1..512 and the default value is 4.

Documentation/filesystems/f2fs.rst

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,10 +197,29 @@ fault_type=%d Support configuring fault injection type, should be
197197
FAULT_DISCARD 0x000002000
198198
FAULT_WRITE_IO 0x000004000
199199
FAULT_SLAB_ALLOC 0x000008000
200+
FAULT_DQUOT_INIT 0x000010000
200201
=================== ===========
201202
mode=%s Control block allocation mode which supports "adaptive"
202203
and "lfs". In "lfs" mode, there should be no random
203204
writes towards main area.
205+
"fragment:segment" and "fragment:block" are newly added here.
206+
These are developer options for experiments to simulate filesystem
207+
fragmentation/after-GC situation itself. The developers use these
208+
modes to understand filesystem fragmentation/after-GC condition well,
209+
and eventually get some insights to handle them better.
210+
In "fragment:segment", f2fs allocates a new segment in random
211+
position. With this, we can simulate the after-GC condition.
212+
In "fragment:block", we can scatter block allocation with
213+
"max_fragment_chunk" and "max_fragment_hole" sysfs nodes.
214+
We added some randomness to both chunk and hole size to make
215+
it close to realistic IO pattern. So, in this mode, f2fs will allocate
216+
1..<max_fragment_chunk> blocks in a chunk and make a hole in the
217+
length of 1..<max_fragment_hole> by turns. With this, the newly
218+
allocated blocks will be scattered throughout the whole partition.
219+
Note that "fragment:block" implicitly enables "fragment:segment"
220+
option for more randomness.
221+
Please, use these options for your experiments and we strongly
222+
recommend re-formatting the filesystem after using these options.
204223
io_bits=%u Set the bit size of write IO requests. It should be set
205224
with "mode=lfs".
206225
usrquota Enable plain user disk quota accounting.

fs/f2fs/checkpoint.c

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,7 @@ static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
653653
return PTR_ERR(inode);
654654
}
655655

656-
err = dquot_initialize(inode);
656+
err = f2fs_dquot_initialize(inode);
657657
if (err) {
658658
iput(inode);
659659
goto err_out;
@@ -705,9 +705,6 @@ int f2fs_recover_orphan_inodes(struct f2fs_sb_info *sbi)
705705
}
706706

707707
#ifdef CONFIG_QUOTA
708-
/* Needed for iput() to work correctly and not trash data */
709-
sbi->sb->s_flags |= SB_ACTIVE;
710-
711708
/*
712709
* Turn on quotas which were not enabled for read-only mounts if
713710
* filesystem has quota feature, so that they are updated correctly.
@@ -1162,7 +1159,8 @@ static bool __need_flush_quota(struct f2fs_sb_info *sbi)
11621159
if (!is_journalled_quota(sbi))
11631160
return false;
11641161

1165-
down_write(&sbi->quota_sem);
1162+
if (!down_write_trylock(&sbi->quota_sem))
1163+
return true;
11661164
if (is_sbi_flag_set(sbi, SBI_QUOTA_SKIP_FLUSH)) {
11671165
ret = false;
11681166
} else if (is_sbi_flag_set(sbi, SBI_QUOTA_NEED_REPAIR)) {

fs/f2fs/compress.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -882,6 +882,25 @@ bool f2fs_cluster_can_merge_page(struct compress_ctx *cc, pgoff_t index)
882882
return is_page_in_cluster(cc, index);
883883
}
884884

885+
bool f2fs_all_cluster_page_loaded(struct compress_ctx *cc, struct pagevec *pvec,
886+
int index, int nr_pages)
887+
{
888+
unsigned long pgidx;
889+
int i;
890+
891+
if (nr_pages - index < cc->cluster_size)
892+
return false;
893+
894+
pgidx = pvec->pages[index]->index;
895+
896+
for (i = 1; i < cc->cluster_size; i++) {
897+
if (pvec->pages[index + i]->index != pgidx + i)
898+
return false;
899+
}
900+
901+
return true;
902+
}
903+
885904
static bool cluster_has_invalid_data(struct compress_ctx *cc)
886905
{
887906
loff_t i_size = i_size_read(cc->inode);
@@ -1531,6 +1550,7 @@ int f2fs_write_multi_pages(struct compress_ctx *cc,
15311550
if (cluster_may_compress(cc)) {
15321551
err = f2fs_compress_pages(cc);
15331552
if (err == -EAGAIN) {
1553+
add_compr_block_stat(cc->inode, cc->cluster_size);
15341554
goto write;
15351555
} else if (err) {
15361556
f2fs_put_rpages_wbc(cc, wbc, true, 1);

fs/f2fs/data.c

Lines changed: 74 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1465,10 +1465,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
14651465
struct extent_info ei = {0, };
14661466
block_t blkaddr;
14671467
unsigned int start_pgofs;
1468+
int bidx = 0;
14681469

14691470
if (!maxblocks)
14701471
return 0;
14711472

1473+
map->m_bdev = inode->i_sb->s_bdev;
1474+
map->m_multidev_dio =
1475+
f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
1476+
14721477
map->m_len = 0;
14731478
map->m_flags = 0;
14741479

@@ -1491,6 +1496,21 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
14911496
if (flag == F2FS_GET_BLOCK_DIO)
14921497
f2fs_wait_on_block_writeback_range(inode,
14931498
map->m_pblk, map->m_len);
1499+
1500+
if (map->m_multidev_dio) {
1501+
block_t blk_addr = map->m_pblk;
1502+
1503+
bidx = f2fs_target_device_index(sbi, map->m_pblk);
1504+
1505+
map->m_bdev = FDEV(bidx).bdev;
1506+
map->m_pblk -= FDEV(bidx).start_blk;
1507+
map->m_len = min(map->m_len,
1508+
FDEV(bidx).end_blk + 1 - map->m_pblk);
1509+
1510+
if (map->m_may_create)
1511+
f2fs_update_device_state(sbi, inode->i_ino,
1512+
blk_addr, map->m_len);
1513+
}
14941514
goto out;
14951515
}
14961516

@@ -1609,6 +1629,9 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
16091629
if (flag == F2FS_GET_BLOCK_PRE_AIO)
16101630
goto skip;
16111631

1632+
if (map->m_multidev_dio)
1633+
bidx = f2fs_target_device_index(sbi, blkaddr);
1634+
16121635
if (map->m_len == 0) {
16131636
/* preallocated unwritten block should be mapped for fiemap. */
16141637
if (blkaddr == NEW_ADDR)
@@ -1617,10 +1640,15 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
16171640

16181641
map->m_pblk = blkaddr;
16191642
map->m_len = 1;
1643+
1644+
if (map->m_multidev_dio)
1645+
map->m_bdev = FDEV(bidx).bdev;
16201646
} else if ((map->m_pblk != NEW_ADDR &&
16211647
blkaddr == (map->m_pblk + ofs)) ||
16221648
(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
16231649
flag == F2FS_GET_BLOCK_PRE_DIO) {
1650+
if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
1651+
goto sync_out;
16241652
ofs++;
16251653
map->m_len++;
16261654
} else {
@@ -1673,10 +1701,32 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
16731701

16741702
sync_out:
16751703

1676-
/* for hardware encryption, but to avoid potential issue in future */
1677-
if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED)
1704+
if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
1705+
/*
1706+
* for hardware encryption, but to avoid potential issue
1707+
* in future
1708+
*/
16781709
f2fs_wait_on_block_writeback_range(inode,
16791710
map->m_pblk, map->m_len);
1711+
invalidate_mapping_pages(META_MAPPING(sbi),
1712+
map->m_pblk, map->m_pblk);
1713+
1714+
if (map->m_multidev_dio) {
1715+
block_t blk_addr = map->m_pblk;
1716+
1717+
bidx = f2fs_target_device_index(sbi, map->m_pblk);
1718+
1719+
map->m_bdev = FDEV(bidx).bdev;
1720+
map->m_pblk -= FDEV(bidx).start_blk;
1721+
1722+
if (map->m_may_create)
1723+
f2fs_update_device_state(sbi, inode->i_ino,
1724+
blk_addr, map->m_len);
1725+
1726+
f2fs_bug_on(sbi, blk_addr + map->m_len >
1727+
FDEV(bidx).end_blk + 1);
1728+
}
1729+
}
16801730

16811731
if (flag == F2FS_GET_BLOCK_PRECACHE) {
16821732
if (map->m_flags & F2FS_MAP_MAPPED) {
@@ -1696,7 +1746,7 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
16961746
f2fs_balance_fs(sbi, dn.node_changed);
16971747
}
16981748
out:
1699-
trace_f2fs_map_blocks(inode, map, err);
1749+
trace_f2fs_map_blocks(inode, map, create, flag, err);
17001750
return err;
17011751
}
17021752

@@ -1755,6 +1805,9 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
17551805
map_bh(bh, inode->i_sb, map.m_pblk);
17561806
bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
17571807
bh->b_size = blks_to_bytes(inode, map.m_len);
1808+
1809+
if (map.m_multidev_dio)
1810+
bh->b_bdev = map.m_bdev;
17581811
}
17591812
return err;
17601813
}
@@ -2989,6 +3042,10 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
29893042
need_readd = false;
29903043
#ifdef CONFIG_F2FS_FS_COMPRESSION
29913044
if (f2fs_compressed_file(inode)) {
3045+
void *fsdata = NULL;
3046+
struct page *pagep;
3047+
int ret2;
3048+
29923049
ret = f2fs_init_compress_ctx(&cc);
29933050
if (ret) {
29943051
done = 1;
@@ -3007,27 +3064,23 @@ static int f2fs_write_cache_pages(struct address_space *mapping,
30073064
if (unlikely(f2fs_cp_error(sbi)))
30083065
goto lock_page;
30093066

3010-
if (f2fs_cluster_is_empty(&cc)) {
3011-
void *fsdata = NULL;
3012-
struct page *pagep;
3013-
int ret2;
3067+
if (!f2fs_cluster_is_empty(&cc))
3068+
goto lock_page;
30143069

3015-
ret2 = f2fs_prepare_compress_overwrite(
3070+
ret2 = f2fs_prepare_compress_overwrite(
30163071
inode, &pagep,
30173072
page->index, &fsdata);
3018-
if (ret2 < 0) {
3019-
ret = ret2;
3020-
done = 1;
3021-
break;
3022-
} else if (ret2 &&
3023-
!f2fs_compress_write_end(inode,
3024-
fsdata, page->index,
3025-
1)) {
3026-
retry = 1;
3027-
break;
3028-
}
3029-
} else {
3030-
goto lock_page;
3073+
if (ret2 < 0) {
3074+
ret = ret2;
3075+
done = 1;
3076+
break;
3077+
} else if (ret2 &&
3078+
(!f2fs_compress_write_end(inode,
3079+
fsdata, page->index, 1) ||
3080+
!f2fs_all_cluster_page_loaded(&cc,
3081+
&pvec, i, nr_pages))) {
3082+
retry = 1;
3083+
break;
30313084
}
30323085
}
30333086
#endif

0 commit comments

Comments
 (0)