Skip to content

Commit ff7dcfe

Browse files
committed
Merge tag 'ext4_for_linus_6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "Major ext4 changes for 6.17: - Better scalability for ext4 block allocation - Fix insufficient credits when writing back large folios Miscellaneous bug fixes, especially when handling extended attributes, inline data, and fast commit" * tag 'ext4_for_linus_6.17-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (39 commits) ext4: do not BUG when INLINE_DATA_FL lacks system.data xattr ext4: implement linear-like traversal across order xarrays ext4: refactor choose group to scan group ext4: convert free groups order lists to xarrays ext4: factor out ext4_mb_scan_group() ext4: factor out ext4_mb_might_prefetch() ext4: factor out __ext4_mb_scan_group() ext4: fix largest free orders lists corruption on mb_optimize_scan switch ext4: fix zombie groups in average fragment size lists ext4: merge freed extent with existing extents before insertion ext4: convert sbi->s_mb_free_pending to atomic_t ext4: fix typo in CR_GOAL_LEN_SLOW comment ext4: get rid of some obsolete EXT4_MB_HINT flags ext4: utilize multiple global goals to reduce contention ext4: remove unnecessary s_md_lock on update s_mb_last_group ext4: remove unnecessary s_mb_last_start ext4: separate stream goal hits from s_bal_goals for better tracking ext4: add ext4_try_lock_group() to skip busy groups ext4: initialize superblock fields in the kballoc-test.c kunit tests ext4: refactor the inline directory conversion and new directory codepaths ...
2 parents 44a8c96 + 099b847 commit ff7dcfe

File tree

15 files changed

+900
-692
lines changed

15 files changed

+900
-692
lines changed

fs/ext4/balloc.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -703,7 +703,7 @@ int ext4_should_retry_alloc(struct super_block *sb, int *retries)
703703
* possible we just missed a transaction commit that did so
704704
*/
705705
smp_mb();
706-
if (sbi->s_mb_free_pending == 0) {
706+
if (atomic_read(&sbi->s_mb_free_pending) == 0) {
707707
if (test_opt(sb, DISCARD)) {
708708
atomic_inc(&sbi->s_retry_alloc_pending);
709709
flush_work(&sbi->s_discard_work);

fs/ext4/ext4.h

Lines changed: 30 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -157,7 +157,7 @@ enum criteria {
157157

158158
/*
159159
* Reads each block group sequentially, performing disk IO if
160-
* necessary, to find find_suitable block group. Tries to
160+
* necessary, to find suitable block group. Tries to
161161
* allocate goal length but might trim the request if nothing
162162
* is found after enough tries.
163163
*/
@@ -185,14 +185,8 @@ enum criteria {
185185

186186
/* prefer goal again. length */
187187
#define EXT4_MB_HINT_MERGE 0x0001
188-
/* blocks already reserved */
189-
#define EXT4_MB_HINT_RESERVED 0x0002
190-
/* metadata is being allocated */
191-
#define EXT4_MB_HINT_METADATA 0x0004
192188
/* first blocks in the file */
193189
#define EXT4_MB_HINT_FIRST 0x0008
194-
/* search for the best chunk */
195-
#define EXT4_MB_HINT_BEST 0x0010
196190
/* data is being allocated */
197191
#define EXT4_MB_HINT_DATA 0x0020
198192
/* don't preallocate (for tails) */
@@ -213,15 +207,6 @@ enum criteria {
213207
#define EXT4_MB_USE_RESERVED 0x2000
214208
/* Do strict check for free blocks while retrying block allocation */
215209
#define EXT4_MB_STRICT_CHECK 0x4000
216-
/* Large fragment size list lookup succeeded at least once for
217-
* CR_POWER2_ALIGNED */
218-
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
219-
/* Avg fragment size rb tree lookup succeeded at least once for
220-
* CR_GOAL_LEN_FAST */
221-
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
222-
/* Avg fragment size rb tree lookup succeeded at least once for
223-
* CR_BEST_AVAIL_LEN */
224-
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000
225210

226211
struct ext4_allocation_request {
227212
/* target inode for block we're allocating */
@@ -1608,16 +1593,14 @@ struct ext4_sb_info {
16081593
unsigned short *s_mb_offsets;
16091594
unsigned int *s_mb_maxs;
16101595
unsigned int s_group_info_size;
1611-
unsigned int s_mb_free_pending;
1596+
atomic_t s_mb_free_pending;
16121597
struct list_head s_freed_data_list[2]; /* List of blocks to be freed
16131598
after commit completed */
16141599
struct list_head s_discard_list;
16151600
struct work_struct s_discard_work;
16161601
atomic_t s_retry_alloc_pending;
1617-
struct list_head *s_mb_avg_fragment_size;
1618-
rwlock_t *s_mb_avg_fragment_size_locks;
1619-
struct list_head *s_mb_largest_free_orders;
1620-
rwlock_t *s_mb_largest_free_orders_locks;
1602+
struct xarray *s_mb_avg_fragment_size;
1603+
struct xarray *s_mb_largest_free_orders;
16211604

16221605
/* tunables */
16231606
unsigned long s_stripe;
@@ -1629,15 +1612,16 @@ struct ext4_sb_info {
16291612
unsigned int s_mb_order2_reqs;
16301613
unsigned int s_mb_group_prealloc;
16311614
unsigned int s_max_dir_size_kb;
1632-
/* where last allocation was done - for stream allocation */
1633-
unsigned long s_mb_last_group;
1634-
unsigned long s_mb_last_start;
16351615
unsigned int s_mb_prefetch;
16361616
unsigned int s_mb_prefetch_limit;
16371617
unsigned int s_mb_best_avail_max_trim_order;
16381618
unsigned int s_sb_update_sec;
16391619
unsigned int s_sb_update_kb;
16401620

1621+
/* where last allocation was done - for stream allocation */
1622+
ext4_group_t *s_mb_last_groups;
1623+
unsigned int s_mb_nr_global_goals;
1624+
16411625
/* stats for buddy allocator */
16421626
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
16431627
atomic_t s_bal_success; /* we found long enough chunks */
@@ -1646,12 +1630,10 @@ struct ext4_sb_info {
16461630
atomic_t s_bal_cX_ex_scanned[EXT4_MB_NUM_CRS]; /* total extents scanned */
16471631
atomic_t s_bal_groups_scanned; /* number of groups scanned */
16481632
atomic_t s_bal_goals; /* goal hits */
1633+
atomic_t s_bal_stream_goals; /* stream allocation global goal hits */
16491634
atomic_t s_bal_len_goals; /* len goal hits */
16501635
atomic_t s_bal_breaks; /* too long searches */
16511636
atomic_t s_bal_2orders; /* 2^order hits */
1652-
atomic_t s_bal_p2_aligned_bad_suggestions;
1653-
atomic_t s_bal_goal_fast_bad_suggestions;
1654-
atomic_t s_bal_best_avail_bad_suggestions;
16551637
atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS];
16561638
atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS];
16571639
atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS]; /* cX loop didn't find blocks */
@@ -3020,7 +3002,7 @@ int ext4_walk_page_buffers(handle_t *handle,
30203002
struct buffer_head *bh));
30213003
int do_journal_get_write_access(handle_t *handle, struct inode *inode,
30223004
struct buffer_head *bh);
3023-
bool ext4_should_enable_large_folio(struct inode *inode);
3005+
void ext4_set_inode_mapping_order(struct inode *inode);
30243006
#define FALL_BACK_TO_NONDELALLOC 1
30253007
#define CONVERT_INLINE_DATA 2
30263008

@@ -3064,9 +3046,9 @@ extern int ext4_punch_hole(struct file *file, loff_t offset, loff_t length);
30643046
extern void ext4_set_inode_flags(struct inode *, bool init);
30653047
extern int ext4_alloc_da_blocks(struct inode *inode);
30663048
extern void ext4_set_aops(struct inode *inode);
3067-
extern int ext4_writepage_trans_blocks(struct inode *);
30683049
extern int ext4_normal_submit_inode_data_buffers(struct jbd2_inode *jinode);
30693050
extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
3051+
extern int ext4_chunk_trans_extent(struct inode *inode, int nrblocks);
30703052
extern int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
30713053
int pextents);
30723054
extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
@@ -3489,8 +3471,6 @@ struct ext4_group_info {
34893471
void *bb_bitmap;
34903472
#endif
34913473
struct rw_semaphore alloc_sem;
3492-
struct list_head bb_avg_fragment_size_node;
3493-
struct list_head bb_largest_free_order_node;
34943474
ext4_grpblk_t bb_counters[]; /* Nr of free power-of-two-block
34953475
* regions, index is order.
34963476
* bb_counters[3] = 5 means
@@ -3541,23 +3521,28 @@ static inline int ext4_fs_is_busy(struct ext4_sb_info *sbi)
35413521
return (atomic_read(&sbi->s_lock_busy) > EXT4_CONTENTION_THRESHOLD);
35423522
}
35433523

3524+
static inline bool ext4_try_lock_group(struct super_block *sb, ext4_group_t group)
3525+
{
3526+
if (!spin_trylock(ext4_group_lock_ptr(sb, group)))
3527+
return false;
3528+
/*
3529+
* We're able to grab the lock right away, so drop the lock
3530+
* contention counter.
3531+
*/
3532+
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
3533+
return true;
3534+
}
3535+
35443536
static inline void ext4_lock_group(struct super_block *sb, ext4_group_t group)
35453537
{
3546-
spinlock_t *lock = ext4_group_lock_ptr(sb, group);
3547-
if (spin_trylock(lock))
3548-
/*
3549-
* We're able to grab the lock right away, so drop the
3550-
* lock contention counter.
3551-
*/
3552-
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, -1, 0);
3553-
else {
3538+
if (!ext4_try_lock_group(sb, group)) {
35543539
/*
35553540
* The lock is busy, so bump the contention counter,
35563541
* and then wait on the spin lock.
35573542
*/
35583543
atomic_add_unless(&EXT4_SB(sb)->s_lock_busy, 1,
35593544
EXT4_MAX_CONTENTION);
3560-
spin_lock(lock);
3545+
spin_lock(ext4_group_lock_ptr(sb, group));
35613546
}
35623547
}
35633548

@@ -3612,6 +3597,7 @@ extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
36123597
extern int ext4_get_max_inline_size(struct inode *inode);
36133598
extern int ext4_find_inline_data_nolock(struct inode *inode);
36143599
extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
3600+
extern void ext4_update_final_de(void *de_buf, int old_size, int new_size);
36153601

36163602
int ext4_readpage_inline(struct inode *inode, struct folio *folio);
36173603
extern int ext4_try_to_write_inline_data(struct address_space *mapping,
@@ -3671,10 +3657,10 @@ static inline int ext4_has_inline_data(struct inode *inode)
36713657
extern const struct inode_operations ext4_dir_inode_operations;
36723658
extern const struct inode_operations ext4_special_inode_operations;
36733659
extern struct dentry *ext4_get_parent(struct dentry *child);
3674-
extern struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
3675-
struct ext4_dir_entry_2 *de,
3676-
int blocksize, int csum_size,
3677-
unsigned int parent_ino, int dotdot_real_len);
3660+
extern int ext4_init_dirblock(handle_t *handle, struct inode *inode,
3661+
struct buffer_head *dir_block,
3662+
unsigned int parent_ino, void *inline_buf,
3663+
int inline_size);
36783664
extern void ext4_initialize_dirent_tail(struct buffer_head *bh,
36793665
unsigned int blocksize);
36803666
extern int ext4_handle_dirty_dirblock(handle_t *handle, struct inode *inode,

fs/ext4/ext4_extents.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,6 @@
3030
*/
3131
#define CHECK_BINSEARCH__
3232

33-
/*
34-
* If EXT_STATS is defined then stats numbers are collected.
35-
* These number will be displayed at umount time.
36-
*/
37-
#define EXT_STATS_
38-
39-
4033
/*
4134
* ext4_inode has i_block array (60 bytes total).
4235
* The first 12 bytes store ext4_extent_header;

fs/ext4/extents.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5215,7 +5215,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
52155215
credits = depth + 2;
52165216
}
52175217

5218-
restart_credits = ext4_writepage_trans_blocks(inode);
5218+
restart_credits = ext4_chunk_trans_extent(inode, 0);
52195219
err = ext4_datasem_ensure_credits(handle, inode, credits,
52205220
restart_credits, 0);
52215221
if (err) {
@@ -5475,7 +5475,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
54755475

54765476
truncate_pagecache(inode, start);
54775477

5478-
credits = ext4_writepage_trans_blocks(inode);
5478+
credits = ext4_chunk_trans_extent(inode, 0);
54795479
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
54805480
if (IS_ERR(handle))
54815481
return PTR_ERR(handle);
@@ -5571,7 +5571,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
55715571

55725572
truncate_pagecache(inode, start);
55735573

5574-
credits = ext4_writepage_trans_blocks(inode);
5574+
credits = ext4_chunk_trans_extent(inode, 0);
55755575
handle = ext4_journal_start(inode, EXT4_HT_TRUNCATE, credits);
55765576
if (IS_ERR(handle))
55775577
return PTR_ERR(handle);

fs/ext4/ialloc.c

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1335,8 +1335,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
13351335
}
13361336
}
13371337

1338-
if (ext4_should_enable_large_folio(inode))
1339-
mapping_set_large_folios(inode->i_mapping);
1338+
ext4_set_inode_mapping_order(inode);
13401339

13411340
ext4_update_inode_fsync_trans(handle, inode, 1);
13421341

0 commit comments

Comments
 (0)