Skip to content

Commit 53ea167

Browse files
committed
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o: "Various cleanups and bug fixes in ext4's extent status tree, journalling, and block allocator subsystems. Also improve performance for parallel DIO overwrites" * tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (55 commits) ext4: avoid updating the superblock on a r/o mount if not needed jbd2: skip reading super block if it has been verified ext4: fix to check return value of freeze_bdev() in ext4_shutdown() ext4: refactoring to use the unified helper ext4_quotas_off() ext4: turn quotas off if mount failed after enabling quotas ext4: update doc about journal superblock description ext4: add journal cycled recording support jbd2: continue to record log between each mount jbd2: remove j_format_version jbd2: factor out journal initialization from journal_get_superblock() jbd2: switch to check format version in superblock directly jbd2: remove unused feature macros ext4: ext4_put_super: Remove redundant checking for 'sbi->s_journal_bdev' ext4: Fix reusing stale buffer heads from last failed mounting ext4: allow concurrent unaligned dio overwrites ext4: clean up mballoc criteria comments ext4: make ext4_zeroout_es() return void ext4: make ext4_es_insert_extent() return void ext4: make ext4_es_insert_delayed_block() return void ext4: make ext4_es_remove_extent() return void ...
2 parents b9d02c2 + 2ef6c32 commit 53ea167

File tree

19 files changed

+939
-615
lines changed

19 files changed

+939
-615
lines changed

Documentation/filesystems/ext4/journal.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,13 @@ which is 1024 bytes long:
260260
- s_num_fc_blocks
261261
- Number of fast commit blocks in the journal.
262262
* - 0x58
263+
- __be32
264+
- s_head
265+
- Block number of the head (first unused block) of the journal, only
266+
up-to-date when the journal is empty.
267+
* - 0x5C
263268
- __u32
264-
- s_padding[42]
269+
- s_padding[40]
265270
-
266271
* - 0xFC
267272
- __be32

fs/ext4/ext4.h

Lines changed: 68 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,58 @@ enum SHIFT_DIRECTION {
127127
SHIFT_RIGHT,
128128
};
129129

130+
/*
131+
* For each criteria, mballoc has slightly different way of finding
132+
* the required blocks and usually, higher the criteria the slower the
133+
* allocation. We start at lower criteria and keep falling back to
134+
* higher ones if we are not able to find any blocks. Lower (earlier)
135+
* criteria are faster.
136+
*/
137+
enum criteria {
138+
/*
139+
* Used when number of blocks needed is a power of 2. This
140+
* doesn't trigger any disk IO except prefetch and is the
141+
* fastest criteria.
142+
*/
143+
CR_POWER2_ALIGNED,
144+
145+
/*
146+
* Tries to lookup in-memory data structures to find the most
147+
* suitable group that satisfies goal request. No disk IO
148+
* except block prefetch.
149+
*/
150+
CR_GOAL_LEN_FAST,
151+
152+
/*
153+
* Same as CR_GOAL_LEN_FAST but is allowed to reduce the goal
154+
* length to the best available length for faster allocation.
155+
*/
156+
CR_BEST_AVAIL_LEN,
157+
158+
/*
159+
* Reads each block group sequentially, performing disk IO if
160+
* necessary, to find a suitable block group. Tries to
161+
* allocate goal length but might trim the request if nothing
162+
* is found after enough tries.
163+
*/
164+
CR_GOAL_LEN_SLOW,
165+
166+
/*
167+
* Finds the first free set of blocks and allocates
168+
* those. This is only used in rare cases when
169+
* CR_GOAL_LEN_SLOW also fails to allocate anything.
170+
*/
171+
CR_ANY_FREE,
172+
173+
/*
174+
* Number of criteria defined.
175+
*/
176+
EXT4_MB_NUM_CRS
177+
};
178+
179+
/* criteria below which we use fast block scanning and avoid unnecessary IO */
180+
#define CR_FAST CR_GOAL_LEN_SLOW
181+
130182
/*
131183
* Flags used in mballoc's allocation_context flags field.
132184
*
@@ -165,9 +217,12 @@ enum SHIFT_DIRECTION {
165217
/* Do strict check for free blocks while retrying block allocation */
166218
#define EXT4_MB_STRICT_CHECK 0x4000
167219
/* Large fragment size list lookup succeeded at least once for cr = 0 */
168-
#define EXT4_MB_CR0_OPTIMIZED 0x8000
220+
#define EXT4_MB_CR_POWER2_ALIGNED_OPTIMIZED 0x8000
169221
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1 */
170-
#define EXT4_MB_CR1_OPTIMIZED 0x00010000
222+
#define EXT4_MB_CR_GOAL_LEN_FAST_OPTIMIZED 0x00010000
223+
/* Avg fragment size rb tree lookup succeeded at least once for cr = 1.5 */
224+
#define EXT4_MB_CR_BEST_AVAIL_LEN_OPTIMIZED 0x00020000
225+
171226
struct ext4_allocation_request {
172227
/* target inode for block we're allocating */
173228
struct inode *inode;
@@ -1532,21 +1587,25 @@ struct ext4_sb_info {
15321587
unsigned long s_mb_last_start;
15331588
unsigned int s_mb_prefetch;
15341589
unsigned int s_mb_prefetch_limit;
1590+
unsigned int s_mb_best_avail_max_trim_order;
15351591

15361592
/* stats for buddy allocator */
15371593
atomic_t s_bal_reqs; /* number of reqs with len > 1 */
15381594
atomic_t s_bal_success; /* we found long enough chunks */
15391595
atomic_t s_bal_allocated; /* in blocks */
15401596
atomic_t s_bal_ex_scanned; /* total extents scanned */
1597+
atomic_t s_bal_cX_ex_scanned[EXT4_MB_NUM_CRS]; /* total extents scanned */
15411598
atomic_t s_bal_groups_scanned; /* number of groups scanned */
15421599
atomic_t s_bal_goals; /* goal hits */
1600+
atomic_t s_bal_len_goals; /* len goal hits */
15431601
atomic_t s_bal_breaks; /* too long searches */
15441602
atomic_t s_bal_2orders; /* 2^order hits */
1545-
atomic_t s_bal_cr0_bad_suggestions;
1546-
atomic_t s_bal_cr1_bad_suggestions;
1547-
atomic64_t s_bal_cX_groups_considered[4];
1548-
atomic64_t s_bal_cX_hits[4];
1549-
atomic64_t s_bal_cX_failed[4]; /* cX loop didn't find blocks */
1603+
atomic_t s_bal_p2_aligned_bad_suggestions;
1604+
atomic_t s_bal_goal_fast_bad_suggestions;
1605+
atomic_t s_bal_best_avail_bad_suggestions;
1606+
atomic64_t s_bal_cX_groups_considered[EXT4_MB_NUM_CRS];
1607+
atomic64_t s_bal_cX_hits[EXT4_MB_NUM_CRS];
1608+
atomic64_t s_bal_cX_failed[EXT4_MB_NUM_CRS]; /* cX loop didn't find blocks */
15501609
atomic_t s_mb_buddies_generated; /* number of buddies generated */
15511610
atomic64_t s_mb_generation_time;
15521611
atomic_t s_mb_lost_chunks;
@@ -2632,10 +2691,6 @@ extern void ext4_get_group_no_and_offset(struct super_block *sb,
26322691
extern ext4_group_t ext4_get_group_number(struct super_block *sb,
26332692
ext4_fsblk_t block);
26342693

2635-
extern unsigned int ext4_block_group(struct super_block *sb,
2636-
ext4_fsblk_t blocknr);
2637-
extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
2638-
ext4_fsblk_t blocknr);
26392694
extern int ext4_bg_has_super(struct super_block *sb, ext4_group_t group);
26402695
extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
26412696
ext4_group_t group);
@@ -2841,8 +2896,6 @@ int ext4_fc_record_regions(struct super_block *sb, int ino,
28412896
/* mballoc.c */
28422897
extern const struct seq_operations ext4_mb_seq_groups_ops;
28432898
extern const struct seq_operations ext4_mb_seq_structs_summary_ops;
2844-
extern long ext4_mb_stats;
2845-
extern long ext4_mb_max_to_scan;
28462899
extern int ext4_seq_mb_stats_show(struct seq_file *seq, void *offset);
28472900
extern int ext4_mb_init(struct super_block *);
28482901
extern int ext4_mb_release(struct super_block *);
@@ -3481,14 +3534,8 @@ extern int ext4_try_to_write_inline_data(struct address_space *mapping,
34813534
struct inode *inode,
34823535
loff_t pos, unsigned len,
34833536
struct page **pagep);
3484-
extern int ext4_write_inline_data_end(struct inode *inode,
3485-
loff_t pos, unsigned len,
3486-
unsigned copied,
3487-
struct page *page);
3488-
extern struct buffer_head *
3489-
ext4_journalled_write_inline_data(struct inode *inode,
3490-
unsigned len,
3491-
struct page *page);
3537+
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
3538+
unsigned copied, struct folio *folio);
34923539
extern int ext4_da_write_inline_data_begin(struct address_space *mapping,
34933540
struct inode *inode,
34943541
loff_t pos, unsigned len,

fs/ext4/extents.c

Lines changed: 13 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3123,7 +3123,7 @@ void ext4_ext_release(struct super_block *sb)
31233123
#endif
31243124
}
31253125

3126-
static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
3126+
static void ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
31273127
{
31283128
ext4_lblk_t ee_block;
31293129
ext4_fsblk_t ee_pblock;
@@ -3134,10 +3134,10 @@ static int ext4_zeroout_es(struct inode *inode, struct ext4_extent *ex)
31343134
ee_pblock = ext4_ext_pblock(ex);
31353135

31363136
if (ee_len == 0)
3137-
return 0;
3137+
return;
31383138

3139-
return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
3140-
EXTENT_STATUS_WRITTEN);
3139+
ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
3140+
EXTENT_STATUS_WRITTEN);
31413141
}
31423142

31433143
/* FIXME!! we need to try to merge to left or right after zero-out */
@@ -3287,7 +3287,7 @@ static int ext4_split_extent_at(handle_t *handle,
32873287
err = ext4_ext_dirty(handle, inode, path + path->p_depth);
32883288
if (!err)
32893289
/* update extent status tree */
3290-
err = ext4_zeroout_es(inode, &zero_ex);
3290+
ext4_zeroout_es(inode, &zero_ex);
32913291
/* If we failed at this point, we don't know in which
32923292
* state the extent tree exactly is so don't try to fix
32933293
* length of the original extent as it may do even more
@@ -3640,9 +3640,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
36403640
out:
36413641
/* If we have gotten a failure, don't zero out status tree */
36423642
if (!err) {
3643-
err = ext4_zeroout_es(inode, &zero_ex1);
3644-
if (!err)
3645-
err = ext4_zeroout_es(inode, &zero_ex2);
3643+
ext4_zeroout_es(inode, &zero_ex1);
3644+
ext4_zeroout_es(inode, &zero_ex2);
36463645
}
36473646
return err ? err : allocated;
36483647
}
@@ -4403,15 +4402,8 @@ int ext4_ext_truncate(handle_t *handle, struct inode *inode)
44034402

44044403
last_block = (inode->i_size + sb->s_blocksize - 1)
44054404
>> EXT4_BLOCK_SIZE_BITS(sb);
4406-
retry:
4407-
err = ext4_es_remove_extent(inode, last_block,
4408-
EXT_MAX_BLOCKS - last_block);
4409-
if (err == -ENOMEM) {
4410-
memalloc_retry_wait(GFP_ATOMIC);
4411-
goto retry;
4412-
}
4413-
if (err)
4414-
return err;
4405+
ext4_es_remove_extent(inode, last_block, EXT_MAX_BLOCKS - last_block);
4406+
44154407
retry_remove_space:
44164408
err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
44174409
if (err == -ENOMEM) {
@@ -5363,13 +5355,7 @@ static int ext4_collapse_range(struct file *file, loff_t offset, loff_t len)
53635355

53645356
down_write(&EXT4_I(inode)->i_data_sem);
53655357
ext4_discard_preallocations(inode, 0);
5366-
5367-
ret = ext4_es_remove_extent(inode, punch_start,
5368-
EXT_MAX_BLOCKS - punch_start);
5369-
if (ret) {
5370-
up_write(&EXT4_I(inode)->i_data_sem);
5371-
goto out_stop;
5372-
}
5358+
ext4_es_remove_extent(inode, punch_start, EXT_MAX_BLOCKS - punch_start);
53735359

53745360
ret = ext4_ext_remove_space(inode, punch_start, punch_stop - 1);
53755361
if (ret) {
@@ -5547,12 +5533,7 @@ static int ext4_insert_range(struct file *file, loff_t offset, loff_t len)
55475533
ext4_free_ext_path(path);
55485534
}
55495535

5550-
ret = ext4_es_remove_extent(inode, offset_lblk,
5551-
EXT_MAX_BLOCKS - offset_lblk);
5552-
if (ret) {
5553-
up_write(&EXT4_I(inode)->i_data_sem);
5554-
goto out_stop;
5555-
}
5536+
ext4_es_remove_extent(inode, offset_lblk, EXT_MAX_BLOCKS - offset_lblk);
55565537

55575538
/*
55585539
* if offset_lblk lies in a hole which is at start of file, use
@@ -5610,12 +5591,8 @@ ext4_swap_extents(handle_t *handle, struct inode *inode1,
56105591
BUG_ON(!inode_is_locked(inode1));
56115592
BUG_ON(!inode_is_locked(inode2));
56125593

5613-
*erp = ext4_es_remove_extent(inode1, lblk1, count);
5614-
if (unlikely(*erp))
5615-
return 0;
5616-
*erp = ext4_es_remove_extent(inode2, lblk2, count);
5617-
if (unlikely(*erp))
5618-
return 0;
5594+
ext4_es_remove_extent(inode1, lblk1, count);
5595+
ext4_es_remove_extent(inode2, lblk2, count);
56195596

56205597
while (count) {
56215598
struct ext4_extent *ex1, *ex2, tmp_ex;

0 commit comments

Comments
 (0)