Skip to content

Commit c041918

Browse files
committed
Merge tag 'for-5.17-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

"This is a hopefully last batch of fixes for defrag that got broken in 5.16, all stable material. The remaining reported problem is excessive IO with autodefrag due to various conditions in the defrag code not met or missing"

* tag 'for-5.17-rc5-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: reduce extent threshold for autodefrag
  btrfs: autodefrag: only scan one inode once
  btrfs: defrag: don't use merged extent map for their generation check
  btrfs: defrag: bring back the old file extent search behavior
  btrfs: defrag: remove an ambiguous condition for rejection
  btrfs: defrag: don't defrag extents which are already at max capacity
  btrfs: defrag: don't try to merge regular extents with preallocated extents
  btrfs: defrag: allow defrag_one_cluster() to skip large extent which is not a target
  btrfs: prevent copying too big compressed lzo segment
2 parents ca74572 + 558732d commit c041918

File tree

7 files changed

+296
-84
lines changed

7 files changed

+296
-84
lines changed

fs/btrfs/ctree.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3291,7 +3291,7 @@ void btrfs_exclop_balance(struct btrfs_fs_info *fs_info,
32913291
int __init btrfs_auto_defrag_init(void);
32923292
void __cold btrfs_auto_defrag_exit(void);
32933293
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
3294-
struct btrfs_inode *inode);
3294+
struct btrfs_inode *inode, u32 extent_thresh);
32953295
int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info);
32963296
void btrfs_cleanup_defrag_inodes(struct btrfs_fs_info *fs_info);
32973297
int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync);

fs/btrfs/extent_map.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
261261
em->mod_len = (em->mod_len + em->mod_start) - merge->mod_start;
262262
em->mod_start = merge->mod_start;
263263
em->generation = max(em->generation, merge->generation);
264+
set_bit(EXTENT_FLAG_MERGED, &em->flags);
264265

265266
rb_erase_cached(&merge->rb_node, &tree->map);
266267
RB_CLEAR_NODE(&merge->rb_node);
@@ -278,6 +279,7 @@ static void try_merge_map(struct extent_map_tree *tree, struct extent_map *em)
278279
RB_CLEAR_NODE(&merge->rb_node);
279280
em->mod_len = (merge->mod_start + merge->mod_len) - em->mod_start;
280281
em->generation = max(em->generation, merge->generation);
282+
set_bit(EXTENT_FLAG_MERGED, &em->flags);
281283
free_extent_map(merge);
282284
}
283285
}

fs/btrfs/extent_map.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ enum {
2525
EXTENT_FLAG_FILLING,
2626
/* filesystem extent mapping type */
2727
EXTENT_FLAG_FS_MAPPING,
28+
/* This em is merged from two or more physically adjacent ems */
29+
EXTENT_FLAG_MERGED,
2830
};
2931

3032
struct extent_map {
@@ -40,6 +42,12 @@ struct extent_map {
4042
u64 ram_bytes;
4143
u64 block_start;
4244
u64 block_len;
45+
46+
/*
47+
* Generation of the extent map, for merged em it's the highest
48+
* generation of all merged ems.
49+
* For non-merged extents, it's from btrfs_file_extent_item::generation.
50+
*/
4351
u64 generation;
4452
unsigned long flags;
4553
/* Used for chunk mappings, flag EXTENT_FLAG_FS_MAPPING must be set */

fs/btrfs/file.c

Lines changed: 35 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,14 @@ struct inode_defrag {
5050
/* root objectid */
5151
u64 root;
5252

53-
/* last offset we were able to defrag */
54-
u64 last_offset;
55-
56-
/* if we've wrapped around back to zero once already */
57-
int cycled;
53+
/*
54+
* The extent size threshold for autodefrag.
55+
*
56+
* This value is different for compressed/non-compressed extents,
57+
* thus needs to be passed from higher layer.
58+
* (aka, inode_should_defrag())
59+
*/
60+
u32 extent_thresh;
5861
};
5962

6063
static int __compare_inode_defrag(struct inode_defrag *defrag1,
@@ -107,8 +110,8 @@ static int __btrfs_add_inode_defrag(struct btrfs_inode *inode,
107110
*/
108111
if (defrag->transid < entry->transid)
109112
entry->transid = defrag->transid;
110-
if (defrag->last_offset > entry->last_offset)
111-
entry->last_offset = defrag->last_offset;
113+
entry->extent_thresh = min(defrag->extent_thresh,
114+
entry->extent_thresh);
112115
return -EEXIST;
113116
}
114117
}
@@ -134,7 +137,7 @@ static inline int __need_auto_defrag(struct btrfs_fs_info *fs_info)
134137
* enabled
135138
*/
136139
int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
137-
struct btrfs_inode *inode)
140+
struct btrfs_inode *inode, u32 extent_thresh)
138141
{
139142
struct btrfs_root *root = inode->root;
140143
struct btrfs_fs_info *fs_info = root->fs_info;
@@ -160,6 +163,7 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
160163
defrag->ino = btrfs_ino(inode);
161164
defrag->transid = transid;
162165
defrag->root = root->root_key.objectid;
166+
defrag->extent_thresh = extent_thresh;
163167

164168
spin_lock(&fs_info->defrag_inodes_lock);
165169
if (!test_bit(BTRFS_INODE_IN_DEFRAG, &inode->runtime_flags)) {
@@ -178,34 +182,6 @@ int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans,
178182
return 0;
179183
}
180184

181-
/*
182-
* Requeue the defrag object. If there is a defrag object that points to
183-
* the same inode in the tree, we will merge them together (by
184-
* __btrfs_add_inode_defrag()) and free the one that we want to requeue.
185-
*/
186-
static void btrfs_requeue_inode_defrag(struct btrfs_inode *inode,
187-
struct inode_defrag *defrag)
188-
{
189-
struct btrfs_fs_info *fs_info = inode->root->fs_info;
190-
int ret;
191-
192-
if (!__need_auto_defrag(fs_info))
193-
goto out;
194-
195-
/*
196-
* Here we don't check the IN_DEFRAG flag, because we need merge
197-
* them together.
198-
*/
199-
spin_lock(&fs_info->defrag_inodes_lock);
200-
ret = __btrfs_add_inode_defrag(inode, defrag);
201-
spin_unlock(&fs_info->defrag_inodes_lock);
202-
if (ret)
203-
goto out;
204-
return;
205-
out:
206-
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
207-
}
208-
209185
/*
210186
* pick the defragable inode that we want, if it doesn't exist, we will get
211187
* the next one.
@@ -278,8 +254,14 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
278254
struct btrfs_root *inode_root;
279255
struct inode *inode;
280256
struct btrfs_ioctl_defrag_range_args range;
281-
int num_defrag;
282-
int ret;
257+
int ret = 0;
258+
u64 cur = 0;
259+
260+
again:
261+
if (test_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state))
262+
goto cleanup;
263+
if (!__need_auto_defrag(fs_info))
264+
goto cleanup;
283265

284266
/* get the inode */
285267
inode_root = btrfs_get_fs_root(fs_info, defrag->root, true);
@@ -295,39 +277,30 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
295277
goto cleanup;
296278
}
297279

280+
if (cur >= i_size_read(inode)) {
281+
iput(inode);
282+
goto cleanup;
283+
}
284+
298285
/* do a chunk of defrag */
299286
clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
300287
memset(&range, 0, sizeof(range));
301288
range.len = (u64)-1;
302-
range.start = defrag->last_offset;
289+
range.start = cur;
290+
range.extent_thresh = defrag->extent_thresh;
303291

304292
sb_start_write(fs_info->sb);
305-
num_defrag = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
293+
ret = btrfs_defrag_file(inode, NULL, &range, defrag->transid,
306294
BTRFS_DEFRAG_BATCH);
307295
sb_end_write(fs_info->sb);
308-
/*
309-
* if we filled the whole defrag batch, there
310-
* must be more work to do. Queue this defrag
311-
* again
312-
*/
313-
if (num_defrag == BTRFS_DEFRAG_BATCH) {
314-
defrag->last_offset = range.start;
315-
btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
316-
} else if (defrag->last_offset && !defrag->cycled) {
317-
/*
318-
* we didn't fill our defrag batch, but
319-
* we didn't start at zero. Make sure we loop
320-
* around to the start of the file.
321-
*/
322-
defrag->last_offset = 0;
323-
defrag->cycled = 1;
324-
btrfs_requeue_inode_defrag(BTRFS_I(inode), defrag);
325-
} else {
326-
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
327-
}
328-
329296
iput(inode);
330-
return 0;
297+
298+
if (ret < 0)
299+
goto cleanup;
300+
301+
cur = max(cur + fs_info->sectorsize, range.start);
302+
goto again;
303+
331304
cleanup:
332305
kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
333306
return ret;

fs/btrfs/inode.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -560,12 +560,12 @@ static inline int inode_need_compress(struct btrfs_inode *inode, u64 start,
560560
}
561561

562562
static inline void inode_should_defrag(struct btrfs_inode *inode,
563-
u64 start, u64 end, u64 num_bytes, u64 small_write)
563+
u64 start, u64 end, u64 num_bytes, u32 small_write)
564564
{
565565
/* If this is a small write inside eof, kick off a defrag */
566566
if (num_bytes < small_write &&
567567
(start > 0 || end + 1 < inode->disk_i_size))
568-
btrfs_add_inode_defrag(NULL, inode);
568+
btrfs_add_inode_defrag(NULL, inode, small_write);
569569
}
570570

571571
/*

0 commit comments

Comments
 (0)