Skip to content

Commit 71c061d

Browse files
committed
Merge tag 'for-5.11-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:
"A few more fixes that arrived before the end of the year:

- a bunch of fixes related to transaction handle lifetime wrt various operations (umount, remount, qgroup scan, orphan cleanup)
- async discard scheduling fixes
- fix item size calculation when item keys collide for extend refs (hardlinks)
- fix qgroup flushing from running transaction
- fix send, wrong file path when there is an inode with a pending rmdir
- fix deadlock when cloning inline extent and low on free metadata space"

* tag 'for-5.11-rc2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: run delayed iputs when remounting RO to avoid leaking them
  btrfs: add assertion for empty list of transactions at late stage of umount
  btrfs: fix race between RO remount and the cleaner task
  btrfs: fix transaction leak and crash after cleaning up orphans on RO mount
  btrfs: fix transaction leak and crash after RO remount caused by qgroup rescan
  btrfs: merge critical sections of discard lock in workfn
  btrfs: fix racy access to discard_ctl data
  btrfs: fix async discard stall
  btrfs: tests: initialize test inodes location
  btrfs: send: fix wrong file path when there is an inode with a pending rmdir
  btrfs: qgroup: don't try to wait flushing if we're already holding a transaction
  btrfs: correctly calculate item size used when item key collision happens
  btrfs: fix deadlock when cloning inline extent and low on free metadata space
2 parents 9f1abbe + a8cc263 commit 71c061d

18 files changed

+243
-97
lines changed

fs/btrfs/btrfs_inode.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,15 @@ enum {
4242
* to an inode.
4343
*/
4444
BTRFS_INODE_NO_XATTRS,
45+
/*
46+
* Set when we are in a context where we need to start a transaction and
47+
* have dirty pages with the respective file range locked. This is to
48+
* ensure that when reserving space for the transaction, if we are low
49+
* on available space and need to flush delalloc, we will not flush
50+
* delalloc for this inode, because that could result in a deadlock (on
51+
* the file range, inode's io_tree).
52+
*/
53+
BTRFS_INODE_NO_DELALLOC_FLUSH,
4554
};
4655

4756
/* in memory btrfs inode */

fs/btrfs/ctree.c

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2555,8 +2555,14 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
25552555
* @p: Holds all btree nodes along the search path
25562556
* @root: The root node of the tree
25572557
* @key: The key we are looking for
2558-
* @ins_len: Indicates purpose of search, for inserts it is 1, for
2559-
* deletions it's -1. 0 for plain searches
2558+
* @ins_len: Indicates purpose of search:
2559+
* >0 for inserts it's size of item inserted (*)
2560+
* <0 for deletions
2561+
* 0 for plain searches, not modifying the tree
2562+
*
2563+
* (*) If size of item inserted doesn't include
2564+
* sizeof(struct btrfs_item), then p->search_for_extension must
2565+
* be set.
25602566
* @cow: boolean should CoW operations be performed. Must always be 1
25612567
* when modifying the tree.
25622568
*
@@ -2717,6 +2723,20 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
27172723

27182724
if (level == 0) {
27192725
p->slots[level] = slot;
2726+
/*
2727+
* Item key already exists. In this case, if we are
2728+
* allowed to insert the item (for example, in dir_item
2729+
* case, item key collision is allowed), it will be
2730+
* merged with the original item. Only the item size
2731+
* grows, no new btrfs item will be added. If
2732+
* search_for_extension is not set, ins_len already
2733+
* accounts for the size of btrfs_item, deduct it here so leaf
2734+
* space check will be correct.
2735+
*/
2736+
if (ret == 0 && ins_len > 0 && !p->search_for_extension) {
2737+
ASSERT(ins_len >= sizeof(struct btrfs_item));
2738+
ins_len -= sizeof(struct btrfs_item);
2739+
}
27202740
if (ins_len > 0 &&
27212741
btrfs_leaf_free_space(b) < ins_len) {
27222742
if (write_lock_level < 1) {

fs/btrfs/ctree.h

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,8 @@ enum {
131131
* defrag
132132
*/
133133
BTRFS_FS_STATE_REMOUNTING,
134+
/* Filesystem in RO mode */
135+
BTRFS_FS_STATE_RO,
134136
/* Track if a transaction abort has been reported on this filesystem */
135137
BTRFS_FS_STATE_TRANS_ABORTED,
136138
/*
@@ -367,6 +369,12 @@ struct btrfs_path {
367369
unsigned int search_commit_root:1;
368370
unsigned int need_commit_sem:1;
369371
unsigned int skip_release_on_error:1;
372+
/*
373+
* Indicate that new item (btrfs_search_slot) is extending already
374+
* existing item and ins_len contains only the data size and not item
375+
* header (ie. sizeof(struct btrfs_item) is not included).
376+
*/
377+
unsigned int search_for_extension:1;
370378
};
371379
#define BTRFS_MAX_EXTENT_ITEM_SIZE(r) ((BTRFS_LEAF_DATA_SIZE(r->fs_info) >> 4) - \
372380
sizeof(struct btrfs_item))
@@ -2885,10 +2893,26 @@ static inline int btrfs_fs_closing(struct btrfs_fs_info *fs_info)
28852893
* If we remount the fs to be R/O or umount the fs, the cleaner needn't do
28862894
* anything except sleeping. This function is used to check the status of
28872895
* the fs.
2896+
* We check for BTRFS_FS_STATE_RO to avoid races with a concurrent remount,
2897+
* since setting and checking for SB_RDONLY in the superblock's flags is not
2898+
* atomic.
28882899
*/
28892900
static inline int btrfs_need_cleaner_sleep(struct btrfs_fs_info *fs_info)
28902901
{
2891-
return fs_info->sb->s_flags & SB_RDONLY || btrfs_fs_closing(fs_info);
2902+
return test_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state) ||
2903+
btrfs_fs_closing(fs_info);
2904+
}
2905+
2906+
static inline void btrfs_set_sb_rdonly(struct super_block *sb)
2907+
{
2908+
sb->s_flags |= SB_RDONLY;
2909+
set_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
2910+
}
2911+
2912+
static inline void btrfs_clear_sb_rdonly(struct super_block *sb)
2913+
{
2914+
sb->s_flags &= ~SB_RDONLY;
2915+
clear_bit(BTRFS_FS_STATE_RO, &btrfs_sb(sb)->fs_state);
28922916
}
28932917

28942918
/* tree mod log functions from ctree.c */
@@ -3073,7 +3097,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
30733097
u32 min_type);
30743098

30753099
int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
3076-
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr);
3100+
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
3101+
bool in_reclaim_context);
30773102
int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
30783103
unsigned int extra_bits,
30793104
struct extent_state **cached_state);

fs/btrfs/dev-replace.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
715715
* flush all outstanding I/O and inode extent mappings before the
716716
* copy operation is declared as being finished
717717
*/
718-
ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
718+
ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
719719
if (ret) {
720720
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
721721
return ret;

fs/btrfs/discard.c

Lines changed: 36 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -199,16 +199,15 @@ static struct btrfs_block_group *find_next_block_group(
199199
static struct btrfs_block_group *peek_discard_list(
200200
struct btrfs_discard_ctl *discard_ctl,
201201
enum btrfs_discard_state *discard_state,
202-
int *discard_index)
202+
int *discard_index, u64 now)
203203
{
204204
struct btrfs_block_group *block_group;
205-
const u64 now = ktime_get_ns();
206205

207206
spin_lock(&discard_ctl->lock);
208207
again:
209208
block_group = find_next_block_group(discard_ctl, now);
210209

211-
if (block_group && now > block_group->discard_eligible_time) {
210+
if (block_group && now >= block_group->discard_eligible_time) {
212211
if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
213212
block_group->used != 0) {
214213
if (btrfs_is_block_group_data_only(block_group))
@@ -222,12 +221,11 @@ static struct btrfs_block_group *peek_discard_list(
222221
block_group->discard_state = BTRFS_DISCARD_EXTENTS;
223222
}
224223
discard_ctl->block_group = block_group;
224+
}
225+
if (block_group) {
225226
*discard_state = block_group->discard_state;
226227
*discard_index = block_group->discard_index;
227-
} else {
228-
block_group = NULL;
229228
}
230-
231229
spin_unlock(&discard_ctl->lock);
232230

233231
return block_group;
@@ -330,28 +328,15 @@ void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
330328
btrfs_discard_schedule_work(discard_ctl, false);
331329
}
332330

333-
/**
334-
* btrfs_discard_schedule_work - responsible for scheduling the discard work
335-
* @discard_ctl: discard control
336-
* @override: override the current timer
337-
*
338-
* Discards are issued by a delayed workqueue item. @override is used to
339-
* update the current delay as the baseline delay interval is reevaluated on
340-
* transaction commit. This is also maxed with any other rate limit.
341-
*/
342-
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
343-
bool override)
331+
static void __btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
332+
u64 now, bool override)
344333
{
345334
struct btrfs_block_group *block_group;
346-
const u64 now = ktime_get_ns();
347-
348-
spin_lock(&discard_ctl->lock);
349335

350336
if (!btrfs_run_discard_work(discard_ctl))
351-
goto out;
352-
337+
return;
353338
if (!override && delayed_work_pending(&discard_ctl->work))
354-
goto out;
339+
return;
355340

356341
block_group = find_next_block_group(discard_ctl, now);
357342
if (block_group) {
@@ -393,7 +378,24 @@ void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
393378
mod_delayed_work(discard_ctl->discard_workers,
394379
&discard_ctl->work, nsecs_to_jiffies(delay));
395380
}
396-
out:
381+
}
382+
383+
/*
384+
* btrfs_discard_schedule_work - responsible for scheduling the discard work
385+
* @discard_ctl: discard control
386+
* @override: override the current timer
387+
*
388+
* Discards are issued by a delayed workqueue item. @override is used to
389+
* update the current delay as the baseline delay interval is reevaluated on
390+
* transaction commit. This is also maxed with any other rate limit.
391+
*/
392+
void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
393+
bool override)
394+
{
395+
const u64 now = ktime_get_ns();
396+
397+
spin_lock(&discard_ctl->lock);
398+
__btrfs_discard_schedule_work(discard_ctl, now, override);
397399
spin_unlock(&discard_ctl->lock);
398400
}
399401

@@ -438,13 +440,18 @@ static void btrfs_discard_workfn(struct work_struct *work)
438440
int discard_index = 0;
439441
u64 trimmed = 0;
440442
u64 minlen = 0;
443+
u64 now = ktime_get_ns();
441444

442445
discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
443446

444447
block_group = peek_discard_list(discard_ctl, &discard_state,
445-
&discard_index);
448+
&discard_index, now);
446449
if (!block_group || !btrfs_run_discard_work(discard_ctl))
447450
return;
451+
if (now < block_group->discard_eligible_time) {
452+
btrfs_discard_schedule_work(discard_ctl, false);
453+
return;
454+
}
448455

449456
/* Perform discarding */
450457
minlen = discard_minlen[discard_index];
@@ -474,13 +481,6 @@ static void btrfs_discard_workfn(struct work_struct *work)
474481
discard_ctl->discard_extent_bytes += trimmed;
475482
}
476483

477-
/*
478-
* Updated without locks as this is inside the workfn and nothing else
479-
* is reading the values
480-
*/
481-
discard_ctl->prev_discard = trimmed;
482-
discard_ctl->prev_discard_time = ktime_get_ns();
483-
484484
/* Determine next steps for a block_group */
485485
if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
486486
if (discard_state == BTRFS_DISCARD_BITMAPS) {
@@ -496,11 +496,13 @@ static void btrfs_discard_workfn(struct work_struct *work)
496496
}
497497
}
498498

499+
now = ktime_get_ns();
499500
spin_lock(&discard_ctl->lock);
501+
discard_ctl->prev_discard = trimmed;
502+
discard_ctl->prev_discard_time = now;
500503
discard_ctl->block_group = NULL;
504+
__btrfs_discard_schedule_work(discard_ctl, now, false);
501505
spin_unlock(&discard_ctl->lock);
502-
503-
btrfs_discard_schedule_work(discard_ctl, false);
504506
}
505507

506508
/**

fs/btrfs/disk-io.c

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1729,7 +1729,7 @@ static int cleaner_kthread(void *arg)
17291729
*/
17301730
btrfs_delete_unused_bgs(fs_info);
17311731
sleep:
1732-
clear_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
1732+
clear_and_wake_up_bit(BTRFS_FS_CLEANER_RUNNING, &fs_info->flags);
17331733
if (kthread_should_park())
17341734
kthread_parkme();
17351735
if (kthread_should_stop())
@@ -2830,6 +2830,9 @@ static int init_mount_fs_info(struct btrfs_fs_info *fs_info, struct super_block
28302830
return -ENOMEM;
28312831
btrfs_init_delayed_root(fs_info->delayed_root);
28322832

2833+
if (sb_rdonly(sb))
2834+
set_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state);
2835+
28332836
return btrfs_alloc_stripe_hash_table(fs_info);
28342837
}
28352838

@@ -2969,6 +2972,7 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
29692972
}
29702973
}
29712974

2975+
ret = btrfs_find_orphan_roots(fs_info);
29722976
out:
29732977
return ret;
29742978
}
@@ -3383,10 +3387,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
33833387
}
33843388
}
33853389

3386-
ret = btrfs_find_orphan_roots(fs_info);
3387-
if (ret)
3388-
goto fail_qgroup;
3389-
33903390
fs_info->fs_root = btrfs_get_fs_root(fs_info, BTRFS_FS_TREE_OBJECTID, true);
33913391
if (IS_ERR(fs_info->fs_root)) {
33923392
err = PTR_ERR(fs_info->fs_root);
@@ -4181,6 +4181,9 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
41814181
invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
41824182
btrfs_stop_all_workers(fs_info);
41834183

4184+
/* We shouldn't have any transaction open at this point */
4185+
ASSERT(list_empty(&fs_info->trans_list));
4186+
41844187
clear_bit(BTRFS_FS_OPEN, &fs_info->flags);
41854188
free_root_pointers(fs_info, true);
41864189
btrfs_free_fs_roots(fs_info);

fs/btrfs/extent-tree.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -844,6 +844,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
844844
want = extent_ref_type(parent, owner);
845845
if (insert) {
846846
extra_size = btrfs_extent_inline_ref_size(want);
847+
path->search_for_extension = 1;
847848
path->keep_locks = 1;
848849
} else
849850
extra_size = -1;
@@ -996,6 +997,7 @@ int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
996997
out:
997998
if (insert) {
998999
path->keep_locks = 0;
1000+
path->search_for_extension = 0;
9991001
btrfs_unlock_up_safe(path, 1);
10001002
}
10011003
return err;

fs/btrfs/file-item.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1016,8 +1016,10 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans,
10161016
}
10171017

10181018
btrfs_release_path(path);
1019+
path->search_for_extension = 1;
10191020
ret = btrfs_search_slot(trans, root, &file_key, path,
10201021
csum_size, 1);
1022+
path->search_for_extension = 0;
10211023
if (ret < 0)
10221024
goto out;
10231025

fs/btrfs/inode.c

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9390,7 +9390,8 @@ static struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode
93909390
* some fairly slow code that needs optimization. This walks the list
93919391
* of all the inodes with pending delalloc and forces them to disk.
93929392
*/
9393-
static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot)
9393+
static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot,
9394+
bool in_reclaim_context)
93949395
{
93959396
struct btrfs_inode *binode;
93969397
struct inode *inode;
@@ -9411,6 +9412,11 @@ static int start_delalloc_inodes(struct btrfs_root *root, u64 *nr, bool snapshot
94119412

94129413
list_move_tail(&binode->delalloc_inodes,
94139414
&root->delalloc_inodes);
9415+
9416+
if (in_reclaim_context &&
9417+
test_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &binode->runtime_flags))
9418+
continue;
9419+
94149420
inode = igrab(&binode->vfs_inode);
94159421
if (!inode) {
94169422
cond_resched_lock(&root->delalloc_lock);
@@ -9464,10 +9470,11 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
94649470
if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
94659471
return -EROFS;
94669472

9467-
return start_delalloc_inodes(root, &nr, true);
9473+
return start_delalloc_inodes(root, &nr, true, false);
94689474
}
94699475

9470-
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr)
9476+
int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr,
9477+
bool in_reclaim_context)
94719478
{
94729479
struct btrfs_root *root;
94739480
struct list_head splice;
@@ -9490,7 +9497,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, u64 nr)
94909497
&fs_info->delalloc_roots);
94919498
spin_unlock(&fs_info->delalloc_root_lock);
94929499

9493-
ret = start_delalloc_inodes(root, &nr, false);
9500+
ret = start_delalloc_inodes(root, &nr, false, in_reclaim_context);
94949501
btrfs_put_root(root);
94959502
if (ret < 0)
94969503
goto out;

fs/btrfs/ioctl.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4951,7 +4951,7 @@ long btrfs_ioctl(struct file *file, unsigned int
49514951
case BTRFS_IOC_SYNC: {
49524952
int ret;
49534953

4954-
ret = btrfs_start_delalloc_roots(fs_info, U64_MAX);
4954+
ret = btrfs_start_delalloc_roots(fs_info, U64_MAX, false);
49554955
if (ret)
49564956
return ret;
49574957
ret = btrfs_sync_fs(inode->i_sb, 1);

0 commit comments

Comments
 (0)