Skip to content

Commit 4c06e63

Browse files
committed
Merge tag 'for-6.16-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

 - tree-log fixes:
     - fixes of log tracking of directories and subvolumes
     - fix iteration and error handling of inode references during log replay

 - fix free space tree rebuild (reported by syzbot)

* tag 'for-6.16-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  btrfs: use btrfs_record_snapshot_destroy() during rmdir
  btrfs: propagate last_unlink_trans earlier when doing a rmdir
  btrfs: record new subvolume in parent dir earlier to avoid dir logging races
  btrfs: fix inode lookup error handling during log replay
  btrfs: fix iteration of extrefs during log replay
  btrfs: fix missing error handling when searching for inode refs during log replay
  btrfs: fix failure to rebuild free space tree using multiple transactions
2 parents 025c197 + 157501b commit 4c06e63

File tree

5 files changed

+131
-88
lines changed

5 files changed

+131
-88
lines changed

fs/btrfs/block-group.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ enum btrfs_block_group_flags {
8383
BLOCK_GROUP_FLAG_ZONED_DATA_RELOC,
8484
/* Does the block group need to be added to the free space tree? */
8585
BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
86+
/* Set after we add a new block group to the free space tree. */
87+
BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
8688
/* Indicate that the block group is placed on a sequential zone */
8789
BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE,
8890
/*

fs/btrfs/free-space-tree.c

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1241,6 +1241,7 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
12411241
{
12421242
BTRFS_PATH_AUTO_FREE(path);
12431243
struct btrfs_key key;
1244+
struct rb_node *node;
12441245
int nr;
12451246
int ret;
12461247

@@ -1269,6 +1270,16 @@ static int clear_free_space_tree(struct btrfs_trans_handle *trans,
12691270
btrfs_release_path(path);
12701271
}
12711272

1273+
node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
1274+
while (node) {
1275+
struct btrfs_block_group *bg;
1276+
1277+
bg = rb_entry(node, struct btrfs_block_group, cache_node);
1278+
clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
1279+
node = rb_next(node);
1280+
cond_resched();
1281+
}
1282+
12721283
return 0;
12731284
}
12741285

@@ -1358,12 +1369,18 @@ int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
13581369

13591370
block_group = rb_entry(node, struct btrfs_block_group,
13601371
cache_node);
1372+
1373+
if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
1374+
&block_group->runtime_flags))
1375+
goto next;
1376+
13611377
ret = populate_free_space_tree(trans, block_group);
13621378
if (ret) {
13631379
btrfs_abort_transaction(trans, ret);
13641380
btrfs_end_transaction(trans);
13651381
return ret;
13661382
}
1383+
next:
13671384
if (btrfs_should_end_transaction(trans)) {
13681385
btrfs_end_transaction(trans);
13691386
trans = btrfs_start_transaction(free_space_root, 1);
@@ -1390,6 +1407,29 @@ static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
13901407

13911408
clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);
13921409

1410+
/*
1411+
* While rebuilding the free space tree we may allocate new metadata
1412+
* block groups while modifying the free space tree.
1413+
*
1414+
* Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
1415+
* can use multiple transactions, every time btrfs_end_transaction() is
1416+
* called at btrfs_rebuild_free_space_tree() we finish the creation of
1417+
* new block groups by calling btrfs_create_pending_block_groups(), and
1418+
* that in turn calls us, through add_block_group_free_space(), to add
1419+
* a free space info item and a free space extent item for the block
1420+
* group.
1421+
*
1422+
* Then later btrfs_rebuild_free_space_tree() may find such new block
1423+
* groups and process them with populate_free_space_tree(), which can
1424+
* fail with EEXIST since there are already items for the block group in
1425+
* the free space tree. Notice that we say "may find" because a new
1426+
* block group may be added to the block groups rbtree in a node before
1427+
* or after the block group currently being processed by the rebuild
1428+
* process. So signal the rebuild process to skip such new block groups
1429+
* if it finds them.
1430+
*/
1431+
set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);
1432+
13931433
ret = add_new_free_space_info(trans, block_group, path);
13941434
if (ret)
13951435
return ret;

fs/btrfs/inode.c

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4710,7 +4710,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
47104710
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
47114711
int ret = 0;
47124712
struct btrfs_trans_handle *trans;
4713-
u64 last_unlink_trans;
47144713
struct fscrypt_name fname;
47154714

47164715
if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
@@ -4736,6 +4735,23 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
47364735
goto out_notrans;
47374736
}
47384737

4738+
/*
4739+
* Propagate the last_unlink_trans value of the deleted dir to its
4740+
* parent directory. This is to prevent an unrecoverable log tree in the
4741+
* case we do something like this:
4742+
* 1) create dir foo
4743+
* 2) create snapshot under dir foo
4744+
* 3) delete the snapshot
4745+
* 4) rmdir foo
4746+
* 5) mkdir foo
4747+
* 6) fsync foo or some file inside foo
4748+
*
4749+
* This is because we can't unlink other roots when replaying the dir
4750+
* deletes for directory foo.
4751+
*/
4752+
if (BTRFS_I(inode)->last_unlink_trans >= trans->transid)
4753+
btrfs_record_snapshot_destroy(trans, BTRFS_I(dir));
4754+
47394755
if (unlikely(btrfs_ino(BTRFS_I(inode)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
47404756
ret = btrfs_unlink_subvol(trans, BTRFS_I(dir), dentry);
47414757
goto out;
@@ -4745,27 +4761,11 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
47454761
if (ret)
47464762
goto out;
47474763

4748-
last_unlink_trans = BTRFS_I(inode)->last_unlink_trans;
4749-
47504764
/* now the directory is empty */
47514765
ret = btrfs_unlink_inode(trans, BTRFS_I(dir), BTRFS_I(d_inode(dentry)),
47524766
&fname.disk_name);
4753-
if (!ret) {
4767+
if (!ret)
47544768
btrfs_i_size_write(BTRFS_I(inode), 0);
4755-
/*
4756-
* Propagate the last_unlink_trans value of the deleted dir to
4757-
* its parent directory. This is to prevent an unrecoverable
4758-
* log tree in the case we do something like this:
4759-
* 1) create dir foo
4760-
* 2) create snapshot under dir foo
4761-
* 3) delete the snapshot
4762-
* 4) rmdir foo
4763-
* 5) mkdir foo
4764-
* 6) fsync foo or some file inside foo
4765-
*/
4766-
if (last_unlink_trans >= trans->transid)
4767-
BTRFS_I(dir)->last_unlink_trans = last_unlink_trans;
4768-
}
47694769
out:
47704770
btrfs_end_transaction(trans);
47714771
out_notrans:

fs/btrfs/ioctl.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -666,14 +666,14 @@ static noinline int create_subvol(struct mnt_idmap *idmap,
666666
goto out;
667667
}
668668

669+
btrfs_record_new_subvolume(trans, BTRFS_I(dir));
670+
669671
ret = btrfs_create_new_inode(trans, &new_inode_args);
670672
if (ret) {
671673
btrfs_abort_transaction(trans, ret);
672674
goto out;
673675
}
674676

675-
btrfs_record_new_subvolume(trans, BTRFS_I(dir));
676-
677677
d_instantiate_new(dentry, new_inode_args.inode);
678678
new_inode_args.inode = NULL;
679679

0 commit comments

Comments
 (0)