Skip to content

Commit 5ca7fe2

Browse files
committed
Merge tag 'for-6.16-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba:

"Fixes:

 - fix invalid inode pointer dereferences during log replay
 - fix a race between renames and directory logging
 - fix shutting down delayed iput worker
 - fix device byte accounting when dropping chunk
 - in zoned mode, fix offset calculations for DUP profile when conventional and sequential zones are used together

Regression fixes:

 - fix possible double unlock of extent buffer tree (xarray conversion)
 - in zoned mode, fix extent buffer refcount when writing out extents (xarray conversion)

Error handling fixes and updates:

 - handle unexpected extent type when replaying log
 - check and warn if there are remaining delayed inodes when putting a root
 - fix assertion when building free space tree
 - handle csum tree error with mount option 'rescue=ibadroots'

Other:

 - error message updates: add prefix to all scrub related messages, include other information in messages"

* tag 'for-6.16-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux:
  - btrfs: zoned: fix alloc_offset calculation for partly conventional block groups
  - btrfs: handle csum tree error with rescue=ibadroots correctly
  - btrfs: fix race between async reclaim worker and close_ctree()
  - btrfs: fix assertion when building free space tree
  - btrfs: don't silently ignore unexpected extent type when replaying log
  - btrfs: fix invalid inode pointer dereferences during log replay
  - btrfs: fix double unlock of buffer_tree xarray when releasing subpage eb
  - btrfs: update superblock's device bytes_used when dropping chunk
  - btrfs: fix a race between renames and directory logging
  - btrfs: scrub: add prefix for the error messages
  - btrfs: warn if leaking delayed_nodes in btrfs_put_root()
  - btrfs: fix delayed ref refcount leak in debug assertion
  - btrfs: include root in error message when unlinking inode
  - btrfs: don't drop a reference if btrfs_check_write_meta_pointer() fails
2 parents c069445 + c0d90a7 commit 5ca7fe2

File tree

10 files changed

+219
-82
lines changed

10 files changed

+219
-82
lines changed

fs/btrfs/delayed-inode.c

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1377,7 +1377,10 @@ static int btrfs_wq_run_delayed_node(struct btrfs_delayed_root *delayed_root,
13771377

13781378
void btrfs_assert_delayed_root_empty(struct btrfs_fs_info *fs_info)
13791379
{
1380-
WARN_ON(btrfs_first_delayed_node(fs_info->delayed_root));
1380+
struct btrfs_delayed_node *node = btrfs_first_delayed_node(fs_info->delayed_root);
1381+
1382+
if (WARN_ON(node))
1383+
refcount_dec(&node->refs);
13811384
}
13821385

13831386
static bool could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)

fs/btrfs/disk-io.c

Lines changed: 21 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1835,6 +1835,8 @@ void btrfs_put_root(struct btrfs_root *root)
18351835
if (refcount_dec_and_test(&root->refs)) {
18361836
if (WARN_ON(!xa_empty(&root->inodes)))
18371837
xa_destroy(&root->inodes);
1838+
if (WARN_ON(!xa_empty(&root->delayed_nodes)))
1839+
xa_destroy(&root->delayed_nodes);
18381840
WARN_ON(test_bit(BTRFS_ROOT_DEAD_RELOC_TREE, &root->state));
18391841
if (root->anon_dev)
18401842
free_anon_bdev(root->anon_dev);
@@ -2156,8 +2158,7 @@ static int load_global_roots_objectid(struct btrfs_root *tree_root,
21562158
found = true;
21572159
root = read_tree_root_path(tree_root, path, &key);
21582160
if (IS_ERR(root)) {
2159-
if (!btrfs_test_opt(fs_info, IGNOREBADROOTS))
2160-
ret = PTR_ERR(root);
2161+
ret = PTR_ERR(root);
21612162
break;
21622163
}
21632164
set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
@@ -4310,8 +4311,8 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
43104311
*
43114312
* So wait for all ongoing ordered extents to complete and then run
43124313
* delayed iputs. This works because once we reach this point no one
4313-
* can either create new ordered extents nor create delayed iputs
4314-
* through some other means.
4314+
* can create new ordered extents, but delayed iputs can still be added
4315+
* by a reclaim worker (see comments further below).
43154316
*
43164317
* Also note that btrfs_wait_ordered_roots() is not safe here, because
43174318
* it waits for BTRFS_ORDERED_COMPLETE to be set on an ordered extent,
@@ -4322,15 +4323,29 @@ void __cold close_ctree(struct btrfs_fs_info *fs_info)
43224323
btrfs_flush_workqueue(fs_info->endio_write_workers);
43234324
/* Ordered extents for free space inodes. */
43244325
btrfs_flush_workqueue(fs_info->endio_freespace_worker);
4326+
/*
4327+
* Run delayed iputs in case an async reclaim worker is waiting for them
4328+
* to be run as mentioned above.
4329+
*/
43254330
btrfs_run_delayed_iputs(fs_info);
4326-
/* There should be no more workload to generate new delayed iputs. */
4327-
set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
43284331

43294332
cancel_work_sync(&fs_info->async_reclaim_work);
43304333
cancel_work_sync(&fs_info->async_data_reclaim_work);
43314334
cancel_work_sync(&fs_info->preempt_reclaim_work);
43324335
cancel_work_sync(&fs_info->em_shrinker_work);
43334336

4337+
/*
4338+
* Run delayed iputs again because an async reclaim worker may have
4339+
* added new ones if it was flushing delalloc:
4340+
*
4341+
* shrink_delalloc() -> btrfs_start_delalloc_roots() ->
4342+
* start_delalloc_inodes() -> btrfs_add_delayed_iput()
4343+
*/
4344+
btrfs_run_delayed_iputs(fs_info);
4345+
4346+
/* There should be no more workload to generate new delayed iputs. */
4347+
set_bit(BTRFS_FS_STATE_NO_DELAYED_IPUT, &fs_info->fs_state);
4348+
43344349
/* Cancel or finish ongoing discard work */
43354350
btrfs_discard_cleanup(fs_info);
43364351

fs/btrfs/extent_io.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4312,7 +4312,6 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
43124312
spin_unlock(&eb->refs_lock);
43134313
continue;
43144314
}
4315-
xa_unlock_irq(&fs_info->buffer_tree);
43164315

43174316
/*
43184317
* If tree ref isn't set then we know the ref on this eb is a
@@ -4329,6 +4328,7 @@ static int try_release_subpage_extent_buffer(struct folio *folio)
43294328
* check the folio private at the end. And
43304329
* release_extent_buffer() will release the refs_lock.
43314330
*/
4331+
xa_unlock_irq(&fs_info->buffer_tree);
43324332
release_extent_buffer(eb);
43334333
xa_lock_irq(&fs_info->buffer_tree);
43344334
}

fs/btrfs/free-space-tree.c

Lines changed: 12 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1115,11 +1115,21 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
11151115
ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
11161116
if (ret < 0)
11171117
goto out_locked;
1118-
ASSERT(ret == 0);
1118+
/*
1119+
* If ret is 1 (no key found), it means this is an empty block group,
1120+
* without any extents allocated from it and there's no block group
1121+
* item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
1122+
* because we are using the block group tree feature, so block group
1123+
* items are stored in the block group tree. It also means there are no
1124+
* extents allocated for block groups with a start offset beyond this
1125+
* block group's end offset (this is the last, highest, block group).
1126+
*/
1127+
if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE))
1128+
ASSERT(ret == 0);
11191129

11201130
start = block_group->start;
11211131
end = block_group->start + block_group->length;
1122-
while (1) {
1132+
while (ret == 0) {
11231133
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
11241134

11251135
if (key.type == BTRFS_EXTENT_ITEM_KEY ||
@@ -1149,8 +1159,6 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
11491159
ret = btrfs_next_item(extent_root, path);
11501160
if (ret < 0)
11511161
goto out_locked;
1152-
if (ret)
1153-
break;
11541162
}
11551163
if (start < end) {
11561164
ret = __add_to_free_space_tree(trans, block_group, path2,

fs/btrfs/inode.c

Lines changed: 67 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -4250,9 +4250,9 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
42504250

42514251
ret = btrfs_del_inode_ref(trans, root, name, ino, dir_ino, &index);
42524252
if (ret) {
4253-
btrfs_info(fs_info,
4254-
"failed to delete reference to %.*s, inode %llu parent %llu",
4255-
name->len, name->name, ino, dir_ino);
4253+
btrfs_crit(fs_info,
4254+
"failed to delete reference to %.*s, root %llu inode %llu parent %llu",
4255+
name->len, name->name, btrfs_root_id(root), ino, dir_ino);
42564256
btrfs_abort_transaction(trans, ret);
42574257
goto err;
42584258
}
@@ -8059,6 +8059,7 @@ static int btrfs_rename_exchange(struct inode *old_dir,
80598059
int ret;
80608060
int ret2;
80618061
bool need_abort = false;
8062+
bool logs_pinned = false;
80628063
struct fscrypt_name old_fname, new_fname;
80638064
struct fscrypt_str *old_name, *new_name;
80648065

@@ -8182,6 +8183,31 @@ static int btrfs_rename_exchange(struct inode *old_dir,
81828183
inode_inc_iversion(new_inode);
81838184
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
81848185

8186+
if (old_ino != BTRFS_FIRST_FREE_OBJECTID &&
8187+
new_ino != BTRFS_FIRST_FREE_OBJECTID) {
8188+
/*
8189+
* If we are renaming in the same directory (and it's not for
8190+
* root entries) pin the log early to prevent any concurrent
8191+
* task from logging the directory after we removed the old
8192+
* entries and before we add the new entries, otherwise that
8193+
* task can sync a log without any entry for the inodes we are
8194+
* renaming and therefore replaying that log, if a power failure
8195+
* happens after syncing the log, would result in deleting the
8196+
* inodes.
8197+
*
8198+
* If the rename affects two different directories, we want to
8199+
* make sure that there's no log commit that contains
8200+
* updates for only one of the directories but not for the
8201+
* other.
8202+
*
8203+
* If we are renaming an entry for a root, we don't care about
8204+
* log updates since we called btrfs_set_log_full_commit().
8205+
*/
8206+
btrfs_pin_log_trans(root);
8207+
btrfs_pin_log_trans(dest);
8208+
logs_pinned = true;
8209+
}
8210+
81858211
if (old_dentry->d_parent != new_dentry->d_parent) {
81868212
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
81878213
BTRFS_I(old_inode), true);
@@ -8253,30 +8279,23 @@ static int btrfs_rename_exchange(struct inode *old_dir,
82538279
BTRFS_I(new_inode)->dir_index = new_idx;
82548280

82558281
/*
8256-
* Now pin the logs of the roots. We do it to ensure that no other task
8257-
* can sync the logs while we are in progress with the rename, because
8258-
* that could result in an inconsistency in case any of the inodes that
8259-
* are part of this rename operation were logged before.
8282+
* Do the log updates for all inodes.
8283+
*
8284+
* If either entry is for a root we don't need to update the logs since
8285+
* we've called btrfs_set_log_full_commit() before.
82608286
*/
8261-
if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
8262-
btrfs_pin_log_trans(root);
8263-
if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
8264-
btrfs_pin_log_trans(dest);
8265-
8266-
/* Do the log updates for all inodes. */
8267-
if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
8287+
if (logs_pinned) {
82688288
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
82698289
old_rename_ctx.index, new_dentry->d_parent);
8270-
if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
82718290
btrfs_log_new_name(trans, new_dentry, BTRFS_I(new_dir),
82728291
new_rename_ctx.index, old_dentry->d_parent);
8292+
}
82738293

8274-
/* Now unpin the logs. */
8275-
if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
8294+
out_fail:
8295+
if (logs_pinned) {
82768296
btrfs_end_log_trans(root);
8277-
if (new_ino != BTRFS_FIRST_FREE_OBJECTID)
82788297
btrfs_end_log_trans(dest);
8279-
out_fail:
8298+
}
82808299
ret2 = btrfs_end_transaction(trans);
82818300
ret = ret ? ret : ret2;
82828301
out_notrans:
@@ -8326,6 +8345,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
83268345
int ret2;
83278346
u64 old_ino = btrfs_ino(BTRFS_I(old_inode));
83288347
struct fscrypt_name old_fname, new_fname;
8348+
bool logs_pinned = false;
83298349

83308350
if (btrfs_ino(BTRFS_I(new_dir)) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)
83318351
return -EPERM;
@@ -8460,6 +8480,29 @@ static int btrfs_rename(struct mnt_idmap *idmap,
84608480
inode_inc_iversion(old_inode);
84618481
simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
84628482

8483+
if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {
8484+
/*
8485+
* If we are renaming in the same directory (and it's not a
8486+
* root entry) pin the log to prevent any concurrent task from
8487+
* logging the directory after we removed the old entry and
8488+
* before we add the new entry, otherwise that task can sync
8489+
* a log without any entry for the inode we are renaming and
8490+
* therefore replaying that log, if a power failure happens
8491+
* after syncing the log, would result in deleting the inode.
8492+
*
8493+
* If the rename affects two different directories, we want to
8494+
* make sure the that there's no log commit that contains
8495+
* updates for only one of the directories but not for the
8496+
* other.
8497+
*
8498+
* If we are renaming an entry for a root, we don't care about
8499+
* log updates since we called btrfs_set_log_full_commit().
8500+
*/
8501+
btrfs_pin_log_trans(root);
8502+
btrfs_pin_log_trans(dest);
8503+
logs_pinned = true;
8504+
}
8505+
84638506
if (old_dentry->d_parent != new_dentry->d_parent)
84648507
btrfs_record_unlink_dir(trans, BTRFS_I(old_dir),
84658508
BTRFS_I(old_inode), true);
@@ -8524,7 +8567,7 @@ static int btrfs_rename(struct mnt_idmap *idmap,
85248567
if (old_inode->i_nlink == 1)
85258568
BTRFS_I(old_inode)->dir_index = index;
85268569

8527-
if (old_ino != BTRFS_FIRST_FREE_OBJECTID)
8570+
if (logs_pinned)
85288571
btrfs_log_new_name(trans, old_dentry, BTRFS_I(old_dir),
85298572
rename_ctx.index, new_dentry->d_parent);
85308573

@@ -8540,6 +8583,10 @@ static int btrfs_rename(struct mnt_idmap *idmap,
85408583
}
85418584
}
85428585
out_fail:
8586+
if (logs_pinned) {
8587+
btrfs_end_log_trans(root);
8588+
btrfs_end_log_trans(dest);
8589+
}
85438590
ret2 = btrfs_end_transaction(trans);
85448591
ret = ret ? ret : ret2;
85458592
out_notrans:

fs/btrfs/ioctl.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -3139,7 +3139,7 @@ static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
31393139
return -EPERM;
31403140

31413141
if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) {
3142-
btrfs_err(fs_info, "scrub is not supported on extent tree v2 yet");
3142+
btrfs_err(fs_info, "scrub: extent tree v2 not yet supported");
31433143
return -EINVAL;
31443144
}
31453145

0 commit comments

Comments
 (0)