Skip to content

Commit e3c94a5

Browse files
committed
Merge tag 'for-6.17-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs fixes from David Sterba: - fix a few races related to inode link count - fix inode leak on failure to add link to inode - move transaction aborts closer to where they happen * tag 'for-6.17-rc4-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: btrfs: avoid load/store tearing races when checking if an inode was logged btrfs: fix race between setting last_dir_index_offset and inode logging btrfs: fix race between logging inode and checking if it was logged before btrfs: simplify error handling logic for btrfs_link() btrfs: fix inode leak on failure to add link to inode btrfs: abort transaction on failure to add link to inode
2 parents b320789 + 986bf6e commit e3c94a5

File tree

3 files changed

+76
-48
lines changed

3 files changed

+76
-48
lines changed

fs/btrfs/btrfs_inode.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -248,7 +248,7 @@ struct btrfs_inode {
248248
u64 new_delalloc_bytes;
249249
/*
250250
* The offset of the last dir index key that was logged.
251-
* This is used only for directories.
251+
* This is used only for directories. Protected by 'log_mutex'.
252252
*/
253253
u64 last_dir_index_offset;
254254
};

fs/btrfs/inode.c

Lines changed: 22 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -6805,7 +6805,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
68056805
struct fscrypt_name fname;
68066806
u64 index;
68076807
int ret;
6808-
int drop_inode = 0;
68096808

68106809
/* do not allow sys_link's with other subvols of the same device */
68116810
if (btrfs_root_id(root) != btrfs_root_id(BTRFS_I(inode)->root))
@@ -6837,44 +6836,44 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
68376836

68386837
/* There are several dir indexes for this inode, clear the cache. */
68396838
BTRFS_I(inode)->dir_index = 0ULL;
6840-
inc_nlink(inode);
68416839
inode_inc_iversion(inode);
68426840
inode_set_ctime_current(inode);
6843-
ihold(inode);
68446841
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
68456842

68466843
ret = btrfs_add_link(trans, BTRFS_I(dir), BTRFS_I(inode),
68476844
&fname.disk_name, 1, index);
6845+
if (ret)
6846+
goto fail;
68486847

6848+
/* Link added now we update the inode item with the new link count. */
6849+
inc_nlink(inode);
6850+
ret = btrfs_update_inode(trans, BTRFS_I(inode));
68496851
if (ret) {
6850-
drop_inode = 1;
6851-
} else {
6852-
struct dentry *parent = dentry->d_parent;
6852+
btrfs_abort_transaction(trans, ret);
6853+
goto fail;
6854+
}
68536855

6854-
ret = btrfs_update_inode(trans, BTRFS_I(inode));
6855-
if (ret)
6856+
if (inode->i_nlink == 1) {
6857+
/*
6858+
* If the new hard link count is 1, it's a file created with the
6859+
* open(2) O_TMPFILE flag.
6860+
*/
6861+
ret = btrfs_orphan_del(trans, BTRFS_I(inode));
6862+
if (ret) {
6863+
btrfs_abort_transaction(trans, ret);
68566864
goto fail;
6857-
if (inode->i_nlink == 1) {
6858-
/*
6859-
* If new hard link count is 1, it's a file created
6860-
* with open(2) O_TMPFILE flag.
6861-
*/
6862-
ret = btrfs_orphan_del(trans, BTRFS_I(inode));
6863-
if (ret)
6864-
goto fail;
68656865
}
6866-
d_instantiate(dentry, inode);
6867-
btrfs_log_new_name(trans, old_dentry, NULL, 0, parent);
68686866
}
68696867

6868+
/* Grab reference for the new dentry passed to d_instantiate(). */
6869+
ihold(inode);
6870+
d_instantiate(dentry, inode);
6871+
btrfs_log_new_name(trans, old_dentry, NULL, 0, dentry->d_parent);
6872+
68706873
fail:
68716874
fscrypt_free_filename(&fname);
68726875
if (trans)
68736876
btrfs_end_transaction(trans);
6874-
if (drop_inode) {
6875-
inode_dec_link_count(inode);
6876-
iput(inode);
6877-
}
68786877
btrfs_btree_balance_dirty(fs_info);
68796878
return ret;
68806879
}
@@ -7830,6 +7829,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
78307829
ei->last_sub_trans = 0;
78317830
ei->logged_trans = 0;
78327831
ei->delalloc_bytes = 0;
7832+
/* new_delalloc_bytes and last_dir_index_offset are in a union. */
78337833
ei->new_delalloc_bytes = 0;
78347834
ei->defrag_bytes = 0;
78357835
ei->disk_i_size = 0;

fs/btrfs/tree-log.c

Lines changed: 53 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -3340,6 +3340,31 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
33403340
return 0;
33413341
}
33423342

3343+
static bool mark_inode_as_not_logged(const struct btrfs_trans_handle *trans,
3344+
struct btrfs_inode *inode)
3345+
{
3346+
bool ret = false;
3347+
3348+
/*
3349+
* Do this only if ->logged_trans is still 0 to prevent races with
3350+
* concurrent logging as we may see the inode not logged when
3351+
* inode_logged() is called but it gets logged after inode_logged() did
3352+
* not find it in the log tree and we end up setting ->logged_trans to a
3353+
* value less than trans->transid after the concurrent logging task has
3354+
* set it to trans->transid. As a consequence, subsequent rename, unlink
3355+
* and link operations may end up not logging new names and removing old
3356+
* names from the log.
3357+
*/
3358+
spin_lock(&inode->lock);
3359+
if (inode->logged_trans == 0)
3360+
inode->logged_trans = trans->transid - 1;
3361+
else if (inode->logged_trans == trans->transid)
3362+
ret = true;
3363+
spin_unlock(&inode->lock);
3364+
3365+
return ret;
3366+
}
3367+
33433368
/*
33443369
* Check if an inode was logged in the current transaction. This correctly deals
33453370
* with the case where the inode was logged but has a logged_trans of 0, which
@@ -3357,15 +3382,32 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
33573382
struct btrfs_key key;
33583383
int ret;
33593384

3360-
if (inode->logged_trans == trans->transid)
3385+
/*
3386+
* Quick lockless call, since once ->logged_trans is set to the current
3387+
* transaction, we never set it to a lower value anywhere else.
3388+
*/
3389+
if (data_race(inode->logged_trans) == trans->transid)
33613390
return 1;
33623391

33633392
/*
3364-
* If logged_trans is not 0, then we know the inode logged was not logged
3365-
* in this transaction, so we can return false right away.
3393+
* If logged_trans is not 0 and not trans->transid, then we know the
3394+
* inode was not logged in this transaction, so we can return false
3395+
* right away. We take the lock to avoid a race caused by load/store
3396+
* tearing with a concurrent btrfs_log_inode() call or a concurrent task
3397+
* in this function further below - an update to trans->transid can be
3398+
* teared into two 32 bits updates for example, in which case we could
3399+
* see a positive value that is not trans->transid and assume the inode
3400+
* was not logged when it was.
33663401
*/
3367-
if (inode->logged_trans > 0)
3402+
spin_lock(&inode->lock);
3403+
if (inode->logged_trans == trans->transid) {
3404+
spin_unlock(&inode->lock);
3405+
return 1;
3406+
} else if (inode->logged_trans > 0) {
3407+
spin_unlock(&inode->lock);
33683408
return 0;
3409+
}
3410+
spin_unlock(&inode->lock);
33693411

33703412
/*
33713413
* If no log tree was created for this root in this transaction, then
@@ -3374,10 +3416,8 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
33743416
* transaction's ID, to avoid the search below in a future call in case
33753417
* a log tree gets created after this.
33763418
*/
3377-
if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state)) {
3378-
inode->logged_trans = trans->transid - 1;
3379-
return 0;
3380-
}
3419+
if (!test_bit(BTRFS_ROOT_HAS_LOG_TREE, &inode->root->state))
3420+
return mark_inode_as_not_logged(trans, inode);
33813421

33823422
/*
33833423
* We have a log tree and the inode's logged_trans is 0. We can't tell
@@ -3431,29 +3471,17 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
34313471
* Set logged_trans to a value greater than 0 and less then the
34323472
* current transaction to avoid doing the search in future calls.
34333473
*/
3434-
inode->logged_trans = trans->transid - 1;
3435-
return 0;
3474+
return mark_inode_as_not_logged(trans, inode);
34363475
}
34373476

34383477
/*
34393478
* The inode was previously logged and then evicted, set logged_trans to
34403479
* the current transacion's ID, to avoid future tree searches as long as
34413480
* the inode is not evicted again.
34423481
*/
3482+
spin_lock(&inode->lock);
34433483
inode->logged_trans = trans->transid;
3444-
3445-
/*
3446-
* If it's a directory, then we must set last_dir_index_offset to the
3447-
* maximum possible value, so that the next attempt to log the inode does
3448-
* not skip checking if dir index keys found in modified subvolume tree
3449-
* leaves have been logged before, otherwise it would result in attempts
3450-
* to insert duplicate dir index keys in the log tree. This must be done
3451-
* because last_dir_index_offset is an in-memory only field, not persisted
3452-
* in the inode item or any other on-disk structure, so its value is lost
3453-
* once the inode is evicted.
3454-
*/
3455-
if (S_ISDIR(inode->vfs_inode.i_mode))
3456-
inode->last_dir_index_offset = (u64)-1;
3484+
spin_unlock(&inode->lock);
34573485

34583486
return 1;
34593487
}
@@ -4045,7 +4073,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
40454073

40464074
/*
40474075
* If the inode was logged before and it was evicted, then its
4048-
* last_dir_index_offset is (u64)-1, so we don't the value of the last index
4076+
* last_dir_index_offset is 0, so we don't know the value of the last index
40494077
* key offset. If that's the case, search for it and update the inode. This
40504078
* is to avoid lookups in the log tree every time we try to insert a dir index
40514079
* key from a leaf changed in the current transaction, and to allow us to always
@@ -4061,7 +4089,7 @@ static int update_last_dir_index_offset(struct btrfs_inode *inode,
40614089

40624090
lockdep_assert_held(&inode->log_mutex);
40634091

4064-
if (inode->last_dir_index_offset != (u64)-1)
4092+
if (inode->last_dir_index_offset != 0)
40654093
return 0;
40664094

40674095
if (!ctx->logged_before) {

0 commit comments

Comments
 (0)