@@ -3340,6 +3340,31 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
3340
3340
return 0 ;
3341
3341
}
3342
3342
3343
+ static bool mark_inode_as_not_logged (const struct btrfs_trans_handle * trans ,
3344
+ struct btrfs_inode * inode )
3345
+ {
3346
+ bool ret = false;
3347
+
3348
+ /*
3349
+ * Do this only if ->logged_trans is still 0 to prevent races with
3350
+ * concurrent logging as we may see the inode not logged when
3351
+ * inode_logged() is called but it gets logged after inode_logged() did
3352
+ * not find it in the log tree and we end up setting ->logged_trans to a
3353
+ * value less than trans->transid after the concurrent logging task has
3354
+ * set it to trans->transid. As a consequence, subsequent rename, unlink
3355
+ * and link operations may end up not logging new names and removing old
3356
+ * names from the log.
3357
+ */
3358
+ spin_lock (& inode -> lock );
3359
+ if (inode -> logged_trans == 0 )
3360
+ inode -> logged_trans = trans -> transid - 1 ;
3361
+ else if (inode -> logged_trans == trans -> transid )
3362
+ ret = true;
3363
+ spin_unlock (& inode -> lock );
3364
+
3365
+ return ret ;
3366
+ }
3367
+
3343
3368
/*
3344
3369
* Check if an inode was logged in the current transaction. This correctly deals
3345
3370
* with the case where the inode was logged but has a logged_trans of 0, which
@@ -3357,15 +3382,32 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
3357
3382
struct btrfs_key key ;
3358
3383
int ret ;
3359
3384
3360
- if (inode -> logged_trans == trans -> transid )
3385
+ /*
3386
+ * Quick lockless call, since once ->logged_trans is set to the current
3387
+ * transaction, we never set it to a lower value anywhere else.
3388
+ */
3389
+ if (data_race (inode -> logged_trans ) == trans -> transid )
3361
3390
return 1 ;
3362
3391
3363
3392
/*
3364
- * If logged_trans is not 0, then we know the inode logged was not logged
3365
- * in this transaction, so we can return false right away.
3393
+ * If logged_trans is not 0 and not trans->transid, then we know the
3394
+ * inode was not logged in this transaction, so we can return false
3395
+ * right away. We take the lock to avoid a race caused by load/store
3396
+ * tearing with a concurrent btrfs_log_inode() call or a concurrent task
3397
+ * in this function further below - an update to trans->transid can be
3398
+ * torn into two 32-bit updates for example, in which case we could
3399
+ * see a positive value that is not trans->transid and assume the inode
3400
+ * was not logged when it was.
3366
3401
*/
3367
- if (inode -> logged_trans > 0 )
3402
+ spin_lock (& inode -> lock );
3403
+ if (inode -> logged_trans == trans -> transid ) {
3404
+ spin_unlock (& inode -> lock );
3405
+ return 1 ;
3406
+ } else if (inode -> logged_trans > 0 ) {
3407
+ spin_unlock (& inode -> lock );
3368
3408
return 0 ;
3409
+ }
3410
+ spin_unlock (& inode -> lock );
3369
3411
3370
3412
/*
3371
3413
* If no log tree was created for this root in this transaction, then
@@ -3374,10 +3416,8 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
3374
3416
* transaction's ID, to avoid the search below in a future call in case
3375
3417
* a log tree gets created after this.
3376
3418
*/
3377
- if (!test_bit (BTRFS_ROOT_HAS_LOG_TREE , & inode -> root -> state )) {
3378
- inode -> logged_trans = trans -> transid - 1 ;
3379
- return 0 ;
3380
- }
3419
+ if (!test_bit (BTRFS_ROOT_HAS_LOG_TREE , & inode -> root -> state ))
3420
+ return mark_inode_as_not_logged (trans , inode );
3381
3421
3382
3422
/*
3383
3423
* We have a log tree and the inode's logged_trans is 0. We can't tell
@@ -3431,29 +3471,17 @@ static int inode_logged(const struct btrfs_trans_handle *trans,
3431
3471
* Set logged_trans to a value greater than 0 and less than the
3432
3472
* current transaction to avoid doing the search in future calls.
3433
3473
*/
3434
- inode -> logged_trans = trans -> transid - 1 ;
3435
- return 0 ;
3474
+ return mark_inode_as_not_logged (trans , inode );
3436
3475
}
3437
3476
3438
3477
/*
3439
3478
* The inode was previously logged and then evicted, set logged_trans to
3440
3479
* the current transaction's ID, to avoid future tree searches as long as
3441
3480
* the inode is not evicted again.
3442
3481
*/
3482
+ spin_lock (& inode -> lock );
3443
3483
inode -> logged_trans = trans -> transid ;
3444
-
3445
- /*
3446
- * If it's a directory, then we must set last_dir_index_offset to the
3447
- * maximum possible value, so that the next attempt to log the inode does
3448
- * not skip checking if dir index keys found in modified subvolume tree
3449
- * leaves have been logged before, otherwise it would result in attempts
3450
- * to insert duplicate dir index keys in the log tree. This must be done
3451
- * because last_dir_index_offset is an in-memory only field, not persisted
3452
- * in the inode item or any other on-disk structure, so its value is lost
3453
- * once the inode is evicted.
3454
- */
3455
- if (S_ISDIR (inode -> vfs_inode .i_mode ))
3456
- inode -> last_dir_index_offset = (u64 )- 1 ;
3484
+ spin_unlock (& inode -> lock );
3457
3485
3458
3486
return 1 ;
3459
3487
}
@@ -4045,7 +4073,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans,
4045
4073
4046
4074
/*
4047
4075
* If the inode was logged before and it was evicted, then its
4048
- * last_dir_index_offset is (u64)-1 , so we don't the value of the last index
4076
+ * last_dir_index_offset is 0 , so we don't know the value of the last index
4049
4077
* key offset. If that's the case, search for it and update the inode. This
4050
4078
* is to avoid lookups in the log tree every time we try to insert a dir index
4051
4079
* key from a leaf changed in the current transaction, and to allow us to always
@@ -4061,7 +4089,7 @@ static int update_last_dir_index_offset(struct btrfs_inode *inode,
4061
4089
4062
4090
lockdep_assert_held (& inode -> log_mutex );
4063
4091
4064
- if (inode -> last_dir_index_offset != ( u64 ) - 1 )
4092
+ if (inode -> last_dir_index_offset != 0 )
4065
4093
return 0 ;
4066
4094
4067
4095
if (!ctx -> logged_before ) {
0 commit comments