Skip to content

Commit 77d20c6

Browse files
josefbacikkdave
authored andcommitted
btrfs: do not block starts waiting on previous transaction commit
Internally I got a report of very long stalls on normal operations like creating a new file when auto relocation was running. The reporter used the 'bpf offcputime' tracer to show that we would get stuck in start_transaction for 5 to 30 seconds, and were always being woken up by the transaction commit. Using my timing-everything script, which times how long a function takes and what percentage of that total time is taken up by its children, I saw several traces like this 1083 took 32812902424 ns 29929002926 ns 91.2110% wait_for_commit_duration 25568 ns 7.7920e-05% commit_fs_roots_duration 1007751 ns 0.00307% commit_cowonly_roots_duration 446855602 ns 1.36182% btrfs_run_delayed_refs_duration 271980 ns 0.00082% btrfs_run_delayed_items_duration 2008 ns 6.1195e-06% btrfs_apply_pending_changes_duration 9656 ns 2.9427e-05% switch_commit_roots_duration 1598 ns 4.8700e-06% btrfs_commit_device_sizes_duration 4314 ns 1.3147e-05% btrfs_free_log_root_tree_duration Here I was only tracing functions that happen where we are between START_COMMIT and UNBLOCKED in order to see what would be keeping us blocked for so long. The wait_for_commit() we do is where we wait for a previous transaction that hasn't completed it's commit. This can include all of the unpin work and other cleanups, which tends to be the longest part of our transaction commit. There is no reason we should be blocking new things from entering the transaction at this point, it just adds to random latency spikes for no reason. Fix this by adding a PREP stage. This allows us to properly deal with multiple committers coming in at the same time, we retain the behavior that the winner waits on the previous transaction and the losers all wait for this transaction commit to occur. Nothing else is blocked during the PREP stage, and then once the wait is complete we switch to COMMIT_START and all of the same behavior as before is maintained. Reviewed-by: Filipe Manana <[email protected]> Signed-off-by: Josef Bacik <[email protected]> Reviewed-by: David Sterba <[email protected]> Signed-off-by: David Sterba <[email protected]>
1 parent ee34a82 commit 77d20c6

File tree

4 files changed

+30
-20
lines changed

4 files changed

+30
-20
lines changed

fs/btrfs/disk-io.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1547,7 +1547,7 @@ static int transaction_kthread(void *arg)
15471547

15481548
delta = ktime_get_seconds() - cur->start_time;
15491549
if (!test_and_clear_bit(BTRFS_FS_COMMIT_TRANS, &fs_info->flags) &&
1550-
cur->state < TRANS_STATE_COMMIT_START &&
1550+
cur->state < TRANS_STATE_COMMIT_PREP &&
15511551
delta < fs_info->commit_interval) {
15521552
spin_unlock(&fs_info->trans_lock);
15531553
delay -= msecs_to_jiffies((delta - 1) * 1000);
@@ -2682,8 +2682,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
26822682
btrfs_lockdep_init_map(fs_info, btrfs_trans_num_extwriters);
26832683
btrfs_lockdep_init_map(fs_info, btrfs_trans_pending_ordered);
26842684
btrfs_lockdep_init_map(fs_info, btrfs_ordered_extent);
2685-
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_start,
2686-
BTRFS_LOCKDEP_TRANS_COMMIT_START);
2685+
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_commit_prep,
2686+
BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
26872687
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_unblocked,
26882688
BTRFS_LOCKDEP_TRANS_UNBLOCKED);
26892689
btrfs_state_lockdep_init_map(fs_info, btrfs_trans_super_committed,
@@ -4870,7 +4870,7 @@ static int btrfs_cleanup_transaction(struct btrfs_fs_info *fs_info)
48704870
while (!list_empty(&fs_info->trans_list)) {
48714871
t = list_first_entry(&fs_info->trans_list,
48724872
struct btrfs_transaction, list);
4873-
if (t->state >= TRANS_STATE_COMMIT_START) {
4873+
if (t->state >= TRANS_STATE_COMMIT_PREP) {
48744874
refcount_inc(&t->use_count);
48754875
spin_unlock(&fs_info->trans_lock);
48764876
btrfs_wait_for_commit(fs_info, t->transid);

fs/btrfs/locking.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ enum btrfs_lock_nesting {
7979
};
8080

8181
enum btrfs_lockdep_trans_states {
82-
BTRFS_LOCKDEP_TRANS_COMMIT_START,
82+
BTRFS_LOCKDEP_TRANS_COMMIT_PREP,
8383
BTRFS_LOCKDEP_TRANS_UNBLOCKED,
8484
BTRFS_LOCKDEP_TRANS_SUPER_COMMITTED,
8585
BTRFS_LOCKDEP_TRANS_COMPLETED,

fs/btrfs/transaction.c

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -56,12 +56,17 @@ static struct kmem_cache *btrfs_trans_handle_cachep;
5656
* | Call btrfs_commit_transaction() on any trans handle attached to
5757
* | transaction N
5858
* V
59-
* Transaction N [[TRANS_STATE_COMMIT_START]]
59+
* Transaction N [[TRANS_STATE_COMMIT_PREP]]
60+
* |
61+
* | If there are simultaneous calls to btrfs_commit_transaction() one will win
62+
* | the race and the rest will wait for the winner to commit the transaction.
63+
* |
64+
* | The winner will wait for previous running transaction to completely finish
65+
* | if there is one.
6066
* |
61-
* | Will wait for previous running transaction to completely finish if there
62-
* | is one
67+
* Transaction N [[TRANS_STATE_COMMIT_START]]
6368
* |
64-
* | Then one of the following happes:
69+
* | Then one of the following happens:
6570
* | - Wait for all other trans handle holders to release.
6671
* | The btrfs_commit_transaction() caller will do the commit work.
6772
* | - Wait for current transaction to be committed by others.
@@ -112,6 +117,7 @@ static struct kmem_cache *btrfs_trans_handle_cachep;
112117
*/
113118
static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
114119
[TRANS_STATE_RUNNING] = 0U,
120+
[TRANS_STATE_COMMIT_PREP] = 0U,
115121
[TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
116122
[TRANS_STATE_COMMIT_DOING] = (__TRANS_START |
117123
__TRANS_ATTACH |
@@ -1983,7 +1989,7 @@ void btrfs_commit_transaction_async(struct btrfs_trans_handle *trans)
19831989
* Wait for the current transaction commit to start and block
19841990
* subsequent transaction joins
19851991
*/
1986-
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
1992+
btrfs_might_wait_for_state(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
19871993
wait_event(fs_info->transaction_blocked_wait,
19881994
cur_trans->state >= TRANS_STATE_COMMIT_START ||
19891995
TRANS_ABORTED(cur_trans));
@@ -2130,7 +2136,7 @@ static void add_pending_snapshot(struct btrfs_trans_handle *trans)
21302136
return;
21312137

21322138
lockdep_assert_held(&trans->fs_info->trans_lock);
2133-
ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_START);
2139+
ASSERT(cur_trans->state >= TRANS_STATE_COMMIT_PREP);
21342140

21352141
list_add(&trans->pending_snapshot->list, &cur_trans->pending_snapshots);
21362142
}
@@ -2154,7 +2160,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
21542160
ktime_t interval;
21552161

21562162
ASSERT(refcount_read(&trans->use_count) == 1);
2157-
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
2163+
btrfs_trans_state_lockdep_acquire(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
21582164

21592165
clear_bit(BTRFS_FS_NEED_TRANS_COMMIT, &fs_info->flags);
21602166

@@ -2214,7 +2220,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
22142220
}
22152221

22162222
spin_lock(&fs_info->trans_lock);
2217-
if (cur_trans->state >= TRANS_STATE_COMMIT_START) {
2223+
if (cur_trans->state >= TRANS_STATE_COMMIT_PREP) {
22182224
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
22192225

22202226
add_pending_snapshot(trans);
@@ -2226,7 +2232,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
22262232
want_state = TRANS_STATE_SUPER_COMMITTED;
22272233

22282234
btrfs_trans_state_lockdep_release(fs_info,
2229-
BTRFS_LOCKDEP_TRANS_COMMIT_START);
2235+
BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
22302236
ret = btrfs_end_transaction(trans);
22312237
wait_for_commit(cur_trans, want_state);
22322238

@@ -2238,9 +2244,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
22382244
return ret;
22392245
}
22402246

2241-
cur_trans->state = TRANS_STATE_COMMIT_START;
2247+
cur_trans->state = TRANS_STATE_COMMIT_PREP;
22422248
wake_up(&fs_info->transaction_blocked_wait);
2243-
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
2249+
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
22442250

22452251
if (cur_trans->list.prev != &fs_info->trans_list) {
22462252
enum btrfs_trans_state want_state = TRANS_STATE_COMPLETED;
@@ -2261,23 +2267,26 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
22612267
btrfs_put_transaction(prev_trans);
22622268
if (ret)
22632269
goto lockdep_release;
2264-
} else {
2265-
spin_unlock(&fs_info->trans_lock);
2270+
spin_lock(&fs_info->trans_lock);
22662271
}
22672272
} else {
2268-
spin_unlock(&fs_info->trans_lock);
22692273
/*
22702274
* The previous transaction was aborted and was already removed
22712275
* from the list of transactions at fs_info->trans_list. So we
22722276
* abort to prevent writing a new superblock that reflects a
22732277
* corrupt state (pointing to trees with unwritten nodes/leafs).
22742278
*/
22752279
if (BTRFS_FS_ERROR(fs_info)) {
2280+
spin_unlock(&fs_info->trans_lock);
22762281
ret = -EROFS;
22772282
goto lockdep_release;
22782283
}
22792284
}
22802285

2286+
cur_trans->state = TRANS_STATE_COMMIT_START;
2287+
wake_up(&fs_info->transaction_blocked_wait);
2288+
spin_unlock(&fs_info->trans_lock);
2289+
22812290
/*
22822291
* Get the time spent on the work done by the commit thread and not
22832292
* the time spent waiting on a previous commit
@@ -2587,7 +2596,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
25872596
goto cleanup_transaction;
25882597

25892598
lockdep_trans_commit_start_release:
2590-
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_START);
2599+
btrfs_trans_state_lockdep_release(fs_info, BTRFS_LOCKDEP_TRANS_COMMIT_PREP);
25912600
btrfs_end_transaction(trans);
25922601
return ret;
25932602
}

fs/btrfs/transaction.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
enum btrfs_trans_state {
1616
TRANS_STATE_RUNNING,
17+
TRANS_STATE_COMMIT_PREP,
1718
TRANS_STATE_COMMIT_START,
1819
TRANS_STATE_COMMIT_DOING,
1920
TRANS_STATE_UNBLOCKED,

0 commit comments

Comments
 (0)