Skip to content

Commit 387df33

Browse files
author
Kent Overstreet
committed
bcachefs: Start copygc, rebalance threads earlier
Previously, copygc and rebalance weren't started until the very end of mounting, after all recovery passes have finished. But copygc really should be started earlier, since it may be needed for allocations to make forward progress. Additionally, we've been seeing occasional bug reports where starting the kthread fails due to a pending signal - i.e. we're getting timed out by systemd (during a version upgrade), but we're not seeing the signal until mount is about to complete. Additionally, we now have copygc/rebalance explicitly wait for check_snapshots to complete (if being run); they require that for snapshot_is_ancestor() in the data move path. Signed-off-by: Kent Overstreet <[email protected]>
1 parent d64e8e8 commit 387df33

File tree

5 files changed

+39
-36
lines changed

5 files changed

+39
-36
lines changed

fs/bcachefs/movinggc.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,13 @@ static int bch2_copygc_thread(void *arg)
356356

357357
set_freezable();
358358

359+
/*
360+
* Data move operations can't run until after check_snapshots has
361+
* completed, and bch2_snapshot_is_ancestor() is available.
362+
*/
363+
kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
364+
kthread_should_stop());
365+
359366
bch2_move_stats_init(&move_stats, "copygc");
360367
bch2_moving_ctxt_init(&ctxt, c, NULL, &move_stats,
361368
writepoint_ptr(&c->copygc_write_point),

fs/bcachefs/rebalance.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -581,6 +581,13 @@ static int bch2_rebalance_thread(void *arg)
581581

582582
set_freezable();
583583

584+
/*
585+
* Data move operations can't run until after check_snapshots has
586+
* completed, and bch2_snapshot_is_ancestor() is available.
587+
*/
588+
kthread_wait_freezable(c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots ||
589+
kthread_should_stop());
590+
584591
bch2_moving_ctxt_init(&ctxt, c, NULL, &r->work_stats,
585592
writepoint_ptr(&c->rebalance_write_point),
586593
true);

fs/bcachefs/recovery.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "journal_seq_blacklist.h"
1919
#include "logged_ops.h"
2020
#include "move.h"
21+
#include "movinggc.h"
2122
#include "namei.h"
2223
#include "quota.h"
2324
#include "rebalance.h"
@@ -1194,6 +1195,9 @@ int bch2_fs_initialize(struct bch_fs *c)
11941195

11951196
c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
11961197

1198+
bch2_copygc_wakeup(c);
1199+
bch2_rebalance_wakeup(c);
1200+
11971201
if (enabled_qtypes(c)) {
11981202
ret = bch2_fs_quota_read(c);
11991203
if (ret)

fs/bcachefs/recovery_passes.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ int bch2_run_recovery_passes(struct bch_fs *c)
266266
spin_lock_irq(&c->recovery_pass_lock);
267267

268268
while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns) && !ret) {
269+
unsigned prev_done = c->recovery_pass_done;
269270
unsigned pass = c->curr_recovery_pass;
270271

271272
c->next_recovery_pass = pass + 1;
@@ -299,6 +300,12 @@ int bch2_run_recovery_passes(struct bch_fs *c)
299300
}
300301

301302
c->curr_recovery_pass = c->next_recovery_pass;
303+
304+
if (prev_done <= BCH_RECOVERY_PASS_check_snapshots &&
305+
c->recovery_pass_done > BCH_RECOVERY_PASS_check_snapshots) {
306+
bch2_copygc_wakeup(c);
307+
bch2_rebalance_wakeup(c);
308+
}
302309
}
303310

304311
spin_unlock_irq(&c->recovery_pass_lock);

fs/bcachefs/super.c

Lines changed: 14 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -418,32 +418,6 @@ bool bch2_fs_emergency_read_only_locked(struct bch_fs *c)
418418
return ret;
419419
}
420420

421-
static int bch2_fs_read_write_late(struct bch_fs *c)
422-
{
423-
int ret;
424-
425-
/*
426-
* Data move operations can't run until after check_snapshots has
427-
* completed, and bch2_snapshot_is_ancestor() is available.
428-
*
429-
* Ideally we'd start copygc/rebalance earlier instead of waiting for
430-
* all of recovery/fsck to complete:
431-
*/
432-
ret = bch2_copygc_start(c);
433-
if (ret) {
434-
bch_err(c, "error starting copygc thread");
435-
return ret;
436-
}
437-
438-
ret = bch2_rebalance_start(c);
439-
if (ret) {
440-
bch_err(c, "error starting rebalance thread");
441-
return ret;
442-
}
443-
444-
return 0;
445-
}
446-
447421
static int __bch2_fs_read_write(struct bch_fs *c, bool early)
448422
{
449423
int ret;
@@ -503,10 +477,17 @@ static int __bch2_fs_read_write(struct bch_fs *c, bool early)
503477
atomic_long_inc(&c->writes[i]);
504478
}
505479
#endif
506-
if (!early) {
507-
ret = bch2_fs_read_write_late(c);
508-
if (ret)
509-
goto err;
480+
481+
ret = bch2_copygc_start(c);
482+
if (ret) {
483+
bch_err_msg(c, ret, "error starting copygc thread");
484+
goto err;
485+
}
486+
487+
ret = bch2_rebalance_start(c);
488+
if (ret) {
489+
bch_err_msg(c, ret, "error starting rebalance thread");
490+
goto err;
510491
}
511492

512493
bch2_do_discards(c);
@@ -1082,13 +1063,10 @@ int bch2_fs_start(struct bch_fs *c)
10821063
wake_up(&c->ro_ref_wait);
10831064

10841065
down_write(&c->state_lock);
1085-
if (c->opts.read_only) {
1066+
if (c->opts.read_only)
10861067
bch2_fs_read_only(c);
1087-
} else {
1088-
ret = !test_bit(BCH_FS_rw, &c->flags)
1089-
? bch2_fs_read_write(c)
1090-
: bch2_fs_read_write_late(c);
1091-
}
1068+
else if (!test_bit(BCH_FS_rw, &c->flags))
1069+
ret = bch2_fs_read_write(c);
10921070
up_write(&c->state_lock);
10931071

10941072
err:

0 commit comments

Comments
 (0)