Skip to content

Commit 36df6f7

Browse files
committed
Merge tag 'bcachefs-2025-06-12' of git://evilpiepirate.org/bcachefs
Pull bcachefs fixes from Kent Overstreet: "As usual, highlighting the ones users have been noticing: - Fix a small issue with has_case_insensitive not being propagated on snapshot creation; this led to fsck errors, which were harmless because we're not using this flag yet (it's for overlayfs + casefolding). - Log the error being corrected in the journal when we're doing fsck repair: this was one of the "lessons learned" from the i_nlink 0 -> subvolume deletion bug, where reconstructing what had happened by analyzing the journal was a bit more difficult than it needed to be. - Don't schedule btree node scan to run in the superblock: this fixes a regression from the 6.16 recovery passes rework that led to it running unnecessarily. The real issue here is that we don't have online, "self healing" style topology repair yet: topology repair currently has to run before we go RW, which means that we may schedule it unnecessarily after a transient error. This will be fixed in the future. - We now track, in btree node flags, the reason each node was scheduled to be rewritten. We discovered a deadlock in recovery when many btree nodes need to be rewritten because they're degraded: fully fixing this will take some work but it's now easier to see what's going on. For the bug report where this came up, a device had been kicked RO due to transient errors: manually setting it back to RW was sufficient to allow recovery to succeed. - Mark a few more fsck errors as autofix: as a reminder to users, please do keep reporting cases where something needs to be repaired and is not repaired automatically (i.e. cases where -o fix_errors or fsck -y is required).
- rcu_pending.c now works with PREEMPT_RT - 'bcachefs device add', then umount, then remount wasn't working - we now emit a uevent so that the new device's new superblock is correctly picked up - Assorted repair fixes: btree node scan will no longer incorrectly update sb->version_min - Assorted syzbot fixes" * tag 'bcachefs-2025-06-12' of git://evilpiepirate.org/bcachefs: (23 commits) bcachefs: Don't trace should_be_locked unless changing bcachefs: Ensure that snapshot creation propagates has_case_insensitive bcachefs: Print devices we're mounting on multi device filesystems bcachefs: Don't trust sb->nr_devices in members_to_text() bcachefs: Fix version checks in validate_bset() bcachefs: ioctl: avoid stack overflow warning bcachefs: Don't pass trans to fsck_err() in gc_accounting_done bcachefs: Fix leak in bch2_fs_recovery() error path bcachefs: Fix rcu_pending for PREEMPT_RT bcachefs: Fix downgrade_table_extra() bcachefs: Don't put rhashtable on stack bcachefs: Make sure opts.read_only gets propagated back to VFS bcachefs: Fix possible console lock involved deadlock bcachefs: mark more errors autofix bcachefs: Don't persistently run scan_for_btree_nodes bcachefs: Read error message now prints if self healing bcachefs: Only run 'increase_depth' for keys from btree node csan bcachefs: Mark need_discard_freespace_key_bad autofix bcachefs: Update /dev/disk/by-uuid on device add bcachefs: Add more flags to btree nodes for rewrite reason ...
2 parents d080d3b + aef22f6 commit 36df6f7

25 files changed

+319
-116
lines changed

fs/bcachefs/bcachefs.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,6 @@ do { \
296296
#define bch2_fmt(_c, fmt) bch2_log_msg(_c, fmt "\n")
297297

298298
void bch2_print_str(struct bch_fs *, const char *, const char *);
299-
void bch2_print_str_nonblocking(struct bch_fs *, const char *, const char *);
300299

301300
__printf(2, 3)
302301
void bch2_print_opts(struct bch_opts *, const char *, ...);

fs/bcachefs/btree_gc.c

Lines changed: 60 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -397,7 +397,11 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct
397397
continue;
398398
}
399399

400-
ret = btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan);
400+
ret = lockrestart_do(trans,
401+
btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan));
402+
if (ret < 0)
403+
goto err;
404+
401405
if (ret == DID_FILL_FROM_SCAN) {
402406
new_pass = true;
403407
ret = 0;
@@ -438,7 +442,8 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct
438442

439443
if (!ret && !IS_ERR_OR_NULL(prev)) {
440444
BUG_ON(cur);
441-
ret = btree_repair_node_end(trans, b, prev, pulled_from_scan);
445+
ret = lockrestart_do(trans,
446+
btree_repair_node_end(trans, b, prev, pulled_from_scan));
442447
if (ret == DID_FILL_FROM_SCAN) {
443448
new_pass = true;
444449
ret = 0;
@@ -519,49 +524,65 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct
519524
bch2_bkey_buf_exit(&prev_k, c);
520525
bch2_bkey_buf_exit(&cur_k, c);
521526
printbuf_exit(&buf);
527+
bch_err_fn(c, ret);
528+
return ret;
529+
}
530+
531+
static int bch2_check_root(struct btree_trans *trans, enum btree_id i,
532+
bool *reconstructed_root)
533+
{
534+
struct bch_fs *c = trans->c;
535+
struct btree_root *r = bch2_btree_id_root(c, i);
536+
struct printbuf buf = PRINTBUF;
537+
int ret = 0;
538+
539+
bch2_btree_id_to_text(&buf, i);
540+
541+
if (r->error) {
542+
bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
543+
544+
r->alive = false;
545+
r->error = 0;
546+
547+
if (!bch2_btree_has_scanned_nodes(c, i)) {
548+
__fsck_err(trans,
549+
FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0),
550+
btree_root_unreadable_and_scan_found_nothing,
551+
"no nodes found for btree %s, continue?", buf.buf);
552+
bch2_btree_root_alloc_fake_trans(trans, i, 0);
553+
} else {
554+
bch2_btree_root_alloc_fake_trans(trans, i, 1);
555+
bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
556+
ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
557+
if (ret)
558+
goto err;
559+
}
560+
561+
*reconstructed_root = true;
562+
}
563+
err:
564+
fsck_err:
565+
printbuf_exit(&buf);
566+
bch_err_fn(c, ret);
522567
return ret;
523568
}
524569

525570
int bch2_check_topology(struct bch_fs *c)
526571
{
527572
struct btree_trans *trans = bch2_trans_get(c);
528573
struct bpos pulled_from_scan = POS_MIN;
529-
struct printbuf buf = PRINTBUF;
530574
int ret = 0;
531575

532576
bch2_trans_srcu_unlock(trans);
533577

534578
for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
535-
struct btree_root *r = bch2_btree_id_root(c, i);
536579
bool reconstructed_root = false;
580+
recover:
581+
ret = lockrestart_do(trans, bch2_check_root(trans, i, &reconstructed_root));
582+
if (ret)
583+
break;
537584

538-
printbuf_reset(&buf);
539-
bch2_btree_id_to_text(&buf, i);
540-
541-
if (r->error) {
542-
reconstruct_root:
543-
bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
544-
545-
r->alive = false;
546-
r->error = 0;
547-
548-
if (!bch2_btree_has_scanned_nodes(c, i)) {
549-
__fsck_err(trans,
550-
FSCK_CAN_FIX|(!btree_id_important(i) ? FSCK_AUTOFIX : 0),
551-
btree_root_unreadable_and_scan_found_nothing,
552-
"no nodes found for btree %s, continue?", buf.buf);
553-
bch2_btree_root_alloc_fake_trans(trans, i, 0);
554-
} else {
555-
bch2_btree_root_alloc_fake_trans(trans, i, 1);
556-
bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
557-
ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
558-
if (ret)
559-
break;
560-
}
561-
562-
reconstructed_root = true;
563-
}
564-
585+
struct btree_root *r = bch2_btree_id_root(c, i);
565586
struct btree *b = r->b;
566587

567588
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
@@ -575,17 +596,21 @@ int bch2_check_topology(struct bch_fs *c)
575596

576597
r->b = NULL;
577598

578-
if (!reconstructed_root)
579-
goto reconstruct_root;
599+
if (!reconstructed_root) {
600+
r->error = -EIO;
601+
goto recover;
602+
}
580603

604+
struct printbuf buf = PRINTBUF;
605+
bch2_btree_id_to_text(&buf, i);
581606
bch_err(c, "empty btree root %s", buf.buf);
607+
printbuf_exit(&buf);
582608
bch2_btree_root_alloc_fake_trans(trans, i, 0);
583609
r->alive = false;
584610
ret = 0;
585611
}
586612
}
587-
fsck_err:
588-
printbuf_exit(&buf);
613+
589614
bch2_trans_put(trans);
590615
return ret;
591616
}

fs/bcachefs/btree_io.c

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -741,16 +741,22 @@ static int validate_bset(struct bch_fs *c, struct bch_dev *ca,
741741
BCH_VERSION_MAJOR(version),
742742
BCH_VERSION_MINOR(version));
743743

744-
if (btree_err_on(version < c->sb.version_min,
744+
if (c->recovery.curr_pass != BCH_RECOVERY_PASS_scan_for_btree_nodes &&
745+
btree_err_on(version < c->sb.version_min,
745746
-BCH_ERR_btree_node_read_err_fixable,
746747
c, NULL, b, i, NULL,
747748
btree_node_bset_older_than_sb_min,
748749
"bset version %u older than superblock version_min %u",
749750
version, c->sb.version_min)) {
750-
mutex_lock(&c->sb_lock);
751-
c->disk_sb.sb->version_min = cpu_to_le16(version);
752-
bch2_write_super(c);
753-
mutex_unlock(&c->sb_lock);
751+
if (bch2_version_compatible(version)) {
752+
mutex_lock(&c->sb_lock);
753+
c->disk_sb.sb->version_min = cpu_to_le16(version);
754+
bch2_write_super(c);
755+
mutex_unlock(&c->sb_lock);
756+
} else {
757+
/* We have no idea what's going on: */
758+
i->version = cpu_to_le16(c->sb.version);
759+
}
754760
}
755761

756762
if (btree_err_on(BCH_VERSION_MAJOR(version) >
@@ -1045,6 +1051,7 @@ static int validate_bset_keys(struct bch_fs *c, struct btree *b,
10451051
le16_add_cpu(&i->u64s, -next_good_key);
10461052
memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k);
10471053
set_btree_node_need_rewrite(b);
1054+
set_btree_node_need_rewrite_error(b);
10481055
}
10491056
fsck_err:
10501057
printbuf_exit(&buf);
@@ -1305,6 +1312,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
13051312
(u64 *) vstruct_end(i) - (u64 *) k);
13061313
set_btree_bset_end(b, b->set);
13071314
set_btree_node_need_rewrite(b);
1315+
set_btree_node_need_rewrite_error(b);
13081316
continue;
13091317
}
13101318
if (ret)
@@ -1329,12 +1337,16 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
13291337
bkey_for_each_ptr(bch2_bkey_ptrs(bkey_i_to_s(&b->key)), ptr) {
13301338
struct bch_dev *ca2 = bch2_dev_rcu(c, ptr->dev);
13311339

1332-
if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw)
1340+
if (!ca2 || ca2->mi.state != BCH_MEMBER_STATE_rw) {
13331341
set_btree_node_need_rewrite(b);
1342+
set_btree_node_need_rewrite_degraded(b);
1343+
}
13341344
}
13351345

1336-
if (!ptr_written)
1346+
if (!ptr_written) {
13371347
set_btree_node_need_rewrite(b);
1348+
set_btree_node_need_rewrite_ptr_written_zero(b);
1349+
}
13381350
fsck_err:
13391351
mempool_free(iter, &c->fill_iter);
13401352
printbuf_exit(&buf);

fs/bcachefs/btree_locking.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -213,7 +213,7 @@ static noinline __noreturn void break_cycle_fail(struct lock_graph *g)
213213
prt_newline(&buf);
214214
}
215215

216-
bch2_print_str_nonblocking(g->g->trans->c, KERN_ERR, buf.buf);
216+
bch2_print_str(g->g->trans->c, KERN_ERR, buf.buf);
217217
printbuf_exit(&buf);
218218
BUG();
219219
}

fs/bcachefs/btree_locking.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -417,8 +417,10 @@ static inline void btree_path_set_should_be_locked(struct btree_trans *trans, st
417417
EBUG_ON(!btree_node_locked(path, path->level));
418418
EBUG_ON(path->uptodate);
419419

420-
path->should_be_locked = true;
421-
trace_btree_path_should_be_locked(trans, path);
420+
if (!path->should_be_locked) {
421+
path->should_be_locked = true;
422+
trace_btree_path_should_be_locked(trans, path);
423+
}
422424
}
423425

424426
static inline void __btree_path_set_level_up(struct btree_trans *trans,

fs/bcachefs/btree_types.h

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -617,6 +617,9 @@ enum btree_write_type {
617617
x(dying) \
618618
x(fake) \
619619
x(need_rewrite) \
620+
x(need_rewrite_error) \
621+
x(need_rewrite_degraded) \
622+
x(need_rewrite_ptr_written_zero) \
620623
x(never_write) \
621624
x(pinned)
622625

@@ -641,6 +644,32 @@ static inline void clear_btree_node_ ## flag(struct btree *b) \
641644
BTREE_FLAGS()
642645
#undef x
643646

647+
#define BTREE_NODE_REWRITE_REASON() \
648+
x(none) \
649+
x(unknown) \
650+
x(error) \
651+
x(degraded) \
652+
x(ptr_written_zero)
653+
654+
enum btree_node_rewrite_reason {
655+
#define x(n) BTREE_NODE_REWRITE_##n,
656+
BTREE_NODE_REWRITE_REASON()
657+
#undef x
658+
};
659+
660+
static inline enum btree_node_rewrite_reason btree_node_rewrite_reason(struct btree *b)
661+
{
662+
if (btree_node_need_rewrite_ptr_written_zero(b))
663+
return BTREE_NODE_REWRITE_ptr_written_zero;
664+
if (btree_node_need_rewrite_degraded(b))
665+
return BTREE_NODE_REWRITE_degraded;
666+
if (btree_node_need_rewrite_error(b))
667+
return BTREE_NODE_REWRITE_error;
668+
if (btree_node_need_rewrite(b))
669+
return BTREE_NODE_REWRITE_unknown;
670+
return BTREE_NODE_REWRITE_none;
671+
}
672+
644673
static inline struct btree_write *btree_current_write(struct btree *b)
645674
{
646675
return b->writes + btree_node_write_idx(b);

fs/bcachefs/btree_update_interior.c

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1138,6 +1138,13 @@ static void bch2_btree_update_done(struct btree_update *as, struct btree_trans *
11381138
start_time);
11391139
}
11401140

1141+
static const char * const btree_node_reawrite_reason_strs[] = {
1142+
#define x(n) #n,
1143+
BTREE_NODE_REWRITE_REASON()
1144+
#undef x
1145+
NULL,
1146+
};
1147+
11411148
static struct btree_update *
11421149
bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
11431150
unsigned level_start, bool split,
@@ -1232,6 +1239,15 @@ bch2_btree_update_start(struct btree_trans *trans, struct btree_path *path,
12321239
list_add_tail(&as->list, &c->btree_interior_update_list);
12331240
mutex_unlock(&c->btree_interior_update_lock);
12341241

1242+
struct btree *b = btree_path_node(path, path->level);
1243+
as->node_start = b->data->min_key;
1244+
as->node_end = b->data->max_key;
1245+
as->node_needed_rewrite = btree_node_rewrite_reason(b);
1246+
as->node_written = b->written;
1247+
as->node_sectors = btree_buf_bytes(b) >> 9;
1248+
as->node_remaining = __bch2_btree_u64s_remaining(b,
1249+
btree_bkey_last(b, bset_tree_last(b)));
1250+
12351251
/*
12361252
* We don't want to allocate if we're in an error state, that can cause
12371253
* deadlock on emergency shutdown due to open buckets getting stuck in
@@ -2108,6 +2124,9 @@ int __bch2_foreground_maybe_merge(struct btree_trans *trans,
21082124
if (ret)
21092125
goto err;
21102126

2127+
as->node_start = prev->data->min_key;
2128+
as->node_end = next->data->max_key;
2129+
21112130
trace_and_count(c, btree_node_merge, trans, b);
21122131

21132132
n = bch2_btree_node_alloc(as, trans, b->c.level);
@@ -2681,9 +2700,19 @@ static void bch2_btree_update_to_text(struct printbuf *out, struct btree_update
26812700

26822701
prt_str(out, " ");
26832702
bch2_btree_id_to_text(out, as->btree_id);
2684-
prt_printf(out, " l=%u-%u mode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
2703+
prt_printf(out, " l=%u-%u ",
26852704
as->update_level_start,
2686-
as->update_level_end,
2705+
as->update_level_end);
2706+
bch2_bpos_to_text(out, as->node_start);
2707+
prt_char(out, ' ');
2708+
bch2_bpos_to_text(out, as->node_end);
2709+
prt_printf(out, "\nwritten %u/%u u64s_remaining %u need_rewrite %s",
2710+
as->node_written,
2711+
as->node_sectors,
2712+
as->node_remaining,
2713+
btree_node_reawrite_reason_strs[as->node_needed_rewrite]);
2714+
2715+
prt_printf(out, "\nmode=%s nodes_written=%u cl.remaining=%u journal_seq=%llu\n",
26872716
bch2_btree_update_modes[as->mode],
26882717
as->nodes_written,
26892718
closure_nr_remaining(&as->cl),

fs/bcachefs/btree_update_interior.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,13 @@ struct btree_update {
5757
unsigned took_gc_lock:1;
5858

5959
enum btree_id btree_id;
60+
struct bpos node_start;
61+
struct bpos node_end;
62+
enum btree_node_rewrite_reason node_needed_rewrite;
63+
u16 node_written;
64+
u16 node_sectors;
65+
u16 node_remaining;
66+
6067
unsigned update_level_start;
6168
unsigned update_level_end;
6269

fs/bcachefs/chardev.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -399,7 +399,7 @@ static long bch2_ioctl_data(struct bch_fs *c,
399399
return ret;
400400
}
401401

402-
static long bch2_ioctl_fs_usage(struct bch_fs *c,
402+
static noinline_for_stack long bch2_ioctl_fs_usage(struct bch_fs *c,
403403
struct bch_ioctl_fs_usage __user *user_arg)
404404
{
405405
struct bch_ioctl_fs_usage arg = {};
@@ -469,7 +469,7 @@ static long bch2_ioctl_query_accounting(struct bch_fs *c,
469469
}
470470

471471
/* obsolete, didn't allow for new data types: */
472-
static long bch2_ioctl_dev_usage(struct bch_fs *c,
472+
static noinline_for_stack long bch2_ioctl_dev_usage(struct bch_fs *c,
473473
struct bch_ioctl_dev_usage __user *user_arg)
474474
{
475475
struct bch_ioctl_dev_usage arg;

fs/bcachefs/disk_accounting.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -618,7 +618,9 @@ int bch2_gc_accounting_done(struct bch_fs *c)
618618
for (unsigned j = 0; j < nr; j++)
619619
src_v[j] -= dst_v[j];
620620

621-
if (fsck_err(trans, accounting_mismatch, "%s", buf.buf)) {
621+
bch2_trans_unlock_long(trans);
622+
623+
if (fsck_err(c, accounting_mismatch, "%s", buf.buf)) {
622624
percpu_up_write(&c->mark_lock);
623625
ret = commit_do(trans, NULL, NULL, 0,
624626
bch2_disk_accounting_mod(trans, &acc_k, src_v, nr, false));

0 commit comments

Comments
 (0)