Skip to content

Commit 19773ec

Browse files
author
Kent Overstreet
committed
bcachefs: Disk accounting device validation fixes
- Fix failure to validate that accounting replicas entries point to valid devices: this wasn't a real bug since they'd be cleaned up by GC, but is still something we should know about - Fix failure to validate that dev_data_type entries point to valid devices: this does fix a real bug, since bch2_accounting_read() would then try to copy the counters to that device and pop an inconsistent error when the device didn't exist - Remove accounting entries that are zeroed or invalid: if we're not validating them we need to get rid of them: they might not exist in the superblock, so we need the to trigger the superblock mark path when they're readded. This fixes the replication.ktest rereplicate test, which was failing with "superblock not marked for replicas..." Signed-off-by: Kent Overstreet <[email protected]>
1 parent 9d86178 commit 19773ec

File tree

3 files changed

+118
-37
lines changed

3 files changed

+118
-37
lines changed

fs/bcachefs/disk_accounting.c

Lines changed: 114 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -242,16 +242,22 @@ void bch2_accounting_swab(struct bkey_s k)
242242
*p = swab64(*p);
243243
}
244244

245+
static inline void __accounting_to_replicas(struct bch_replicas_entry_v1 *r,
246+
struct disk_accounting_pos acc)
247+
{
248+
unsafe_memcpy(r, &acc.replicas,
249+
replicas_entry_bytes(&acc.replicas),
250+
"variable length struct");
251+
}
252+
245253
static inline bool accounting_to_replicas(struct bch_replicas_entry_v1 *r, struct bpos p)
246254
{
247255
struct disk_accounting_pos acc_k;
248256
bpos_to_disk_accounting_pos(&acc_k, p);
249257

250258
switch (acc_k.type) {
251259
case BCH_DISK_ACCOUNTING_replicas:
252-
unsafe_memcpy(r, &acc_k.replicas,
253-
replicas_entry_bytes(&acc_k.replicas),
254-
"variable length struct");
260+
__accounting_to_replicas(r, acc_k);
255261
return true;
256262
default:
257263
return false;
@@ -608,6 +614,81 @@ static int accounting_read_key(struct btree_trans *trans, struct bkey_s_c k)
608614
return ret;
609615
}
610616

617+
static int bch2_disk_accounting_validate_late(struct btree_trans *trans,
618+
struct disk_accounting_pos acc,
619+
u64 *v, unsigned nr)
620+
{
621+
struct bch_fs *c = trans->c;
622+
struct printbuf buf = PRINTBUF;
623+
int ret = 0, invalid_dev = -1;
624+
625+
switch (acc.type) {
626+
case BCH_DISK_ACCOUNTING_replicas: {
627+
struct bch_replicas_padded r;
628+
__accounting_to_replicas(&r.e, acc);
629+
630+
for (unsigned i = 0; i < r.e.nr_devs; i++)
631+
if (r.e.devs[i] != BCH_SB_MEMBER_INVALID &&
632+
!bch2_dev_exists(c, r.e.devs[i])) {
633+
invalid_dev = r.e.devs[i];
634+
goto invalid_device;
635+
}
636+
637+
/*
638+
* All replicas entry checks except for invalid device are done
639+
* in bch2_accounting_validate
640+
*/
641+
BUG_ON(bch2_replicas_entry_validate(&r.e, c, &buf));
642+
643+
if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
644+
trans, accounting_replicas_not_marked,
645+
"accounting not marked in superblock replicas\n %s",
646+
(printbuf_reset(&buf),
647+
bch2_accounting_key_to_text(&buf, &acc),
648+
buf.buf))) {
649+
/*
650+
* We're not RW yet and still single threaded, dropping
651+
* and retaking lock is ok:
652+
*/
653+
percpu_up_write(&c->mark_lock);
654+
ret = bch2_mark_replicas(c, &r.e);
655+
if (ret)
656+
goto fsck_err;
657+
percpu_down_write(&c->mark_lock);
658+
}
659+
break;
660+
}
661+
662+
case BCH_DISK_ACCOUNTING_dev_data_type:
663+
if (!bch2_dev_exists(c, acc.dev_data_type.dev)) {
664+
invalid_dev = acc.dev_data_type.dev;
665+
goto invalid_device;
666+
}
667+
break;
668+
}
669+
670+
fsck_err:
671+
printbuf_exit(&buf);
672+
return ret;
673+
invalid_device:
674+
if (fsck_err(trans, accounting_to_invalid_device,
675+
"accounting entry points to invalid device %i\n %s",
676+
invalid_dev,
677+
(printbuf_reset(&buf),
678+
bch2_accounting_key_to_text(&buf, &acc),
679+
buf.buf))) {
680+
for (unsigned i = 0; i < nr; i++)
681+
v[i] = -v[i];
682+
683+
ret = commit_do(trans, NULL, NULL, 0,
684+
bch2_disk_accounting_mod(trans, &acc, v, nr, false)) ?:
685+
-BCH_ERR_remove_disk_accounting_entry;
686+
} else {
687+
ret = -BCH_ERR_remove_disk_accounting_entry;
688+
}
689+
goto fsck_err;
690+
}
691+
611692
/*
612693
* At startup time, initialize the in memory accounting from the btree (and
613694
* journal)
@@ -666,44 +747,42 @@ int bch2_accounting_read(struct bch_fs *c)
666747
}
667748
keys->gap = keys->nr = dst - keys->data;
668749

669-
percpu_down_read(&c->mark_lock);
670-
for (unsigned i = 0; i < acc->k.nr; i++) {
671-
u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
672-
bch2_accounting_mem_read_counters(acc, i, v, ARRAY_SIZE(v), false);
750+
percpu_down_write(&c->mark_lock);
751+
unsigned i = 0;
752+
while (i < acc->k.nr) {
753+
unsigned idx = inorder_to_eytzinger0(i, acc->k.nr);
673754

674-
if (bch2_is_zero(v, sizeof(v[0]) * acc->k.data[i].nr_counters))
675-
continue;
755+
struct disk_accounting_pos acc_k;
756+
bpos_to_disk_accounting_pos(&acc_k, acc->k.data[idx].pos);
676757

677-
struct bch_replicas_padded r;
678-
if (!accounting_to_replicas(&r.e, acc->k.data[i].pos))
679-
continue;
758+
u64 v[BCH_ACCOUNTING_MAX_COUNTERS];
759+
bch2_accounting_mem_read_counters(acc, idx, v, ARRAY_SIZE(v), false);
680760

681761
/*
682-
* If the replicas entry is invalid it'll get cleaned up by
683-
* check_allocations:
762+
* If the entry counters are zeroed, it should be treated as
763+
* nonexistent - it might point to an invalid device.
764+
*
765+
* Remove it, so that if it's re-added it gets re-marked in the
766+
* superblock:
684767
*/
685-
if (bch2_replicas_entry_validate(&r.e, c, &buf))
768+
ret = bch2_is_zero(v, sizeof(v[0]) * acc->k.data[idx].nr_counters)
769+
? -BCH_ERR_remove_disk_accounting_entry
770+
: bch2_disk_accounting_validate_late(trans, acc_k,
771+
v, acc->k.data[idx].nr_counters);
772+
773+
if (ret == -BCH_ERR_remove_disk_accounting_entry) {
774+
free_percpu(acc->k.data[idx].v[0]);
775+
free_percpu(acc->k.data[idx].v[1]);
776+
darray_remove_item(&acc->k, &acc->k.data[idx]);
777+
eytzinger0_sort(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
778+
accounting_pos_cmp, NULL);
779+
ret = 0;
686780
continue;
687-
688-
struct disk_accounting_pos k;
689-
bpos_to_disk_accounting_pos(&k, acc->k.data[i].pos);
690-
691-
if (fsck_err_on(!bch2_replicas_marked_locked(c, &r.e),
692-
trans, accounting_replicas_not_marked,
693-
"accounting not marked in superblock replicas\n %s",
694-
(printbuf_reset(&buf),
695-
bch2_accounting_key_to_text(&buf, &k),
696-
buf.buf))) {
697-
/*
698-
* We're not RW yet and still single threaded, dropping
699-
* and retaking lock is ok:
700-
*/
701-
percpu_up_read(&c->mark_lock);
702-
ret = bch2_mark_replicas(c, &r.e);
703-
if (ret)
704-
goto fsck_err;
705-
percpu_down_read(&c->mark_lock);
706781
}
782+
783+
if (ret)
784+
goto fsck_err;
785+
i++;
707786
}
708787

709788
preempt_disable();
@@ -742,7 +821,7 @@ int bch2_accounting_read(struct bch_fs *c)
742821
}
743822
preempt_enable();
744823
fsck_err:
745-
percpu_up_read(&c->mark_lock);
824+
percpu_up_write(&c->mark_lock);
746825
err:
747826
printbuf_exit(&buf);
748827
bch2_trans_put(trans);

fs/bcachefs/errcode.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,8 @@
268268
x(BCH_ERR_nopromote, nopromote_no_writes) \
269269
x(BCH_ERR_nopromote, nopromote_enomem) \
270270
x(0, invalid_snapshot_node) \
271-
x(0, option_needs_open_fs)
271+
x(0, option_needs_open_fs) \
272+
x(0, remove_disk_accounting_entry)
272273

273274
enum bch_errcode {
274275
BCH_ERR_START = 2048,

fs/bcachefs/sb-errors_format.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,6 +291,7 @@ enum bch_fsck_flags {
291291
x(alloc_key_stripe_sectors_wrong, 271, FSCK_AUTOFIX) \
292292
x(accounting_mismatch, 272, FSCK_AUTOFIX) \
293293
x(accounting_replicas_not_marked, 273, 0) \
294+
x(accounting_to_invalid_device, 289, 0) \
294295
x(invalid_btree_id, 274, 0) \
295296
x(alloc_key_io_time_bad, 275, 0) \
296297
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
@@ -300,7 +301,7 @@ enum bch_fsck_flags {
300301
x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
301302
x(accounting_key_version_0, 282, FSCK_AUTOFIX) \
302303
x(logged_op_but_clean, 283, FSCK_AUTOFIX) \
303-
x(MAX, 289, 0)
304+
x(MAX, 290, 0)
304305

305306
enum bch_sb_error_id {
306307
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,

0 commit comments

Comments
 (0)