Skip to content

Commit e49cf9b

Browse files
author
Kent Overstreet
committed
bcachefs: Make check_key_has_snapshot safer
Snapshot deletion v2 added sentinal values for deleted snapshots, so "key for deleted snapshot" - i.e. snapshot deletion missed something - is safe to repair automatically. But if we find a key for a missing snapshot we have no idea what happened, and we shouldn't delete it unless we're very sure that everything else is consistent. So hook it up to the new bch2_require_recovery_pass(), we'll now only delete if snapshots and subvolumes have recenlty been checked. Signed-off-by: Kent Overstreet <[email protected]>
1 parent 0942b85 commit e49cf9b

File tree

2 files changed

+37
-22
lines changed

2 files changed

+37
-22
lines changed

fs/bcachefs/data_update.c

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -822,16 +822,11 @@ int bch2_data_update_init(struct btree_trans *trans,
822822
int ret = 0;
823823

824824
if (k.k->p.snapshot) {
825-
/*
826-
* We'll go ERO if we see a key for a missing snapshot, and if
827-
* we're still in recovery we want to give that a chance to
828-
* repair:
829-
*/
830-
if (unlikely(test_bit(BCH_FS_in_recovery, &c->flags) &&
831-
bch2_snapshot_id_state(c, k.k->p.snapshot) == SNAPSHOT_ID_empty))
832-
return bch_err_throw(c, data_update_done_no_snapshot);
833-
834825
ret = bch2_check_key_has_snapshot(trans, iter, k);
826+
if (bch2_err_matches(ret, BCH_ERR_recovery_will_run)) {
827+
/* Can't repair yet, waiting on other recovery passes */
828+
return bch_err_throw(c, data_update_done_no_snapshot);
829+
}
835830
if (ret < 0)
836831
return ret;
837832
if (ret) /* key was deleted */

fs/bcachefs/snapshot.c

Lines changed: 33 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,19 +1045,39 @@ int __bch2_check_key_has_snapshot(struct btree_trans *trans,
10451045
ret = bch2_btree_delete_at(trans, iter,
10461046
BTREE_UPDATE_internal_snapshot_node) ?: 1;
10471047

1048-
/*
1049-
* Snapshot missing: we should have caught this with btree_lost_data and
1050-
* kicked off reconstruct_snapshots, so if we end up here we have no
1051-
* idea what happened:
1052-
*/
1053-
if (fsck_err_on(state == SNAPSHOT_ID_empty,
1054-
trans, bkey_in_missing_snapshot,
1055-
"key in missing snapshot %s, delete?",
1056-
(bch2_btree_id_to_text(&buf, iter->btree_id),
1057-
prt_char(&buf, ' '),
1058-
bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
1059-
ret = bch2_btree_delete_at(trans, iter,
1060-
BTREE_UPDATE_internal_snapshot_node) ?: 1;
1048+
if (state == SNAPSHOT_ID_empty) {
1049+
/*
1050+
* Snapshot missing: we should have caught this with btree_lost_data and
1051+
* kicked off reconstruct_snapshots, so if we end up here we have no
1052+
* idea what happened.
1053+
*
1054+
* Do not delete unless we know that subvolumes and snapshots
1055+
* are consistent:
1056+
*
1057+
* XXX:
1058+
*
1059+
* We could be smarter here, and instead of using the generic
1060+
* recovery pass ratelimiting, track if there have been any
1061+
* changes to the snapshots or inodes btrees since those passes
1062+
* last ran.
1063+
*/
1064+
ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_snapshots) ?: ret;
1065+
ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_check_subvols) ?: ret;
1066+
1067+
if (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_snapshots))
1068+
ret = bch2_require_recovery_pass(c, &buf, BCH_RECOVERY_PASS_reconstruct_snapshots) ?: ret;
1069+
1070+
unsigned repair_flags = FSCK_CAN_IGNORE | (!ret ? FSCK_CAN_FIX : 0);
1071+
1072+
if (__fsck_err(trans, repair_flags, bkey_in_missing_snapshot,
1073+
"key in missing snapshot %s, delete?",
1074+
(bch2_btree_id_to_text(&buf, iter->btree_id),
1075+
prt_char(&buf, ' '),
1076+
bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
1077+
ret = bch2_btree_delete_at(trans, iter,
1078+
BTREE_UPDATE_internal_snapshot_node) ?: 1;
1079+
}
1080+
}
10611081
fsck_err:
10621082
printbuf_exit(&buf);
10631083
return ret;

0 commit comments

Comments
 (0)