Skip to content

Commit bc6d2d1

Browse files
author
Kent Overstreet
committed
bcachefs: fsck: Improve hash_check_key()
hash_check_key() checks and repairs the hash table btrees: dirents and xattrs are open addressing hash tables. We recently had a corruption reported where the hash type on an inode somehow got flipped, which made the existing dirents invisible and allowed new ones to be created with the same name. Now, hash_check_key() can repair duplicates: it will delete one of them, if it has an xattr or dangling dirent, but if it has two valid dirents one of them gets renamed. Signed-off-by: Kent Overstreet <[email protected]>
1 parent dc96656 commit bc6d2d1

File tree

3 files changed

+194
-50
lines changed

3 files changed

+194
-50
lines changed

fs/bcachefs/dirent.c

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -250,13 +250,6 @@ int bch2_dirent_create(struct btree_trans *trans, subvol_inum dir,
250250
return ret;
251251
}
252252

253-
static void dirent_copy_target(struct bkey_i_dirent *dst,
254-
struct bkey_s_c_dirent src)
255-
{
256-
dst->v.d_inum = src.v->d_inum;
257-
dst->v.d_type = src.v->d_type;
258-
}
259-
260253
int bch2_dirent_read_target(struct btree_trans *trans, subvol_inum dir,
261254
struct bkey_s_c_dirent d, subvol_inum *target)
262255
{

fs/bcachefs/dirent.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ static inline unsigned dirent_val_u64s(unsigned len)
3434
int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
3535
struct bkey_s_c_dirent, subvol_inum *);
3636

37+
static inline void dirent_copy_target(struct bkey_i_dirent *dst,
38+
struct bkey_s_c_dirent src)
39+
{
40+
dst->v.d_inum = src.v->d_inum;
41+
dst->v.d_type = src.v->d_type;
42+
}
43+
3744
int bch2_dirent_create_snapshot(struct btree_trans *, u32, u64, u32,
3845
const struct bch_hash_info *, u8,
3946
const struct qstr *, u64, u64 *,

fs/bcachefs/fsck.c

Lines changed: 187 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -929,35 +929,138 @@ static int get_visible_inodes(struct btree_trans *trans,
929929
return ret;
930930
}
931931

932-
static int hash_redo_key(struct btree_trans *trans,
933-
const struct bch_hash_desc desc,
934-
struct bch_hash_info *hash_info,
935-
struct btree_iter *k_iter, struct bkey_s_c k)
936-
{
937-
struct bkey_i *delete;
938-
struct bkey_i *tmp;
939-
940-
delete = bch2_trans_kmalloc(trans, sizeof(*delete));
941-
if (IS_ERR(delete))
942-
return PTR_ERR(delete);
943-
944-
tmp = bch2_bkey_make_mut_noupdate(trans, k);
945-
if (IS_ERR(tmp))
946-
return PTR_ERR(tmp);
947-
948-
bkey_init(&delete->k);
949-
delete->k.p = k_iter->pos;
950-
return bch2_btree_iter_traverse(k_iter) ?:
951-
bch2_trans_update(trans, k_iter, delete, 0) ?:
952-
bch2_hash_set_in_snapshot(trans, desc, hash_info,
953-
(subvol_inum) { 0, k.k->p.inode },
954-
k.k->p.snapshot, tmp,
955-
STR_HASH_must_create|
956-
BTREE_UPDATE_internal_snapshot_node) ?:
957-
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
932+
static int dirent_has_target(struct btree_trans *trans, struct bkey_s_c_dirent d)
933+
{
934+
if (d.v->d_type == DT_SUBVOL) {
935+
u32 snap;
936+
u64 inum;
937+
int ret = subvol_lookup(trans, le32_to_cpu(d.v->d_child_subvol), &snap, &inum);
938+
if (ret && !bch2_err_matches(ret, ENOENT))
939+
return ret;
940+
return !ret;
941+
} else {
942+
struct btree_iter iter;
943+
struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
944+
SPOS(0, le64_to_cpu(d.v->d_inum), d.k->p.snapshot), 0);
945+
int ret = bkey_err(k);
946+
if (ret)
947+
return ret;
948+
949+
ret = bkey_is_inode(k.k);
950+
bch2_trans_iter_exit(trans, &iter);
951+
return ret;
952+
}
953+
}
954+
955+
/*
956+
* Prefer to delete the first one, since that will be the one at the wrong
957+
* offset:
958+
* return value: 0 -> delete k1, 1 -> delete k2
959+
*/
960+
static int hash_pick_winner(struct btree_trans *trans,
961+
const struct bch_hash_desc desc,
962+
struct bch_hash_info *hash_info,
963+
struct bkey_s_c k1,
964+
struct bkey_s_c k2)
965+
{
966+
if (bkey_val_bytes(k1.k) == bkey_val_bytes(k2.k) &&
967+
!memcmp(k1.v, k2.v, bkey_val_bytes(k1.k)))
968+
return 0;
969+
970+
switch (desc.btree_id) {
971+
case BTREE_ID_dirents: {
972+
int ret = dirent_has_target(trans, bkey_s_c_to_dirent(k1));
973+
if (ret < 0)
974+
return ret;
975+
if (!ret)
976+
return 0;
977+
978+
ret = dirent_has_target(trans, bkey_s_c_to_dirent(k2));
979+
if (ret < 0)
980+
return ret;
981+
if (!ret)
982+
return 1;
983+
return 2;
984+
}
985+
default:
986+
return 0;
987+
}
988+
}
989+
990+
static int fsck_update_backpointers(struct btree_trans *trans,
991+
struct snapshots_seen *s,
992+
const struct bch_hash_desc desc,
993+
struct bch_hash_info *hash_info,
994+
struct bkey_i *new)
995+
{
996+
if (new->k.type != KEY_TYPE_dirent)
997+
return 0;
998+
999+
struct bkey_i_dirent *d = bkey_i_to_dirent(new);
1000+
struct inode_walker target = inode_walker_init();
1001+
int ret = 0;
1002+
1003+
if (d->v.d_type == DT_SUBVOL) {
1004+
BUG();
1005+
} else {
1006+
ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum));
1007+
if (ret)
1008+
goto err;
1009+
1010+
darray_for_each(target.inodes, i) {
1011+
i->inode.bi_dir_offset = d->k.p.offset;
1012+
ret = __bch2_fsck_write_inode(trans, &i->inode);
1013+
if (ret)
1014+
goto err;
1015+
}
1016+
}
1017+
err:
1018+
inode_walker_exit(&target);
1019+
return ret;
1020+
}
1021+
1022+
static int fsck_rename_dirent(struct btree_trans *trans,
1023+
struct snapshots_seen *s,
1024+
const struct bch_hash_desc desc,
1025+
struct bch_hash_info *hash_info,
1026+
struct bkey_s_c_dirent old)
1027+
{
1028+
struct qstr old_name = bch2_dirent_get_name(old);
1029+
struct bkey_i_dirent *new = bch2_trans_kmalloc(trans, bkey_bytes(old.k) + 32);
1030+
int ret = PTR_ERR_OR_ZERO(new);
1031+
if (ret)
1032+
return ret;
1033+
1034+
bkey_dirent_init(&new->k_i);
1035+
dirent_copy_target(new, old);
1036+
new->k.p = old.k->p;
1037+
1038+
for (unsigned i = 0; i < 1000; i++) {
1039+
unsigned len = sprintf(new->v.d_name, "%.*s.fsck_renamed-%u",
1040+
old_name.len, old_name.name, i);
1041+
unsigned u64s = BKEY_U64s + dirent_val_u64s(len);
1042+
1043+
if (u64s > U8_MAX)
1044+
return -EINVAL;
1045+
1046+
new->k.u64s = u64s;
1047+
1048+
ret = bch2_hash_set_in_snapshot(trans, bch2_dirent_hash_desc, hash_info,
1049+
(subvol_inum) { 0, old.k->p.inode },
1050+
old.k->p.snapshot, &new->k_i,
1051+
BTREE_UPDATE_internal_snapshot_node);
1052+
if (!bch2_err_matches(ret, EEXIST))
1053+
break;
1054+
}
1055+
1056+
if (ret)
1057+
return ret;
1058+
1059+
return fsck_update_backpointers(trans, s, desc, hash_info, &new->k_i);
9581060
}
9591061

9601062
static int hash_check_key(struct btree_trans *trans,
1063+
struct snapshots_seen *s,
9611064
const struct bch_hash_desc desc,
9621065
struct bch_hash_info *hash_info,
9631066
struct btree_iter *k_iter, struct bkey_s_c hash_k)
@@ -986,16 +1089,9 @@ static int hash_check_key(struct btree_trans *trans,
9861089
if (bkey_eq(k.k->p, hash_k.k->p))
9871090
break;
9881091

989-
if (fsck_err_on(k.k->type == desc.key_type &&
990-
!desc.cmp_bkey(k, hash_k),
991-
trans, hash_table_key_duplicate,
992-
"duplicate hash table keys:\n%s",
993-
(printbuf_reset(&buf),
994-
bch2_bkey_val_to_text(&buf, c, hash_k),
995-
buf.buf))) {
996-
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
997-
break;
998-
}
1092+
if (k.k->type == desc.key_type &&
1093+
!desc.cmp_bkey(k, hash_k))
1094+
goto duplicate_entries;
9991095

10001096
if (bkey_deleted(k.k)) {
10011097
bch2_trans_iter_exit(trans, &iter);
@@ -1008,18 +1104,66 @@ static int hash_check_key(struct btree_trans *trans,
10081104
return ret;
10091105
bad_hash:
10101106
if (fsck_err(trans, hash_table_key_wrong_offset,
1011-
"hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
1107+
"hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n %s",
10121108
bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
10131109
(printbuf_reset(&buf),
10141110
bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
1015-
ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
1016-
bch_err_fn(c, ret);
1111+
struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, hash_k);
1112+
if (IS_ERR(new))
1113+
return PTR_ERR(new);
1114+
1115+
k = bch2_hash_set_or_get_in_snapshot(trans, &iter, desc, hash_info,
1116+
(subvol_inum) { 0, hash_k.k->p.inode },
1117+
hash_k.k->p.snapshot, new,
1118+
STR_HASH_must_create|
1119+
BTREE_ITER_with_updates|
1120+
BTREE_UPDATE_internal_snapshot_node);
1121+
ret = bkey_err(k);
10171122
if (ret)
1018-
return ret;
1019-
ret = -BCH_ERR_transaction_restart_nested;
1123+
goto out;
1124+
if (k.k)
1125+
goto duplicate_entries;
1126+
1127+
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter,
1128+
BTREE_UPDATE_internal_snapshot_node) ?:
1129+
fsck_update_backpointers(trans, s, desc, hash_info, new) ?:
1130+
bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
1131+
-BCH_ERR_transaction_restart_nested;
1132+
goto out;
10201133
}
10211134
fsck_err:
10221135
goto out;
1136+
duplicate_entries:
1137+
ret = hash_pick_winner(trans, desc, hash_info, hash_k, k);
1138+
if (ret < 0)
1139+
goto out;
1140+
1141+
if (!fsck_err(trans, hash_table_key_duplicate,
1142+
"duplicate hash table keys%s:\n%s",
1143+
ret != 2 ? "" : ", both point to valid inodes",
1144+
(printbuf_reset(&buf),
1145+
bch2_bkey_val_to_text(&buf, c, hash_k),
1146+
prt_newline(&buf),
1147+
bch2_bkey_val_to_text(&buf, c, k),
1148+
buf.buf)))
1149+
goto out;
1150+
1151+
switch (ret) {
1152+
case 0:
1153+
ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
1154+
break;
1155+
case 1:
1156+
ret = bch2_hash_delete_at(trans, desc, hash_info, &iter, 0);
1157+
break;
1158+
case 2:
1159+
ret = fsck_rename_dirent(trans, s, desc, hash_info, bkey_s_c_to_dirent(hash_k)) ?:
1160+
bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0);
1161+
goto out;
1162+
}
1163+
1164+
ret = bch2_trans_commit(trans, NULL, NULL, 0) ?:
1165+
-BCH_ERR_transaction_restart_nested;
1166+
goto out;
10231167
}
10241168

10251169
static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
@@ -2336,7 +2480,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
23362480
*hash_info = bch2_hash_info_init(c, &i->inode);
23372481
dir->first_this_inode = false;
23382482

2339-
ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k);
2483+
ret = hash_check_key(trans, s, bch2_dirent_hash_desc, hash_info, iter, k);
23402484
if (ret < 0)
23412485
goto err;
23422486
if (ret) {
@@ -2450,7 +2594,7 @@ static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
24502594
*hash_info = bch2_hash_info_init(c, &i->inode);
24512595
inode->first_this_inode = false;
24522596

2453-
ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
2597+
ret = hash_check_key(trans, NULL, bch2_xattr_hash_desc, hash_info, iter, k);
24542598
bch_err_fn(c, ret);
24552599
return ret;
24562600
}

0 commit comments

Comments
 (0)