Skip to content

Commit 38864ec

Browse files
author
Kent Overstreet
committed
bcachefs: reattach_inode() now correctly handles interior snapshot nodes
When we find an unreachable inode, we now reattach it in the oldest version that needs to be reattached (thus avoiding redundant work reattaching every single version), and we now fix up inode -> dirent backpointers in newer versions as needed - or white out the reattaching dirent in newer versions, if the newer version isn't supposed to be reattached. This results in the second verify fsck now passing cleanly after repairing on a user-provided filesystem image with thousands of different snapshots. Reported-by: Christopher Snowhill <[email protected]> Signed-off-by: Kent Overstreet <[email protected]>
1 parent bade971 commit 38864ec

File tree

2 files changed

+158
-20
lines changed

2 files changed

+158
-20
lines changed

fs/bcachefs/btree_iter.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -857,6 +857,14 @@ struct bkey_s_c bch2_btree_iter_peek_and_restart_outlined(struct btree_iter *);
857857
for_each_btree_key_upto_norestart(_trans, _iter, _btree_id, _start,\
858858
SPOS_MAX, _flags, _k, _ret)
859859

860+
/*
 * for_each_btree_key_reverse_norestart - walk keys of @_btree_id backwards,
 * starting from @_start.
 *
 * Initializes @_iter with bch2_trans_iter_init(), then on every pass stores
 * the result of bch2_btree_iter_peek_prev_type() in @_k and bkey_err() of that
 * key in @_ret.  The loop stops as soon as @_ret is nonzero or @_k carries no
 * key; otherwise bch2_btree_iter_rewind() steps the iterator before the next
 * pass.
 *
 * @_flags is expanded twice (iterator init and every peek), so it must be
 * free of side effects.
 *
 * The macro never calls bch2_trans_iter_exit(); the caller has to do that
 * after the loop, including when the body leaves it with break.
 */
#define for_each_btree_key_reverse_norestart(_trans, _iter, _btree_id,	\
					     _start, _flags, _k, _ret)	\
	for (bch2_trans_iter_init((_trans), &(_iter), (_btree_id),	\
				  (_start), (_flags));			\
	     (_k) = bch2_btree_iter_peek_prev_type(&(_iter), (_flags)),	\
	     !((_ret) = bkey_err(_k)) && (_k).k;			\
	     bch2_btree_iter_rewind(&(_iter)))
867+
860868
#define for_each_btree_key_continue_norestart(_iter, _flags, _k, _ret) \
861869
for_each_btree_key_upto_continue_norestart(_iter, SPOS_MAX, _flags, _k, _ret)
862870

fs/bcachefs/fsck.c

Lines changed: 150 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -326,17 +326,54 @@ static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
326326
return ret;
327327
}
328328

329+
static inline bool inode_should_reattach(struct bch_inode_unpacked *inode)
330+
{
331+
if (inode->bi_inum == BCACHEFS_ROOT_INO &&
332+
inode->bi_subvol == BCACHEFS_ROOT_SUBVOL)
333+
return false;
334+
335+
return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked);
336+
}
337+
338+
/*
 * Look up the dirent key at (d_pos.inode, d_pos.offset) as seen from @snapshot
 * and, if the key that comes back sits exactly at @d_pos, queue a
 * KEY_TYPE_whiteout key at the iterator's position.
 *
 * When the lookup returns a key at any other position nothing is updated and
 * 0 is returned.
 *
 * Returns 0 on success, otherwise the error from the lookup, from
 * bch2_trans_kmalloc() or from bch2_trans_update().
 */
static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c found;
	struct bkey_i *whiteout;
	int ret;

	found = bch2_bkey_get_iter(trans, &iter, BTREE_ID_dirents,
				   SPOS(d_pos.inode, d_pos.offset, snapshot),
				   BTREE_ITER_intent|
				   BTREE_ITER_with_updates);
	ret = bkey_err(found);
	if (ret)
		return ret;

	/* Some other key is visible here: there is nothing to white out. */
	if (!bpos_eq(found.k->p, d_pos))
		goto out;

	/*
	 * delete_at() doesn't work because the update path doesn't
	 * internally use BTREE_ITER_with_updates yet
	 */
	whiteout = bch2_trans_kmalloc(trans, sizeof(*whiteout));
	ret = PTR_ERR_OR_ZERO(whiteout);
	if (ret)
		goto out;

	bkey_init(&whiteout->k);
	whiteout->k.type = KEY_TYPE_whiteout;
	whiteout->k.p	 = iter.pos;
	ret = bch2_trans_update(trans, &iter, whiteout, BTREE_UPDATE_internal_snapshot_node);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
368+
329369
static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode)
330370
{
331371
struct bch_fs *c = trans->c;
332-
struct bch_hash_info dir_hash;
333372
struct bch_inode_unpacked lostfound;
334373
char name_buf[20];
335-
struct qstr name;
336-
u64 dir_offset = 0;
337-
u32 dirent_snapshot = inode->bi_snapshot;
338374
int ret;
339375

376+
u32 dirent_snapshot = inode->bi_snapshot;
340377
if (inode->bi_subvol) {
341378
inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;
342379

@@ -367,9 +404,10 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
367404
if (ret)
368405
return ret;
369406

370-
dir_hash = bch2_hash_info_init(c, &lostfound);
407+
struct bch_hash_info dir_hash = bch2_hash_info_init(c, &lostfound);
408+
struct qstr name = (struct qstr) QSTR(name_buf);
371409

372-
name = (struct qstr) QSTR(name_buf);
410+
inode->bi_dir = lostfound.bi_inum;
373411

374412
ret = bch2_dirent_create_snapshot(trans,
375413
inode->bi_parent_subvol, lostfound.bi_inum,
@@ -378,17 +416,70 @@ static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *
378416
inode_d_type(inode),
379417
&name,
380418
inode->bi_subvol ?: inode->bi_inum,
381-
&dir_offset,
419+
&inode->bi_dir_offset,
382420
STR_HASH_must_create);
383421
if (ret) {
384422
bch_err_msg(c, ret, "error creating dirent");
385423
return ret;
386424
}
387425

388-
inode->bi_dir = lostfound.bi_inum;
389-
inode->bi_dir_offset = dir_offset;
426+
ret = __bch2_fsck_write_inode(trans, inode);
427+
if (ret)
428+
return ret;
429+
430+
/*
431+
* Fix up inodes in child snapshots: if they should also be reattached,
432+
* update the backpointer field; if they should not be, we need to emit
433+
* whiteouts for the dirent we just created.
434+
*/
435+
if (!inode->bi_subvol && bch2_snapshot_is_leaf(c, inode->bi_snapshot) <= 0) {
436+
snapshot_id_list whiteouts_done;
437+
struct btree_iter iter;
438+
struct bkey_s_c k;
439+
440+
darray_init(&whiteouts_done);
390441

391-
return __bch2_fsck_write_inode(trans, inode);
442+
for_each_btree_key_reverse_norestart(trans, iter,
443+
BTREE_ID_inodes, SPOS(0, inode->bi_inum, inode->bi_snapshot - 1),
444+
BTREE_ITER_all_snapshots|BTREE_ITER_intent, k, ret) {
445+
if (k.k->p.offset != inode->bi_inum)
446+
break;
447+
448+
if (!bkey_is_inode(k.k) ||
449+
!bch2_snapshot_is_ancestor(c, k.k->p.snapshot, inode->bi_snapshot) ||
450+
snapshot_list_has_ancestor(c, &whiteouts_done, k.k->p.snapshot))
451+
continue;
452+
453+
struct bch_inode_unpacked child_inode;
454+
bch2_inode_unpack(k, &child_inode);
455+
456+
if (!inode_should_reattach(&child_inode)) {
457+
ret = maybe_delete_dirent(trans,
458+
SPOS(lostfound.bi_inum, inode->bi_dir_offset,
459+
dirent_snapshot),
460+
k.k->p.snapshot);
461+
if (ret)
462+
break;
463+
464+
ret = snapshot_list_add(c, &whiteouts_done, k.k->p.snapshot);
465+
if (ret)
466+
break;
467+
} else {
468+
iter.snapshot = k.k->p.snapshot;
469+
child_inode.bi_dir = inode->bi_dir;
470+
child_inode.bi_dir_offset = inode->bi_dir_offset;
471+
472+
ret = bch2_inode_write_flags(trans, &iter, &child_inode,
473+
BTREE_UPDATE_internal_snapshot_node);
474+
if (ret)
475+
break;
476+
}
477+
}
478+
darray_exit(&whiteouts_done);
479+
bch2_trans_iter_exit(trans, &iter);
480+
}
481+
482+
return ret;
392483
}
393484

394485
static int remove_backpointer(struct btree_trans *trans,
@@ -1292,11 +1383,49 @@ int bch2_check_inodes(struct bch_fs *c)
12921383
return ret;
12931384
}
12941385

1386+
static int find_oldest_inode_needs_reattach(struct btree_trans *trans,
1387+
struct bch_inode_unpacked *inode)
1388+
{
1389+
struct bch_fs *c = trans->c;
1390+
struct btree_iter iter;
1391+
struct bkey_s_c k;
1392+
int ret = 0;
1393+
1394+
/*
1395+
* We look for inodes to reattach in natural key order, leaves first,
1396+
* but we should do the reattach at the oldest version that needs to be
1397+
* reattached:
1398+
*/
1399+
for_each_btree_key_norestart(trans, iter,
1400+
BTREE_ID_inodes,
1401+
SPOS(0, inode->bi_inum, inode->bi_snapshot + 1),
1402+
BTREE_ITER_all_snapshots, k, ret) {
1403+
if (k.k->p.offset != inode->bi_inum)
1404+
break;
1405+
1406+
if (!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, k.k->p.snapshot))
1407+
continue;
1408+
1409+
if (!bkey_is_inode(k.k))
1410+
break;
1411+
1412+
struct bch_inode_unpacked parent_inode;
1413+
bch2_inode_unpack(k, &parent_inode);
1414+
1415+
if (!inode_should_reattach(&parent_inode))
1416+
break;
1417+
1418+
*inode = parent_inode;
1419+
}
1420+
bch2_trans_iter_exit(trans, &iter);
1421+
1422+
return ret;
1423+
}
1424+
12951425
static int check_unreachable_inode(struct btree_trans *trans,
12961426
struct btree_iter *iter,
12971427
struct bkey_s_c k)
12981428
{
1299-
struct bch_fs *c = trans->c;
13001429
struct printbuf buf = PRINTBUF;
13011430
int ret = 0;
13021431

@@ -1306,18 +1435,17 @@ static int check_unreachable_inode(struct btree_trans *trans,
13061435
struct bch_inode_unpacked inode;
13071436
BUG_ON(bch2_inode_unpack(k, &inode));
13081437

1309-
if (inode.bi_subvol)
1438+
if (!inode_should_reattach(&inode))
13101439
return 0;
13111440

1312-
if (inode.bi_flags & BCH_INODE_unlinked)
1313-
return 0;
1441+
ret = find_oldest_inode_needs_reattach(trans, &inode);
1442+
if (ret)
1443+
return ret;
13141444

1315-
if (fsck_err_on(!inode.bi_dir,
1316-
trans, inode_unreachable,
1317-
"unreachable inode:\n%s",
1318-
(printbuf_reset(&buf),
1319-
bch2_bkey_val_to_text(&buf, c, k),
1320-
buf.buf)))
1445+
if (fsck_err(trans, inode_unreachable,
1446+
"unreachable inode:\n%s",
1447+
(bch2_inode_unpacked_to_text(&buf, &inode),
1448+
buf.buf)))
13211449
ret = reattach_inode(trans, &inode);
13221450
fsck_err:
13231451
printbuf_exit(&buf);
@@ -1331,6 +1459,8 @@ static int check_unreachable_inode(struct btree_trans *trans,
13311459
* backpointer fields point to valid dirents, and every inode that has a dirent
13321460
* that points to it has its backpointer field set - so we're just looking for
13331461
* non-unlinked inodes without backpointers:
1462+
*
1463+
* XXX: this is racy w.r.t. hardlink removal in online fsck
13341464
*/
13351465
int bch2_check_unreachable_inodes(struct bch_fs *c)
13361466
{

0 commit comments

Comments
 (0)