Skip to content

Commit 9b23fdb

Browse files
author
Kent Overstreet
committed
bcachefs: bcachefs_metadata_version_inode_has_child_snapshots
There's an inherent race in taking a snapshot while an unlinked file is open, and then reattaching it in the child snapshot. In the interior snapshot node the file will appear unlinked, as though it should be deleted - it's not referenced by anything in that snapshot - but we can't delete it, because the file data is referenced by the child snapshot. This was being handled incorrectly with propagate_key_to_snapshot_leaves() - but that doesn't resolve the fundamental inconsistency of "this file looks like it should be deleted according to normal rules, but - ". To fix this, we need to fix the rule for when an inode is deleted. The previous rule, ignoring snapshots (there was no well-defined rule for with snapshots) was: Unlinked, non open files are deleted, either at recovery time or during online fsck The new rule is: Unlinked, non open files, that do not exist in child snapshots, are deleted. To make this work transactionally, we add a new inode flag, BCH_INODE_has_child_snapshot; it overrides BCH_INODE_unlinked when considering whether to delete an inode, or put it on the deleted list. For transactional consistency, clearing it handled by the inode trigger: when deleting an inode we check if there are parent inodes which can now have the BCH_INODE_has_child_snapshot flag cleared. Signed-off-by: Kent Overstreet <[email protected]>
1 parent cba31b7 commit 9b23fdb

File tree

9 files changed

+302
-78
lines changed

9 files changed

+302
-78
lines changed

fs/bcachefs/bcachefs_format.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -678,7 +678,8 @@ struct bch_sb_field_ext {
678678
x(disk_accounting_v2, BCH_VERSION(1, 9)) \
679679
x(disk_accounting_v3, BCH_VERSION(1, 10)) \
680680
x(disk_accounting_inum, BCH_VERSION(1, 11)) \
681-
x(rebalance_work_acct_fix, BCH_VERSION(1, 12))
681+
x(rebalance_work_acct_fix, BCH_VERSION(1, 12)) \
682+
x(inode_has_child_snapshots, BCH_VERSION(1, 13))
682683

683684
enum bcachefs_metadata_version {
684685
bcachefs_metadata_version_min = 9,

fs/bcachefs/fs.c

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,11 +174,30 @@ static const struct rhashtable_params bch2_vfs_inodes_params = {
174174
.automatic_shrinking = true,
175175
};
176176

177-
struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
177+
static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
178178
{
179179
return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
180180
}
181181

182+
bool bch2_inode_is_open(struct bch_fs *c, struct bpos p)
183+
{
184+
if (!test_bit(BCH_FS_started, &c->flags))
185+
return false;
186+
187+
subvol_inum inum = {
188+
.subvol = snapshot_t(c, p.snapshot)->subvol,
189+
.inum = p.offset,
190+
};
191+
192+
/* snapshot tree interior node, can't safely delete while online (yet) */
193+
if (!inum.subvol) {
194+
bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot);
195+
return true;
196+
}
197+
198+
return __bch2_inode_hash_find(c, inum) != NULL;
199+
}
200+
182201
static void __wait_on_freeing_inode(struct bch_fs *c,
183202
struct bch_inode_info *inode,
184203
subvol_inum inum)

fs/bcachefs/fs.h

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,6 @@ static inline subvol_inum inode_inum(struct bch_inode_info *inode)
5454
return inode->ei_inum;
5555
}
5656

57-
struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *, subvol_inum);
58-
5957
/*
6058
* Set if we've gotten a btree error for this inode, and thus the vfs inode and
6159
* btree inode may be inconsistent:
@@ -181,6 +179,8 @@ void bch2_inode_update_after_write(struct btree_trans *,
181179
int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
182180
inode_set_fn, void *, unsigned);
183181

182+
bool bch2_inode_is_open(struct bch_fs *c, struct bpos p);
183+
184184
int bch2_setattr_nonsize(struct mnt_idmap *,
185185
struct bch_inode_info *,
186186
struct iattr *);
@@ -198,10 +198,7 @@ int bch2_vfs_init(void);
198198

199199
#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
200200

201-
static inline struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
202-
{
203-
return NULL;
204-
}
201+
static inline bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) { return false; }
205202

206203
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
207204
snapshot_id_list *s) {}

fs/bcachefs/fsck.c

Lines changed: 17 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1096,22 +1096,6 @@ static int check_inode_dirent_inode(struct btree_trans *trans,
10961096
return ret;
10971097
}
10981098

1099-
static bool bch2_inode_is_open(struct bch_fs *c, struct bpos p)
1100-
{
1101-
subvol_inum inum = {
1102-
.subvol = snapshot_t(c, p.snapshot)->subvol,
1103-
.inum = p.offset,
1104-
};
1105-
1106-
/* snapshot tree corruption, can't safely delete */
1107-
if (!inum.subvol) {
1108-
bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot);
1109-
return true;
1110-
}
1111-
1112-
return __bch2_inode_hash_find(c, inum) != NULL;
1113-
}
1114-
11151099
static int check_inode(struct btree_trans *trans,
11161100
struct btree_iter *iter,
11171101
struct bkey_s_c k,
@@ -1184,28 +1168,27 @@ static int check_inode(struct btree_trans *trans,
11841168
ret = 0;
11851169
}
11861170

1187-
if ((u.bi_flags & BCH_INODE_unlinked) &&
1188-
bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) {
1189-
struct bpos new_min_pos;
1190-
1191-
ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos);
1192-
if (ret)
1193-
goto err;
1194-
1195-
u.bi_flags &= ~BCH_INODE_unlinked;
1196-
1197-
ret = __bch2_fsck_write_inode(trans, &u);
1171+
ret = bch2_inode_has_child_snapshots(trans, k.k->p);
1172+
if (ret < 0)
1173+
goto err;
11981174

1199-
bch_err_msg(c, ret, "in fsck updating inode");
1175+
if (fsck_err_on(ret != !!(u.bi_flags & BCH_INODE_has_child_snapshot),
1176+
trans, inode_has_child_snapshots_wrong,
1177+
"inode has_child_snapshots flag wrong (should be %u)\n%s",
1178+
ret,
1179+
(printbuf_reset(&buf),
1180+
bch2_inode_unpacked_to_text(&buf, &u),
1181+
buf.buf))) {
12001182
if (ret)
1201-
goto err_noprint;
1202-
1203-
if (!bpos_eq(new_min_pos, POS_MIN))
1204-
bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos));
1205-
goto err_noprint;
1183+
u.bi_flags |= BCH_INODE_has_child_snapshot;
1184+
else
1185+
u.bi_flags &= ~BCH_INODE_has_child_snapshot;
1186+
do_update = true;
12061187
}
1188+
ret = 0;
12071189

1208-
if (u.bi_flags & BCH_INODE_unlinked) {
1190+
if ((u.bi_flags & BCH_INODE_unlinked) &&
1191+
!(u.bi_flags & BCH_INODE_has_child_snapshot)) {
12091192
if (!test_bit(BCH_FS_started, &c->flags)) {
12101193
/*
12111194
* If we're not in online fsck, don't delete unlinked

0 commit comments

Comments
 (0)