Skip to content

Commit 9d86178

Browse files
author
Kent Overstreet
committed
bcachefs: bch2_inode_or_descendents_is_open()
fsck can now correctly check if inodes in interior snapshot nodes are open/in use. - Tweak the vfs inode rhashtable so that the subvolume ID isn't hashed, meaning inums in different subvolumes will hash to the same slot. Note that this is a hack, and will cause problems if anyone ever has the same file in many different snapshots open all at the same time. - Then check if any of those subvolumes is a descendent of the snapshot ID being checked Signed-off-by: Kent Overstreet <[email protected]>
1 parent 84878e8 commit 9d86178

File tree

4 files changed

+103
-21
lines changed

4 files changed

+103
-21
lines changed

fs/bcachefs/fs.c

Lines changed: 91 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,20 @@ static bool subvol_inum_eq(subvol_inum a, subvol_inum b)
157157
return a.subvol == b.subvol && a.inum == b.inum;
158158
}
159159

160+
static u32 bch2_vfs_inode_hash_fn(const void *data, u32 len, u32 seed)
161+
{
162+
const subvol_inum *inum = data;
163+
164+
return jhash(&inum->inum, sizeof(inum->inum), seed);
165+
}
166+
167+
static u32 bch2_vfs_inode_obj_hash_fn(const void *data, u32 len, u32 seed)
168+
{
169+
const struct bch_inode_info *inode = data;
170+
171+
return bch2_vfs_inode_hash_fn(&inode->ei_inum, sizeof(inode->ei_inum), seed);
172+
}
173+
160174
static int bch2_vfs_inode_cmp_fn(struct rhashtable_compare_arg *arg,
161175
const void *obj)
162176
{
@@ -170,32 +184,93 @@ static const struct rhashtable_params bch2_vfs_inodes_params = {
170184
.head_offset = offsetof(struct bch_inode_info, hash),
171185
.key_offset = offsetof(struct bch_inode_info, ei_inum),
172186
.key_len = sizeof(subvol_inum),
187+
.hashfn = bch2_vfs_inode_hash_fn,
188+
.obj_hashfn = bch2_vfs_inode_obj_hash_fn,
173189
.obj_cmpfn = bch2_vfs_inode_cmp_fn,
174190
.automatic_shrinking = true,
175191
};
176192

177-
static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
193+
int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p)
178194
{
179-
return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
180-
}
195+
struct bch_fs *c = trans->c;
196+
struct rhashtable *ht = &c->vfs_inodes_table;
197+
subvol_inum inum = (subvol_inum) { .inum = p.offset };
198+
DARRAY(u32) subvols;
199+
int ret = 0;
181200

182-
bool bch2_inode_is_open(struct bch_fs *c, struct bpos p)
183-
{
184201
if (!test_bit(BCH_FS_started, &c->flags))
185202
return false;
186203

187-
subvol_inum inum = {
188-
.subvol = snapshot_t(c, p.snapshot)->subvol,
189-
.inum = p.offset,
190-
};
204+
darray_init(&subvols);
205+
restart_from_top:
206+
207+
/*
208+
* Tweaked version of __rhashtable_lookup(); we need to get a list of
209+
* subvolumes in which the given inode number is open.
210+
*
211+
* For this to work, we don't include the subvolume ID in the key that
212+
* we hash - all inodes with the same inode number regardless of
213+
* subvolume will hash to the same slot.
214+
*
215+
* This will be less than ideal if the same file is ever open
216+
* simultaneously in many different snapshots:
217+
*/
218+
rcu_read_lock();
219+
struct rhash_lock_head __rcu *const *bkt;
220+
struct rhash_head *he;
221+
unsigned int hash;
222+
struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht);
223+
restart:
224+
hash = rht_key_hashfn(ht, tbl, &inum, bch2_vfs_inodes_params);
225+
bkt = rht_bucket(tbl, hash);
226+
do {
227+
struct bch_inode_info *inode;
228+
229+
rht_for_each_entry_rcu_from(inode, he, rht_ptr_rcu(bkt), tbl, hash, hash) {
230+
if (inode->ei_inum.inum == inum.inum) {
231+
ret = darray_push_gfp(&subvols, inode->ei_inum.subvol,
232+
GFP_NOWAIT|__GFP_NOWARN);
233+
if (ret) {
234+
rcu_read_unlock();
235+
ret = darray_make_room(&subvols, 1);
236+
if (ret)
237+
goto err;
238+
subvols.nr = 0;
239+
goto restart_from_top;
240+
}
241+
}
242+
}
243+
/* An object might have been moved to a different hash chain,
244+
* while we walk along it - better check and retry.
245+
*/
246+
} while (he != RHT_NULLS_MARKER(bkt));
247+
248+
/* Ensure we see any new tables. */
249+
smp_rmb();
250+
251+
tbl = rht_dereference_rcu(tbl->future_tbl, ht);
252+
if (unlikely(tbl))
253+
goto restart;
254+
rcu_read_unlock();
255+
256+
darray_for_each(subvols, i) {
257+
u32 snap;
258+
ret = bch2_subvolume_get_snapshot(trans, *i, &snap);
259+
if (ret)
260+
goto err;
191261

192-
/* snapshot tree interior node, can't safely delete while online (yet) */
193-
if (!inum.subvol) {
194-
bch_warn_ratelimited(c, "%s(): snapshot %u has no subvol, unlinked but can't safely delete", __func__, p.snapshot);
195-
return true;
262+
ret = bch2_snapshot_is_ancestor(c, snap, p.snapshot);
263+
if (ret)
264+
break;
196265
}
266+
err:
267+
darray_exit(&subvols);
268+
return ret;
269+
}
197270

198-
return __bch2_inode_hash_find(c, inum) != NULL;
271+
static struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
272+
{
273+
return rhashtable_lookup_fast(&c->vfs_inodes_table, &inum, bch2_vfs_inodes_params);
199274
}
200275

201276
static void __wait_on_freeing_inode(struct bch_fs *c,
@@ -271,7 +346,8 @@ static struct bch_inode_info *bch2_inode_hash_insert(struct bch_fs *c,
271346

272347
set_bit(EI_INODE_HASHED, &inode->ei_flags);
273348
retry:
274-
if (unlikely(rhashtable_lookup_insert_fast(&c->vfs_inodes_table,
349+
if (unlikely(rhashtable_lookup_insert_key(&c->vfs_inodes_table,
350+
&inode->ei_inum,
275351
&inode->hash,
276352
bch2_vfs_inodes_params))) {
277353
old = bch2_inode_hash_find(c, trans, inode->ei_inum);

fs/bcachefs/fs.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,8 @@ struct bch_inode_info *
146146
__bch2_create(struct mnt_idmap *, struct bch_inode_info *,
147147
struct dentry *, umode_t, dev_t, subvol_inum, unsigned);
148148

149+
int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p);
150+
149151
int bch2_fs_quota_transfer(struct bch_fs *,
150152
struct bch_inode_info *,
151153
struct bch_qid,
@@ -179,8 +181,6 @@ void bch2_inode_update_after_write(struct btree_trans *,
179181
int __must_check bch2_write_inode(struct bch_fs *, struct bch_inode_info *,
180182
inode_set_fn, void *, unsigned);
181183

182-
bool bch2_inode_is_open(struct bch_fs *c, struct bpos p);
183-
184184
int bch2_setattr_nonsize(struct mnt_idmap *,
185185
struct bch_inode_info *,
186186
struct iattr *);
@@ -198,7 +198,7 @@ int bch2_vfs_init(void);
198198

199199
#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
200200

201-
static inline bool bch2_inode_is_open(struct bch_fs *c, struct bpos p) { return false; }
201+
static inline int bch2_inode_or_descendents_is_open(struct btree_trans *trans, struct bpos p) { return 0; }
202202

203203
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
204204
snapshot_id_list *s) {}

fs/bcachefs/fsck.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1213,14 +1213,19 @@ static int check_inode(struct btree_trans *trans,
12131213
if (ret)
12141214
goto err;
12151215
} else {
1216-
if (fsck_err_on(!bch2_inode_is_open(c, k.k->p),
1216+
ret = bch2_inode_or_descendents_is_open(trans, k.k->p);
1217+
if (ret < 0)
1218+
goto err;
1219+
1220+
if (fsck_err_on(!ret,
12171221
trans, inode_unlinked_and_not_open,
12181222
"inode %llu%u unlinked and not open",
12191223
u.bi_inum, u.bi_snapshot)) {
12201224
ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
12211225
bch_err_msg(c, ret, "in fsck deleting inode");
12221226
goto err_noprint;
12231227
}
1228+
ret = 0;
12241229
}
12251230
}
12261231

fs/bcachefs/inode.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,8 +1244,9 @@ static int delete_ancestor_snapshot_inodes(struct btree_trans *trans, struct bpo
12441244
if (!unlinked)
12451245
return 0;
12461246

1247-
if (bch2_inode_is_open(trans->c, pos))
1248-
return 0;
1247+
ret = lockrestart_do(trans, bch2_inode_or_descendents_is_open(trans, pos));
1248+
if (ret)
1249+
return ret < 0 ? ret : 0;
12491250

12501251
ret = __bch2_inode_rm_snapshot(trans, pos.offset, pos.snapshot);
12511252
if (ret)

0 commit comments

Comments
 (0)