Skip to content

Commit e1dc191

Browse files
committed
Merge tag 'bcachefs-2024-04-10' of https://evilpiepirate.org/git/bcachefs
Pull more bcachefs fixes from Kent Overstreet: "Notable user impacting bugs - On multi device filesystems, recovery was looping in btree_trans_too_many_iters(). This checks if a transaction has touched too many btree paths (because of iteration over many keys), and issues a restart to drop unneeded paths. But it's now possible for some paths to exceed the previous limit without iteration in the interior btree update path, since the transaction commit will do alloc updates for every old and new btree node, and during journal replay we don't use the btree write buffer for locking reasons and thus those updates use btree paths when they wouldn't normally. - Fix a corner case in rebalance when moving extents on a durability=0 device. This wouldn't be hit when a device was formatted with durability=0 since in that case we'll only use it as a write through cache (only cached extents will live on it), but durability can now be changed on an existing device. - bch2_get_acl() could rarely forget to handle a transaction restart; this manifested as the occasional missing acl that came back after dropping caches. - Fix a major performance regression on high iops multithreaded write workloads (only since 6.9-rc1); a previous fix for a deadlock in the interior btree update path to check the journal watermark introduced a dependency on the state of btree write buffer flushing that we didn't want. 
- Assorted other repair paths and recovery fixes" * tag 'bcachefs-2024-04-10' of https://evilpiepirate.org/git/bcachefs: (25 commits) bcachefs: Fix __bch2_btree_and_journal_iter_init_node_iter() bcachefs: Kill read lock dropping in bch2_btree_node_lock_write_nofail() bcachefs: Fix a race in btree_update_nodes_written() bcachefs: btree_node_scan: Respect member.data_allowed bcachefs: Don't scan for btree nodes when we can reconstruct bcachefs: Fix check_topology() when using node scan bcachefs: fix eytzinger0_find_gt() bcachefs: fix bch2_get_acl() transaction restart handling bcachefs: fix the count of nr_freed_pcpu after changing bc->freed_nonpcpu list bcachefs: Fix gap buffer bug in bch2_journal_key_insert_take() bcachefs: Rename struct field swap to prevent macro naming collision MAINTAINERS: Add entry for bcachefs documentation Documentation: filesystems: Add bcachefs toctree bcachefs: JOURNAL_SPACE_LOW bcachefs: Disable errors=panic for BCH_IOCTL_FSCK_OFFLINE bcachefs: Fix BCH_IOCTL_FSCK_OFFLINE for encrypted filesystems bcachefs: fix rand_delete unit test bcachefs: fix ! vs ~ typo in __clear_bit_le64() bcachefs: Fix rebalance from durability=0 device bcachefs: Print shutdown journal sequence number ...
2 parents 346668f + 1189bdd commit e1dc191

27 files changed

+372
-238
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
.. SPDX-License-Identifier: GPL-2.0
2+
3+
======================
4+
bcachefs Documentation
5+
======================
6+
7+
.. toctree::
8+
:maxdepth: 2
9+
:numbered:
10+
11+
errorcodes

Documentation/filesystems/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ Documentation for filesystem implementations.
6969
afs
7070
autofs
7171
autofs-mount-control
72+
bcachefs/index
7273
befs
7374
bfs
7475
btrfs

MAINTAINERS

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3573,6 +3573,7 @@ S: Supported
35733573
C: irc://irc.oftc.net/bcache
35743574
T: git https://evilpiepirate.org/git/bcachefs.git
35753575
F: fs/bcachefs/
3576+
F: Documentation/filesystems/bcachefs/
35763577

35773578
BDISP ST MEDIA DRIVER
35783579
M: Fabien Dessenne <fabien.dessenne@foss.st.com>

fs/bcachefs/acl.c

Lines changed: 14 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -281,7 +281,6 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
281281
struct xattr_search_key search = X_SEARCH(acl_to_xattr_type(type), "", 0);
282282
struct btree_trans *trans = bch2_trans_get(c);
283283
struct btree_iter iter = { NULL };
284-
struct bkey_s_c_xattr xattr;
285284
struct posix_acl *acl = NULL;
286285
struct bkey_s_c k;
287286
int ret;
@@ -290,28 +289,27 @@ struct posix_acl *bch2_get_acl(struct mnt_idmap *idmap,
290289

291290
ret = bch2_hash_lookup(trans, &iter, bch2_xattr_hash_desc,
292291
&hash, inode_inum(inode), &search, 0);
293-
if (ret) {
294-
if (!bch2_err_matches(ret, ENOENT))
295-
acl = ERR_PTR(ret);
296-
goto out;
297-
}
292+
if (ret)
293+
goto err;
298294

299295
k = bch2_btree_iter_peek_slot(&iter);
300296
ret = bkey_err(k);
301-
if (ret) {
302-
acl = ERR_PTR(ret);
303-
goto out;
304-
}
297+
if (ret)
298+
goto err;
305299

306-
xattr = bkey_s_c_to_xattr(k);
300+
struct bkey_s_c_xattr xattr = bkey_s_c_to_xattr(k);
307301
acl = bch2_acl_from_disk(trans, xattr_val(xattr.v),
308-
le16_to_cpu(xattr.v->x_val_len));
302+
le16_to_cpu(xattr.v->x_val_len));
303+
ret = PTR_ERR_OR_ZERO(acl);
304+
err:
305+
if (bch2_err_matches(ret, BCH_ERR_transaction_restart))
306+
goto retry;
309307

310-
if (!IS_ERR(acl))
308+
if (ret)
309+
acl = !bch2_err_matches(ret, ENOENT) ? ERR_PTR(ret) : NULL;
310+
311+
if (!IS_ERR_OR_NULL(acl))
311312
set_cached_acl(&inode->v, type, acl);
312-
out:
313-
if (bch2_err_matches(PTR_ERR_OR_ZERO(acl), BCH_ERR_transaction_restart))
314-
goto retry;
315313

316314
bch2_trans_iter_exit(trans, &iter);
317315
bch2_trans_put(trans);

fs/bcachefs/bcachefs_format.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1535,6 +1535,20 @@ enum btree_id {
15351535
BTREE_ID_NR
15361536
};
15371537

1538+
static inline bool btree_id_is_alloc(enum btree_id id)
1539+
{
1540+
switch (id) {
1541+
case BTREE_ID_alloc:
1542+
case BTREE_ID_backpointers:
1543+
case BTREE_ID_need_discard:
1544+
case BTREE_ID_freespace:
1545+
case BTREE_ID_bucket_gens:
1546+
return true;
1547+
default:
1548+
return false;
1549+
}
1550+
}
1551+
15381552
#define BTREE_MAX_DEPTH 4U
15391553

15401554
/* Btree nodes */

fs/bcachefs/btree_gc.c

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -368,11 +368,16 @@ static int bch2_btree_repair_topology_recurse(struct btree_trans *trans, struct
368368
buf.buf)) {
369369
bch2_btree_node_evict(trans, cur_k.k);
370370
cur = NULL;
371-
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?:
372-
bch2_journal_key_delete(c, b->c.btree_id,
373-
b->c.level, cur_k.k->k.p);
371+
ret = bch2_journal_key_delete(c, b->c.btree_id,
372+
b->c.level, cur_k.k->k.p);
374373
if (ret)
375374
break;
375+
376+
if (!btree_id_is_alloc(b->c.btree_id)) {
377+
ret = bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_scan_for_btree_nodes);
378+
if (ret)
379+
break;
380+
}
376381
continue;
377382
}
378383

@@ -544,12 +549,12 @@ int bch2_check_topology(struct bch_fs *c)
544549
bch2_btree_root_alloc_fake(c, i, 0);
545550
} else {
546551
bch2_btree_root_alloc_fake(c, i, 1);
552+
bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
547553
ret = bch2_get_scanned_nodes(c, i, 0, POS_MIN, SPOS_MAX);
548554
if (ret)
549555
break;
550556
}
551557

552-
bch2_shoot_down_journal_keys(c, i, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
553558
reconstructed_root = true;
554559
}
555560

fs/bcachefs/btree_iter.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ int __bch2_btree_trans_too_many_iters(struct btree_trans *);
642642

643643
static inline int btree_trans_too_many_iters(struct btree_trans *trans)
644644
{
645-
if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_INITIAL - 8)
645+
if (bitmap_weight(trans->paths_allocated, trans->nr_paths) > BTREE_ITER_NORMAL_LIMIT - 8)
646646
return __bch2_btree_trans_too_many_iters(trans);
647647

648648
return 0;

fs/bcachefs/btree_journal_iter.c

Lines changed: 52 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -130,22 +130,45 @@ struct bkey_i *bch2_journal_keys_peek_slot(struct bch_fs *c, enum btree_id btree
130130
return bch2_journal_keys_peek_upto(c, btree_id, level, pos, pos, &idx);
131131
}
132132

133+
static void journal_iter_verify(struct journal_iter *iter)
134+
{
135+
struct journal_keys *keys = iter->keys;
136+
size_t gap_size = keys->size - keys->nr;
137+
138+
BUG_ON(iter->idx >= keys->gap &&
139+
iter->idx < keys->gap + gap_size);
140+
141+
if (iter->idx < keys->size) {
142+
struct journal_key *k = keys->data + iter->idx;
143+
144+
int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
145+
cmp_int(k->level, iter->level);
146+
BUG_ON(cmp < 0);
147+
}
148+
}
149+
133150
static void journal_iters_fix(struct bch_fs *c)
134151
{
135152
struct journal_keys *keys = &c->journal_keys;
136153
/* The key we just inserted is immediately before the gap: */
137154
size_t gap_end = keys->gap + (keys->size - keys->nr);
138-
struct btree_and_journal_iter *iter;
155+
struct journal_key *new_key = &keys->data[keys->gap - 1];
156+
struct journal_iter *iter;
139157

140158
/*
141159
* If an iterator points one after the key we just inserted, decrement
142160
* the iterator so it points at the key we just inserted - if the
143161
* decrement was unnecessary, bch2_btree_and_journal_iter_peek() will
144162
* handle that:
145163
*/
146-
list_for_each_entry(iter, &c->journal_iters, journal.list)
147-
if (iter->journal.idx == gap_end)
148-
iter->journal.idx = keys->gap - 1;
164+
list_for_each_entry(iter, &c->journal_iters, list) {
165+
journal_iter_verify(iter);
166+
if (iter->idx == gap_end &&
167+
new_key->btree_id == iter->btree_id &&
168+
new_key->level == iter->level)
169+
iter->idx = keys->gap - 1;
170+
journal_iter_verify(iter);
171+
}
149172
}
150173

151174
static void journal_iters_move_gap(struct bch_fs *c, size_t old_gap, size_t new_gap)
@@ -192,7 +215,12 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
192215
if (idx > keys->gap)
193216
idx -= keys->size - keys->nr;
194217

218+
size_t old_gap = keys->gap;
219+
195220
if (keys->nr == keys->size) {
221+
journal_iters_move_gap(c, old_gap, keys->size);
222+
old_gap = keys->size;
223+
196224
struct journal_keys new_keys = {
197225
.nr = keys->nr,
198226
.size = max_t(size_t, keys->size, 8) * 2,
@@ -216,7 +244,7 @@ int bch2_journal_key_insert_take(struct bch_fs *c, enum btree_id id,
216244
keys->gap = keys->nr;
217245
}
218246

219-
journal_iters_move_gap(c, keys->gap, idx);
247+
journal_iters_move_gap(c, old_gap, idx);
220248

221249
move_gap(keys, idx);
222250

@@ -301,16 +329,21 @@ static void bch2_journal_iter_advance(struct journal_iter *iter)
301329

302330
static struct bkey_s_c bch2_journal_iter_peek(struct journal_iter *iter)
303331
{
304-
struct journal_key *k = iter->keys->data + iter->idx;
332+
journal_iter_verify(iter);
333+
334+
while (iter->idx < iter->keys->size) {
335+
struct journal_key *k = iter->keys->data + iter->idx;
336+
337+
int cmp = cmp_int(k->btree_id, iter->btree_id) ?:
338+
cmp_int(k->level, iter->level);
339+
if (cmp > 0)
340+
break;
341+
BUG_ON(cmp);
305342

306-
while (k < iter->keys->data + iter->keys->size &&
307-
k->btree_id == iter->btree_id &&
308-
k->level == iter->level) {
309343
if (!k->overwritten)
310344
return bkey_i_to_s_c(k->k);
311345

312346
bch2_journal_iter_advance(iter);
313-
k = iter->keys->data + iter->idx;
314347
}
315348

316349
return bkey_s_c_null;
@@ -330,6 +363,8 @@ static void bch2_journal_iter_init(struct bch_fs *c,
330363
iter->level = level;
331364
iter->keys = &c->journal_keys;
332365
iter->idx = bch2_journal_key_search(&c->journal_keys, id, level, pos);
366+
367+
journal_iter_verify(iter);
333368
}
334369

335370
static struct bkey_s_c bch2_journal_iter_peek_btree(struct btree_and_journal_iter *iter)
@@ -434,10 +469,15 @@ void __bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
434469
iter->trans = trans;
435470
iter->b = b;
436471
iter->node_iter = node_iter;
437-
bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
438-
INIT_LIST_HEAD(&iter->journal.list);
439472
iter->pos = b->data->min_key;
440473
iter->at_end = false;
474+
INIT_LIST_HEAD(&iter->journal.list);
475+
476+
if (trans->journal_replay_not_finished) {
477+
bch2_journal_iter_init(trans->c, &iter->journal, b->c.btree_id, b->c.level, pos);
478+
if (!test_bit(BCH_FS_may_go_rw, &trans->c->flags))
479+
list_add(&iter->journal.list, &trans->c->journal_iters);
480+
}
441481
}
442482

443483
/*
@@ -452,9 +492,6 @@ void bch2_btree_and_journal_iter_init_node_iter(struct btree_trans *trans,
452492

453493
bch2_btree_node_iter_init_from_start(&node_iter, b);
454494
__bch2_btree_and_journal_iter_init_node_iter(trans, iter, b, node_iter, b->data->min_key);
455-
if (trans->journal_replay_not_finished &&
456-
!test_bit(BCH_FS_may_go_rw, &trans->c->flags))
457-
list_add(&iter->journal.list, &trans->c->journal_iters);
458495
}
459496

460497
/* sort and dedup all keys in the journal: */

fs/bcachefs/btree_key_cache.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,7 @@ static void bkey_cached_move_to_freelist(struct btree_key_cache *bc,
169169
} else {
170170
mutex_lock(&bc->lock);
171171
list_move_tail(&ck->list, &bc->freed_pcpu);
172+
bc->nr_freed_pcpu++;
172173
mutex_unlock(&bc->lock);
173174
}
174175
}
@@ -245,6 +246,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path,
245246
if (!list_empty(&bc->freed_pcpu)) {
246247
ck = list_last_entry(&bc->freed_pcpu, struct bkey_cached, list);
247248
list_del_init(&ck->list);
249+
bc->nr_freed_pcpu--;
248250
}
249251
mutex_unlock(&bc->lock);
250252
}
@@ -659,7 +661,7 @@ static int btree_key_cache_flush_pos(struct btree_trans *trans,
659661
commit_flags |= BCH_WATERMARK_reclaim;
660662

661663
if (ck->journal.seq != journal_last_seq(j) ||
662-
j->watermark == BCH_WATERMARK_stripe)
664+
!test_bit(JOURNAL_SPACE_LOW, &c->journal.flags))
663665
commit_flags |= BCH_TRANS_COMMIT_no_journal_res;
664666

665667
ret = bch2_btree_iter_traverse(&b_iter) ?:

fs/bcachefs/btree_locking.c

Lines changed: 1 addition & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -440,33 +440,7 @@ void bch2_btree_node_lock_write_nofail(struct btree_trans *trans,
440440
struct btree_path *path,
441441
struct btree_bkey_cached_common *b)
442442
{
443-
struct btree_path *linked;
444-
unsigned i, iter;
445-
int ret;
446-
447-
/*
448-
* XXX BIG FAT NOTICE
449-
*
450-
* Drop all read locks before taking a write lock:
451-
*
452-
* This is a hack, because bch2_btree_node_lock_write_nofail() is a
453-
* hack - but by dropping read locks first, this should never fail, and
454-
* we only use this in code paths where whatever read locks we've
455-
* already taken are no longer needed:
456-
*/
457-
458-
trans_for_each_path(trans, linked, iter) {
459-
if (!linked->nodes_locked)
460-
continue;
461-
462-
for (i = 0; i < BTREE_MAX_DEPTH; i++)
463-
if (btree_node_read_locked(linked, i)) {
464-
btree_node_unlock(trans, linked, i);
465-
btree_path_set_dirty(linked, BTREE_ITER_NEED_RELOCK);
466-
}
467-
}
468-
469-
ret = __btree_node_lock_write(trans, path, b, true);
443+
int ret = __btree_node_lock_write(trans, path, b, true);
470444
BUG_ON(ret);
471445
}
472446

0 commit comments

Comments
 (0)