Skip to content

Commit 1854c7f

Browse files
committed
Merge tag 'bcachefs-2025-02-12' of git://evilpiepirate.org/bcachefs
Pull bcachefs fixes from Kent Overstreet:
"Just small stuff.

As a general announcement, on disk format is now frozen in my master branch - future on disk format changes will be optional, not required.

- More fixes for going read-only: the previous fix was insufficient, but with more work on ordering journal reclaim flushing (and a btree node accounting fix so we don't split until we have to) the tiering_replication test now consistently goes read-only in less than a second.

- fix for fsck when we have reflink pointers to missing indirect extents

- some transaction restart handling fixes from Alan; the "Pass _orig_restart_count to trans_was_restarted" likely fixes some rare undefined behaviour heisenbugs"

* tag 'bcachefs-2025-02-12' of git://evilpiepirate.org/bcachefs:
- bcachefs: Reuse transaction
- bcachefs: Pass _orig_restart_count to trans_was_restarted
- bcachefs: CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
- bcachefs: Fix want_new_bset() so we write until the end of the btree node
- bcachefs: Split out journal pins by btree level
- bcachefs: Fix use after free
- bcachefs: Fix marking reflink pointers to missing indirect extents
2 parents ab68d7e + 406e445 commit 1854c7f

12 files changed

+100
-27
lines changed

fs/bcachefs/Kconfig

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ config BCACHEFS_DEBUG
6161
The resulting code will be significantly slower than normal; you
6262
probably shouldn't select this option unless you're a developer.
6363

64+
config BCACHEFS_INJECT_TRANSACTION_RESTARTS
65+
bool "Randomly inject transaction restarts"
66+
depends on BCACHEFS_DEBUG
67+
help
68+
Randomly inject transaction restarts in a few core paths - may have a
69+
significant performance penalty
70+
6471
config BCACHEFS_TESTS
6572
bool "bcachefs unit and performance tests"
6673
depends on BCACHEFS_FS

fs/bcachefs/btree_iter.c

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2357,6 +2357,12 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
23572357
bch2_btree_iter_verify_entry_exit(iter);
23582358
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX));
23592359

2360+
ret = trans_maybe_inject_restart(trans, _RET_IP_);
2361+
if (unlikely(ret)) {
2362+
k = bkey_s_c_err(ret);
2363+
goto out_no_locked;
2364+
}
2365+
23602366
if (iter->update_path) {
23612367
bch2_path_put_nokeep(trans, iter->update_path,
23622368
iter->flags & BTREE_ITER_intent);
@@ -2622,6 +2628,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
26222628
bch2_btree_iter_verify_entry_exit(iter);
26232629
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
26242630

2631+
int ret = trans_maybe_inject_restart(trans, _RET_IP_);
2632+
if (unlikely(ret)) {
2633+
k = bkey_s_c_err(ret);
2634+
goto out_no_locked;
2635+
}
2636+
26252637
while (1) {
26262638
k = __bch2_btree_iter_peek_prev(iter, search_key);
26272639
if (unlikely(!k.k))
@@ -2749,6 +2761,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
27492761
bch2_btree_iter_verify_entry_exit(iter);
27502762
EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));
27512763

2764+
ret = trans_maybe_inject_restart(trans, _RET_IP_);
2765+
if (unlikely(ret)) {
2766+
k = bkey_s_c_err(ret);
2767+
goto out_no_locked;
2768+
}
2769+
27522770
/* extents can't span inode numbers: */
27532771
if ((iter->flags & BTREE_ITER_is_extents) &&
27542772
unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
@@ -3106,6 +3124,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
31063124

31073125
WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
31083126

3127+
ret = trans_maybe_inject_restart(trans, _RET_IP_);
3128+
if (ret)
3129+
return ERR_PTR(ret);
3130+
31093131
struct btree_transaction_stats *s = btree_trans_stats(trans);
31103132
s->max_mem = max(s->max_mem, new_bytes);
31113133

@@ -3163,7 +3185,8 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
31633185

31643186
if (old_bytes) {
31653187
trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
3166-
return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
3188+
return ERR_PTR(btree_trans_restart_ip(trans,
3189+
BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
31673190
}
31683191
out_change_top:
31693192
p = trans->mem + trans->mem_top;
@@ -3271,6 +3294,14 @@ u32 bch2_trans_begin(struct btree_trans *trans)
32713294

32723295
trans->last_begin_ip = _RET_IP_;
32733296

3297+
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
3298+
if (trans->restarted) {
3299+
trans->restart_count_this_trans++;
3300+
} else {
3301+
trans->restart_count_this_trans = 0;
3302+
}
3303+
#endif
3304+
32743305
trans_set_locked(trans, false);
32753306

32763307
if (trans->restarted) {

fs/bcachefs/btree_iter.h

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -355,6 +355,18 @@ static int btree_trans_restart(struct btree_trans *trans, int err)
355355
return btree_trans_restart_ip(trans, err, _THIS_IP_);
356356
}
357357

358+
static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip)
359+
{
360+
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
361+
if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) {
362+
trace_and_count(trans->c, trans_restart_injected, trans, ip);
363+
return btree_trans_restart_ip(trans,
364+
BCH_ERR_transaction_restart_fault_inject, ip);
365+
}
366+
#endif
367+
return 0;
368+
}
369+
358370
bool bch2_btree_node_upgrade(struct btree_trans *,
359371
struct btree_path *, unsigned);
360372

@@ -739,7 +751,7 @@ transaction_restart: \
739751
if (!_ret2) \
740752
bch2_trans_verify_not_restarted(_trans, _restart_count);\
741753
\
742-
_ret2 ?: trans_was_restarted(_trans, _restart_count); \
754+
_ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \
743755
})
744756

745757
#define for_each_btree_key_max_continue(_trans, _iter, \

fs/bcachefs/btree_trans_commit.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -999,6 +999,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
999999

10001000
bch2_trans_verify_not_unlocked_or_in_restart(trans);
10011001

1002+
ret = trans_maybe_inject_restart(trans, _RET_IP_);
1003+
if (unlikely(ret))
1004+
goto out_reset;
1005+
10021006
if (!trans->nr_updates &&
10031007
!trans->journal_entries_u64s)
10041008
goto out_reset;

fs/bcachefs/btree_types.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -509,6 +509,9 @@ struct btree_trans {
509509
bool notrace_relock_fail:1;
510510
enum bch_errcode restarted:16;
511511
u32 restart_count;
512+
#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
513+
u32 restart_count_this_trans;
514+
#endif
512515

513516
u64 last_begin_time;
514517
unsigned long last_begin_ip;

fs/bcachefs/btree_update_interior.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -278,12 +278,12 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct bt
278278
{
279279
struct bset_tree *t = bset_tree_last(b);
280280
struct btree_node_entry *bne = max(write_block(b),
281-
(void *) btree_bkey_last(b, bset_tree_last(b)));
281+
(void *) btree_bkey_last(b, t));
282282
ssize_t remaining_space =
283283
__bch2_btree_u64s_remaining(b, bne->keys.start);
284284

285285
if (unlikely(bset_written(b, bset(b, t)))) {
286-
if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
286+
if (b->written + block_sectors(c) <= btree_sectors(c))
287287
return bne;
288288
} else {
289289
if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&

fs/bcachefs/disk_accounting.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,11 +210,13 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *
210210
static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p,
211211
u64 *v, unsigned nr)
212212
{
213+
percpu_down_read(&c->mark_lock);
213214
struct bch_accounting_mem *acc = &c->accounting;
214215
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
215216
accounting_pos_cmp, &p);
216217

217218
bch2_accounting_mem_read_counters(acc, idx, v, nr, false);
219+
percpu_up_read(&c->mark_lock);
218220
}
219221

220222
static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)

fs/bcachefs/io_write.c

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -411,6 +411,16 @@ void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
411411
__bch2_write_op_error(out, op, op->pos.offset);
412412
}
413413

414+
static void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
415+
struct bch_write_op *op, u64 offset)
416+
{
417+
bch2_inum_offset_err_msg_trans(trans, out,
418+
(subvol_inum) { op->subvol, op->pos.inode, },
419+
offset << 9);
420+
prt_printf(out, "write error%s: ",
421+
op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
422+
}
423+
414424
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
415425
enum bch_data_type type,
416426
const struct bkey_i *k,
@@ -1193,7 +1203,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
11931203
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
11941204

11951205
struct printbuf buf = PRINTBUF;
1196-
__bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k));
1206+
bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k));
11971207
prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
11981208
bch_err_ratelimited(c, "%s", buf.buf);
11991209
printbuf_exit(&buf);

fs/bcachefs/journal_reclaim.c

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -384,12 +384,16 @@ void bch2_journal_pin_drop(struct journal *j,
384384
spin_unlock(&j->lock);
385385
}
386386

387-
static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
387+
static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin,
388+
journal_pin_flush_fn fn)
388389
{
389390
if (fn == bch2_btree_node_flush0 ||
390-
fn == bch2_btree_node_flush1)
391-
return JOURNAL_PIN_TYPE_btree;
392-
else if (fn == bch2_btree_key_cache_journal_flush)
391+
fn == bch2_btree_node_flush1) {
392+
unsigned idx = fn == bch2_btree_node_flush1;
393+
struct btree *b = container_of(pin, struct btree, writes[idx].journal);
394+
395+
return JOURNAL_PIN_TYPE_btree0 - b->c.level;
396+
} else if (fn == bch2_btree_key_cache_journal_flush)
393397
return JOURNAL_PIN_TYPE_key_cache;
394398
else
395399
return JOURNAL_PIN_TYPE_other;
@@ -441,7 +445,7 @@ void bch2_journal_pin_copy(struct journal *j,
441445

442446
bool reclaim = __journal_pin_drop(j, dst);
443447

444-
bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));
448+
bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(dst, flush_fn));
445449

446450
if (reclaim)
447451
bch2_journal_reclaim_fast(j);
@@ -465,7 +469,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
465469

466470
bool reclaim = __journal_pin_drop(j, pin);
467471

468-
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));
472+
bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
469473

470474
if (reclaim)
471475
bch2_journal_reclaim_fast(j);
@@ -587,7 +591,7 @@ static size_t journal_flush_pins(struct journal *j,
587591
spin_lock(&j->lock);
588592
/* Pin might have been dropped or rearmed: */
589593
if (likely(!err && !j->flush_in_progress_dropped))
590-
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]);
594+
list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
591595
j->flush_in_progress = NULL;
592596
j->flush_in_progress_dropped = false;
593597
spin_unlock(&j->lock);
@@ -869,18 +873,13 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
869873

870874
mutex_lock(&j->reclaim_lock);
871875

872-
if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
873-
BIT(JOURNAL_PIN_TYPE_key_cache)|
874-
BIT(JOURNAL_PIN_TYPE_other))) {
875-
*did_work = true;
876-
goto unlock;
877-
}
878-
879-
if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
880-
BIT(JOURNAL_PIN_TYPE_btree))) {
881-
*did_work = true;
882-
goto unlock;
883-
}
876+
for (int type = JOURNAL_PIN_TYPE_NR - 1;
877+
type >= 0;
878+
--type)
879+
if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) {
880+
*did_work = true;
881+
goto unlock;
882+
}
884883

885884
if (seq_to_flush > journal_cur_seq(j))
886885
bch2_journal_entry_close(j);

fs/bcachefs/journal_types.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,10 @@ struct journal_buf {
5353
*/
5454

5555
enum journal_pin_type {
56-
JOURNAL_PIN_TYPE_btree,
56+
JOURNAL_PIN_TYPE_btree3,
57+
JOURNAL_PIN_TYPE_btree2,
58+
JOURNAL_PIN_TYPE_btree1,
59+
JOURNAL_PIN_TYPE_btree0,
5760
JOURNAL_PIN_TYPE_key_cache,
5861
JOURNAL_PIN_TYPE_other,
5962
JOURNAL_PIN_TYPE_NR,

0 commit comments

Comments
 (0)