Skip to content

Commit cd63a27

Browse files
committed
Merge tag 'bcachefs-2024-06-28' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs fixes from Kent Overstreet: "Simple stuff: - NULL ptr/err ptr deref fixes - fix for getting wedged on shutdown after journal error - fix missing recalc_capacity() call, capacity now changes correctly after a device goes read only however: our capacity calculation still doesn't take into account when we have mixed ro/rw devices and the ro devices have data on them, that's going to be a more involved fix to separate accounting for "capacity used on ro devices" and "capacity used on rw devices" - boring syzbot stuff Slightly more involved: - discard, invalidate workers are now per device this has the effect of simplifying how we take device refs in these paths, and the device ref cleanup fixes a longstanding race between the device removal path and the discard path - fixes for how the debugfs code takes refs on btree_trans objects we have debugfs code that prints in use btree_trans objects. It uses closure_get() on trans->ref, which is mainly for the cycle detector, but the debugfs code was using it on a closure that may have hit 0, which is not allowed; for performance reasons we cannot avoid having not-in-use transactions on the global list. Introduce some new primitives to fix this and make the synchronization here a whole lot saner" * tag 'bcachefs-2024-06-28' of https://evilpiepirate.org/git/bcachefs: bcachefs: Fix kmalloc bug in __snapshot_t_mut bcachefs: Discard, invalidate workers are now per device bcachefs: Fix shift-out-of-bounds in bch2_blacklist_entries_gc bcachefs: slab-use-after-free Read in bch2_sb_errors_from_cpu bcachefs: Add missing bch2_journal_do_writes() call bcachefs: Fix null ptr deref in journal_pins_to_text() bcachefs: Add missing recalc_capacity() call bcachefs: Fix btree_trans list ordering bcachefs: Fix race between trans_put() and btree_transactions_read() closures: closure_get_not_zero(), closure_return_sync() bcachefs: Make btree_deadlock_to_text() clearer bcachefs: fix seqmutex_relock() bcachefs: Fix freeing of error pointers
2 parents cd17613 + 64cd7de commit cd63a27

16 files changed

+342
-207
lines changed

fs/bcachefs/alloc_background.c

Lines changed: 139 additions & 124 deletions
Large diffs are not rendered by default.

fs/bcachefs/alloc_background.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ int bch2_trigger_alloc(struct btree_trans *, enum btree_id, unsigned,
275275
enum btree_iter_update_trigger_flags);
276276
int bch2_check_alloc_info(struct bch_fs *);
277277
int bch2_check_alloc_to_lru_refs(struct bch_fs *);
278+
void bch2_dev_do_discards(struct bch_dev *);
278279
void bch2_do_discards(struct bch_fs *);
279280

280281
static inline u64 should_invalidate_buckets(struct bch_dev *ca,
@@ -289,6 +290,7 @@ static inline u64 should_invalidate_buckets(struct bch_dev *ca,
289290
return clamp_t(s64, want_free - free, 0, u.d[BCH_DATA_cached].buckets);
290291
}
291292

293+
void bch2_dev_do_invalidates(struct bch_dev *);
292294
void bch2_do_invalidates(struct bch_fs *);
293295

294296
static inline struct bch_backpointer *alloc_v4_backpointers(struct bch_alloc_v4 *a)
@@ -312,7 +314,9 @@ u64 bch2_min_rw_member_capacity(struct bch_fs *);
312314
void bch2_dev_allocator_remove(struct bch_fs *, struct bch_dev *);
313315
void bch2_dev_allocator_add(struct bch_fs *, struct bch_dev *);
314316

315-
void bch2_fs_allocator_background_exit(struct bch_fs *);
317+
void bch2_dev_allocator_background_exit(struct bch_dev *);
318+
void bch2_dev_allocator_background_init(struct bch_dev *);
319+
316320
void bch2_fs_allocator_background_init(struct bch_fs *);
317321

318322
#endif /* _BCACHEFS_ALLOC_BACKGROUND_H */

fs/bcachefs/alloc_foreground.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -621,13 +621,13 @@ static struct open_bucket *bch2_bucket_alloc_trans(struct btree_trans *trans,
621621
avail = dev_buckets_free(ca, *usage, watermark);
622622

623623
if (usage->d[BCH_DATA_need_discard].buckets > avail)
624-
bch2_do_discards(c);
624+
bch2_dev_do_discards(ca);
625625

626626
if (usage->d[BCH_DATA_need_gc_gens].buckets > avail)
627627
bch2_gc_gens_async(c);
628628

629629
if (should_invalidate_buckets(ca, *usage))
630-
bch2_do_invalidates(c);
630+
bch2_dev_do_invalidates(ca);
631631

632632
if (!avail) {
633633
if (cl && !waiting) {

fs/bcachefs/bcachefs.h

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,11 @@ struct io_count {
493493
u64 sectors[2][BCH_DATA_NR];
494494
};
495495

496+
struct discard_in_flight {
497+
bool in_progress:1;
498+
u64 bucket:63;
499+
};
500+
496501
struct bch_dev {
497502
struct kobject kobj;
498503
#ifdef CONFIG_BCACHEFS_DEBUG
@@ -554,6 +559,12 @@ struct bch_dev {
554559
size_t inc_gen_really_needs_gc;
555560
size_t buckets_waiting_on_journal;
556561

562+
struct work_struct invalidate_work;
563+
struct work_struct discard_work;
564+
struct mutex discard_buckets_in_flight_lock;
565+
DARRAY(struct discard_in_flight) discard_buckets_in_flight;
566+
struct work_struct discard_fast_work;
567+
557568
atomic64_t rebalance_work;
558569

559570
struct journal_device journal;
@@ -915,11 +926,6 @@ struct bch_fs {
915926
unsigned write_points_nr;
916927

917928
struct buckets_waiting_for_journal buckets_waiting_for_journal;
918-
struct work_struct invalidate_work;
919-
struct work_struct discard_work;
920-
struct mutex discard_buckets_in_flight_lock;
921-
DARRAY(struct bpos) discard_buckets_in_flight;
922-
struct work_struct discard_fast_work;
923929

924930
/* GARBAGE COLLECTION */
925931
struct work_struct gc_gens_work;

fs/bcachefs/btree_iter.c

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3130,7 +3130,6 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
31303130

31313131
trans = mempool_alloc(&c->btree_trans_pool, GFP_NOFS);
31323132
memset(trans, 0, sizeof(*trans));
3133-
closure_init_stack(&trans->ref);
31343133

31353134
seqmutex_lock(&c->btree_trans_lock);
31363135
if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG)) {
@@ -3150,18 +3149,12 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
31503149
BUG_ON(pos_task &&
31513150
pid == pos_task->pid &&
31523151
pos->locked);
3153-
3154-
if (pos_task && pid < pos_task->pid) {
3155-
list_add_tail(&trans->list, &pos->list);
3156-
goto list_add_done;
3157-
}
31583152
}
31593153
}
3160-
list_add_tail(&trans->list, &c->btree_trans_list);
3161-
list_add_done:
3154+
3155+
list_add(&trans->list, &c->btree_trans_list);
31623156
seqmutex_unlock(&c->btree_trans_lock);
31633157
got_trans:
3164-
trans->ref.closure_get_happened = false;
31653158
trans->c = c;
31663159
trans->last_begin_time = local_clock();
31673160
trans->fn_idx = fn_idx;
@@ -3200,6 +3193,8 @@ struct btree_trans *__bch2_trans_get(struct bch_fs *c, unsigned fn_idx)
32003193
trans->srcu_idx = srcu_read_lock(&c->btree_trans_barrier);
32013194
trans->srcu_lock_time = jiffies;
32023195
trans->srcu_held = true;
3196+
3197+
closure_init_stack_release(&trans->ref);
32033198
return trans;
32043199
}
32053200

@@ -3257,10 +3252,10 @@ void bch2_trans_put(struct btree_trans *trans)
32573252
bch2_journal_keys_put(c);
32583253

32593254
/*
3260-
* trans->ref protects trans->locking_wait.task, btree_paths arary; used
3255+
* trans->ref protects trans->locking_wait.task, btree_paths array; used
32613256
* by cycle detector
32623257
*/
3263-
closure_sync(&trans->ref);
3258+
closure_return_sync(&trans->ref);
32643259
trans->locking_wait.task = NULL;
32653260

32663261
unsigned long *paths_allocated = trans->paths_allocated;
@@ -3385,8 +3380,6 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
33853380
per_cpu_ptr(c->btree_trans_bufs, cpu)->trans;
33863381

33873382
if (trans) {
3388-
closure_sync(&trans->ref);
3389-
33903383
seqmutex_lock(&c->btree_trans_lock);
33913384
list_del(&trans->list);
33923385
seqmutex_unlock(&c->btree_trans_lock);

fs/bcachefs/chardev.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -216,7 +216,8 @@ static long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_a
216216

217217
ret = PTR_ERR_OR_ZERO(optstr) ?:
218218
bch2_parse_mount_opts(NULL, &thr->opts, optstr);
219-
kfree(optstr);
219+
if (!IS_ERR(optstr))
220+
kfree(optstr);
220221

221222
if (ret)
222223
goto err;
@@ -319,7 +320,8 @@ static long bch2_ioctl_disk_add(struct bch_fs *c, struct bch_ioctl_disk arg)
319320
return ret;
320321

321322
ret = bch2_dev_add(c, path);
322-
kfree(path);
323+
if (!IS_ERR(path))
324+
kfree(path);
323325

324326
return ret;
325327
}
@@ -850,7 +852,8 @@ static long bch2_ioctl_fsck_online(struct bch_fs *c,
850852

851853
ret = PTR_ERR_OR_ZERO(optstr) ?:
852854
bch2_parse_mount_opts(c, &thr->opts, optstr);
853-
kfree(optstr);
855+
if (!IS_ERR(optstr))
856+
kfree(optstr);
854857

855858
if (ret)
856859
goto err;

fs/bcachefs/debug.c

Lines changed: 69 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -568,48 +568,72 @@ static const struct file_operations cached_btree_nodes_ops = {
568568
.read = bch2_cached_btree_nodes_read,
569569
};
570570

571+
typedef int (*list_cmp_fn)(const struct list_head *l, const struct list_head *r);
572+
573+
static void list_sort(struct list_head *head, list_cmp_fn cmp)
574+
{
575+
struct list_head *pos;
576+
577+
list_for_each(pos, head)
578+
while (!list_is_last(pos, head) &&
579+
cmp(pos, pos->next) > 0) {
580+
struct list_head *pos2, *next = pos->next;
581+
582+
list_del(next);
583+
list_for_each(pos2, head)
584+
if (cmp(next, pos2) < 0)
585+
goto pos_found;
586+
BUG();
587+
pos_found:
588+
list_add_tail(next, pos2);
589+
}
590+
}
591+
592+
static int list_ptr_order_cmp(const struct list_head *l, const struct list_head *r)
593+
{
594+
return cmp_int(l, r);
595+
}
596+
571597
static ssize_t bch2_btree_transactions_read(struct file *file, char __user *buf,
572598
size_t size, loff_t *ppos)
573599
{
574600
struct dump_iter *i = file->private_data;
575601
struct bch_fs *c = i->c;
576602
struct btree_trans *trans;
577603
ssize_t ret = 0;
578-
u32 seq;
579604

580605
i->ubuf = buf;
581606
i->size = size;
582607
i->ret = 0;
583608
restart:
584609
seqmutex_lock(&c->btree_trans_lock);
585-
list_for_each_entry(trans, &c->btree_trans_list, list) {
586-
struct task_struct *task = READ_ONCE(trans->locking_wait.task);
610+
list_sort(&c->btree_trans_list, list_ptr_order_cmp);
587611

588-
if (!task || task->pid <= i->iter)
612+
list_for_each_entry(trans, &c->btree_trans_list, list) {
613+
if ((ulong) trans < i->iter)
589614
continue;
590615

591-
closure_get(&trans->ref);
592-
seq = seqmutex_seq(&c->btree_trans_lock);
593-
seqmutex_unlock(&c->btree_trans_lock);
616+
i->iter = (ulong) trans;
594617

595-
ret = flush_buf(i);
596-
if (ret) {
597-
closure_put(&trans->ref);
598-
goto unlocked;
599-
}
618+
if (!closure_get_not_zero(&trans->ref))
619+
continue;
620+
621+
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
600622

601623
bch2_btree_trans_to_text(&i->buf, trans);
602624

603625
prt_printf(&i->buf, "backtrace:\n");
604626
printbuf_indent_add(&i->buf, 2);
605-
bch2_prt_task_backtrace(&i->buf, task, 0, GFP_KERNEL);
627+
bch2_prt_task_backtrace(&i->buf, trans->locking_wait.task, 0, GFP_KERNEL);
606628
printbuf_indent_sub(&i->buf, 2);
607629
prt_newline(&i->buf);
608630

609-
i->iter = task->pid;
610-
611631
closure_put(&trans->ref);
612632

633+
ret = flush_buf(i);
634+
if (ret)
635+
goto unlocked;
636+
613637
if (!seqmutex_relock(&c->btree_trans_lock, seq))
614638
goto restart;
615639
}
@@ -804,50 +828,55 @@ static const struct file_operations btree_transaction_stats_op = {
804828
.read = btree_transaction_stats_read,
805829
};
806830

807-
static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
808-
size_t size, loff_t *ppos)
831+
/* walk btree transactions until we find a deadlock and print it */
832+
static void btree_deadlock_to_text(struct printbuf *out, struct bch_fs *c)
809833
{
810-
struct dump_iter *i = file->private_data;
811-
struct bch_fs *c = i->c;
812834
struct btree_trans *trans;
813-
ssize_t ret = 0;
814-
u32 seq;
815-
816-
i->ubuf = buf;
817-
i->size = size;
818-
i->ret = 0;
819-
820-
if (i->iter)
821-
goto out;
835+
pid_t iter = 0;
822836
restart:
823837
seqmutex_lock(&c->btree_trans_lock);
824838
list_for_each_entry(trans, &c->btree_trans_list, list) {
825839
struct task_struct *task = READ_ONCE(trans->locking_wait.task);
826840

827-
if (!task || task->pid <= i->iter)
841+
if (!task || task->pid <= iter)
828842
continue;
829843

830-
closure_get(&trans->ref);
831-
seq = seqmutex_seq(&c->btree_trans_lock);
832-
seqmutex_unlock(&c->btree_trans_lock);
844+
iter = task->pid;
833845

834-
ret = flush_buf(i);
835-
if (ret) {
836-
closure_put(&trans->ref);
837-
goto out;
838-
}
846+
if (!closure_get_not_zero(&trans->ref))
847+
continue;
839848

840-
bch2_check_for_deadlock(trans, &i->buf);
849+
u32 seq = seqmutex_unlock(&c->btree_trans_lock);
841850

842-
i->iter = task->pid;
851+
bool found = bch2_check_for_deadlock(trans, out) != 0;
843852

844853
closure_put(&trans->ref);
845854

855+
if (found)
856+
return;
857+
846858
if (!seqmutex_relock(&c->btree_trans_lock, seq))
847859
goto restart;
848860
}
849861
seqmutex_unlock(&c->btree_trans_lock);
850-
out:
862+
}
863+
864+
static ssize_t bch2_btree_deadlock_read(struct file *file, char __user *buf,
865+
size_t size, loff_t *ppos)
866+
{
867+
struct dump_iter *i = file->private_data;
868+
struct bch_fs *c = i->c;
869+
ssize_t ret = 0;
870+
871+
i->ubuf = buf;
872+
i->size = size;
873+
i->ret = 0;
874+
875+
if (!i->iter) {
876+
btree_deadlock_to_text(&i->buf, c);
877+
i->iter++;
878+
}
879+
851880
if (i->buf.allocation_failure)
852881
ret = -ENOMEM;
853882

fs/bcachefs/journal.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1521,6 +1521,11 @@ bool bch2_journal_seq_pins_to_text(struct printbuf *out, struct journal *j, u64
15211521
struct journal_entry_pin *pin;
15221522

15231523
spin_lock(&j->lock);
1524+
if (!test_bit(JOURNAL_running, &j->flags)) {
1525+
spin_unlock(&j->lock);
1526+
return true;
1527+
}
1528+
15241529
*seq = max(*seq, j->pin.front);
15251530

15261531
if (*seq >= j->pin.back) {

fs/bcachefs/journal_io.c

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1677,6 +1677,13 @@ static CLOSURE_CALLBACK(journal_write_done)
16771677
mod_delayed_work(j->wq, &j->write_work, max(0L, delta));
16781678
}
16791679

1680+
/*
1681+
* We don't typically trigger journal writes from her - the next journal
1682+
* write will be triggered immediately after the previous one is
1683+
* allocated, in bch2_journal_write() - but the journal write error path
1684+
* is special:
1685+
*/
1686+
bch2_journal_do_writes(j);
16801687
spin_unlock(&j->lock);
16811688
}
16821689

fs/bcachefs/journal_seq_blacklist.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ bool bch2_blacklist_entries_gc(struct bch_fs *c)
232232
BUG_ON(nr != t->nr);
233233

234234
unsigned i;
235-
for (src = bl->start, i = eytzinger0_first(t->nr);
235+
for (src = bl->start, i = t->nr == 0 ? 0 : eytzinger0_first(t->nr);
236236
src < bl->start + nr;
237237
src++, i = eytzinger0_next(i, nr)) {
238238
BUG_ON(t->entries[i].start != le64_to_cpu(src->start));

0 commit comments

Comments
 (0)