Commit 0b4989e

Merge tag 'bcachefs-2024-06-12' of https://evilpiepirate.org/git/bcachefs
Pull bcachefs fixes from Kent Overstreet:

 - fix kworker explosion, due to calling submit_bio() (which can block)
   from a multithreaded workqueue

 - fix error handling in btree node scan

 - forward compat fix: kill an old debug assert

 - key cache shrinker fixes

   This is a partial fix for stalls doing multithreaded creates - there
   were various O(n^2) issues the key cache shrinker was hitting [1].

   There's more work coming here; I'm working on a patch to delete the
   key cache lock, which initial testing shows to be a pretty drastic
   performance improvement

 - assorted syzbot fixes

Link: https://lore.kernel.org/linux-bcachefs/CAGudoHGenxzk0ZqPXXi1_QDbfqQhGHu+wUwzyS6WmfkUZ1HiXA@mail.gmail.com/ [1]

* tag 'bcachefs-2024-06-12' of https://evilpiepirate.org/git/bcachefs:
  bcachefs: Fix rcu_read_lock() leak in drop_extra_replicas
  bcachefs: Add missing bch_inode_info.ei_flags init
  bcachefs: Add missing synchronize_srcu_expedited() call when shutting down
  bcachefs: Check for invalid bucket from bucket_gen(), gc_bucket()
  bcachefs: Replace bucket_valid() asserts in bucket lookup with proper checks
  bcachefs: Fix snapshot_create_lock lock ordering
  bcachefs: Fix refcount leak in check_fix_ptrs()
  bcachefs: Leave a buffer in the btree key cache to avoid lock thrashing
  bcachefs: Fix reporting of freed objects from key cache shrinker
  bcachefs: set sb->s_shrinker->seeks = 0
  bcachefs: increase key cache shrinker batch size
  bcachefs: Enable automatic shrinking for rhashtables
  bcachefs: fix the display format for show-super
  bcachefs: fix stack frame size in fsck.c
  bcachefs: Delete incorrect BTREE_ID_NR assertion
  bcachefs: Fix incorrect error handling found_btree_node_is_readable()
  bcachefs: Split out btree_write_submit_wq
2 parents: cea2a26 + f2736b9

22 files changed: 344 additions, 220 deletions

fs/bcachefs/alloc_background.c

Lines changed: 20 additions & 2 deletions
@@ -741,6 +741,7 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 			enum btree_iter_update_trigger_flags flags)
 {
 	struct bch_fs *c = trans->c;
+	struct printbuf buf = PRINTBUF;
 	int ret = 0;
 
 	struct bch_dev *ca = bch2_dev_bucket_tryget(c, new.k->p);
@@ -860,8 +861,14 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 		}
 
 		percpu_down_read(&c->mark_lock);
-		if (new_a->gen != old_a->gen)
-			*bucket_gen(ca, new.k->p.offset) = new_a->gen;
+		if (new_a->gen != old_a->gen) {
+			u8 *gen = bucket_gen(ca, new.k->p.offset);
+			if (unlikely(!gen)) {
+				percpu_up_read(&c->mark_lock);
+				goto invalid_bucket;
+			}
+			*gen = new_a->gen;
+		}
 
 		bch2_dev_usage_update(c, ca, old_a, new_a, journal_seq, false);
 		percpu_up_read(&c->mark_lock);
@@ -895,6 +902,11 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 
 		percpu_down_read(&c->mark_lock);
 		struct bucket *g = gc_bucket(ca, new.k->p.offset);
+		if (unlikely(!g)) {
+			percpu_up_read(&c->mark_lock);
+			goto invalid_bucket;
+		}
+		g->gen_valid = 1;
 
 		bucket_lock(g);
 
@@ -910,8 +922,14 @@ int bch2_trigger_alloc(struct btree_trans *trans,
 		percpu_up_read(&c->mark_lock);
 	}
 err:
+	printbuf_exit(&buf);
 	bch2_dev_put(ca);
 	return ret;
+invalid_bucket:
+	bch2_fs_inconsistent(c, "reference to invalid bucket\n %s",
+			     (bch2_bkey_val_to_text(&buf, c, new.s_c), buf.buf));
+	ret = -EIO;
+	goto err;
 }
 
 /*
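
The caller-side handling above relies on bucket_gen() and gc_bucket() returning NULL for an out-of-range bucket instead of asserting; that change lives in headers not included in this excerpt. A minimal, self-contained sketch of the lookup pattern, using kernel types (<linux/types.h>) and an illustrative stand-in struct rather than the bcachefs definitions:

/* Illustrative stand-in for a per-device bucket generation table. */
struct bucket_gens_sketch {
	u64	first_bucket;
	u64	nbuckets;
	u8	gens[];		/* one generation byte per bucket */
};

/*
 * Sketch: return NULL for an out-of-range bucket so the caller can report
 * an inconsistency (as bch2_trigger_alloc() does above) rather than
 * tripping an assert or indexing out of bounds.
 */
static inline u8 *bucket_gen_sketch(struct bucket_gens_sketch *g, u64 bucket)
{
	if (bucket < g->first_bucket || bucket >= g->nbuckets)
		return NULL;
	return &g->gens[bucket - g->first_bucket];
}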

fs/bcachefs/bcachefs.h

Lines changed: 2 additions & 1 deletion
@@ -790,7 +790,8 @@ struct bch_fs {
 
 	/* BTREE CACHE */
 	struct bio_set		btree_bio;
-	struct workqueue_struct	*io_complete_wq;
+	struct workqueue_struct	*btree_read_complete_wq;
+	struct workqueue_struct	*btree_write_submit_wq;
 
 	struct btree_root	btree_roots_known[BTREE_ID_NR];
 	DARRAY(struct btree_root) btree_roots_extra;
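
The hunk above only splits the struct field; allocation of the new workqueues happens in fs/bcachefs/super.c, which is not part of this excerpt. A hedged sketch of the intent behind the split - keep the blocking submit_bio() path off the workqueue that services read completions, so a blocked submission does not make the workqueue core spawn ever more kworkers. The names mirror the fields above; the flags and max_active values are illustrative assumptions, not necessarily what the patch uses:

#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *btree_read_complete_wq;
static struct workqueue_struct *btree_write_submit_wq;

static int sketch_init_btree_wqs(void)
{
	/* Completion work is short and CPU-bound; allow plenty of concurrency. */
	btree_read_complete_wq = alloc_workqueue("btree_read_complete",
						 WQ_HIGHPRI | WQ_MEM_RECLAIM, 512);

	/* Submission may block in submit_bio(); give it its own workqueue. */
	btree_write_submit_wq = alloc_workqueue("btree_write_submit",
						WQ_HIGHPRI | WQ_MEM_RECLAIM, 1);

	if (!btree_read_complete_wq || !btree_write_submit_wq)
		return -ENOMEM;	/* error unwinding omitted in this sketch */
	return 0;
}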

fs/bcachefs/btree_cache.c

Lines changed: 5 additions & 4 deletions
@@ -91,10 +91,11 @@ static int bch2_btree_cache_cmp_fn(struct rhashtable_compare_arg *arg,
 }
 
 static const struct rhashtable_params bch_btree_cache_params = {
-	.head_offset	= offsetof(struct btree, hash),
-	.key_offset	= offsetof(struct btree, hash_val),
-	.key_len	= sizeof(u64),
-	.obj_cmpfn	= bch2_btree_cache_cmp_fn,
+	.head_offset		= offsetof(struct btree, hash),
+	.key_offset		= offsetof(struct btree, hash_val),
+	.key_len		= sizeof(u64),
+	.obj_cmpfn		= bch2_btree_cache_cmp_fn,
+	.automatic_shrinking	= true,
 };
 
 static int btree_node_data_alloc(struct bch_fs *c, struct btree *b, gfp_t gfp)

fs/bcachefs/btree_gc.c

Lines changed: 12 additions & 5 deletions
@@ -874,6 +874,9 @@ static int bch2_alloc_write_key(struct btree_trans *trans,
 	const struct bch_alloc_v4 *old;
 	int ret;
 
+	if (!bucket_valid(ca, k.k->p.offset))
+		return 0;
+
 	old = bch2_alloc_to_v4(k, &old_convert);
 	gc = new = *old;
 
@@ -990,6 +993,8 @@ static int bch2_gc_alloc_start(struct bch_fs *c)
 
 		buckets->first_bucket	= ca->mi.first_bucket;
 		buckets->nbuckets	= ca->mi.nbuckets;
+		buckets->nbuckets_minus_first =
+			buckets->nbuckets - buckets->first_bucket;
 		rcu_assign_pointer(ca->buckets_gc, buckets);
 	}
 
@@ -1003,12 +1008,14 @@ static int bch2_gc_alloc_start(struct bch_fs *c)
 				continue;
 			}
 
-			struct bch_alloc_v4 a_convert;
-			const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
+			if (bucket_valid(ca, k.k->p.offset)) {
+				struct bch_alloc_v4 a_convert;
+				const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &a_convert);
 
-			struct bucket *g = gc_bucket(ca, k.k->p.offset);
-			g->gen_valid = 1;
-			g->gen = a->gen;
+				struct bucket *g = gc_bucket(ca, k.k->p.offset);
+				g->gen_valid = 1;
+				g->gen = a->gen;
+			}
 			0;
 		})));
 	bch2_dev_put(ca);

fs/bcachefs/btree_io.c

Lines changed: 4 additions & 4 deletions
@@ -1389,7 +1389,7 @@ static void btree_node_read_endio(struct bio *bio)
 		bch2_latency_acct(ca, rb->start_time, READ);
 	}
 
-	queue_work(c->io_complete_wq, &rb->work);
+	queue_work(c->btree_read_complete_wq, &rb->work);
 }
 
 struct btree_node_read_all {
@@ -1656,7 +1656,7 @@ static int btree_node_read_all_replicas(struct bch_fs *c, struct btree *b, bool
 		btree_node_read_all_replicas_done(&ra->cl.work);
 	} else {
 		continue_at(&ra->cl, btree_node_read_all_replicas_done,
-			    c->io_complete_wq);
+			    c->btree_read_complete_wq);
 	}
 
 	return 0;
@@ -1737,7 +1737,7 @@ void bch2_btree_node_read(struct btree_trans *trans, struct btree *b,
 		if (sync)
 			btree_node_read_work(&rb->work);
 		else
-			queue_work(c->io_complete_wq, &rb->work);
+			queue_work(c->btree_read_complete_wq, &rb->work);
 	}
 }
 
@@ -2229,7 +2229,7 @@ void __bch2_btree_node_write(struct bch_fs *c, struct btree *b, unsigned flags)
 	atomic64_add(bytes_to_write, &c->btree_write_stats[type].bytes);
 
 	INIT_WORK(&wbio->work, btree_write_submit);
-	queue_work(c->io_complete_wq, &wbio->work);
+	queue_work(c->btree_write_submit_wq, &wbio->work);
 	return;
 err:
 	set_btree_node_noevict(b);

fs/bcachefs/btree_iter.c

Lines changed: 4 additions & 7 deletions
@@ -221,11 +221,8 @@ static void bch2_btree_path_verify(struct btree_trans *trans,
 				   struct btree_path *path)
 {
 	struct bch_fs *c = trans->c;
-	unsigned i;
-
-	EBUG_ON(path->btree_id >= BTREE_ID_NR);
 
-	for (i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
+	for (unsigned i = 0; i < (!path->cached ? BTREE_MAX_DEPTH : 1); i++) {
 		if (!path->l[i].b) {
 			BUG_ON(!path->cached &&
 			       bch2_btree_id_root(c, path->btree_id)->b->c.level > i);
@@ -251,8 +248,6 @@ static void bch2_btree_iter_verify(struct btree_iter *iter)
 {
 	struct btree_trans *trans = iter->trans;
 
-	BUG_ON(iter->btree_id >= BTREE_ID_NR);
-
 	BUG_ON(!!(iter->flags & BTREE_ITER_cached) != btree_iter_path(trans, iter)->cached);
 
 	BUG_ON((iter->flags & BTREE_ITER_is_extents) &&
@@ -3406,8 +3401,10 @@ void bch2_fs_btree_iter_exit(struct bch_fs *c)
 		bch2_time_stats_exit(&s->lock_hold_times);
 	}
 
-	if (c->btree_trans_barrier_initialized)
+	if (c->btree_trans_barrier_initialized) {
+		synchronize_srcu_expedited(&c->btree_trans_barrier);
 		cleanup_srcu_struct(&c->btree_trans_barrier);
+	}
 	mempool_exit(&c->btree_trans_mem_pool);
 	mempool_exit(&c->btree_trans_pool);
 }

fs/bcachefs/btree_key_cache.c

Lines changed: 20 additions & 13 deletions
@@ -32,10 +32,11 @@ static int bch2_btree_key_cache_cmp_fn(struct rhashtable_compare_arg *arg,
 }
 
 static const struct rhashtable_params bch2_btree_key_cache_params = {
-	.head_offset	= offsetof(struct bkey_cached, hash),
-	.key_offset	= offsetof(struct bkey_cached, key),
-	.key_len	= sizeof(struct bkey_cached_key),
-	.obj_cmpfn	= bch2_btree_key_cache_cmp_fn,
+	.head_offset		= offsetof(struct bkey_cached, hash),
+	.key_offset		= offsetof(struct bkey_cached, key),
+	.key_len		= sizeof(struct bkey_cached_key),
+	.obj_cmpfn		= bch2_btree_key_cache_cmp_fn,
+	.automatic_shrinking	= true,
 };
 
 __flatten
@@ -840,7 +841,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 		atomic_long_dec(&bc->nr_freed);
-		freed++;
 		bc->nr_freed_nonpcpu--;
 		bc->freed++;
 	}
@@ -854,7 +854,6 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 		six_lock_exit(&ck->c.lock);
 		kmem_cache_free(bch2_key_cache, ck);
 		atomic_long_dec(&bc->nr_freed);
-		freed++;
 		bc->nr_freed_pcpu--;
 		bc->freed++;
 	}
@@ -876,23 +875,22 @@ static unsigned long bch2_btree_key_cache_scan(struct shrinker *shrink,
 
 		if (test_bit(BKEY_CACHED_DIRTY, &ck->flags)) {
 			bc->skipped_dirty++;
-			goto next;
 		} else if (test_bit(BKEY_CACHED_ACCESSED, &ck->flags)) {
 			clear_bit(BKEY_CACHED_ACCESSED, &ck->flags);
 			bc->skipped_accessed++;
-			goto next;
-		} else if (bkey_cached_lock_for_evict(ck)) {
+		} else if (!bkey_cached_lock_for_evict(ck)) {
+			bc->skipped_lock_fail++;
+		} else {
 			bkey_cached_evict(bc, ck);
 			bkey_cached_free(bc, ck);
 			bc->moved_to_freelist++;
-		} else {
-			bc->skipped_lock_fail++;
+			freed++;
 		}
 
 		scanned++;
 		if (scanned >= nr)
 			break;
-next:
+
 		pos = next;
 	}
 
@@ -917,6 +915,14 @@ static unsigned long bch2_btree_key_cache_count(struct shrinker *shrink,
 	long nr = atomic_long_read(&bc->nr_keys) -
 		  atomic_long_read(&bc->nr_dirty);
 
+	/*
+	 * Avoid hammering our shrinker too much if it's nearly empty - the
+	 * shrinker code doesn't take into account how big our cache is, if it's
+	 * mostly empty but the system is under memory pressure it causes nasty
+	 * lock contention:
+	 */
+	nr -= 128;
+
 	return max(0L, nr);
 }
 
@@ -1025,9 +1031,10 @@ int bch2_fs_btree_key_cache_init(struct btree_key_cache *bc)
 	if (!shrink)
 		return -BCH_ERR_ENOMEM_fs_btree_cache_init;
 	bc->shrink = shrink;
-	shrink->seeks = 0;
 	shrink->count_objects = bch2_btree_key_cache_count;
 	shrink->scan_objects = bch2_btree_key_cache_scan;
+	shrink->batch = 1 << 14;
+	shrink->seeks = 0;
 	shrink->private_data = c;
 	shrinker_register(shrink);
 	return 0;
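
For context on the shrinker tuning above (the larger batch, seeks = 0, and the 128-entry reserve in count_objects), here is a hedged, generic sketch of the registration pattern using the shrinker_alloc()/shrinker_register() API; the callback bodies are placeholders, not the bcachefs implementations:

#include <linux/errno.h>
#include <linux/minmax.h>
#include <linux/shrinker.h>

static unsigned long sketch_count(struct shrinker *shrink,
				  struct shrink_control *sc)
{
	long nr = 100000;	/* placeholder: count of clean, reclaimable entries */

	/* Leave a small reserve so a nearly-empty cache isn't hammered. */
	nr -= 128;
	return max(0L, nr);
}

static unsigned long sketch_scan(struct shrinker *shrink,
				 struct shrink_control *sc)
{
	/* Placeholder: evict up to sc->nr_to_scan clean entries, return the count. */
	return 0;
}

static int sketch_register_shrinker(void *private)
{
	struct shrinker *shrink = shrinker_alloc(0, "sketch-key-cache");

	if (!shrink)
		return -ENOMEM;

	shrink->count_objects	= sketch_count;
	shrink->scan_objects	= sketch_scan;
	shrink->batch		= 1 << 14;	/* scan in bigger chunks, fewer callback calls */
	shrink->seeks		= 0;		/* cached entries are cheap to reconstruct */
	shrink->private_data	= private;
	shrinker_register(shrink);
	return 0;
}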

fs/bcachefs/btree_node_scan.c

Lines changed: 5 additions & 4 deletions
@@ -72,10 +72,11 @@ static bool found_btree_node_is_readable(struct btree_trans *trans,
 
 	struct btree *b = bch2_btree_node_get_noiter(trans, &k.k, f->btree_id, f->level, false);
 	bool ret = !IS_ERR_OR_NULL(b);
-	if (ret) {
-		f->sectors_written = b->written;
-		six_unlock_read(&b->c.lock);
-	}
+	if (!ret)
+		return ret;
+
+	f->sectors_written = b->written;
+	six_unlock_read(&b->c.lock);
 
 	/*
 	 * We might update this node's range; if that happens, we need the node
