
Commit 48fd7eb

visitorckwakpm00 authored and committed
Revert "bcache: remove heap-related macros and switch to generic min_heap"
This reverts commit 866898e.

The generic bottom-up min_heap implementation causes a performance regression in invalidate_buckets_lru(), a hot path in bcache. Before the cache is fully populated, new_bucket_prio() often returns zero, leading to many equal comparisons. In such cases, bottom-up sift_down performs up to 2 * log2(n) comparisons, while the original top-down approach completes with just O(1) comparisons, resulting in a measurable performance gap.

The performance degradation is further worsened by the non-inlined min_heap API functions introduced in commit 92a8b22 ("lib/min_heap: introduce non-inline versions of min heap API functions"), adding function call overhead to this critical path.

As reported by Robert, bcache now suffers from latency spikes, with P100 (max) latency increasing from 600 ms to 2.4 seconds every 5 minutes. These regressions degrade bcache's effectiveness as a low-latency cache layer and lead to frequent timeouts and application stalls in production environments.

This revert aims to restore bcache's original low-latency behavior.

Link: https://lore.kernel.org/lkml/CAJhEC05+0S69z+3+FB2Cd0hD+pCRyWTKLEOsc8BOmH73p1m+KQ@mail.gmail.com
Link: https://lkml.kernel.org/r/[email protected]
Fixes: 866898e ("bcache: remove heap-related macros and switch to generic min_heap")
Fixes: 92a8b22 ("lib/min_heap: introduce non-inline versions of min heap API functions")
Signed-off-by: Kuan-Wei Chiu <[email protected]>
Reported-by: Robert Pang <[email protected]>
Closes: https://lore.kernel.org/linux-bcache/CAJhEC06F_AtrPgw2-7CvCqZgeStgCtitbD-ryuPpXQA-JG5XXw@mail.gmail.com
Acked-by: Coly Li <[email protected]>
Cc: Ching-Chun (Jim) Huang <[email protected]>
Cc: Kent Overstreet <[email protected]>
Cc: <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 845f1f2 commit 48fd7eb
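
To make the comparison-count argument above concrete, here is a stand-alone C sketch. It is illustrative only: the function names and counters are assumptions, and it models the two sift-down strategies generically rather than reproducing the kernel's lib/min_heap or bcache code. It counts comparisons and element moves for a single sift-down on a heap whose keys all compare equal, the situation created when new_bucket_prio() returns zero:

#include <stdio.h>

#define N (1 << 16)

static long cmps, moves;

static int less(int a, int b)
{
        cmps++;
        return a < b;
}

/* Top-down sift-down (max-heap): stop as soon as the parent is not smaller. */
static void sift_down_top_down(int *h, size_t n, size_t i)
{
        for (;;) {
                size_t c = 2 * i + 1;

                if (c >= n)
                        return;
                if (c + 1 < n && less(h[c], h[c + 1]))
                        c++;
                if (!less(h[i], h[c]))          /* equal keys terminate here */
                        return;
                int t = h[i]; h[i] = h[c]; h[c] = t; moves++;
                i = c;
        }
}

/* Bottom-up sift-down (max-heap): walk to a leaf first, then climb back up. */
static void sift_down_bottom_up(int *h, size_t n, size_t i)
{
        int val = h[i];
        size_t j = i;

        while (2 * j + 1 < n) {                 /* leaf search: one cmp per level */
                size_t c = 2 * j + 1;

                if (c + 1 < n && less(h[c], h[c + 1]))
                        c++;
                j = c;
        }
        while (j > i && less(h[j], val))        /* climb back to the insertion point */
                j = (j - 1) / 2;
        while (j > i) {                         /* rotate the whole path */
                int t = h[j]; h[j] = val; val = t; moves++;
                j = (j - 1) / 2;
        }
        h[i] = val;
        moves++;
}

int main(void)
{
        static int a[N], b[N];  /* zero-initialized: every key compares equal */

        cmps = moves = 0;
        sift_down_top_down(a, N, 0);
        printf("top-down:  %ld comparisons, %ld moves\n", cmps, moves);

        cmps = moves = 0;
        sift_down_bottom_up(b, N, 0);
        printf("bottom-up: %ld comparisons, %ld moves\n", cmps, moves);
        return 0;
}

With all keys equal, the top-down variant stops after two comparisons and no moves, while the bottom-up variant still pays roughly log2(n) comparisons (up to 2 * log2(n) in the worst case) plus a log2(n)-long chain of element moves; with the non-inlined generic API each of those operations also goes through a function call, which is the overhead the message above describes.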

File tree

11 files changed, 217 insertions(+), 263 deletions(-)


drivers/md/bcache/alloc.c

Lines changed: 17 additions & 47 deletions
@@ -164,68 +164,40 @@ static void bch_invalidate_one_bucket(struct cache *ca, struct bucket *b)
  * prio is worth 1/8th of what INITIAL_PRIO is worth.
  */
 
-static inline unsigned int new_bucket_prio(struct cache *ca, struct bucket *b)
-{
-        unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8;
-
-        return (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);
-}
-
-static inline bool new_bucket_max_cmp(const void *l, const void *r, void *args)
-{
-        struct bucket **lhs = (struct bucket **)l;
-        struct bucket **rhs = (struct bucket **)r;
-        struct cache *ca = args;
-
-        return new_bucket_prio(ca, *lhs) > new_bucket_prio(ca, *rhs);
-}
-
-static inline bool new_bucket_min_cmp(const void *l, const void *r, void *args)
-{
-        struct bucket **lhs = (struct bucket **)l;
-        struct bucket **rhs = (struct bucket **)r;
-        struct cache *ca = args;
-
-        return new_bucket_prio(ca, *lhs) < new_bucket_prio(ca, *rhs);
-}
-
-static inline void new_bucket_swap(void *l, void *r, void __always_unused *args)
-{
-        struct bucket **lhs = l, **rhs = r;
+#define bucket_prio(b)                                                  \
+({                                                                      \
+        unsigned int min_prio = (INITIAL_PRIO - ca->set->min_prio) / 8; \
+                                                                        \
+        (b->prio - ca->set->min_prio + min_prio) * GC_SECTORS_USED(b);  \
+})
 
-        swap(*lhs, *rhs);
-}
+#define bucket_max_cmp(l, r)    (bucket_prio(l) < bucket_prio(r))
+#define bucket_min_cmp(l, r)    (bucket_prio(l) > bucket_prio(r))
 
 static void invalidate_buckets_lru(struct cache *ca)
 {
         struct bucket *b;
-        const struct min_heap_callbacks bucket_max_cmp_callback = {
-                .less = new_bucket_max_cmp,
-                .swp = new_bucket_swap,
-        };
-        const struct min_heap_callbacks bucket_min_cmp_callback = {
-                .less = new_bucket_min_cmp,
-                .swp = new_bucket_swap,
-        };
+        ssize_t i;
 
-        ca->heap.nr = 0;
+        ca->heap.used = 0;
 
         for_each_bucket(b, ca) {
                 if (!bch_can_invalidate_bucket(ca, b))
                         continue;
 
-                if (!min_heap_full(&ca->heap))
-                        min_heap_push(&ca->heap, &b, &bucket_max_cmp_callback, ca);
-                else if (!new_bucket_max_cmp(&b, min_heap_peek(&ca->heap), ca)) {
+                if (!heap_full(&ca->heap))
+                        heap_add(&ca->heap, b, bucket_max_cmp);
+                else if (bucket_max_cmp(b, heap_peek(&ca->heap))) {
                         ca->heap.data[0] = b;
-                        min_heap_sift_down(&ca->heap, 0, &bucket_max_cmp_callback, ca);
+                        heap_sift(&ca->heap, 0, bucket_max_cmp);
                 }
         }
 
-        min_heapify_all(&ca->heap, &bucket_min_cmp_callback, ca);
+        for (i = ca->heap.used / 2 - 1; i >= 0; --i)
+                heap_sift(&ca->heap, i, bucket_min_cmp);
 
         while (!fifo_full(&ca->free_inc)) {
-                if (!ca->heap.nr) {
+                if (!heap_pop(&ca->heap, b, bucket_min_cmp)) {
                         /*
                          * We don't want to be calling invalidate_buckets()
                          * multiple times when it can't do anything
@@ -234,8 +206,6 @@ static void invalidate_buckets_lru(struct cache *ca)
                         wake_up_gc(ca->set);
                         return;
                 }
-                b = min_heap_peek(&ca->heap)[0];
-                min_heap_pop(&ca->heap, &bucket_min_cmp_callback, ca);
 
                 bch_invalidate_one_bucket(ca, b);
         }
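
The hunk above also replaces a callback-driven comparator (struct min_heap_callbacks) with macros that expand inline at every call site. The sketch below is a simplified user-space model of that difference, not kernel code: toy_heap, toy_heap_sift and toy_min_cmp are invented names that only mimic the shape of bcache's heap_sift macro in drivers/md/bcache/util.h, and toy_callbacks mirrors the shape, not the implementation, of the generic API.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct toy_heap {
        size_t used;
        int *data;
};

/*
 * Macro style: the comparator is a macro argument, so every comparison
 * expands to an inline expression at the call site.
 */
#define toy_heap_sift(h, i, cmp)                                        \
do {                                                                    \
        size_t _r, _j = (i);                                            \
                                                                        \
        for (; _j * 2 + 1 < (h)->used; _j = _r) {                       \
                _r = _j * 2 + 1;                                        \
                if (_r + 1 < (h)->used &&                               \
                    cmp((h)->data[_r], (h)->data[_r + 1]))              \
                        _r++;                                           \
                                                                        \
                if (cmp((h)->data[_r], (h)->data[_j]))                  \
                        break;                                          \
                int _tmp = (h)->data[_r];                               \
                (h)->data[_r] = (h)->data[_j];                          \
                (h)->data[_j] = _tmp;                                   \
        }                                                               \
} while (0)

#define toy_min_cmp(l, r)       ((l) > (r))     /* keeps the smallest value on top */

/*
 * Callback style: every comparison and swap is an indirect call, and the
 * heap helpers themselves may be out-of-line library functions.
 */
struct toy_callbacks {
        bool (*less)(const void *l, const void *r, void *args);
        void (*swp)(void *l, void *r, void *args);
};

int main(void)
{
        int v[] = { 5, 1, 4, 2, 3 };
        struct toy_heap h = { .used = 5, .data = v };

        toy_heap_sift(&h, 0, toy_min_cmp);              /* comparator fully inlined */
        printf("root after sift: %d\n", h.data[0]);     /* prints 1 */
        return 0;
}

With the macro form the compiler sees the comparison directly and can fold it into a few instructions; the callback form cannot be inlined across the non-inline helpers added by commit 92a8b22, which is the extra call overhead the commit message cites.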

drivers/md/bcache/bcache.h

Lines changed: 1 addition & 1 deletion
@@ -458,7 +458,7 @@ struct cache {
         /* Allocation stuff: */
         struct bucket           *buckets;
 
-        DEFINE_MIN_HEAP(struct bucket *, cache_heap) heap;
+        DECLARE_HEAP(struct bucket *, heap);
 
         /*
          * If nonzero, we know we aren't going to find any buckets to invalidate

drivers/md/bcache/bset.c

Lines changed: 45 additions & 79 deletions
@@ -54,11 +54,9 @@ void bch_dump_bucket(struct btree_keys *b)
 int __bch_count_data(struct btree_keys *b)
 {
         unsigned int ret = 0;
-        struct btree_iter iter;
+        struct btree_iter_stack iter;
         struct bkey *k;
 
-        min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
         if (b->ops->is_extents)
                 for_each_key(b, k, &iter)
                         ret += KEY_SIZE(k);
@@ -69,11 +67,9 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
 {
         va_list args;
         struct bkey *k, *p = NULL;
-        struct btree_iter iter;
+        struct btree_iter_stack iter;
         const char *err;
 
-        min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
         for_each_key(b, k, &iter) {
                 if (b->ops->is_extents) {
                         err = "Keys out of order";
@@ -114,9 +110,9 @@ void __bch_check_keys(struct btree_keys *b, const char *fmt, ...)
 
 static void bch_btree_iter_next_check(struct btree_iter *iter)
 {
-        struct bkey *k = iter->heap.data->k, *next = bkey_next(k);
+        struct bkey *k = iter->data->k, *next = bkey_next(k);
 
-        if (next < iter->heap.data->end &&
+        if (next < iter->data->end &&
             bkey_cmp(k, iter->b->ops->is_extents ?
                      &START_KEY(next) : next) > 0) {
                 bch_dump_bucket(iter->b);
@@ -883,14 +879,12 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
         unsigned int status = BTREE_INSERT_STATUS_NO_INSERT;
         struct bset *i = bset_tree_last(b)->data;
         struct bkey *m, *prev = NULL;
-        struct btree_iter iter;
+        struct btree_iter_stack iter;
         struct bkey preceding_key_on_stack = ZERO_KEY;
         struct bkey *preceding_key_p = &preceding_key_on_stack;
 
         BUG_ON(b->ops->is_extents && !KEY_SIZE(k));
 
-        min_heap_init(&iter.heap, NULL, MAX_BSETS);
-
         /*
          * If k has preceding key, preceding_key_p will be set to address
          * of k's preceding key; otherwise preceding_key_p will be set
@@ -901,9 +895,9 @@ unsigned int bch_btree_insert_key(struct btree_keys *b, struct bkey *k,
         else
                 preceding_key(k, &preceding_key_p);
 
-        m = bch_btree_iter_init(b, &iter, preceding_key_p);
+        m = bch_btree_iter_stack_init(b, &iter, preceding_key_p);
 
-        if (b->ops->insert_fixup(b, k, &iter, replace_key))
+        if (b->ops->insert_fixup(b, k, &iter.iter, replace_key))
                 return status;
 
         status = BTREE_INSERT_STATUS_INSERT;
@@ -1083,110 +1077,87 @@ struct bkey *__bch_bset_search(struct btree_keys *b, struct bset_tree *t,
 
 /* Btree iterator */
 
-typedef bool (new_btree_iter_cmp_fn)(const void *, const void *, void *);
-
-static inline bool new_btree_iter_cmp(const void *l, const void *r, void __always_unused *args)
-{
-        const struct btree_iter_set *_l = l;
-        const struct btree_iter_set *_r = r;
-
-        return bkey_cmp(_l->k, _r->k) <= 0;
-}
+typedef bool (btree_iter_cmp_fn)(struct btree_iter_set,
+                                 struct btree_iter_set);
 
-static inline void new_btree_iter_swap(void *iter1, void *iter2, void __always_unused *args)
+static inline bool btree_iter_cmp(struct btree_iter_set l,
+                                  struct btree_iter_set r)
 {
-        struct btree_iter_set *_iter1 = iter1;
-        struct btree_iter_set *_iter2 = iter2;
-
-        swap(*_iter1, *_iter2);
+        return bkey_cmp(l.k, r.k) > 0;
 }
 
 static inline bool btree_iter_end(struct btree_iter *iter)
 {
-        return !iter->heap.nr;
+        return !iter->used;
 }
 
 void bch_btree_iter_push(struct btree_iter *iter, struct bkey *k,
                          struct bkey *end)
 {
-        const struct min_heap_callbacks callbacks = {
-                .less = new_btree_iter_cmp,
-                .swp = new_btree_iter_swap,
-        };
-
         if (k != end)
-                BUG_ON(!min_heap_push(&iter->heap,
-                                 &((struct btree_iter_set) { k, end }),
-                                 &callbacks,
-                                 NULL));
+                BUG_ON(!heap_add(iter,
+                                 ((struct btree_iter_set) { k, end }),
+                                 btree_iter_cmp));
 }
 
-static struct bkey *__bch_btree_iter_init(struct btree_keys *b,
-                                          struct btree_iter *iter,
-                                          struct bkey *search,
-                                          struct bset_tree *start)
+static struct bkey *__bch_btree_iter_stack_init(struct btree_keys *b,
                                                struct btree_iter_stack *iter,
                                                struct bkey *search,
                                                struct bset_tree *start)
 {
         struct bkey *ret = NULL;
 
-        iter->heap.size = ARRAY_SIZE(iter->heap.preallocated);
-        iter->heap.nr = 0;
+        iter->iter.size = ARRAY_SIZE(iter->stack_data);
+        iter->iter.used = 0;
 
 #ifdef CONFIG_BCACHE_DEBUG
-        iter->b = b;
+        iter->iter.b = b;
 #endif
 
         for (; start <= bset_tree_last(b); start++) {
                 ret = bch_bset_search(b, start, search);
-                bch_btree_iter_push(iter, ret, bset_bkey_last(start->data));
+                bch_btree_iter_push(&iter->iter, ret, bset_bkey_last(start->data));
         }
 
         return ret;
 }
 
-struct bkey *bch_btree_iter_init(struct btree_keys *b,
-                                 struct btree_iter *iter,
+struct bkey *bch_btree_iter_stack_init(struct btree_keys *b,
                                       struct btree_iter_stack *iter,
                                        struct bkey *search)
 {
-        return __bch_btree_iter_init(b, iter, search, b->set);
+        return __bch_btree_iter_stack_init(b, iter, search, b->set);
 }
 
 static inline struct bkey *__bch_btree_iter_next(struct btree_iter *iter,
-                                                 new_btree_iter_cmp_fn *cmp)
+                                                 btree_iter_cmp_fn *cmp)
 {
         struct btree_iter_set b __maybe_unused;
         struct bkey *ret = NULL;
-        const struct min_heap_callbacks callbacks = {
-                .less = cmp,
-                .swp = new_btree_iter_swap,
-        };
 
         if (!btree_iter_end(iter)) {
                 bch_btree_iter_next_check(iter);
 
-                ret = iter->heap.data->k;
-                iter->heap.data->k = bkey_next(iter->heap.data->k);
+                ret = iter->data->k;
+                iter->data->k = bkey_next(iter->data->k);
 
-                if (iter->heap.data->k > iter->heap.data->end) {
+                if (iter->data->k > iter->data->end) {
                         WARN_ONCE(1, "bset was corrupt!\n");
-                        iter->heap.data->k = iter->heap.data->end;
+                        iter->data->k = iter->data->end;
                 }
 
-                if (iter->heap.data->k == iter->heap.data->end) {
-                        if (iter->heap.nr) {
-                                b = min_heap_peek(&iter->heap)[0];
-                                min_heap_pop(&iter->heap, &callbacks, NULL);
-                        }
-                }
+                if (iter->data->k == iter->data->end)
+                        heap_pop(iter, b, cmp);
                 else
-                        min_heap_sift_down(&iter->heap, 0, &callbacks, NULL);
+                        heap_sift(iter, 0, cmp);
         }
 
         return ret;
 }
 
 struct bkey *bch_btree_iter_next(struct btree_iter *iter)
 {
-        return __bch_btree_iter_next(iter, new_btree_iter_cmp);
+        return __bch_btree_iter_next(iter, btree_iter_cmp);
 
 }
 
@@ -1224,18 +1195,16 @@ static void btree_mergesort(struct btree_keys *b, struct bset *out,
                             struct btree_iter *iter,
                             bool fixup, bool remove_stale)
 {
+        int i;
         struct bkey *k, *last = NULL;
         BKEY_PADDED(k) tmp;
         bool (*bad)(struct btree_keys *, const struct bkey *) = remove_stale
                 ? bch_ptr_bad
                 : bch_ptr_invalid;
-        const struct min_heap_callbacks callbacks = {
-                .less = b->ops->sort_cmp,
-                .swp = new_btree_iter_swap,
-        };
 
         /* Heapify the iterator, using our comparison function */
-        min_heapify_all(&iter->heap, &callbacks, NULL);
+        for (i = iter->used / 2 - 1; i >= 0; --i)
+                heap_sift(iter, i, b->ops->sort_cmp);
 
         while (!btree_iter_end(iter)) {
                 if (b->ops->sort_fixup && fixup)
@@ -1324,11 +1293,10 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
                             struct bset_sort_state *state)
 {
         size_t order = b->page_order, keys = 0;
-        struct btree_iter iter;
+        struct btree_iter_stack iter;
         int oldsize = bch_count_data(b);
 
-        min_heap_init(&iter.heap, NULL, MAX_BSETS);
-        __bch_btree_iter_init(b, &iter, NULL, &b->set[start]);
+        __bch_btree_iter_stack_init(b, &iter, NULL, &b->set[start]);
 
         if (start) {
                 unsigned int i;
@@ -1339,7 +1307,7 @@ void bch_btree_sort_partial(struct btree_keys *b, unsigned int start,
                 order = get_order(__set_bytes(b->set->data, keys));
         }
 
-        __btree_sort(b, &iter, start, order, false, state);
+        __btree_sort(b, &iter.iter, start, order, false, state);
 
         EBUG_ON(oldsize >= 0 && bch_count_data(b) != oldsize);
 }
@@ -1355,13 +1323,11 @@ void bch_btree_sort_into(struct btree_keys *b, struct btree_keys *new,
                          struct bset_sort_state *state)
 {
         uint64_t start_time = local_clock();
-        struct btree_iter iter;
-
-        min_heap_init(&iter.heap, NULL, MAX_BSETS);
+        struct btree_iter_stack iter;
 
-        bch_btree_iter_init(b, &iter, NULL);
+        bch_btree_iter_stack_init(b, &iter, NULL);
 
-        btree_mergesort(b, new->set->data, &iter, false, true);
+        btree_mergesort(b, new->set->data, &iter.iter, false, true);
 
         bch_time_stats_update(&state->time, start_time);
 