Skip to content

Commit aa5888a

Browse files
visitorckw authored and akpm00 committed
lib min_heap: optimize min heap by prescaling counters for better performance
Improve the efficiency of the min heap by prescaling counters, eliminating the need to repeatedly compute 'index * element_size' when accessing elements. By doing so, we avoid the overhead associated with recalculating the byte offset for each heap operation.

However, with prescaling, the calculation for the parent element's location is no longer as simple as '(i - 1) / 2'. To address this, we copy the parent function from 'lib/sort.c', which calculates the parent offset in a branchless manner without using any division instructions.

This optimization should result in a more efficient heap implementation by reducing the computational overhead of finding parent and child offsets.

Link: https://lkml.kernel.org/r/[email protected]
Signed-off-by: Kuan-Wei Chiu <[email protected]>
Cc: Adrian Hunter <[email protected]>
Cc: Arnaldo Carvalho de Melo <[email protected]>
Cc: Ching-Chun (Jim) Huang <[email protected]>
Cc: Coly Li <[email protected]>
Cc: Ian Rogers <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Jiri Olsa <[email protected]>
Cc: Jonathan Corbet <[email protected]>
Cc: Kent Overstreet <[email protected]>
Cc: "Liang, Kan" <[email protected]>
Cc: Mark Rutland <[email protected]>
Cc: Matthew Sakai <[email protected]>
Cc: Matthew Wilcox (Oracle) <[email protected]>
Cc: Namhyung Kim <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
1 parent 92a8b22 commit aa5888a

File tree

1 file changed

+49
-24
lines changed

1 file changed

+49
-24
lines changed

include/linux/min_heap.h

Lines changed: 49 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,32 @@ struct min_heap_callbacks {
3838
void (*swp)(void *lhs, void *rhs, void *args);
3939
};
4040

41+
/**
42+
* parent - given the offset of the child, find the offset of the parent.
43+
* @i: the offset of the heap element whose parent is sought. Non-zero.
44+
* @lsbit: a precomputed 1-bit mask, equal to "size & -size"
45+
* @size: size of each element
46+
*
47+
* In terms of array indexes, the parent of element j = @i/@size is simply
48+
* (j-1)/2. But when working in byte offsets, we can't use implicit
49+
* truncation of integer divides.
50+
*
51+
* Fortunately, we only need one bit of the quotient, not the full divide.
52+
* @size has a least significant bit. That bit will be clear if @i is
53+
* an even multiple of @size, and set if it's an odd multiple.
54+
*
55+
* Logically, we're doing "if (i & lsbit) i -= size;", but since the
56+
* branch is unpredictable, it's done with a bit of clever branch-free
57+
* code instead.
58+
*/
59+
__attribute_const__ __always_inline
60+
static size_t parent(size_t i, unsigned int lsbit, size_t size)
61+
{
62+
i -= size;
63+
i -= size & -(i & lsbit);
64+
return i / 2;
65+
}
66+
4167
/* Initialize a min-heap. */
4268
static __always_inline
4369
void __min_heap_init_inline(min_heap_char *heap, void *data, int size)
@@ -78,33 +104,30 @@ static __always_inline
78104
void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size,
79105
const struct min_heap_callbacks *func, void *args)
80106
{
81-
void *left, *right;
107+
const unsigned long lsbit = elem_size & -elem_size;
82108
void *data = heap->data;
83-
void *root = data + pos * elem_size;
84-
int i = pos, j;
109+
/* pre-scale counters for performance */
110+
size_t a = pos * elem_size;
111+
size_t b, c, d;
112+
size_t n = heap->nr * elem_size;
85113

86114
/* Find the sift-down path all the way to the leaves. */
87-
for (;;) {
88-
if (i * 2 + 2 >= heap->nr)
89-
break;
90-
left = data + (i * 2 + 1) * elem_size;
91-
right = data + (i * 2 + 2) * elem_size;
92-
i = func->less(left, right, args) ? i * 2 + 1 : i * 2 + 2;
93-
}
115+
for (b = a; c = 2 * b + elem_size, (d = c + elem_size) < n;)
116+
b = func->less(data + c, data + d, args) ? c : d;
94117

95118
/* Special case for the last leaf with no sibling. */
96-
if (i * 2 + 2 == heap->nr)
97-
i = i * 2 + 1;
119+
if (d == n)
120+
b = c;
98121

99122
/* Backtrack to the correct location. */
100-
while (i != pos && func->less(root, data + i * elem_size, args))
101-
i = (i - 1) / 2;
123+
while (b != a && func->less(data + a, data + b, args))
124+
b = parent(b, lsbit, elem_size);
102125

103126
/* Shift the element into its correct place. */
104-
j = i;
105-
while (i != pos) {
106-
i = (i - 1) / 2;
107-
func->swp(data + i * elem_size, data + j * elem_size, args);
127+
c = b;
128+
while (b != a) {
129+
b = parent(b, lsbit, elem_size);
130+
func->swp(data + b, data + c, args);
108131
}
109132
}
110133

@@ -117,15 +140,17 @@ static __always_inline
117140
void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx,
118141
const struct min_heap_callbacks *func, void *args)
119142
{
143+
const unsigned long lsbit = elem_size & -elem_size;
120144
void *data = heap->data;
121-
size_t parent;
145+
/* pre-scale counters for performance */
146+
size_t a = idx * elem_size, b;
122147

123-
while (idx) {
124-
parent = (idx - 1) / 2;
125-
if (func->less(data + parent * elem_size, data + idx * elem_size, args))
148+
while (a) {
149+
b = parent(a, lsbit, elem_size);
150+
if (func->less(data + b, data + a, args))
126151
break;
127-
func->swp(data + parent * elem_size, data + idx * elem_size, args);
128-
idx = parent;
152+
func->swp(data + a, data + b, args);
153+
a = b;
129154
}
130155
}
131156

0 commit comments

Comments
 (0)