Skip to content

Commit 4bbd934

Browse files
mattwuq authored and mhiramat committed
kprobes: kretprobe scalability improvement
kretprobe uses a freelist to manage return instances, but the freelist, being a LIFO queue based on a singly linked list, scales badly and reduces the overall throughput of kretprobed routines, especially in high-contention scenarios.

Here's a typical throughput test of sys_prctl (counts in 10 seconds, measured with perf stat -a -I 10000 -e syscalls:sys_enter_prctl):

OS: Debian 10 X86_64, Linux 6.5rc7 with freelist
HW: XEON 8336C x 2, 64 cores/128 threads, DDR4 3200MT/s

      1T        2T        4T        8T       16T       24T
24150045  29317964  15446741  12494489  18287272  17708768
     32T       48T       64T       72T       96T      128T
16200682  13737658  11645677  11269858  10470118   9931051

This patch introduces objpool to replace freelist. objpool is a high-performance queue which can bring near-linear scalability to kretprobed routines. Tests of kretprobe throughput show a ratio of up to 159x over the original freelist. Here's the result:

                  1T          2T          4T          8T         16T
native:     41186213    82336866   164250978   328662645   658810299
freelist:   24150045    29317964    15446741    12494489    18287272
objpool:    23926730    48010314    96125218   191782984   385091769

                 32T         48T         64T         96T        128T
native:   1330338351  1969957941  2512291791  2615754135  2671040914
freelist:   16200682    13737658    11645677    10470118     9931051
objpool:   764481096  1147149781  1456220214  1502109662  1579015050

Tests on a 96-core ARM64 machine show similar results, with a ratio of up to 448x:

OS: Debian 10 AARCH64, Linux 6.5rc7
HW: Kunpeng-920 96 cores/2 sockets/4 NUMA nodes, DDR4 2933 MT/s

                  1T          2T          4T          8T         16T
native:     30066096    63569843   126194076   257447289   505800181
freelist:   16152090    11064397    11124068     7215768     5663013
objpool:    13997541    28032100    55726624   110099926   221498787

                 24T         32T         48T         64T         96T
native:    763305277  1015925192  1521075123  2033009392  3021013752
freelist:    5015810     4602893     3766792     3382478     2945292
objpool:   328192025   439439564   668534502   887401381  1319972072

Link: https://lore.kernel.org/all/[email protected]/
Signed-off-by: wuqiang.matt <[email protected]>
Acked-by: Masami Hiramatsu (Google) <[email protected]>
Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
1 parent 92f90d3 commit 4bbd934

File tree

5 files changed

+96
-140
lines changed

5 files changed

+96
-140
lines changed

include/linux/kprobes.h

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,7 @@
2626
#include <linux/rcupdate.h>
2727
#include <linux/mutex.h>
2828
#include <linux/ftrace.h>
29-
#include <linux/refcount.h>
30-
#include <linux/freelist.h>
29+
#include <linux/objpool.h>
3130
#include <linux/rethook.h>
3231
#include <asm/kprobes.h>
3332

@@ -141,7 +140,7 @@ static inline bool kprobe_ftrace(struct kprobe *p)
141140
*/
142141
struct kretprobe_holder {
143142
struct kretprobe *rp;
144-
refcount_t ref;
143+
struct objpool_head pool;
145144
};
146145

147146
struct kretprobe {
@@ -154,7 +153,6 @@ struct kretprobe {
154153
#ifdef CONFIG_KRETPROBE_ON_RETHOOK
155154
struct rethook *rh;
156155
#else
157-
struct freelist_head freelist;
158156
struct kretprobe_holder *rph;
159157
#endif
160158
};
@@ -165,10 +163,7 @@ struct kretprobe_instance {
165163
#ifdef CONFIG_KRETPROBE_ON_RETHOOK
166164
struct rethook_node node;
167165
#else
168-
union {
169-
struct freelist_node freelist;
170-
struct rcu_head rcu;
171-
};
166+
struct rcu_head rcu;
172167
struct llist_node llist;
173168
struct kretprobe_holder *rph;
174169
kprobe_opcode_t *ret_addr;

include/linux/rethook.h

Lines changed: 4 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,10 @@
66
#define _LINUX_RETHOOK_H
77

88
#include <linux/compiler.h>
9-
#include <linux/freelist.h>
9+
#include <linux/objpool.h>
1010
#include <linux/kallsyms.h>
1111
#include <linux/llist.h>
1212
#include <linux/rcupdate.h>
13-
#include <linux/refcount.h>
1413

1514
struct rethook_node;
1615

@@ -30,14 +29,12 @@ typedef void (*rethook_handler_t) (struct rethook_node *, void *, unsigned long,
3029
struct rethook {
3130
void *data;
3231
rethook_handler_t handler;
33-
struct freelist_head pool;
34-
refcount_t ref;
32+
struct objpool_head pool;
3533
struct rcu_head rcu;
3634
};
3735

3836
/**
3937
* struct rethook_node - The rethook shadow-stack entry node.
40-
* @freelist: The freelist, linked to struct rethook::pool.
4138
* @rcu: The rcu_head for deferred freeing.
4239
* @llist: The llist, linked to a struct task_struct::rethooks.
4340
* @rethook: The pointer to the struct rethook.
@@ -48,20 +45,16 @@ struct rethook {
4845
* on each entry of the shadow stack.
4946
*/
5047
struct rethook_node {
51-
union {
52-
struct freelist_node freelist;
53-
struct rcu_head rcu;
54-
};
48+
struct rcu_head rcu;
5549
struct llist_node llist;
5650
struct rethook *rethook;
5751
unsigned long ret_addr;
5852
unsigned long frame;
5953
};
6054

61-
struct rethook *rethook_alloc(void *data, rethook_handler_t handler);
55+
struct rethook *rethook_alloc(void *data, rethook_handler_t handler, int size, int num);
6256
void rethook_stop(struct rethook *rh);
6357
void rethook_free(struct rethook *rh);
64-
void rethook_add_node(struct rethook *rh, struct rethook_node *node);
6558
struct rethook_node *rethook_try_get(struct rethook *rh);
6659
void rethook_recycle(struct rethook_node *node);
6760
void rethook_hook(struct rethook_node *node, struct pt_regs *regs, bool mcount);
@@ -98,4 +91,3 @@ void rethook_flush_task(struct task_struct *tk);
9891
#endif
9992

10093
#endif
101-

kernel/kprobes.c

Lines changed: 39 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -1877,13 +1877,27 @@ static struct notifier_block kprobe_exceptions_nb = {
18771877
#ifdef CONFIG_KRETPROBES
18781878

18791879
#if !defined(CONFIG_KRETPROBE_ON_RETHOOK)
1880+
1881+
/* callbacks for objpool of kretprobe instances */
1882+
static int kretprobe_init_inst(void *nod, void *context)
1883+
{
1884+
struct kretprobe_instance *ri = nod;
1885+
1886+
ri->rph = context;
1887+
return 0;
1888+
}
1889+
static int kretprobe_fini_pool(struct objpool_head *head, void *context)
1890+
{
1891+
kfree(context);
1892+
return 0;
1893+
}
1894+
18801895
static void free_rp_inst_rcu(struct rcu_head *head)
18811896
{
18821897
struct kretprobe_instance *ri = container_of(head, struct kretprobe_instance, rcu);
1898+
struct kretprobe_holder *rph = ri->rph;
18831899

1884-
if (refcount_dec_and_test(&ri->rph->ref))
1885-
kfree(ri->rph);
1886-
kfree(ri);
1900+
objpool_drop(ri, &rph->pool);
18871901
}
18881902
NOKPROBE_SYMBOL(free_rp_inst_rcu);
18891903

@@ -1892,7 +1906,7 @@ static void recycle_rp_inst(struct kretprobe_instance *ri)
18921906
struct kretprobe *rp = get_kretprobe(ri);
18931907

18941908
if (likely(rp))
1895-
freelist_add(&ri->freelist, &rp->freelist);
1909+
objpool_push(ri, &rp->rph->pool);
18961910
else
18971911
call_rcu(&ri->rcu, free_rp_inst_rcu);
18981912
}
@@ -1929,23 +1943,12 @@ NOKPROBE_SYMBOL(kprobe_flush_task);
19291943

19301944
static inline void free_rp_inst(struct kretprobe *rp)
19311945
{
1932-
struct kretprobe_instance *ri;
1933-
struct freelist_node *node;
1934-
int count = 0;
1935-
1936-
node = rp->freelist.head;
1937-
while (node) {
1938-
ri = container_of(node, struct kretprobe_instance, freelist);
1939-
node = node->next;
1940-
1941-
kfree(ri);
1942-
count++;
1943-
}
1946+
struct kretprobe_holder *rph = rp->rph;
19441947

1945-
if (refcount_sub_and_test(count, &rp->rph->ref)) {
1946-
kfree(rp->rph);
1947-
rp->rph = NULL;
1948-
}
1948+
if (!rph)
1949+
return;
1950+
rp->rph = NULL;
1951+
objpool_fini(&rph->pool);
19491952
}
19501953

19511954
/* This assumes 'tsk' is the current task or is not running. */
@@ -2087,19 +2090,17 @@ NOKPROBE_SYMBOL(__kretprobe_trampoline_handler)
20872090
static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
20882091
{
20892092
struct kretprobe *rp = container_of(p, struct kretprobe, kp);
2093+
struct kretprobe_holder *rph = rp->rph;
20902094
struct kretprobe_instance *ri;
2091-
struct freelist_node *fn;
20922095

2093-
fn = freelist_try_get(&rp->freelist);
2094-
if (!fn) {
2096+
ri = objpool_pop(&rph->pool);
2097+
if (!ri) {
20952098
rp->nmissed++;
20962099
return 0;
20972100
}
20982101

2099-
ri = container_of(fn, struct kretprobe_instance, freelist);
2100-
21012102
if (rp->entry_handler && rp->entry_handler(ri, regs)) {
2102-
freelist_add(&ri->freelist, &rp->freelist);
2103+
objpool_push(ri, &rph->pool);
21032104
return 0;
21042105
}
21052106

@@ -2193,7 +2194,6 @@ int kprobe_on_func_entry(kprobe_opcode_t *addr, const char *sym, unsigned long o
21932194
int register_kretprobe(struct kretprobe *rp)
21942195
{
21952196
int ret;
2196-
struct kretprobe_instance *inst;
21972197
int i;
21982198
void *addr;
21992199

@@ -2227,19 +2227,12 @@ int register_kretprobe(struct kretprobe *rp)
22272227
rp->maxactive = max_t(unsigned int, 10, 2*num_possible_cpus());
22282228

22292229
#ifdef CONFIG_KRETPROBE_ON_RETHOOK
2230-
rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler);
2231-
if (!rp->rh)
2232-
return -ENOMEM;
2230+
rp->rh = rethook_alloc((void *)rp, kretprobe_rethook_handler,
2231+
sizeof(struct kretprobe_instance) +
2232+
rp->data_size, rp->maxactive);
2233+
if (IS_ERR(rp->rh))
2234+
return PTR_ERR(rp->rh);
22332235

2234-
for (i = 0; i < rp->maxactive; i++) {
2235-
inst = kzalloc(struct_size(inst, data, rp->data_size), GFP_KERNEL);
2236-
if (inst == NULL) {
2237-
rethook_free(rp->rh);
2238-
rp->rh = NULL;
2239-
return -ENOMEM;
2240-
}
2241-
rethook_add_node(rp->rh, &inst->node);
2242-
}
22432236
rp->nmissed = 0;
22442237
/* Establish function entry probe point */
22452238
ret = register_kprobe(&rp->kp);
@@ -2248,24 +2241,18 @@ int register_kretprobe(struct kretprobe *rp)
22482241
rp->rh = NULL;
22492242
}
22502243
#else /* !CONFIG_KRETPROBE_ON_RETHOOK */
2251-
rp->freelist.head = NULL;
22522244
rp->rph = kzalloc(sizeof(struct kretprobe_holder), GFP_KERNEL);
22532245
if (!rp->rph)
22542246
return -ENOMEM;
22552247

2256-
rp->rph->rp = rp;
2257-
for (i = 0; i < rp->maxactive; i++) {
2258-
inst = kzalloc(struct_size(inst, data, rp->data_size), GFP_KERNEL);
2259-
if (inst == NULL) {
2260-
refcount_set(&rp->rph->ref, i);
2261-
free_rp_inst(rp);
2262-
return -ENOMEM;
2263-
}
2264-
inst->rph = rp->rph;
2265-
freelist_add(&inst->freelist, &rp->freelist);
2248+
if (objpool_init(&rp->rph->pool, rp->maxactive, rp->data_size +
2249+
sizeof(struct kretprobe_instance), GFP_KERNEL,
2250+
rp->rph, kretprobe_init_inst, kretprobe_fini_pool)) {
2251+
kfree(rp->rph);
2252+
rp->rph = NULL;
2253+
return -ENOMEM;
22662254
}
2267-
refcount_set(&rp->rph->ref, i);
2268-
2255+
rp->rph->rp = rp;
22692256
rp->nmissed = 0;
22702257
/* Establish function entry probe point */
22712258
ret = register_kprobe(&rp->kp);

kernel/trace/fprobe.c

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -187,7 +187,7 @@ static void fprobe_init(struct fprobe *fp)
187187

188188
static int fprobe_init_rethook(struct fprobe *fp, int num)
189189
{
190-
int i, size;
190+
int size;
191191

192192
if (num < 0)
193193
return -EINVAL;
@@ -205,26 +205,18 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
205205
if (size < 0)
206206
return -E2BIG;
207207

208-
fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler);
209-
if (!fp->rethook)
210-
return -ENOMEM;
211-
for (i = 0; i < size; i++) {
212-
struct fprobe_rethook_node *node;
213-
214-
node = kzalloc(sizeof(*node) + fp->entry_data_size, GFP_KERNEL);
215-
if (!node) {
216-
rethook_free(fp->rethook);
217-
fp->rethook = NULL;
218-
return -ENOMEM;
219-
}
220-
rethook_add_node(fp->rethook, &node->node);
221-
}
208+
/* Initialize rethook */
209+
fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler,
210+
sizeof(struct fprobe_rethook_node), size);
211+
if (IS_ERR(fp->rethook))
212+
return PTR_ERR(fp->rethook);
213+
222214
return 0;
223215
}
224216

225217
static void fprobe_fail_cleanup(struct fprobe *fp)
226218
{
227-
if (fp->rethook) {
219+
if (!IS_ERR_OR_NULL(fp->rethook)) {
228220
/* Don't need to cleanup rethook->handler because this is not used. */
229221
rethook_free(fp->rethook);
230222
fp->rethook = NULL;
@@ -379,14 +371,14 @@ int unregister_fprobe(struct fprobe *fp)
379371
if (!fprobe_is_registered(fp))
380372
return -EINVAL;
381373

382-
if (fp->rethook)
374+
if (!IS_ERR_OR_NULL(fp->rethook))
383375
rethook_stop(fp->rethook);
384376

385377
ret = unregister_ftrace_function(&fp->ops);
386378
if (ret < 0)
387379
return ret;
388380

389-
if (fp->rethook)
381+
if (!IS_ERR_OR_NULL(fp->rethook))
390382
rethook_free(fp->rethook);
391383

392384
ftrace_free_filter(&fp->ops);

0 commit comments

Comments
 (0)