Skip to content

Commit a3a9952

Browse files
image-dragonKernel Patches Daemon
authored and committed
fprobe: use rhltable for fprobe_ip_table
Currently, every kernel function that is hooked by fprobe is added to the hash table "fprobe_ip_table". Its key is the function address, and its value is a "struct fprobe_hlist_node". The number of buckets in the hash table is FPROBE_IP_TABLE_SIZE, which is 256. This means the overhead of a hash table lookup grows linearly once the number of functions hooked by fprobe exceeds 256. When we try to hook all the kernel functions, the overhead becomes huge. Therefore, replace the hash table with an rhltable to reduce the overhead. Signed-off-by: Menglong Dong <[email protected]>
1 parent bf66d41 commit a3a9952

File tree

2 files changed

+87
-67
lines changed

2 files changed

+87
-67
lines changed

include/linux/fprobe.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
#include <linux/ftrace.h>
88
#include <linux/rcupdate.h>
99
#include <linux/refcount.h>
10+
#include <linux/rhashtable.h>
1011
#include <linux/slab.h>
1112

1213
struct fprobe;
@@ -26,7 +27,7 @@ typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
2627
* @fp: The fprobe which owns this.
2728
*/
2829
struct fprobe_hlist_node {
29-
struct hlist_node hlist;
30+
struct rhlist_head hlist;
3031
unsigned long addr;
3132
struct fprobe *fp;
3233
};

kernel/trace/fprobe.c

Lines changed: 85 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#include <linux/kprobes.h>
1111
#include <linux/list.h>
1212
#include <linux/mutex.h>
13+
#include <linux/rhashtable.h>
1314
#include <linux/slab.h>
1415
#include <linux/sort.h>
1516

@@ -41,47 +42,46 @@
4142
* - RCU hlist traversal under disabling preempt
4243
*/
4344
static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
44-
static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE];
45+
static struct rhltable fprobe_ip_table;
4546
static DEFINE_MUTEX(fprobe_mutex);
4647

47-
/*
48-
* Find first fprobe in the hlist. It will be iterated twice in the entry
49-
* probe, once for correcting the total required size, the second time is
50-
* calling back the user handlers.
51-
* Thus the hlist in the fprobe_table must be sorted and new probe needs to
52-
* be added *before* the first fprobe.
53-
*/
54-
static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip)
48+
static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed)
5549
{
56-
struct fprobe_hlist_node *node;
57-
struct hlist_head *head;
50+
return hash_ptr(*(unsigned long **)data, 32);
51+
}
5852

59-
head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
60-
hlist_for_each_entry_rcu(node, head, hlist,
61-
lockdep_is_held(&fprobe_mutex)) {
62-
if (node->addr == ip)
63-
return node;
64-
}
65-
return NULL;
53+
static int fprobe_node_cmp(struct rhashtable_compare_arg *arg,
54+
const void *ptr)
55+
{
56+
unsigned long key = *(unsigned long *)arg->key;
57+
const struct fprobe_hlist_node *n = ptr;
58+
59+
return n->addr != key;
6660
}
67-
NOKPROBE_SYMBOL(find_first_fprobe_node);
6861

69-
/* Node insertion and deletion requires the fprobe_mutex */
70-
static void insert_fprobe_node(struct fprobe_hlist_node *node)
62+
static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed)
7163
{
72-
unsigned long ip = node->addr;
73-
struct fprobe_hlist_node *next;
74-
struct hlist_head *head;
64+
const struct fprobe_hlist_node *n = data;
65+
66+
return hash_ptr((void *)n->addr, 32);
67+
}
68+
69+
static const struct rhashtable_params fprobe_rht_params = {
70+
.head_offset = offsetof(struct fprobe_hlist_node, hlist),
71+
.key_offset = offsetof(struct fprobe_hlist_node, addr),
72+
.key_len = sizeof_field(struct fprobe_hlist_node, addr),
73+
.hashfn = fprobe_node_hashfn,
74+
.obj_hashfn = fprobe_node_obj_hashfn,
75+
.obj_cmpfn = fprobe_node_cmp,
76+
.automatic_shrinking = true,
77+
};
7578

79+
/* Node insertion and deletion requires the fprobe_mutex */
80+
static int insert_fprobe_node(struct fprobe_hlist_node *node)
81+
{
7682
lockdep_assert_held(&fprobe_mutex);
7783

78-
next = find_first_fprobe_node(ip);
79-
if (next) {
80-
hlist_add_before_rcu(&node->hlist, &next->hlist);
81-
return;
82-
}
83-
head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
84-
hlist_add_head_rcu(&node->hlist, head);
84+
return rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params);
8585
}
8686

8787
/* Return true if there are synonyms */
@@ -92,9 +92,11 @@ static bool delete_fprobe_node(struct fprobe_hlist_node *node)
9292
/* Avoid double deleting */
9393
if (READ_ONCE(node->fp) != NULL) {
9494
WRITE_ONCE(node->fp, NULL);
95-
hlist_del_rcu(&node->hlist);
95+
rhltable_remove(&fprobe_ip_table, &node->hlist,
96+
fprobe_rht_params);
9697
}
97-
return !!find_first_fprobe_node(node->addr);
98+
return !!rhltable_lookup(&fprobe_ip_table, &node->addr,
99+
fprobe_rht_params);
98100
}
99101

100102
/* Check existence of the fprobe */
@@ -249,9 +251,10 @@ static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent
249251
static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
250252
struct ftrace_regs *fregs)
251253
{
252-
struct fprobe_hlist_node *node, *first;
253254
unsigned long *fgraph_data = NULL;
254255
unsigned long func = trace->func;
256+
struct fprobe_hlist_node *node;
257+
struct rhlist_head *head, *pos;
255258
unsigned long ret_ip;
256259
int reserved_words;
257260
struct fprobe *fp;
@@ -260,14 +263,11 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
260263
if (WARN_ON_ONCE(!fregs))
261264
return 0;
262265

263-
first = node = find_first_fprobe_node(func);
264-
if (unlikely(!first))
265-
return 0;
266-
266+
head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
267267
reserved_words = 0;
268-
hlist_for_each_entry_from_rcu(node, hlist) {
268+
rhl_for_each_entry_rcu(node, pos, head, hlist) {
269269
if (node->addr != func)
270-
break;
270+
continue;
271271
fp = READ_ONCE(node->fp);
272272
if (!fp || !fp->exit_handler)
273273
continue;
@@ -278,13 +278,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
278278
reserved_words +=
279279
FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
280280
}
281-
node = first;
282281
if (reserved_words) {
283282
fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
284283
if (unlikely(!fgraph_data)) {
285-
hlist_for_each_entry_from_rcu(node, hlist) {
284+
rhl_for_each_entry_rcu(node, pos, head, hlist) {
286285
if (node->addr != func)
287-
break;
286+
continue;
288287
fp = READ_ONCE(node->fp);
289288
if (fp && !fprobe_disabled(fp))
290289
fp->nmissed++;
@@ -299,12 +298,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
299298
*/
300299
ret_ip = ftrace_regs_get_return_address(fregs);
301300
used = 0;
302-
hlist_for_each_entry_from_rcu(node, hlist) {
301+
rhl_for_each_entry_rcu(node, pos, head, hlist) {
303302
int data_size;
304303
void *data;
305304

306305
if (node->addr != func)
307-
break;
306+
continue;
308307
fp = READ_ONCE(node->fp);
309308
if (!fp || fprobe_disabled(fp))
310309
continue;
@@ -448,34 +447,31 @@ static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long ad
448447
return 0;
449448
}
450449

451-
static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head,
452-
struct fprobe_addr_list *alist)
450+
static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
451+
struct fprobe_addr_list *alist)
453452
{
454-
struct fprobe_hlist_node *node;
455453
int ret = 0;
456454

457-
hlist_for_each_entry_rcu(node, head, hlist,
458-
lockdep_is_held(&fprobe_mutex)) {
459-
if (!within_module(node->addr, mod))
460-
continue;
461-
if (delete_fprobe_node(node))
462-
continue;
463-
/*
464-
* If failed to update alist, just continue to update hlist.
465-
* Therefore, at list user handler will not hit anymore.
466-
*/
467-
if (!ret)
468-
ret = fprobe_addr_list_add(alist, node->addr);
469-
}
455+
if (!within_module(node->addr, mod))
456+
return;
457+
if (delete_fprobe_node(node))
458+
return;
459+
/*
460+
* If failed to update alist, just continue to update hlist.
461+
* Therefore, at least the user handler will not be hit anymore.
462+
*/
463+
if (!ret)
464+
ret = fprobe_addr_list_add(alist, node->addr);
470465
}
471466

472467
/* Handle module unloading to manage fprobe_ip_table. */
473468
static int fprobe_module_callback(struct notifier_block *nb,
474469
unsigned long val, void *data)
475470
{
476471
struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
472+
struct fprobe_hlist_node *node;
473+
struct rhashtable_iter iter;
477474
struct module *mod = data;
478-
int i;
479475

480476
if (val != MODULE_STATE_GOING)
481477
return NOTIFY_DONE;
@@ -486,8 +482,16 @@ static int fprobe_module_callback(struct notifier_block *nb,
486482
return NOTIFY_DONE;
487483

488484
mutex_lock(&fprobe_mutex);
489-
for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++)
490-
fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist);
485+
rhltable_walk_enter(&fprobe_ip_table, &iter);
486+
do {
487+
rhashtable_walk_start(&iter);
488+
489+
while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node))
490+
fprobe_remove_node_in_module(mod, node, &alist);
491+
492+
rhashtable_walk_stop(&iter);
493+
} while (node == ERR_PTR(-EAGAIN));
494+
rhashtable_walk_exit(&iter);
491495

492496
if (alist.index < alist.size && alist.index > 0)
493497
ftrace_set_filter_ips(&fprobe_graph_ops.ops,
@@ -727,8 +731,16 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
727731
ret = fprobe_graph_add_ips(addrs, num);
728732
if (!ret) {
729733
add_fprobe_hash(fp);
730-
for (i = 0; i < hlist_array->size; i++)
731-
insert_fprobe_node(&hlist_array->array[i]);
734+
for (i = 0; i < hlist_array->size; i++) {
735+
ret = insert_fprobe_node(&hlist_array->array[i]);
736+
if (ret)
737+
break;
738+
}
739+
/* fallback on insert error */
740+
if (ret) {
741+
for (i--; i >= 0; i--)
742+
delete_fprobe_node(&hlist_array->array[i]);
743+
}
732744
}
733745
mutex_unlock(&fprobe_mutex);
734746

@@ -824,3 +836,10 @@ int unregister_fprobe(struct fprobe *fp)
824836
return ret;
825837
}
826838
EXPORT_SYMBOL_GPL(unregister_fprobe);
839+
840+
static int __init fprobe_initcall(void)
841+
{
842+
rhltable_init(&fprobe_ip_table, &fprobe_rht_params);
843+
return 0;
844+
}
845+
late_initcall(fprobe_initcall);

0 commit comments

Comments
 (0)