Skip to content

Commit 6545e7d

Browse files
image-dragon authored and Kernel Patches Daemon committed
fprobe: use rhashtable
For now, all the kernel functions who are hooked by the fprobe will be added to the hash table "fprobe_ip_table". The key of it is the function address, and the value of it is "struct fprobe_hlist_node". The budget of the hash table is FPROBE_IP_TABLE_SIZE, which is 256. And this means the overhead of the hash table lookup will grow linearly if the count of the functions in the fprobe more than 256. When we try to hook all the kernel functions, the overhead will be huge. Therefore, replace the hash table with rhashtable to reduce the overhead. Signed-off-by: Menglong Dong <[email protected]>
1 parent 6978e3e commit 6545e7d

File tree

2 files changed

+82
-64
lines changed

2 files changed

+82
-64
lines changed

include/linux/fprobe.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
2626
* @fp: The fprobe which owns this.
2727
*/
2828
struct fprobe_hlist_node {
29-
struct hlist_node hlist;
29+
struct rhash_head hlist;
3030
unsigned long addr;
3131
struct fprobe *fp;
3232
};

kernel/trace/fprobe.c

Lines changed: 81 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include <linux/mutex.h>
1313
#include <linux/slab.h>
1414
#include <linux/sort.h>
15+
#include <linux/rhashtable.h>
1516

1617
#include <asm/fprobe.h>
1718

@@ -41,47 +42,47 @@
4142
* - RCU hlist traversal under disabling preempt
4243
*/
4344
static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
44-
static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE];
45+
static struct rhashtable fprobe_ip_table;
4546
static DEFINE_MUTEX(fprobe_mutex);
4647

47-
/*
48-
* Find first fprobe in the hlist. It will be iterated twice in the entry
49-
* probe, once for correcting the total required size, the second time is
50-
* calling back the user handlers.
51-
* Thus the hlist in the fprobe_table must be sorted and new probe needs to
52-
* be added *before* the first fprobe.
53-
*/
54-
static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip)
48+
/* Hash a lookup key: @data points at the function address (unsigned long). */
static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed)
{
	/*
	 * NOTE(review): @seed is deliberately unused, so bucket placement
	 * is not randomized. That is tolerable here because the keys are
	 * kernel text addresses, not attacker-controlled input — confirm.
	 * Must stay in sync with fprobe_node_obj_hashfn().
	 */
	return hash_ptr(*(unsigned long **)data, 32);
}
5852

59-
head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
60-
hlist_for_each_entry_rcu(node, head, hlist,
61-
lockdep_is_held(&fprobe_mutex)) {
62-
if (node->addr == ip)
63-
return node;
64-
}
65-
return NULL;
53+
static int fprobe_node_cmp(struct rhashtable_compare_arg *arg,
54+
const void *ptr)
55+
{
56+
unsigned long key = *(unsigned long *)arg->key;
57+
const struct fprobe_hlist_node *n = ptr;
58+
59+
return n->addr != key;
60+
}
61+
62+
/*
 * Hash an inserted object by its ->addr. Must produce the same value as
 * fprobe_node_hashfn() for the same address, or lookups will miss.
 */
static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed)
{
	const struct fprobe_hlist_node *node = data;

	return hash_ptr((void *)node->addr, 32);
}
67-
NOKPROBE_SYMBOL(find_first_fprobe_node);
68+
69+
static const struct rhashtable_params fprobe_rht_params = {
70+
.head_offset = offsetof(struct fprobe_hlist_node, hlist),
71+
.key_offset = offsetof(struct fprobe_hlist_node, addr),
72+
.key_len = sizeof_field(struct fprobe_hlist_node, addr),
73+
.hashfn = fprobe_node_hashfn,
74+
.obj_hashfn = fprobe_node_obj_hashfn,
75+
.obj_cmpfn = fprobe_node_cmp,
76+
.automatic_shrinking = true,
77+
};
6878

6979
/* Node insertion and deletion requires the fprobe_mutex */
7080
static void insert_fprobe_node(struct fprobe_hlist_node *node)
7181
{
72-
unsigned long ip = node->addr;
73-
struct fprobe_hlist_node *next;
74-
struct hlist_head *head;
75-
7682
lockdep_assert_held(&fprobe_mutex);
7783

78-
next = find_first_fprobe_node(ip);
79-
if (next) {
80-
hlist_add_before_rcu(&node->hlist, &next->hlist);
81-
return;
82-
}
83-
head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
84-
hlist_add_head_rcu(&node->hlist, head);
84+
rhashtable_insert_fast(&fprobe_ip_table, &node->hlist,
85+
fprobe_rht_params);
8586
}
8687

8788
/* Return true if there are synonyms */
@@ -92,9 +93,11 @@ static bool delete_fprobe_node(struct fprobe_hlist_node *node)
9293
/* Avoid double deleting */
9394
if (READ_ONCE(node->fp) != NULL) {
9495
WRITE_ONCE(node->fp, NULL);
95-
hlist_del_rcu(&node->hlist);
96+
rhashtable_remove_fast(&fprobe_ip_table, &node->hlist,
97+
fprobe_rht_params);
9698
}
97-
return !!find_first_fprobe_node(node->addr);
99+
return !!rhashtable_lookup_fast(&fprobe_ip_table, &node->addr,
100+
fprobe_rht_params);
98101
}
99102

100103
/* Check existence of the fprobe */
@@ -249,25 +252,28 @@ static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent
249252
static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
250253
struct ftrace_regs *fregs)
251254
{
252-
struct fprobe_hlist_node *node, *first;
255+
struct rhash_lock_head __rcu *const *bkt;
256+
struct fprobe_hlist_node *node;
253257
unsigned long *fgraph_data = NULL;
254258
unsigned long func = trace->func;
259+
struct bucket_table *tbl;
260+
struct rhash_head *head;
255261
unsigned long ret_ip;
256262
int reserved_words;
257263
struct fprobe *fp;
264+
unsigned int key;
258265
int used, ret;
259266

260267
if (WARN_ON_ONCE(!fregs))
261268
return 0;
262269

263-
first = node = find_first_fprobe_node(func);
264-
if (unlikely(!first))
265-
return 0;
266-
270+
tbl = rht_dereference_rcu(fprobe_ip_table.tbl, &fprobe_ip_table);
271+
key = rht_key_hashfn(&fprobe_ip_table, tbl, &func, fprobe_rht_params);
272+
bkt = rht_bucket(tbl, key);
267273
reserved_words = 0;
268-
hlist_for_each_entry_from_rcu(node, hlist) {
274+
rht_for_each_entry_rcu_from(node, head, rht_ptr_rcu(bkt), tbl, key, hlist) {
269275
if (node->addr != func)
270-
break;
276+
continue;
271277
fp = READ_ONCE(node->fp);
272278
if (!fp || !fp->exit_handler)
273279
continue;
@@ -278,13 +284,13 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
278284
reserved_words +=
279285
FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
280286
}
281-
node = first;
282287
if (reserved_words) {
283288
fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
284289
if (unlikely(!fgraph_data)) {
285-
hlist_for_each_entry_from_rcu(node, hlist) {
290+
rht_for_each_entry_rcu_from(node, head, rht_ptr_rcu(bkt),
291+
tbl, key, hlist) {
286292
if (node->addr != func)
287-
break;
293+
continue;
288294
fp = READ_ONCE(node->fp);
289295
if (fp && !fprobe_disabled(fp))
290296
fp->nmissed++;
@@ -299,12 +305,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
299305
*/
300306
ret_ip = ftrace_regs_get_return_address(fregs);
301307
used = 0;
302-
hlist_for_each_entry_from_rcu(node, hlist) {
308+
rht_for_each_entry_rcu_from(node, head, rht_ptr_rcu(bkt), tbl, key, hlist) {
303309
int data_size;
304310
void *data;
305311

306312
if (node->addr != func)
307-
break;
313+
continue;
308314
fp = READ_ONCE(node->fp);
309315
if (!fp || fprobe_disabled(fp))
310316
continue;
@@ -448,34 +454,31 @@ static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long ad
448454
return 0;
449455
}
450456

451-
static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head,
452-
struct fprobe_addr_list *alist)
457+
static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
458+
struct fprobe_addr_list *alist)
453459
{
454-
struct fprobe_hlist_node *node;
455460
int ret = 0;
456461

457-
hlist_for_each_entry_rcu(node, head, hlist,
458-
lockdep_is_held(&fprobe_mutex)) {
459-
if (!within_module(node->addr, mod))
460-
continue;
461-
if (delete_fprobe_node(node))
462-
continue;
463-
/*
464-
* If failed to update alist, just continue to update hlist.
465-
* Therefore, at list user handler will not hit anymore.
466-
*/
467-
if (!ret)
468-
ret = fprobe_addr_list_add(alist, node->addr);
469-
}
462+
if (!within_module(node->addr, mod))
463+
return;
464+
if (delete_fprobe_node(node))
465+
return;
466+
/*
467+
* If failed to update alist, just continue to update hlist.
468+
* Therefore, at list user handler will not hit anymore.
469+
*/
470+
if (!ret)
471+
ret = fprobe_addr_list_add(alist, node->addr);
470472
}
471473

472474
/* Handle module unloading to manage fprobe_ip_table. */
473475
static int fprobe_module_callback(struct notifier_block *nb,
474476
unsigned long val, void *data)
475477
{
476478
struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
479+
struct fprobe_hlist_node *node;
480+
struct rhashtable_iter iter;
477481
struct module *mod = data;
478-
int i;
479482

480483
if (val != MODULE_STATE_GOING)
481484
return NOTIFY_DONE;
@@ -486,8 +489,16 @@ static int fprobe_module_callback(struct notifier_block *nb,
486489
return NOTIFY_DONE;
487490

488491
mutex_lock(&fprobe_mutex);
489-
for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++)
490-
fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist);
492+
rhashtable_walk_enter(&fprobe_ip_table, &iter);
493+
do {
494+
rhashtable_walk_start(&iter);
495+
496+
while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node))
497+
fprobe_remove_node_in_module(mod, node, &alist);
498+
499+
rhashtable_walk_stop(&iter);
500+
} while (node == ERR_PTR(-EAGAIN));
501+
rhashtable_walk_exit(&iter);
491502

492503
if (alist.index < alist.size && alist.index > 0)
493504
ftrace_set_filter_ips(&fprobe_graph_ops.ops,
@@ -819,3 +830,10 @@ int unregister_fprobe(struct fprobe *fp)
819830
return ret;
820831
}
821832
EXPORT_SYMBOL_GPL(unregister_fprobe);
833+
834+
/*
 * Set up the global fprobe_ip_table rhashtable before any fprobe can be
 * registered (late_initcall runs well before module loading / user code).
 */
static int __init fprobe_initcall(void)
{
	/*
	 * rhashtable_init() can fail (bad params or -ENOMEM); propagate the
	 * error instead of leaving fprobe_ip_table uninitialized and letting
	 * a later insert/lookup crash.
	 */
	return rhashtable_init(&fprobe_ip_table, &fprobe_rht_params);
}
late_initcall(fprobe_initcall);

0 commit comments

Comments
 (0)