Skip to content

fprobe: use rhashtable for fprobe_ip_table #9504

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: bpf-next_base
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion include/linux/fprobe.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <linux/ftrace.h>
#include <linux/rcupdate.h>
#include <linux/refcount.h>
#include <linux/rhashtable.h>
#include <linux/slab.h>

struct fprobe;
Expand All @@ -26,7 +27,7 @@ typedef void (*fprobe_exit_cb)(struct fprobe *fp, unsigned long entry_ip,
* @fp: The fprobe which owns this.
*/
struct fprobe_hlist_node {
struct hlist_node hlist;
struct rhlist_head hlist;
unsigned long addr;
struct fprobe *fp;
};
Expand Down
151 changes: 85 additions & 66 deletions kernel/trace/fprobe.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <linux/kprobes.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/rhashtable.h>
#include <linux/slab.h>
#include <linux/sort.h>

Expand Down Expand Up @@ -41,47 +42,46 @@
* - RCU hlist traversal under disabling preempt
*/
static struct hlist_head fprobe_table[FPROBE_TABLE_SIZE];
static struct hlist_head fprobe_ip_table[FPROBE_IP_TABLE_SIZE];
static struct rhltable fprobe_ip_table;
static DEFINE_MUTEX(fprobe_mutex);

/*
* Find first fprobe in the hlist. It will be iterated twice in the entry
* probe, once for correcting the total required size, the second time is
* calling back the user handlers.
* Thus the hlist in the fprobe_table must be sorted and new probe needs to
* be added *before* the first fprobe.
*/
static struct fprobe_hlist_node *find_first_fprobe_node(unsigned long ip)
static u32 fprobe_node_hashfn(const void *data, u32 len, u32 seed)
{
struct fprobe_hlist_node *node;
struct hlist_head *head;
return hash_ptr(*(unsigned long **)data, 32);
}

head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
hlist_for_each_entry_rcu(node, head, hlist,
lockdep_is_held(&fprobe_mutex)) {
if (node->addr == ip)
return node;
}
return NULL;
/*
 * Key comparison callback used as .obj_cmpfn in fprobe_rht_params.
 *
 * @arg: compare argument; arg->key points to the unsigned long address
 *       that is being looked up.
 * @ptr: candidate object, a struct fprobe_hlist_node.
 *
 * Returns 0 when the node's addr equals the key, non-zero otherwise.
 */
static int fprobe_node_cmp(struct rhashtable_compare_arg *arg,
			   const void *ptr)
{
	const struct fprobe_hlist_node *n = ptr;
	/* The key is only read, so keep it const-qualified when dereferencing. */
	unsigned long key = *(const unsigned long *)arg->key;

	return n->addr != key;
}
NOKPROBE_SYMBOL(find_first_fprobe_node);

/* Node insertion and deletion requires the fprobe_mutex */
static void insert_fprobe_node(struct fprobe_hlist_node *node)
static u32 fprobe_node_obj_hashfn(const void *data, u32 len, u32 seed)
{
unsigned long ip = node->addr;
struct fprobe_hlist_node *next;
struct hlist_head *head;
const struct fprobe_hlist_node *n = data;

return hash_ptr((void *)n->addr, 32);
}

/*
 * Hash table parameters for fprobe_ip_table.
 *
 * Entries are struct fprobe_hlist_node objects keyed by their 'addr' field
 * and linked into the table through their 'hlist' member.
 */
static const struct rhashtable_params fprobe_rht_params = {
	/* Key: the 'addr' field stored in each node. */
	.key_len		= sizeof_field(struct fprobe_hlist_node, addr),
	.key_offset		= offsetof(struct fprobe_hlist_node, addr),
	/* Linkage member embedded in each node. */
	.head_offset		= offsetof(struct fprobe_hlist_node, hlist),
	/* Hashing of lookup keys and of stored objects, and key comparison. */
	.hashfn			= fprobe_node_hashfn,
	.obj_hashfn		= fprobe_node_obj_hashfn,
	.obj_cmpfn		= fprobe_node_cmp,
	/* Enable rhashtable's automatic shrinking. */
	.automatic_shrinking	= true,
};

/* Node insertion and deletion requires the fprobe_mutex */
static int insert_fprobe_node(struct fprobe_hlist_node *node)
{
lockdep_assert_held(&fprobe_mutex);

next = find_first_fprobe_node(ip);
if (next) {
hlist_add_before_rcu(&node->hlist, &next->hlist);
return;
}
head = &fprobe_ip_table[hash_ptr((void *)ip, FPROBE_IP_HASH_BITS)];
hlist_add_head_rcu(&node->hlist, head);
return rhltable_insert(&fprobe_ip_table, &node->hlist, fprobe_rht_params);
}

/* Return true if there are synonyms */
Expand All @@ -92,9 +92,11 @@ static bool delete_fprobe_node(struct fprobe_hlist_node *node)
/* Avoid double deleting */
if (READ_ONCE(node->fp) != NULL) {
WRITE_ONCE(node->fp, NULL);
hlist_del_rcu(&node->hlist);
rhltable_remove(&fprobe_ip_table, &node->hlist,
fprobe_rht_params);
}
return !!find_first_fprobe_node(node->addr);
return !!rhltable_lookup(&fprobe_ip_table, &node->addr,
fprobe_rht_params);
}

/* Check existence of the fprobe */
Expand Down Expand Up @@ -249,9 +251,10 @@ static inline int __fprobe_kprobe_handler(unsigned long ip, unsigned long parent
static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
struct ftrace_regs *fregs)
{
struct fprobe_hlist_node *node, *first;
unsigned long *fgraph_data = NULL;
unsigned long func = trace->func;
struct fprobe_hlist_node *node;
struct rhlist_head *head, *pos;
unsigned long ret_ip;
int reserved_words;
struct fprobe *fp;
Expand All @@ -260,14 +263,11 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
if (WARN_ON_ONCE(!fregs))
return 0;

first = node = find_first_fprobe_node(func);
if (unlikely(!first))
return 0;

head = rhltable_lookup(&fprobe_ip_table, &func, fprobe_rht_params);
reserved_words = 0;
hlist_for_each_entry_from_rcu(node, hlist) {
rhl_for_each_entry_rcu(node, pos, head, hlist) {
if (node->addr != func)
break;
continue;
fp = READ_ONCE(node->fp);
if (!fp || !fp->exit_handler)
continue;
Expand All @@ -278,13 +278,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
reserved_words +=
FPROBE_HEADER_SIZE_IN_LONG + SIZE_IN_LONG(fp->entry_data_size);
}
node = first;
if (reserved_words) {
fgraph_data = fgraph_reserve_data(gops->idx, reserved_words * sizeof(long));
if (unlikely(!fgraph_data)) {
hlist_for_each_entry_from_rcu(node, hlist) {
rhl_for_each_entry_rcu(node, pos, head, hlist) {
if (node->addr != func)
break;
continue;
fp = READ_ONCE(node->fp);
if (fp && !fprobe_disabled(fp))
fp->nmissed++;
Expand All @@ -299,12 +298,12 @@ static int fprobe_entry(struct ftrace_graph_ent *trace, struct fgraph_ops *gops,
*/
ret_ip = ftrace_regs_get_return_address(fregs);
used = 0;
hlist_for_each_entry_from_rcu(node, hlist) {
rhl_for_each_entry_rcu(node, pos, head, hlist) {
int data_size;
void *data;

if (node->addr != func)
break;
continue;
fp = READ_ONCE(node->fp);
if (!fp || fprobe_disabled(fp))
continue;
Expand Down Expand Up @@ -448,34 +447,31 @@ static int fprobe_addr_list_add(struct fprobe_addr_list *alist, unsigned long ad
return 0;
}

static void fprobe_remove_node_in_module(struct module *mod, struct hlist_head *head,
struct fprobe_addr_list *alist)
static void fprobe_remove_node_in_module(struct module *mod, struct fprobe_hlist_node *node,
struct fprobe_addr_list *alist)
{
struct fprobe_hlist_node *node;
int ret = 0;

hlist_for_each_entry_rcu(node, head, hlist,
lockdep_is_held(&fprobe_mutex)) {
if (!within_module(node->addr, mod))
continue;
if (delete_fprobe_node(node))
continue;
/*
* If updating alist fails, just continue updating the hlist, so that
* at least the user handler will not be hit anymore.
*/
if (!ret)
ret = fprobe_addr_list_add(alist, node->addr);
}
if (!within_module(node->addr, mod))
return;
if (delete_fprobe_node(node))
return;
/*
* If updating alist fails, just continue updating the hlist, so that
* at least the user handler will not be hit anymore.
*/
if (!ret)
ret = fprobe_addr_list_add(alist, node->addr);
}

/* Handle module unloading to manage fprobe_ip_table. */
static int fprobe_module_callback(struct notifier_block *nb,
unsigned long val, void *data)
{
struct fprobe_addr_list alist = {.size = FPROBE_IPS_BATCH_INIT};
struct fprobe_hlist_node *node;
struct rhashtable_iter iter;
struct module *mod = data;
int i;

if (val != MODULE_STATE_GOING)
return NOTIFY_DONE;
Expand All @@ -486,8 +482,16 @@ static int fprobe_module_callback(struct notifier_block *nb,
return NOTIFY_DONE;

mutex_lock(&fprobe_mutex);
for (i = 0; i < FPROBE_IP_TABLE_SIZE; i++)
fprobe_remove_node_in_module(mod, &fprobe_ip_table[i], &alist);
rhltable_walk_enter(&fprobe_ip_table, &iter);
do {
rhashtable_walk_start(&iter);

while ((node = rhashtable_walk_next(&iter)) && !IS_ERR(node))
fprobe_remove_node_in_module(mod, node, &alist);

rhashtable_walk_stop(&iter);
} while (node == ERR_PTR(-EAGAIN));
rhashtable_walk_exit(&iter);

if (alist.index < alist.size && alist.index > 0)
ftrace_set_filter_ips(&fprobe_graph_ops.ops,
Expand Down Expand Up @@ -727,8 +731,16 @@ int register_fprobe_ips(struct fprobe *fp, unsigned long *addrs, int num)
ret = fprobe_graph_add_ips(addrs, num);
if (!ret) {
add_fprobe_hash(fp);
for (i = 0; i < hlist_array->size; i++)
insert_fprobe_node(&hlist_array->array[i]);
for (i = 0; i < hlist_array->size; i++) {
ret = insert_fprobe_node(&hlist_array->array[i]);
if (ret)
break;
}
/* On insert error, roll back the nodes that were already inserted */
if (ret) {
for (i--; i >= 0; i--)
delete_fprobe_node(&hlist_array->array[i]);
}
}
mutex_unlock(&fprobe_mutex);

Expand Down Expand Up @@ -824,3 +836,10 @@ int unregister_fprobe(struct fprobe *fp)
return ret;
}
EXPORT_SYMBOL_GPL(unregister_fprobe);

/*
 * Initialize fprobe_ip_table with fprobe_rht_params during boot.
 *
 * Returns 0 on success, or the error code reported by rhltable_init().
 *
 * NOTE(review): this is registered at late_initcall level; confirm that no
 * fprobe can be registered before this runs, since the table cannot be used
 * until it has been initialized.
 */
static int __init fprobe_initcall(void)
{
	/* Propagate the result instead of unconditionally reporting success. */
	return rhltable_init(&fprobe_ip_table, &fprobe_rht_params);
}
late_initcall(fprobe_initcall);
4 changes: 4 additions & 0 deletions tools/testing/selftests/bpf/bench.c
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,8 @@ extern const struct bench bench_trig_kretprobe;
extern const struct bench bench_trig_kprobe_multi;
extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
extern const struct bench bench_trig_kprobe_multi_all;
extern const struct bench bench_trig_kretprobe_multi_all;
extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_tp;
Expand Down Expand Up @@ -578,6 +580,8 @@ static const struct bench *benchs[] = {
&bench_trig_kprobe_multi,
&bench_trig_kretprobe_multi,
&bench_trig_fentry,
&bench_trig_kprobe_multi_all,
&bench_trig_kretprobe_multi_all,
&bench_trig_fexit,
&bench_trig_fmodret,
&bench_trig_tp,
Expand Down
54 changes: 54 additions & 0 deletions tools/testing/selftests/bpf/benchs/bench_trigger.c
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,58 @@ static void trigger_fentry_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}

/*
 * Attach @empty as a kprobe-multi program to every symbol returned by
 * bpf_get_ksyms().
 *
 * @empty:     program to attach to all collected symbols.
 * @kretprobe: value stored in opts.retprobe, i.e. whether to attach as a
 *             return probe.
 *
 * Exits the process with status 1 if the symbol list cannot be obtained or
 * if the attachment fails.
 *
 * NOTE(review): neither the symbol list nor the returned link is released
 * here; presumably both are meant to live until the benchmark process
 * exits - confirm against the ownership rules of bpf_get_ksyms().
 */
static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
{
	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
	char **syms = NULL;
	size_t cnt = 0;

	if (bpf_get_ksyms(&syms, &cnt, true)) {
		/* Report failures on stderr so they do not mix with benchmark output. */
		fprintf(stderr, "failed to get ksyms\n");
		exit(1);
	}

	printf("found %zu ksyms\n", cnt);
	opts.syms = (const char **) syms;
	opts.cnt = cnt;
	opts.retprobe = kretprobe;
	/*
	 * Attach empty to all the kernel functions. The symbol list is expected
	 * to exclude bpf_get_numa_node_id - TODO confirm in bpf_get_ksyms().
	 */
	if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
		fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
		exit(1);
	}
}

/*
 * Benchmark setup for "kprobe-multi-all": load the empty kprobe-multi
 * program and the trigger program, attach the empty one to all collected
 * kernel symbols, then attach the trigger program.
 */
static void trigger_kprobe_multi_all_setup(void)
{
	struct bpf_program *noop_prog;
	struct bpf_program *bench_prog;

	setup_ctx();

	/* Both programs are taken from the skeleton set up above. */
	bench_prog = ctx.skel->progs.bench_trigger_kprobe_multi;
	noop_prog = ctx.skel->progs.bench_kprobe_multi_empty;

	/* Mark both programs for loading before load_ctx() is called. */
	bpf_program__set_autoload(noop_prog, true);
	bpf_program__set_autoload(bench_prog, true);
	load_ctx();

	/* false: attach as entry probes (opts.retprobe is left unset). */
	attach_ksyms_all(noop_prog, false);
	attach_bpf(bench_prog);
}

/*
 * Benchmark setup for "kretprobe-multi-all": load the empty kretprobe-multi
 * program and the trigger program, attach the empty one to all collected
 * kernel symbols as a return probe, then attach the trigger program.
 */
static void trigger_kretprobe_multi_all_setup(void)
{
	struct bpf_program *noop_prog;
	struct bpf_program *bench_prog;

	setup_ctx();

	/* Both programs are taken from the skeleton set up above. */
	bench_prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
	noop_prog = ctx.skel->progs.bench_kretprobe_multi_empty;

	/* Mark both programs for loading before load_ctx() is called. */
	bpf_program__set_autoload(noop_prog, true);
	bpf_program__set_autoload(bench_prog, true);
	load_ctx();

	/* true: attach as return probes (opts.retprobe is set). */
	attach_ksyms_all(noop_prog, true);
	attach_bpf(bench_prog);
}

static void trigger_fexit_setup(void)
{
setup_ctx();
Expand Down Expand Up @@ -512,6 +564,8 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
Expand Down
4 changes: 2 additions & 2 deletions tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ def_tests=( \
usermode-count kernel-count syscall-count \
fentry fexit fmodret \
rawtp tp \
kprobe kprobe-multi \
kretprobe kretprobe-multi \
kprobe kprobe-multi kprobe-multi-all \
kretprobe kretprobe-multi kretprobe-multi-all \
)

tests=("$@")
Expand Down
Loading
Loading