Skip to content

Commit 10cdb82

Browse files
anakryiko authored and mhiramat committed
uprobes: turn trace_uprobe's nhit counter to be per-CPU one
trace_uprobe->nhit counter is not incremented atomically, so its value is questionable when a uprobe is hit on multiple CPUs simultaneously. Also, incrementing this shared counter across many CPUs causes heavy cache line bouncing, limiting how well uprobe/uretprobe performance scales with the number of CPUs.

Solve both problems by making this a per-CPU counter.

Link: https://lore.kernel.org/all/[email protected]/
Reviewed-by: Oleg Nesterov <[email protected]>
Signed-off-by: Andrii Nakryiko <[email protected]>
Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
1 parent da3ea35 commit 10cdb82

File tree

1 file changed

+21
-3
lines changed

1 file changed

+21
-3
lines changed

kernel/trace/trace_uprobe.c

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include <linux/string.h>
1818
#include <linux/rculist.h>
1919
#include <linux/filter.h>
20+
#include <linux/percpu.h>
2021

2122
#include "trace_dynevent.h"
2223
#include "trace_probe.h"
@@ -62,7 +63,7 @@ struct trace_uprobe {
6263
char *filename;
6364
unsigned long offset;
6465
unsigned long ref_ctr_offset;
65-
unsigned long nhit;
66+
unsigned long __percpu *nhits;
6667
struct trace_probe tp;
6768
};
6869

@@ -337,6 +338,12 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
337338
if (!tu)
338339
return ERR_PTR(-ENOMEM);
339340

341+
tu->nhits = alloc_percpu(unsigned long);
342+
if (!tu->nhits) {
343+
ret = -ENOMEM;
344+
goto error;
345+
}
346+
340347
ret = trace_probe_init(&tu->tp, event, group, true, nargs);
341348
if (ret < 0)
342349
goto error;
@@ -349,6 +356,7 @@ alloc_trace_uprobe(const char *group, const char *event, int nargs, bool is_ret)
349356
return tu;
350357

351358
error:
359+
free_percpu(tu->nhits);
352360
kfree(tu);
353361

354362
return ERR_PTR(ret);
@@ -362,6 +370,7 @@ static void free_trace_uprobe(struct trace_uprobe *tu)
362370
path_put(&tu->path);
363371
trace_probe_cleanup(&tu->tp);
364372
kfree(tu->filename);
373+
free_percpu(tu->nhits);
365374
kfree(tu);
366375
}
367376

@@ -815,13 +824,21 @@ static int probes_profile_seq_show(struct seq_file *m, void *v)
815824
{
816825
struct dyn_event *ev = v;
817826
struct trace_uprobe *tu;
827+
unsigned long nhits;
828+
int cpu;
818829

819830
if (!is_trace_uprobe(ev))
820831
return 0;
821832

822833
tu = to_trace_uprobe(ev);
834+
835+
nhits = 0;
836+
for_each_possible_cpu(cpu) {
837+
nhits += per_cpu(*tu->nhits, cpu);
838+
}
839+
823840
seq_printf(m, " %s %-44s %15lu\n", tu->filename,
824-
trace_probe_name(&tu->tp), tu->nhit);
841+
trace_probe_name(&tu->tp), nhits);
825842
return 0;
826843
}
827844

@@ -1512,7 +1529,8 @@ static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs *regs)
15121529
int ret = 0;
15131530

15141531
tu = container_of(con, struct trace_uprobe, consumer);
1515-
tu->nhit++;
1532+
1533+
this_cpu_inc(*tu->nhits);
15161534

15171535
udd.tu = tu;
15181536
udd.bp_addr = instruction_pointer(regs);

0 commit comments

Comments
 (0)