Skip to content

Commit 87195a1

Browse files
anakryiko authored and Peter Zijlstra committed
uprobes: switch to RCU Tasks Trace flavor for better performance
This patch switches uprobes SRCU usage to RCU Tasks Trace flavor, which is optimized for more lightweight and quick readers (at the expense of slower writers, which for uprobes is a fine tradeoff) and has better performance and scalability with number of CPUs.

Similarly to baseline vs SRCU, we've benchmarked SRCU-based implementation vs RCU Tasks Trace implementation.

SRCU
====
uprobe-nop ( 1 cpus): 3.276 ± 0.005M/s ( 3.276M/s/cpu)
uprobe-nop ( 2 cpus): 4.125 ± 0.002M/s ( 2.063M/s/cpu)
uprobe-nop ( 4 cpus): 7.713 ± 0.002M/s ( 1.928M/s/cpu)
uprobe-nop ( 8 cpus): 8.097 ± 0.006M/s ( 1.012M/s/cpu)
uprobe-nop (16 cpus): 6.501 ± 0.056M/s ( 0.406M/s/cpu)
uprobe-nop (32 cpus): 4.398 ± 0.084M/s ( 0.137M/s/cpu)
uprobe-nop (64 cpus): 6.452 ± 0.000M/s ( 0.101M/s/cpu)

uretprobe-nop ( 1 cpus): 2.055 ± 0.001M/s ( 2.055M/s/cpu)
uretprobe-nop ( 2 cpus): 2.677 ± 0.000M/s ( 1.339M/s/cpu)
uretprobe-nop ( 4 cpus): 4.561 ± 0.003M/s ( 1.140M/s/cpu)
uretprobe-nop ( 8 cpus): 5.291 ± 0.002M/s ( 0.661M/s/cpu)
uretprobe-nop (16 cpus): 5.065 ± 0.019M/s ( 0.317M/s/cpu)
uretprobe-nop (32 cpus): 3.622 ± 0.003M/s ( 0.113M/s/cpu)
uretprobe-nop (64 cpus): 3.723 ± 0.002M/s ( 0.058M/s/cpu)

RCU Tasks Trace
===============
uprobe-nop ( 1 cpus): 3.396 ± 0.002M/s ( 3.396M/s/cpu)
uprobe-nop ( 2 cpus): 4.271 ± 0.006M/s ( 2.135M/s/cpu)
uprobe-nop ( 4 cpus): 8.499 ± 0.015M/s ( 2.125M/s/cpu)
uprobe-nop ( 8 cpus): 10.355 ± 0.028M/s ( 1.294M/s/cpu)
uprobe-nop (16 cpus): 7.615 ± 0.099M/s ( 0.476M/s/cpu)
uprobe-nop (32 cpus): 4.430 ± 0.007M/s ( 0.138M/s/cpu)
uprobe-nop (64 cpus): 6.887 ± 0.020M/s ( 0.108M/s/cpu)

uretprobe-nop ( 1 cpus): 2.174 ± 0.001M/s ( 2.174M/s/cpu)
uretprobe-nop ( 2 cpus): 2.853 ± 0.001M/s ( 1.426M/s/cpu)
uretprobe-nop ( 4 cpus): 4.913 ± 0.002M/s ( 1.228M/s/cpu)
uretprobe-nop ( 8 cpus): 5.883 ± 0.002M/s ( 0.735M/s/cpu)
uretprobe-nop (16 cpus): 5.147 ± 0.001M/s ( 0.322M/s/cpu)
uretprobe-nop (32 cpus): 3.738 ± 0.008M/s ( 0.117M/s/cpu)
uretprobe-nop (64 cpus): 4.397 ± 0.002M/s ( 0.069M/s/cpu)

Peak throughput for uprobes increases from 8 mln/s to 10.3 mln/s (+28%!), and for uretprobes from 5.3 mln/s to 5.8 mln/s (+11%), as we have more work to do on uretprobes side.

Even single-thread (no contention) performance is slightly better: 3.276 mln/s to 3.396 mln/s (+3.5%) for uprobes, and 2.055 mln/s to 2.174 mln/s (+5.8%) for uretprobes.

We also select TASKS_TRACE_RCU for UPROBES in Kconfig due to the new dependency.

Signed-off-by: Andrii Nakryiko <[email protected]>
Signed-off-by: Peter Zijlstra (Intel) <[email protected]>
Reviewed-by: Oleg Nesterov <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 8cf0b93 commit 87195a1

File tree

2 files changed

+17
-22
lines changed

2 files changed

+17
-22
lines changed

arch/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ config KPROBES_ON_FTRACE
135135
config UPROBES
136136
def_bool n
137137
depends on ARCH_SUPPORTS_UPROBES
138+
select TASKS_TRACE_RCU
138139
help
139140
Uprobes is the user-space counterpart to kprobes: they
140141
enable instrumentation applications (such as 'perf probe')

kernel/events/uprobes.c

Lines changed: 16 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include <linux/task_work.h>
2727
#include <linux/shmem_fs.h>
2828
#include <linux/khugepaged.h>
29+
#include <linux/rcupdate_trace.h>
2930

3031
#include <linux/uprobes.h>
3132

@@ -42,8 +43,6 @@ static struct rb_root uprobes_tree = RB_ROOT;
4243
static DEFINE_RWLOCK(uprobes_treelock); /* serialize rbtree access */
4344
static seqcount_rwlock_t uprobes_seqcount = SEQCNT_RWLOCK_ZERO(uprobes_seqcount, &uprobes_treelock);
4445

45-
DEFINE_STATIC_SRCU(uprobes_srcu);
46-
4746
#define UPROBES_HASH_SZ 13
4847
/* serialize uprobe->pending_list */
4948
static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
@@ -651,7 +650,7 @@ static void put_uprobe(struct uprobe *uprobe)
651650
delayed_uprobe_remove(uprobe, NULL);
652651
mutex_unlock(&delayed_uprobe_lock);
653652

654-
call_srcu(&uprobes_srcu, &uprobe->rcu, uprobe_free_rcu);
653+
call_rcu_tasks_trace(&uprobe->rcu, uprobe_free_rcu);
655654
}
656655

657656
static __always_inline
@@ -706,7 +705,7 @@ static struct uprobe *find_uprobe_rcu(struct inode *inode, loff_t offset)
706705
struct rb_node *node;
707706
unsigned int seq;
708707

709-
lockdep_assert(srcu_read_lock_held(&uprobes_srcu));
708+
lockdep_assert(rcu_read_lock_trace_held());
710709

711710
do {
712711
seq = read_seqcount_begin(&uprobes_seqcount);
@@ -934,8 +933,7 @@ static bool filter_chain(struct uprobe *uprobe, struct mm_struct *mm)
934933
bool ret = false;
935934

936935
down_read(&uprobe->consumer_rwsem);
937-
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
938-
srcu_read_lock_held(&uprobes_srcu)) {
936+
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
939937
ret = consumer_filter(uc, mm);
940938
if (ret)
941939
break;
@@ -1156,7 +1154,7 @@ void uprobe_unregister_sync(void)
11561154
* unlucky enough caller can free consumer's memory and cause
11571155
* handler_chain() or handle_uretprobe_chain() to do an use-after-free.
11581156
*/
1159-
synchronize_srcu(&uprobes_srcu);
1157+
synchronize_rcu_tasks_trace();
11601158
}
11611159
EXPORT_SYMBOL_GPL(uprobe_unregister_sync);
11621160

@@ -1240,19 +1238,18 @@ EXPORT_SYMBOL_GPL(uprobe_register);
12401238
int uprobe_apply(struct uprobe *uprobe, struct uprobe_consumer *uc, bool add)
12411239
{
12421240
struct uprobe_consumer *con;
1243-
int ret = -ENOENT, srcu_idx;
1241+
int ret = -ENOENT;
12441242

12451243
down_write(&uprobe->register_rwsem);
12461244

1247-
srcu_idx = srcu_read_lock(&uprobes_srcu);
1248-
list_for_each_entry_srcu(con, &uprobe->consumers, cons_node,
1249-
srcu_read_lock_held(&uprobes_srcu)) {
1245+
rcu_read_lock_trace();
1246+
list_for_each_entry_rcu(con, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
12501247
if (con == uc) {
12511248
ret = register_for_each_vma(uprobe, add ? uc : NULL);
12521249
break;
12531250
}
12541251
}
1255-
srcu_read_unlock(&uprobes_srcu, srcu_idx);
1252+
rcu_read_unlock_trace();
12561253

12571254
up_write(&uprobe->register_rwsem);
12581255

@@ -2134,8 +2131,7 @@ static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
21342131

21352132
current->utask->auprobe = &uprobe->arch;
21362133

2137-
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
2138-
srcu_read_lock_held(&uprobes_srcu)) {
2134+
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
21392135
int rc = 0;
21402136

21412137
if (uc->handler) {
@@ -2173,15 +2169,13 @@ handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
21732169
{
21742170
struct uprobe *uprobe = ri->uprobe;
21752171
struct uprobe_consumer *uc;
2176-
int srcu_idx;
21772172

2178-
srcu_idx = srcu_read_lock(&uprobes_srcu);
2179-
list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
2180-
srcu_read_lock_held(&uprobes_srcu)) {
2173+
rcu_read_lock_trace();
2174+
list_for_each_entry_rcu(uc, &uprobe->consumers, cons_node, rcu_read_lock_trace_held()) {
21812175
if (uc->ret_handler)
21822176
uc->ret_handler(uc, ri->func, regs);
21832177
}
2184-
srcu_read_unlock(&uprobes_srcu, srcu_idx);
2178+
rcu_read_unlock_trace();
21852179
}
21862180

21872181
static struct return_instance *find_next_ret_chain(struct return_instance *ri)
@@ -2266,13 +2260,13 @@ static void handle_swbp(struct pt_regs *regs)
22662260
{
22672261
struct uprobe *uprobe;
22682262
unsigned long bp_vaddr;
2269-
int is_swbp, srcu_idx;
2263+
int is_swbp;
22702264

22712265
bp_vaddr = uprobe_get_swbp_addr(regs);
22722266
if (bp_vaddr == uprobe_get_trampoline_vaddr())
22732267
return uprobe_handle_trampoline(regs);
22742268

2275-
srcu_idx = srcu_read_lock(&uprobes_srcu);
2269+
rcu_read_lock_trace();
22762270

22772271
uprobe = find_active_uprobe_rcu(bp_vaddr, &is_swbp);
22782272
if (!uprobe) {
@@ -2330,7 +2324,7 @@ static void handle_swbp(struct pt_regs *regs)
23302324

23312325
out:
23322326
/* arch_uprobe_skip_sstep() succeeded, or restart if can't singlestep */
2333-
srcu_read_unlock(&uprobes_srcu, srcu_idx);
2327+
rcu_read_unlock_trace();
23342328
}
23352329

23362330
/*

0 commit comments

Comments
 (0)