
Commit ff474a7

olsajiri authored and mhiramat committed
uprobe: Add uretprobe syscall to speed up return probe
Adding uretprobe syscall instead of trap to speed up return probe.

At the moment the uretprobe setup/path is:

  - install entry uprobe
  - when the uprobe is hit, it overwrites the probed function's return
    address on the stack with the address of the trampoline that contains
    the breakpoint instruction
  - the breakpoint trap code handles execution of the uretprobe consumers
    and jumps back to the original return address

This patch replaces the above trampoline's breakpoint instruction with the
new uretprobe syscall. The syscall does exactly the same job as the trap,
with some extra work:

  - the syscall trampoline must save the original values of the rax/r11/rcx
    registers on the stack - rax is set to the syscall number and r11/rcx
    are changed and used by the syscall instruction
  - the syscall code reads the original values of those registers and
    restores them in the task's pt_regs area
  - only a caller from the trampoline exposed in '[uprobes]' is allowed,
    the process will receive a SIGILL signal otherwise

Even with the extra work, using the uretprobe syscall shows a speed
improvement (compared to using the standard breakpoint):

  On Intel (11th Gen Intel(R) Core(TM) i7-1165G7 @ 2.80GHz)

  current:
            uretprobe-nop  :    1.498 ± 0.000M/s
            uretprobe-push :    1.448 ± 0.001M/s
            uretprobe-ret  :    0.816 ± 0.001M/s

  with the fix:
            uretprobe-nop  :    1.969 ± 0.002M/s  < 31% speed up
            uretprobe-push :    1.910 ± 0.000M/s  < 31% speed up
            uretprobe-ret  :    0.934 ± 0.000M/s  < 14% speed up

  On Amd (AMD Ryzen 7 5700U)

  current:
            uretprobe-nop  :    0.778 ± 0.001M/s
            uretprobe-push :    0.744 ± 0.001M/s
            uretprobe-ret  :    0.540 ± 0.001M/s

  with the fix:
            uretprobe-nop  :    0.860 ± 0.001M/s  < 10% speed up
            uretprobe-push :    0.818 ± 0.001M/s  < 10% speed up
            uretprobe-ret  :    0.578 ± 0.000M/s  <  7% speed up

The performance test spawns a thread that runs a loop which triggers the
uprobe with an attached bpf program that increments the counter printed in
the results above. The uprobe (and uretprobe) kind is determined by which
instruction is being patched with the breakpoint instruction. That's also
important for uretprobes, because an uprobe is installed for each uretprobe.

The performance test is part of the bpf selftests:
  tools/testing/selftests/bpf/run_bench_uprobes.sh

Note that at the moment the uretprobe syscall is supported only for native
64-bit processes; compat processes still use the standard breakpoint.

Note that when shadow stack is enabled the uretprobe syscall returns via
iret, which is slower than returning via sysret, but won't cause a shadow
stack violation.

Link: https://lore.kernel.org/all/[email protected]/
Suggested-by: Andrii Nakryiko <[email protected]>
Reviewed-by: Oleg Nesterov <[email protected]>
Reviewed-by: Masami Hiramatsu (Google) <[email protected]>
Acked-by: Andrii Nakryiko <[email protected]>
Signed-off-by: Oleg Nesterov <[email protected]>
Signed-off-by: Jiri Olsa <[email protected]>
Signed-off-by: Masami Hiramatsu (Google) <[email protected]>
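The bpf benchmark program referenced above is not part of this diff. A minimal sketch of the kind of program being measured might look as follows; the section path, target symbol and counter name are illustrative only, not the actual selftest code:

// Illustrative only: a tiny uretprobe counter in the spirit of the
// benchmark described above; not the selftest program itself.
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

long hits = 0;          /* read and printed by the user-space benchmark loop */

SEC("uretprobe//proc/self/exe:uprobe_target_nop")
int bench_uretprobe(struct pt_regs *ctx)
{
        /* one uretprobe hit == one counter increment */
        __sync_fetch_and_add(&hits, 1);
        return 0;
}

char LICENSE[] SEC("license") = "GPL";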
1 parent 190fec7 commit ff474a7

File tree: 5 files changed (+144, -7 lines)

arch/x86/include/asm/shstk.h
Lines changed: 2 additions & 0 deletions

@@ -22,6 +22,7 @@ void shstk_free(struct task_struct *p);
 int setup_signal_shadow_stack(struct ksignal *ksig);
 int restore_signal_shadow_stack(void);
 int shstk_update_last_frame(unsigned long val);
+bool shstk_is_enabled(void);
 #else
 static inline long shstk_prctl(struct task_struct *task, int option,
                                unsigned long arg2) { return -EINVAL; }
@@ -33,6 +34,7 @@ static inline void shstk_free(struct task_struct *p) {}
 static inline int setup_signal_shadow_stack(struct ksignal *ksig) { return 0; }
 static inline int restore_signal_shadow_stack(void) { return 0; }
 static inline int shstk_update_last_frame(unsigned long val) { return 0; }
+static inline bool shstk_is_enabled(void) { return false; }
 #endif /* CONFIG_X86_USER_SHADOW_STACK */
 
 #endif /* __ASSEMBLY__ */

arch/x86/kernel/shstk.c
Lines changed: 5 additions & 0 deletions

@@ -588,3 +588,8 @@ int shstk_update_last_frame(unsigned long val)
        ssp = get_user_shstk_addr();
        return write_user_shstk_64((u64 __user *)ssp, (u64)val);
 }
+
+bool shstk_is_enabled(void)
+{
+       return features_enabled(ARCH_SHSTK_SHSTK);
+}

arch/x86/kernel/uprobes.c
Lines changed: 117 additions & 0 deletions

@@ -12,6 +12,7 @@
 #include <linux/ptrace.h>
 #include <linux/uprobes.h>
 #include <linux/uaccess.h>
+#include <linux/syscalls.h>
 
 #include <linux/kdebug.h>
 #include <asm/processor.h>

@@ -308,6 +309,122 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool
 }
 
 #ifdef CONFIG_X86_64
+
+asm (
+       ".pushsection .rodata\n"
+       ".global uretprobe_trampoline_entry\n"
+       "uretprobe_trampoline_entry:\n"
+       "pushq %rax\n"
+       "pushq %rcx\n"
+       "pushq %r11\n"
+       "movq $" __stringify(__NR_uretprobe) ", %rax\n"
+       "syscall\n"
+       ".global uretprobe_syscall_check\n"
+       "uretprobe_syscall_check:\n"
+       "popq %r11\n"
+       "popq %rcx\n"
+
+       /* The uretprobe syscall replaces stored %rax value with final
+        * return address, so we don't restore %rax in here and just
+        * call ret.
+        */
+       "retq\n"
+       ".global uretprobe_trampoline_end\n"
+       "uretprobe_trampoline_end:\n"
+       ".popsection\n"
+);
+
+extern u8 uretprobe_trampoline_entry[];
+extern u8 uretprobe_trampoline_end[];
+extern u8 uretprobe_syscall_check[];
+
+void *arch_uprobe_trampoline(unsigned long *psize)
+{
+       static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+       struct pt_regs *regs = task_pt_regs(current);
+
+       /*
+        * At the moment the uretprobe syscall trampoline is supported
+        * only for native 64-bit process, the compat process still uses
+        * standard breakpoint.
+        */
+       if (user_64bit_mode(regs)) {
+               *psize = uretprobe_trampoline_end - uretprobe_trampoline_entry;
+               return uretprobe_trampoline_entry;
+       }
+
+       *psize = UPROBE_SWBP_INSN_SIZE;
+       return &insn;
+}
+
+static unsigned long trampoline_check_ip(void)
+{
+       unsigned long tramp = uprobe_get_trampoline_vaddr();
+
+       return tramp + (uretprobe_syscall_check - uretprobe_trampoline_entry);
+}
+
+SYSCALL_DEFINE0(uretprobe)
+{
+       struct pt_regs *regs = task_pt_regs(current);
+       unsigned long err, ip, sp, r11_cx_ax[3];
+
+       if (regs->ip != trampoline_check_ip())
+               goto sigill;
+
+       err = copy_from_user(r11_cx_ax, (void __user *)regs->sp, sizeof(r11_cx_ax));
+       if (err)
+               goto sigill;
+
+       /* expose the "right" values of r11/cx/ax/sp to uprobe_consumer/s */
+       regs->r11 = r11_cx_ax[0];
+       regs->cx  = r11_cx_ax[1];
+       regs->ax  = r11_cx_ax[2];
+       regs->sp += sizeof(r11_cx_ax);
+       regs->orig_ax = -1;
+
+       ip = regs->ip;
+       sp = regs->sp;
+
+       uprobe_handle_trampoline(regs);
+
+       /*
+        * Some of the uprobe consumers has changed sp, we can do nothing,
+        * just return via iret.
+        * .. or shadow stack is enabled, in which case we need to skip
+        * return through the user space stack address.
+        */
+       if (regs->sp != sp || shstk_is_enabled())
+               return regs->ax;
+       regs->sp -= sizeof(r11_cx_ax);
+
+       /* for the case uprobe_consumer has changed r11/cx */
+       r11_cx_ax[0] = regs->r11;
+       r11_cx_ax[1] = regs->cx;
+
+       /*
+        * ax register is passed through as return value, so we can use
+        * its space on stack for ip value and jump to it through the
+        * trampoline's ret instruction
+        */
+       r11_cx_ax[2] = regs->ip;
+       regs->ip = ip;
+
+       err = copy_to_user((void __user *)regs->sp, r11_cx_ax, sizeof(r11_cx_ax));
+       if (err)
+               goto sigill;
+
+       /* ensure sysret, see do_syscall_64() */
+       regs->r11 = regs->flags;
+       regs->cx  = regs->ip;
+
+       return regs->ax;
+
+sigill:
+       force_sig(SIGILL);
+       return -1;
+}
+
 /*
  * If arch_uprobe->insn doesn't use rip-relative addressing, return
  * immediately. Otherwise, rewrite the instruction so that it accesses
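The sigill path above means the syscall is usable only when entered from the trampoline mapped at '[uprobes]'; any other caller gets SIGILL. A hypothetical user-space check of that behaviour (not part of this commit, and it only assumes headers that define __NR_uretprobe) could look like:

/* Hypothetical demo, not part of this commit: call sys_uretprobe directly
 * and expect the kernel to deliver SIGILL, because regs->ip will not match
 * trampoline_check_ip().
 */
#include <signal.h>
#include <stdio.h>
#include <sys/syscall.h>
#include <unistd.h>

static void on_sigill(int sig)
{
        _exit(sig == SIGILL ? 0 : 1);   /* 0 = behaved as documented */
}

int main(void)
{
#ifdef __NR_uretprobe
        signal(SIGILL, on_sigill);
        syscall(__NR_uretprobe);        /* not called from the [uprobes] trampoline */
        fprintf(stderr, "unexpected: sys_uretprobe returned\n");
#else
        fprintf(stderr, "__NR_uretprobe not defined by these headers\n");
#endif
        return 1;
}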

include/linux/uprobes.h
Lines changed: 3 additions & 0 deletions

@@ -138,6 +138,9 @@ extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check c
 extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs);
 extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
                                   void *src, unsigned long len);
+extern void uprobe_handle_trampoline(struct pt_regs *regs);
+extern void *arch_uprobe_trampoline(unsigned long *psize);
+extern unsigned long uprobe_get_trampoline_vaddr(void);
 #else /* !CONFIG_UPROBES */
 struct uprobes_state {
 };

kernel/events/uprobes.c
Lines changed: 17 additions & 7 deletions

@@ -1474,11 +1474,20 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area)
        return ret;
 }
 
+void * __weak arch_uprobe_trampoline(unsigned long *psize)
+{
+       static uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+
+       *psize = UPROBE_SWBP_INSN_SIZE;
+       return &insn;
+}
+
 static struct xol_area *__create_xol_area(unsigned long vaddr)
 {
        struct mm_struct *mm = current->mm;
-       uprobe_opcode_t insn = UPROBE_SWBP_INSN;
+       unsigned long insns_size;
        struct xol_area *area;
+       void *insns;
 
        area = kmalloc(sizeof(*area), GFP_KERNEL);
        if (unlikely(!area))

@@ -1502,7 +1511,8 @@ static struct xol_area *__create_xol_area(unsigned long vaddr)
        /* Reserve the 1st slot for get_trampoline_vaddr() */
        set_bit(0, area->bitmap);
        atomic_set(&area->slot_count, 1);
-       arch_uprobe_copy_ixol(area->pages[0], 0, &insn, UPROBE_SWBP_INSN_SIZE);
+       insns = arch_uprobe_trampoline(&insns_size);
+       arch_uprobe_copy_ixol(area->pages[0], 0, insns, insns_size);
 
        if (!xol_add_vma(mm, area))
                return area;

@@ -1827,7 +1837,7 @@ void uprobe_copy_process(struct task_struct *t, unsigned long flags)
  *
  * Returns -1 in case the xol_area is not allocated.
  */
-static unsigned long get_trampoline_vaddr(void)
+unsigned long uprobe_get_trampoline_vaddr(void)
 {
        struct xol_area *area;
        unsigned long trampoline_vaddr = -1;

@@ -1878,7 +1888,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
        if (!ri)
                return;
 
-       trampoline_vaddr = get_trampoline_vaddr();
+       trampoline_vaddr = uprobe_get_trampoline_vaddr();
        orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, regs);
        if (orig_ret_vaddr == -1)
                goto fail;

@@ -2123,7 +2133,7 @@ static struct return_instance *find_next_ret_chain(struct return_instance *ri)
        return ri;
 }
 
-static void handle_trampoline(struct pt_regs *regs)
+void uprobe_handle_trampoline(struct pt_regs *regs)
 {
        struct uprobe_task *utask;
        struct return_instance *ri, *next;

@@ -2187,8 +2197,8 @@ static void handle_swbp(struct pt_regs *regs)
        int is_swbp;
 
        bp_vaddr = uprobe_get_swbp_addr(regs);
-       if (bp_vaddr == get_trampoline_vaddr())
-               return handle_trampoline(regs);
+       if (bp_vaddr == uprobe_get_trampoline_vaddr())
+               return uprobe_handle_trampoline(regs);
 
        uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
        if (!uprobe) {