
Commit 4733f09

zhangqingmy authored and chenhuacai committed
LoongArch/ftrace: Add dynamic function tracer support
The compiler has inserted 2 NOPs before the regular function prologue. T series registers are available and safe because of LoongArch's psABI.

At runtime, we can replace nop with bl to enable the ftrace call, and replace bl with nop to disable it. The bl instruction requires us to save the original RA value, so RA is saved to t0 here. Details are:

| Compiled   | Disabled               | Enabled                |
+------------+------------------------+------------------------+
| nop        | move t0, ra            | move t0, ra            |
| nop        | nop                    | bl ftrace_caller       |
| func_body  | func_body              | func_body              |

The RA value will be recovered by ftrace_regs_entry, and restored into RA before returning to the regular function prologue. When a function is not being traced, the "move t0, ra" is not harmful.

1) ftrace_make_call, ftrace_make_nop (in kernel/ftrace_dyn.c)
   These two functions turn each recorded call site of filtered functions into a call to ftrace_caller or nops.

2) ftrace_update_ftrace_func (in kernel/ftrace_dyn.c)
   Turns the nops at ftrace_call into a call to a generic entry for function tracers.

3) ftrace_caller (in kernel/mcount_dyn.S)
   The entry that each _mcount call site calls once it is filtered to be traced.

Co-developed-by: Jinyang He <[email protected]>
Signed-off-by: Jinyang He <[email protected]>
Signed-off-by: Qing Zhang <[email protected]>
Signed-off-by: Huacai Chen <[email protected]>
1 parent a0a458f commit 4733f09

File tree

10 files changed (+367, -10 lines)


arch/loongarch/Kconfig

Lines changed: 1 addition & 0 deletions
@@ -88,6 +88,7 @@ config LOONGARCH
 	select HAVE_C_RECORDMCOUNT
 	select HAVE_DEBUG_STACKOVERFLOW
 	select HAVE_DMA_CONTIGUOUS
+	select HAVE_DYNAMIC_FTRACE
 	select HAVE_EBPF_JIT
 	select HAVE_EXIT_THREAD
 	select HAVE_FAST_GUP
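Selecting HAVE_DYNAMIC_FTRACE only advertises the architecture capability; the user-facing switch is the generic DYNAMIC_FTRACE option. A hedged .config fragment for exercising this commit (option names from the generic ftrace Kconfig; the full dependency closure may pull in more):

	CONFIG_FUNCTION_TRACER=y
	CONFIG_DYNAMIC_FTRACE=y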

arch/loongarch/Makefile

Lines changed: 5 additions & 0 deletions
@@ -25,6 +25,11 @@ endif
 32bit-emul = elf32loongarch
 64bit-emul = elf64loongarch

+ifdef CONFIG_DYNAMIC_FTRACE
+KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+CC_FLAGS_FTRACE := -fpatchable-function-entry=2
+endif
+
 ifdef CONFIG_64BIT
 tool-archpref = $(64bit-tool-archpref)
 UTS_MACHINE := loongarch64
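For context, -fpatchable-function-entry=2 makes the compiler emit two NOPs at the very top of every instrumentable function, ahead of the regular prologue, and CC_USING_PATCHABLE_FUNCTION_ENTRY tells generic ftrace code that call sites look like this rather than like _mcount calls. A sketch of the effect, with an arbitrary C function and an illustrative (not captured from a real build) entry sequence in the comment:

	/* foo.c, built with: gcc -fpatchable-function-entry=2 ... */
	int foo(int x)
	{
		return x + 1;
	}

	/*
	 * Illustrative entry of the compiled foo():
	 *
	 *   foo:
	 *     nop                  <- patched to "move t0, ra" at boot
	 *     nop                  <- patched to "bl ftrace_caller" when traced
	 *     ...                  <- regular prologue and body
	 */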

arch/loongarch/include/asm/ftrace.h

Lines changed: 21 additions & 0 deletions
@@ -11,9 +11,30 @@
 #define MCOUNT_INSN_SIZE 4 /* sizeof mcount call */

 #ifndef __ASSEMBLY__
+
+#ifndef CONFIG_DYNAMIC_FTRACE
+
 #define mcount _mcount
 extern void _mcount(void);
 extern void prepare_ftrace_return(unsigned long self_addr, unsigned long callsite_sp, unsigned long old);
+
+#else
+
+struct dyn_ftrace;
+struct dyn_arch_ftrace { };
+
+#define ftrace_init_nop ftrace_init_nop
+int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
+
+static inline unsigned long ftrace_call_adjust(unsigned long addr)
+{
+	return addr;
+}
+
+void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent);
+
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
 #endif /* __ASSEMBLY__ */

 #endif /* CONFIG_FUNCTION_TRACER */

arch/loongarch/include/asm/inst.h

Lines changed: 11 additions & 0 deletions
@@ -349,6 +349,17 @@ static inline bool is_stack_alloc_ins(union loongarch_instruction *ip)
 	       is_imm12_negative(ip->reg2i12_format.immediate);
 }

+int larch_insn_read(void *addr, u32 *insnp);
+int larch_insn_write(void *addr, u32 insn);
+int larch_insn_patch_text(void *addr, u32 insn);
+
+u32 larch_insn_gen_nop(void);
+u32 larch_insn_gen_b(unsigned long pc, unsigned long dest);
+u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest);
+
+u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk);
+u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj);
+
 u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm);
 u32 larch_insn_gen_lu52id(enum loongarch_gpr rd, enum loongarch_gpr rj, int imm);
 u32 larch_insn_gen_jirl(enum loongarch_gpr rd, enum loongarch_gpr rj, unsigned long pc, unsigned long dest);

arch/loongarch/include/asm/unwind.h

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ struct unwind_state {
 	char type; /* UNWINDER_XXX */
 	struct stack_info stack_info;
 	struct task_struct *task;
-	bool first, error;
+	bool first, error, is_ftrace;
 	unsigned long sp, pc, ra;
 };

arch/loongarch/kernel/Makefile

Lines changed: 10 additions & 5 deletions
@@ -16,11 +16,16 @@ obj-$(CONFIG_EFI) += efi.o
 obj-$(CONFIG_CPU_HAS_FPU) += fpu.o

 ifdef CONFIG_FUNCTION_TRACER
-  obj-y += mcount.o ftrace.o
-  CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
-  CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
-  CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
-  CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
+  ifndef CONFIG_DYNAMIC_FTRACE
+    obj-y += mcount.o ftrace.o
+    CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+  else
+    obj-y += mcount_dyn.o ftrace_dyn.o
+    CFLAGS_REMOVE_ftrace_dyn.o = $(CC_FLAGS_FTRACE)
+  endif
+  CFLAGS_REMOVE_inst.o = $(CC_FLAGS_FTRACE)
+  CFLAGS_REMOVE_time.o = $(CC_FLAGS_FTRACE)
+  CFLAGS_REMOVE_perf_event.o = $(CC_FLAGS_FTRACE)
 endif

 obj-$(CONFIG_MODULES) += module.o module-sections.o

arch/loongarch/kernel/ftrace_dyn.c

Lines changed: 110 additions & 0 deletions
@@ -0,0 +1,110 @@ (new file)
// SPDX-License-Identifier: GPL-2.0
/*
 * Based on arch/arm64/kernel/ftrace.c
 *
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 */

#include <linux/ftrace.h>
#include <linux/uaccess.h>

#include <asm/inst.h>

static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, bool validate)
{
	u32 replaced;

	if (validate) {
		if (larch_insn_read((void *)pc, &replaced))
			return -EFAULT;

		if (replaced != old)
			return -EINVAL;
	}

	if (larch_insn_patch_text((void *)pc, new))
		return -EPERM;

	return 0;
}

int ftrace_update_ftrace_func(ftrace_func_t func)
{
	u32 new;
	unsigned long pc;

	pc = (unsigned long)&ftrace_call;
	new = larch_insn_gen_bl(pc, (unsigned long)func);

	return ftrace_modify_code(pc, 0, new, false);
}

/*
 * The compiler has inserted 2 NOPs before the regular function prologue.
 * T series registers are available and safe because of LoongArch's psABI.
 *
 * At runtime, we can replace nop with bl to enable ftrace call and replace bl
 * with nop to disable ftrace call. The bl requires us to save the original RA
 * value, so it saves RA at t0 here.
 *
 * Details are:
 *
 * | Compiled   | Disabled               | Enabled                |
 * +------------+------------------------+------------------------+
 * | nop        | move t0, ra            | move t0, ra            |
 * | nop        | nop                    | bl ftrace_caller       |
 * | func_body  | func_body              | func_body              |
 *
 * The RA value will be recovered by ftrace_regs_entry, and restored into RA
 * before returning to the regular function prologue. When a function is not
 * being traced, the "move t0, ra" is not harmful.
 */

int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec)
{
	u32 old, new;
	unsigned long pc;

	pc = rec->ip;
	old = larch_insn_gen_nop();
	new = larch_insn_gen_move(LOONGARCH_GPR_T0, LOONGARCH_GPR_RA);

	return ftrace_modify_code(pc, old, new, true);
}

int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
	u32 old, new;
	unsigned long pc;

	pc = rec->ip + LOONGARCH_INSN_SIZE;

	old = larch_insn_gen_nop();
	new = larch_insn_gen_bl(pc, addr);

	return ftrace_modify_code(pc, old, new, true);
}

int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr)
{
	u32 old, new;
	unsigned long pc;

	pc = rec->ip + LOONGARCH_INSN_SIZE;

	new = larch_insn_gen_nop();
	old = larch_insn_gen_bl(pc, addr);

	return ftrace_modify_code(pc, old, new, true);
}

void arch_ftrace_update_code(int command)
{
	command |= FTRACE_MAY_SLEEP;
	ftrace_modify_all_code(command);
}

int __init ftrace_dyn_arch_init(void)
{
	return 0;
}
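To make the patching life cycle concrete, here is a minimal user-space model (not kernel code) of the two patchable slots at one call site. The instruction words are symbolic placeholders; in the kernel they come from larch_insn_gen_nop(), larch_insn_gen_move() and larch_insn_gen_bl():

	#include <assert.h>

	/* Symbolic stand-ins for the real 32-bit encodings. */
	enum insn { NOP, MOVE_T0_RA, BL_FTRACE_CALLER };

	/* slot[0] sits at rec->ip, slot[1] at rec->ip + LOONGARCH_INSN_SIZE. */
	static enum insn slot[2] = { NOP, NOP };	/* "Compiled" state */

	static void init_nop(void)  { slot[0] = MOVE_T0_RA; }       /* ftrace_init_nop()  */
	static void make_call(void) { slot[1] = BL_FTRACE_CALLER; } /* ftrace_make_call() */
	static void make_nop(void)  { slot[1] = NOP; }              /* ftrace_make_nop()  */

	int main(void)
	{
		init_nop();	/* boot: Compiled -> Disabled */
		assert(slot[0] == MOVE_T0_RA && slot[1] == NOP);

		make_call();	/* tracing on: Disabled -> Enabled */
		assert(slot[1] == BL_FTRACE_CALLER);

		make_nop();	/* tracing off: Enabled -> Disabled */
		assert(slot[1] == NOP);
		return 0;
	}

Note that only the second slot is ever toggled after boot: the first slot is patched exactly once by ftrace_init_nop(), which is why the harmless "move t0, ra" stays in place while tracing is disabled.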

arch/loongarch/kernel/inst.c

Lines changed: 92 additions & 0 deletions
@@ -2,8 +2,100 @@
 /*
  * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
  */
+#include <linux/sizes.h>
+#include <linux/uaccess.h>
+
+#include <asm/cacheflush.h>
 #include <asm/inst.h>

+static DEFINE_RAW_SPINLOCK(patch_lock);
+
+int larch_insn_read(void *addr, u32 *insnp)
+{
+	int ret;
+	u32 val;
+
+	ret = copy_from_kernel_nofault(&val, addr, LOONGARCH_INSN_SIZE);
+	if (!ret)
+		*insnp = val;
+
+	return ret;
+}
+
+int larch_insn_write(void *addr, u32 insn)
+{
+	int ret;
+	unsigned long flags = 0;
+
+	raw_spin_lock_irqsave(&patch_lock, flags);
+	ret = copy_to_kernel_nofault(addr, &insn, LOONGARCH_INSN_SIZE);
+	raw_spin_unlock_irqrestore(&patch_lock, flags);
+
+	return ret;
+}
+
+int larch_insn_patch_text(void *addr, u32 insn)
+{
+	int ret;
+	u32 *tp = addr;
+
+	if ((unsigned long)tp & 3)
+		return -EINVAL;
+
+	ret = larch_insn_write(tp, insn);
+	if (!ret)
+		flush_icache_range((unsigned long)tp,
+				   (unsigned long)tp + LOONGARCH_INSN_SIZE);
+
+	return ret;
+}
+
+u32 larch_insn_gen_nop(void)
+{
+	return INSN_NOP;
+}
+
+u32 larch_insn_gen_bl(unsigned long pc, unsigned long dest)
+{
+	long offset = dest - pc;
+	unsigned int immediate_l, immediate_h;
+	union loongarch_instruction insn;
+
+	if ((offset & 3) || offset < -SZ_128M || offset >= SZ_128M) {
+		pr_warn("The generated bl instruction is out of range.\n");
+		return INSN_BREAK;
+	}
+
+	offset >>= 2;
+
+	immediate_l = offset & 0xffff;
+	offset >>= 16;
+	immediate_h = offset & 0x3ff;
+
+	insn.reg0i26_format.opcode = bl_op;
+	insn.reg0i26_format.immediate_l = immediate_l;
+	insn.reg0i26_format.immediate_h = immediate_h;
+
+	return insn.word;
+}
+
+u32 larch_insn_gen_or(enum loongarch_gpr rd, enum loongarch_gpr rj, enum loongarch_gpr rk)
+{
+	union loongarch_instruction insn;
+
+	insn.reg3_format.opcode = or_op;
+	insn.reg3_format.rd = rd;
+	insn.reg3_format.rj = rj;
+	insn.reg3_format.rk = rk;
+
+	return insn.word;
+}
+
+u32 larch_insn_gen_move(enum loongarch_gpr rd, enum loongarch_gpr rj)
+{
+	return larch_insn_gen_or(rd, rj, 0);
+}
+
 u32 larch_insn_gen_lu32id(enum loongarch_gpr rd, int imm)
 {
 	union loongarch_instruction insn;
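As a sanity check on the immediate split in larch_insn_gen_bl(), here is a standalone user-space re-derivation (the addresses are illustrative; the field layout follows the reg0i26 format, which stores a 26-bit signed word offset as a 16-bit low half and a 10-bit high half):

	#include <stdint.h>
	#include <stdio.h>

	int main(void)
	{
		unsigned long pc   = 0x9000000000200000UL;	/* illustrative */
		unsigned long dest = 0x9000000000201000UL;	/* pc + 4 KiB */
		long offset = (long)(dest - pc);		/* byte offset: 0x1000 */

		offset >>= 2;					/* word offset: 0x400 */
		uint32_t immediate_l = offset & 0xffff;		/* -> 0x0400 */
		uint32_t immediate_h = (offset >> 16) & 0x3ff;	/* -> 0x0 */

		printf("imm_l=0x%04x imm_h=0x%03x\n",
		       (unsigned int)immediate_l, (unsigned int)immediate_h);
		return 0;
	}

The +/-128M range check in the kernel code follows directly from this layout: 26 bits of signed word offset span 2^28 bytes, i.e. -128M to +128M around pc.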

arch/loongarch/kernel/mcount_dyn.S

Lines changed: 85 additions & 0 deletions
@@ -0,0 +1,85 @@ (new file)
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Copyright (C) 2022 Loongson Technology Corporation Limited
 */

#include <asm/export.h>
#include <asm/ftrace.h>
#include <asm/regdef.h>
#include <asm/stackframe.h>

	.text
/*
 * Due to -fpatchable-function-entry=2, the compiler has inserted 2 NOPs before
 * the regular C function prologue. When the PC arrives here, the last 2
 * instructions executed are as follows:
 *	move	t0, ra
 *	bl	callsite	(for modules, callsite is a trampoline)
 *
 * The module trampoline is as follows:
 *	lu12i.w	t1, callsite[31:12]
 *	lu32i.d	t1, callsite[51:32]
 *	lu52i.d	t1, t1, callsite[63:52]
 *	jirl	zero, t1, callsite[11:0] >> 2
 *
 * See arch/loongarch/kernel/ftrace_dyn.c for details. Note that the T series
 * regs are available and safe here because each C function follows the
 * LoongArch psABI as well.
 */

	.macro	ftrace_regs_entry
	PTR_ADDI	sp, sp, -PT_SIZE
	PTR_S		t0, sp, PT_R1		/* Save parent ra at PT_R1(RA) */
	PTR_S		a0, sp, PT_R4
	PTR_S		a1, sp, PT_R5
	PTR_S		a2, sp, PT_R6
	PTR_S		a3, sp, PT_R7
	PTR_S		a4, sp, PT_R8
	PTR_S		a5, sp, PT_R9
	PTR_S		a6, sp, PT_R10
	PTR_S		a7, sp, PT_R11
	PTR_S		fp, sp, PT_R22
	PTR_S		ra, sp, PT_ERA		/* Save trace function ra at PT_ERA */
	PTR_ADDI	t8, sp, PT_SIZE
	PTR_S		t8, sp, PT_R3
	.endm

SYM_FUNC_START(ftrace_stub)
	jr	ra
SYM_FUNC_END(ftrace_stub)

SYM_CODE_START(ftrace_common)
	PTR_ADDI	a0, ra, -8	/* arg0: ip */
	move		a1, t0		/* arg1: parent_ip */
	la.pcrel	t1, function_trace_op
	PTR_L		a2, t1, 0	/* arg2: op */
	move		a3, sp		/* arg3: regs */

SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
	bl		ftrace_stub
	/*
	 * As we didn't use S series regs in this assembly code and all calls
	 * are C functions which save S series regs by themselves, there is no
	 * need to restore S series regs. The T series regs are available and
	 * safe at the callsite, so there is no need to restore them either.
	 */
ftrace_common_return:
	PTR_L		ra, sp, PT_R1
	PTR_L		a0, sp, PT_R4
	PTR_L		a1, sp, PT_R5
	PTR_L		a2, sp, PT_R6
	PTR_L		a3, sp, PT_R7
	PTR_L		a4, sp, PT_R8
	PTR_L		a5, sp, PT_R9
	PTR_L		a6, sp, PT_R10
	PTR_L		a7, sp, PT_R11
	PTR_L		fp, sp, PT_R22
	PTR_L		t0, sp, PT_ERA
	PTR_ADDI	sp, sp, PT_SIZE
	jr	t0
SYM_CODE_END(ftrace_common)

SYM_CODE_START(ftrace_caller)
	ftrace_regs_entry
	b	ftrace_common
SYM_CODE_END(ftrace_caller)
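With everything wired up, the standard tracefs interface drives the code above: selecting a tracer makes the ftrace core call ftrace_make_call() for every filtered site. A minimal user-space smoke test, assuming tracefs is mounted at /sys/kernel/tracing:

	#include <stdio.h>

	int main(void)
	{
		/* Selecting the function tracer patches the second NOP slot
		 * of every filtered call site into "bl ftrace_caller". */
		FILE *f = fopen("/sys/kernel/tracing/current_tracer", "w");

		if (!f) {
			perror("current_tracer");
			return 1;
		}
		fputs("function", f);	/* write "nop" instead to disable */
		fclose(f);
		return 0;
	}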
