Skip to content

Commit 26299b3

Browse files
mrutland-arm authored and willdeacon committed
ftrace: arm64: move from REGS to ARGS
This commit replaces arm64's support for FTRACE_WITH_REGS with support for FTRACE_WITH_ARGS. This removes some overhead and complexity, and removes some latent issues with inconsistent presentation of struct pt_regs (which can only be reliably saved/restored at exception boundaries). FTRACE_WITH_REGS has been supported on arm64 since commit: 3b23e49 ("arm64: implement ftrace with regs") As noted in the commit message, the major reasons for implementing FTRACE_WITH_REGS were: (1) To make it possible to use the ftrace graph tracer with pointer authentication, where it's necessary to snapshot/manipulate the LR before it is signed by the instrumented function. (2) To make it possible to implement LIVEPATCH in future, where we need to hook function entry before an instrumented function manipulates the stack or argument registers. Practically speaking, we need to preserve the argument/return registers, PC, LR, and SP. Neither of these need a struct pt_regs, and only require the set of registers which are live at function call/return boundaries. Our calling convention is defined by "Procedure Call Standard for the Arm® 64-bit Architecture (AArch64)" (AKA "AAPCS64"), which can currently be found at: https://github.com/ARM-software/abi-aa/blob/main/aapcs64/aapcs64.rst Per AAPCS64, all function call argument and return values are held in the following GPRs: * X0 - X7 : parameter / result registers * X8 : indirect result location register * SP : stack pointer (AKA SP) Additionally, at function call boundaries, the following GPRs hold context/return information: * X29 : frame pointer (AKA FP) * X30 : link register (AKA LR) ... 
and for ftrace we need to capture the instrumented address: * PC : program counter No other GPRs are relevant, as none of the other registers hold parameters or return values: * X9 - X17 : temporaries, may be clobbered * X18 : shadow call stack pointer (or temporary) * X19 - X28 : callee saved This patch implements FTRACE_WITH_ARGS for arm64, only saving/restoring the minimal set of registers necessary. This is always sufficient to manipulate control flow (e.g. for live-patching) or to manipulate function arguments and return values. This reduces the necessary stack usage from 336 bytes for pt_regs down to 112 bytes for ftrace_regs + 32 bytes for two frame records, freeing up 192 bytes. This could be reduced further with changes to the unwinder. As there is no longer a need to save different sets of registers for different features, we no longer need distinct `ftrace_caller` and `ftrace_regs_caller` trampolines. This allows the trampoline assembly to be simpler, and simplifies code which previously had to handle the two trampolines. I've tested this with the ftrace selftests, where there are no unexpected failures. Co-developed-by: Florent Revest <[email protected]> Signed-off-by: Mark Rutland <[email protected]> Signed-off-by: Florent Revest <[email protected]> Cc: Catalin Marinas <[email protected]> Cc: Masami Hiramatsu <[email protected]> Cc: Steven Rostedt <[email protected]> Cc: Will Deacon <[email protected]> Reviewed-by: Masami Hiramatsu (Google) <[email protected]> Reviewed-by: Steven Rostedt (Google) <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Will Deacon <[email protected]>
1 parent 94d095f commit 26299b3

File tree

7 files changed

+184
-123
lines changed

7 files changed

+184
-123
lines changed

arch/arm64/Kconfig

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -181,8 +181,10 @@ config ARM64
181181
select HAVE_DEBUG_KMEMLEAK
182182
select HAVE_DMA_CONTIGUOUS
183183
select HAVE_DYNAMIC_FTRACE
184+
select HAVE_DYNAMIC_FTRACE_WITH_ARGS \
185+
if $(cc-option,-fpatchable-function-entry=2)
184186
select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
185-
if DYNAMIC_FTRACE_WITH_REGS
187+
if DYNAMIC_FTRACE_WITH_ARGS
186188
select HAVE_EFFICIENT_UNALIGNED_ACCESS
187189
select HAVE_FAST_GUP
188190
select HAVE_FTRACE_MCOUNT_RECORD
@@ -233,16 +235,16 @@ config ARM64
233235
help
234236
ARM 64-bit (AArch64) Linux support.
235237

236-
config CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS
238+
config CLANG_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS
237239
def_bool CC_IS_CLANG
238240
# https://github.com/ClangBuiltLinux/linux/issues/1507
239241
depends on AS_IS_GNU || (AS_IS_LLVM && (LD_IS_LLD || LD_VERSION >= 23600))
240-
select HAVE_DYNAMIC_FTRACE_WITH_REGS
242+
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
241243

242-
config GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_REGS
244+
config GCC_SUPPORTS_DYNAMIC_FTRACE_WITH_ARGS
243245
def_bool CC_IS_GCC
244246
depends on $(cc-option,-fpatchable-function-entry=2)
245-
select HAVE_DYNAMIC_FTRACE_WITH_REGS
247+
select HAVE_DYNAMIC_FTRACE_WITH_ARGS
246248

247249
config 64BIT
248250
def_bool y
@@ -1816,7 +1818,7 @@ config ARM64_PTR_AUTH_KERNEL
18161818
# which is only understood by binutils starting with version 2.33.1.
18171819
depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100)
18181820
depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE
1819-
depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
1821+
depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_ARGS)
18201822
help
18211823
If the compiler supports the -mbranch-protection or
18221824
-msign-return-address flag (e.g. GCC 7 or later), then this option
@@ -1826,7 +1828,7 @@ config ARM64_PTR_AUTH_KERNEL
18261828
disabled with minimal loss of protection.
18271829

18281830
This feature works with FUNCTION_GRAPH_TRACER option only if
1829-
DYNAMIC_FTRACE_WITH_REGS is enabled.
1831+
DYNAMIC_FTRACE_WITH_ARGS is enabled.
18301832

18311833
config CC_HAS_BRANCH_PROT_PAC_RET
18321834
# GCC 9 or later, clang 8 or later
@@ -1924,7 +1926,7 @@ config ARM64_BTI_KERNEL
19241926
depends on !CC_IS_GCC
19251927
# https://github.com/llvm/llvm-project/commit/a88c722e687e6780dcd6a58718350dc76fcc4cc9
19261928
depends on !CC_IS_CLANG || CLANG_VERSION >= 120000
1927-
depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
1929+
depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_ARGS)
19281930
help
19291931
Build the kernel with Branch Target Identification annotations
19301932
and enable enforcement of this for kernel code. When this option

arch/arm64/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ endif
128128

129129
CHECKFLAGS += -D__aarch64__
130130

131-
ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y)
131+
ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_ARGS),y)
132132
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
133133
CC_FLAGS_FTRACE := -fpatchable-function-entry=2
134134
endif

arch/arm64/include/asm/ftrace.h

Lines changed: 66 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
*/
2424
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
2525

26-
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
26+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
2727
#define ARCH_SUPPORTS_FTRACE_OPS 1
2828
#else
2929
#define MCOUNT_ADDR ((unsigned long)_mcount)
@@ -33,8 +33,7 @@
3333
#define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE
3434

3535
#define FTRACE_PLT_IDX 0
36-
#define FTRACE_REGS_PLT_IDX 1
37-
#define NR_FTRACE_PLTS 2
36+
#define NR_FTRACE_PLTS 1
3837

3938
/*
4039
* Currently, gcc tends to save the link register after the local variables
@@ -69,7 +68,7 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
6968
* Adjust addr to point at the BL in the callsite.
7069
* See ftrace_init_nop() for the callsite sequence.
7170
*/
72-
if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS))
71+
if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS))
7372
return addr + AARCH64_INSN_SIZE;
7473
/*
7574
* addr is the address of the mcount call instruction.
@@ -78,10 +77,71 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr)
7877
return addr;
7978
}
8079

81-
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
80+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
8281
struct dyn_ftrace;
8382
struct ftrace_ops;
84-
struct ftrace_regs;
83+
84+
#define arch_ftrace_get_regs(regs) NULL
85+
86+
struct ftrace_regs {
87+
/* x0 - x8 */
88+
unsigned long regs[9];
89+
unsigned long __unused;
90+
91+
unsigned long fp;
92+
unsigned long lr;
93+
94+
unsigned long sp;
95+
unsigned long pc;
96+
};
97+
98+
static __always_inline unsigned long
99+
ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs)
100+
{
101+
return fregs->pc;
102+
}
103+
104+
static __always_inline void
105+
ftrace_regs_set_instruction_pointer(struct ftrace_regs *fregs,
106+
unsigned long pc)
107+
{
108+
fregs->pc = pc;
109+
}
110+
111+
static __always_inline unsigned long
112+
ftrace_regs_get_stack_pointer(const struct ftrace_regs *fregs)
113+
{
114+
return fregs->sp;
115+
}
116+
117+
static __always_inline unsigned long
118+
ftrace_regs_get_argument(struct ftrace_regs *fregs, unsigned int n)
119+
{
120+
if (n < 8)
121+
return fregs->regs[n];
122+
return 0;
123+
}
124+
125+
static __always_inline unsigned long
126+
ftrace_regs_get_return_value(const struct ftrace_regs *fregs)
127+
{
128+
return fregs->regs[0];
129+
}
130+
131+
static __always_inline void
132+
ftrace_regs_set_return_value(struct ftrace_regs *fregs,
133+
unsigned long ret)
134+
{
135+
fregs->regs[0] = ret;
136+
}
137+
138+
static __always_inline void
139+
ftrace_override_function_with_return(struct ftrace_regs *fregs)
140+
{
141+
fregs->pc = fregs->lr;
142+
}
143+
144+
int ftrace_regs_query_register_offset(const char *name);
85145

86146
int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec);
87147
#define ftrace_init_nop ftrace_init_nop

arch/arm64/kernel/asm-offsets.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,19 @@ int main(void)
8282
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
8383
DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs));
8484
BLANK();
85+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
86+
DEFINE(FREGS_X0, offsetof(struct ftrace_regs, regs[0]));
87+
DEFINE(FREGS_X2, offsetof(struct ftrace_regs, regs[2]));
88+
DEFINE(FREGS_X4, offsetof(struct ftrace_regs, regs[4]));
89+
DEFINE(FREGS_X6, offsetof(struct ftrace_regs, regs[6]));
90+
DEFINE(FREGS_X8, offsetof(struct ftrace_regs, regs[8]));
91+
DEFINE(FREGS_FP, offsetof(struct ftrace_regs, fp));
92+
DEFINE(FREGS_LR, offsetof(struct ftrace_regs, lr));
93+
DEFINE(FREGS_SP, offsetof(struct ftrace_regs, sp));
94+
DEFINE(FREGS_PC, offsetof(struct ftrace_regs, pc));
95+
DEFINE(FREGS_SIZE, sizeof(struct ftrace_regs));
96+
BLANK();
97+
#endif
8598
#ifdef CONFIG_COMPAT
8699
DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));
87100
DEFINE(COMPAT_RT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_rt_sigframe, sig.uc.uc_mcontext.arm_r0));

arch/arm64/kernel/entry-ftrace.S

Lines changed: 46 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -13,83 +13,58 @@
1313
#include <asm/ftrace.h>
1414
#include <asm/insn.h>
1515

16-
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
16+
#ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
1717
/*
1818
* Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before
1919
* the regular function prologue. For an enabled callsite, ftrace_init_nop() and
2020
* ftrace_make_call() have patched those NOPs to:
2121
*
2222
* MOV X9, LR
23-
* BL <entry>
24-
*
25-
* ... where <entry> is either ftrace_caller or ftrace_regs_caller.
23+
* BL ftrace_caller
2624
*
2725
* Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are
2826
* live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to
2927
* clobber.
3028
*
31-
* We save the callsite's context into a pt_regs before invoking any ftrace
32-
* callbacks. So that we can get a sensible backtrace, we create a stack record
33-
* for the callsite and the ftrace entry assembly. This is not sufficient for
34-
* reliable stacktrace: until we create the callsite stack record, its caller
35-
* is missing from the LR and existing chain of frame records.
29+
* We save the callsite's context into a struct ftrace_regs before invoking any
30+
* ftrace callbacks. So that we can get a sensible backtrace, we create frame
31+
* records for the callsite and the ftrace entry assembly. This is not
32+
* sufficient for reliable stacktrace: until we create the callsite stack
33+
* record, its caller is missing from the LR and existing chain of frame
34+
* records.
3635
*/
37-
.macro ftrace_regs_entry, allregs=0
38-
/* Make room for pt_regs, plus a callee frame */
39-
sub sp, sp, #(PT_REGS_SIZE + 16)
40-
41-
/* Save function arguments (and x9 for simplicity) */
42-
stp x0, x1, [sp, #S_X0]
43-
stp x2, x3, [sp, #S_X2]
44-
stp x4, x5, [sp, #S_X4]
45-
stp x6, x7, [sp, #S_X6]
46-
stp x8, x9, [sp, #S_X8]
47-
48-
/* Optionally save the callee-saved registers, always save the FP */
49-
.if \allregs == 1
50-
stp x10, x11, [sp, #S_X10]
51-
stp x12, x13, [sp, #S_X12]
52-
stp x14, x15, [sp, #S_X14]
53-
stp x16, x17, [sp, #S_X16]
54-
stp x18, x19, [sp, #S_X18]
55-
stp x20, x21, [sp, #S_X20]
56-
stp x22, x23, [sp, #S_X22]
57-
stp x24, x25, [sp, #S_X24]
58-
stp x26, x27, [sp, #S_X26]
59-
stp x28, x29, [sp, #S_X28]
60-
.else
61-
str x29, [sp, #S_FP]
62-
.endif
63-
64-
/* Save the callsite's SP and LR */
65-
add x10, sp, #(PT_REGS_SIZE + 16)
66-
stp x9, x10, [sp, #S_LR]
36+
SYM_CODE_START(ftrace_caller)
37+
bti c
6738

68-
/* Save the PC after the ftrace callsite */
69-
str x30, [sp, #S_PC]
39+
/* Save original SP */
40+
mov x10, sp
7041

71-
/* Create a frame record for the callsite above pt_regs */
72-
stp x29, x9, [sp, #PT_REGS_SIZE]
73-
add x29, sp, #PT_REGS_SIZE
42+
/* Make room for ftrace regs, plus two frame records */
43+
sub sp, sp, #(FREGS_SIZE + 32)
7444

75-
/* Create our frame record within pt_regs. */
76-
stp x29, x30, [sp, #S_STACKFRAME]
77-
add x29, sp, #S_STACKFRAME
78-
.endm
45+
/* Save function arguments */
46+
stp x0, x1, [sp, #FREGS_X0]
47+
stp x2, x3, [sp, #FREGS_X2]
48+
stp x4, x5, [sp, #FREGS_X4]
49+
stp x6, x7, [sp, #FREGS_X6]
50+
str x8, [sp, #FREGS_X8]
7951

80-
SYM_CODE_START(ftrace_regs_caller)
81-
bti c
82-
ftrace_regs_entry 1
83-
b ftrace_common
84-
SYM_CODE_END(ftrace_regs_caller)
52+
/* Save the callsite's FP, LR, SP */
53+
str x29, [sp, #FREGS_FP]
54+
str x9, [sp, #FREGS_LR]
55+
str x10, [sp, #FREGS_SP]
8556

86-
SYM_CODE_START(ftrace_caller)
87-
bti c
88-
ftrace_regs_entry 0
89-
b ftrace_common
90-
SYM_CODE_END(ftrace_caller)
57+
/* Save the PC after the ftrace callsite */
58+
str x30, [sp, #FREGS_PC]
59+
60+
/* Create a frame record for the callsite above the ftrace regs */
61+
stp x29, x9, [sp, #FREGS_SIZE + 16]
62+
add x29, sp, #FREGS_SIZE + 16
63+
64+
/* Create our frame record above the ftrace regs */
65+
stp x29, x30, [sp, #FREGS_SIZE]
66+
add x29, sp, #FREGS_SIZE
9167

92-
SYM_CODE_START(ftrace_common)
9368
sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
9469
mov x1, x9 // parent_ip (callsite's LR)
9570
ldr_l x2, function_trace_op // op
@@ -104,24 +79,24 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL)
10479
* to restore x0-x8, x29, and x30.
10580
*/
10681
/* Restore function arguments */
107-
ldp x0, x1, [sp]
108-
ldp x2, x3, [sp, #S_X2]
109-
ldp x4, x5, [sp, #S_X4]
110-
ldp x6, x7, [sp, #S_X6]
111-
ldr x8, [sp, #S_X8]
82+
ldp x0, x1, [sp, #FREGS_X0]
83+
ldp x2, x3, [sp, #FREGS_X2]
84+
ldp x4, x5, [sp, #FREGS_X4]
85+
ldp x6, x7, [sp, #FREGS_X6]
86+
ldr x8, [sp, #FREGS_X8]
11287

11388
/* Restore the callsite's FP, LR, PC */
114-
ldr x29, [sp, #S_FP]
115-
ldr x30, [sp, #S_LR]
116-
ldr x9, [sp, #S_PC]
89+
ldr x29, [sp, #FREGS_FP]
90+
ldr x30, [sp, #FREGS_LR]
91+
ldr x9, [sp, #FREGS_PC]
11792

11893
/* Restore the callsite's SP */
119-
add sp, sp, #PT_REGS_SIZE + 16
94+
add sp, sp, #FREGS_SIZE + 32
12095

12196
ret x9
122-
SYM_CODE_END(ftrace_common)
97+
SYM_CODE_END(ftrace_caller)
12398

124-
#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
99+
#else /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
125100

126101
/*
127102
* Gcc with -pg will put the following code in the beginning of each function:
@@ -293,7 +268,7 @@ SYM_FUNC_START(ftrace_graph_caller)
293268
mcount_exit
294269
SYM_FUNC_END(ftrace_graph_caller)
295270
#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
296-
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
271+
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_ARGS */
297272

298273
SYM_TYPED_FUNC_START(ftrace_stub)
299274
ret

0 commit comments

Comments (0)