
Commit 0982c8d

Author: Marc Zyngier
Merge branch kvm-arm64/nvhe-stacktrace into kvmarm-master/next
* kvm-arm64/nvhe-stacktrace: (27 commits)

  Add an overflow stack to the nVHE EL2 code, allowing the implementation
  of an unwinder, courtesy of Kalesh Singh. From the cover letter
  (slightly edited):

  "nVHE has two modes of operation: protected (pKVM) and unprotected
  (conventional nVHE). Depending on the mode, a slightly different
  approach is used to dump the hypervisor stacktrace, but the core
  unwinding logic remains the same.

  * Protected nVHE (pKVM) stacktraces:

  In protected nVHE mode, the host cannot directly access hypervisor
  memory.

  The hypervisor stack unwinding happens in EL2 and is made accessible
  to the host via a shared buffer. Symbolizing and printing the
  stacktrace addresses is delegated to the host and happens in EL1.

  * Non-protected (conventional) nVHE stacktraces:

  In non-protected mode, the host is able to directly access the
  hypervisor stack pages.

  The hypervisor stack unwinding and dumping of the stacktrace is
  performed by the host in EL1, as this avoids the memory overhead of
  setting up shared buffers between the host and hypervisor."

  Additional patches from Oliver Upton and Marc Zyngier, tidying up the
  initial series.

  arm64: Update 'unwinder howto'
  KVM: arm64: Don't open code ARRAY_SIZE()
  KVM: arm64: Move nVHE-only helpers into kvm/stacktrace.c
  KVM: arm64: Make unwind()/on_accessible_stack() per-unwinder functions
  KVM: arm64: Move nVHE stacktrace unwinding into its own compilation unit
  KVM: arm64: Move PROTECTED_NVHE_STACKTRACE around
  KVM: arm64: Introduce pkvm_dump_backtrace()
  KVM: arm64: Implement protected nVHE hyp stack unwinder
  KVM: arm64: Save protected-nVHE (pKVM) hyp stacktrace
  KVM: arm64: Stub implementation of pKVM HYP stack unwinder
  KVM: arm64: Allocate shared pKVM hyp stacktrace buffers
  KVM: arm64: Add PROTECTED_NVHE_STACKTRACE Kconfig
  KVM: arm64: Introduce hyp_dump_backtrace()
  KVM: arm64: Implement non-protected nVHE hyp stack unwinder
  KVM: arm64: Prepare non-protected nVHE hypervisor stacktrace
  KVM: arm64: Stub implementation of non-protected nVHE HYP stack unwinder
  KVM: arm64: On stack overflow switch to hyp overflow_stack
  arm64: stacktrace: Add description of stacktrace/common.h
  arm64: stacktrace: Factor out common unwind()
  arm64: stacktrace: Handle frame pointer from different address spaces
  ...

Signed-off-by: Marc Zyngier <[email protected]>
2 parents ae98a4a + a4c750e commit 0982c8d
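For orientation, the two modes described above meet in a single host-side
dispatch. A minimal sketch, assuming the helpers named in the shortlog
(pkvm_dump_backtrace() and hyp_dump_backtrace()); the exact body in the
merged code may differ:

/* EL1 side: pick the unwinding strategy based on the nVHE mode. */
void kvm_nvhe_dump_backtrace(unsigned long hyp_offset)
{
        if (is_protected_kvm_enabled())
                pkvm_dump_backtrace(hyp_offset); /* read the EL2-filled shared buffer */
        else
                hyp_dump_backtrace(hyp_offset);  /* unwind the hyp stack directly in EL1 */
}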

File tree

16 files changed: +775 -170 lines changed

arch/arm64/include/asm/kvm_asm.h

Lines changed: 16 additions & 0 deletions
@@ -176,6 +176,22 @@ struct kvm_nvhe_init_params {
 	unsigned long vtcr;
 };
 
+/*
+ * Used by the host in EL1 to dump the nVHE hypervisor backtrace on
+ * hyp_panic() in non-protected mode.
+ *
+ * @stack_base:          hyp VA of the hyp_stack base.
+ * @overflow_stack_base: hyp VA of the hyp_overflow_stack base.
+ * @fp:                  hyp FP where the backtrace begins.
+ * @pc:                  hyp PC where the backtrace begins.
+ */
+struct kvm_nvhe_stacktrace_info {
+	unsigned long stack_base;
+	unsigned long overflow_stack_base;
+	unsigned long fp;
+	unsigned long pc;
+};
+
 /* Translate a kernel address @ptr into its equivalent linear mapping */
 #define kvm_ksym_ref(ptr)						\
 	({								\
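For context, a sketch of how the EL2 side can fill this per-CPU structure
from hyp_panic() before returning control to the host. The series does this
in its "Prepare non-protected nVHE hypervisor stacktrace" patch; the
derivation of stack_base from kvm_nvhe_init_params shown here is
illustrative:

/* EL2 side, sketch: record where the host's backtrace should start. */
static void hyp_prepare_backtrace(unsigned long fp, unsigned long pc)
{
        struct kvm_nvhe_stacktrace_info *stacktrace_info =
                this_cpu_ptr(&kvm_stacktrace_info);
        struct kvm_nvhe_init_params *params = this_cpu_ptr(&kvm_init_params);

        /* stack_hyp_va points at the top of the stack; the base is a page below. */
        stacktrace_info->stack_base = params->stack_hyp_va - PAGE_SIZE;
        stacktrace_info->overflow_stack_base =
                (unsigned long)this_cpu_ptr(overflow_stack);
        stacktrace_info->fp = fp;
        stacktrace_info->pc = pc;
}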

arch/arm64/include/asm/memory.h

Lines changed: 8 additions & 0 deletions
@@ -113,6 +113,14 @@
 
 #define OVERFLOW_STACK_SIZE	SZ_4K
 
+/*
+ * With the minimum frame size of [x29, x30], exactly half the combined
+ * sizes of the hyp and overflow stacks is the maximum size needed to
+ * save the unwound stacktrace, plus an additional entry to delimit the
+ * end.
+ */
+#define NVHE_STACKTRACE_SIZE	((OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long))
+
 /*
  * Alignment of kernel segments (e.g. .text, .data).
  *
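To make the sizing concrete, here is the arithmetic behind the macro,
assuming 4K pages (so the hyp stack, like the overflow stack, is 4096
bytes):

/*
 * Each frame record is at least 16 bytes ([x29, x30]) and unwinds to a
 * single saved entry of sizeof(long) == 8 bytes:
 *
 *   max frames = (OVERFLOW_STACK_SIZE + PAGE_SIZE) / 16 = 8192 / 16 = 512
 *   trace size = 512 * 8 + 8 (end delimiter)
 *              = (OVERFLOW_STACK_SIZE + PAGE_SIZE) / 2 + sizeof(long)
 *              = 4104 bytes
 */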

arch/arm64/include/asm/stacktrace.h

Lines changed: 2 additions & 60 deletions
@@ -8,52 +8,20 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/sched/task_stack.h>
-#include <linux/types.h>
 #include <linux/llist.h>
 
 #include <asm/memory.h>
+#include <asm/pointer_auth.h>
 #include <asm/ptrace.h>
 #include <asm/sdei.h>
 
-enum stack_type {
-	STACK_TYPE_UNKNOWN,
-	STACK_TYPE_TASK,
-	STACK_TYPE_IRQ,
-	STACK_TYPE_OVERFLOW,
-	STACK_TYPE_SDEI_NORMAL,
-	STACK_TYPE_SDEI_CRITICAL,
-	__NR_STACK_TYPES
-};
-
-struct stack_info {
-	unsigned long low;
-	unsigned long high;
-	enum stack_type type;
-};
+#include <asm/stacktrace/common.h>
 
 extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk,
			   const char *loglvl);
 
 DECLARE_PER_CPU(unsigned long *, irq_stack_ptr);
 
-static inline bool on_stack(unsigned long sp, unsigned long size,
-			    unsigned long low, unsigned long high,
-			    enum stack_type type, struct stack_info *info)
-{
-	if (!low)
-		return false;
-
-	if (sp < low || sp + size < sp || sp + size > high)
-		return false;
-
-	if (info) {
-		info->low = low;
-		info->high = high;
-		info->type = type;
-	}
-	return true;
-}
-
 static inline bool on_irq_stack(unsigned long sp, unsigned long size,
				struct stack_info *info)
 {
@@ -89,30 +57,4 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long size,
				     struct stack_info *info) { return false; }
 #endif
 
-
-/*
- * We can only safely access per-cpu stacks from current in a non-preemptible
- * context.
- */
-static inline bool on_accessible_stack(const struct task_struct *tsk,
-				       unsigned long sp, unsigned long size,
-				       struct stack_info *info)
-{
-	if (info)
-		info->type = STACK_TYPE_UNKNOWN;
-
-	if (on_task_stack(tsk, sp, size, info))
-		return true;
-	if (tsk != current || preemptible())
-		return false;
-	if (on_irq_stack(sp, size, info))
-		return true;
-	if (on_overflow_stack(sp, size, info))
-		return true;
-	if (on_sdei_stack(sp, size, info))
-		return true;
-
-	return false;
-}
-
 #endif /* __ASM_STACKTRACE_H */
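Note that on_accessible_stack() is not gone for good: per the "Make
unwind()/on_accessible_stack() per-unwinder functions" patch in the
shortlog, it becomes a private callback of the kernel unwinder in
arch/arm64/kernel/stacktrace.c, with essentially the body just removed
from this header. A sketch:

/* Kernel unwinder's private callback; body as deleted above. */
static bool on_accessible_stack(const struct task_struct *tsk,
                                unsigned long sp, unsigned long size,
                                struct stack_info *info)
{
        if (info)
                info->type = STACK_TYPE_UNKNOWN;

        if (on_task_stack(tsk, sp, size, info))
                return true;
        /* Per-cpu stacks are only safe for current, non-preemptible. */
        if (tsk != current || preemptible())
                return false;
        if (on_irq_stack(sp, size, info))
                return true;
        if (on_overflow_stack(sp, size, info))
                return true;
        if (on_sdei_stack(sp, size, info))
                return true;

        return false;
}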

arch/arm64/include/asm/stacktrace/common.h

Lines changed: 199 additions & 0 deletions

@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Common arm64 stack unwinder code.
+ *
+ * To implement a new arm64 stack unwinder:
+ *	1) Include this header
+ *
+ *	2) Call into unwind_next_common() from your top level unwind
+ *	   function, passing it the validation and translation callbacks
+ *	   (though the latter can be NULL if no translation is required).
+ *
+ * See: arch/arm64/kernel/stacktrace.c for the reference implementation.
+ *
+ * Copyright (C) 2012 ARM Ltd.
+ */
+#ifndef __ASM_STACKTRACE_COMMON_H
+#define __ASM_STACKTRACE_COMMON_H
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/kprobes.h>
+#include <linux/types.h>
+
+enum stack_type {
+	STACK_TYPE_UNKNOWN,
+	STACK_TYPE_TASK,
+	STACK_TYPE_IRQ,
+	STACK_TYPE_OVERFLOW,
+	STACK_TYPE_SDEI_NORMAL,
+	STACK_TYPE_SDEI_CRITICAL,
+	STACK_TYPE_HYP,
+	__NR_STACK_TYPES
+};
+
+struct stack_info {
+	unsigned long low;
+	unsigned long high;
+	enum stack_type type;
+};
+
+/*
+ * A snapshot of a frame record or fp/lr register values, along with some
+ * accounting information necessary for robust unwinding.
+ *
+ * @fp:          The fp value in the frame record (or the real fp)
+ * @pc:          The lr value in the frame record (or the real lr)
+ *
+ * @stacks_done: Stacks which have been entirely unwound, to which it is
+ *               no longer valid to unwind.
+ *
+ * @prev_fp:     The fp that pointed to this frame record, or a synthetic
+ *               value of 0. This is used to ensure that within a stack,
+ *               each subsequent frame record is at an increasing address.
+ * @prev_type:   The type of stack this frame record was on, or a synthetic
+ *               value of STACK_TYPE_UNKNOWN. This is used to detect a
+ *               transition from one stack to another.
+ *
+ * @kr_cur:      When KRETPROBES is selected, holds the kretprobe instance
+ *               associated with the most recently encountered replacement lr
+ *               value.
+ *
+ * @task:        The task being unwound.
+ */
+struct unwind_state {
+	unsigned long fp;
+	unsigned long pc;
+	DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
+	unsigned long prev_fp;
+	enum stack_type prev_type;
+#ifdef CONFIG_KRETPROBES
+	struct llist_node *kr_cur;
+#endif
+	struct task_struct *task;
+};
+
+static inline bool on_stack(unsigned long sp, unsigned long size,
+			    unsigned long low, unsigned long high,
+			    enum stack_type type, struct stack_info *info)
+{
+	if (!low)
+		return false;
+
+	if (sp < low || sp + size < sp || sp + size > high)
+		return false;
+
+	if (info) {
+		info->low = low;
+		info->high = high;
+		info->type = type;
+	}
+	return true;
+}
+
+static inline void unwind_init_common(struct unwind_state *state,
+				      struct task_struct *task)
+{
+	state->task = task;
+#ifdef CONFIG_KRETPROBES
+	state->kr_cur = NULL;
+#endif
+
+	/*
+	 * Prime the first unwind.
+	 *
+	 * In unwind_next() we'll check that the FP points to a valid stack,
+	 * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
+	 * treated as a transition to whichever stack that happens to be. The
+	 * prev_fp value won't be used, but we set it to 0 such that it is
+	 * definitely not an accessible stack address.
+	 */
+	bitmap_zero(state->stacks_done, __NR_STACK_TYPES);
+	state->prev_fp = 0;
+	state->prev_type = STACK_TYPE_UNKNOWN;
+}
+
+/*
+ * stack_trace_translate_fp_fn() - Translates a non-kernel frame pointer to
+ * a kernel address.
+ *
+ * @fp:   the frame pointer to be updated to its kernel address.
+ * @type: the stack type associated with frame pointer @fp
+ *
+ * Returns true on success and @fp is updated to the corresponding
+ * kernel virtual address; otherwise returns false.
+ */
+typedef bool (*stack_trace_translate_fp_fn)(unsigned long *fp,
+					    enum stack_type type);
+
+/*
+ * on_accessible_stack_fn() - Check whether a stack range is on any
+ * of the possible stacks.
+ *
+ * @tsk:  task whose stack is being unwound
+ * @sp:   stack address being checked
+ * @size: size of the stack range being checked
+ * @info: stack unwinding context
+ */
+typedef bool (*on_accessible_stack_fn)(const struct task_struct *tsk,
+				       unsigned long sp, unsigned long size,
+				       struct stack_info *info);
+
+static inline int unwind_next_common(struct unwind_state *state,
+				     struct stack_info *info,
+				     on_accessible_stack_fn accessible,
+				     stack_trace_translate_fp_fn translate_fp)
+{
+	unsigned long fp = state->fp, kern_fp = fp;
+	struct task_struct *tsk = state->task;
+
+	if (fp & 0x7)
+		return -EINVAL;
+
+	if (!accessible(tsk, fp, 16, info))
+		return -EINVAL;
+
+	if (test_bit(info->type, state->stacks_done))
+		return -EINVAL;
+
+	/*
+	 * If fp is not from the current address space perform the necessary
+	 * translation before dereferencing it to get the next fp.
+	 */
+	if (translate_fp && !translate_fp(&kern_fp, info->type))
+		return -EINVAL;
+
+	/*
+	 * As stacks grow downward, any valid record on the same stack must be
+	 * at a strictly higher address than the prior record.
+	 *
+	 * Stacks can nest in several valid orders, e.g.
+	 *
+	 * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+	 * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+	 * HYP -> OVERFLOW
+	 *
+	 * ... but the nesting itself is strict. Once we transition from one
+	 * stack to another, it's never valid to unwind back to that first
+	 * stack.
+	 */
+	if (info->type == state->prev_type) {
+		if (fp <= state->prev_fp)
+			return -EINVAL;
+	} else {
+		__set_bit(state->prev_type, state->stacks_done);
+	}
+
+	/*
+	 * Record this frame record's values and location. The prev_fp and
+	 * prev_type are only meaningful to the next unwind_next() invocation.
+	 */
+	state->fp = READ_ONCE(*(unsigned long *)(kern_fp));
+	state->pc = READ_ONCE(*(unsigned long *)(kern_fp + 8));
+	state->prev_fp = fp;
+	state->prev_type = info->type;
+
+	return 0;
+}
+
+#endif /* __ASM_STACKTRACE_COMMON_H */
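Following the header's own howto, a toy unwinder built on this core might
look as below. This is a minimal sketch: my_on_accessible_stack(),
my_stack_low and my_stack_high are hypothetical, and stack_trace_consume_fn
comes from linux/stacktrace.h:

#include <linux/stacktrace.h>
#include <asm/stacktrace/common.h>

/* Hypothetical bounds of the one stack this toy unwinder knows about. */
static unsigned long my_stack_low, my_stack_high;

static bool my_on_accessible_stack(const struct task_struct *tsk,
                                   unsigned long sp, unsigned long size,
                                   struct stack_info *info)
{
        /* Validate the [sp, sp + size) range against our stack bounds. */
        return on_stack(sp, size, my_stack_low, my_stack_high,
                        STACK_TYPE_TASK, info);
}

static void my_unwind(struct unwind_state *state,
                      stack_trace_consume_fn consume_entry, void *cookie)
{
        struct stack_info info;

        /* Report each pc, then step to the previous frame record. */
        while (consume_entry(cookie, state->pc)) {
                /* NULL translate_fp: fp is already a kernel VA here. */
                if (unwind_next_common(state, &info,
                                       my_on_accessible_stack, NULL))
                        break;
        }
}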

arch/arm64/include/asm/stacktrace/nvhe.h

Lines changed: 55 additions & 0 deletions

@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * KVM nVHE hypervisor stack tracing support.
+ *
+ * The unwinder implementation depends on the nVHE mode:
+ *
+ *   1) Non-protected nVHE mode - the host can directly access the
+ *      HYP stack pages and unwind the HYP stack in EL1. This saves having
+ *      to allocate shared buffers for the host to read the unwound
+ *      stacktrace.
+ *
+ *   2) pKVM (protected nVHE) mode - the host cannot directly access
+ *      the HYP memory. The stack is unwound in EL2 and dumped to a shared
+ *      buffer where the host can read and print the stacktrace.
+ *
+ * Copyright (C) 2022 Google LLC
+ */
+#ifndef __ASM_STACKTRACE_NVHE_H
+#define __ASM_STACKTRACE_NVHE_H
+
+#include <asm/stacktrace/common.h>
+
+/*
+ * kvm_nvhe_unwind_init() - Start an unwind from the given nVHE HYP fp and pc
+ *
+ * @state : unwind_state to initialize
+ * @fp    : frame pointer at which to start the unwinding.
+ * @pc    : program counter at which to start the unwinding.
+ */
+static inline void kvm_nvhe_unwind_init(struct unwind_state *state,
+					unsigned long fp,
+					unsigned long pc)
+{
+	unwind_init_common(state, NULL);
+
+	state->fp = fp;
+	state->pc = pc;
+}
+
+#ifndef __KVM_NVHE_HYPERVISOR__
+/*
+ * Conventional (non-protected) nVHE HYP stack unwinder
+ *
+ * In non-protected mode, the unwinding is done from kernel proper context
+ * (by the host in EL1).
+ */
+
+DECLARE_KVM_NVHE_PER_CPU(unsigned long [OVERFLOW_STACK_SIZE/sizeof(long)], overflow_stack);
+DECLARE_KVM_NVHE_PER_CPU(struct kvm_nvhe_stacktrace_info, kvm_stacktrace_info);
+DECLARE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page);
+
+void kvm_nvhe_dump_backtrace(unsigned long hyp_offset);
+
+#endif /* __KVM_NVHE_HYPERVISOR__ */
+#endif /* __ASM_STACKTRACE_NVHE_H */
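Putting these declarations to work, a sketch of the non-protected EL1 walk:
seed the state from the per-CPU kvm_nvhe_stacktrace_info, then step with
unwind_next_common(). hyp_on_accessible_stack() and kvm_nvhe_stack_kern_va()
(the hyp-VA-to-kernel-VA translation callback) are assumed here from the
series' patch titles; the printing in the merged code differs:

/* EL1 side, sketch: walk the hyp stack saved at hyp_panic() time. */
static void hyp_dump_backtrace(unsigned long hyp_offset)
{
        struct kvm_nvhe_stacktrace_info *stacktrace_info =
                this_cpu_ptr_nvhe_sym(kvm_stacktrace_info);
        struct unwind_state state;
        struct stack_info info;

        kvm_nvhe_unwind_init(&state, stacktrace_info->fp, stacktrace_info->pc);

        do {
                /* Symbolization via hyp_offset elided for brevity. */
                kvm_err(" [<%016lx>]\n", state.pc);
        } while (!unwind_next_common(&state, &info, hyp_on_accessible_stack,
                                     kvm_nvhe_stack_kern_va));
}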

arch/arm64/kernel/Makefile

Lines changed: 5 additions & 0 deletions
@@ -14,6 +14,11 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_syscall.o	 = -fstack-protector -fstack-protector-strong
 CFLAGS_syscall.o	+= -fno-stack-protector
 
+# When KASAN is enabled, a stack trace is recorded for every alloc/free, which
+# can significantly impact performance. Avoid instrumenting the stack trace
+# collection code to minimize this impact.
+KASAN_SANITIZE_stacktrace.o := n
+
 # It's not safe to invoke KCOV when portions of the kernel environment aren't
 # available or are out-of-sync with HW state. Since `noinstr` doesn't always
 # inhibit KCOV instrumentation, disable it for the entire compilation unit.
