Skip to content

Commit b136021

Browse files
committed
Merge tag 'x86-fred-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 FRED updates from Thomas Gleixner:

 - Enable FRED right after init_mem_mapping() because at that point the early IDT fault handler is replaced by the real fault handler. The real fault handler retrieves the faulting address from the stack frame and not from CR2 when the FRED feature is set. But that obviously only works when FRED is enabled in the CPU as well.

 - Set SS to __KERNEL_DS when enabling FRED to prevent a corner case where ERETS can observe a SS mismatch and raises a #GP.

* tag 'x86-fred-2024-09-17' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/entry: Set FRED RSP0 on return to userspace instead of context switch
  x86/msr: Switch between WRMSRNS and WRMSR with the alternatives mechanism
  x86/entry: Test ti_work for zero before processing individual bits
  x86/fred: Set SS to __KERNEL_DS when enabling FRED
  x86/fred: Enable FRED right after init_mem_mapping()
  x86/fred: Move FRED RSP initialization into a separate function
  x86/fred: Parse cmdline param "fred=" in cpu_parse_early_param()
2 parents c3056a7 + fe85ee3 commit b136021

File tree

11 files changed

+111
-64
lines changed

11 files changed

+111
-64
lines changed

arch/x86/include/asm/entry-common.h

Lines changed: 11 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
#include <asm/nospec-branch.h>
99
#include <asm/io_bitmap.h>
1010
#include <asm/fpu/api.h>
11+
#include <asm/fred.h>
1112

1213
/* Check that the stack and regs on entry from user mode are sane. */
1314
static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
@@ -44,8 +45,7 @@ static __always_inline void arch_enter_from_user_mode(struct pt_regs *regs)
4445
}
4546
#define arch_enter_from_user_mode arch_enter_from_user_mode
4647

47-
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
48-
unsigned long ti_work)
48+
static inline void arch_exit_work(unsigned long ti_work)
4949
{
5050
if (ti_work & _TIF_USER_RETURN_NOTIFY)
5151
fire_user_return_notifiers();
@@ -56,6 +56,15 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
5656
fpregs_assert_state_consistent();
5757
if (unlikely(ti_work & _TIF_NEED_FPU_LOAD))
5858
switch_fpu_return();
59+
}
60+
61+
static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
62+
unsigned long ti_work)
63+
{
64+
if (IS_ENABLED(CONFIG_X86_DEBUG_FPU) || unlikely(ti_work))
65+
arch_exit_work(ti_work);
66+
67+
fred_update_rsp0();
5968

6069
#ifdef CONFIG_COMPAT
6170
/*

arch/x86/include/asm/fred.h

Lines changed: 22 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -36,6 +36,7 @@
3636

3737
#ifdef CONFIG_X86_FRED
3838
#include <linux/kernel.h>
39+
#include <linux/sched/task_stack.h>
3940

4041
#include <asm/ptrace.h>
4142

@@ -84,13 +85,33 @@ static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int
8485
}
8586

8687
void cpu_init_fred_exceptions(void);
88+
void cpu_init_fred_rsps(void);
8789
void fred_complete_exception_setup(void);
8890

91+
DECLARE_PER_CPU(unsigned long, fred_rsp0);
92+
93+
static __always_inline void fred_sync_rsp0(unsigned long rsp0)
94+
{
95+
__this_cpu_write(fred_rsp0, rsp0);
96+
}
97+
98+
static __always_inline void fred_update_rsp0(void)
99+
{
100+
unsigned long rsp0 = (unsigned long) task_stack_page(current) + THREAD_SIZE;
101+
102+
if (cpu_feature_enabled(X86_FEATURE_FRED) && (__this_cpu_read(fred_rsp0) != rsp0)) {
103+
wrmsrns(MSR_IA32_FRED_RSP0, rsp0);
104+
__this_cpu_write(fred_rsp0, rsp0);
105+
}
106+
}
89107
#else /* CONFIG_X86_FRED */
90108
static __always_inline unsigned long fred_event_data(struct pt_regs *regs) { return 0; }
91109
static inline void cpu_init_fred_exceptions(void) { }
110+
static inline void cpu_init_fred_rsps(void) { }
92111
static inline void fred_complete_exception_setup(void) { }
93-
static __always_inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
112+
static inline void fred_entry_from_kvm(unsigned int type, unsigned int vector) { }
113+
static inline void fred_sync_rsp0(unsigned long rsp0) { }
114+
static inline void fred_update_rsp0(void) { }
94115
#endif /* CONFIG_X86_FRED */
95116
#endif /* !__ASSEMBLY__ */
96117

arch/x86/include/asm/msr.h

Lines changed: 11 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -99,19 +99,6 @@ static __always_inline void __wrmsr(unsigned int msr, u32 low, u32 high)
9999
: : "c" (msr), "a"(low), "d" (high) : "memory");
100100
}
101101

102-
/*
103-
* WRMSRNS behaves exactly like WRMSR with the only difference being
104-
* that it is not a serializing instruction by default.
105-
*/
106-
static __always_inline void __wrmsrns(u32 msr, u32 low, u32 high)
107-
{
108-
/* Instruction opcode for WRMSRNS; supported in binutils >= 2.40. */
109-
asm volatile("1: .byte 0x0f,0x01,0xc6\n"
110-
"2:\n"
111-
_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
112-
: : "c" (msr), "a"(low), "d" (high));
113-
}
114-
115102
#define native_rdmsr(msr, val1, val2) \
116103
do { \
117104
u64 __val = __rdmsr((msr)); \
@@ -312,9 +299,19 @@ do { \
312299

313300
#endif /* !CONFIG_PARAVIRT_XXL */
314301

302+
/* Instruction opcode for WRMSRNS; supported in binutils >= 2.40 */
303+
#define WRMSRNS _ASM_BYTES(0x0f,0x01,0xc6)
304+
305+
/* Non-serializing WRMSR, when available. Falls back to a serializing WRMSR. */
315306
static __always_inline void wrmsrns(u32 msr, u64 val)
316307
{
317-
__wrmsrns(msr, val, val >> 32);
308+
/*
309+
* WRMSR is 2 bytes. WRMSRNS is 3 bytes. Pad WRMSR with a redundant
310+
* DS prefix to avoid a trailing NOP.
311+
*/
312+
asm volatile("1: " ALTERNATIVE("ds wrmsr", WRMSRNS, X86_FEATURE_WRMSRNS)
313+
"2: " _ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_WRMSR)
314+
: : "c" (msr), "a" ((u32)val), "d" ((u32)(val >> 32)));
318315
}
319316

320317
/*

arch/x86/include/asm/processor.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -582,7 +582,8 @@ extern void switch_gdt_and_percpu_base(int);
582582
extern void load_direct_gdt(int);
583583
extern void load_fixmap_gdt(int);
584584
extern void cpu_init(void);
585-
extern void cpu_init_exception_handling(void);
585+
extern void cpu_init_exception_handling(bool boot_cpu);
586+
extern void cpu_init_replace_early_idt(void);
586587
extern void cr4_init(void);
587588

588589
extern void set_task_blockstep(struct task_struct *task, bool on);

arch/x86/include/asm/switch_to.h

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -70,13 +70,9 @@ static inline void update_task_stack(struct task_struct *task)
7070
#ifdef CONFIG_X86_32
7171
this_cpu_write(cpu_tss_rw.x86_tss.sp1, task->thread.sp0);
7272
#else
73-
if (cpu_feature_enabled(X86_FEATURE_FRED)) {
74-
/* WRMSRNS is a baseline feature for FRED. */
75-
wrmsrns(MSR_IA32_FRED_RSP0, (unsigned long)task_stack_page(task) + THREAD_SIZE);
76-
} else if (cpu_feature_enabled(X86_FEATURE_XENPV)) {
73+
if (!cpu_feature_enabled(X86_FEATURE_FRED) && cpu_feature_enabled(X86_FEATURE_XENPV))
7774
/* Xen PV enters the kernel on the thread stack. */
7875
load_sp0(task_top_of_stack(task));
79-
}
8076
#endif
8177
}
8278

arch/x86/kernel/cpu/common.c

Lines changed: 20 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1510,6 +1510,11 @@ static void __init cpu_parse_early_param(void)
15101510
if (cmdline_find_option_bool(boot_command_line, "nousershstk"))
15111511
setup_clear_cpu_cap(X86_FEATURE_USER_SHSTK);
15121512

1513+
/* Minimize the gap between FRED being available and FRED being available but disabled. */
1514+
arglen = cmdline_find_option(boot_command_line, "fred", arg, sizeof(arg));
1515+
if (arglen != 2 || strncmp(arg, "on", 2))
1516+
setup_clear_cpu_cap(X86_FEATURE_FRED);
1517+
15131518
arglen = cmdline_find_option(boot_command_line, "clearcpuid", arg, sizeof(arg));
15141519
if (arglen <= 0)
15151520
return;
@@ -2171,7 +2176,7 @@ static inline void tss_setup_io_bitmap(struct tss_struct *tss)
21712176
* Setup everything needed to handle exceptions from the IDT, including the IST
21722177
* exceptions which use paranoid_entry().
21732178
*/
2174-
void cpu_init_exception_handling(void)
2179+
void cpu_init_exception_handling(bool boot_cpu)
21752180
{
21762181
struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw);
21772182
int cpu = raw_smp_processor_id();
@@ -2190,10 +2195,23 @@ void cpu_init_exception_handling(void)
21902195
/* GHCB needs to be setup to handle #VC. */
21912196
setup_ghcb();
21922197

2198+
if (cpu_feature_enabled(X86_FEATURE_FRED)) {
2199+
/* The boot CPU has enabled FRED during early boot */
2200+
if (!boot_cpu)
2201+
cpu_init_fred_exceptions();
2202+
2203+
cpu_init_fred_rsps();
2204+
} else {
2205+
load_current_idt();
2206+
}
2207+
}
2208+
2209+
void __init cpu_init_replace_early_idt(void)
2210+
{
21932211
if (cpu_feature_enabled(X86_FEATURE_FRED))
21942212
cpu_init_fred_exceptions();
21952213
else
2196-
load_current_idt();
2214+
idt_setup_early_pf();
21972215
}
21982216

21992217
/*

arch/x86/kernel/cpu/cpuid-deps.c

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -83,7 +83,6 @@ static const struct cpuid_dep cpuid_deps[] = {
8383
{ X86_FEATURE_AMX_TILE, X86_FEATURE_XFD },
8484
{ X86_FEATURE_SHSTK, X86_FEATURE_XSAVES },
8585
{ X86_FEATURE_FRED, X86_FEATURE_LKGS },
86-
{ X86_FEATURE_FRED, X86_FEATURE_WRMSRNS },
8786
{}
8887
};
8988

arch/x86/kernel/fred.c

Lines changed: 36 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -21,17 +21,53 @@
2121

2222
#define FRED_STKLVL(vector, lvl) ((lvl) << (2 * (vector)))
2323

24+
DEFINE_PER_CPU(unsigned long, fred_rsp0);
25+
EXPORT_PER_CPU_SYMBOL(fred_rsp0);
26+
2427
void cpu_init_fred_exceptions(void)
2528
{
2629
/* When FRED is enabled by default, remove this log message */
2730
pr_info("Initialize FRED on CPU%d\n", smp_processor_id());
2831

32+
/*
33+
* If a kernel event is delivered before a CPU goes to user level for
34+
* the first time, its SS is NULL thus NULL is pushed into the SS field
35+
* of the FRED stack frame. But before ERETS is executed, the CPU may
36+
* context switch to another task and go to user level. Then when the
37+
* CPU comes back to kernel mode, SS is changed to __KERNEL_DS. Later
38+
* when ERETS is executed to return from the kernel event handler, a #GP
39+
* fault is generated because SS doesn't match the SS saved in the FRED
40+
* stack frame.
41+
*
42+
* Initialize SS to __KERNEL_DS when enabling FRED to avoid such #GPs.
43+
*/
44+
loadsegment(ss, __KERNEL_DS);
45+
2946
wrmsrl(MSR_IA32_FRED_CONFIG,
3047
/* Reserve for CALL emulation */
3148
FRED_CONFIG_REDZONE |
3249
FRED_CONFIG_INT_STKLVL(0) |
3350
FRED_CONFIG_ENTRYPOINT(asm_fred_entrypoint_user));
3451

52+
wrmsrl(MSR_IA32_FRED_STKLVLS, 0);
53+
wrmsrl(MSR_IA32_FRED_RSP0, 0);
54+
wrmsrl(MSR_IA32_FRED_RSP1, 0);
55+
wrmsrl(MSR_IA32_FRED_RSP2, 0);
56+
wrmsrl(MSR_IA32_FRED_RSP3, 0);
57+
58+
/* Enable FRED */
59+
cr4_set_bits(X86_CR4_FRED);
60+
/* Any further IDT use is a bug */
61+
idt_invalidate();
62+
63+
/* Use int $0x80 for 32-bit system calls in FRED mode */
64+
setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
65+
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
66+
}
67+
68+
/* Must be called after setup_cpu_entry_areas() */
69+
void cpu_init_fred_rsps(void)
70+
{
3571
/*
3672
* The purpose of separate stacks for NMI, #DB and #MC *in the kernel*
3773
* (remember that user space faults are always taken on stack level 0)
@@ -47,13 +83,4 @@ void cpu_init_fred_exceptions(void)
4783
wrmsrl(MSR_IA32_FRED_RSP1, __this_cpu_ist_top_va(DB));
4884
wrmsrl(MSR_IA32_FRED_RSP2, __this_cpu_ist_top_va(NMI));
4985
wrmsrl(MSR_IA32_FRED_RSP3, __this_cpu_ist_top_va(DF));
50-
51-
/* Enable FRED */
52-
cr4_set_bits(X86_CR4_FRED);
53-
/* Any further IDT use is a bug */
54-
idt_invalidate();
55-
56-
/* Use int $0x80 for 32-bit system calls in FRED mode */
57-
setup_clear_cpu_cap(X86_FEATURE_SYSENTER32);
58-
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
5986
}

arch/x86/kernel/setup.c

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1039,7 +1039,12 @@ void __init setup_arch(char **cmdline_p)
10391039

10401040
init_mem_mapping();
10411041

1042-
idt_setup_early_pf();
1042+
/*
1043+
* init_mem_mapping() relies on the early IDT page fault handling.
1044+
* Now either enable FRED or install the real page fault handler
1045+
* for 64-bit in the IDT.
1046+
*/
1047+
cpu_init_replace_early_idt();
10431048

10441049
/*
10451050
* Update mmu_cr4_features (and, indirectly, trampoline_cr4_features)

arch/x86/kernel/smpboot.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -246,7 +246,7 @@ static void notrace start_secondary(void *unused)
246246
__flush_tlb_all();
247247
}
248248

249-
cpu_init_exception_handling();
249+
cpu_init_exception_handling(false);
250250

251251
/*
252252
* Load the microcode before reaching the AP alive synchronization

0 commit comments

Comments
 (0)