From 286db900731dac81f66e543b0081d891d191cf41 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Thu, 2 Oct 2025 23:35:51 +0200 Subject: [PATCH 01/13] x86: Fix misspelled Kconfig symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve-pre CVE-2024-25744 commit-author Lukas Bulwahn commit 6bf8a55d8344df1f61a29b18c398bcdf3539e163 Fix misspelled Kconfig symbols as detected by scripts/checkkconfigsymbols.py. [ bp: Combine into a single patch. ] Signed-off-by: Lukas Bulwahn Signed-off-by: Borislav Petkov Link: https://lkml.kernel.org/r/20210803113531.30720-7-lukas.bulwahn@gmail.com (cherry picked from commit 6bf8a55d8344df1f61a29b18c398bcdf3539e163) Signed-off-by: Marcin Wcisło --- arch/x86/include/asm/ia32.h | 2 +- arch/x86/include/asm/irq_stack.h | 2 +- arch/x86/include/asm/page_32.h | 2 +- arch/x86/include/asm/uaccess.h | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 2c5f7861d373c..fada857f0a1ed 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -68,6 +68,6 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm); #endif -#endif /* !CONFIG_IA32_SUPPORT */ +#endif /* CONFIG_IA32_EMULATION */ #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/include/asm/irq_stack.h b/arch/x86/include/asm/irq_stack.h index f8958fcb695eb..63f818aedf770 100644 --- a/arch/x86/include/asm/irq_stack.h +++ b/arch/x86/include/asm/irq_stack.h @@ -59,7 +59,7 @@ * the output constraints to make the compiler aware that R11 cannot be * reused after the asm() statement. * - * For builds with CONFIG_UNWIND_FRAME_POINTER ASM_CALL_CONSTRAINT is + * For builds with CONFIG_UNWINDER_FRAME_POINTER, ASM_CALL_CONSTRAINT is * required as well as this prevents certain creative GCC variants from * misplacing the ASM code. 
* diff --git a/arch/x86/include/asm/page_32.h b/arch/x86/include/asm/page_32.h index 94dbd51df58f8..b13f8488ac854 100644 --- a/arch/x86/include/asm/page_32.h +++ b/arch/x86/include/asm/page_32.h @@ -43,7 +43,7 @@ static inline void copy_page(void *to, void *from) { memcpy(to, from, PAGE_SIZE); } -#endif /* CONFIG_X86_3DNOW */ +#endif /* CONFIG_X86_USE_3DNOW */ #endif /* !__ASSEMBLY__ */ #endif /* _ASM_X86_PAGE_32_H */ diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 538646ec8027d..bbd2bb0d3beeb 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -379,7 +379,7 @@ do { \ : [umem] "m" (__m(addr)), \ "0" (err)) -#endif // CONFIG_CC_ASM_GOTO_OUTPUT +#endif // CONFIG_CC_HAS_ASM_GOTO_OUTPUT #ifdef CONFIG_CC_HAS_ASM_GOTO_TIED_OUTPUT #define __try_cmpxchg_user_asm(itype, ltype, _ptr, _pold, _new, label) ({ \ From b96a127058a41d7a94cf57839340c7f5b6d6fc59 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Thu, 2 Oct 2025 23:39:16 +0200 Subject: [PATCH 02/13] x86: Introduce ia32_enabled() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve-pre CVE-2024-25744 commit-author Nikolay Borisov commit 1da5c9bc119d3a749b519596b93f9b2667e93c4a IA32 support on 64bit kernels depends on whether CONFIG_IA32_EMULATION is selected or not. As it is a compile time option it doesn't provide the flexibility to have distributions set their own policy for IA32 support and give the user the flexibility to override it. As a first step introduce ia32_enabled() which abstracts whether IA32 compat is turned on or off. Upcoming patches will implement the ability to set IA32 compat state at boot time. 
Signed-off-by: Nikolay Borisov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230623111409.3047467-2-nik.borisov@suse.com (cherry picked from commit 1da5c9bc119d3a749b519596b93f9b2667e93c4a) Signed-off-by: Marcin Wcisło --- arch/x86/entry/common.c | 4 ++++ arch/x86/include/asm/ia32.h | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 6c2826417b337..cfbd3aec3ddc7 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -96,6 +96,10 @@ static __always_inline int syscall_32_enter(struct pt_regs *regs) return (int)regs->orig_ax; } +#ifdef CONFIG_IA32_EMULATION +bool __ia32_enabled __ro_after_init = true; +#endif + /* * Invoke a 32-bit syscall. Called with IRQs on in CONTEXT_KERNEL. */ diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index fada857f0a1ed..5a2ae24b1204f 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -68,6 +68,20 @@ extern void ia32_pick_mmap_layout(struct mm_struct *mm); #endif -#endif /* CONFIG_IA32_EMULATION */ +extern bool __ia32_enabled; + +static inline bool ia32_enabled(void) +{ + return __ia32_enabled; +} + +#else /* !CONFIG_IA32_EMULATION */ + +static inline bool ia32_enabled(void) +{ + return IS_ENABLED(CONFIG_X86_32); +} + +#endif #endif /* _ASM_X86_IA32_H */ From 9c48d660b0ec9e07637e7460633178c9cd6330f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 3 Oct 2025 00:44:38 +0200 Subject: [PATCH 03/13] x86/entry: Rename ignore_sysret() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve-pre CVE-2024-25744 commit-author Nikolay Borisov commit f71e1d2ff8e6a183bd4004bc97c453ba527b7dc6 The SYSCALL instruction cannot really be disabled in compatibility mode. The best that can be done is to configure the CSTAR msr to point to a minimal handler. 
Currently this handler has a rather misleading name - ignore_sysret() as it's not really doing anything with sysret. Give it a more descriptive name. Signed-off-by: Nikolay Borisov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230623111409.3047467-3-nik.borisov@suse.com (cherry picked from commit f71e1d2ff8e6a183bd4004bc97c453ba527b7dc6) Signed-off-by: Marcin Wcisło --- arch/x86/entry/entry_64.S | 4 ++-- arch/x86/include/asm/processor.h | 2 +- arch/x86/kernel/cpu/common.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index f314ae35f0269..79dae74f2eb42 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1503,12 +1503,12 @@ SYM_CODE_END(asm_exc_nmi) * This handles SYSCALL from 32-bit code. There is no way to program * MSRs to fully disable 32-bit SYSCALL. */ -SYM_CODE_START(ignore_sysret) +SYM_CODE_START(entry_SYSCALL32_ignore) UNWIND_HINT_EMPTY ENDBR mov $-ENOSYS, %eax sysretl -SYM_CODE_END(ignore_sysret) +SYM_CODE_END(entry_SYSCALL32_ignore) #endif .pushsection .text, "ax" diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3016e147f253f..ff290c94ad063 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -461,7 +461,7 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu) DECLARE_PER_CPU(void *, hardirq_stack_ptr); DECLARE_PER_CPU(bool, hardirq_stack_inuse); -extern asmlinkage void ignore_sysret(void); +extern asmlinkage void entry_SYSCALL32_ignore(void); /* Save actual FS/GS selectors and bases to current->thread */ void current_save_fsgs(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c4483102aaffc..c475e702e61f4 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -2025,7 +2025,7 @@ void syscall_init(void) (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1)); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 
(u64)entry_SYSENTER_compat); #else - wrmsrl(MSR_CSTAR, (unsigned long)ignore_sysret); + wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL32_ignore); wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); From afb4eacc6f7a4ffb490a6fc2b027ac0b22a78dc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 3 Oct 2025 00:58:55 +0200 Subject: [PATCH 04/13] x86/entry: Compile entry_SYSCALL32_ignore() unconditionally MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve-pre CVE-2024-25744 commit-author Nikolay Borisov commit 370dcd58548a360bbf8a65b89b410d09f56bf0c6 To limit the IA32 exposure on 64bit kernels while keeping the flexibility for the user to enable it when required, the compile time enable/disable via CONFIG_IA32_EMULATION is not good enough and will be complemented with a kernel command line option. Right now entry_SYSCALL32_ignore() is only compiled when CONFIG_IA32_EMULATION=n, but boot-time enable- / disablement obviously requires it to be unconditionally available. Remove the #ifndef CONFIG_IA32_EMULATION guard. Signed-off-by: Nikolay Borisov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230623111409.3047467-4-nik.borisov@suse.com (cherry picked from commit 370dcd58548a360bbf8a65b89b410d09f56bf0c6) Signed-off-by: Marcin Wcisło --- arch/x86/entry/entry_64.S | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 79dae74f2eb42..e4ecbc5fdf701 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -1498,7 +1498,6 @@ nmi_restore: iretq SYM_CODE_END(asm_exc_nmi) -#ifndef CONFIG_IA32_EMULATION /* * This handles SYSCALL from 32-bit code. There is no way to program * MSRs to fully disable 32-bit SYSCALL. 
@@ -1509,7 +1508,6 @@ SYM_CODE_START(entry_SYSCALL32_ignore) mov $-ENOSYS, %eax sysretl SYM_CODE_END(entry_SYSCALL32_ignore) -#endif .pushsection .text, "ax" SYM_CODE_START(rewind_stack_and_make_dead) From efb829a890172fa0acbef915085ad2002eccb487 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 3 Oct 2025 01:18:09 +0200 Subject: [PATCH 05/13] x86/entry: Make IA32 syscalls' availability depend on ia32_enabled() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve-pre CVE-2024-25744 commit-author Nikolay Borisov commit 61382281e9054df523d3f9cfdba2faff88955f97 upstream-diff Upstream code between the #ifdef / #else / #endif in `arch/x86/kernel/cpu/common.c' differs slightly from `ciqlts9_2' (`wrmsrl_cstar' function used instead of `wrmsrl'). Applied the same logic of #ifdef / #else -> if / else conversion to the existing codebase. Another major aspect of supporting running of 32bit processes is the ability to access 32bit syscalls. Such syscalls can be invoked by using the legacy int 0x80 handler and sysenter/syscall instructions. If IA32 emulation is disabled ensure that each of those 3 distinct mechanisms are also disabled. For int 0x80 a #GP exception would be generated since the respective descriptor is not going to be loaded at all. Invoking sysenter will also result in a #GP since IA32_SYSENTER_CS contains an invalid segment. Finally, syscall instruction cannot really be disabled so it's configured to execute a minimal handler. 
Signed-off-by: Nikolay Borisov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230623111409.3047467-6-nik.borisov@suse.com (cherry picked from commit 61382281e9054df523d3f9cfdba2faff88955f97) Signed-off-by: Marcin Wcisło --- arch/x86/include/asm/proto.h | 3 +++ arch/x86/kernel/cpu/common.c | 37 ++++++++++++++++++------------------ arch/x86/kernel/idt.c | 7 +++++++ 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index f042cfc9938f4..85d01c3edb4e2 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -36,6 +36,9 @@ void entry_INT80_compat(void); #ifdef CONFIG_XEN_PV void xen_entry_INT80_compat(void); #endif +#else /* !CONFIG_IA32_EMULATION */ +#define entry_SYSCALL_compat NULL +#define entry_SYSENTER_compat NULL #endif void x86_configure_nx(void); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c475e702e61f4..deb46089f7636 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -59,6 +59,7 @@ #include #include #include +#include #include #include @@ -2012,24 +2013,24 @@ void syscall_init(void) wrmsr(MSR_STAR, 0, (__USER32_CS << 16) | __KERNEL_CS); wrmsrl(MSR_LSTAR, (unsigned long)entry_SYSCALL_64); -#ifdef CONFIG_IA32_EMULATION - wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); - /* - * This only works on Intel CPUs. - * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. - * This does not cause SYSENTER to jump to the wrong location, because - * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). 
- */ - wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, - (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1)); - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); -#else - wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL32_ignore); - wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); - wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); - wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); -#endif + if (ia32_enabled()) { + wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL_compat); + /* + * This only works on Intel CPUs. + * On AMD CPUs these MSRs are 32-bit, CPU truncates MSR_IA32_SYSENTER_EIP. + * This does not cause SYSENTER to jump to the wrong location, because + * AMD doesn't allow SYSENTER in long mode (either 32- or 64-bit). + */ + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)__KERNEL_CS); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, + (unsigned long)(cpu_entry_stack(smp_processor_id()) + 1)); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, (u64)entry_SYSENTER_compat); + } else { + wrmsrl(MSR_CSTAR, (unsigned long)entry_SYSCALL32_ignore); + wrmsrl_safe(MSR_IA32_SYSENTER_CS, (u64)GDT_ENTRY_INVALID_SEG); + wrmsrl_safe(MSR_IA32_SYSENTER_ESP, 0ULL); + wrmsrl_safe(MSR_IA32_SYSENTER_EIP, 0ULL); + } /* * Flags to clear on syscall; clear as much as possible diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index a58c6bc1cd68c..41f88da585027 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #define DPL0 0x0 @@ -116,6 +117,9 @@ static const __initconst struct idt_data def_idts[] = { #endif SYSG(X86_TRAP_OF, asm_exc_overflow), +}; + +static const struct idt_data ia32_idt[] __initconst = { #if defined(CONFIG_IA32_EMULATION) SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), #elif defined(CONFIG_X86_32) @@ -226,6 +230,9 @@ void __init idt_setup_early_traps(void) void __init idt_setup_traps(void) { idt_setup_from_table(idt_table, def_idts, ARRAY_SIZE(def_idts), true); 
+ + if (ia32_enabled()) + idt_setup_from_table(idt_table, ia32_idt, ARRAY_SIZE(ia32_idt), true); } #ifdef CONFIG_X86_64 From a9b2a1252b680749aaa39c9468eddef2bbb04e0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 3 Oct 2025 01:38:17 +0200 Subject: [PATCH 06/13] x86: Make IA32_EMULATION boot time configurable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve-pre CVE-2024-25744 commit-author Nikolay Borisov commit a11e097504ac1889b35b6858f495565838325f88 Distributions would like to reduce their attack surface as much as possible but at the same time they'd want to retain flexibility to cater to a variety of legacy software. This stems from the conjecture that compat layer is likely rarely tested and could have latent security bugs. Ideally distributions will set their default policy and also give users the ability to override it as appropriate. To enable this use case, introduce CONFIG_IA32_EMULATION_DEFAULT_DISABLED compile time option, which controls whether 32bit processes/syscalls should be allowed or not. This option is aimed mainly at distributions to set their preferred default behavior in their kernels. To allow users to override the distro's policy, introduce the 'ia32_emulation' parameter which allows overriding CONFIG_IA32_EMULATION_DEFAULT_DISABLED state at boot time. 
Signed-off-by: Nikolay Borisov Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/20230623111409.3047467-7-nik.borisov@suse.com (cherry picked from commit a11e097504ac1889b35b6858f495565838325f88) Signed-off-by: Marcin Wcisło --- Documentation/admin-guide/kernel-parameters.txt | 6 ++++++ arch/x86/Kconfig | 9 +++++++++ arch/x86/entry/common.c | 9 ++++++++- 3 files changed, 23 insertions(+), 1 deletion(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 443807ae55d0d..eb998bf963192 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1741,6 +1741,12 @@ 0 -- machine default 1 -- force brightness inversion + ia32_emulation= [X86-64] + Format: <bool> + When true, allows loading 32-bit programs and executing 32-bit + syscalls, essentially overriding IA32_EMULATION_DEFAULT_DISABLED at + boot time. When false, unconditionally disables IA32 emulation. + icn= [HW,ISDN] Format: <io>[,<membase>[,<icn_id>[,<icn_id2>]]] diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 4af28a7a1ce41..af7d48f3506a0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2944,6 +2944,15 @@ config IA32_EMULATION 64-bit kernel. You should likely turn this on, unless you're 100% sure that you don't have any 32-bit programs left. +config IA32_EMULATION_DEFAULT_DISABLED + bool "IA32 emulation disabled by default" + default n + depends on IA32_EMULATION + help + Make IA32 emulation disabled by default. This prevents loading 32-bit + processes and access to 32-bit syscalls. If unsure, leave it to its + default value. 
+ config IA32_AOUT tristate "IA32 a.out support" depends on IA32_EMULATION diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index cfbd3aec3ddc7..a34e1a1adcf83 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -19,6 +19,7 @@ #include #include #include +#include #ifdef CONFIG_XEN_PV #include @@ -97,7 +98,13 @@ static __always_inline int syscall_32_enter(struct pt_regs *regs) } #ifdef CONFIG_IA32_EMULATION -bool __ia32_enabled __ro_after_init = true; +bool __ia32_enabled __ro_after_init = !IS_ENABLED(CONFIG_IA32_EMULATION_DEFAULT_DISABLED); + +static int ia32_emulation_override_cmdline(char *arg) +{ + return kstrtobool(arg, &__ia32_enabled); +} +early_param("ia32_emulation", ia32_emulation_override_cmdline); #endif /* From dfa91c6a1e7f56c641ca6d9a030790a992498a37 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 3 Oct 2025 01:44:41 +0200 Subject: [PATCH 07/13] x86/entry: Add __init to ia32_emulation_override_cmdline() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve-pre CVE-2024-25744 commit-author Vitaly Kuznetsov commit d55f31e29047f2f987286d55928ae75775111fe7 ia32_emulation_override_cmdline() is an early_param() arg and these are only needed at boot time. In fact, all other early_param() functions in arch/x86 seem to have '__init' annotation and ia32_emulation_override_cmdline() is the only exception. 
Fixes: a11e097504ac ("x86: Make IA32_EMULATION boot time configurable") Signed-off-by: Vitaly Kuznetsov Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Reviewed-by: Nikolay Borisov Link: https://lore.kernel.org/all/20241210151650.1746022-1-vkuznets%40redhat.com (cherry picked from commit d55f31e29047f2f987286d55928ae75775111fe7) Signed-off-by: Marcin Wcisło --- arch/x86/entry/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index a34e1a1adcf83..1570127c2b0a4 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -100,7 +100,7 @@ static __always_inline int syscall_32_enter(struct pt_regs *regs) #ifdef CONFIG_IA32_EMULATION bool __ia32_enabled __ro_after_init = !IS_ENABLED(CONFIG_IA32_EMULATION_DEFAULT_DISABLED); -static int ia32_emulation_override_cmdline(char *arg) +static int __init ia32_emulation_override_cmdline(char *arg) { return kstrtobool(arg, &__ia32_enabled); } From 1988eb4d375d1a71882dd63ae9e1b131926a2fef Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 4 Dec 2023 11:31:38 +0300 Subject: [PATCH 08/13] x86/coco: Disable 32-bit emulation by default on TDX and SEV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve CVE-2024-25744 commit-author Kirill A. Shutemov commit b82a8dbd3d2f4563156f7150c6f2ecab6e960b30 The INT 0x80 instruction is used for 32-bit x86 Linux syscalls. The kernel expects to receive a software interrupt as a result of the INT 0x80 instruction. However, an external interrupt on the same vector triggers the same handler. The kernel interprets an external interrupt on vector 0x80 as a 32-bit system call that came from userspace. A VMM can inject external interrupts on any arbitrary vector at any time. This remains true even for TDX and SEV guests where the VMM is untrusted. Put together, this allows an untrusted VMM to trigger int80 syscall handling at any given point. 
The content of the guest register file at that moment defines what syscall is triggered and its arguments. It opens the guest OS to manipulation from the VMM side. Disable 32-bit emulation by default for TDX and SEV. User can override it with the ia32_emulation=y command line option. [ dhansen: reword the changelog ] Reported-by: Supraja Sridhara Reported-by: Benedict Schlüter Reported-by: Mark Kuhne Reported-by: Andrin Bertschi Reported-by: Shweta Shinde Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov (AMD) Cc: <stable@vger.kernel.org> # v6.0+: 1da5c9b x86: Introduce ia32_enabled() Cc: <stable@vger.kernel.org> # v6.0+ --- arch/x86/coco/tdx/tdx.c | 10 ++++++++++ arch/x86/include/asm/ia32.h | 7 +++++++ arch/x86/mm/mem_encrypt_amd.c | 11 +++++++++++ 3 files changed, 28 insertions(+) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index cfd4c95b9f045..56b058531d894 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -824,5 +825,14 @@ void __init tdx_early_init(void) x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed; + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. 
+ */ + ia32_disable(); + pr_info("Guest detected\n"); } diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 5a2ae24b1204f..9805629479d96 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -75,6 +75,11 @@ static inline bool ia32_enabled(void) return __ia32_enabled; } +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + #else /* !CONFIG_IA32_EMULATION */ static inline bool ia32_enabled(void) @@ -82,6 +87,8 @@ static inline bool ia32_enabled(void) return IS_ENABLED(CONFIG_X86_32); } +static inline void ia32_disable(void) {} + #endif #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index e8f7953fda83a..c8b3bd1c406e8 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "mm_internal.h" @@ -499,6 +500,16 @@ void __init sme_early_init(void) x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; + + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. 
+ */ + if (sev_status & MSR_AMD64_SEV_ENABLED) + ia32_disable(); } void __init mem_encrypt_free_decrypted_mem(void) From 00bb28777bddb81ac83c6b1fb2b8639aada57853 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 3 Oct 2025 01:56:16 +0200 Subject: [PATCH 09/13] x86/entry: Convert INT 0x80 emulation to IDTENTRY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve CVE-2024-25744 commit-author Thomas Gleixner commit be5341eb0d43b1e754799498bd2e8756cc167a41 There is no real reason to have a separate ASM entry point implementation for the legacy INT 0x80 syscall emulation on 64-bit. IDTENTRY provides all the functionality needed with the only difference that it does not: - save the syscall number (AX) into pt_regs::orig_ax - set pt_regs::ax to -ENOSYS Both can be done safely in the C code of an IDTENTRY before invoking any of the syscall related functions which depend on this convention. Aside of ASM code reduction this prepares for detecting and handling a local APIC injected vector 0x80. [ kirill.shutemov: More verbose comments ] Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Kirill A. 
Shutemov Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+ (cherry picked from commit be5341eb0d43b1e754799498bd2e8756cc167a41) Signed-off-by: Marcin Wcisło --- arch/x86/entry/common.c | 58 +++++++++++++++++++++++- arch/x86/entry/entry_64_compat.S | 77 -------------------------------- arch/x86/include/asm/idtentry.h | 4 ++ arch/x86/include/asm/proto.h | 4 -- arch/x86/kernel/idt.c | 2 +- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm.S | 2 +- 7 files changed, 64 insertions(+), 85 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 1570127c2b0a4..91012af49083f 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -126,7 +126,62 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } } -/* Handles int $0x80 */ +#ifdef CONFIG_IA32_EMULATION +/** + * int80_emulation - 32-bit legacy syscall entry + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * The arguments for the INT $0x80 based syscall are on stack in the + * pt_regs structure: + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +DEFINE_IDTENTRY_RAW(int80_emulation) +{ + int nr; + + /* Establish kernel context. */ + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* + * The low level idtentry code pushed -1 into regs::orig_ax + * and regs::ax contains the syscall number. 
+ * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#else /* CONFIG_IA32_EMULATION */ + +/* Handles int $0x80 on a 32bit kernel */ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { int nr = syscall_32_enter(regs); @@ -145,6 +200,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) instrumentation_end(); syscall_exit_to_user_mode(regs); } +#endif /* !CONFIG_IA32_EMULATION */ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index e7ae79e557a42..130751fd2b3cb 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -274,80 +274,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_unsafe_stack, SYM_L_GLOBAL) sysretl SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) SYM_CODE_END(entry_SYSCALL_compat) - -/* - * 32-bit legacy system call entry. - * - * 32-bit x86 Linux system calls traditionally used the INT $0x80 - * instruction. INT $0x80 lands here. - * - * This entry point can be used by 32-bit and 64-bit programs to perform - * 32-bit system calls. Instances of INT $0x80 can be found inline in - * various programs and libraries. It is also used by the vDSO's - * __kernel_vsyscall fallback for hardware that doesn't support a faster - * entry method. Restarted 32-bit system calls also fall back to INT - * $0x80 regardless of what instruction was originally used to do the - * system call. 
- * - * This is considered a slow path. It is not used by most libc - * implementations on modern hardware except during process startup. - * - * Arguments: - * eax system call number - * ebx arg1 - * ecx arg2 - * edx arg3 - * esi arg4 - * edi arg5 - * ebp arg6 - */ -SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_EMPTY - ENDBR - /* - * Interrupts are off on entry. - */ - ASM_CLAC /* Do this early to minimize exposure */ - SWAPGS - - /* - * User tracing code (ptrace or signal handlers) might assume that - * the saved RAX contains a 32-bit number when we're invoking a 32-bit - * syscall. Just in case the high bits are nonzero, zero-extend - * the syscall number. (This could almost certainly be deleted - * with no ill effects.) - */ - movl %eax, %eax - - /* switch to thread stack expects orig_ax and rdi to be pushed */ - pushq %rax /* pt_regs->orig_ax */ - - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rax - - /* In the Xen PV case we already run on the thread stack. 
*/ - ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV - - movq %rsp, %rax - movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp - - pushq 5*8(%rax) /* regs->ss */ - pushq 4*8(%rax) /* regs->rsp */ - pushq 3*8(%rax) /* regs->eflags */ - pushq 2*8(%rax) /* regs->cs */ - pushq 1*8(%rax) /* regs->ip */ - pushq 0*8(%rax) /* regs->orig_ax */ -.Lint80_keep_stack: - - PUSH_AND_CLEAR_REGS rax=$-ENOSYS - UNWIND_HINT_REGS - - cld - - IBRS_ENTER - UNTRAIN_RET - - movq %rsp, %rdi - call do_int80_syscall_32 - jmp swapgs_restore_regs_and_return_to_usermode -SYM_CODE_END(entry_INT80_compat) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 72184b0b2219e..fca710a93eb9c 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); +#if defined(CONFIG_IA32_EMULATION) +DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation); +#endif + #ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_64 DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 85d01c3edb4e2..6bca4344fd092 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -32,10 +32,6 @@ void entry_SYSCALL_compat(void); void entry_SYSCALL_compat_safe_stack(void); void entry_SYSRETL_compat_unsafe_stack(void); void entry_SYSRETL_compat_end(void); -void entry_INT80_compat(void); -#ifdef CONFIG_XEN_PV -void xen_entry_INT80_compat(void); -#endif #else /* !CONFIG_IA32_EMULATION */ #define entry_SYSCALL_compat NULL #define entry_SYSENTER_compat NULL diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 41f88da585027..06908a64db416 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -121,7 +121,7 @@ static const __initconst struct idt_data def_idts[] = { static const struct idt_data 
ia32_idt[] __initconst = { #if defined(CONFIG_IA32_EMULATION) - SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), + SYSG(IA32_SYSCALL_VECTOR, asm_int80_emulation), #elif defined(CONFIG_X86_32) SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32), #endif diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c191817bd455f..9c4f4f89759a8 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -637,7 +637,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_int3, false ), TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION - { entry_INT80_compat, xen_entry_INT80_compat, false }, + TRAP_ENTRY(int80_emulation, false ), #endif TRAP_ENTRY(exc_page_fault, false ), TRAP_ENTRY(exc_divide_error, false ), diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 6bf9d45b91784..accea4262240d 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION -xen_pv_trap entry_INT80_compat +xen_pv_trap asm_int80_emulation #endif xen_pv_trap asm_exc_xen_unknown_trap xen_pv_trap asm_exc_xen_hypervisor_callback From 325b5d87fc9627ec68ac2b8ad8bbd706bdcc027f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Thu, 2 Oct 2025 23:52:08 +0200 Subject: [PATCH 10/13] x86/entry: Do not allow external 0x80 interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve CVE-2024-25744 commit-author Thomas Gleixner commit 55617fb991df535f953589586468612351575704 The INT 0x80 instruction is used for 32-bit x86 Linux syscalls. The kernel expects to receive a software interrupt as a result of the INT 0x80 instruction. However, an external interrupt on the same vector also triggers the same codepath. 
An external interrupt on vector 0x80 will currently be interpreted as a 32-bit system call, and assuming that it was a user context. Panic on external interrupts on the vector. To distinguish software interrupts from external ones, the kernel checks the APIC ISR bit relevant to the 0x80 vector. For software interrupts, this bit will be 0. Signed-off-by: Thomas Gleixner Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Cc: <stable@vger.kernel.org> # v6.0+ (cherry picked from commit 55617fb991df535f953589586468612351575704) Signed-off-by: Marcin Wcisło --- arch/x86/entry/common.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 91012af49083f..334e29e9d6ed0 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -26,6 +26,7 @@ #include <xen/events.h> #endif +#include <asm/apic.h> #include <asm/desc.h> #include <asm/traps.h> #include <asm/vdso.h> @@ -127,6 +128,25 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } #ifdef CONFIG_IA32_EMULATION +static __always_inline bool int80_is_external(void) +{ + const unsigned int offs = (0x80 / 32) * 0x10; + const u32 bit = BIT(0x80 % 32); + + /* The local APIC on XENPV guests is fake */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* + * If vector 0x80 is set in the APIC ISR then this is an external + * interrupt. Either from broken hardware or injected by a VMM. + * + * Note: In guest mode this is only valid for secure guests where + * the secure module fully controls the vAPIC exposed to the guest. + */ + return apic_read(APIC_ISR + offs) & bit; +} + /** * int80_emulation - 32-bit legacy syscall entry * @@ -150,12 +170,27 @@ DEFINE_IDTENTRY_RAW(int80_emulation) { int nr; - /* Establish kernel context. */ + /* Kernel does not use INT $0x80!
*/ + if (unlikely(!user_mode(regs))) { + irqentry_enter(regs); + instrumentation_begin(); + panic("Unexpected external interrupt 0x80\n"); + } + + /* + * Establish kernel context for instrumentation, including for + * int80_is_external() below which calls into the APIC driver. + * Identical for soft and external interrupts. + */ enter_from_user_mode(regs); instrumentation_begin(); add_random_kstack_offset(); + /* Validate that this is a soft interrupt to the extent possible */ + if (unlikely(int80_is_external())) + panic("Unexpected external interrupt 0x80\n"); + /* * The low level idtentry code pushed -1 into regs::orig_ax * and regs::ax contains the syscall number. From 31adcfc191814e67310f6127ec1b6c8764aa4426 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 3 Oct 2025 02:00:37 +0200 Subject: [PATCH 11/13] x86/tdx: Allow 32-bit emulation by default MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-772 cve CVE-2024-25744 commit-author Kirill A. Shutemov commit f4116bfc44621882556bbf70f5284fbf429a5cf6 32-bit emulation was disabled on TDX to prevent a possible attack by a VMM injecting an interrupt on vector 0x80. Now that int80_emulation() has a check for external interrupts the limitation can be lifted. To distinguish software interrupts from external ones, int80_emulation() checks the APIC ISR bit relevant to the 0x80 vector. For software interrupts, this bit will be 0. On TDX, the VAPIC state (including ISR) is protected and cannot be manipulated by the VMM. The ISR bit is set by the microcode flow during the handling of posted interrupts. [ dhansen: more changelog tweaks ] Signed-off-by: Kirill A. 
Shutemov Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov (AMD) Cc: <stable@vger.kernel.org> # v6.0+ (cherry picked from commit f4116bfc44621882556bbf70f5284fbf429a5cf6) Signed-off-by: Marcin Wcisło --- arch/x86/coco/tdx/tdx.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 56b058531d894..2743d56906803 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -825,14 +825,5 @@ void __init tdx_early_init(void) x86_platform.guest.enc_tlb_flush_required = tdx_tlb_flush_required; x86_platform.guest.enc_status_change_finish = tdx_enc_status_changed; - /* - * The VMM is capable of injecting interrupt 0x80 and triggering the - * compatibility syscall path. - * - * By default, the 32-bit emulation is disabled in order to ensure - * the safety of the VM. - */ - ia32_disable(); - pr_info("Guest detected\n"); } From 30b76559b95cab66bf757f95b1f8f0ccebcb9500 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 5 Jan 2024 11:14:07 +0100 Subject: [PATCH 12/13] x86/sev: Harden #VC instruction emulation somewhat jira VULN-756 cve CVE-2024-25742 commit-author Borislav Petkov (AMD) commit e3ef461af35a8c74f2f4ce6616491ddb355a208f upstream-diff Added `#else' case for the `#ifndef __BOOT_COMPRESSED' which was modified in upstream but not present in `ciqlts9_2'. Compare the opcode bytes at rIP for each #VC exit reason to verify the instruction which raised the #VC exception is actually the right one.
Signed-off-by: Borislav Petkov (AMD) Acked-by: Tom Lendacky Link: https://lore.kernel.org/r/20240105101407.11694-1-bp@alien8.de --- arch/x86/boot/compressed/sev.c | 4 ++ arch/x86/kernel/sev-shared.c | 103 ++++++++++++++++++++++++++++++++- arch/x86/kernel/sev.c | 5 +- 3 files changed, 109 insertions(+), 3 deletions(-) diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 97f7271c3dc2c..30a344a66759a 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -227,6 +227,10 @@ void do_boot_stage2_vc(struct pt_regs *regs, unsigned long exit_code) if (result != ES_OK) goto finish; + result = vc_check_opcode_bytes(&ctxt, exit_code); + if (result != ES_OK) + goto finish; + switch (exit_code) { case SVM_EXIT_RDTSC: case SVM_EXIT_RDTSCP: diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 3a5b0c9c4fccc..6d9d984f1309a 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -10,8 +10,13 @@ */ #ifndef __BOOT_COMPRESSED -#define error(v) pr_err(v) -#define has_cpuflag(f) boot_cpu_has(f) +#define error(v) pr_err(v) +#define has_cpuflag(f) boot_cpu_has(f) +#define sev_printk(fmt, ...) printk(fmt, ##__VA_ARGS__) +#define sev_printk_rtl(fmt, ...) printk_ratelimited(fmt, ##__VA_ARGS__) +#else +#define sev_printk(fmt, ...) +#define sev_printk_rtl(fmt, ...) #endif /* I/O parameters for CPUID-related helpers */ @@ -531,6 +536,7 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) { unsigned int subfn = lower_bits(regs->cx, 32); unsigned int fn = lower_bits(regs->ax, 32); + u16 opcode = *(unsigned short *)regs->ip; struct cpuid_leaf leaf; int ret; @@ -538,6 +544,10 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code) if (exit_code != SVM_EXIT_CPUID) goto fail; + /* Is it really a CPUID insn? 
*/ + if (opcode != 0xa20f) + goto fail; + leaf.fn = fn; leaf.subfn = subfn; @@ -991,3 +1001,92 @@ static void __init setup_cpuid_table(const struct cc_blob_sev_info *cc_info) cpuid_ext_range_max = fn->eax; } } + +static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt, + unsigned long exit_code) +{ + unsigned int opcode = (unsigned int)ctxt->insn.opcode.value; + u8 modrm = ctxt->insn.modrm.value; + + switch (exit_code) { + + case SVM_EXIT_IOIO: + case SVM_EXIT_NPF: + /* handled separately */ + return ES_OK; + + case SVM_EXIT_CPUID: + if (opcode == 0xa20f) + return ES_OK; + break; + + case SVM_EXIT_INVD: + if (opcode == 0x080f) + return ES_OK; + break; + + case SVM_EXIT_MONITOR: + if (opcode == 0x010f && modrm == 0xc8) + return ES_OK; + break; + + case SVM_EXIT_MWAIT: + if (opcode == 0x010f && modrm == 0xc9) + return ES_OK; + break; + + case SVM_EXIT_MSR: + /* RDMSR */ + if (opcode == 0x320f || + /* WRMSR */ + opcode == 0x300f) + return ES_OK; + break; + + case SVM_EXIT_RDPMC: + if (opcode == 0x330f) + return ES_OK; + break; + + case SVM_EXIT_RDTSC: + if (opcode == 0x310f) + return ES_OK; + break; + + case SVM_EXIT_RDTSCP: + if (opcode == 0x010f && modrm == 0xf9) + return ES_OK; + break; + + case SVM_EXIT_READ_DR7: + if (opcode == 0x210f && + X86_MODRM_REG(ctxt->insn.modrm.value) == 7) + return ES_OK; + break; + + case SVM_EXIT_VMMCALL: + if (opcode == 0x010f && modrm == 0xd9) + return ES_OK; + + break; + + case SVM_EXIT_WRITE_DR7: + if (opcode == 0x230f && + X86_MODRM_REG(ctxt->insn.modrm.value) == 7) + return ES_OK; + break; + + case SVM_EXIT_WBINVD: + if (opcode == 0x90f) + return ES_OK; + break; + + default: + break; + } + + sev_printk(KERN_ERR "Wrong/unhandled opcode bytes: 0x%x, exit_code: 0x%lx, rIP: 0x%lx\n", + opcode, exit_code, ctxt->regs->ip); + + return ES_UNSUPPORTED; +} diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 4fe4c8ecf4c95..aa2fc339878be 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1738,7 
+1738,10 @@ static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt, struct ghcb *ghcb, unsigned long exit_code) { - enum es_result result; + enum es_result result = vc_check_opcode_bytes(ctxt, exit_code); + + if (result != ES_OK) + return result; switch (exit_code) { case SVM_EXIT_READ_DR7: From 30d8f2116d7196f31d6f492a8da8afd94569e08b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marcin=20Wcis=C5=82o?= Date: Fri, 3 Oct 2025 00:07:26 +0200 Subject: [PATCH 13/13] x86/sev: Check for MWAITX and MONITORX opcodes in the #VC handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit jira VULN-756 cve-bf CVE-2024-25742 commit-author Tom Lendacky commit e70316d17f6ab49a6038ffd115397fd68f8c7be8 The MWAITX and MONITORX instructions generate the same #VC error code as the MWAIT and MONITOR instructions, respectively. Update the #VC handler opcode checking to also support the MWAITX and MONITORX opcodes. Fixes: e3ef461af35a ("x86/sev: Harden #VC instruction emulation somewhat") Signed-off-by: Tom Lendacky Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/453d5a7cfb4b9fe818b6fb67f93ae25468bc9e23.1713793161.git.thomas.lendacky@amd.com (cherry picked from commit e70316d17f6ab49a6038ffd115397fd68f8c7be8) Signed-off-by: Marcin Wcisło --- arch/x86/kernel/sev-shared.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c index 6d9d984f1309a..cc1d91393192b 100644 --- a/arch/x86/kernel/sev-shared.c +++ b/arch/x86/kernel/sev-shared.c @@ -1026,12 +1026,14 @@ static enum es_result vc_check_opcode_bytes(struct es_em_ctxt *ctxt, break; case SVM_EXIT_MONITOR: - if (opcode == 0x010f && modrm == 0xc8) + /* MONITOR and MONITORX instructions generate the same error code */ + if (opcode == 0x010f && (modrm == 0xc8 || modrm == 0xfa)) return ES_OK; break; case SVM_EXIT_MWAIT: - if (opcode == 0x010f && modrm == 0xc9) + /* MWAIT and MWAITX instructions generate 
the same error code */ + if (opcode == 0x010f && (modrm == 0xc9 || modrm == 0xfb)) return ES_OK; break;