From 3e1baaeb8ecf5b0b7cb6047969f61088264fa4a9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Wcis=C5=82o?=
Date: Mon, 6 Oct 2025 21:58:30 +0200
Subject: [PATCH 1/8] x86/sev: Disable MMIO emulation from user mode
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

jira VULN-6719
cve CVE-2023-46813
commit-author Borislav Petkov (AMD)
commit a37cd2a59d0cb270b1bba568fd3a3b8668b9d3ba

A virt scenario can be constructed where MMIO memory can be user memory.
When that happens, a race condition opens between when the hardware
raises the #VC and when the #VC handler gets to emulate the instruction.

If the MOVS is replaced with a MOVS accessing kernel memory in that
small race window, then write to kernel memory happens as the access
checks are not done at emulation time.

Disable MMIO emulation in user mode temporarily until a sensible use
case appears and justifies properly handling the race window.

Fixes: 0118b604c2c9 ("x86/sev-es: Handle MMIO String Instructions")
Reported-by: Tom Dohrmann
Signed-off-by: Borislav Petkov (AMD)
Tested-by: Tom Dohrmann
Cc:
(cherry picked from commit a37cd2a59d0cb270b1bba568fd3a3b8668b9d3ba)
Signed-off-by: Marcin Wcisło
---
 arch/x86/kernel/sev.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index 4fe4c8ecf4c95..c2b7fc250c692 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -1533,6 +1533,9 @@ static enum es_result vc_handle_mmio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
                 return ES_DECODE_FAILED;
         }
 
+        if (user_mode(ctxt->regs))
+                return ES_UNSUPPORTED;
+
         switch (mmio) {
         case MMIO_WRITE:
                 memcpy(ghcb->shared_buffer, reg_data, bytes);

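[ illustrative aside, not part of the series: the decode-vs-emulate race
  described above is a time-of-check/time-of-use problem. The user-space
  sketch below (plain pthreads, nothing SEV-specific; all names, buffers
  and timings are made up for illustration) shows why a value validated
  before a window cannot be trusted inside it -- which is why the patch
  simply refuses user-mode MMIO emulation instead of re-validating. ]

  /* Build with: gcc -pthread toctou.c */
  #include <pthread.h>
  #include <stdio.h>
  #include <string.h>
  #include <unistd.h>

  static char allowed[64];
  static char forbidden[64];              /* stands in for kernel memory */
  static char * volatile operand = allowed;

  static void *attacker(void *arg)
  {
          (void)arg;
          usleep(1000);                   /* land inside the race window */
          operand = forbidden;            /* flip the operand after the check */
          return NULL;
  }

  int main(void)
  {
          pthread_t t;
          char *checked = operand;        /* "decode time": snapshot and validate */

          pthread_create(&t, NULL, attacker, NULL);

          if (checked >= allowed && checked < allowed + sizeof(allowed)) {
                  usleep(2000);                   /* window between check and use */
                  memcpy(operand, "gotcha", 7);   /* "emulation time": operand re-read */
          }

          pthread_join(t, NULL);
          printf("forbidden buffer: \"%s\"\n", forbidden);
          return 0;
  }
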
From c90c4a4282248fd273e5075fa6de155c9599f841 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Wcis=C5=82o?=
Date: Mon, 6 Oct 2025 21:58:41 +0200
Subject: [PATCH 2/8] x86/sev: Check IOBM for IOIO exceptions from user-space
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

jira VULN-6719
cve CVE-2023-46813
commit-author Joerg Roedel
commit b9cb9c45583b911e0db71d09caa6b56469eb2bdf

Check the IO permission bitmap (if present) before emulating IOIO #VC
exceptions for user-space. These permissions are checked by hardware
already before the #VC is raised, but due to the VC-handler decoding
race it needs to be checked again in software.

Fixes: 25189d08e516 ("x86/sev-es: Add support for handling IOIO exceptions")
Reported-by: Tom Dohrmann
Signed-off-by: Joerg Roedel
Signed-off-by: Borislav Petkov (AMD)
Tested-by: Tom Dohrmann
Cc:
(cherry picked from commit b9cb9c45583b911e0db71d09caa6b56469eb2bdf)
Signed-off-by: Marcin Wcisło
---
 arch/x86/boot/compressed/sev.c |  5 +++++
 arch/x86/kernel/sev-shared.c   | 22 +++++++++++++++-------
 arch/x86/kernel/sev.c          | 27 +++++++++++++++++++++++++++
 3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index 97f7271c3dc2c..ee38e8ad98169 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -103,6 +103,11 @@ static enum es_result vc_read_mem(struct es_em_ctxt *ctxt,
         return ES_OK;
 }
 
+static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
+{
+        return ES_OK;
+}
+
 #undef __init
 #undef __pa
 #define __init
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index 3a5b0c9c4fccc..ebb24c09ffd60 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -653,6 +653,9 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
 static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
 {
         struct insn *insn = &ctxt->insn;
+        size_t size;
+        u64 port;
+
         *exitinfo = 0;
 
         switch (insn->opcode.bytes[0]) {
@@ -661,7 +664,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
         case 0x6d:
                 *exitinfo |= IOIO_TYPE_INS;
                 *exitinfo |= IOIO_SEG_ES;
-                *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+                port       = ctxt->regs->dx & 0xffff;
                 break;
 
         /* OUTS opcodes */
@@ -669,41 +672,43 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
         case 0x6f:
                 *exitinfo |= IOIO_TYPE_OUTS;
                 *exitinfo |= IOIO_SEG_DS;
-                *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+                port       = ctxt->regs->dx & 0xffff;
                 break;
 
         /* IN immediate opcodes */
         case 0xe4:
         case 0xe5:
                 *exitinfo |= IOIO_TYPE_IN;
-                *exitinfo |= (u8)insn->immediate.value << 16;
+                port       = (u8)insn->immediate.value & 0xffff;
                 break;
 
         /* OUT immediate opcodes */
         case 0xe6:
         case 0xe7:
                 *exitinfo |= IOIO_TYPE_OUT;
-                *exitinfo |= (u8)insn->immediate.value << 16;
+                port       = (u8)insn->immediate.value & 0xffff;
                 break;
 
         /* IN register opcodes */
         case 0xec:
         case 0xed:
                 *exitinfo |= IOIO_TYPE_IN;
-                *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+                port       = ctxt->regs->dx & 0xffff;
                 break;
 
         /* OUT register opcodes */
         case 0xee:
         case 0xef:
                 *exitinfo |= IOIO_TYPE_OUT;
-                *exitinfo |= (ctxt->regs->dx & 0xffff) << 16;
+                port       = ctxt->regs->dx & 0xffff;
                 break;
 
         default:
                 return ES_DECODE_FAILED;
         }
 
+        *exitinfo |= port << 16;
+
         switch (insn->opcode.bytes[0]) {
         case 0x6c:
         case 0x6e:
@@ -713,12 +718,15 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
         case 0xee:
                 /* Single byte opcodes */
                 *exitinfo |= IOIO_DATA_8;
+                size       = 1;
                 break;
         default:
                 /* Length determined by instruction parsing */
                 *exitinfo |= (insn->opnd_bytes == 2) ? IOIO_DATA_16
                                                      : IOIO_DATA_32;
+                size       = (insn->opnd_bytes == 2) ? 2 : 4;
         }
+
         switch (insn->addr_bytes) {
         case 2:
                 *exitinfo |= IOIO_ADDR_16;
@@ -734,7 +742,7 @@ static enum es_result vc_ioio_exitinfo(struct es_em_ctxt *ctxt, u64 *exitinfo)
         if (insn_has_rep_prefix(insn))
                 *exitinfo |= IOIO_REP;
 
-        return ES_OK;
+        return vc_ioio_check(ctxt, (u16)port, size);
 }
 
 static enum es_result vc_handle_ioio(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c
index c2b7fc250c692..ae04fc58aa439 100644
--- a/arch/x86/kernel/sev.c
+++ b/arch/x86/kernel/sev.c
@@ -493,6 +493,33 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
         return ES_OK;
 }
 
+static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t size)
+{
+        BUG_ON(size > 4);
+
+        if (user_mode(ctxt->regs)) {
+                struct thread_struct *t = &current->thread;
+                struct io_bitmap *iobm = t->io_bitmap;
+                size_t idx;
+
+                if (!iobm)
+                        goto fault;
+
+                for (idx = port; idx < port + size; ++idx) {
+                        if (test_bit(idx, iobm->bitmap))
+                                goto fault;
+                }
+        }
+
+        return ES_OK;
+
+fault:
+        ctxt->fi.vector = X86_TRAP_GP;
+        ctxt->fi.error_code = 0;
+
+        return ES_EXCEPTION;
+}
+
 /* Include code shared with pre-decompression boot stage */
 #include "sev-shared.c"

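[ illustrative aside, not part of the series: the io_bitmap consulted by
  vc_ioio_check() above is the same per-thread bitmap that ioperm(2)
  manages. The user-space sketch below (x86 Linux, needs root or
  CAP_SYS_RAWIO; the CMOS/RTC port pair 0x70/0x71 is just an example)
  shows the permission that must exist before an IN/OUT issued from user
  space may legitimately be emulated. ]

  #include <stdio.h>
  #include <sys/io.h>                     /* ioperm(), inb(), outb() -- x86 only */

  int main(void)
  {
          unsigned short port = 0x70;     /* CMOS/RTC index port */

          if (ioperm(port, 2, 1)) {       /* grant access to ports 0x70-0x71 */
                  perror("ioperm (need root / CAP_SYS_RAWIO)");
                  return 1;
          }

          outb(0x00, port);               /* select RTC seconds register */
          printf("RTC seconds register: %#x\n", inb(port + 1));

          ioperm(port, 2, 0);             /* drop the permission again */
          return 0;
  }
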
From 72de94dff9d27f7c1080a8be5fe59ff844df18ec Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Wcis=C5=82o?=
Date: Mon, 6 Oct 2025 21:58:48 +0200
Subject: [PATCH 3/8] x86/sev: Check for user-space IOIO pointing to kernel
 space
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

jira VULN-6719
cve CVE-2023-46813
commit-author Joerg Roedel
commit 63e44bc52047f182601e7817da969a105aa1f721

Check the memory operand of INS/OUTS before emulating the instruction.
The #VC exception can get raised from user-space, but the memory operand
can be manipulated to access kernel memory before the emulation actually
begins and after the exception handler has run.

  [ bp: Massage commit message. ]

Fixes: 597cfe48212a ("x86/boot/compressed/64: Setup a GHCB-based VC Exception handler")
Reported-by: Tom Dohrmann
Signed-off-by: Joerg Roedel
Signed-off-by: Borislav Petkov (AMD)
Cc:
(cherry picked from commit 63e44bc52047f182601e7817da969a105aa1f721)
Signed-off-by: Marcin Wcisło
---
 arch/x86/boot/compressed/sev.c |  5 +++++
 arch/x86/kernel/sev-shared.c   | 31 +++++++++++++++++++++++++++++--
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c
index ee38e8ad98169..9a7cf3d599273 100644
--- a/arch/x86/boot/compressed/sev.c
+++ b/arch/x86/boot/compressed/sev.c
@@ -108,6 +108,11 @@ static enum es_result vc_ioio_check(struct es_em_ctxt *ctxt, u16 port, size_t si
         return ES_OK;
 }
 
+static bool fault_in_kernel_space(unsigned long address)
+{
+        return false;
+}
+
 #undef __init
 #undef __pa
 #define __init
diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c
index ebb24c09ffd60..8abd18811238c 100644
--- a/arch/x86/kernel/sev-shared.c
+++ b/arch/x86/kernel/sev-shared.c
@@ -589,6 +589,23 @@ void __init do_vc_no_ghcb(struct pt_regs *regs, unsigned long exit_code)
         sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ);
 }
 
+static enum es_result vc_insn_string_check(struct es_em_ctxt *ctxt,
+                                           unsigned long address,
+                                           bool write)
+{
+        if (user_mode(ctxt->regs) && fault_in_kernel_space(address)) {
+                ctxt->fi.vector     = X86_TRAP_PF;
+                ctxt->fi.error_code = X86_PF_USER;
+                ctxt->fi.cr2        = address;
+                if (write)
+                        ctxt->fi.error_code |= X86_PF_WRITE;
+
+                return ES_EXCEPTION;
+        }
+
+        return ES_OK;
+}
+
 static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
                                           void *src, char *buf,
                                           unsigned int data_size,
@@ -596,7 +613,12 @@ static enum es_result vc_insn_string_read(struct es_em_ctxt *ctxt,
                                           bool backwards)
 {
         int i, b = backwards ? -1 : 1;
-        enum es_result ret = ES_OK;
+        unsigned long address = (unsigned long)src;
+        enum es_result ret;
+
+        ret = vc_insn_string_check(ctxt, address, false);
+        if (ret != ES_OK)
+                return ret;
 
         for (i = 0; i < count; i++) {
                 void *s = src + (i * data_size * b);
@@ -617,7 +639,12 @@ static enum es_result vc_insn_string_write(struct es_em_ctxt *ctxt,
                                            bool backwards)
 {
         int i, s = backwards ? -1 : 1;
-        enum es_result ret = ES_OK;
+        unsigned long address = (unsigned long)dst;
+        enum es_result ret;
+
+        ret = vc_insn_string_check(ctxt, address, true);
+        if (ret != ES_OK)
+                return ret;
 
         for (i = 0; i < count; i++) {
                 void *d = dst + (i * data_size * s);

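[ illustrative aside, not part of the series: on x86-64,
  fault_in_kernel_space() essentially asks whether an address lies at or
  above TASK_SIZE_MAX (the real kernel also carves out the legacy
  vsyscall page, omitted here). A stand-alone sketch of that boundary
  check -- the constant and the sample addresses are assumptions chosen
  for illustration, not taken from the patch: ]

  #include <stdbool.h>
  #include <stdint.h>
  #include <stdio.h>

  #define X86_64_TASK_SIZE_MAX    0x00007ffffffff000ULL   /* 4-level paging */

  static bool points_into_kernel_space(uint64_t address)
  {
          return address >= X86_64_TASK_SIZE_MAX;
  }

  int main(void)
  {
          uint64_t samples[] = {
                  0x0000000000401000ULL,  /* typical user text   */
                  0x00007fffffffe000ULL,  /* top of user stack   */
                  0xffff888000000000ULL,  /* direct map (kernel) */
                  0xffffffff81000000ULL,  /* kernel text         */
          };

          for (unsigned int i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
                  printf("%#018llx -> %s\n", (unsigned long long)samples[i],
                         points_into_kernel_space(samples[i]) ? "kernel" : "user");
          return 0;
  }
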
From d468641b9f5b86ea01d1e9ce6e58ed1387e9f653 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Wcis=C5=82o?=
Date: Tue, 7 Oct 2025 23:10:59 +0200
Subject: [PATCH 4/8] x86/kasan: Map shadow for percpu pages on demand
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

jira VULN-8044
cve-pre CVE-2023-0597
commit-author Andrey Ryabinin
commit 3f148f3318140035e87decc1214795ff0755757b

KASAN maps shadow for the entire CPU-entry-area:
  [CPU_ENTRY_AREA_BASE, CPU_ENTRY_AREA_BASE + CPU_ENTRY_AREA_MAP_SIZE]

This will explode once the per-cpu entry areas are randomized since it
will increase CPU_ENTRY_AREA_MAP_SIZE to 512 GB and KASAN fails to
allocate shadow for such big area.

Fix this by allocating KASAN shadow only for really used cpu entry area
addresses mapped by cea_map_percpu_pages()

Thanks to the 0day folks for finding and reporting this to be an issue.

[ dhansen: tweak changelog since this will get committed before peterz's
           actual cpu-entry-area randomization ]

Signed-off-by: Andrey Ryabinin
Signed-off-by: Dave Hansen
Tested-by: Yujie Liu
Cc: kernel test robot
Link: https://lore.kernel.org/r/202210241508.2e203c3d-yujie.liu@intel.com
(cherry picked from commit 3f148f3318140035e87decc1214795ff0755757b)
Signed-off-by: Marcin Wcisło
---
 arch/x86/include/asm/kasan.h |  3 +++
 arch/x86/mm/cpu_entry_area.c |  8 +++++++-
 arch/x86/mm/kasan_init_64.c  | 15 ++++++++++++---
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/kasan.h b/arch/x86/include/asm/kasan.h
index 13e70da38beda..de75306b932ef 100644
--- a/arch/x86/include/asm/kasan.h
+++ b/arch/x86/include/asm/kasan.h
@@ -28,9 +28,12 @@
 #ifdef CONFIG_KASAN
 void __init kasan_early_init(void);
 void __init kasan_init(void);
+void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid);
 #else
 static inline void kasan_early_init(void) { }
 static inline void kasan_init(void) { }
+static inline void kasan_populate_shadow_for_vaddr(void *va, size_t size,
+                                                   int nid) { }
 #endif
 
 #endif
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 6c2f1b76a0b61..d7081b1accca6 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
 
@@ -53,8 +54,13 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
 static void __init
 cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
 {
+        phys_addr_t pa = per_cpu_ptr_to_phys(ptr);
+
+        kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE,
+                                        early_pfn_to_nid(PFN_DOWN(pa)));
+
         for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
-                cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
+                cea_set_pte(cea_vaddr, pa, prot);
 }
 
 static void __init percpu_setup_debug_store(unsigned int cpu)
diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c
index e7b9b464a82f1..d1416926ad526 100644
--- a/arch/x86/mm/kasan_init_64.c
+++ b/arch/x86/mm/kasan_init_64.c
@@ -316,6 +316,18 @@ void __init kasan_early_init(void)
         kasan_map_early_shadow(init_top_pgt);
 }
 
+void __init kasan_populate_shadow_for_vaddr(void *va, size_t size, int nid)
+{
+        unsigned long shadow_start, shadow_end;
+
+        shadow_start = (unsigned long)kasan_mem_to_shadow(va);
+        shadow_start = round_down(shadow_start, PAGE_SIZE);
+        shadow_end = (unsigned long)kasan_mem_to_shadow(va + size);
+        shadow_end = round_up(shadow_end, PAGE_SIZE);
+
+        kasan_populate_shadow(shadow_start, shadow_end, nid);
+}
+
 void __init kasan_init(void)
 {
         int i;
@@ -393,9 +405,6 @@ void __init kasan_init(void)
                 kasan_mem_to_shadow((void *)VMALLOC_END + 1),
                 shadow_cpu_entry_begin);
 
-        kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin,
-                              (unsigned long)shadow_cpu_entry_end, 0);
-
         kasan_populate_early_shadow(shadow_cpu_entry_end,
                         kasan_mem_to_shadow((void *)__START_KERNEL_map));
 
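[ illustrative aside, not part of the series: generic KASAN keeps one
  shadow byte per 8 bytes of address space, which is why eagerly
  shadowing the whole (soon 512 GB) CPU-entry-area map is prohibitive
  while shadowing only the pages cea_map_percpu_pages() actually maps is
  cheap. The CPU count and per-CPU area size below are made-up round
  numbers, used purely for the arithmetic: ]

  #include <stdio.h>

  #define GiB             (1ULL << 30)
  #define SHADOW_SCALE    8ULL            /* 8 bytes of memory per shadow byte */

  int main(void)
  {
          unsigned long long map_size = 512 * GiB;    /* randomized CPU_ENTRY_AREA_MAP_SIZE */
          unsigned long long cea_size = 256 * 1024;   /* per-CPU area, illustrative */
          unsigned int       nr_cpus  = 64;           /* illustrative */

          printf("shadow for the whole map:  %llu GiB\n",
                 map_size / SHADOW_SCALE / GiB);
          printf("shadow for %u mapped CEAs: %llu KiB\n", nr_cpus,
                 nr_cpus * cea_size / SHADOW_SCALE / 1024);
          return 0;
  }
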
From 198ff5558fe18554b70b3cc6f6e5d462760fdf1c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Wcis=C5=82o?=
Date: Wed, 8 Oct 2025 00:46:36 +0200
Subject: [PATCH 5/8] x86/mm: Randomize per-cpu entry area
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

jira VULN-8044
cve CVE-2023-0597
commit-author Peter Zijlstra
commit 97e3d26b5e5f371b3ee223d94dd123e6c442ba80
upstream-diff Included `linux/prandom.h' in `arch/x86/mm/cpu_entry_area.c'
              directly (compilation fails without it)

Seth found that the CPU-entry-area; the piece of per-cpu data that is
mapped into the userspace page-tables for kPTI is not subject to any
randomization -- irrespective of kASLR settings.

On x86_64 a whole P4D (512 GB) of virtual address space is reserved for
this structure, which is plenty large enough to randomize things a
little.

As such, use a straight forward randomization scheme that avoids
duplicates to spread the existing CPUs over the available space.

[ bp: Fix le build. ]

Reported-by: Seth Jenkins
Reviewed-by: Kees Cook
Signed-off-by: Peter Zijlstra (Intel)
Signed-off-by: Dave Hansen
Signed-off-by: Borislav Petkov
(cherry picked from commit 97e3d26b5e5f371b3ee223d94dd123e6c442ba80)
Signed-off-by: Marcin Wcisło
---
 arch/x86/include/asm/cpu_entry_area.h |  4 ---
 arch/x86/include/asm/pgtable_areas.h  |  8 ++++-
 arch/x86/kernel/hw_breakpoint.c       |  2 +-
 arch/x86/mm/cpu_entry_area.c          | 47 ++++++++++++++++++++++++---
 4 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
index dd5ea1bdf04c5..e2c04a5015b02 100644
--- a/arch/x86/include/asm/cpu_entry_area.h
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -130,10 +130,6 @@ struct cpu_entry_area {
 };
 
 #define CPU_ENTRY_AREA_SIZE             (sizeof(struct cpu_entry_area))
-#define CPU_ENTRY_AREA_ARRAY_SIZE       (CPU_ENTRY_AREA_SIZE * NR_CPUS)
-
-/* Total size includes the readonly IDT mapping page as well: */
-#define CPU_ENTRY_AREA_TOTAL_SIZE       (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
 
 DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
 DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
diff --git a/arch/x86/include/asm/pgtable_areas.h b/arch/x86/include/asm/pgtable_areas.h
index d34cce1b995cf..4f056fb88174b 100644
--- a/arch/x86/include/asm/pgtable_areas.h
+++ b/arch/x86/include/asm/pgtable_areas.h
@@ -11,6 +11,12 @@
 
 #define CPU_ENTRY_AREA_RO_IDT_VADDR     ((void *)CPU_ENTRY_AREA_RO_IDT)
 
-#define CPU_ENTRY_AREA_MAP_SIZE         (CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
+#ifdef CONFIG_X86_32
+#define CPU_ENTRY_AREA_MAP_SIZE         (CPU_ENTRY_AREA_PER_CPU +              \
+                                         (CPU_ENTRY_AREA_SIZE * NR_CPUS) -     \
+                                         CPU_ENTRY_AREA_BASE)
+#else
+#define CPU_ENTRY_AREA_MAP_SIZE         P4D_SIZE
+#endif
 
 #endif /* _ASM_X86_PGTABLE_AREAS_H */
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index 5b8d427a3d637..b01644c949b2a 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -266,7 +266,7 @@ static inline bool within_cpu_entry(unsigned long addr, unsigned long end)
 
         /* CPU entry erea is always used for CPU entry */
         if (within_area(addr, end, CPU_ENTRY_AREA_BASE,
-                        CPU_ENTRY_AREA_TOTAL_SIZE))
+                        CPU_ENTRY_AREA_MAP_SIZE))
                 return true;
 
         /*
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index d7081b1accca6..a88cadeb86ebb 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -5,6 +5,7 @@
 #include
 #include
 #include
+#include
 
 #include
 #include
@@ -16,16 +17,53 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage)
 #ifdef CONFIG_X86_64
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
 DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
-#endif
 
-#ifdef CONFIG_X86_32
+static DEFINE_PER_CPU_READ_MOSTLY(unsigned long, _cea_offset);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+        return per_cpu(_cea_offset, cpu);
+}
+
+static __init void init_cea_offsets(void)
+{
+        unsigned int max_cea;
+        unsigned int i, j;
+
+        max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
+
+        /* O(sodding terrible) */
+        for_each_possible_cpu(i) {
+                unsigned int cea;
+
+again:
+                cea = prandom_u32_max(max_cea);
+
+                for_each_possible_cpu(j) {
+                        if (cea_offset(j) == cea)
+                                goto again;
+
+                        if (i == j)
+                                break;
+                }
+
+                per_cpu(_cea_offset, i) = cea;
+        }
+}
+#else /* !X86_64 */
 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
+
+static __always_inline unsigned int cea_offset(unsigned int cpu)
+{
+        return cpu;
+}
+static inline void init_cea_offsets(void) { }
 #endif
 
 /* Is called from entry code, so must be noinstr */
 noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
 {
-        unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
+        unsigned long va = CPU_ENTRY_AREA_PER_CPU + cea_offset(cpu) * CPU_ENTRY_AREA_SIZE;
         BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
 
         return (struct cpu_entry_area *) va;
@@ -211,7 +249,6 @@ static __init void setup_cpu_entry_area_ptes(void)
 
         /* The +1 is for the readonly IDT: */
         BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
-        BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
         BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
 
         start = CPU_ENTRY_AREA_BASE;
@@ -227,6 +264,8 @@ void __init setup_cpu_entry_areas(void)
 {
         unsigned int cpu;
 
+        init_cea_offsets();
+
         setup_cpu_entry_area_ptes();
 
         for_each_possible_cpu(cpu)

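[ illustrative aside, not part of the series: init_cea_offsets() above
  draws a random slot for every CPU and simply re-draws on collision, so
  all CPUs end up with distinct offsets inside the reserved window. A
  user-space model of the same rejection-sampling idea, with rand()
  standing in for prandom_u32_max() and made-up NR_CPUS/MAX_CEA values: ]

  #include <stdio.h>
  #include <stdlib.h>
  #include <time.h>

  #define NR_CPUS  8                      /* illustrative */
  #define MAX_CEA  1000                   /* (MAP_SIZE - PAGE_SIZE) / CEA_SIZE, illustrative */

  static unsigned int cea_offset[NR_CPUS];

  int main(void)
  {
          unsigned int i, j;

          srand((unsigned int)time(NULL));

          for (i = 0; i < NR_CPUS; i++) {
                  unsigned int cea;

  again:
                  cea = (unsigned int)rand() % MAX_CEA;

                  /* reject the draw if an earlier CPU already owns this slot */
                  for (j = 0; j < i; j++) {
                          if (cea_offset[j] == cea)
                                  goto again;
                  }

                  cea_offset[i] = cea;
          }

          for (i = 0; i < NR_CPUS; i++)
                  printf("CPU%u -> slot %u\n", i, cea_offset[i]);
          return 0;
  }
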
From c051a401bdf5ffcd312acbe81843109d70a9b48b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Wcis=C5=82o?=
Date: Tue, 7 Oct 2025 23:12:09 +0200
Subject: [PATCH 6/8] x86/mm: Recompute physical address for every page of
 per-CPU CEA mapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

jira VULN-8044
cve-bf CVE-2023-0597
commit-author Sean Christopherson
commit 80d72a8f76e8f3f0b5a70b8c7022578e17bde8e7

Recompute the physical address for each per-CPU page in the CPU entry
area, as a recent commit inadvertently modified cea_map_percpu_pages()
such that every PTE is mapped to the physical address of the first
page.

Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand")
Signed-off-by: Sean Christopherson
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Andrey Ryabinin
Link: https://lkml.kernel.org/r/20221110203504.1985010-2-seanjc@google.com
(cherry picked from commit 80d72a8f76e8f3f0b5a70b8c7022578e17bde8e7)
Signed-off-by: Marcin Wcisło
---
 arch/x86/mm/cpu_entry_area.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index a88cadeb86ebb..84c5a9f7e1420 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -98,7 +98,7 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
                 early_pfn_to_nid(PFN_DOWN(pa)));
 
         for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
-                cea_set_pte(cea_vaddr, pa, prot);
+                cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
 }
 
 static void __init percpu_setup_debug_store(unsigned int cpu)

From 7f0398a856022a1db725b62c6f09abfbc00a23b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Wcis=C5=82o?=
Date: Tue, 7 Oct 2025 23:12:31 +0200
Subject: [PATCH 7/8] x86/mm: Populate KASAN shadow for entire per-CPU range
 of CPU entry area
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

jira VULN-8044
cve-bf CVE-2023-0597
commit-author Sean Christopherson
commit 97650148a15e0b30099d6175ffe278b9f55ec66a

Populate a KASAN shadow for the entire possible per-CPU range of the CPU
entry area instead of requiring that each individual chunk map a shadow.
Mapping shadows individually is error prone, e.g. the per-CPU GDT mapping
was left behind, which can lead to not-present page faults during KASAN
validation if the kernel performs a software lookup into the GDT.  The DS
buffer is also likely affected.

The motivation for mapping the per-CPU areas on-demand was to avoid
mapping the entire 512GiB range that's reserved for the CPU entry area,
shaving a few bytes by not creating shadows for potentially unused memory
was not a goal.

The bug is most easily reproduced by doing a sigreturn with a garbage
CS in the sigcontext, e.g.

  int main(void)
  {
    struct sigcontext regs;

    syscall(__NR_mmap, 0x1ffff000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);
    syscall(__NR_mmap, 0x20000000ul, 0x1000000ul, 7ul, 0x32ul, -1, 0ul);
    syscall(__NR_mmap, 0x21000000ul, 0x1000ul, 0ul, 0x32ul, -1, 0ul);

    memset(&regs, 0, sizeof(regs));
    regs.cs = 0x1d0;
    syscall(__NR_rt_sigreturn);
    return 0;
  }

to coerce the kernel into doing a GDT lookup to compute CS.base when
reading the instruction bytes on the subsequent #GP to determine whether
or not the #GP is something the kernel should handle, e.g. to fixup UMIP
violations or to emulate CLI/STI for IOPL=3 applications.

  BUG: unable to handle page fault for address: fffffbc8379ace00
  #PF: supervisor read access in kernel mode
  #PF: error_code(0x0000) - not-present page
  PGD 16c03a067 P4D 16c03a067 PUD 15b990067 PMD 15b98f067 PTE 0
  Oops: 0000 [#1] PREEMPT SMP KASAN
  CPU: 3 PID: 851 Comm: r2 Not tainted 6.1.0-rc3-next-20221103+ #432
  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
  RIP: 0010:kasan_check_range+0xdf/0x190
  Call Trace:
   get_desc+0xb0/0x1d0
   insn_get_seg_base+0x104/0x270
   insn_fetch_from_user+0x66/0x80
   fixup_umip_exception+0xb1/0x530
   exc_general_protection+0x181/0x210
   asm_exc_general_protection+0x22/0x30
  RIP: 0003:0x0
  Code: Unable to access opcode bytes at 0xffffffffffffffd6.
  RSP: 0003:0000000000000000 EFLAGS: 00000202
  RAX: 0000000000000000 RBX: 0000000000000000 RCX: 00000000000001d0
  RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000000
  RBP: 0000000000000000 R08: 0000000000000000 R09: 0000000000000000
  R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
  R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000

Fixes: 9fd429c28073 ("x86/kasan: Map shadow for percpu pages on demand")
Reported-by: syzbot+ffb4f000dc2872c93f62@syzkaller.appspotmail.com
Suggested-by: Andrey Ryabinin
Signed-off-by: Sean Christopherson
Signed-off-by: Peter Zijlstra (Intel)
Reviewed-by: Andrey Ryabinin
Link: https://lkml.kernel.org/r/20221110203504.1985010-3-seanjc@google.com
(cherry picked from commit 97650148a15e0b30099d6175ffe278b9f55ec66a)
Signed-off-by: Marcin Wcisło
---
 arch/x86/mm/cpu_entry_area.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 84c5a9f7e1420..350868f181116 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -92,11 +92,6 @@ void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags)
 static void __init
 cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
 {
-        phys_addr_t pa = per_cpu_ptr_to_phys(ptr);
-
-        kasan_populate_shadow_for_vaddr(cea_vaddr, pages * PAGE_SIZE,
-                                        early_pfn_to_nid(PFN_DOWN(pa)));
-
         for ( ; pages; pages--, cea_vaddr+= PAGE_SIZE, ptr += PAGE_SIZE)
                 cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
 }
@@ -196,6 +191,9 @@ static void __init setup_cpu_entry_area(unsigned int cpu)
         pgprot_t tss_prot = PAGE_KERNEL;
 #endif
 
+        kasan_populate_shadow_for_vaddr(cea, CPU_ENTRY_AREA_SIZE,
+                                        early_cpu_to_node(cpu));
+
         cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
 
         cea_map_percpu_pages(&cea->entry_stack_page,

From c861f7ad11091c9b7dc7cfd3d426a27e621d0ee4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Marcin=20Wcis=C5=82o?=
Date: Tue, 7 Oct 2025 23:13:07 +0200
Subject: [PATCH 8/8] x86/mm: Do not shuffle CPU entry areas without KASLR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

jira VULN-8044
cve-bf CVE-2023-0597
commit-author Michal Koutný
commit a3f547addcaa10df5a226526bc9e2d9a94542344

The commit 97e3d26b5e5f ("x86/mm: Randomize per-cpu entry area") fixed
an omission of KASLR on CPU entry areas. It doesn't take into account
KASLR switches though, which may result in unintended non-determinism
when a user wants to avoid it (e.g. debugging, benchmarking).

Generate only a single combination of CPU entry areas offsets -- the
linear array that existed prior to randomization -- when KASLR is
turned off.

Since we have 3f148f331814 ("x86/kasan: Map shadow for percpu pages on
demand") and followups, we can use the more relaxed guard
kaslr_enabled() (in contrast to kaslr_memory_enabled()).
Fixes: 97e3d26b5e5f ("x86/mm: Randomize per-cpu entry area")
Signed-off-by: Michal Koutný
Signed-off-by: Dave Hansen
Cc: stable@vger.kernel.org
Link: https://lore.kernel.org/all/20230306193144.24605-1-mkoutny%40suse.com
(cherry picked from commit a3f547addcaa10df5a226526bc9e2d9a94542344)
Signed-off-by: Marcin Wcisło
---
 arch/x86/mm/cpu_entry_area.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 350868f181116..4b7c9adc14ada 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -11,6 +11,7 @@
 #include
 #include
 #include
+#include
 
 static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
 
@@ -30,6 +31,12 @@ static __init void init_cea_offsets(void)
         unsigned int max_cea;
         unsigned int i, j;
 
+        if (!kaslr_enabled()) {
+                for_each_possible_cpu(i)
+                        per_cpu(_cea_offset, i) = i;
+                return;
+        }
+
         max_cea = (CPU_ENTRY_AREA_MAP_SIZE - PAGE_SIZE) / CPU_ENTRY_AREA_SIZE;
 
         /* O(sodding terrible) */