Skip to content

Commit 1ceebe2

Browse files
kiryl authored and bp3tk0v committed
x86/acpi: Add support for CPU offlining for ACPI MADT wakeup method
MADT Multiprocessor Wakeup structure version 1 brings support for CPU offlining: BIOS provides a reset vector that the CPU has to jump to in order to offline itself. The new TEST mailbox command can be used to test whether the CPU offlined itself, which means the BIOS has control over the CPU and can online it again via the ACPI MADT wakeup method.

Add CPU offlining support for the ACPI MADT wakeup method by implementing custom cpu_die(), play_dead() and stop_this_cpu() SMP operations.

CPU offlining makes it possible to hand secondary CPUs over across kexec, not limiting the second kernel to a single CPU.

The change conforms to the approved ACPI spec change proposal. See the Link.

Signed-off-by: Kirill A. Shutemov <[email protected]>
Signed-off-by: Borislav Petkov (AMD) <[email protected]>
Reviewed-by: Kuppuswamy Sathyanarayanan <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Acked-by: Kai Huang <[email protected]>
Acked-by: Rafael J. Wysocki <[email protected]>
Tested-by: Tao Liu <[email protected]>
Link: https://lore.kernel.org/all/13356251.uLZWGnKmhe@kreacher
Link: https://lore.kernel.org/r/[email protected]
1 parent d88e7b3 commit 1ceebe2

File tree

5 files changed

+227
-4
lines changed

5 files changed

+227
-4
lines changed

arch/x86/include/asm/acpi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ union acpi_subtable_headers;
8383
int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
8484
const unsigned long end);
8585

86+
void asm_acpi_mp_play_dead(u64 reset_vector, u64 pgd_pa);
87+
8688
/*
8789
* Check if the CPU can handle C2 and deeper
8890
*/

arch/x86/kernel/acpi/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o
44
obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o
55
obj-$(CONFIG_ACPI_APEI) += apei.o
66
obj-$(CONFIG_ACPI_CPPC_LIB) += cppc.o
7-
obj-$(CONFIG_ACPI_MADT_WAKEUP) += madt_wakeup.o
7+
obj-$(CONFIG_ACPI_MADT_WAKEUP) += madt_wakeup.o madt_playdead.o
88

99
ifneq ($(CONFIG_ACPI_PROCESSOR),)
1010
obj-y += cstate.o

arch/x86/kernel/acpi/madt_playdead.S

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
#include <linux/linkage.h>
3+
#include <asm/nospec-branch.h>
4+
#include <asm/page_types.h>
5+
#include <asm/processor-flags.h>
6+
7+
.text
8+
.align PAGE_SIZE
9+
10+
/*
11+
* asm_acpi_mp_play_dead() - Hand over control of the CPU to the BIOS
12+
*
13+
* rdi: Address of the ACPI MADT MPWK ResetVector
14+
* rsi: PGD of the identity mapping
15+
*/
16+
SYM_FUNC_START(asm_acpi_mp_play_dead)
	/*
	 * Clear CR4.PGE to turn off global entries; the CR3 write that
	 * follows will flush them.
	 */
	movq	%cr4, %rdx
	andq	$~(X86_CR4_PGE), %rdx
	movq	%rdx, %cr4

	/* Load the identity-mapping PGD passed in %rsi */
	movq	%rsi, %cr3

	/* Leave the kernel: jump to the reset vector passed in %rdi */
	ANNOTATE_RETPOLINE_SAFE
	jmp	*%rdi
SYM_FUNC_END(asm_acpi_mp_play_dead)

arch/x86/kernel/acpi/madt_wakeup.c

Lines changed: 182 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,174 @@
11
// SPDX-License-Identifier: GPL-2.0-or-later
22
#include <linux/acpi.h>
33
#include <linux/cpu.h>
4+
#include <linux/delay.h>
45
#include <linux/io.h>
6+
#include <linux/kexec.h>
7+
#include <linux/memblock.h>
8+
#include <linux/pgtable.h>
9+
#include <linux/sched/hotplug.h>
510
#include <asm/apic.h>
611
#include <asm/barrier.h>
12+
#include <asm/init.h>
13+
#include <asm/intel_pt.h>
14+
#include <asm/nmi.h>
715
#include <asm/processor.h>
16+
#include <asm/reboot.h>
817

918
/* Physical address of the Multiprocessor Wakeup Structure mailbox */
1019
static u64 acpi_mp_wake_mailbox_paddr __ro_after_init;
1120

1221
/* Virtual address of the Multiprocessor Wakeup Structure mailbox */
1322
static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox __ro_after_init;
1423

24+
static u64 acpi_mp_pgd __ro_after_init;
25+
static u64 acpi_mp_reset_vector_paddr __ro_after_init;
26+
27+
static void acpi_mp_stop_this_cpu(void)
28+
{
29+
asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
30+
}
31+
32+
static void acpi_mp_play_dead(void)
33+
{
34+
play_dead_common();
35+
asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
36+
}
37+
38+
static void acpi_mp_cpu_die(unsigned int cpu)
39+
{
40+
u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
41+
unsigned long timeout;
42+
43+
/*
44+
* Use TEST mailbox command to prove that BIOS got control over
45+
* the CPU before declaring it dead.
46+
*
47+
* BIOS has to clear 'command' field of the mailbox.
48+
*/
49+
acpi_mp_wake_mailbox->apic_id = apicid;
50+
smp_store_release(&acpi_mp_wake_mailbox->command,
51+
ACPI_MP_WAKE_COMMAND_TEST);
52+
53+
/* Don't wait longer than a second. */
54+
timeout = USEC_PER_SEC;
55+
while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout)
56+
udelay(1);
57+
58+
if (!timeout)
59+
pr_err("Failed to hand over CPU %d to BIOS\n", cpu);
60+
}
61+
62+
/* The argument is required to match type of x86_mapping_info::alloc_pgt_page */
63+
/*
 * Allocate one page-aligned page from memblock for use as a page-table page.
 *
 * @dummy: unused
 *
 * Returns whatever memblock_alloc() returns; callers treat NULL as failure.
 */
static void __init *alloc_pgt_page(void *dummy)
{
	void *page = memblock_alloc(PAGE_SIZE, PAGE_SIZE);

	return page;
}
67+
68+
/*
 * Give one page-table page back to memblock.
 *
 * @pgt:   page-table page to free (PAGE_SIZE bytes)
 * @dummy: unused; present so the signature matches
 *         x86_mapping_info::free_pgt_page
 */
static void __init free_pgt_page(void *pgt, void *dummy)
{
	/* A void function must not return an expression, even a void one. */
	memblock_free(pgt, PAGE_SIZE);
}
72+
73+
/*
74+
* Make sure asm_acpi_mp_play_dead() is present in the identity mapping at
75+
* the same place as in the kernel page tables. asm_acpi_mp_play_dead() switches
76+
* to the identity mapping and the function has to be present at the same spot in
77+
* the virtual address space before and after switching page tables.
78+
*/
79+
/*
 * Install a 4K mapping for asm_acpi_mp_play_dead() in @pgd at the function's
 * kernel virtual address, allocating any page-table level that is not
 * present yet.
 *
 * @pgd: top-level page table to populate
 *
 * Returns 0 on success, -ENOMEM if a page-table page cannot be allocated.
 */
static int __init init_transition_pgtable(pgd_t *pgd)
{
	const unsigned long vaddr = (unsigned long)asm_acpi_mp_play_dead;
	void *table;
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;

	/* PGD level */
	pgd += pgd_index(vaddr);
	if (!pgd_present(*pgd)) {
		table = alloc_pgt_page(NULL);
		if (!table)
			return -ENOMEM;
		set_pgd(pgd, __pgd(__pa(table) | _KERNPG_TABLE));
	}

	/* P4D level */
	p4d = p4d_offset(pgd, vaddr);
	if (!p4d_present(*p4d)) {
		table = alloc_pgt_page(NULL);
		if (!table)
			return -ENOMEM;
		set_p4d(p4d, __p4d(__pa(table) | _KERNPG_TABLE));
	}

	/* PUD level */
	pud = pud_offset(p4d, vaddr);
	if (!pud_present(*pud)) {
		table = alloc_pgt_page(NULL);
		if (!table)
			return -ENOMEM;
		set_pud(pud, __pud(__pa(table) | _KERNPG_TABLE));
	}

	/* PMD level */
	pmd = pmd_offset(pud, vaddr);
	if (!pmd_present(*pmd)) {
		table = alloc_pgt_page(NULL);
		if (!table)
			return -ENOMEM;
		set_pmd(pmd, __pmd(__pa(table) | _KERNPG_TABLE));
	}

	/* Point the PTE at the physical page backing the function. */
	pte = pte_offset_kernel(pmd, vaddr);
	set_pte(pte, pfn_pte(__pa(vaddr) >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC));

	return 0;
}
124+
125+
/*
 * Prepare everything needed to hand a CPU over to the BIOS reset vector.
 *
 * @reset_vector: physical address of the reset vector taken from the MADT
 *                wakeup structure
 *
 * Builds a page table that identity-maps every range in pfn_mapped[] and the
 * page containing @reset_vector, adds the mapping for asm_acpi_mp_play_dead(),
 * then installs the offlining SMP operations and records the reset vector and
 * the physical address of the new PGD.
 *
 * Returns 0 on success, -ENOMEM on any failure (the page table is freed).
 */
static int __init acpi_mp_setup_reset(u64 reset_vector)
{
	struct x86_mapping_info info = {
		.alloc_pgt_page = alloc_pgt_page,
		.free_pgt_page	= free_pgt_page,
		.page_flag	= __PAGE_KERNEL_LARGE_EXEC,
		.kernpg_flag	= _KERNPG_TABLE_NOENC,
	};
	pgd_t *pgd;
	int i;

	pgd = alloc_pgt_page(NULL);
	if (!pgd)
		return -ENOMEM;

	/* Identity-map every range recorded in pfn_mapped[]. */
	for (i = 0; i < nr_pfn_mapped; i++) {
		unsigned long start = pfn_mapped[i].start << PAGE_SHIFT;
		unsigned long end = pfn_mapped[i].end << PAGE_SHIFT;

		if (kernel_ident_mapping_init(&info, pgd, start, end))
			goto err_free;
	}

	/* Identity-map the page that contains the reset vector. */
	if (kernel_ident_mapping_init(&info, pgd,
				      PAGE_ALIGN_DOWN(reset_vector),
				      PAGE_ALIGN(reset_vector + 1)))
		goto err_free;

	if (init_transition_pgtable(pgd))
		goto err_free;

	smp_ops.play_dead = acpi_mp_play_dead;
	smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu;
	smp_ops.cpu_die = acpi_mp_cpu_die;

	acpi_mp_reset_vector_paddr = reset_vector;
	acpi_mp_pgd = __pa(pgd);

	return 0;

err_free:
	kernel_ident_mapping_free(&info, pgd);
	return -ENOMEM;
}
171+
15172
static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip)
16173
{
17174
if (!acpi_mp_wake_mailbox_paddr) {
@@ -97,14 +254,37 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
97254
struct acpi_madt_multiproc_wakeup *mp_wake;
98255

99256
mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
100-
if (BAD_MADT_ENTRY(mp_wake, end))
257+
258+
/*
259+
* Cannot use the standard BAD_MADT_ENTRY() to sanity check the @mp_wake
260+
* entry. 'sizeof (struct acpi_madt_multiproc_wakeup)' can be larger
261+
* than the actual size of the MP wakeup entry in ACPI table because the
262+
* 'reset_vector' is only available in the V1 MP wakeup structure.
263+
*/
264+
if (!mp_wake)
265+
return -EINVAL;
266+
if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0)
267+
return -EINVAL;
268+
if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
101269
return -EINVAL;
102270

103271
acpi_table_print_madt_entry(&header->common);
104272

105273
acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address;
106274

107-
acpi_mp_disable_offlining(mp_wake);
275+
if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 &&
276+
mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) {
277+
if (acpi_mp_setup_reset(mp_wake->reset_vector)) {
278+
pr_warn("Failed to setup MADT reset vector\n");
279+
acpi_mp_disable_offlining(mp_wake);
280+
}
281+
} else {
282+
/*
283+
* CPU offlining requires version 1 of the ACPI MADT wakeup
284+
* structure.
285+
*/
286+
acpi_mp_disable_offlining(mp_wake);
287+
}
108288

109289
apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);
110290

include/acpi/actbl2.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1197,8 +1197,20 @@ struct acpi_madt_multiproc_wakeup {
11971197
u16 version;
11981198
u32 reserved; /* reserved - must be zero */
11991199
u64 mailbox_address;
1200+
u64 reset_vector;
12001201
};
12011202

1203+
/* Values for Version field above */
1204+
1205+
/* Named values for the 'version' field of struct acpi_madt_multiproc_wakeup */
enum acpi_madt_multiproc_wakeup_version {
	ACPI_MADT_MP_WAKEUP_VERSION_NONE     = 0,
	ACPI_MADT_MP_WAKEUP_VERSION_V1       = 1,
	/* 2 and greater are reserved */
	ACPI_MADT_MP_WAKEUP_VERSION_RESERVED = 2,
};
1210+
1211+
#define ACPI_MADT_MP_WAKEUP_SIZE_V0 16
1212+
#define ACPI_MADT_MP_WAKEUP_SIZE_V1 24
1213+
12021214
#define ACPI_MULTIPROC_WAKEUP_MB_OS_SIZE 2032
12031215
#define ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE 2048
12041216

@@ -1211,7 +1223,8 @@ struct acpi_madt_multiproc_wakeup_mailbox {
12111223
u8 reserved_firmware[ACPI_MULTIPROC_WAKEUP_MB_FIRMWARE_SIZE]; /* reserved for firmware use */
12121224
};
12131225

1214-
#define ACPI_MP_WAKE_COMMAND_WAKEUP 1
1226+
#define ACPI_MP_WAKE_COMMAND_WAKEUP 1
1227+
#define ACPI_MP_WAKE_COMMAND_TEST 2
12151228

12161229
/* 17: CPU Core Interrupt Controller (ACPI 6.5) */
12171230

0 commit comments

Comments
 (0)