@@ -1,17 +1,174 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include <linux/acpi.h>
 #include <linux/cpu.h>
+#include <linux/delay.h>
 #include <linux/io.h>
+#include <linux/kexec.h>
+#include <linux/memblock.h>
+#include <linux/pgtable.h>
+#include <linux/sched/hotplug.h>
 #include <asm/apic.h>
 #include <asm/barrier.h>
+#include <asm/init.h>
+#include <asm/intel_pt.h>
+#include <asm/nmi.h>
 #include <asm/processor.h>
+#include <asm/reboot.h>
 
 /* Physical address of the Multiprocessor Wakeup Structure mailbox */
 static u64 acpi_mp_wake_mailbox_paddr __ro_after_init;
 
 /* Virtual address of the Multiprocessor Wakeup Structure mailbox */
 static struct acpi_madt_multiproc_wakeup_mailbox *acpi_mp_wake_mailbox __ro_after_init;
 
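+/*
+ * acpi_mp_pgd is the physical address of the identity-mapped page table
+ * used while parking a CPU; acpi_mp_reset_vector_paddr is the reset vector
+ * taken from the V1 MADT wakeup structure.
+ */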
+static u64 acpi_mp_pgd __ro_after_init;
+static u64 acpi_mp_reset_vector_paddr __ro_after_init;
+
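+/*
+ * Both paths below funnel into asm_acpi_mp_play_dead(), which switches to
+ * the identity-mapped page tables and hands the CPU back to the BIOS via
+ * the reset vector.
+ */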
+static void acpi_mp_stop_this_cpu(void)
+{
+        asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
+}
+
+static void acpi_mp_play_dead(void)
+{
+        play_dead_common();
+        asm_acpi_mp_play_dead(acpi_mp_reset_vector_paddr, acpi_mp_pgd);
+}
+
+static void acpi_mp_cpu_die(unsigned int cpu)
+{
+        u32 apicid = per_cpu(x86_cpu_to_apicid, cpu);
+        unsigned long timeout;
+
+        /*
+         * Use the TEST mailbox command to prove that the BIOS has taken
+         * control of the CPU before declaring it dead.
+         *
+         * The BIOS has to clear the 'command' field of the mailbox.
+         */
+        acpi_mp_wake_mailbox->apic_id = apicid;
+        smp_store_release(&acpi_mp_wake_mailbox->command,
+                          ACPI_MP_WAKE_COMMAND_TEST);
+
+        /* Don't wait longer than a second. */
+        timeout = USEC_PER_SEC;
+        while (READ_ONCE(acpi_mp_wake_mailbox->command) && --timeout)
+                udelay(1);
+
+        if (!timeout)
+                pr_err("Failed to hand over CPU %d to BIOS\n", cpu);
+}
+
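+/*
+ * Page-table pages come from memblock: this runs during early MADT
+ * parsing, before the page allocator is up.
+ */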
+/* The argument is required to match the type of x86_mapping_info::alloc_pgt_page. */
+static void __init *alloc_pgt_page(void *dummy)
+{
+        return memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+}
+
+static void __init free_pgt_page(void *pgt, void *dummy)
+{
+        return memblock_free(pgt, PAGE_SIZE);
+}
+
+/*
+ * Make sure asm_acpi_mp_play_dead() is present in the identity mapping at
+ * the same place as in the kernel page tables. asm_acpi_mp_play_dead()
+ * switches to the identity mapping, and the function has to be present at
+ * the same spot in the virtual address space before and after switching
+ * page tables.
+ */
+static int __init init_transition_pgtable(pgd_t *pgd)
+{
+        pgprot_t prot = PAGE_KERNEL_EXEC_NOENC;
+        unsigned long vaddr, paddr;
+        p4d_t *p4d;
+        pud_t *pud;
+        pmd_t *pmd;
+        pte_t *pte;
+
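+        /* Walk down to the PTE, allocating any missing table levels. */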
+        vaddr = (unsigned long)asm_acpi_mp_play_dead;
+        pgd += pgd_index(vaddr);
+        if (!pgd_present(*pgd)) {
+                p4d = (p4d_t *)alloc_pgt_page(NULL);
+                if (!p4d)
+                        return -ENOMEM;
+                set_pgd(pgd, __pgd(__pa(p4d) | _KERNPG_TABLE));
+        }
+        p4d = p4d_offset(pgd, vaddr);
+        if (!p4d_present(*p4d)) {
+                pud = (pud_t *)alloc_pgt_page(NULL);
+                if (!pud)
+                        return -ENOMEM;
+                set_p4d(p4d, __p4d(__pa(pud) | _KERNPG_TABLE));
+        }
+        pud = pud_offset(p4d, vaddr);
+        if (!pud_present(*pud)) {
+                pmd = (pmd_t *)alloc_pgt_page(NULL);
+                if (!pmd)
+                        return -ENOMEM;
+                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
+        }
+        pmd = pmd_offset(pud, vaddr);
+        if (!pmd_present(*pmd)) {
+                pte = (pte_t *)alloc_pgt_page(NULL);
+                if (!pte)
+                        return -ENOMEM;
+                set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
+        }
+        pte = pte_offset_kernel(pmd, vaddr);
+
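+        /* Map the function's kernel-text page at its kernel virtual address. */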
+        paddr = __pa(vaddr);
+        set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, prot));
+
+        return 0;
+}
+
+static int __init acpi_mp_setup_reset(u64 reset_vector)
+{
+        struct x86_mapping_info info = {
+                .alloc_pgt_page = alloc_pgt_page,
+                .free_pgt_page = free_pgt_page,
+                .page_flag = __PAGE_KERNEL_LARGE_EXEC,
+                .kernpg_flag = _KERNPG_TABLE_NOENC,
+        };
+        pgd_t *pgd;
+
+        pgd = alloc_pgt_page(NULL);
+        if (!pgd)
+                return -ENOMEM;
+
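+        /*
+         * Identity-map all memory the kernel has mapped so execution can
+         * continue after CR3 is switched to this page table.
+         */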
+        for (int i = 0; i < nr_pfn_mapped; i++) {
+                unsigned long mstart, mend;
+
+                mstart = pfn_mapped[i].start << PAGE_SHIFT;
+                mend = pfn_mapped[i].end << PAGE_SHIFT;
+                if (kernel_ident_mapping_init(&info, pgd, mstart, mend)) {
+                        kernel_ident_mapping_free(&info, pgd);
+                        return -ENOMEM;
+                }
+        }
+
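+        /* The page holding the reset vector must be reachable as well. */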
+        if (kernel_ident_mapping_init(&info, pgd,
+                                      PAGE_ALIGN_DOWN(reset_vector),
+                                      PAGE_ALIGN(reset_vector + 1))) {
+                kernel_ident_mapping_free(&info, pgd);
+                return -ENOMEM;
+        }
+
+        if (init_transition_pgtable(pgd)) {
+                kernel_ident_mapping_free(&info, pgd);
+                return -ENOMEM;
+        }
+
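+        /* Route CPU parking and offlining through the mailbox handover. */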
+        smp_ops.play_dead = acpi_mp_play_dead;
+        smp_ops.stop_this_cpu = acpi_mp_stop_this_cpu;
+        smp_ops.cpu_die = acpi_mp_cpu_die;
+
+        acpi_mp_reset_vector_paddr = reset_vector;
+        acpi_mp_pgd = __pa(pgd);
+
+        return 0;
+}
+
 static int acpi_wakeup_cpu(u32 apicid, unsigned long start_ip)
 {
         if (!acpi_mp_wake_mailbox_paddr) {
@@ -97,14 +254,37 @@ int __init acpi_parse_mp_wake(union acpi_subtable_headers *header,
         struct acpi_madt_multiproc_wakeup *mp_wake;
 
         mp_wake = (struct acpi_madt_multiproc_wakeup *)header;
-        if (BAD_MADT_ENTRY(mp_wake, end))
+
+        /*
+         * Cannot use the standard BAD_MADT_ENTRY() to sanity check the
+         * @mp_wake entry. 'sizeof(struct acpi_madt_multiproc_wakeup)' can be
+         * larger than the actual size of the MP wakeup entry in the ACPI
+         * table because the 'reset_vector' is only available in the V1 MP
+         * wakeup structure.
+         */
+        if (!mp_wake)
+                return -EINVAL;
+        if (end - (unsigned long)mp_wake < ACPI_MADT_MP_WAKEUP_SIZE_V0)
+                return -EINVAL;
+        if (mp_wake->header.length < ACPI_MADT_MP_WAKEUP_SIZE_V0)
                 return -EINVAL;
 
         acpi_table_print_madt_entry(&header->common);
 
         acpi_mp_wake_mailbox_paddr = mp_wake->mailbox_address;
 
-        acpi_mp_disable_offlining(mp_wake);
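+        /*
+         * Only a V1 wakeup structure carries the reset vector required for
+         * parking CPUs; otherwise, or if the reset setup fails, disable
+         * offlining instead.
+         */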
+        if (mp_wake->version >= ACPI_MADT_MP_WAKEUP_VERSION_V1 &&
+            mp_wake->header.length >= ACPI_MADT_MP_WAKEUP_SIZE_V1) {
+                if (acpi_mp_setup_reset(mp_wake->reset_vector)) {
+                        pr_warn("Failed to setup MADT reset vector\n");
+                        acpi_mp_disable_offlining(mp_wake);
+                }
+        } else {
+                /*
+                 * CPU offlining requires version 1 of the ACPI MADT wakeup
+                 * structure.
+                 */
+                acpi_mp_disable_offlining(mp_wake);
+        }
 
         apic_update_callback(wakeup_secondary_cpu_64, acpi_wakeup_cpu);
 