Skip to content

Commit 634ac23

Browse files
KAGA-KOKObp3tk0v
authored and committed
x86/microcode: Handle "nosmt" correctly
On CPUs where microcode loading is not NMI-safe the SMT siblings which are parked in one of the play_dead() variants still react to NMIs. So if an NMI hits while the primary thread updates the microcode the resulting behaviour is undefined. The default play_dead() implementation on modern CPUs is using MWAIT which is not guaranteed to be safe against a microcode update which affects MWAIT. Take the cpus_booted_once_mask into account to detect this case and refuse to load late if the vendor specific driver does not advertise that late loading is NMI safe. AMD stated that this is safe, so mark the AMD driver accordingly. This requirement will be partially lifted in later changes. Signed-off-by: Thomas Gleixner <[email protected]> Signed-off-by: Borislav Petkov (AMD) <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent ba48aa3 commit 634ac23

File tree

4 files changed

+44
-31
lines changed

4 files changed

+44
-31
lines changed

arch/x86/Kconfig

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1320,7 +1320,7 @@ config MICROCODE_INITRD32
13201320
config MICROCODE_LATE_LOADING
13211321
bool "Late microcode loading (DANGEROUS)"
13221322
default n
1323-
depends on MICROCODE
1323+
depends on MICROCODE && SMP
13241324
help
13251325
Loading microcode late, when the system is up and executing instructions
13261326
is a tricky business and should be avoided if possible. Just the sequence

arch/x86/kernel/cpu/microcode/amd.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -917,10 +917,11 @@ static void microcode_fini_cpu_amd(int cpu)
917917
}
918918

919919
static struct microcode_ops microcode_amd_ops = {
920-
.request_microcode_fw = request_microcode_amd,
921-
.collect_cpu_info = collect_cpu_info_amd,
922-
.apply_microcode = apply_microcode_amd,
923-
.microcode_fini_cpu = microcode_fini_cpu_amd,
920+
.request_microcode_fw = request_microcode_amd,
921+
.collect_cpu_info = collect_cpu_info_amd,
922+
.apply_microcode = apply_microcode_amd,
923+
.microcode_fini_cpu = microcode_fini_cpu_amd,
924+
.nmi_safe = true,
924925
};
925926

926927
struct microcode_ops * __init init_amd_microcode(void)

arch/x86/kernel/cpu/microcode/core.c

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -254,23 +254,6 @@ static struct platform_device *microcode_pdev;
254254
*/
255255
#define SPINUNIT 100 /* 100 nsec */
256256

257-
static int check_online_cpus(void)
258-
{
259-
unsigned int cpu;
260-
261-
/*
262-
* Make sure all CPUs are online. It's fine for SMT to be disabled if
263-
* all the primary threads are still online.
264-
*/
265-
for_each_present_cpu(cpu) {
266-
if (topology_is_primary_thread(cpu) && !cpu_online(cpu)) {
267-
pr_err("Not all CPUs online, aborting microcode update.\n");
268-
return -EINVAL;
269-
}
270-
}
271-
272-
return 0;
273-
}
274257

275258
static atomic_t late_cpus_in;
276259
static atomic_t late_cpus_out;
@@ -387,6 +370,35 @@ static int microcode_reload_late(void)
387370
return ret;
388371
}
389372

373+
/*
374+
* Ensure that all required CPUs which are present and have been booted
375+
* once are online.
376+
*
377+
* To pass this check, all primary threads must be online.
378+
*
379+
* If the microcode load is not safe against NMI then all SMT threads
380+
* must be online as well because they still react to NMIs when they are
381+
* soft-offlined and parked in one of the play_dead() variants. So if a
382+
* NMI hits while the primary thread updates the microcode the resulting
383+
* behaviour is undefined. The default play_dead() implementation on
384+
* modern CPUs uses MWAIT, which is also not guaranteed to be safe
385+
* against a microcode update which affects MWAIT.
386+
*/
387+
static bool ensure_cpus_are_online(void)
388+
{
389+
unsigned int cpu;
390+
391+
for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
392+
if (!cpu_online(cpu)) {
393+
if (topology_is_primary_thread(cpu) || !microcode_ops->nmi_safe) {
394+
pr_err("CPU %u not online\n", cpu);
395+
return false;
396+
}
397+
}
398+
}
399+
return true;
400+
}
401+
390402
static ssize_t reload_store(struct device *dev,
391403
struct device_attribute *attr,
392404
const char *buf, size_t size)
@@ -402,9 +414,10 @@ static ssize_t reload_store(struct device *dev,
402414

403415
cpus_read_lock();
404416

405-
ret = check_online_cpus();
406-
if (ret)
417+
if (!ensure_cpus_are_online()) {
418+
ret = -EBUSY;
407419
goto put;
420+
}
408421

409422
tmp_ret = microcode_ops->request_microcode_fw(bsp, &microcode_pdev->dev);
410423
if (tmp_ret != UCODE_NEW)

arch/x86/kernel/cpu/microcode/internal.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,18 +20,17 @@ enum ucode_state {
2020

2121
struct microcode_ops {
2222
enum ucode_state (*request_microcode_fw)(int cpu, struct device *dev);
23-
2423
void (*microcode_fini_cpu)(int cpu);
2524

2625
/*
27-
* The generic 'microcode_core' part guarantees that
28-
* the callbacks below run on a target cpu when they
29-
* are being called.
26+
* The generic 'microcode_core' part guarantees that the callbacks
27+
* below run on a target CPU when they are being called.
3028
* See also the "Synchronization" section in microcode_core.c.
3129
*/
32-
enum ucode_state (*apply_microcode)(int cpu);
33-
int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
34-
void (*finalize_late_load)(int result);
30+
enum ucode_state (*apply_microcode)(int cpu);
31+
int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
32+
void (*finalize_late_load)(int result);
33+
unsigned int nmi_safe : 1;
3534
};
3635

3736
extern struct ucode_cpu_info ucode_cpu_info[];

0 commit comments

Comments (0)