Skip to content

Commit 4b75395

Browse files
KAGA-KOKObp3tk0v
authored and committed
x86/microcode: Add per CPU result state
The microcode rendezvous is purely acting on global state, which does not allow to analyze fails in a coherent way. Introduce per CPU state where the results are written into, which allows to analyze the return codes of the individual CPUs. Initialize the state when walking the cpu_present_mask in the online check to avoid another for_each_cpu() loop. Enhance the result print out with that. The structure is intentionally named ucode_ctrl as it will gain control fields in subsequent changes. Signed-off-by: Thomas Gleixner <[email protected]> Signed-off-by: Borislav Petkov (AMD) <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 0772b9a commit 4b75395

File tree

2 files changed

+68
-47
lines changed

2 files changed

+68
-47
lines changed

arch/x86/kernel/cpu/microcode/core.c

Lines changed: 67 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -252,6 +252,11 @@ static struct platform_device *microcode_pdev;
252252
* requirement can be relaxed in the future. Right now, this is conservative
253253
* and good.
254254
*/
255+
struct microcode_ctrl {
256+
enum ucode_state result;
257+
};
258+
259+
static DEFINE_PER_CPU(struct microcode_ctrl, ucode_ctrl);
255260
static atomic_t late_cpus_in, late_cpus_out;
256261

257262
static bool wait_for_cpus(atomic_t *cnt)
@@ -274,23 +279,19 @@ static bool wait_for_cpus(atomic_t *cnt)
274279
return false;
275280
}
276281

277-
/*
278-
* Returns:
279-
* < 0 - on error
280-
* 0 - success (no update done or microcode was updated)
281-
*/
282-
static int __reload_late(void *info)
282+
static int load_cpus_stopped(void *unused)
283283
{
284284
int cpu = smp_processor_id();
285-
enum ucode_state err;
286-
int ret = 0;
285+
enum ucode_state ret;
287286

288287
/*
289288
* Wait for all CPUs to arrive. A load will not be attempted unless all
290289
* CPUs show up.
291290
* */
292-
if (!wait_for_cpus(&late_cpus_in))
293-
return -1;
291+
if (!wait_for_cpus(&late_cpus_in)) {
292+
this_cpu_write(ucode_ctrl.result, UCODE_TIMEOUT);
293+
return 0;
294+
}
294295

295296
/*
296297
* On an SMT system, it suffices to load the microcode on one sibling of
@@ -299,17 +300,11 @@ static int __reload_late(void *info)
299300
* loading attempts happen on multiple threads of an SMT core. See
300301
* below.
301302
*/
302-
if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
303-
err = microcode_ops->apply_microcode(cpu);
304-
else
303+
if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
305304
goto wait_for_siblings;
306305

307-
if (err >= UCODE_NFOUND) {
308-
if (err == UCODE_ERROR) {
309-
pr_warn("Error reloading microcode on CPU %d\n", cpu);
310-
ret = -1;
311-
}
312-
}
306+
ret = microcode_ops->apply_microcode(cpu);
307+
this_cpu_write(ucode_ctrl.result, ret);
313308

314309
wait_for_siblings:
315310
if (!wait_for_cpus(&late_cpus_out))
@@ -321,19 +316,18 @@ static int __reload_late(void *info)
321316
* per-cpu cpuinfo can be updated with right microcode
322317
* revision.
323318
*/
324-
if (cpumask_first(topology_sibling_cpumask(cpu)) != cpu)
325-
err = microcode_ops->apply_microcode(cpu);
319+
if (cpumask_first(topology_sibling_cpumask(cpu)) == cpu)
320+
return 0;
326321

327-
return ret;
322+
ret = microcode_ops->apply_microcode(cpu);
323+
this_cpu_write(ucode_ctrl.result, ret);
324+
return 0;
328325
}
329326

330-
/*
331-
* Reload microcode late on all CPUs. Wait for a sec until they
332-
* all gather together.
333-
*/
334-
static int microcode_reload_late(void)
327+
static int load_late_stop_cpus(void)
335328
{
336-
int old = boot_cpu_data.microcode, ret;
329+
unsigned int cpu, updated = 0, failed = 0, timedout = 0, siblings = 0;
330+
int old_rev = boot_cpu_data.microcode;
337331
struct cpuinfo_x86 prev_info;
338332

339333
pr_err("Attempting late microcode loading - it is dangerous and taints the kernel.\n");
@@ -348,26 +342,47 @@ static int microcode_reload_late(void)
348342
*/
349343
store_cpu_caps(&prev_info);
350344

351-
ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask);
345+
stop_machine_cpuslocked(load_cpus_stopped, NULL, cpu_online_mask);
346+
347+
/* Analyze the results */
348+
for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
349+
switch (per_cpu(ucode_ctrl.result, cpu)) {
350+
case UCODE_UPDATED: updated++; break;
351+
case UCODE_TIMEOUT: timedout++; break;
352+
case UCODE_OK: siblings++; break;
353+
default: failed++; break;
354+
}
355+
}
352356

353357
if (microcode_ops->finalize_late_load)
354-
microcode_ops->finalize_late_load(ret);
355-
356-
if (!ret) {
357-
pr_info("Reload succeeded, microcode revision: 0x%x -> 0x%x\n",
358-
old, boot_cpu_data.microcode);
359-
microcode_check(&prev_info);
360-
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
361-
} else {
362-
pr_info("Reload failed, current microcode revision: 0x%x\n",
363-
boot_cpu_data.microcode);
358+
microcode_ops->finalize_late_load(!updated);
359+
360+
if (!updated) {
361+
/* Nothing changed. */
362+
if (!failed && !timedout)
363+
return 0;
364+
pr_err("update failed: %u CPUs failed %u CPUs timed out\n",
365+
failed, timedout);
366+
return -EIO;
367+
}
368+
369+
add_taint(TAINT_CPU_OUT_OF_SPEC, LOCKDEP_STILL_OK);
370+
pr_info("load: updated on %u primary CPUs with %u siblings\n", updated, siblings);
371+
if (failed || timedout) {
372+
pr_err("load incomplete. %u CPUs timed out or failed\n",
373+
num_online_cpus() - (updated + siblings));
364374
}
365-
return ret;
375+
pr_info("revision: 0x%x -> 0x%x\n", old_rev, boot_cpu_data.microcode);
376+
microcode_check(&prev_info);
377+
378+
return updated + siblings == num_online_cpus() ? 0 : -EIO;
366379
}
367380

368381
/*
369-
* Ensure that all required CPUs which are present and have been booted
370-
* once are online.
382+
* This function does two things:
383+
*
384+
* 1) Ensure that all required CPUs which are present and have been booted
385+
* once are online.
371386
*
372387
* To pass this check, all primary threads must be online.
373388
*
@@ -378,9 +393,12 @@ static int microcode_reload_late(void)
378393
* behaviour is undefined. The default play_dead() implementation on
379394
* modern CPUs uses MWAIT, which is also not guaranteed to be safe
380395
* against a microcode update which affects MWAIT.
396+
*
397+
* 2) Initialize the per CPU control structure
381398
*/
382-
static bool ensure_cpus_are_online(void)
399+
static bool setup_cpus(void)
383400
{
401+
struct microcode_ctrl ctrl = { .result = -1, };
384402
unsigned int cpu;
385403

386404
for_each_cpu_and(cpu, cpu_present_mask, &cpus_booted_once_mask) {
@@ -390,18 +408,20 @@ static bool ensure_cpus_are_online(void)
390408
return false;
391409
}
392410
}
411+
/* Initialize the per CPU state */
412+
per_cpu(ucode_ctrl, cpu) = ctrl;
393413
}
394414
return true;
395415
}
396416

397-
static int ucode_load_late_locked(void)
417+
static int load_late_locked(void)
398418
{
399-
if (!ensure_cpus_are_online())
419+
if (!setup_cpus())
400420
return -EBUSY;
401421

402422
switch (microcode_ops->request_microcode_fw(0, &microcode_pdev->dev)) {
403423
case UCODE_NEW:
404-
return microcode_reload_late();
424+
return load_late_stop_cpus();
405425
case UCODE_NFOUND:
406426
return -ENOENT;
407427
default:
@@ -421,7 +441,7 @@ static ssize_t reload_store(struct device *dev,
421441
return -EINVAL;
422442

423443
cpus_read_lock();
424-
ret = ucode_load_late_locked();
444+
ret = load_late_locked();
425445
cpus_read_unlock();
426446

427447
return ret ? : size;

arch/x86/kernel/cpu/microcode/internal.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ enum ucode_state {
1616
UCODE_UPDATED,
1717
UCODE_NFOUND,
1818
UCODE_ERROR,
19+
UCODE_TIMEOUT,
1920
};
2021

2122
struct microcode_ops {

0 commit comments

Comments
 (0)