Commit 69f5f98

suryasaimadhu authored and KAGA-KOKO committed
x86/microcode/AMD: Remove AP scanning optimization
The idea was to not scan the microcode blob on each AP (Application Processor) during boot and thus save us some milliseconds. However, on architectures where the microcode engine is shared between threads, this doesn't work. Here's why:

The microcode on CPU0, i.e., the first thread, gets updated. The second thread, i.e., CPU1, i.e., the first AP, walks into load_ucode_amd_ap(), sees that there's no container cached and goes and scans for the proper blob. It finds it and, as a last step of apply_microcode_early_amd(), tries to apply the patch. But that core already has the updated microcode revision, which it received through CPU0's update. So the function returns false and we set desc->size = -1 to prevent other APs from scanning.

However, the next AP, CPU2, has a different microcode engine which hasn't been updated yet. The desc->size == -1 test prevents it from scanning the blob anew and we fail to update it.

The fix is much more straightforward than it looks: the BSP (BootStrapping Processor), i.e., CPU0, caches the microcode patch in amd_ucode_patch. We use that cache on the AP and try to apply it. In the 99.9999% of cases where we have homogeneous cores - *not* mixed steppings - the application will be successful and we're good to go.

In the remaining small set of systems, we simply rescan the blob, find (or not, if none is present) the proper patch and apply it then.

Signed-off-by: Borislav Petkov <[email protected]>
Reviewed-by: Thomas Gleixner <[email protected]>
Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Thomas Gleixner <[email protected]>
1 parent 72edfe9 commit 69f5f98
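In short, the reworked AP path first tries the patch the BSP cached in amd_ucode_patch and only rescans the blob when that application fails. Here is a minimal sketch of the new load_ucode_amd_ap() flow, paraphrased from the 64-bit branch of the hunk below; the 32-bit __pa_nodebug address fixups are omitted, so this is illustrative rather than the verbatim kernel code:

    /* Illustrative sketch: 64-bit path of the reworked load_ucode_amd_ap(). */
    void load_ucode_amd_ap(unsigned int cpuid_1_eax)
    {
            struct microcode_amd *mc = (struct microcode_amd *)amd_ucode_patch;
            struct cpio_data cp;
            u32 rev, dummy;

            /* Read the microcode revision this core's engine currently runs. */
            native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);

            /*
             * Fast path: the BSP already cached a patch. If it is newer than
             * what this core runs, try to apply it and be done.
             * (__apply_microcode_amd() returns 0 on success.)
             */
            if (ucode_new_rev && rev < mc->hdr.patch_id) {
                    if (!__apply_microcode_amd(mc)) {
                            ucode_new_rev = mc->hdr.patch_id;
                            return;
                    }
            }

            /*
             * Slow path, e.g. mixed-steppings silicon: rescan the blob and
             * apply whatever patch matches this core, if any.
             */
            __load_ucode_amd(cpuid_1_eax, &cp);
            if (!(cp.data && cp.size))
                    return;

            apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false);
    }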

File tree

  • arch/x86/kernel/cpu/microcode

1 file changed: +18 -60 lines changed

arch/x86/kernel/cpu/microcode/amd.c

Lines changed: 18 additions & 60 deletions
@@ -45,14 +45,14 @@ static struct equiv_cpu_entry *equiv_cpu_table;
  * save from the initrd/builtin before jettisoning its contents. @mc is the
  * microcode patch we found to match.
  */
-static struct cont_desc {
+struct cont_desc {
 	struct microcode_amd *mc;
 	u32 cpuid_1_eax;
 	u32 psize;
 	u16 eq_id;
 	u8 *data;
 	size_t size;
-} cont;
+};
 
 static u32 ucode_new_rev;
 static u8 amd_ucode_patch[PATCH_MAX_SIZE];
@@ -201,8 +201,7 @@ static int __apply_microcode_amd(struct microcode_amd *mc)
  * Returns true if container found (sets @desc), false otherwise.
  */
 static bool
-apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size,
-			  bool save_patch, struct cont_desc *ret_desc)
+apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_patch)
 {
 	struct cont_desc desc = { 0 };
 	u8 (*patch)[PATCH_MAX_SIZE];
@@ -240,9 +239,6 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size,
 		memcpy(patch, mc, min_t(u32, desc.psize, PATCH_MAX_SIZE));
 	}
 
-	if (ret_desc)
-		*ret_desc = desc;
-
 	return ret;
 }
 
@@ -292,79 +288,41 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax)
 	struct cpio_data cp = { };
 
 	__load_ucode_amd(cpuid_1_eax, &cp);
-
 	if (!(cp.data && cp.size))
 		return;
 
-	apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true, NULL);
+	apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true);
 }
 
 void load_ucode_amd_ap(unsigned int cpuid_1_eax)
 {
-	struct equiv_cpu_entry *eq;
 	struct microcode_amd *mc;
-	struct cont_desc *desc;
-	u16 eq_id;
+	struct cpio_data cp;
+	u32 *new_rev, rev, dummy;
 
 	if (IS_ENABLED(CONFIG_X86_32)) {
-		mc   = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
-		desc = (struct cont_desc *)__pa_nodebug(&cont);
+		mc      = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch);
+		new_rev = (u32 *)__pa_nodebug(&ucode_new_rev);
 	} else {
-		mc   = (struct microcode_amd *)amd_ucode_patch;
-		desc = &cont;
+		mc      = (struct microcode_amd *)amd_ucode_patch;
+		new_rev = &ucode_new_rev;
 	}
 
-	/* First AP hasn't cached it yet, go through the blob. */
-	if (!desc->data) {
-		struct cpio_data cp = { };
-
-		if (desc->size == -1)
-			return;
-
-reget:
-		__load_ucode_amd(cpuid_1_eax, &cp);
-		if (!(cp.data && cp.size)) {
-			/*
-			 * Mark it so that other APs do not scan again for no
-			 * real reason and slow down boot needlessly.
-			 */
-			desc->size = -1;
-			return;
-		}
+	native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
 
-		if (!apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false, desc)) {
-			desc->data = NULL;
-			desc->size = -1;
+	/* Check whether we have saved a new patch already: */
+	if (*new_rev && rev < mc->hdr.patch_id) {
+		if (!__apply_microcode_amd(mc)) {
+			*new_rev = mc->hdr.patch_id;
 			return;
 		}
 	}
 
-	eq    = (struct equiv_cpu_entry *)(desc->data + CONTAINER_HDR_SZ);
-
-	eq_id = find_equiv_id(eq, cpuid_1_eax);
-	if (!eq_id)
+	__load_ucode_amd(cpuid_1_eax, &cp);
+	if (!(cp.data && cp.size))
 		return;
 
-	if (eq_id == desc->eq_id) {
-		u32 rev, dummy;
-
-		native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy);
-
-		mc = (struct microcode_amd *)amd_ucode_patch;
-
-		if (mc && rev < mc->hdr.patch_id) {
-			if (!__apply_microcode_amd(mc))
-				ucode_new_rev = mc->hdr.patch_id;
-		}
-
-	} else {
-
-		/*
-		 * AP has a different equivalence ID than BSP, looks like
-		 * mixed-steppings silicon so go through the ucode blob anew.
-		 */
-		goto reget;
-	}
+	apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false);
 }
 
 static enum ucode_state
0 commit comments