Skip to content

Commit 415de44

Browse files
janeyasmin authored and suryasaimadhu committed
x86/cpu: Fix migration safety with X86_BUG_NULL_SEG

Currently, Linux probes for X86_BUG_NULL_SEG unconditionally, which makes it unsafe to migrate in a virtualised environment, as the properties across the migration pool might differ.

To be specific, the case which goes wrong is:

1. Zen1 (or earlier) and Zen2 (or later) in a migration pool
2. Linux boots on Zen2, probes and finds the absence of X86_BUG_NULL_SEG
3. Linux is then migrated to Zen1

Linux is now running on an X86_BUG_NULL_SEG-impacted CPU while believing that the bug is fixed.

The only way to address the problem is to fully trust the "no longer affected" CPUID bit when virtualised, because in the above case the hypervisor would deliberately leave it clear to indicate the fact "you might migrate to somewhere which has this behaviour".

Zen3 adds the NullSelectorClearsBase (NSCB) CPUID bit to indicate that loading a NULL segment selector zeroes the base and limit fields, as well as just attributes. Zen2 also has this behaviour but doesn't have the NSCB bit.

[ bp: Minor touchups. ]

Signed-off-by: Jane Malalane <[email protected]>
Signed-off-by: Borislav Petkov <[email protected]>
CC: <[email protected]>
Link: https://lkml.kernel.org/r/[email protected]
1 parent 639475d commit 415de44

File tree

4 files changed

+42
-7
lines changed

4 files changed

+42
-7
lines changed

arch/x86/kernel/cpu/amd.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -989,6 +989,8 @@ static void init_amd(struct cpuinfo_x86 *c)
989989
if (cpu_has(c, X86_FEATURE_IRPERF) &&
990990
!cpu_has_amd_erratum(c, amd_erratum_1054))
991991
msr_set_bit(MSR_K7_HWCR, MSR_K7_HWCR_IRPERF_EN_BIT);
992+
993+
check_null_seg_clears_base(c);
992994
}
993995

994996
#ifdef CONFIG_X86_32

arch/x86/kernel/cpu/common.c

Lines changed: 37 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,9 +1397,8 @@ void __init early_cpu_init(void)
13971397
early_identify_cpu(&boot_cpu_data);
13981398
}
13991399

1400-
static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
1400+
static bool detect_null_seg_behavior(void)
14011401
{
1402-
#ifdef CONFIG_X86_64
14031402
/*
14041403
* Empirically, writing zero to a segment selector on AMD does
14051404
* not clear the base, whereas writing zero to a segment
@@ -1420,10 +1419,43 @@ static void detect_null_seg_behavior(struct cpuinfo_x86 *c)
14201419
wrmsrl(MSR_FS_BASE, 1);
14211420
loadsegment(fs, 0);
14221421
rdmsrl(MSR_FS_BASE, tmp);
1423-
if (tmp != 0)
1424-
set_cpu_bug(c, X86_BUG_NULL_SEG);
14251422
wrmsrl(MSR_FS_BASE, old_base);
1426-
#endif
1423+
return tmp == 0;
1424+
}
1425+
1426+
void check_null_seg_clears_base(struct cpuinfo_x86 *c)
1427+
{
1428+
/* BUG_NULL_SEG is only relevant with 64bit userspace */
1429+
if (!IS_ENABLED(CONFIG_X86_64))
1430+
return;
1431+
1432+
/* Zen3 CPUs advertise Null Selector Clears Base in CPUID. */
1433+
if (c->extended_cpuid_level >= 0x80000021 &&
1434+
cpuid_eax(0x80000021) & BIT(6))
1435+
return;
1436+
1437+
/*
1438+
* CPUID bit above wasn't set. If this kernel is still running
1439+
* as a HV guest, then the HV has decided not to advertize
1440+
* that CPUID bit for whatever reason. For example, one
1441+
* member of the migration pool might be vulnerable. Which
1442+
* means, the bug is present: set the BUG flag and return.
1443+
*/
1444+
if (cpu_has(c, X86_FEATURE_HYPERVISOR)) {
1445+
set_cpu_bug(c, X86_BUG_NULL_SEG);
1446+
return;
1447+
}
1448+
1449+
/*
1450+
* Zen2 CPUs also have this behaviour, but no CPUID bit.
1451+
* 0x18 is the respective family for Hygon.
1452+
*/
1453+
if ((c->x86 == 0x17 || c->x86 == 0x18) &&
1454+
detect_null_seg_behavior())
1455+
return;
1456+
1457+
/* All the remaining ones are affected */
1458+
set_cpu_bug(c, X86_BUG_NULL_SEG);
14271459
}
14281460

14291461
static void generic_identify(struct cpuinfo_x86 *c)
@@ -1459,8 +1491,6 @@ static void generic_identify(struct cpuinfo_x86 *c)
14591491

14601492
get_model_name(c); /* Default name */
14611493

1462-
detect_null_seg_behavior(c);
1463-
14641494
/*
14651495
* ESPFIX is a strange bug. All real CPUs have it. Paravirt
14661496
* systems that run Linux at CPL > 0 may or may not have the

arch/x86/kernel/cpu/cpu.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@ extern int detect_extended_topology_early(struct cpuinfo_x86 *c);
7575
extern int detect_extended_topology(struct cpuinfo_x86 *c);
7676
extern int detect_ht_early(struct cpuinfo_x86 *c);
7777
extern void detect_ht(struct cpuinfo_x86 *c);
78+
extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
7879

7980
unsigned int aperfmperf_get_khz(int cpu);
8081

arch/x86/kernel/cpu/hygon.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,8 @@ static void init_hygon(struct cpuinfo_x86 *c)
335335
/* Hygon CPUs don't reset SS attributes on SYSRET, Xen does. */
336336
if (!cpu_has(c, X86_FEATURE_XENPV))
337337
set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
338+
339+
check_null_seg_clears_base(c);
338340
}
339341

340342
static void cpu_detect_tlb_hygon(struct cpuinfo_x86 *c)

0 commit comments

Comments
 (0)