Skip to content

Commit dcb988c

Browse files
committed
KVM: x86: Quirk initialization of feature MSRs to KVM's max configuration
Add a quirk to control KVM's misguided initialization of select feature MSRs to KVM's max configuration, as enabling features by default violates KVM's approach of letting userspace own the vCPU model, and is actively problematic for MSRs that are conditionally supported, as the vCPU will end up with an MSR value that userspace can't restore. E.g. if the vCPU is configured with PDCM=0, userspace will save and attempt to restore a non-zero PERF_CAPABILITIES, thanks to KVM's meddling. Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Sean Christopherson <[email protected]>
1 parent bc2ca36 commit dcb988c

File tree

6 files changed

+39
-8
lines changed

6 files changed

+39
-8
lines changed

Documentation/virt/kvm/api.rst

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8107,6 +8107,28 @@ KVM_X86_QUIRK_SLOT_ZAP_ALL By default, for KVM_X86_DEFAULT_VM VMs, KVM
81078107
or moved memslot isn't reachable, i.e. KVM
81088108
_may_ invalidate only SPTEs related to the
81098109
memslot.
8110+
8111+
KVM_X86_QUIRK_STUFF_FEATURE_MSRS By default, at vCPU creation, KVM sets the
8112+
vCPU's MSR_IA32_PERF_CAPABILITIES (0x345),
8113+
MSR_IA32_ARCH_CAPABILITIES (0x10a),
8114+
MSR_PLATFORM_INFO (0xce), and all VMX MSRs
8115+
(0x480..0x492) to the maximal capabilities
8116+
supported by KVM. KVM also sets
8117+
MSR_IA32_UCODE_REV (0x8b) to an arbitrary
8118+
value (which is different for Intel vs.
8119+
AMD). Lastly, when guest CPUID is set (by
8120+
userspace), KVM modifies select VMX MSR
8121+
fields to force consistency between guest
8122+
CPUID and L2's effective ISA. When this
8123+
quirk is disabled, KVM zeroes the vCPU's MSR
8124+
values (with two exceptions, see below),
8125+
i.e. treats the feature MSRs like CPUID
8126+
leaves and gives userspace full control of
8127+
the vCPU model definition. This quirk does
8128+
not affect VMX MSRs CR0/CR4_FIXED1 (0x487
8129+
and 0x489), as KVM does not allow them to
8130+
be set by userspace (KVM sets them based on
8131+
guest CPUID, for safety purposes).
81108132
=================================== ============================================
81118133

81128134
7.32 KVM_CAP_MAX_VCPU_ID

arch/x86/include/asm/kvm_host.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2360,7 +2360,8 @@ int memslot_rmap_alloc(struct kvm_memory_slot *slot, unsigned long npages);
23602360
KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT | \
23612361
KVM_X86_QUIRK_FIX_HYPERCALL_INSN | \
23622362
KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS | \
2363-
KVM_X86_QUIRK_SLOT_ZAP_ALL)
2363+
KVM_X86_QUIRK_SLOT_ZAP_ALL | \
2364+
KVM_X86_QUIRK_STUFF_FEATURE_MSRS)
23642365

23652366
/*
23662367
* KVM previously used a u32 field in kvm_run to indicate the hypercall was

arch/x86/include/uapi/asm/kvm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,7 @@ struct kvm_sync_regs {
440440
#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
441441
#define KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS (1 << 6)
442442
#define KVM_X86_QUIRK_SLOT_ZAP_ALL (1 << 7)
443+
#define KVM_X86_QUIRK_STUFF_FEATURE_MSRS (1 << 8)
443444

444445
#define KVM_STATE_NESTED_FORMAT_VMX 0
445446
#define KVM_STATE_NESTED_FORMAT_SVM 1

arch/x86/kvm/svm/svm.c

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1390,7 +1390,9 @@ static void __svm_vcpu_reset(struct kvm_vcpu *vcpu)
13901390
svm_vcpu_init_msrpm(vcpu, svm->msrpm);
13911391

13921392
svm_init_osvw(vcpu);
1393-
vcpu->arch.microcode_version = 0x01000065;
1393+
1394+
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
1395+
vcpu->arch.microcode_version = 0x01000065;
13941396
svm->tsc_ratio_msr = kvm_caps.default_tsc_scaling_ratio;
13951397

13961398
svm->nmi_masked = false;

arch/x86/kvm/vmx/vmx.c

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4572,7 +4572,8 @@ vmx_adjust_secondary_exec_control(struct vcpu_vmx *vmx, u32 *exec_control,
45724572
* Update the nested MSR settings so that a nested VMM can/can't set
45734573
* controls for features that are/aren't exposed to the guest.
45744574
*/
4575-
if (nested) {
4575+
if (nested &&
4576+
kvm_check_has_quirk(vmx->vcpu.kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) {
45764577
/*
45774578
* All features that can be added or removed to VMX MSRs must
45784579
* be supported in the first place for nested virtualization.
@@ -4862,7 +4863,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
48624863

48634864
init_vmcs(vmx);
48644865

4865-
if (nested)
4866+
if (nested &&
4867+
kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
48664868
memcpy(&vmx->nested.msrs, &vmcs_config.nested, sizeof(vmx->nested.msrs));
48674869

48684870
vcpu_setup_sgx_lepubkeyhash(vcpu);
@@ -4875,7 +4877,8 @@ static void __vmx_vcpu_reset(struct kvm_vcpu *vcpu)
48754877
vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
48764878
#endif
48774879

4878-
vcpu->arch.microcode_version = 0x100000000ULL;
4880+
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS))
4881+
vcpu->arch.microcode_version = 0x100000000ULL;
48794882
vmx->msr_ia32_feature_control_valid_bits = FEAT_CTL_LOCKED;
48804883

48814884
/*

arch/x86/kvm/x86.c

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12314,9 +12314,11 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu)
1231412314

1231512315
kvm_async_pf_hash_reset(vcpu);
1231612316

12317-
vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
12318-
vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
12319-
vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
12317+
if (kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_STUFF_FEATURE_MSRS)) {
12318+
vcpu->arch.arch_capabilities = kvm_get_arch_capabilities();
12319+
vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
12320+
vcpu->arch.perf_capabilities = kvm_caps.supported_perf_cap;
12321+
}
1232012322
kvm_pmu_init(vcpu);
1232112323

1232212324
vcpu->arch.pending_external_vector = -1;

0 commit comments

Comments
 (0)