Commit 67f4b99

sean-jc authored and bonzini committed
KVM: nVMX: Handle dynamic MSR intercept toggling
Always check vmcs01's MSR bitmap when merging L0 and L1 bitmaps for L2, and always update the relevant bits in vmcs02. This fixes two distinct, but intertwined, bugs related to dynamic MSR bitmap modifications.

The first issue is that KVM fails to enable MSR interception in vmcs02 for the FS/GS base MSRs if L1 first runs L2 with interception disabled and later enables interception.

The second issue is that KVM fails to honor userspace MSR filtering when preparing vmcs02.

Fix both issues simultaneously, as fixing only one of them (it doesn't matter which) would create a mess that no one should have to bisect. Fixing only the first bug would exacerbate the MSR filtering issue, as userspace would see inconsistent behavior depending on the whims of L1. Fixing only the second bug (MSR filtering) effectively requires fixing the first, as the nVMX code only knows how to transition vmcs02's bitmap from 1->0.

Move the various accessors/mutators that are currently buried in vmx.c into vmx.h so that they can be shared by the nested code.

Fixes: 1a15525 ("KVM: x86: Introduce MSR filtering")
Fixes: d69129b ("KVM: nVMX: Disable intercept for FS/GS base MSRs in vmcs02 when possible")
Cc: [email protected]
Cc: Alexander Graf <[email protected]>
Signed-off-by: Sean Christopherson <[email protected]>
Message-Id: <[email protected]>
Signed-off-by: Paolo Bonzini <[email protected]>
1 parent 7dfbc62 commit 67f4b99

File tree

3 files changed: +111, -110 lines
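In effect, the commit reduces the vmcs02 bitmap merge to a single rule per MSR and per access type. The sketch below is illustrative only (the function and parameter names are not kernel symbols); the real logic lives in the BUILD_NVMX_MSR_INTERCEPT_HELPER macro added in the nested.c diff further down.

/*
 * Illustrative sketch of the merge rule: an L2 access to a given MSR is
 * intercepted whenever either L0 (vmcs01, which reflects userspace MSR
 * filters and dynamic pass-through decisions) or L1 (vmcs12) wants it
 * intercepted.
 */
static bool l2_access_intercepted(bool set_in_vmcs01, bool set_in_vmcs12)
{
        return set_in_vmcs01 || set_in_vmcs12;
}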

arch/x86/kvm/vmx/nested.c

Lines changed: 46 additions & 57 deletions
@@ -524,29 +524,6 @@ static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-/*
- * Check if MSR is intercepted for L01 MSR bitmap.
- */
-static bool msr_write_intercepted_l01(struct kvm_vcpu *vcpu, u32 msr)
-{
-	unsigned long *msr_bitmap;
-	int f = sizeof(unsigned long);
-
-	if (!cpu_has_vmx_msr_bitmap())
-		return true;
-
-	msr_bitmap = to_vmx(vcpu)->vmcs01.msr_bitmap;
-
-	if (msr <= 0x1fff) {
-		return !!test_bit(msr, msr_bitmap + 0x800 / f);
-	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-		msr &= 0x1fff;
-		return !!test_bit(msr, msr_bitmap + 0xc00 / f);
-	}
-
-	return true;
-}
-
 /*
  * If a msr is allowed by L0, we should check whether it is allowed by L1.
  * The corresponding bit will be cleared unless both of L0 and L1 allow it.
@@ -600,17 +577,46 @@ static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
 	}
 }
 
+#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw)					\
+static inline								\
+void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx,		\
+					 unsigned long *msr_bitmap_l1,	\
+					 unsigned long *msr_bitmap_l0, u32 msr)	\
+{									\
+	if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) ||	\
+	    vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr))		\
+		vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr);		\
+	else								\
+		vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr);		\
+}
+BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
+BUILD_NVMX_MSR_INTERCEPT_HELPER(write)
+
+static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
+						    unsigned long *msr_bitmap_l1,
+						    unsigned long *msr_bitmap_l0,
+						    u32 msr, int types)
+{
+	if (types & MSR_TYPE_R)
+		nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
+						  msr_bitmap_l0, msr);
+	if (types & MSR_TYPE_W)
+		nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
+						   msr_bitmap_l0, msr);
+}
+
 /*
  * Merge L0's and L1's MSR bitmap, return false to indicate that
  * we do not use the hardware.
  */
 static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 						 struct vmcs12 *vmcs12)
 {
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int msr;
 	unsigned long *msr_bitmap_l1;
-	unsigned long *msr_bitmap_l0 = to_vmx(vcpu)->nested.vmcs02.msr_bitmap;
-	struct kvm_host_map *map = &to_vmx(vcpu)->nested.msr_bitmap_map;
+	unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
+	struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
 
 	/* Nothing to do if the MSR bitmap is not in use. */
 	if (!cpu_has_vmx_msr_bitmap() ||
@@ -661,44 +667,27 @@ static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
 		}
 	}
 
-	/* KVM unconditionally exposes the FS/GS base MSRs to L1. */
+	/*
+	 * Always check vmcs01's bitmap to honor userspace MSR filters and any
+	 * other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
+	 */
 #ifdef CONFIG_X86_64
-	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
-					     MSR_FS_BASE, MSR_TYPE_RW);
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_FS_BASE, MSR_TYPE_RW);
 
-	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
-					     MSR_GS_BASE, MSR_TYPE_RW);
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_GS_BASE, MSR_TYPE_RW);
 
-	nested_vmx_disable_intercept_for_msr(msr_bitmap_l1, msr_bitmap_l0,
-					     MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
 #endif
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
 
-	/*
-	 * Checking the L0->L1 bitmap is trying to verify two things:
-	 *
-	 * 1. L0 gave a permission to L1 to actually passthrough the MSR. This
-	 *    ensures that we do not accidentally generate an L02 MSR bitmap
-	 *    from the L12 MSR bitmap that is too permissive.
-	 * 2. That L1 or L2s have actually used the MSR. This avoids
-	 *    unnecessarily merging of the bitmap if the MSR is unused. This
-	 *    works properly because we only update the L01 MSR bitmap lazily.
-	 *    So even if L0 should pass L1 these MSRs, the L01 bitmap is only
-	 *    updated to reflect this when L1 (or its L2s) actually write to
-	 *    the MSR.
-	 */
-	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_SPEC_CTRL))
-		nested_vmx_disable_intercept_for_msr(
-					msr_bitmap_l1, msr_bitmap_l0,
-					MSR_IA32_SPEC_CTRL,
-					MSR_TYPE_R | MSR_TYPE_W);
-
-	if (!msr_write_intercepted_l01(vcpu, MSR_IA32_PRED_CMD))
-		nested_vmx_disable_intercept_for_msr(
-					msr_bitmap_l1, msr_bitmap_l0,
-					MSR_IA32_PRED_CMD,
-					MSR_TYPE_W);
+	nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
+					 MSR_IA32_PRED_CMD, MSR_TYPE_W);
 
-	kvm_vcpu_unmap(vcpu, &to_vmx(vcpu)->nested.msr_bitmap_map, false);
+	kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
 
 	return true;
 }
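For reference, the read instantiation of BUILD_NVMX_MSR_INTERCEPT_HELPER added above expands to roughly the following (whitespace aside):

static inline
void nested_vmx_set_msr_read_intercept(struct vcpu_vmx *vmx,
				       unsigned long *msr_bitmap_l1,
				       unsigned long *msr_bitmap_l0, u32 msr)
{
	/* Intercept in vmcs02 if either vmcs01 or L1's bitmap intercepts. */
	if (vmx_test_msr_bitmap_read(vmx->vmcs01.msr_bitmap, msr) ||
	    vmx_test_msr_bitmap_read(msr_bitmap_l1, msr))
		vmx_set_msr_bitmap_read(msr_bitmap_l0, msr);
	else
		vmx_clear_msr_bitmap_read(msr_bitmap_l0, msr);
}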

arch/x86/kvm/vmx/vmx.c

Lines changed: 2 additions & 53 deletions
@@ -771,22 +771,11 @@ void vmx_update_exception_bitmap(struct kvm_vcpu *vcpu)
  */
 static bool msr_write_intercepted(struct vcpu_vmx *vmx, u32 msr)
 {
-	unsigned long *msr_bitmap;
-	int f = sizeof(unsigned long);
-
 	if (!(exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS))
 		return true;
 
-	msr_bitmap = vmx->loaded_vmcs->msr_bitmap;
-
-	if (msr <= 0x1fff) {
-		return !!test_bit(msr, msr_bitmap + 0x800 / f);
-	} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
-		msr &= 0x1fff;
-		return !!test_bit(msr, msr_bitmap + 0xc00 / f);
-	}
-
-	return true;
+	return vmx_test_msr_bitmap_write(vmx->loaded_vmcs->msr_bitmap,
+					 MSR_IA32_SPEC_CTRL);
 }
 
 static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
@@ -3697,46 +3686,6 @@ void free_vpid(int vpid)
 	spin_unlock(&vmx_vpid_lock);
 }
 
-static void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
-{
-	int f = sizeof(unsigned long);
-
-	if (msr <= 0x1fff)
-		__clear_bit(msr, msr_bitmap + 0x000 / f);
-	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-		__clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
-}
-
-static void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
-{
-	int f = sizeof(unsigned long);
-
-	if (msr <= 0x1fff)
-		__clear_bit(msr, msr_bitmap + 0x800 / f);
-	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-		__clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
-}
-
-static void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
-{
-	int f = sizeof(unsigned long);
-
-	if (msr <= 0x1fff)
-		__set_bit(msr, msr_bitmap + 0x000 / f);
-	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-		__set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
-}
-
-static void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
-{
-	int f = sizeof(unsigned long);
-
-	if (msr <= 0x1fff)
-		__set_bit(msr, msr_bitmap + 0x800 / f);
-	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
-		__set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
-}
-
 void vmx_disable_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
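The trailing context line shows vmx_disable_intercept_for_msr(), one consumer of the helpers that were just removed from vmx.c (and re-added to vmx.h below). A simplified sketch of how such a consumer clears the per-type intercept bits is shown here; it is not the full kernel function (the real one also does MSR-filter bookkeeping), and the function name is illustrative only:

/*
 * Simplified sketch: disable interception of @msr by clearing the read
 * and/or write bits in the vCPU's MSR bitmap, using the shared
 * vmx_clear_msr_bitmap_*() helpers.  MSR-filter bookkeeping is omitted.
 */
static void disable_intercept_sketch(struct vcpu_vmx *vmx, u32 msr, int type)
{
	unsigned long *msr_bitmap = vmx->vmcs01.msr_bitmap;

	if (!cpu_has_vmx_msr_bitmap())
		return;

	if (type & MSR_TYPE_R)
		vmx_clear_msr_bitmap_read(msr_bitmap, msr);
	if (type & MSR_TYPE_W)
		vmx_clear_msr_bitmap_write(msr_bitmap, msr);
}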

arch/x86/kvm/vmx/vmx.h

Lines changed: 63 additions & 0 deletions
@@ -400,6 +400,69 @@ static inline void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr,
 
 void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
 
+static inline bool vmx_test_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+{
+	int f = sizeof(unsigned long);
+
+	if (msr <= 0x1fff)
+		return test_bit(msr, msr_bitmap + 0x000 / f);
+	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+		return test_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+	return true;
+}
+
+static inline bool vmx_test_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
+{
+	int f = sizeof(unsigned long);
+
+	if (msr <= 0x1fff)
+		return test_bit(msr, msr_bitmap + 0x800 / f);
+	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+		return test_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
+	return true;
+}
+
+static inline void vmx_clear_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+{
+	int f = sizeof(unsigned long);
+
+	if (msr <= 0x1fff)
+		__clear_bit(msr, msr_bitmap + 0x000 / f);
+	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+		__clear_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+}
+
+static inline void vmx_clear_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
+{
+	int f = sizeof(unsigned long);
+
+	if (msr <= 0x1fff)
+		__clear_bit(msr, msr_bitmap + 0x800 / f);
+	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+		__clear_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
+}
+
+static inline void vmx_set_msr_bitmap_read(ulong *msr_bitmap, u32 msr)
+{
+	int f = sizeof(unsigned long);
+
+	if (msr <= 0x1fff)
+		__set_bit(msr, msr_bitmap + 0x000 / f);
+	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+		__set_bit(msr & 0x1fff, msr_bitmap + 0x400 / f);
+}
+
+static inline void vmx_set_msr_bitmap_write(ulong *msr_bitmap, u32 msr)
+{
+	int f = sizeof(unsigned long);
+
+	if (msr <= 0x1fff)
+		__set_bit(msr, msr_bitmap + 0x800 / f);
+	else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff))
+		__set_bit(msr & 0x1fff, msr_bitmap + 0xc00 / f);
+}
+
+
 static inline u8 vmx_get_rvi(void)
 {
 	return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
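The hard-coded offsets in these helpers follow the VMX MSR-bitmap layout defined by the Intel SDM; the layout they assume is summarized below for convenience.

/*
 * VMX MSR bitmap layout assumed by the helpers above (one 4 KiB page,
 * one bit per MSR, per the Intel SDM):
 *
 *   0x000 - 0x3ff: reads  of "low"  MSRs 0x00000000 - 0x00001fff
 *   0x400 - 0x7ff: reads  of "high" MSRs 0xc0000000 - 0xc0001fff
 *   0x800 - 0xbff: writes of "low"  MSRs 0x00000000 - 0x00001fff
 *   0xc00 - 0xfff: writes of "high" MSRs 0xc0000000 - 0xc0001fff
 *
 * A set bit means the access causes a VM-exit; MSRs outside both ranges
 * always exit, which is why the test helpers fall back to returning true.
 */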
