
Commit ca0ea8a

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini:

 - Fix for compilation of selftests on non-x86 architectures

 - Fix for kvm_run->if_flag on SEV-ES

 - Fix for page table use-after-free if yielding during exit_mm()

 - Improve behavior when userspace starts a nested guest with invalid state

 - Fix missed wakeup with assigned devices but no VT-d posted interrupts

 - Do not tell userspace to save/restore an unsupported PMU MSR

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: VMX: Wake vCPU when delivering posted IRQ even if vCPU == this vCPU
  KVM: selftests: Add test to verify TRIPLE_FAULT on invalid L2 guest state
  KVM: VMX: Fix stale docs for kvm-intel.emulate_invalid_guest_state
  KVM: nVMX: Synthesize TRIPLE_FAULT for L2 if emulation is required
  KVM: VMX: Always clear vmx->fail on emulation_required
  selftests: KVM: Fix non-x86 compiling
  KVM: x86: Always set kvm_run->if_flag
  KVM: x86/mmu: Don't advance iterator after restart due to yielding
  KVM: x86: remove PMU FIXED_CTR3 from msrs_to_save_all
2 parents: 5dbdc4c + fdba608

14 files changed: +195 −55 lines

Documentation/admin-guide/kernel-parameters.txt
Lines changed: 6 additions & 2 deletions

@@ -2413,8 +2413,12 @@
 			Default is 1 (enabled)
 
 	kvm-intel.emulate_invalid_guest_state=
-			[KVM,Intel] Enable emulation of invalid guest states
-			Default is 0 (disabled)
+			[KVM,Intel] Disable emulation of invalid guest state.
+			Ignored if kvm-intel.enable_unrestricted_guest=1, as
+			guest state is never invalid for unrestricted guests.
+			This param doesn't apply to nested guests (L2), as KVM
+			never emulates invalid L2 guest state.
+			Default is 1 (enabled)
 
 	kvm-intel.flexpriority=
 			[KVM,Intel] Disable FlexPriority feature (TPR shadow).
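For reference, this knob is a kvm_intel module parameter, so it can be set either on the kernel command line or at module load time. A usage sketch (not part of the diff) for forcing the non-emulation behavior on a patched kernel:

	# on the kernel command line
	kvm-intel.emulate_invalid_guest_state=0

	# or, when kvm_intel is built as a module
	modprobe kvm_intel emulate_invalid_guest_state=0

Per the updated text above, the setting is a no-op when kvm-intel.enable_unrestricted_guest=1 and never applies to L2.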

arch/x86/include/asm/kvm-x86-ops.h
Lines changed: 1 addition & 0 deletions

@@ -47,6 +47,7 @@ KVM_X86_OP(set_dr7)
 KVM_X86_OP(cache_reg)
 KVM_X86_OP(get_rflags)
 KVM_X86_OP(set_rflags)
+KVM_X86_OP(get_if_flag)
 KVM_X86_OP(tlb_flush_all)
 KVM_X86_OP(tlb_flush_current)
 KVM_X86_OP_NULL(tlb_remote_flush)

arch/x86/include/asm/kvm_host.h
Lines changed: 1 addition & 0 deletions

@@ -1349,6 +1349,7 @@ struct kvm_x86_ops {
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
 	void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
+	bool (*get_if_flag)(struct kvm_vcpu *vcpu);
 
 	void (*tlb_flush_all)(struct kvm_vcpu *vcpu);
 	void (*tlb_flush_current)(struct kvm_vcpu *vcpu);
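Together with the kvm-x86-ops.h hunk above, this follows KVM's usual recipe for adding a vendor hook: declare the op so the static-call key is generated, add the function pointer to struct kvm_x86_ops, have each vendor module register an implementation, and dispatch from common code via static_call(). A condensed sketch of the four pieces as they appear across this commit (abridged, not a literal excerpt):

	/* arch/x86/include/asm/kvm-x86-ops.h: generate the static-call key */
	KVM_X86_OP(get_if_flag)

	/* arch/x86/include/asm/kvm_host.h: the vendor-dispatch table */
	struct kvm_x86_ops {
		/* ... */
		bool (*get_if_flag)(struct kvm_vcpu *vcpu);
	};

	/* arch/x86/kvm/vmx/vmx.c (svm.c is analogous): register the callback */
	static struct kvm_x86_ops vmx_x86_ops __initdata = {
		/* ... */
		.get_if_flag = vmx_get_if_flag,
	};

	/* arch/x86/kvm/x86.c: common code dispatches without an indirect call */
	kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu);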

arch/x86/kvm/mmu/tdp_iter.c
Lines changed: 6 additions & 0 deletions

@@ -26,6 +26,7 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
  */
 void tdp_iter_restart(struct tdp_iter *iter)
 {
+	iter->yielded = false;
 	iter->yielded_gfn = iter->next_last_level_gfn;
 	iter->level = iter->root_level;
 
@@ -160,6 +161,11 @@ static bool try_step_up(struct tdp_iter *iter)
  */
 void tdp_iter_next(struct tdp_iter *iter)
 {
+	if (iter->yielded) {
+		tdp_iter_restart(iter);
+		return;
+	}
+
 	if (try_step_down(iter))
 		return;
 
arch/x86/kvm/mmu/tdp_iter.h
Lines changed: 6 additions & 0 deletions

@@ -45,6 +45,12 @@ struct tdp_iter {
 	 * iterator walks off the end of the paging structure.
 	 */
 	bool valid;
+	/*
+	 * True if KVM dropped mmu_lock and yielded in the middle of a walk, in
+	 * which case tdp_iter_next() needs to restart the walk at the root
+	 * level instead of advancing to the next entry.
+	 */
+	bool yielded;
 };
 
 /*

arch/x86/kvm/mmu/tdp_mmu.c
Lines changed: 16 additions & 13 deletions

@@ -502,6 +502,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
 					   struct tdp_iter *iter,
 					   u64 new_spte)
 {
+	WARN_ON_ONCE(iter->yielded);
+
 	lockdep_assert_held_read(&kvm->mmu_lock);
 
 	/*
@@ -575,6 +577,8 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 				      u64 new_spte, bool record_acc_track,
 				      bool record_dirty_log)
 {
+	WARN_ON_ONCE(iter->yielded);
+
 	lockdep_assert_held_write(&kvm->mmu_lock);
 
 	/*
@@ -640,18 +644,19 @@ static inline void tdp_mmu_set_spte_no_dirty_log(struct kvm *kvm,
  * If this function should yield and flush is set, it will perform a remote
  * TLB flush before yielding.
  *
- * If this function yields, it will also reset the tdp_iter's walk over the
- * paging structure and the calling function should skip to the next
- * iteration to allow the iterator to continue its traversal from the
- * paging structure root.
+ * If this function yields, iter->yielded is set and the caller must skip to
+ * the next iteration, where tdp_iter_next() will reset the tdp_iter's walk
+ * over the paging structures to allow the iterator to continue its traversal
+ * from the paging structure root.
  *
- * Return true if this function yielded and the iterator's traversal was reset.
- * Return false if a yield was not needed.
+ * Returns true if this function yielded.
  */
-static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
-					     struct tdp_iter *iter, bool flush,
-					     bool shared)
+static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
+							  struct tdp_iter *iter,
+							  bool flush, bool shared)
 {
+	WARN_ON(iter->yielded);
+
 	/* Ensure forward progress has been made before yielding. */
 	if (iter->next_last_level_gfn == iter->yielded_gfn)
 		return false;
@@ -671,12 +676,10 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
 
 		WARN_ON(iter->gfn > iter->next_last_level_gfn);
 
-		tdp_iter_restart(iter);
-
-		return true;
+		iter->yielded = true;
 	}
 
-	return false;
+	return iter->yielded;
 }
 
 /*
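The net effect of the three MMU hunks is a contract change: tdp_mmu_iter_cond_resched() no longer restarts the walk itself, it merely sets iter->yielded, and the actual restart is deferred to the next tdp_iter_next() call. A caller that yields can therefore no longer advance the iterator from a position that may point into a page table freed while mmu_lock was dropped, and the WARN_ON()s catch anyone who modifies SPTEs through a yielded iterator. A schematic caller, loosely modeled on the zap loops in tdp_mmu.c (illustrative, not a verbatim excerpt):

	tdp_root_for_each_pte(iter, root, start, end) {
		/*
		 * On yield, iter.yielded is set; "continue" invokes
		 * tdp_iter_next(), which restarts the walk from the root
		 * instead of stepping from a possibly stale position.
		 */
		if (tdp_mmu_iter_cond_resched(kvm, &iter, flush, shared))
			continue;

		/* No yield occurred, so iter still points at a live SPTE. */
		tdp_mmu_set_spte(kvm, &iter, 0);
	}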

arch/x86/kvm/svm/svm.c
Lines changed: 12 additions & 9 deletions

@@ -1585,6 +1585,15 @@ static void svm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	to_svm(vcpu)->vmcb->save.rflags = rflags;
 }
 
+static bool svm_get_if_flag(struct kvm_vcpu *vcpu)
+{
+	struct vmcb *vmcb = to_svm(vcpu)->vmcb;
+
+	return sev_es_guest(vcpu->kvm)
+		? vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK
+		: kvm_get_rflags(vcpu) & X86_EFLAGS_IF;
+}
+
 static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
 {
 	switch (reg) {
@@ -3568,14 +3577,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
 	if (!gif_set(svm))
 		return true;
 
-	if (sev_es_guest(vcpu->kvm)) {
-		/*
-		 * SEV-ES guests to not expose RFLAGS. Use the VMCB interrupt mask
-		 * bit to determine the state of the IF flag.
-		 */
-		if (!(vmcb->control.int_state & SVM_GUEST_INTERRUPT_MASK))
-			return true;
-	} else if (is_guest_mode(vcpu)) {
+	if (is_guest_mode(vcpu)) {
 		/* As long as interrupts are being delivered... */
 		if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK)
 		    ? !(svm->vmcb01.ptr->save.rflags & X86_EFLAGS_IF)
@@ -3586,7 +3588,7 @@ bool svm_interrupt_blocked(struct kvm_vcpu *vcpu)
 		if (nested_exit_on_intr(svm))
 			return false;
 	} else {
-		if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF))
+		if (!svm_get_if_flag(vcpu))
 			return true;
 	}
 
@@ -4621,6 +4623,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.cache_reg = svm_cache_reg,
 	.get_rflags = svm_get_rflags,
 	.set_rflags = svm_set_rflags,
+	.get_if_flag = svm_get_if_flag,
 
 	.tlb_flush_all = svm_flush_tlb,
 	.tlb_flush_current = svm_flush_tlb,

arch/x86/kvm/vmx/vmx.c
Lines changed: 32 additions & 13 deletions

@@ -1363,6 +1363,11 @@ void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags)
 	vmx->emulation_required = vmx_emulation_required(vcpu);
 }
 
+static bool vmx_get_if_flag(struct kvm_vcpu *vcpu)
+{
+	return vmx_get_rflags(vcpu) & X86_EFLAGS_IF;
+}
+
 u32 vmx_get_interrupt_shadow(struct kvm_vcpu *vcpu)
 {
 	u32 interruptibility = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
@@ -3959,8 +3964,7 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector)
 	if (pi_test_and_set_on(&vmx->pi_desc))
 		return 0;
 
-	if (vcpu != kvm_get_running_vcpu() &&
-	    !kvm_vcpu_trigger_posted_interrupt(vcpu, false))
+	if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false))
 		kvm_vcpu_kick(vcpu);
 
 	return 0;
@@ -5877,18 +5881,14 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 		vmx_flush_pml_buffer(vcpu);
 
 	/*
-	 * We should never reach this point with a pending nested VM-Enter, and
-	 * more specifically emulation of L2 due to invalid guest state (see
-	 * below) should never happen as that means we incorrectly allowed a
-	 * nested VM-Enter with an invalid vmcs12.
+	 * KVM should never reach this point with a pending nested VM-Enter.
+	 * More specifically, short-circuiting VM-Entry to emulate L2 due to
+	 * invalid guest state should never happen as that means KVM knowingly
+	 * allowed a nested VM-Enter with an invalid vmcs12. More below.
	 */
 	if (KVM_BUG_ON(vmx->nested.nested_run_pending, vcpu->kvm))
 		return -EIO;
 
-	/* If guest state is invalid, start emulating */
-	if (vmx->emulation_required)
-		return handle_invalid_guest_state(vcpu);
-
 	if (is_guest_mode(vcpu)) {
 		/*
 		 * PML is never enabled when running L2, bail immediately if a
@@ -5910,10 +5910,30 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 		 */
 		nested_mark_vmcs12_pages_dirty(vcpu);
 
+		/*
+		 * Synthesize a triple fault if L2 state is invalid. In normal
+		 * operation, nested VM-Enter rejects any attempt to enter L2
+		 * with invalid state. However, those checks are skipped if
+		 * state is being stuffed via RSM or KVM_SET_NESTED_STATE. If
+		 * L2 state is invalid, it means either L1 modified SMRAM state
+		 * or userspace provided bad state. Synthesize TRIPLE_FAULT as
+		 * doing so is architecturally allowed in the RSM case, and is
+		 * the least awful solution for the userspace case without
+		 * risking false positives.
+		 */
+		if (vmx->emulation_required) {
+			nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
+			return 1;
+		}
+
 		if (nested_vmx_reflect_vmexit(vcpu))
 			return 1;
 	}
 
+	/* If guest state is invalid, start emulating. L2 is handled above. */
+	if (vmx->emulation_required)
+		return handle_invalid_guest_state(vcpu);
+
 	if (exit_reason.failed_vmentry) {
 		dump_vmcs(vcpu);
 		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -6608,9 +6628,7 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	 * consistency check VM-Exit due to invalid guest state and bail.
 	 */
 	if (unlikely(vmx->emulation_required)) {
-
-		/* We don't emulate invalid state of a nested guest */
-		vmx->fail = is_guest_mode(vcpu);
+		vmx->fail = 0;
 
 		vmx->exit_reason.full = EXIT_REASON_INVALID_STATE;
 		vmx->exit_reason.failed_vmentry = 1;
@@ -7579,6 +7597,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
 	.set_rflags = vmx_set_rflags,
+	.get_if_flag = vmx_get_if_flag,
 
 	.tlb_flush_all = vmx_flush_tlb_all,
 	.tlb_flush_current = vmx_flush_tlb_current,
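The new TRIPLE_FAULT synthesis is exercised by the selftest added in this merge (vmx_invalid_nested_guest_state, see the .gitignore hunk below). In rough outline, such a test stuffs invalid segment state into the vCPU while L2 is active and verifies that L1 observes a triple-fault VM-exit rather than KVM attempting (and botching) emulation of L2. A heavily condensed sketch of that idea using raw KVM ioctls; the choice of marking TR unusable, and the assumption of kvm-intel.enable_unrestricted_guest=0 (so invalid state actually forces the emulation path), are illustrative and not taken from the test itself:

	/* Assumed context: vcpu_fd's guest (L1) has already entered L2. */
	struct kvm_sregs sregs;

	ioctl(vcpu_fd, KVM_GET_SREGS, &sregs);
	sregs.tr.unusable = 1;	/* VM-entry checks require a usable TR, so
				 * this makes L2's guest state invalid. */
	ioctl(vcpu_fd, KVM_SET_SREGS, &sregs);
	ioctl(vcpu_fd, KVM_RUN, 0);

	/*
	 * With this patch, KVM synthesizes EXIT_REASON_TRIPLE_FAULT to L1 and
	 * KVM_RUN keeps running L1; previously KVM either tried to emulate L2
	 * or reported a spurious VM-entry failure via vmx->fail.
	 */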

arch/x86/kvm/x86.c
Lines changed: 2 additions & 9 deletions

@@ -1331,7 +1331,7 @@ static const u32 msrs_to_save_all[] = {
 	MSR_IA32_UMWAIT_CONTROL,
 
 	MSR_ARCH_PERFMON_FIXED_CTR0, MSR_ARCH_PERFMON_FIXED_CTR1,
-	MSR_ARCH_PERFMON_FIXED_CTR0 + 2, MSR_ARCH_PERFMON_FIXED_CTR0 + 3,
+	MSR_ARCH_PERFMON_FIXED_CTR0 + 2,
 	MSR_CORE_PERF_FIXED_CTR_CTRL, MSR_CORE_PERF_GLOBAL_STATUS,
 	MSR_CORE_PERF_GLOBAL_CTRL, MSR_CORE_PERF_GLOBAL_OVF_CTRL,
 	MSR_ARCH_PERFMON_PERFCTR0, MSR_ARCH_PERFMON_PERFCTR1,
@@ -9001,14 +9001,7 @@ static void post_kvm_run_save(struct kvm_vcpu *vcpu)
 {
 	struct kvm_run *kvm_run = vcpu->run;
 
-	/*
-	 * if_flag is obsolete and useless, so do not bother
-	 * setting it for SEV-ES guests. Userspace can just
-	 * use kvm_run->ready_for_interrupt_injection.
-	 */
-	kvm_run->if_flag = !vcpu->arch.guest_state_protected
-		&& (kvm_get_rflags(vcpu) & X86_EFLAGS_IF) != 0;
-
+	kvm_run->if_flag = static_call(kvm_x86_get_if_flag)(vcpu);
 	kvm_run->cr8 = kvm_get_cr8(vcpu);
 	kvm_run->apic_base = kvm_get_apic_base(vcpu);
 
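With the hook in place, kvm_run->if_flag is now populated for SEV-ES guests as well (from the VMCB interrupt mask rather than the unreadable, encrypted RFLAGS). A hypothetical userspace consumer; the kvm_run fields are real UAPI, but the surrounding function and the queue_external_interrupt() helper are illustrative only:

	#include <linux/kvm.h>
	#include <sys/ioctl.h>

	/* "run" is the mmap()ed struct kvm_run for vcpu_fd. */
	static void run_once(int vcpu_fd, struct kvm_run *run)
	{
		ioctl(vcpu_fd, KVM_RUN, 0);

		/*
		 * if_flag mirrors the guest's interrupt flag, but userspace
		 * should generally prefer ready_for_interrupt_injection,
		 * which also accounts for interrupt shadows and pending
		 * events.
		 */
		if (run->ready_for_interrupt_injection && run->if_flag)
			queue_external_interrupt(vcpu_fd); /* hypothetical */
	}
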
tools/testing/selftests/kvm/.gitignore
Lines changed: 1 addition & 0 deletions

@@ -35,6 +35,7 @@
 /x86_64/vmx_apic_access_test
 /x86_64/vmx_close_while_nested_test
 /x86_64/vmx_dirty_log_test
+/x86_64/vmx_invalid_nested_guest_state
 /x86_64/vmx_preemption_timer_test
 /x86_64/vmx_set_nested_state_test
 /x86_64/vmx_tsc_adjust_test
