Skip to content

Commit 04bc93c

Browse files
gaochaointelsean-jc
authored andcommitted
KVM: nVMX: Defer SVI update to vmcs01 on EOI when L2 is active w/o VID
If KVM emulates an EOI for L1's virtual APIC while L2 is active, defer updating GUEST_INTERUPT_STATUS.SVI, i.e. the VMCS's cache of the highest in-service IRQ, until L1 is active, as vmcs01, not vmcs02, needs to track vISR. The missed SVI update for vmcs01 can result in L1 interrupts being incorrectly blocked, e.g. if there is a pending interrupt with lower priority than the interrupt that was EOI'd. This bug only affects use cases where L1's vAPIC is effectively passed through to L2, e.g. in a pKVM scenario where L2 is L1's depriveleged host, as KVM will only emulate an EOI for L1's vAPIC if Virtual Interrupt Delivery (VID) is disabled in vmc12, and L1 isn't intercepting L2 accesses to its (virtual) APIC page (or if x2APIC is enabled, the EOI MSR). WARN() if KVM updates L1's ISR while L2 is active with VID enabled, as an EOI from L2 is supposed to affect L2's vAPIC, but still defer the update, to try to keep L1 alive. Specifically, KVM forwards all APICv-related VM-Exits to L1 via nested_vmx_l1_wants_exit(): case EXIT_REASON_APIC_ACCESS: case EXIT_REASON_APIC_WRITE: case EXIT_REASON_EOI_INDUCED: /* * The controls for "virtualize APIC accesses," "APIC- * register virtualization," and "virtual-interrupt * delivery" only come from vmcs12. */ return true; Fixes: c7c9c56 ("x86, apicv: add virtual interrupt delivery support") Cc: [email protected] Link: https://lore.kernel.org/kvm/[email protected] Reported-by: Markku Ahvenjärvi <[email protected]> Closes: https://lore.kernel.org/all/[email protected] Cc: Janne Karhunen <[email protected]> Signed-off-by: Chao Gao <[email protected]> [sean: drop request, handle in VMX, write changelog] Tested-by: Chao Gao <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Sean Christopherson <[email protected]>
1 parent 76bce9f commit 04bc93c

File tree

5 files changed

+39
-0
lines changed

5 files changed

+39
-0
lines changed

arch/x86/kvm/lapic.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -816,6 +816,17 @@ static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
816816
}
817817
}
818818

819+
void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu)
820+
{
821+
struct kvm_lapic *apic = vcpu->arch.apic;
822+
823+
if (WARN_ON_ONCE(!lapic_in_kernel(vcpu)) || !apic->apicv_active)
824+
return;
825+
826+
kvm_x86_call(hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
827+
}
828+
EXPORT_SYMBOL_GPL(kvm_apic_update_hwapic_isr);
829+
819830
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
820831
{
821832
/* This may race with setting of irr in __apic_accept_irq() and

arch/x86/kvm/lapic.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high);
118118
int kvm_apic_set_base(struct kvm_vcpu *vcpu, u64 value, bool host_initiated);
119119
int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
120120
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
121+
void kvm_apic_update_hwapic_isr(struct kvm_vcpu *vcpu);
121122
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
122123

123124
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);

arch/x86/kvm/vmx/nested.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5050,6 +5050,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
50505050
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
50515051
}
50525052

5053+
if (vmx->nested.update_vmcs01_hwapic_isr) {
5054+
vmx->nested.update_vmcs01_hwapic_isr = false;
5055+
kvm_apic_update_hwapic_isr(vcpu);
5056+
}
5057+
50535058
if ((vm_exit_reason != -1) &&
50545059
(enable_shadow_vmcs || nested_vmx_is_evmptr12_valid(vmx)))
50555060
vmx->nested.need_vmcs12_to_shadow_sync = true;

arch/x86/kvm/vmx/vmx.c

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6867,6 +6867,27 @@ void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
68676867
u16 status;
68686868
u8 old;
68696869

6870+
/*
6871+
* If L2 is active, defer the SVI update until vmcs01 is loaded, as SVI
6872+
* is only relevant for if and only if Virtual Interrupt Delivery is
6873+
* enabled in vmcs12, and if VID is enabled then L2 EOIs affect L2's
6874+
* vAPIC, not L1's vAPIC. KVM must update vmcs01 on the next nested
6875+
* VM-Exit, otherwise L1 with run with a stale SVI.
6876+
*/
6877+
if (is_guest_mode(vcpu)) {
6878+
/*
6879+
* KVM is supposed to forward intercepted L2 EOIs to L1 if VID
6880+
* is enabled in vmcs12; as above, the EOIs affect L2's vAPIC.
6881+
* Note, userspace can stuff state while L2 is active; assert
6882+
* that VID is disabled if and only if the vCPU is in KVM_RUN
6883+
* to avoid false positives if userspace is setting APIC state.
6884+
*/
6885+
WARN_ON_ONCE(vcpu->wants_to_run &&
6886+
nested_cpu_has_vid(get_vmcs12(vcpu)));
6887+
to_vmx(vcpu)->nested.update_vmcs01_hwapic_isr = true;
6888+
return;
6889+
}
6890+
68706891
if (max_isr == -1)
68716892
max_isr = 0;
68726893

arch/x86/kvm/vmx/vmx.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,7 @@ struct nested_vmx {
176176
bool reload_vmcs01_apic_access_page;
177177
bool update_vmcs01_cpu_dirty_logging;
178178
bool update_vmcs01_apicv_status;
179+
bool update_vmcs01_hwapic_isr;
179180

180181
/*
181182
* Enlightened VMCS has been enabled. It does not mean that L1 has to

0 commit comments

Comments
 (0)