Commit 3e89d5f
Merge tag 'kvm-x86-vmx-6.16' of https://github.com/kvm-x86/linux into HEAD
KVM VMX changes for 6.16:

 - Explicitly check MSR load/store list counts to fix a potential overflow on
   32-bit kernels.

 - Flush shadow VMCSes on emergency reboot.

 - Revert mem_enc_ioctl() back to an optional hook, as it's nullified when
   SEV or TDX is disabled via Kconfig.

 - Macrofy the handling of vt_x86_ops to eliminate a pile of boilerplate code
   needed for TDX, and to optimize CONFIG_KVM_INTEL_TDX=n builds.
2 parents: 3e0797f + 907092b

File tree

9 files changed: +145 -190 lines changed

arch/x86/include/asm/kvm-x86-ops.h

Lines changed: 1 addition & 1 deletion

@@ -127,7 +127,7 @@ KVM_X86_OP(leave_smm)
 KVM_X86_OP(enable_smi_window)
 #endif
 KVM_X86_OP_OPTIONAL(dev_get_attr)
-KVM_X86_OP(mem_enc_ioctl)
+KVM_X86_OP_OPTIONAL(mem_enc_ioctl)
 KVM_X86_OP_OPTIONAL(vcpu_mem_enc_ioctl)
 KVM_X86_OP_OPTIONAL(mem_enc_register_region)
 KVM_X86_OP_OPTIONAL(mem_enc_unregister_region)
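The one-line change above turns mem_enc_ioctl back into an optional hook, matching the commit message: the op is left NULL when SEV and TDX are both disabled via Kconfig. As a rough, hedged illustration of the practical difference (hypothetical names, not the kernel's exact x-macro machinery in kvm_host.h/x86.c): callers of a mandatory op may assume it is populated, while an optional op must be guarded against NULL.

#include <stdio.h>

/* Illustrative userspace sketch only: a mandatory hook is assumed non-NULL,
 * an optional hook (like mem_enc_ioctl with SEV/TDX compiled out) may stay
 * NULL and every caller has to tolerate that. */
struct example_ops {
	void (*mandatory_hook)(void);
	void (*optional_hook)(void);	/* may legitimately be NULL */
};

static void mandatory_impl(void)
{
	puts("mandatory hook called");
}

static const struct example_ops ops = {
	.mandatory_hook = mandatory_impl,
	.optional_hook  = NULL,		/* feature compiled out */
};

int main(void)
{
	ops.mandatory_hook();		/* mandatory: no guard needed */

	if (ops.optional_hook)		/* optional: guard before calling */
		ops.optional_hook();
	else
		puts("optional hook not implemented, skipping");

	return 0;
}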

arch/x86/kvm/vmx/main.c

Lines changed: 103 additions & 105 deletions

@@ -12,7 +12,6 @@
 
 #ifdef CONFIG_KVM_INTEL_TDX
 static_assert(offsetof(struct vcpu_vmx, vt) == offsetof(struct vcpu_tdx, vt));
-#endif
 
 static void vt_disable_virtualization_cpu(void)
 {
@@ -240,7 +239,7 @@ static int vt_complete_emulated_msr(struct kvm_vcpu *vcpu, int err)
 	if (is_td_vcpu(vcpu))
 		return tdx_complete_emulated_msr(vcpu, err);
 
-	return kvm_complete_insn_gp(vcpu, err);
+	return vmx_complete_emulated_msr(vcpu, err);
 }
 
 #ifdef CONFIG_KVM_SMM
@@ -315,14 +314,6 @@ static void vt_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
 	return vmx_set_virtual_apic_mode(vcpu);
 }
 
-static void vt_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
-{
-	struct pi_desc *pi = vcpu_to_pi_desc(vcpu);
-
-	pi_clear_on(pi);
-	memset(pi->pir, 0, sizeof(pi->pir));
-}
-
 static void vt_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
 {
 	if (is_td_vcpu(vcpu))
@@ -888,6 +879,13 @@ static int vt_gmem_private_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn)
 	return 0;
 }
 
+#define vt_op(name) vt_##name
+#define vt_op_tdx_only(name) vt_##name
+#else /* CONFIG_KVM_INTEL_TDX */
+#define vt_op(name) vmx_##name
+#define vt_op_tdx_only(name) NULL
+#endif /* CONFIG_KVM_INTEL_TDX */
+
 #define VMX_REQUIRED_APICV_INHIBITS \
 	(BIT(APICV_INHIBIT_REASON_DISABLED) | \
 	 BIT(APICV_INHIBIT_REASON_ABSENT) | \
@@ -905,152 +903,152 @@ struct kvm_x86_ops vt_x86_ops __initdata = {
 	.hardware_unsetup = vmx_hardware_unsetup,
 
 	.enable_virtualization_cpu = vmx_enable_virtualization_cpu,
-	.disable_virtualization_cpu = vt_disable_virtualization_cpu,
+	.disable_virtualization_cpu = vt_op(disable_virtualization_cpu),
 	.emergency_disable_virtualization_cpu = vmx_emergency_disable_virtualization_cpu,
 
-	.has_emulated_msr = vt_has_emulated_msr,
+	.has_emulated_msr = vt_op(has_emulated_msr),
 
 	.vm_size = sizeof(struct kvm_vmx),
 
-	.vm_init = vt_vm_init,
-	.vm_pre_destroy = vt_vm_pre_destroy,
-	.vm_destroy = vt_vm_destroy,
+	.vm_init = vt_op(vm_init),
+	.vm_destroy = vt_op(vm_destroy),
+	.vm_pre_destroy = vt_op_tdx_only(vm_pre_destroy),
 
-	.vcpu_precreate = vt_vcpu_precreate,
-	.vcpu_create = vt_vcpu_create,
-	.vcpu_free = vt_vcpu_free,
-	.vcpu_reset = vt_vcpu_reset,
+	.vcpu_precreate = vt_op(vcpu_precreate),
+	.vcpu_create = vt_op(vcpu_create),
+	.vcpu_free = vt_op(vcpu_free),
+	.vcpu_reset = vt_op(vcpu_reset),
 
-	.prepare_switch_to_guest = vt_prepare_switch_to_guest,
-	.vcpu_load = vt_vcpu_load,
-	.vcpu_put = vt_vcpu_put,
+	.prepare_switch_to_guest = vt_op(prepare_switch_to_guest),
+	.vcpu_load = vt_op(vcpu_load),
+	.vcpu_put = vt_op(vcpu_put),
 
-	.update_exception_bitmap = vt_update_exception_bitmap,
+	.update_exception_bitmap = vt_op(update_exception_bitmap),
 	.get_feature_msr = vmx_get_feature_msr,
-	.get_msr = vt_get_msr,
-	.set_msr = vt_set_msr,
-
-	.get_segment_base = vt_get_segment_base,
-	.get_segment = vt_get_segment,
-	.set_segment = vt_set_segment,
-	.get_cpl = vt_get_cpl,
-	.get_cpl_no_cache = vt_get_cpl_no_cache,
-	.get_cs_db_l_bits = vt_get_cs_db_l_bits,
-	.is_valid_cr0 = vt_is_valid_cr0,
-	.set_cr0 = vt_set_cr0,
-	.is_valid_cr4 = vt_is_valid_cr4,
-	.set_cr4 = vt_set_cr4,
-	.set_efer = vt_set_efer,
-	.get_idt = vt_get_idt,
-	.set_idt = vt_set_idt,
-	.get_gdt = vt_get_gdt,
-	.set_gdt = vt_set_gdt,
-	.set_dr6 = vt_set_dr6,
-	.set_dr7 = vt_set_dr7,
-	.sync_dirty_debug_regs = vt_sync_dirty_debug_regs,
-	.cache_reg = vt_cache_reg,
-	.get_rflags = vt_get_rflags,
-	.set_rflags = vt_set_rflags,
-	.get_if_flag = vt_get_if_flag,
-
-	.flush_tlb_all = vt_flush_tlb_all,
-	.flush_tlb_current = vt_flush_tlb_current,
-	.flush_tlb_gva = vt_flush_tlb_gva,
-	.flush_tlb_guest = vt_flush_tlb_guest,
-
-	.vcpu_pre_run = vt_vcpu_pre_run,
-	.vcpu_run = vt_vcpu_run,
-	.handle_exit = vt_handle_exit,
+	.get_msr = vt_op(get_msr),
+	.set_msr = vt_op(set_msr),
+
+	.get_segment_base = vt_op(get_segment_base),
+	.get_segment = vt_op(get_segment),
+	.set_segment = vt_op(set_segment),
+	.get_cpl = vt_op(get_cpl),
+	.get_cpl_no_cache = vt_op(get_cpl_no_cache),
+	.get_cs_db_l_bits = vt_op(get_cs_db_l_bits),
+	.is_valid_cr0 = vt_op(is_valid_cr0),
+	.set_cr0 = vt_op(set_cr0),
+	.is_valid_cr4 = vt_op(is_valid_cr4),
+	.set_cr4 = vt_op(set_cr4),
+	.set_efer = vt_op(set_efer),
+	.get_idt = vt_op(get_idt),
+	.set_idt = vt_op(set_idt),
+	.get_gdt = vt_op(get_gdt),
+	.set_gdt = vt_op(set_gdt),
+	.set_dr6 = vt_op(set_dr6),
+	.set_dr7 = vt_op(set_dr7),
+	.sync_dirty_debug_regs = vt_op(sync_dirty_debug_regs),
+	.cache_reg = vt_op(cache_reg),
+	.get_rflags = vt_op(get_rflags),
+	.set_rflags = vt_op(set_rflags),
+	.get_if_flag = vt_op(get_if_flag),
+
+	.flush_tlb_all = vt_op(flush_tlb_all),
+	.flush_tlb_current = vt_op(flush_tlb_current),
+	.flush_tlb_gva = vt_op(flush_tlb_gva),
+	.flush_tlb_guest = vt_op(flush_tlb_guest),
+
+	.vcpu_pre_run = vt_op(vcpu_pre_run),
+	.vcpu_run = vt_op(vcpu_run),
+	.handle_exit = vt_op(handle_exit),
 	.skip_emulated_instruction = vmx_skip_emulated_instruction,
 	.update_emulated_instruction = vmx_update_emulated_instruction,
-	.set_interrupt_shadow = vt_set_interrupt_shadow,
-	.get_interrupt_shadow = vt_get_interrupt_shadow,
-	.patch_hypercall = vt_patch_hypercall,
-	.inject_irq = vt_inject_irq,
-	.inject_nmi = vt_inject_nmi,
-	.inject_exception = vt_inject_exception,
-	.cancel_injection = vt_cancel_injection,
-	.interrupt_allowed = vt_interrupt_allowed,
-	.nmi_allowed = vt_nmi_allowed,
-	.get_nmi_mask = vt_get_nmi_mask,
-	.set_nmi_mask = vt_set_nmi_mask,
-	.enable_nmi_window = vt_enable_nmi_window,
-	.enable_irq_window = vt_enable_irq_window,
-	.update_cr8_intercept = vt_update_cr8_intercept,
+	.set_interrupt_shadow = vt_op(set_interrupt_shadow),
+	.get_interrupt_shadow = vt_op(get_interrupt_shadow),
+	.patch_hypercall = vt_op(patch_hypercall),
+	.inject_irq = vt_op(inject_irq),
+	.inject_nmi = vt_op(inject_nmi),
+	.inject_exception = vt_op(inject_exception),
+	.cancel_injection = vt_op(cancel_injection),
+	.interrupt_allowed = vt_op(interrupt_allowed),
+	.nmi_allowed = vt_op(nmi_allowed),
+	.get_nmi_mask = vt_op(get_nmi_mask),
+	.set_nmi_mask = vt_op(set_nmi_mask),
+	.enable_nmi_window = vt_op(enable_nmi_window),
+	.enable_irq_window = vt_op(enable_irq_window),
+	.update_cr8_intercept = vt_op(update_cr8_intercept),
 
 	.x2apic_icr_is_split = false,
-	.set_virtual_apic_mode = vt_set_virtual_apic_mode,
-	.set_apic_access_page_addr = vt_set_apic_access_page_addr,
-	.refresh_apicv_exec_ctrl = vt_refresh_apicv_exec_ctrl,
-	.load_eoi_exitmap = vt_load_eoi_exitmap,
-	.apicv_pre_state_restore = vt_apicv_pre_state_restore,
+	.set_virtual_apic_mode = vt_op(set_virtual_apic_mode),
+	.set_apic_access_page_addr = vt_op(set_apic_access_page_addr),
+	.refresh_apicv_exec_ctrl = vt_op(refresh_apicv_exec_ctrl),
+	.load_eoi_exitmap = vt_op(load_eoi_exitmap),
+	.apicv_pre_state_restore = pi_apicv_pre_state_restore,
 	.required_apicv_inhibits = VMX_REQUIRED_APICV_INHIBITS,
-	.hwapic_isr_update = vt_hwapic_isr_update,
-	.sync_pir_to_irr = vt_sync_pir_to_irr,
-	.deliver_interrupt = vt_deliver_interrupt,
+	.hwapic_isr_update = vt_op(hwapic_isr_update),
+	.sync_pir_to_irr = vt_op(sync_pir_to_irr),
+	.deliver_interrupt = vt_op(deliver_interrupt),
 	.dy_apicv_has_pending_interrupt = pi_has_pending_interrupt,
 
-	.set_tss_addr = vt_set_tss_addr,
-	.set_identity_map_addr = vt_set_identity_map_addr,
+	.set_tss_addr = vt_op(set_tss_addr),
+	.set_identity_map_addr = vt_op(set_identity_map_addr),
 	.get_mt_mask = vmx_get_mt_mask,
 
-	.get_exit_info = vt_get_exit_info,
-	.get_entry_info = vt_get_entry_info,
+	.get_exit_info = vt_op(get_exit_info),
+	.get_entry_info = vt_op(get_entry_info),
 
-	.vcpu_after_set_cpuid = vt_vcpu_after_set_cpuid,
+	.vcpu_after_set_cpuid = vt_op(vcpu_after_set_cpuid),
 
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
-	.get_l2_tsc_offset = vt_get_l2_tsc_offset,
-	.get_l2_tsc_multiplier = vt_get_l2_tsc_multiplier,
-	.write_tsc_offset = vt_write_tsc_offset,
-	.write_tsc_multiplier = vt_write_tsc_multiplier,
+	.get_l2_tsc_offset = vt_op(get_l2_tsc_offset),
+	.get_l2_tsc_multiplier = vt_op(get_l2_tsc_multiplier),
+	.write_tsc_offset = vt_op(write_tsc_offset),
+	.write_tsc_multiplier = vt_op(write_tsc_multiplier),
 
-	.load_mmu_pgd = vt_load_mmu_pgd,
+	.load_mmu_pgd = vt_op(load_mmu_pgd),
 
 	.check_intercept = vmx_check_intercept,
 	.handle_exit_irqoff = vmx_handle_exit_irqoff,
 
-	.update_cpu_dirty_logging = vt_update_cpu_dirty_logging,
+	.update_cpu_dirty_logging = vt_op(update_cpu_dirty_logging),
 
 	.nested_ops = &vmx_nested_ops,
 
 	.pi_update_irte = vmx_pi_update_irte,
 	.pi_start_assignment = vmx_pi_start_assignment,
 
 #ifdef CONFIG_X86_64
-	.set_hv_timer = vt_set_hv_timer,
-	.cancel_hv_timer = vt_cancel_hv_timer,
+	.set_hv_timer = vt_op(set_hv_timer),
+	.cancel_hv_timer = vt_op(cancel_hv_timer),
 #endif
 
-	.setup_mce = vt_setup_mce,
+	.setup_mce = vt_op(setup_mce),
 
 #ifdef CONFIG_KVM_SMM
-	.smi_allowed = vt_smi_allowed,
-	.enter_smm = vt_enter_smm,
-	.leave_smm = vt_leave_smm,
-	.enable_smi_window = vt_enable_smi_window,
+	.smi_allowed = vt_op(smi_allowed),
+	.enter_smm = vt_op(enter_smm),
+	.leave_smm = vt_op(leave_smm),
+	.enable_smi_window = vt_op(enable_smi_window),
 #endif
 
-	.check_emulate_instruction = vt_check_emulate_instruction,
-	.apic_init_signal_blocked = vt_apic_init_signal_blocked,
+	.check_emulate_instruction = vt_op(check_emulate_instruction),
+	.apic_init_signal_blocked = vt_op(apic_init_signal_blocked),
 	.migrate_timers = vmx_migrate_timers,
 
-	.msr_filter_changed = vt_msr_filter_changed,
-	.complete_emulated_msr = vt_complete_emulated_msr,
+	.msr_filter_changed = vt_op(msr_filter_changed),
+	.complete_emulated_msr = vt_op(complete_emulated_msr),
 
 	.vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 
 	.get_untagged_addr = vmx_get_untagged_addr,
 
-	.mem_enc_ioctl = vt_mem_enc_ioctl,
-	.vcpu_mem_enc_ioctl = vt_vcpu_mem_enc_ioctl,
+	.mem_enc_ioctl = vt_op_tdx_only(mem_enc_ioctl),
+	.vcpu_mem_enc_ioctl = vt_op_tdx_only(vcpu_mem_enc_ioctl),
 
-	.private_max_mapping_level = vt_gmem_private_max_mapping_level
+	.private_max_mapping_level = vt_op_tdx_only(gmem_private_max_mapping_level)
 };
 
 struct kvm_x86_init_ops vt_init_ops __initdata = {
-	.hardware_setup = vt_hardware_setup,
+	.hardware_setup = vt_op(hardware_setup),
 	.handle_intel_pt_intr = NULL,
 
 	.runtime_ops = &vt_x86_ops,
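For readers unfamiliar with the macro trick, the hunk above boils down to this: in TDX-capable builds every vt_op() field points at a TDX-aware vt_* wrapper, while in CONFIG_KVM_INTEL_TDX=n builds the same initializer resolves straight to the plain vmx_* implementations and the TDX-only hooks collapse to NULL, which is exactly why mem_enc_ioctl has to be an optional op per the kvm-x86-ops.h change above. A restatement of the macros from the diff, with two representative fields shown after preprocessing:

#ifdef CONFIG_KVM_INTEL_TDX
#define vt_op(name)		vt_##name	/* TDX-aware wrapper in main.c */
#define vt_op_tdx_only(name)	vt_##name
#else
#define vt_op(name)		vmx_##name	/* plain VMX implementation */
#define vt_op_tdx_only(name)	NULL		/* hook absent in non-TDX builds */
#endif

/*
 * So, for example:
 *   .vcpu_create   = vt_op(vcpu_create)            -> vt_vcpu_create   or vmx_vcpu_create
 *   .mem_enc_ioctl = vt_op_tdx_only(mem_enc_ioctl) -> vt_mem_enc_ioctl or NULL
 */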

arch/x86/kvm/vmx/nested.c

Lines changed: 20 additions & 11 deletions

@@ -824,12 +824,30 @@ static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
+static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_vmx *vmx = to_vmx(vcpu);
+	u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+				       vmx->nested.msrs.misc_high);
+
+	return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
+}
+
 static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
 				       u32 count, u64 addr)
 {
 	if (count == 0)
 		return 0;
 
+	/*
+	 * Exceeding the limit results in architecturally _undefined_ behavior,
+	 * i.e. KVM is allowed to do literally anything in response to a bad
+	 * limit.  Immediately generate a consistency check so that code that
+	 * consumes the count doesn't need to worry about extreme edge cases.
+	 */
+	if (count > nested_vmx_max_atomic_switch_msrs(vcpu))
+		return -EINVAL;
+
 	if (!kvm_vcpu_is_legal_aligned_gpa(vcpu, addr, 16) ||
 	    !kvm_vcpu_is_legal_gpa(vcpu, (addr + count * sizeof(struct vmx_msr_entry) - 1)))
 		return -EINVAL;
@@ -940,15 +958,6 @@ static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
 	return 0;
 }
 
-static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
-{
-	struct vcpu_vmx *vmx = to_vmx(vcpu);
-	u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
-				       vmx->nested.msrs.misc_high);
-
-	return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
-}
-
 /*
  * Load guest's/host's msr at nested entry/exit.
  * return 0 for success, entry index for failure.
@@ -965,7 +974,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
 
 	for (i = 0; i < count; i++) {
-		if (unlikely(i >= max_msr_list_size))
+		if (WARN_ON_ONCE(i >= max_msr_list_size))
 			goto fail;
 
 		if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
@@ -1053,7 +1062,7 @@ static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
 	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
 
 	for (i = 0; i < count; i++) {
-		if (unlikely(i >= max_msr_list_size))
+		if (WARN_ON_ONCE(i >= max_msr_list_size))
 			return -EINVAL;
 
 		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
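The new up-front count check is what closes the 32-bit overflow called out in the commit message: on a 32-bit kernel sizeof() yields a 32-bit size_t, so the count * sizeof(struct vmx_msr_entry) term in the GPA legality check can wrap before it is added to the 64-bit address. A minimal, standalone illustration of the wrap follows; the 16-byte entry size and the deliberately absurd count are assumptions chosen only to make the arithmetic visible.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t count = 0x10000001u;	/* bogus guest-supplied count */
	uint32_t entry_size = 16u;	/* assumed sizeof(struct vmx_msr_entry) */

	uint32_t wrapped = count * entry_size;		/* 32-bit multiply: wraps to 0x10  */
	uint64_t widened = (uint64_t)count * entry_size;/* true size: 0x100000010 bytes    */

	printf("32-bit product: 0x%x\n", wrapped);
	printf("64-bit product: 0x%llx\n", (unsigned long long)widened);
	return 0;
}

With the count clamped to nested_vmx_max_atomic_switch_msrs() before any address math (a few thousand entries at most, given the 512-entry multiplier), the later arithmetic can never approach the 32-bit boundary, and the per-iteration limit checks in the load/store loops become genuine WARN_ON_ONCE() assertions rather than a guest-reachable path.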

arch/x86/kvm/vmx/posted_intr.c

Lines changed: 9 additions & 1 deletion

@@ -34,7 +34,7 @@ static DEFINE_PER_CPU(raw_spinlock_t, wakeup_vcpus_on_cpu_lock);
 
 #define PI_LOCK_SCHED_OUT SINGLE_DEPTH_NESTING
 
-struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
+static struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu)
 {
 	return &(to_vt(vcpu)->pi_desc);
 }
@@ -263,6 +263,14 @@ void __init pi_init_cpu(int cpu)
 	raw_spin_lock_init(&per_cpu(wakeup_vcpus_on_cpu_lock, cpu));
 }
 
+void pi_apicv_pre_state_restore(struct kvm_vcpu *vcpu)
+{
+	struct pi_desc *pi = vcpu_to_pi_desc(vcpu);
+
+	pi_clear_on(pi);
+	memset(pi->pir, 0, sizeof(pi->pir));
+}
+
 bool pi_has_pending_interrupt(struct kvm_vcpu *vcpu)
 {
 	struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
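The helper moves into the common posted-interrupt code so it no longer needs a vt_* wrapper in main.c; the main.c hunk above wires the kvm_x86_ops hook straight at it, and with no external users left, vcpu_to_pi_desc() can become static. A sketch of the resulting wiring (the declaration's location is an assumption, since the posted_intr.h hunk is not shown in this excerpt):

/* assumed declaration, presumably in posted_intr.h */
void pi_apicv_pre_state_restore(struct kvm_vcpu *vcpu);

/* main.c points the hook directly at the common helper */
struct kvm_x86_ops vt_x86_ops __initdata = {
	/* ... */
	.apicv_pre_state_restore = pi_apicv_pre_state_restore,
	/* ... */
};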
