Skip to content

Commit 8bbe0de

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more KVM updates from Paolo Bonzini: "x86 KVM changes: - The usual accuracy improvements for nested virtualization - The usual round of code cleanups from Sean - Added back optimizations that were prematurely removed in 5.2 (the bare minimum needed to fix the regression was in 5.3-rc8, here comes the rest) - Support for UMWAIT/UMONITOR/TPAUSE - Direct L2->L0 TLB flushing when L0 is Hyper-V and L1 is KVM - Tell Windows guests if SMT is disabled on the host - More accurate detection of vmexit cost - Revert a pvqspinlock pessimization" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (56 commits) KVM: nVMX: cleanup and fix host 64-bit mode checks KVM: vmx: fix build warnings in hv_enable_direct_tlbflush() on i386 KVM: x86: Don't check kvm_rebooting in __kvm_handle_fault_on_reboot() KVM: x86: Drop ____kvm_handle_fault_on_reboot() KVM: VMX: Add error handling to VMREAD helper KVM: VMX: Optimize VMX instruction error and fault handling KVM: x86: Check kvm_rebooting in kvm_spurious_fault() KVM: selftests: fix ucall on x86 Revert "locking/pvqspinlock: Don't wait if vCPU is preempted" kvm: nvmx: limit atomic switch MSRs kvm: svm: Intercept RDPRU kvm: x86: Add "significant index" flag to a few CPUID leaves KVM: x86/mmu: Skip invalid pages during zapping iff root_count is zero KVM: x86/mmu: Explicitly track only a single invalid mmu generation KVM: x86/mmu: Revert "KVM: x86/mmu: Remove is_obsolete() call" KVM: x86/mmu: Revert "Revert "KVM: MMU: reclaim the zapped-obsolete page first"" KVM: x86/mmu: Revert "Revert "KVM: MMU: collapse TLB flushes when zap all pages"" KVM: x86/mmu: Revert "Revert "KVM: MMU: zap pages in batch"" KVM: x86/mmu: Revert "Revert "KVM: MMU: add tracepoint for kvm_mmu_invalidate_all_pages"" KVM: x86/mmu: Revert "Revert "KVM: MMU: show mmu_valid_gen in shadow page related tracepoints"" ...
2 parents e37e3bc + fd3edd4 commit 8bbe0de

File tree

36 files changed

+906
-468
lines changed

36 files changed

+906
-468
lines changed

Documentation/virt/kvm/api.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5309,3 +5309,16 @@ Architectures: x86
53095309
This capability indicates that KVM supports paravirtualized Hyper-V IPI send
53105310
hypercalls:
53115311
HvCallSendSyntheticClusterIpi, HvCallSendSyntheticClusterIpiEx.
5312+
8.21 KVM_CAP_HYPERV_DIRECT_TLBFLUSH
5313+
5314+
Architecture: x86
5315+
5316+
This capability indicates that KVM running on top of Hyper-V hypervisor
5317+
enables Direct TLB flush for its guests meaning that TLB flush
5318+
hypercalls are handled by Level 0 hypervisor (Hyper-V) bypassing KVM.
5319+
Due to the different ABI for hypercall parameters between Hyper-V and
5320+
KVM, enabling this capability effectively disables all hypercall
5321+
handling by KVM (as some KVM hypercalls may be mistakenly treated as TLB
5322+
flush hypercalls by Hyper-V) so userspace should disable KVM identification
5323+
in CPUID and only expose Hyper-V identification. In this case, the guest
5324+
thinks it's running on Hyper-V and only uses Hyper-V hypercalls.

arch/x86/include/asm/hyperv-tlfs.h

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,15 @@
180180
/* Recommend using enlightened VMCS */
181181
#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED BIT(14)
182182

183+
/*
184+
* Virtual processor will never share a physical core with another virtual
185+
* processor, except for virtual processors that are reported as sibling SMT
186+
* threads.
187+
*/
188+
#define HV_X64_NO_NONARCH_CORESHARING BIT(18)
189+
183190
/* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */
191+
#define HV_X64_NESTED_DIRECT_FLUSH BIT(17)
184192
#define HV_X64_NESTED_GUEST_MAPPING_FLUSH BIT(18)
185193
#define HV_X64_NESTED_MSR_BITMAP BIT(19)
186194

@@ -524,14 +532,24 @@ struct hv_timer_message_payload {
524532
__u64 delivery_time; /* When the message was delivered */
525533
} __packed;
526534

535+
struct hv_nested_enlightenments_control {
536+
struct {
537+
__u32 directhypercall:1;
538+
__u32 reserved:31;
539+
} features;
540+
struct {
541+
__u32 reserved;
542+
} hypercallControls;
543+
} __packed;
544+
527545
/* Define virtual processor assist page structure. */
528546
struct hv_vp_assist_page {
529547
__u32 apic_assist;
530-
__u32 reserved;
531-
__u64 vtl_control[2];
532-
__u64 nested_enlightenments_control[2];
533-
__u32 enlighten_vmentry;
534-
__u32 padding;
548+
__u32 reserved1;
549+
__u64 vtl_control[3];
550+
struct hv_nested_enlightenments_control nested_control;
551+
__u8 enlighten_vmentry;
552+
__u8 reserved2[7];
535553
__u64 current_nested_vmcs;
536554
} __packed;
537555

@@ -882,4 +900,7 @@ struct hv_tlb_flush_ex {
882900
u64 gva_list[];
883901
} __packed;
884902

903+
struct hv_partition_assist_pg {
904+
u32 tlb_lock_count;
905+
};
885906
#endif

arch/x86/include/asm/kvm_host.h

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -320,6 +320,7 @@ struct kvm_mmu_page {
320320
struct list_head link;
321321
struct hlist_node hash_link;
322322
bool unsync;
323+
u8 mmu_valid_gen;
323324
bool mmio_cached;
324325

325326
/*
@@ -335,7 +336,6 @@ struct kvm_mmu_page {
335336
int root_count; /* Currently serving as active root */
336337
unsigned int unsync_children;
337338
struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */
338-
unsigned long mmu_valid_gen;
339339
DECLARE_BITMAP(unsync_child_bitmap, 512);
340340

341341
#ifdef CONFIG_X86_32
@@ -844,6 +844,8 @@ struct kvm_hv {
844844

845845
/* How many vCPUs have VP index != vCPU index */
846846
atomic_t num_mismatched_vp_indexes;
847+
848+
struct hv_partition_assist_pg *hv_pa_pg;
847849
};
848850

849851
enum kvm_irqchip_mode {
@@ -857,12 +859,13 @@ struct kvm_arch {
857859
unsigned long n_requested_mmu_pages;
858860
unsigned long n_max_mmu_pages;
859861
unsigned int indirect_shadow_pages;
860-
unsigned long mmu_valid_gen;
862+
u8 mmu_valid_gen;
861863
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
862864
/*
863865
* Hash table of struct kvm_mmu_page.
864866
*/
865867
struct list_head active_mmu_pages;
868+
struct list_head zapped_obsolete_pages;
866869
struct kvm_page_track_notifier_node mmu_sp_tracker;
867870
struct kvm_page_track_notifier_head track_notifier_head;
868871

@@ -1213,6 +1216,7 @@ struct kvm_x86_ops {
12131216
bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu);
12141217

12151218
bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu);
1219+
int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu);
12161220
};
12171221

12181222
struct kvm_arch_async_pf {
@@ -1312,18 +1316,42 @@ extern u64 kvm_default_tsc_scaling_ratio;
13121316

13131317
extern u64 kvm_mce_cap_supported;
13141318

1315-
enum emulation_result {
1316-
EMULATE_DONE, /* no further processing */
1317-
EMULATE_USER_EXIT, /* kvm_run ready for userspace exit */
1318-
EMULATE_FAIL, /* can't emulate this instruction */
1319-
};
1320-
1319+
/*
1320+
* EMULTYPE_NO_DECODE - Set when re-emulating an instruction (after completing
1321+
* userspace I/O) to indicate that the emulation context
1322+
* should be reused as is, i.e. skip initialization of
1323+
* emulation context, instruction fetch and decode.
1324+
*
1325+
* EMULTYPE_TRAP_UD - Set when emulating an intercepted #UD from hardware.
1326+
* Indicates that only select instructions (tagged with
1327+
* EmulateOnUD) should be emulated (to minimize the emulator
1328+
* attack surface). See also EMULTYPE_TRAP_UD_FORCED.
1329+
*
1330+
* EMULTYPE_SKIP - Set when emulating solely to skip an instruction, i.e. to
1331+
* decode the instruction length. For use *only* by
1332+
* kvm_x86_ops->skip_emulated_instruction() implementations.
1333+
*
1334+
* EMULTYPE_ALLOW_RETRY - Set when the emulator should resume the guest to
1335+
* retry native execution under certain conditions.
1336+
*
1337+
* EMULTYPE_TRAP_UD_FORCED - Set when emulating an intercepted #UD that was
1338+
* triggered by KVM's magic "force emulation" prefix,
1339+
* which is opt in via module param (off by default).
1340+
* Bypasses EmulateOnUD restriction despite emulating
1341+
* due to an intercepted #UD (see EMULTYPE_TRAP_UD).
1342+
* Used to test the full emulator from userspace.
1343+
*
1344+
* EMULTYPE_VMWARE_GP - Set when emulating an intercepted #GP for VMware
1345+
* backdoor emulation, which is opt in via module param.
1346+
* VMware backoor emulation handles select instructions
1347+
* and reinjects the #GP for all other cases.
1348+
*/
13211349
#define EMULTYPE_NO_DECODE (1 << 0)
13221350
#define EMULTYPE_TRAP_UD (1 << 1)
13231351
#define EMULTYPE_SKIP (1 << 2)
13241352
#define EMULTYPE_ALLOW_RETRY (1 << 3)
1325-
#define EMULTYPE_NO_UD_ON_FAIL (1 << 4)
1326-
#define EMULTYPE_VMWARE (1 << 5)
1353+
#define EMULTYPE_TRAP_UD_FORCED (1 << 4)
1354+
#define EMULTYPE_VMWARE_GP (1 << 5)
13271355
int kvm_emulate_instruction(struct kvm_vcpu *vcpu, int emulation_type);
13281356
int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
13291357
void *insn, int insn_len);
@@ -1506,32 +1534,22 @@ enum {
15061534
#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0)
15071535
#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm)
15081536

1509-
asmlinkage void __noreturn kvm_spurious_fault(void);
1537+
asmlinkage void kvm_spurious_fault(void);
15101538

15111539
/*
15121540
* Hardware virtualization extension instructions may fault if a
15131541
* reboot turns off virtualization while processes are running.
15141542
* Usually after catching the fault we just panic; during reboot
15151543
* instead the instruction is ignored.
15161544
*/
1517-
#define ____kvm_handle_fault_on_reboot(insn, cleanup_insn) \
1545+
#define __kvm_handle_fault_on_reboot(insn) \
15181546
"666: \n\t" \
15191547
insn "\n\t" \
15201548
"jmp 668f \n\t" \
15211549
"667: \n\t" \
15221550
"call kvm_spurious_fault \n\t" \
15231551
"668: \n\t" \
1524-
".pushsection .fixup, \"ax\" \n\t" \
1525-
"700: \n\t" \
1526-
cleanup_insn "\n\t" \
1527-
"cmpb $0, kvm_rebooting\n\t" \
1528-
"je 667b \n\t" \
1529-
"jmp 668b \n\t" \
1530-
".popsection \n\t" \
1531-
_ASM_EXTABLE(666b, 700b)
1532-
1533-
#define __kvm_handle_fault_on_reboot(insn) \
1534-
____kvm_handle_fault_on_reboot(insn, "")
1552+
_ASM_EXTABLE(666b, 667b)
15351553

15361554
#define KVM_ARCH_WANT_MMU_NOTIFIER
15371555
int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end);

arch/x86/include/asm/svm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ enum {
5252
INTERCEPT_MWAIT,
5353
INTERCEPT_MWAIT_COND,
5454
INTERCEPT_XSETBV,
55+
INTERCEPT_RDPRU,
5556
};
5657

5758

arch/x86/include/asm/vmx.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
#define SECONDARY_EXEC_PT_USE_GPA 0x01000000
7070
#define SECONDARY_EXEC_MODE_BASED_EPT_EXEC 0x00400000
7171
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
72+
#define SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE 0x04000000
7273

7374
#define PIN_BASED_EXT_INTR_MASK 0x00000001
7475
#define PIN_BASED_NMI_EXITING 0x00000008
@@ -110,6 +111,7 @@
110111
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
111112
#define VMX_MISC_ACTIVITY_HLT 0x00000040
112113
#define VMX_MISC_ZERO_LEN_INS 0x40000000
114+
#define VMX_MISC_MSR_LIST_MULTIPLIER 512
113115

114116
/* VMFUNC functions */
115117
#define VMX_VMFUNC_EPTP_SWITCHING 0x00000001

arch/x86/include/uapi/asm/svm.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,7 @@
7575
#define SVM_EXIT_MWAIT 0x08b
7676
#define SVM_EXIT_MWAIT_COND 0x08c
7777
#define SVM_EXIT_XSETBV 0x08d
78+
#define SVM_EXIT_RDPRU 0x08e
7879
#define SVM_EXIT_NPF 0x400
7980
#define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401
8081
#define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402

arch/x86/include/uapi/asm/vmx.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,8 @@
8686
#define EXIT_REASON_PML_FULL 62
8787
#define EXIT_REASON_XSAVES 63
8888
#define EXIT_REASON_XRSTORS 64
89+
#define EXIT_REASON_UMWAIT 67
90+
#define EXIT_REASON_TPAUSE 68
8991

9092
#define VMX_EXIT_REASONS \
9193
{ EXIT_REASON_EXCEPTION_NMI, "EXCEPTION_NMI" }, \
@@ -144,7 +146,9 @@
144146
{ EXIT_REASON_RDSEED, "RDSEED" }, \
145147
{ EXIT_REASON_PML_FULL, "PML_FULL" }, \
146148
{ EXIT_REASON_XSAVES, "XSAVES" }, \
147-
{ EXIT_REASON_XRSTORS, "XRSTORS" }
149+
{ EXIT_REASON_XRSTORS, "XRSTORS" }, \
150+
{ EXIT_REASON_UMWAIT, "UMWAIT" }, \
151+
{ EXIT_REASON_TPAUSE, "TPAUSE" }
148152

149153
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
150154
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2

arch/x86/kernel/cpu/umwait.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@
1717
*/
1818
static u32 umwait_control_cached = UMWAIT_CTRL_VAL(100000, UMWAIT_C02_ENABLE);
1919

20+
u32 get_umwait_control_msr(void)
21+
{
22+
return umwait_control_cached;
23+
}
24+
EXPORT_SYMBOL_GPL(get_umwait_control_msr);
25+
2026
/*
2127
* Cache the original IA32_UMWAIT_CONTROL MSR value which is configured by
2228
* hardware or BIOS before kernel boot.

arch/x86/kvm/cpuid.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,13 @@ static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
304304
case 7:
305305
case 0xb:
306306
case 0xd:
307+
case 0xf:
308+
case 0x10:
309+
case 0x12:
307310
case 0x14:
311+
case 0x17:
312+
case 0x18:
313+
case 0x1f:
308314
case 0x8000001d:
309315
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
310316
break;
@@ -360,7 +366,7 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
360366
F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
361367
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
362368
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
363-
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
369+
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/;
364370

365371
/* cpuid 7.0.edx*/
366372
const u32 kvm_cpuid_7_0_edx_x86_features =

arch/x86/kvm/hyperv.c

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "ioapic.h"
2424
#include "hyperv.h"
2525

26+
#include <linux/cpu.h>
2627
#include <linux/kvm_host.h>
2728
#include <linux/highmem.h>
2829
#include <linux/sched/cputime.h>
@@ -645,7 +646,9 @@ static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
645646
.vector = stimer->config.apic_vector
646647
};
647648

648-
return !kvm_apic_set_irq(vcpu, &irq, NULL);
649+
if (lapic_in_kernel(vcpu))
650+
return !kvm_apic_set_irq(vcpu, &irq, NULL);
651+
return 0;
649652
}
650653

651654
static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
@@ -1852,7 +1855,13 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
18521855

18531856
ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
18541857
ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
1855-
ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
1858+
1859+
/*
1860+
* Direct Synthetic timers only make sense with in-kernel
1861+
* LAPIC
1862+
*/
1863+
if (lapic_in_kernel(vcpu))
1864+
ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
18561865

18571866
break;
18581867

@@ -1864,7 +1873,8 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
18641873
ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
18651874
if (evmcs_ver)
18661875
ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
1867-
1876+
if (!cpu_smt_possible())
1877+
ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
18681878
/*
18691879
* Default number of spinlock retry attempts, matches
18701880
* HyperV 2016.

0 commit comments

Comments
 (0)