Skip to content

Commit b145b0e

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM fixes from Paolo Bonzini:

  "ARM and x86 bugfixes of all kinds. The most visible one is that
   migrating a nested hypervisor has always been busted on Broadwell and
   newer processors, and that has finally been fixed"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (22 commits)
  KVM: x86: omit "impossible" pmu MSRs from MSR list
  KVM: nVMX: Fix consistency check on injected exception error code
  KVM: x86: omit absent pmu MSRs from MSR list
  selftests: kvm: Fix libkvm build error
  kvm: vmx: Limit guest PMCs to those supported on the host
  kvm: x86, powerpc: do not allow clearing largepages debugfs entry
  KVM: selftests: x86: clarify what is reported on KVM_GET_MSRS failure
  KVM: VMX: Set VMENTER_L1D_FLUSH_NOT_REQUIRED if !X86_BUG_L1TF
  selftests: kvm: add test for dirty logging inside nested guests
  KVM: x86: fix nested guest live migration with PML
  KVM: x86: assign two bits to track SPTE kinds
  KVM: x86: Expose XSAVEERPTR to the guest
  kvm: x86: Enumerate support for CLZERO instruction
  kvm: x86: Use AMD CPUID semantics for AMD vCPUs
  kvm: x86: Improve emulation of CPUID leaves 0BH and 1FH
  KVM: X86: Fix userspace set invalid CR4
  kvm: x86: Fix a spurious -E2BIG in __do_cpuid_func
  KVM: LAPIC: Loosen filter for adaptive tuning of lapic_timer_advance_ns
  KVM: arm/arm64: vgic: Use the appropriate TRACE_INCLUDE_PATH
  arm64: KVM: Kill hyp_alternate_select()
  ...
2 parents 50dfd03 + cf05a67 commit b145b0e

File tree

23 files changed

+584
-182
lines changed

23 files changed

+584
-182
lines changed

arch/arm64/include/asm/kvm_hyp.h

Lines changed: 0 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -47,30 +47,6 @@
4747
#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
4848
#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
4949

50-
/**
51-
* hyp_alternate_select - Generates patchable code sequences that are
52-
* used to switch between two implementations of a function, depending
53-
* on the availability of a feature.
54-
*
55-
* @fname: a symbol name that will be defined as a function returning a
56-
* function pointer whose type will match @orig and @alt
57-
* @orig: A pointer to the default function, as returned by @fname when
58-
* @cond doesn't hold
59-
* @alt: A pointer to the alternate function, as returned by @fname
60-
* when @cond holds
61-
* @cond: a CPU feature (as described in asm/cpufeature.h)
62-
*/
63-
#define hyp_alternate_select(fname, orig, alt, cond) \
64-
typeof(orig) * __hyp_text fname(void) \
65-
{ \
66-
typeof(alt) *val = orig; \
67-
asm volatile(ALTERNATIVE("nop \n", \
68-
"mov %0, %1 \n", \
69-
cond) \
70-
: "+r" (val) : "r" (alt)); \
71-
return val; \
72-
}
73-
7450
int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu);
7551

7652
void __vgic_v3_save_state(struct kvm_vcpu *vcpu);

arch/arm64/kvm/hyp/switch.c

Lines changed: 2 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -229,20 +229,6 @@ static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu)
229229
}
230230
}
231231

232-
static bool __hyp_text __true_value(void)
233-
{
234-
return true;
235-
}
236-
237-
static bool __hyp_text __false_value(void)
238-
{
239-
return false;
240-
}
241-
242-
static hyp_alternate_select(__check_arm_834220,
243-
__false_value, __true_value,
244-
ARM64_WORKAROUND_834220);
245-
246232
static bool __hyp_text __translate_far_to_hpfar(u64 far, u64 *hpfar)
247233
{
248234
u64 par, tmp;
@@ -298,7 +284,8 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
298284
* resolve the IPA using the AT instruction.
299285
*/
300286
if (!(esr & ESR_ELx_S1PTW) &&
301-
(__check_arm_834220()() || (esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
287+
(cpus_have_const_cap(ARM64_WORKAROUND_834220) ||
288+
(esr & ESR_ELx_FSC_TYPE) == FSC_PERM)) {
302289
if (!__translate_far_to_hpfar(far, &hpfar))
303290
return false;
304291
} else {

arch/arm64/kvm/hyp/tlb.c

Lines changed: 22 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -67,10 +67,14 @@ static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm,
6767
isb();
6868
}
6969

70-
static hyp_alternate_select(__tlb_switch_to_guest,
71-
__tlb_switch_to_guest_nvhe,
72-
__tlb_switch_to_guest_vhe,
73-
ARM64_HAS_VIRT_HOST_EXTN);
70+
static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm,
71+
struct tlb_inv_context *cxt)
72+
{
73+
if (has_vhe())
74+
__tlb_switch_to_guest_vhe(kvm, cxt);
75+
else
76+
__tlb_switch_to_guest_nvhe(kvm, cxt);
77+
}
7478

7579
static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
7680
struct tlb_inv_context *cxt)
@@ -98,10 +102,14 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm,
98102
write_sysreg(0, vttbr_el2);
99103
}
100104

101-
static hyp_alternate_select(__tlb_switch_to_host,
102-
__tlb_switch_to_host_nvhe,
103-
__tlb_switch_to_host_vhe,
104-
ARM64_HAS_VIRT_HOST_EXTN);
105+
static void __hyp_text __tlb_switch_to_host(struct kvm *kvm,
106+
struct tlb_inv_context *cxt)
107+
{
108+
if (has_vhe())
109+
__tlb_switch_to_host_vhe(kvm, cxt);
110+
else
111+
__tlb_switch_to_host_nvhe(kvm, cxt);
112+
}
105113

106114
void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
107115
{
@@ -111,7 +119,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
111119

112120
/* Switch to requested VMID */
113121
kvm = kern_hyp_va(kvm);
114-
__tlb_switch_to_guest()(kvm, &cxt);
122+
__tlb_switch_to_guest(kvm, &cxt);
115123

116124
/*
117125
* We could do so much better if we had the VA as well.
@@ -154,7 +162,7 @@ void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa)
154162
if (!has_vhe() && icache_is_vpipt())
155163
__flush_icache_all();
156164

157-
__tlb_switch_to_host()(kvm, &cxt);
165+
__tlb_switch_to_host(kvm, &cxt);
158166
}
159167

160168
void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
@@ -165,13 +173,13 @@ void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm)
165173

166174
/* Switch to requested VMID */
167175
kvm = kern_hyp_va(kvm);
168-
__tlb_switch_to_guest()(kvm, &cxt);
176+
__tlb_switch_to_guest(kvm, &cxt);
169177

170178
__tlbi(vmalls12e1is);
171179
dsb(ish);
172180
isb();
173181

174-
__tlb_switch_to_host()(kvm, &cxt);
182+
__tlb_switch_to_host(kvm, &cxt);
175183
}
176184

177185
void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
@@ -180,13 +188,13 @@ void __hyp_text __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu)
180188
struct tlb_inv_context cxt;
181189

182190
/* Switch to requested VMID */
183-
__tlb_switch_to_guest()(kvm, &cxt);
191+
__tlb_switch_to_guest(kvm, &cxt);
184192

185193
__tlbi(vmalle1);
186194
dsb(nsh);
187195
isb();
188196

189-
__tlb_switch_to_host()(kvm, &cxt);
197+
__tlb_switch_to_host(kvm, &cxt);
190198
}
191199

192200
void __hyp_text __kvm_flush_vm_context(void)

arch/powerpc/kvm/book3s.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -36,8 +36,8 @@
3636
#include "book3s.h"
3737
#include "trace.h"
3838

39-
#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM
40-
#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
39+
#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__
40+
#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__
4141

4242
/* #define EXIT_DEBUG */
4343

@@ -69,8 +69,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
6969
{ "pthru_all", VCPU_STAT(pthru_all) },
7070
{ "pthru_host", VCPU_STAT(pthru_host) },
7171
{ "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) },
72-
{ "largepages_2M", VM_STAT(num_2M_pages) },
73-
{ "largepages_1G", VM_STAT(num_1G_pages) },
72+
{ "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) },
73+
{ "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) },
7474
{ NULL }
7575
};
7676

arch/x86/include/asm/kvm_host.h

Lines changed: 0 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -219,13 +219,6 @@ enum {
219219
PFERR_WRITE_MASK | \
220220
PFERR_PRESENT_MASK)
221221

222-
/*
223-
* The mask used to denote special SPTEs, which can be either MMIO SPTEs or
224-
* Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting
225-
* with the SVE bit in EPT PTEs.
226-
*/
227-
#define SPTE_SPECIAL_MASK (1ULL << 62)
228-
229222
/* apic attention bits */
230223
#define KVM_APIC_CHECK_VAPIC 0
231224
/*

arch/x86/kvm/cpuid.c

Lines changed: 60 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -485,6 +485,7 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
485485

486486
/* cpuid 0x80000008.ebx */
487487
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
488+
F(CLZERO) | F(XSAVEERPTR) |
488489
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
489490
F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);
490491

@@ -618,16 +619,20 @@ static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
618619
*/
619620
case 0x1f:
620621
case 0xb: {
621-
int i, level_type;
622+
int i;
622623

623-
/* read more entries until level_type is zero */
624-
for (i = 1; ; ++i) {
624+
/*
625+
* We filled in entry[0] for CPUID(EAX=<function>,
626+
* ECX=00H) above. If its level type (ECX[15:8]) is
627+
* zero, then the leaf is unimplemented, and we're
628+
* done. Otherwise, continue to populate entries
629+
* until the level type (ECX[15:8]) of the previously
630+
* added entry is zero.
631+
*/
632+
for (i = 1; entry[i - 1].ecx & 0xff00; ++i) {
625633
if (*nent >= maxnent)
626634
goto out;
627635

628-
level_type = entry[i - 1].ecx & 0xff00;
629-
if (!level_type)
630-
break;
631636
do_host_cpuid(&entry[i], function, i);
632637
++*nent;
633638
}
@@ -969,53 +974,66 @@ struct kvm_cpuid_entry2 *kvm_find_cpuid_entry(struct kvm_vcpu *vcpu,
969974
EXPORT_SYMBOL_GPL(kvm_find_cpuid_entry);
970975

971976
/*
972-
* If no match is found, check whether we exceed the vCPU's limit
973-
* and return the content of the highest valid _standard_ leaf instead.
974-
* This is to satisfy the CPUID specification.
977+
* If the basic or extended CPUID leaf requested is higher than the
978+
* maximum supported basic or extended leaf, respectively, then it is
979+
* out of range.
975980
*/
976-
static struct kvm_cpuid_entry2* check_cpuid_limit(struct kvm_vcpu *vcpu,
977-
u32 function, u32 index)
981+
static bool cpuid_function_in_range(struct kvm_vcpu *vcpu, u32 function)
978982
{
979-
struct kvm_cpuid_entry2 *maxlevel;
980-
981-
maxlevel = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
982-
if (!maxlevel || maxlevel->eax >= function)
983-
return NULL;
984-
if (function & 0x80000000) {
985-
maxlevel = kvm_find_cpuid_entry(vcpu, 0, 0);
986-
if (!maxlevel)
987-
return NULL;
988-
}
989-
return kvm_find_cpuid_entry(vcpu, maxlevel->eax, index);
983+
struct kvm_cpuid_entry2 *max;
984+
985+
max = kvm_find_cpuid_entry(vcpu, function & 0x80000000, 0);
986+
return max && function <= max->eax;
990987
}
991988

992989
bool kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx,
993990
u32 *ecx, u32 *edx, bool check_limit)
994991
{
995992
u32 function = *eax, index = *ecx;
996-
struct kvm_cpuid_entry2 *best;
997-
bool entry_found = true;
998-
999-
best = kvm_find_cpuid_entry(vcpu, function, index);
1000-
1001-
if (!best) {
1002-
entry_found = false;
1003-
if (!check_limit)
1004-
goto out;
993+
struct kvm_cpuid_entry2 *entry;
994+
struct kvm_cpuid_entry2 *max;
995+
bool found;
1005996

1006-
best = check_cpuid_limit(vcpu, function, index);
997+
entry = kvm_find_cpuid_entry(vcpu, function, index);
998+
found = entry;
999+
/*
1000+
* Intel CPUID semantics treats any query for an out-of-range
1001+
* leaf as if the highest basic leaf (i.e. CPUID.0H:EAX) were
1002+
* requested. AMD CPUID semantics returns all zeroes for any
1003+
* undefined leaf, whether or not the leaf is in range.
1004+
*/
1005+
if (!entry && check_limit && !guest_cpuid_is_amd(vcpu) &&
1006+
!cpuid_function_in_range(vcpu, function)) {
1007+
max = kvm_find_cpuid_entry(vcpu, 0, 0);
1008+
if (max) {
1009+
function = max->eax;
1010+
entry = kvm_find_cpuid_entry(vcpu, function, index);
1011+
}
10071012
}
1008-
1009-
out:
1010-
if (best) {
1011-
*eax = best->eax;
1012-
*ebx = best->ebx;
1013-
*ecx = best->ecx;
1014-
*edx = best->edx;
1015-
} else
1013+
if (entry) {
1014+
*eax = entry->eax;
1015+
*ebx = entry->ebx;
1016+
*ecx = entry->ecx;
1017+
*edx = entry->edx;
1018+
} else {
10161019
*eax = *ebx = *ecx = *edx = 0;
1017-
trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, entry_found);
1018-
return entry_found;
1020+
/*
1021+
* When leaf 0BH or 1FH is defined, CL is pass-through
1022+
* and EDX is always the x2APIC ID, even for undefined
1023+
* subleaves. Index 1 will exist iff the leaf is
1024+
* implemented, so we pass through CL iff leaf 1
1025+
* exists. EDX can be copied from any existing index.
1026+
*/
1027+
if (function == 0xb || function == 0x1f) {
1028+
entry = kvm_find_cpuid_entry(vcpu, function, 1);
1029+
if (entry) {
1030+
*ecx = index & 0xff;
1031+
*edx = entry->edx;
1032+
}
1033+
}
1034+
}
1035+
trace_kvm_cpuid(function, *eax, *ebx, *ecx, *edx, found);
1036+
return found;
10191037
}
10201038
EXPORT_SYMBOL_GPL(kvm_cpuid);
10211039

arch/x86/kvm/lapic.c

Lines changed: 7 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -66,9 +66,10 @@
6666
#define X2APIC_BROADCAST 0xFFFFFFFFul
6767

6868
static bool lapic_timer_advance_dynamic __read_mostly;
69-
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100
70-
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 5000
71-
#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
69+
#define LAPIC_TIMER_ADVANCE_ADJUST_MIN 100 /* clock cycles */
70+
#define LAPIC_TIMER_ADVANCE_ADJUST_MAX 10000 /* clock cycles */
71+
#define LAPIC_TIMER_ADVANCE_NS_INIT 1000
72+
#define LAPIC_TIMER_ADVANCE_NS_MAX 5000
7273
/* step-by-step approximation to mitigate fluctuation */
7374
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
7475

@@ -1504,8 +1505,8 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
15041505
timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
15051506
}
15061507

1507-
if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_ADJUST_MAX))
1508-
timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
1508+
if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1509+
timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
15091510
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
15101511
}
15111512

@@ -2302,7 +2303,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
23022303
HRTIMER_MODE_ABS_HARD);
23032304
apic->lapic_timer.timer.function = apic_timer_fn;
23042305
if (timer_advance_ns == -1) {
2305-
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
2306+
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
23062307
lapic_timer_advance_dynamic = true;
23072308
} else {
23082309
apic->lapic_timer.timer_advance_ns = timer_advance_ns;

0 commit comments

Comments (0)