Skip to content

Commit 7d41e24

Browse files
committed
Merge tag 'kvm-x86-misc-6.10' of https://github.com/kvm-x86/linux into HEAD
KVM x86 misc changes for 6.10:

 - Advertise the max mappable GPA in the "guest MAXPHYADDR" CPUID field, which is unused by hardware, so that KVM can communicate its inability to map GPAs that set bits 51:48 due to lack of 5-level paging. Guest firmware is expected to use the information to safely remap BARs in the uppermost GPA space, i.e. to avoid placing a BAR at a legal, but unmappable, GPA.

 - Use vfree() instead of kvfree() for allocations that always use vcalloc() or __vcalloc().

 - Don't completely ignore same-value writes to immutable feature MSRs, as doing so results in KVM failing to reject accesses to MSRs that aren't supposed to exist given the vCPU model and/or KVM configuration.

 - Don't mark APICv as being inhibited due to ABSENT if APICv is disabled KVM-wide to avoid confusing debuggers (KVM will never bother clearing the ABSENT inhibit, even if userspace enables in-kernel local APIC).
2 parents 5a1c72e + 51937f2 commit 7d41e24

File tree

7 files changed

+53
-31
lines changed

7 files changed

+53
-31
lines changed

arch/x86/kvm/cpuid.c

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1232,26 +1232,47 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function)
12321232
entry->eax = entry->ebx = entry->ecx = 0;
12331233
break;
12341234
case 0x80000008: {
1235-
unsigned g_phys_as = (entry->eax >> 16) & 0xff;
1236-
unsigned virt_as = max((entry->eax >> 8) & 0xff, 48U);
1237-
unsigned phys_as = entry->eax & 0xff;
1235+
/*
1236+
* GuestPhysAddrSize (EAX[23:16]) is intended for software
1237+
* use.
1238+
*
1239+
* KVM's ABI is to report the effective MAXPHYADDR for the
1240+
* guest in PhysAddrSize (phys_as), and the maximum
1241+
* *addressable* GPA in GuestPhysAddrSize (g_phys_as).
1242+
*
1243+
* GuestPhysAddrSize is valid if and only if TDP is enabled,
1244+
* in which case the max GPA that can be addressed by KVM may
1245+
* be less than the max GPA that can be legally generated by
1246+
* the guest, e.g. if MAXPHYADDR>48 but the CPU doesn't
1247+
* support 5-level TDP.
1248+
*/
1249+
unsigned int virt_as = max((entry->eax >> 8) & 0xff, 48U);
1250+
unsigned int phys_as, g_phys_as;
12381251

12391252
/*
12401253
* If TDP (NPT) is disabled use the adjusted host MAXPHYADDR as
12411254
* the guest operates in the same PA space as the host, i.e.
12421255
* reductions in MAXPHYADDR for memory encryption affect shadow
12431256
* paging, too.
12441257
*
1245-
* If TDP is enabled but an explicit guest MAXPHYADDR is not
1246-
* provided, use the raw bare metal MAXPHYADDR as reductions to
1247-
* the HPAs do not affect GPAs.
1258+
* If TDP is enabled, use the raw bare metal MAXPHYADDR as
1259+
* reductions to the HPAs do not affect GPAs. The max
1260+
* addressable GPA is the same as the max effective GPA, except
1261+
* that it's capped at 48 bits if 5-level TDP isn't supported
1262+
* (hardware processes bits 51:48 only when walking the fifth
1263+
* level page table).
12481264
*/
1249-
if (!tdp_enabled)
1250-
g_phys_as = boot_cpu_data.x86_phys_bits;
1251-
else if (!g_phys_as)
1265+
if (!tdp_enabled) {
1266+
phys_as = boot_cpu_data.x86_phys_bits;
1267+
g_phys_as = 0;
1268+
} else {
1269+
phys_as = entry->eax & 0xff;
12521270
g_phys_as = phys_as;
1271+
if (kvm_mmu_get_max_tdp_level() < 5)
1272+
g_phys_as = min(g_phys_as, 48);
1273+
}
12531274

1254-
entry->eax = g_phys_as | (virt_as << 8);
1275+
entry->eax = phys_as | (virt_as << 8) | (g_phys_as << 16);
12551276
entry->ecx &= ~(GENMASK(31, 16) | GENMASK(11, 8));
12561277
entry->edx = 0;
12571278
cpuid_entry_override(entry, CPUID_8000_0008_EBX);

arch/x86/kvm/mmu.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,8 @@ static inline u8 kvm_get_shadow_phys_bits(void)
100100
return boot_cpu_data.x86_phys_bits;
101101
}
102102

103+
u8 kvm_mmu_get_max_tdp_level(void);
104+
103105
void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask);
104106
void kvm_mmu_set_me_spte_mask(u64 me_value, u64 me_mask);
105107
void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only);

arch/x86/kvm/mmu/mmu.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5316,6 +5316,11 @@ static inline int kvm_mmu_get_tdp_level(struct kvm_vcpu *vcpu)
53165316
return max_tdp_level;
53175317
}
53185318

5319+
u8 kvm_mmu_get_max_tdp_level(void)
5320+
{
5321+
return tdp_root_level ? tdp_root_level : max_tdp_level;
5322+
}
5323+
53195324
static union kvm_mmu_page_role
53205325
kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu,
53215326
union kvm_cpu_role cpu_role)

arch/x86/kvm/mmu/page_track.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ bool kvm_page_track_write_tracking_enabled(struct kvm *kvm)
4141

4242
void kvm_page_track_free_memslot(struct kvm_memory_slot *slot)
4343
{
44-
kvfree(slot->arch.gfn_write_track);
44+
vfree(slot->arch.gfn_write_track);
4545
slot->arch.gfn_write_track = NULL;
4646
}
4747

arch/x86/kvm/trace.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1074,7 +1074,7 @@ TRACE_EVENT(kvm_smm_transition,
10741074
);
10751075

10761076
/*
1077-
* Tracepoint for VT-d posted-interrupts.
1077+
* Tracepoint for VT-d posted-interrupts and AMD-Vi Guest Virtual APIC.
10781078
*/
10791079
TRACE_EVENT(kvm_pi_irte_update,
10801080
TP_PROTO(unsigned int host_irq, unsigned int vcpu_id,
@@ -1100,7 +1100,7 @@ TRACE_EVENT(kvm_pi_irte_update,
11001100
__entry->set = set;
11011101
),
11021102

1103-
TP_printk("VT-d PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
1103+
TP_printk("PI is %s for irq %u, vcpu %u, gsi: 0x%x, "
11041104
"gvec: 0x%x, pi_desc_addr: 0x%llx",
11051105
__entry->set ? "enabled and being updated" : "disabled",
11061106
__entry->host_irq,

arch/x86/kvm/x86.c

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -2233,16 +2233,13 @@ static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
22332233
/*
22342234
* Disallow writes to immutable feature MSRs after KVM_RUN. KVM does
22352235
* not support modifying the guest vCPU model on the fly, e.g. changing
2236-
* the nVMX capabilities while L2 is running is nonsensical. Ignore
2236+
* the nVMX capabilities while L2 is running is nonsensical. Allow
22372237
* writes of the same value, e.g. to allow userspace to blindly stuff
22382238
* all MSRs when emulating RESET.
22392239
*/
2240-
if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index)) {
2241-
if (do_get_msr(vcpu, index, &val) || *data != val)
2242-
return -EINVAL;
2243-
2244-
return 0;
2245-
}
2240+
if (kvm_vcpu_has_run(vcpu) && kvm_is_immutable_feature_msr(index) &&
2241+
(do_get_msr(vcpu, index, &val) || *data != val))
2242+
return -EINVAL;
22462243

22472244
return kvm_set_msr_ignored_check(vcpu, index, *data, true);
22482245
}
@@ -10031,15 +10028,12 @@ static void set_or_clear_apicv_inhibit(unsigned long *inhibits,
1003110028

1003210029
static void kvm_apicv_init(struct kvm *kvm)
1003310030
{
10034-
unsigned long *inhibits = &kvm->arch.apicv_inhibit_reasons;
10031+
enum kvm_apicv_inhibit reason = enable_apicv ? APICV_INHIBIT_REASON_ABSENT :
10032+
APICV_INHIBIT_REASON_DISABLE;
1003510033

10036-
init_rwsem(&kvm->arch.apicv_update_lock);
10037-
10038-
set_or_clear_apicv_inhibit(inhibits, APICV_INHIBIT_REASON_ABSENT, true);
10034+
set_or_clear_apicv_inhibit(&kvm->arch.apicv_inhibit_reasons, reason, true);
1003910035

10040-
if (!enable_apicv)
10041-
set_or_clear_apicv_inhibit(inhibits,
10042-
APICV_INHIBIT_REASON_DISABLE, true);
10036+
init_rwsem(&kvm->arch.apicv_update_lock);
1004310037
}
1004410038

1004510039
static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
@@ -12805,7 +12799,7 @@ static void memslot_rmap_free(struct kvm_memory_slot *slot)
1280512799
int i;
1280612800

1280712801
for (i = 0; i < KVM_NR_PAGE_SIZES; ++i) {
12808-
kvfree(slot->arch.rmap[i]);
12802+
vfree(slot->arch.rmap[i]);
1280912803
slot->arch.rmap[i] = NULL;
1281012804
}
1281112805
}
@@ -12817,7 +12811,7 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot)
1281712811
memslot_rmap_free(slot);
1281812812

1281912813
for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
12820-
kvfree(slot->arch.lpage_info[i - 1]);
12814+
vfree(slot->arch.lpage_info[i - 1]);
1282112815
slot->arch.lpage_info[i - 1] = NULL;
1282212816
}
1282312817

@@ -12909,7 +12903,7 @@ static int kvm_alloc_memslot_metadata(struct kvm *kvm,
1290912903
memslot_rmap_free(slot);
1291012904

1291112905
for (i = 1; i < KVM_NR_PAGE_SIZES; ++i) {
12912-
kvfree(slot->arch.lpage_info[i - 1]);
12906+
vfree(slot->arch.lpage_info[i - 1]);
1291312907
slot->arch.lpage_info[i - 1] = NULL;
1291412908
}
1291512909
return -ENOMEM;

virt/kvm/kvm_main.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -974,7 +974,7 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot)
974974
if (!memslot->dirty_bitmap)
975975
return;
976976

977-
kvfree(memslot->dirty_bitmap);
977+
vfree(memslot->dirty_bitmap);
978978
memslot->dirty_bitmap = NULL;
979979
}
980980

0 commit comments

Comments
 (0)