Skip to content

Commit 7b1b868

Browse files
committed
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm fixes from Paolo Bonzini: "Bugfixes for ARM, x86 and tools" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: tools/kvm_stat: Exempt time-based counters KVM: mmu: Fix SPTE encoding of MMIO generation upper half kvm: x86/mmu: Use cpuid to determine max gfn kvm: svm: de-allocate svm_cpu_data for all cpus in svm_cpu_uninit() selftests: kvm/set_memory_region_test: Fix race in move region test KVM: arm64: Add usage of stage 2 fault lookup level in user_mem_abort() KVM: arm64: Fix handling of merging tables into a block entry KVM: arm64: Fix memory leak on stage2 update of a valid PTE
2 parents b53966f + 111d0bd commit 7b1b868

File tree

11 files changed

+74
-22
lines changed

11 files changed

+74
-22
lines changed

Documentation/virt/kvm/mmu.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ If the generation number of the spte does not equal the global generation
455455
number, it will ignore the cached MMIO information and handle the page
456456
fault through the slow path.
457457

458-
Since only 19 bits are used to store generation-number on mmio spte, all
458+
Since only 18 bits are used to store generation-number on mmio spte, all
459459
pages are zapped when there is an overflow.
460460

461461
Unfortunately, a single memory access might access kvm_memslots(kvm) multiple

arch/arm64/include/asm/esr.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
/* Shared ISS fault status code(IFSC/DFSC) for Data/Instruction aborts */
105105
#define ESR_ELx_FSC (0x3F)
106106
#define ESR_ELx_FSC_TYPE (0x3C)
107+
#define ESR_ELx_FSC_LEVEL (0x03)
107108
#define ESR_ELx_FSC_EXTABT (0x10)
108109
#define ESR_ELx_FSC_SERROR (0x11)
109110
#define ESR_ELx_FSC_ACCESS (0x08)

arch/arm64/include/asm/kvm_emulate.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,11 @@ static __always_inline u8 kvm_vcpu_trap_get_fault_type(const struct kvm_vcpu *vc
350350
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_TYPE;
351351
}
352352

353+
static __always_inline u8 kvm_vcpu_trap_get_fault_level(const struct kvm_vcpu *vcpu)
354+
{
355+
return kvm_vcpu_get_esr(vcpu) & ESR_ELx_FSC_LEVEL;
356+
}
357+
353358
static __always_inline bool kvm_vcpu_abt_issea(const struct kvm_vcpu *vcpu)
354359
{
355360
switch (kvm_vcpu_trap_get_fault(vcpu)) {

arch/arm64/kvm/hyp/pgtable.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,6 +470,15 @@ static bool stage2_map_walker_try_leaf(u64 addr, u64 end, u32 level,
470470
if (!kvm_block_mapping_supported(addr, end, phys, level))
471471
return false;
472472

473+
/*
474+
* If the PTE was already valid, drop the refcount on the table
475+
* early, as it will be bumped-up again in stage2_map_walk_leaf().
476+
* This ensures that the refcount stays constant across a valid to
477+
* valid PTE update.
478+
*/
479+
if (kvm_pte_valid(*ptep))
480+
put_page(virt_to_page(ptep));
481+
473482
if (kvm_set_valid_leaf_pte(ptep, phys, data->attr, level))
474483
goto out;
475484

@@ -493,7 +502,13 @@ static int stage2_map_walk_table_pre(u64 addr, u64 end, u32 level,
493502
return 0;
494503

495504
kvm_set_invalid_pte(ptep);
496-
kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, data->mmu, addr, 0);
505+
506+
/*
507+
* Invalidate the whole stage-2, as we may have numerous leaf
508+
* entries below us which would otherwise need invalidating
509+
* individually.
510+
*/
511+
kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
497512
data->anchor = ptep;
498513
return 0;
499514
}

arch/arm64/kvm/mmu.c

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -754,10 +754,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
754754
gfn_t gfn;
755755
kvm_pfn_t pfn;
756756
bool logging_active = memslot_is_logging(memslot);
757-
unsigned long vma_pagesize;
757+
unsigned long fault_level = kvm_vcpu_trap_get_fault_level(vcpu);
758+
unsigned long vma_pagesize, fault_granule;
758759
enum kvm_pgtable_prot prot = KVM_PGTABLE_PROT_R;
759760
struct kvm_pgtable *pgt;
760761

762+
fault_granule = 1UL << ARM64_HW_PGTABLE_LEVEL_SHIFT(fault_level);
761763
write_fault = kvm_is_write_fault(vcpu);
762764
exec_fault = kvm_vcpu_trap_is_exec_fault(vcpu);
763765
VM_BUG_ON(write_fault && exec_fault);
@@ -896,7 +898,12 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
896898
else if (cpus_have_const_cap(ARM64_HAS_CACHE_DIC))
897899
prot |= KVM_PGTABLE_PROT_X;
898900

899-
if (fault_status == FSC_PERM && !(logging_active && writable)) {
901+
/*
902+
* Under the premise of getting a FSC_PERM fault, we just need to relax
903+
* permissions only if vma_pagesize equals fault_granule. Otherwise,
904+
* kvm_pgtable_stage2_map() should be called to change block size.
905+
*/
906+
if (fault_status == FSC_PERM && vma_pagesize == fault_granule) {
900907
ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot);
901908
} else {
902909
ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,

arch/x86/kvm/mmu/spte.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,8 @@ static u64 generation_mmio_spte_mask(u64 gen)
4040
WARN_ON(gen & ~MMIO_SPTE_GEN_MASK);
4141
BUILD_BUG_ON((MMIO_SPTE_GEN_HIGH_MASK | MMIO_SPTE_GEN_LOW_MASK) & SPTE_SPECIAL_MASK);
4242

43-
mask = (gen << MMIO_SPTE_GEN_LOW_START) & MMIO_SPTE_GEN_LOW_MASK;
44-
mask |= (gen << MMIO_SPTE_GEN_HIGH_START) & MMIO_SPTE_GEN_HIGH_MASK;
43+
mask = (gen << MMIO_SPTE_GEN_LOW_SHIFT) & MMIO_SPTE_GEN_LOW_MASK;
44+
mask |= (gen << MMIO_SPTE_GEN_HIGH_SHIFT) & MMIO_SPTE_GEN_HIGH_MASK;
4545
return mask;
4646
}
4747

arch/x86/kvm/mmu/spte.h

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -56,11 +56,11 @@
5656
#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
5757

5858
/*
59-
* Due to limited space in PTEs, the MMIO generation is a 19 bit subset of
59+
* Due to limited space in PTEs, the MMIO generation is a 18 bit subset of
6060
* the memslots generation and is derived as follows:
6161
*
6262
* Bits 0-8 of the MMIO generation are propagated to spte bits 3-11
63-
* Bits 9-18 of the MMIO generation are propagated to spte bits 52-61
63+
* Bits 9-17 of the MMIO generation are propagated to spte bits 54-62
6464
*
6565
* The KVM_MEMSLOT_GEN_UPDATE_IN_PROGRESS flag is intentionally not included in
6666
* the MMIO generation number, as doing so would require stealing a bit from
@@ -69,18 +69,29 @@
6969
* requires a full MMU zap). The flag is instead explicitly queried when
7070
* checking for MMIO spte cache hits.
7171
*/
72-
#define MMIO_SPTE_GEN_MASK GENMASK_ULL(17, 0)
7372

7473
#define MMIO_SPTE_GEN_LOW_START 3
7574
#define MMIO_SPTE_GEN_LOW_END 11
76-
#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
77-
MMIO_SPTE_GEN_LOW_START)
7875

7976
#define MMIO_SPTE_GEN_HIGH_START PT64_SECOND_AVAIL_BITS_SHIFT
8077
#define MMIO_SPTE_GEN_HIGH_END 62
78+
79+
#define MMIO_SPTE_GEN_LOW_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_END, \
80+
MMIO_SPTE_GEN_LOW_START)
8181
#define MMIO_SPTE_GEN_HIGH_MASK GENMASK_ULL(MMIO_SPTE_GEN_HIGH_END, \
8282
MMIO_SPTE_GEN_HIGH_START)
8383

84+
#define MMIO_SPTE_GEN_LOW_BITS (MMIO_SPTE_GEN_LOW_END - MMIO_SPTE_GEN_LOW_START + 1)
85+
#define MMIO_SPTE_GEN_HIGH_BITS (MMIO_SPTE_GEN_HIGH_END - MMIO_SPTE_GEN_HIGH_START + 1)
86+
87+
/* remember to adjust the comment above as well if you change these */
88+
static_assert(MMIO_SPTE_GEN_LOW_BITS == 9 && MMIO_SPTE_GEN_HIGH_BITS == 9);
89+
90+
#define MMIO_SPTE_GEN_LOW_SHIFT (MMIO_SPTE_GEN_LOW_START - 0)
91+
#define MMIO_SPTE_GEN_HIGH_SHIFT (MMIO_SPTE_GEN_HIGH_START - MMIO_SPTE_GEN_LOW_BITS)
92+
93+
#define MMIO_SPTE_GEN_MASK GENMASK_ULL(MMIO_SPTE_GEN_LOW_BITS + MMIO_SPTE_GEN_HIGH_BITS - 1, 0)
94+
8495
extern u64 __read_mostly shadow_nx_mask;
8596
extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
8697
extern u64 __read_mostly shadow_user_mask;
@@ -228,8 +239,8 @@ static inline u64 get_mmio_spte_generation(u64 spte)
228239
{
229240
u64 gen;
230241

231-
gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_START;
232-
gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_START;
242+
gen = (spte & MMIO_SPTE_GEN_LOW_MASK) >> MMIO_SPTE_GEN_LOW_SHIFT;
243+
gen |= (spte & MMIO_SPTE_GEN_HIGH_MASK) >> MMIO_SPTE_GEN_HIGH_SHIFT;
233244
return gen;
234245
}
235246

arch/x86/kvm/mmu/tdp_mmu.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
6666

6767
void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
6868
{
69-
gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
69+
gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
7070

7171
lockdep_assert_held(&kvm->mmu_lock);
7272

@@ -456,7 +456,7 @@ bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
456456

457457
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
458458
{
459-
gfn_t max_gfn = 1ULL << (boot_cpu_data.x86_phys_bits - PAGE_SHIFT);
459+
gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
460460
bool flush;
461461

462462
flush = kvm_tdp_mmu_zap_gfn_range(kvm, 0, max_gfn);

arch/x86/kvm/svm/svm.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -530,12 +530,12 @@ static int svm_hardware_enable(void)
530530

531531
static void svm_cpu_uninit(int cpu)
532532
{
533-
struct svm_cpu_data *sd = per_cpu(svm_data, raw_smp_processor_id());
533+
struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
534534

535535
if (!sd)
536536
return;
537537

538-
per_cpu(svm_data, raw_smp_processor_id()) = NULL;
538+
per_cpu(svm_data, cpu) = NULL;
539539
kfree(sd->sev_vmcbs);
540540
__free_page(sd->save_area);
541541
kfree(sd);

tools/kvm/kvm_stat/kvm_stat

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -742,7 +742,11 @@ class DebugfsProvider(Provider):
742742
The fields are all available KVM debugfs files
743743
744744
"""
745-
return self.walkdir(PATH_DEBUGFS_KVM)[2]
745+
exempt_list = ['halt_poll_fail_ns', 'halt_poll_success_ns']
746+
fields = [field for field in self.walkdir(PATH_DEBUGFS_KVM)[2]
747+
if field not in exempt_list]
748+
749+
return fields
746750

747751
def update_fields(self, fields_filter):
748752
"""Refresh fields, applying fields_filter"""

0 commit comments

Comments (0)