
Commit fce886a

Quentin Perret authored and Marc Zyngier committed
KVM: arm64: Plumb the pKVM MMU in KVM
Introduce the KVM_PGT_FN() helper macro to allow switching from the traditional pgtable code to the pKVM version easily in mmu.c. The cost of this 'indirection' is expected to be very minimal due to is_protected_kvm_enabled() being backed by a static key.

With this, everything is in place to allow the delegation of non-protected guest stage-2 page-tables to pKVM, so let's stop using the host's kvm_s2_mmu from EL2 and enjoy the ride.

Tested-by: Fuad Tabba <[email protected]>
Reviewed-by: Fuad Tabba <[email protected]>
Signed-off-by: Quentin Perret <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Marc Zyngier <[email protected]>
1 parent e912efe commit fce886a
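
Note (illustrative, not part of the commit): the dispatch relies on C token pasting, so a call site wrapped in KVM_PGT_FN() in mmu.c resolves to either the regular or the pKVM helper. A minimal sketch, assuming the pkvm_pgtable_stage2_init() counterpart introduced earlier in this series:

	/* Sketch only: the macro as added to mmu.c by this commit. */
	#define KVM_PGT_FN(fn)	(!is_protected_kvm_enabled() ? fn : p ## fn)

	/* A call site such as ... */
	err = KVM_PGT_FN(kvm_pgtable_stage2_init)(pgt, mmu, &kvm_s2_mm_ops);

	/*
	 * ... expands to a selection between the two implementations; the
	 * branch is effectively patched out at runtime because
	 * is_protected_kvm_enabled() is backed by a static key.
	 */
	err = (!is_protected_kvm_enabled() ? kvm_pgtable_stage2_init
					   : pkvm_pgtable_stage2_init)(pgt, mmu, &kvm_s2_mm_ops);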

4 files changed, +82 -32 lines changed

arch/arm64/include/asm/kvm_mmu.h

Lines changed: 16 additions & 0 deletions

@@ -353,6 +353,22 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
 	return &kvm->arch.mmu != mmu;
 }
 
+static inline void kvm_fault_lock(struct kvm *kvm)
+{
+	if (is_protected_kvm_enabled())
+		write_lock(&kvm->mmu_lock);
+	else
+		read_lock(&kvm->mmu_lock);
+}
+
+static inline void kvm_fault_unlock(struct kvm *kvm)
+{
+	if (is_protected_kvm_enabled())
+		write_unlock(&kvm->mmu_lock);
+	else
+		read_unlock(&kvm->mmu_lock);
+}
+
 #ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
 void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
 #else
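
These helpers are consumed by the stage-2 fault path in mmu.c further down. As a rough sketch of the pattern (condensed from the user_mem_abort() changes in this commit, not a complete handler):

	/*
	 * Condensed from user_mem_abort(): protected mode takes mmu_lock
	 * for writing, non-protected mode keeps the read-lock fast path.
	 */
	kvm_fault_lock(kvm);
	pgt = vcpu->arch.hw_mmu->pgt;
	ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, vma_pagesize,
						 __pfn_to_phys(pfn), prot,
						 memcache, flags);
	kvm_fault_unlock(kvm);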

arch/arm64/kvm/arm.c

Lines changed: 8 additions & 1 deletion

@@ -502,7 +502,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
 
 void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
 {
-	kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+	if (!is_protected_kvm_enabled())
+		kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
+	else
+		free_hyp_memcache(&vcpu->arch.pkvm_memcache);
 	kvm_timer_vcpu_terminate(vcpu);
 	kvm_pmu_vcpu_destroy(vcpu);
 	kvm_vgic_vcpu_destroy(vcpu);
@@ -574,6 +577,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	struct kvm_s2_mmu *mmu;
 	int *last_ran;
 
+	if (is_protected_kvm_enabled())
+		goto nommu;
+
 	if (vcpu_has_nv(vcpu))
 		kvm_vcpu_load_hw_mmu(vcpu);
 
@@ -594,6 +600,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 		*last_ran = vcpu->vcpu_idx;
 	}
 
+nommu:
 	vcpu->cpu = cpu;
 
 	kvm_vgic_load(vcpu);

arch/arm64/kvm/hyp/nvhe/hyp-main.c

Lines changed: 0 additions & 2 deletions

@@ -103,8 +103,6 @@ static void flush_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu)
 	/* Limit guest vector length to the maximum supported by the host. */
 	hyp_vcpu->vcpu.arch.sve_max_vl = min(host_vcpu->arch.sve_max_vl, kvm_host_sve_max_vl);
 
-	hyp_vcpu->vcpu.arch.hw_mmu = host_vcpu->arch.hw_mmu;
-
 	hyp_vcpu->vcpu.arch.mdcr_el2 = host_vcpu->arch.mdcr_el2;
 	hyp_vcpu->vcpu.arch.hcr_el2 &= ~(HCR_TWI | HCR_TWE);
 	hyp_vcpu->vcpu.arch.hcr_el2 |= READ_ONCE(host_vcpu->arch.hcr_el2) &

arch/arm64/kvm/mmu.c

Lines changed: 58 additions & 29 deletions

@@ -15,6 +15,7 @@
 #include <asm/kvm_arm.h>
 #include <asm/kvm_mmu.h>
 #include <asm/kvm_pgtable.h>
+#include <asm/kvm_pkvm.h>
 #include <asm/kvm_ras.h>
 #include <asm/kvm_asm.h>
 #include <asm/kvm_emulate.h>
@@ -31,6 +32,8 @@ static phys_addr_t __ro_after_init hyp_idmap_vector;
 
 static unsigned long __ro_after_init io_map_base;
 
+#define KVM_PGT_FN(fn)		(!is_protected_kvm_enabled() ? fn : p ## fn)
+
 static phys_addr_t __stage2_range_addr_end(phys_addr_t addr, phys_addr_t end,
 					   phys_addr_t size)
 {
@@ -147,7 +150,7 @@ static int kvm_mmu_split_huge_pages(struct kvm *kvm, phys_addr_t addr,
 			return -EINVAL;
 
 		next = __stage2_range_addr_end(addr, end, chunk_size);
-		ret = kvm_pgtable_stage2_split(pgt, addr, next - addr, cache);
+		ret = KVM_PGT_FN(kvm_pgtable_stage2_split)(pgt, addr, next - addr, cache);
 		if (ret)
 			break;
 	} while (addr = next, addr != end);
@@ -168,15 +171,23 @@ static bool memslot_is_logging(struct kvm_memory_slot *memslot)
  */
 int kvm_arch_flush_remote_tlbs(struct kvm *kvm)
 {
-	kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
+	if (is_protected_kvm_enabled())
+		kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle);
+	else
+		kvm_call_hyp(__kvm_tlb_flush_vmid, &kvm->arch.mmu);
 	return 0;
 }
 
 int kvm_arch_flush_remote_tlbs_range(struct kvm *kvm,
 				      gfn_t gfn, u64 nr_pages)
 {
-	kvm_tlb_flush_vmid_range(&kvm->arch.mmu,
-				 gfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
+	u64 size = nr_pages << PAGE_SHIFT;
+	u64 addr = gfn << PAGE_SHIFT;
+
+	if (is_protected_kvm_enabled())
+		kvm_call_hyp_nvhe(__pkvm_tlb_flush_vmid, kvm->arch.pkvm.handle);
+	else
+		kvm_tlb_flush_vmid_range(&kvm->arch.mmu, addr, size);
 	return 0;
 }
 
@@ -225,7 +236,7 @@ static void stage2_free_unlinked_table_rcu_cb(struct rcu_head *head)
 	void *pgtable = page_to_virt(page);
 	s8 level = page_private(page);
 
-	kvm_pgtable_stage2_free_unlinked(&kvm_s2_mm_ops, pgtable, level);
+	KVM_PGT_FN(kvm_pgtable_stage2_free_unlinked)(&kvm_s2_mm_ops, pgtable, level);
 }
 
 static void stage2_free_unlinked_table(void *addr, s8 level)
@@ -324,7 +335,7 @@ static void __unmap_stage2_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
 	WARN_ON(size & ~PAGE_MASK);
-	WARN_ON(stage2_apply_range(mmu, start, end, kvm_pgtable_stage2_unmap,
+	WARN_ON(stage2_apply_range(mmu, start, end, KVM_PGT_FN(kvm_pgtable_stage2_unmap),
				   may_block));
 }
 
@@ -336,7 +347,7 @@ void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start,
 
 void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
 {
-	stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_flush);
+	stage2_apply_range_resched(mmu, addr, end, KVM_PGT_FN(kvm_pgtable_stage2_flush));
 }
 
 static void stage2_flush_memslot(struct kvm *kvm,
@@ -942,10 +953,14 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
 		return -ENOMEM;
 
 	mmu->arch = &kvm->arch;
-	err = kvm_pgtable_stage2_init(pgt, mmu, &kvm_s2_mm_ops);
+	err = KVM_PGT_FN(kvm_pgtable_stage2_init)(pgt, mmu, &kvm_s2_mm_ops);
 	if (err)
 		goto out_free_pgtable;
 
+	mmu->pgt = pgt;
+	if (is_protected_kvm_enabled())
+		return 0;
+
 	mmu->last_vcpu_ran = alloc_percpu(typeof(*mmu->last_vcpu_ran));
 	if (!mmu->last_vcpu_ran) {
 		err = -ENOMEM;
@@ -959,7 +974,6 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
 	mmu->split_page_chunk_size = KVM_ARM_EAGER_SPLIT_CHUNK_SIZE_DEFAULT;
 	mmu->split_page_cache.gfp_zero = __GFP_ZERO;
 
-	mmu->pgt = pgt;
 	mmu->pgd_phys = __pa(pgt->pgd);
 
 	if (kvm_is_nested_s2_mmu(kvm, mmu))
@@ -968,7 +982,7 @@ int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long t
 	return 0;
 
 out_destroy_pgtable:
-	kvm_pgtable_stage2_destroy(pgt);
+	KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
 out_free_pgtable:
 	kfree(pgt);
 	return err;
@@ -1065,7 +1079,7 @@ void kvm_free_stage2_pgd(struct kvm_s2_mmu *mmu)
 	write_unlock(&kvm->mmu_lock);
 
 	if (pgt) {
-		kvm_pgtable_stage2_destroy(pgt);
+		KVM_PGT_FN(kvm_pgtable_stage2_destroy)(pgt);
 		kfree(pgt);
 	}
 }
@@ -1082,16 +1096,24 @@ static void *hyp_mc_alloc_fn(void *unused)
 
 void free_hyp_memcache(struct kvm_hyp_memcache *mc)
 {
-	if (is_protected_kvm_enabled())
-		__free_hyp_memcache(mc, hyp_mc_free_fn,
-				    kvm_host_va, NULL);
+	if (!is_protected_kvm_enabled())
+		return;
+
+	kfree(mc->mapping);
+	__free_hyp_memcache(mc, hyp_mc_free_fn, kvm_host_va, NULL);
 }
 
 int topup_hyp_memcache(struct kvm_hyp_memcache *mc, unsigned long min_pages)
 {
 	if (!is_protected_kvm_enabled())
 		return 0;
 
+	if (!mc->mapping) {
+		mc->mapping = kzalloc(sizeof(struct pkvm_mapping), GFP_KERNEL_ACCOUNT);
+		if (!mc->mapping)
+			return -ENOMEM;
+	}
+
 	return __topup_hyp_memcache(mc, min_pages, hyp_mc_alloc_fn,
 				    kvm_host_pa, NULL);
 }
@@ -1130,8 +1152,8 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
 			break;
 
 		write_lock(&kvm->mmu_lock);
-		ret = kvm_pgtable_stage2_map(pgt, addr, PAGE_SIZE, pa, prot,
-					     &cache, 0);
+		ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, addr, PAGE_SIZE,
+							 pa, prot, &cache, 0);
 		write_unlock(&kvm->mmu_lock);
 		if (ret)
 			break;
@@ -1151,7 +1173,7 @@ int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa,
  */
 void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end)
 {
-	stage2_apply_range_resched(mmu, addr, end, kvm_pgtable_stage2_wrprotect);
+	stage2_apply_range_resched(mmu, addr, end, KVM_PGT_FN(kvm_pgtable_stage2_wrprotect));
 }
 
 /**
@@ -1442,9 +1464,9 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	unsigned long mmu_seq;
 	phys_addr_t ipa = fault_ipa;
 	struct kvm *kvm = vcpu->kvm;
-	struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache;
 	struct vm_area_struct *vma;
 	short vma_shift;
+	void *memcache;
 	gfn_t gfn;
 	kvm_pfn_t pfn;
 	bool logging_active = memslot_is_logging(memslot);
@@ -1472,8 +1494,15 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * and a write fault needs to collapse a block entry into a table.
 	 */
 	if (!fault_is_perm || (logging_active && write_fault)) {
-		ret = kvm_mmu_topup_memory_cache(memcache,
-						 kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu));
+		int min_pages = kvm_mmu_cache_min_pages(vcpu->arch.hw_mmu);
+
+		if (!is_protected_kvm_enabled()) {
+			memcache = &vcpu->arch.mmu_page_cache;
+			ret = kvm_mmu_topup_memory_cache(memcache, min_pages);
+		} else {
+			memcache = &vcpu->arch.pkvm_memcache;
+			ret = topup_hyp_memcache(memcache, min_pages);
+		}
 		if (ret)
 			return ret;
 	}
@@ -1494,7 +1523,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	 * logging_active is guaranteed to never be true for VM_PFNMAP
 	 * memslots.
	 */
-	if (logging_active) {
+	if (logging_active || is_protected_kvm_enabled()) {
 		force_pte = true;
 		vma_shift = PAGE_SHIFT;
 	} else {
@@ -1634,7 +1663,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		prot |= kvm_encode_nested_level(nested);
 	}
 
-	read_lock(&kvm->mmu_lock);
+	kvm_fault_lock(kvm);
 	pgt = vcpu->arch.hw_mmu->pgt;
 	if (mmu_invalidate_retry(kvm, mmu_seq)) {
 		ret = -EAGAIN;
@@ -1696,16 +1725,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		 * PTE, which will be preserved.
 		 */
 		prot &= ~KVM_NV_GUEST_MAP_SZ;
-		ret = kvm_pgtable_stage2_relax_perms(pgt, fault_ipa, prot, flags);
+		ret = KVM_PGT_FN(kvm_pgtable_stage2_relax_perms)(pgt, fault_ipa, prot, flags);
 	} else {
-		ret = kvm_pgtable_stage2_map(pgt, fault_ipa, vma_pagesize,
+		ret = KVM_PGT_FN(kvm_pgtable_stage2_map)(pgt, fault_ipa, vma_pagesize,
 					     __pfn_to_phys(pfn), prot,
 					     memcache, flags);
 	}
 
 out_unlock:
 	kvm_release_faultin_page(kvm, page, !!ret, writable);
-	read_unlock(&kvm->mmu_lock);
+	kvm_fault_unlock(kvm);
 
 	/* Mark the page dirty only if the fault is handled successfully */
 	if (writable && !ret)
@@ -1724,7 +1753,7 @@ static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa)
 
 	read_lock(&vcpu->kvm->mmu_lock);
 	mmu = vcpu->arch.hw_mmu;
-	kvm_pgtable_stage2_mkyoung(mmu->pgt, fault_ipa, flags);
+	KVM_PGT_FN(kvm_pgtable_stage2_mkyoung)(mmu->pgt, fault_ipa, flags);
 	read_unlock(&vcpu->kvm->mmu_lock);
 }
 
@@ -1764,7 +1793,7 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu)
 	}
 
 	/* Falls between the IPA range and the PARange? */
-	if (fault_ipa >= BIT_ULL(vcpu->arch.hw_mmu->pgt->ia_bits)) {
+	if (fault_ipa >= BIT_ULL(VTCR_EL2_IPA(vcpu->arch.hw_mmu->vtcr))) {
 		fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0);
 
 		if (is_iabt)
@@ -1930,7 +1959,7 @@ bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	if (!kvm->arch.mmu.pgt)
 		return false;
 
-	return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+	return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
 						   range->start << PAGE_SHIFT,
 						   size, true);
 	/*
@@ -1946,7 +1975,7 @@ bool kvm_test_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range)
 	if (!kvm->arch.mmu.pgt)
 		return false;
 
-	return kvm_pgtable_stage2_test_clear_young(kvm->arch.mmu.pgt,
+	return KVM_PGT_FN(kvm_pgtable_stage2_test_clear_young)(kvm->arch.mmu.pgt,
 						   range->start << PAGE_SHIFT,
 						   size, false);
 }
