Skip to content

Commit d067012

Browse files
author
Marc Zyngier
committed
Merge branch kvm-arm64/pkvm-np-guest into kvmarm-master/next
* kvm-arm64/pkvm-np-guest:

pKVM support for non-protected guests using the standard MM infrastructure, courtesy of Quentin Perret. From the cover letter:

"This series moves the stage-2 page-table management of non-protected guests to EL2 when pKVM is enabled. This is only intended as an incremental step towards a 'feature-complete' pKVM, there is however a lot more that needs to come on top.

With that series applied, pKVM provides near-parity with standard KVM from a functional perspective all while Linux no longer touches the stage-2 page-tables itself at EL1. The majority of mm-related KVM features work out of the box, including MMU notifiers, dirty logging, RO memslots and things of that nature. There are however two gotchas:

- We don't support mapping devices into guests: this requires additional hypervisor support for tracking the 'state' of devices, which will come in a later series. No device assignment until then.

- Stage-2 mappings are forced to page-granularity even when backed by a huge page for the sake of simplicity of this series. I'm only aiming at functional parity-ish (from userspace's PoV) for now, support for HP can be added on top later as a perf improvement."
KVM: arm64: Plumb the pKVM MMU in KVM
KVM: arm64: Introduce the EL1 pKVM MMU
KVM: arm64: Introduce __pkvm_tlb_flush_vmid()
KVM: arm64: Introduce __pkvm_host_mkyoung_guest()
KVM: arm64: Introduce __pkvm_host_test_clear_young_guest()
KVM: arm64: Introduce __pkvm_host_wrprotect_guest()
KVM: arm64: Introduce __pkvm_host_relax_guest_perms()
KVM: arm64: Introduce __pkvm_host_unshare_guest()
KVM: arm64: Introduce __pkvm_host_share_guest()
KVM: arm64: Introduce __pkvm_vcpu_{load,put}()
KVM: arm64: Add {get,put}_pkvm_hyp_vm() helpers
KVM: arm64: Make kvm_pgtable_stage2_init() a static inline function
KVM: arm64: Pass walk flags to kvm_pgtable_stage2_relax_perms
KVM: arm64: Pass walk flags to kvm_pgtable_stage2_mkyoung
KVM: arm64: Move host page ownership tracking to the hyp vmemmap
KVM: arm64: Make hyp_page::order a u8
KVM: arm64: Move enum pkvm_page_state to memory.h
KVM: arm64: Change the layout of enum pkvm_page_state

Signed-off-by: Marc Zyngier <[email protected]>

# Conflicts:
#	arch/arm64/kvm/arm.c
2 parents b7bf7c8 + fce886a commit d067012

File tree

19 files changed

+1006
-145
lines changed

19 files changed

+1006
-145
lines changed

arch/arm64/include/asm/kvm_asm.h

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,12 @@ enum __kvm_host_smccc_func {
6464
/* Hypercalls available after pKVM finalisation */
6565
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp,
6666
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_hyp,
67+
__KVM_HOST_SMCCC_FUNC___pkvm_host_share_guest,
68+
__KVM_HOST_SMCCC_FUNC___pkvm_host_unshare_guest,
69+
__KVM_HOST_SMCCC_FUNC___pkvm_host_relax_perms_guest,
70+
__KVM_HOST_SMCCC_FUNC___pkvm_host_wrprotect_guest,
71+
__KVM_HOST_SMCCC_FUNC___pkvm_host_test_clear_young_guest,
72+
__KVM_HOST_SMCCC_FUNC___pkvm_host_mkyoung_guest,
6773
__KVM_HOST_SMCCC_FUNC___kvm_adjust_pc,
6874
__KVM_HOST_SMCCC_FUNC___kvm_vcpu_run,
6975
__KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context,
@@ -78,6 +84,9 @@ enum __kvm_host_smccc_func {
7884
__KVM_HOST_SMCCC_FUNC___pkvm_init_vm,
7985
__KVM_HOST_SMCCC_FUNC___pkvm_init_vcpu,
8086
__KVM_HOST_SMCCC_FUNC___pkvm_teardown_vm,
87+
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_load,
88+
__KVM_HOST_SMCCC_FUNC___pkvm_vcpu_put,
89+
__KVM_HOST_SMCCC_FUNC___pkvm_tlb_flush_vmid,
8190
};
8291

8392
#define DECLARE_KVM_VHE_SYM(sym) extern char sym[]

arch/arm64/include/asm/kvm_host.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
8585
struct kvm_hyp_memcache {
8686
phys_addr_t head;
8787
unsigned long nr_pages;
88+
struct pkvm_mapping *mapping; /* only used from EL1 */
8889
};
8990

9091
static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
@@ -775,6 +776,9 @@ struct kvm_vcpu_arch {
775776
/* Cache some mmu pages needed inside spinlock regions */
776777
struct kvm_mmu_memory_cache mmu_page_cache;
777778

779+
/* Pages to top-up the pKVM/EL2 guest pool */
780+
struct kvm_hyp_memcache pkvm_memcache;
781+
778782
/* Virtual SError ESR to restore when HCR_EL2.VSE is set */
779783
u64 vsesr_el2;
780784

arch/arm64/include/asm/kvm_mmu.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,22 @@ static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
353353
return &kvm->arch.mmu != mmu;
354354
}
355355

356+
static inline void kvm_fault_lock(struct kvm *kvm)
357+
{
358+
if (is_protected_kvm_enabled())
359+
write_lock(&kvm->mmu_lock);
360+
else
361+
read_lock(&kvm->mmu_lock);
362+
}
363+
364+
static inline void kvm_fault_unlock(struct kvm *kvm)
365+
{
366+
if (is_protected_kvm_enabled())
367+
write_unlock(&kvm->mmu_lock);
368+
else
369+
read_unlock(&kvm->mmu_lock);
370+
}
371+
356372
#ifdef CONFIG_PTDUMP_STAGE2_DEBUGFS
357373
void kvm_s2_ptdump_create_debugfs(struct kvm *kvm);
358374
#else

arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -412,15 +412,20 @@ static inline bool kvm_pgtable_walk_lock_held(void)
412412
* be used instead of block mappings.
413413
*/
414414
struct kvm_pgtable {
415-
u32 ia_bits;
416-
s8 start_level;
417-
kvm_pteref_t pgd;
418-
struct kvm_pgtable_mm_ops *mm_ops;
419-
420-
/* Stage-2 only */
421-
struct kvm_s2_mmu *mmu;
422-
enum kvm_pgtable_stage2_flags flags;
423-
kvm_pgtable_force_pte_cb_t force_pte_cb;
415+
union {
416+
struct rb_root pkvm_mappings;
417+
struct {
418+
u32 ia_bits;
419+
s8 start_level;
420+
kvm_pteref_t pgd;
421+
struct kvm_pgtable_mm_ops *mm_ops;
422+
423+
/* Stage-2 only */
424+
enum kvm_pgtable_stage2_flags flags;
425+
kvm_pgtable_force_pte_cb_t force_pte_cb;
426+
};
427+
};
428+
struct kvm_s2_mmu *mmu;
424429
};
425430

426431
/**
@@ -526,8 +531,11 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
526531
enum kvm_pgtable_stage2_flags flags,
527532
kvm_pgtable_force_pte_cb_t force_pte_cb);
528533

529-
#define kvm_pgtable_stage2_init(pgt, mmu, mm_ops) \
530-
__kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL)
534+
static inline int kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
535+
struct kvm_pgtable_mm_ops *mm_ops)
536+
{
537+
return __kvm_pgtable_stage2_init(pgt, mmu, mm_ops, 0, NULL);
538+
}
531539

532540
/**
533541
* kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
@@ -669,13 +677,15 @@ int kvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
669677
* kvm_pgtable_stage2_mkyoung() - Set the access flag in a page-table entry.
670678
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
671679
* @addr: Intermediate physical address to identify the page-table entry.
680+
* @flags: Flags to control the page-table walk (ex. a shared walk)
672681
*
673682
* The offset of @addr within a page is ignored.
674683
*
675684
* If there is a valid, leaf page-table entry used to translate @addr, then
676685
* set the access flag in that entry.
677686
*/
678-
void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr);
687+
void kvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
688+
enum kvm_pgtable_walk_flags flags);
679689

680690
/**
681691
* kvm_pgtable_stage2_test_clear_young() - Test and optionally clear the access
@@ -705,6 +715,7 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
705715
* @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
706716
* @addr: Intermediate physical address to identify the page-table entry.
707717
* @prot: Additional permissions to grant for the mapping.
718+
* @flags: Flags to control the page-table walk (ex. a shared walk)
708719
*
709720
* The offset of @addr within a page is ignored.
710721
*
@@ -717,7 +728,8 @@ bool kvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr,
717728
* Return: 0 on success, negative error code on failure.
718729
*/
719730
int kvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr,
720-
enum kvm_pgtable_prot prot);
731+
enum kvm_pgtable_prot prot,
732+
enum kvm_pgtable_walk_flags flags);
721733

722734
/**
723735
* kvm_pgtable_stage2_flush_range() - Clean and invalidate data cache to Point

arch/arm64/include/asm/kvm_pkvm.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,4 +137,30 @@ static inline size_t pkvm_host_sve_state_size(void)
137137
SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
138138
}
139139

140+
struct pkvm_mapping {
141+
struct rb_node node;
142+
u64 gfn;
143+
u64 pfn;
144+
};
145+
146+
int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
147+
struct kvm_pgtable_mm_ops *mm_ops);
148+
void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
149+
int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
150+
enum kvm_pgtable_prot prot, void *mc,
151+
enum kvm_pgtable_walk_flags flags);
152+
int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
153+
int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
154+
int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
155+
bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold);
156+
int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
157+
enum kvm_pgtable_walk_flags flags);
158+
void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
159+
enum kvm_pgtable_walk_flags flags);
160+
int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
161+
struct kvm_mmu_memory_cache *mc);
162+
void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
163+
kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
164+
enum kvm_pgtable_prot prot, void *mc,
165+
bool force_pte);
140166
#endif /* __ARM64_KVM_PKVM_H__ */

arch/arm64/kvm/arm.c

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
500500

501501
void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu)
502502
{
503-
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
503+
if (!is_protected_kvm_enabled())
504+
kvm_mmu_free_memory_cache(&vcpu->arch.mmu_page_cache);
505+
else
506+
free_hyp_memcache(&vcpu->arch.pkvm_memcache);
504507
kvm_timer_vcpu_terminate(vcpu);
505508
kvm_pmu_vcpu_destroy(vcpu);
506509
kvm_vgic_vcpu_destroy(vcpu);
@@ -572,6 +575,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
572575
struct kvm_s2_mmu *mmu;
573576
int *last_ran;
574577

578+
if (is_protected_kvm_enabled())
579+
goto nommu;
580+
575581
if (vcpu_has_nv(vcpu))
576582
kvm_vcpu_load_hw_mmu(vcpu);
577583

@@ -592,6 +598,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
592598
*last_ran = vcpu->vcpu_idx;
593599
}
594600

601+
nommu:
595602
vcpu->cpu = cpu;
596603

597604
kvm_vgic_load(vcpu);
@@ -616,12 +623,26 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
616623

617624
vcpu_set_pauth_traps(vcpu);
618625

626+
if (is_protected_kvm_enabled()) {
627+
kvm_call_hyp_nvhe(__pkvm_vcpu_load,
628+
vcpu->kvm->arch.pkvm.handle,
629+
vcpu->vcpu_idx, vcpu->arch.hcr_el2);
630+
kvm_call_hyp(__vgic_v3_restore_vmcr_aprs,
631+
&vcpu->arch.vgic_cpu.vgic_v3);
632+
}
633+
619634
if (!cpumask_test_cpu(cpu, vcpu->kvm->arch.supported_cpus))
620635
vcpu_set_on_unsupported_cpu(vcpu);
621636
}
622637

623638
void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
624639
{
640+
if (is_protected_kvm_enabled()) {
641+
kvm_call_hyp(__vgic_v3_save_vmcr_aprs,
642+
&vcpu->arch.vgic_cpu.vgic_v3);
643+
kvm_call_hyp_nvhe(__pkvm_vcpu_put);
644+
}
645+
625646
kvm_vcpu_put_debug(vcpu);
626647
kvm_arch_vcpu_put_fp(vcpu);
627648
if (has_vhe())

arch/arm64/kvm/hyp/include/nvhe/gfp.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
#include <nvhe/memory.h>
88
#include <nvhe/spinlock.h>
99

10-
#define HYP_NO_ORDER USHRT_MAX
10+
#define HYP_NO_ORDER ((u8)(~0))
1111

1212
struct hyp_pool {
1313
/*
@@ -19,11 +19,11 @@ struct hyp_pool {
1919
struct list_head free_area[NR_PAGE_ORDERS];
2020
phys_addr_t range_start;
2121
phys_addr_t range_end;
22-
unsigned short max_order;
22+
u8 max_order;
2323
};
2424

2525
/* Allocation */
26-
void *hyp_alloc_pages(struct hyp_pool *pool, unsigned short order);
26+
void *hyp_alloc_pages(struct hyp_pool *pool, u8 order);
2727
void hyp_split_page(struct hyp_page *page);
2828
void hyp_get_page(struct hyp_pool *pool, void *addr);
2929
void hyp_put_page(struct hyp_pool *pool, void *addr);

arch/arm64/kvm/hyp/include/nvhe/mem_protect.h

Lines changed: 8 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,40 +11,10 @@
1111
#include <asm/kvm_mmu.h>
1212
#include <asm/kvm_pgtable.h>
1313
#include <asm/virt.h>
14+
#include <nvhe/memory.h>
1415
#include <nvhe/pkvm.h>
1516
#include <nvhe/spinlock.h>
1617

17-
/*
18-
* SW bits 0-1 are reserved to track the memory ownership state of each page:
19-
* 00: The page is owned exclusively by the page-table owner.
20-
* 01: The page is owned by the page-table owner, but is shared
21-
* with another entity.
22-
* 10: The page is shared with, but not owned by the page-table owner.
23-
* 11: Reserved for future use (lending).
24-
*/
25-
enum pkvm_page_state {
26-
PKVM_PAGE_OWNED = 0ULL,
27-
PKVM_PAGE_SHARED_OWNED = KVM_PGTABLE_PROT_SW0,
28-
PKVM_PAGE_SHARED_BORROWED = KVM_PGTABLE_PROT_SW1,
29-
__PKVM_PAGE_RESERVED = KVM_PGTABLE_PROT_SW0 |
30-
KVM_PGTABLE_PROT_SW1,
31-
32-
/* Meta-states which aren't encoded directly in the PTE's SW bits */
33-
PKVM_NOPAGE,
34-
};
35-
36-
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
37-
static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
38-
enum pkvm_page_state state)
39-
{
40-
return (prot & ~PKVM_PAGE_STATE_PROT_MASK) | state;
41-
}
42-
43-
static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
44-
{
45-
return prot & PKVM_PAGE_STATE_PROT_MASK;
46-
}
47-
4818
struct host_mmu {
4919
struct kvm_arch arch;
5020
struct kvm_pgtable pgt;
@@ -69,6 +39,13 @@ int __pkvm_host_donate_hyp(u64 pfn, u64 nr_pages);
6939
int __pkvm_hyp_donate_host(u64 pfn, u64 nr_pages);
7040
int __pkvm_host_share_ffa(u64 pfn, u64 nr_pages);
7141
int __pkvm_host_unshare_ffa(u64 pfn, u64 nr_pages);
42+
int __pkvm_host_share_guest(u64 pfn, u64 gfn, struct pkvm_hyp_vcpu *vcpu,
43+
enum kvm_pgtable_prot prot);
44+
int __pkvm_host_unshare_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
45+
int __pkvm_host_relax_perms_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu, enum kvm_pgtable_prot prot);
46+
int __pkvm_host_wrprotect_guest(u64 gfn, struct pkvm_hyp_vm *hyp_vm);
47+
int __pkvm_host_test_clear_young_guest(u64 gfn, bool mkold, struct pkvm_hyp_vm *vm);
48+
int __pkvm_host_mkyoung_guest(u64 gfn, struct pkvm_hyp_vcpu *vcpu);
7249

7350
bool addr_is_memory(phys_addr_t phys);
7451
int host_stage2_idmap_locked(phys_addr_t addr, u64 size, enum kvm_pgtable_prot prot);

arch/arm64/kvm/hyp/include/nvhe/memory.h

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,47 @@
77

88
#include <linux/types.h>
99

10+
/*
11+
* Bits 0-1 are reserved to track the memory ownership state of each page:
12+
* 00: The page is owned exclusively by the page-table owner.
13+
* 01: The page is owned by the page-table owner, but is shared
14+
* with another entity.
15+
* 10: The page is shared with, but not owned by the page-table owner.
16+
* 11: Reserved for future use (lending).
17+
*/
18+
enum pkvm_page_state {
19+
PKVM_PAGE_OWNED = 0ULL,
20+
PKVM_PAGE_SHARED_OWNED = BIT(0),
21+
PKVM_PAGE_SHARED_BORROWED = BIT(1),
22+
__PKVM_PAGE_RESERVED = BIT(0) | BIT(1),
23+
24+
/* Meta-states which aren't encoded directly in the PTE's SW bits */
25+
PKVM_NOPAGE = BIT(2),
26+
};
27+
#define PKVM_PAGE_META_STATES_MASK (~__PKVM_PAGE_RESERVED)
28+
29+
#define PKVM_PAGE_STATE_PROT_MASK (KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)
30+
static inline enum kvm_pgtable_prot pkvm_mkstate(enum kvm_pgtable_prot prot,
31+
enum pkvm_page_state state)
32+
{
33+
prot &= ~PKVM_PAGE_STATE_PROT_MASK;
34+
prot |= FIELD_PREP(PKVM_PAGE_STATE_PROT_MASK, state);
35+
return prot;
36+
}
37+
38+
static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
39+
{
40+
return FIELD_GET(PKVM_PAGE_STATE_PROT_MASK, prot);
41+
}
42+
1043
struct hyp_page {
11-
unsigned short refcount;
12-
unsigned short order;
44+
u16 refcount;
45+
u8 order;
46+
47+
/* Host (non-meta) state. Guarded by the host stage-2 lock. */
48+
enum pkvm_page_state host_state : 8;
49+
50+
u32 host_share_guest_count;
1351
};
1452

1553
extern u64 __hyp_vmemmap;
@@ -29,7 +67,13 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr)
2967

3068
#define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
3169
#define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT))
32-
#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)])
70+
71+
static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys)
72+
{
73+
BUILD_BUG_ON(sizeof(struct hyp_page) != sizeof(u64));
74+
return &hyp_vmemmap[hyp_phys_to_pfn(phys)];
75+
}
76+
3377
#define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt))
3478
#define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt))
3579

0 commit comments

Comments
 (0)