
Commit e912efe

Quentin Perret authored and Marc Zyngier committed
KVM: arm64: Introduce the EL1 pKVM MMU
Introduce a set of helper functions that allow manipulating the pKVM guest stage-2 page-tables from EL1 using pKVM's HVC interface.

Each helper has an exact one-to-one correspondence with the traditional kvm_pgtable_stage2_*() functions from pgtable.c, with a strictly matching prototype. This will ease plumbing later on in mmu.c.

These callbacks track the gfn->pfn mappings in a simple rb_tree indexed by IPA in lieu of a page-table. This rb-tree is kept in sync with pKVM's state and is protected by the mmu_lock like a traditional stage-2 page-table.

Signed-off-by: Quentin Perret <[email protected]>
Tested-by: Fuad Tabba <[email protected]>
Reviewed-by: Fuad Tabba <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Marc Zyngier <[email protected]>
1 parent 0adce4d commit e912efe
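
Note: because the pkvm_pgtable_stage2_*() helpers mirror the kvm_pgtable_stage2_*() prototypes exactly, the later mmu.c plumbing can reduce to a thin dispatch. The sketch below is illustrative only and is not part of this commit; kvm_s2_map_page() is a hypothetical wrapper name, while is_protected_kvm_enabled() and both stage-2 map functions are existing kernel symbols.

static int kvm_s2_map_page(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
                           enum kvm_pgtable_prot prot, void *mc,
                           enum kvm_pgtable_walk_flags flags)
{
        /* pKVM guests go through the EL2 hypervisor via HVC... */
        if (is_protected_kvm_enabled())
                return pkvm_pgtable_stage2_map(pgt, addr, size, phys, prot, mc, flags);

        /* ...everything else walks the stage-2 page-table directly from EL1. */
        return kvm_pgtable_stage2_map(pgt, addr, size, phys, prot, mc, flags);
}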

File tree

4 files changed: +242 -9 lines changed


arch/arm64/include/asm/kvm_host.h

Lines changed: 1 addition & 0 deletions
@@ -85,6 +85,7 @@ void kvm_arm_vcpu_destroy(struct kvm_vcpu *vcpu);
 struct kvm_hyp_memcache {
         phys_addr_t head;
         unsigned long nr_pages;
+        struct pkvm_mapping *mapping; /* only used from EL1 */
 };

 static inline void push_hyp_memcache(struct kvm_hyp_memcache *mc,
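
The new mapping slot in kvm_hyp_memcache is consumed by pkvm_pgtable_stage2_map() via swap(), so a caller is expected to top it up before taking the mmu_lock. Below is a minimal sketch of such a top-up, assuming a GFP_KERNEL_ACCOUNT allocation on the fault path; topup_pkvm_mapping() is a hypothetical helper and not part of this commit.

static int topup_pkvm_mapping(struct kvm_hyp_memcache *mc)
{
        /* Pre-allocate the rb-tree node outside the mmu_lock critical section. */
        if (mc->mapping)
                return 0;

        mc->mapping = kzalloc(sizeof(*mc->mapping), GFP_KERNEL_ACCOUNT);
        return mc->mapping ? 0 : -ENOMEM;
}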

arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 14 additions & 9 deletions
@@ -412,15 +412,20 @@ static inline bool kvm_pgtable_walk_lock_held(void)
  * be used instead of block mappings.
  */
 struct kvm_pgtable {
-        u32 ia_bits;
-        s8 start_level;
-        kvm_pteref_t pgd;
-        struct kvm_pgtable_mm_ops *mm_ops;
-
-        /* Stage-2 only */
-        struct kvm_s2_mmu *mmu;
-        enum kvm_pgtable_stage2_flags flags;
-        kvm_pgtable_force_pte_cb_t force_pte_cb;
+        union {
+                struct rb_root pkvm_mappings;
+                struct {
+                        u32 ia_bits;
+                        s8 start_level;
+                        kvm_pteref_t pgd;
+                        struct kvm_pgtable_mm_ops *mm_ops;
+
+                        /* Stage-2 only */
+                        enum kvm_pgtable_stage2_flags flags;
+                        kvm_pgtable_force_pte_cb_t force_pte_cb;
+                };
+        };
+        struct kvm_s2_mmu *mmu;
 };

 /**

arch/arm64/include/asm/kvm_pkvm.h

Lines changed: 26 additions & 0 deletions
@@ -137,4 +137,30 @@ static inline size_t pkvm_host_sve_state_size(void)
                         SVE_SIG_REGS_SIZE(sve_vq_from_vl(kvm_host_sve_max_vl)));
 }

+struct pkvm_mapping {
+        struct rb_node node;
+        u64 gfn;
+        u64 pfn;
+};
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+                             struct kvm_pgtable_mm_ops *mm_ops);
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
+                            enum kvm_pgtable_prot prot, void *mc,
+                            enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size);
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold);
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+                                    enum kvm_pgtable_walk_flags flags);
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+                                 enum kvm_pgtable_walk_flags flags);
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                              struct kvm_mmu_memory_cache *mc);
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level);
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+                                               enum kvm_pgtable_prot prot, void *mc,
+                                               bool force_pte);
 #endif /* __ARM64_KVM_PKVM_H__ */
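
For illustration, the declarations above could be exercised as follows; pkvm_map_one_page() is a hypothetical caller shown only to highlight the expected locking and prot arguments (write-held mmu_lock, existing KVM_PGTABLE_PROT_* flags), and is not code from this series.

static int pkvm_map_one_page(struct kvm *kvm, struct kvm_pgtable *pgt,
                             u64 ipa, u64 phys, struct kvm_hyp_memcache *mc)
{
        int ret;

        /* pkvm_pgtable_stage2_map() only accepts single-page mappings. */
        write_lock(&kvm->mmu_lock);
        ret = pkvm_pgtable_stage2_map(pgt, ipa, PAGE_SIZE, phys,
                                      KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W,
                                      mc, 0);
        write_unlock(&kvm->mmu_lock);

        return ret;
}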

arch/arm64/kvm/pkvm.c

Lines changed: 201 additions & 0 deletions
@@ -7,6 +7,7 @@
 #include <linux/init.h>
 #include <linux/kmemleak.h>
 #include <linux/kvm_host.h>
+#include <asm/kvm_mmu.h>
 #include <linux/memblock.h>
 #include <linux/mutex.h>
 #include <linux/sort.h>
@@ -268,3 +269,203 @@ static int __init finalize_pkvm(void)
         return ret;
 }
 device_initcall_sync(finalize_pkvm);
+
+static int cmp_mappings(struct rb_node *node, const struct rb_node *parent)
+{
+        struct pkvm_mapping *a = rb_entry(node, struct pkvm_mapping, node);
+        struct pkvm_mapping *b = rb_entry(parent, struct pkvm_mapping, node);
+
+        if (a->gfn < b->gfn)
+                return -1;
+        if (a->gfn > b->gfn)
+                return 1;
+        return 0;
+}
+
+static struct rb_node *find_first_mapping_node(struct rb_root *root, u64 gfn)
+{
+        struct rb_node *node = root->rb_node, *prev = NULL;
+        struct pkvm_mapping *mapping;
+
+        while (node) {
+                mapping = rb_entry(node, struct pkvm_mapping, node);
+                if (mapping->gfn == gfn)
+                        return node;
+                prev = node;
+                node = (gfn < mapping->gfn) ? node->rb_left : node->rb_right;
+        }
+
+        return prev;
+}
+
+/*
+ * __tmp is updated to rb_next(__tmp) *before* entering the body of the loop to allow freeing
+ * of __map inline.
+ */
+#define for_each_mapping_in_range_safe(__pgt, __start, __end, __map) \
+        for (struct rb_node *__tmp = find_first_mapping_node(&(__pgt)->pkvm_mappings, \
+                                                             ((__start) >> PAGE_SHIFT)); \
+             __tmp && ({ \
+                          __map = rb_entry(__tmp, struct pkvm_mapping, node); \
+                          __tmp = rb_next(__tmp); \
+                          true; \
+                       }); \
+            ) \
+                if (__map->gfn < ((__start) >> PAGE_SHIFT)) \
+                        continue; \
+                else if (__map->gfn >= ((__end) >> PAGE_SHIFT)) \
+                        break; \
+                else
+
+int pkvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
+                             struct kvm_pgtable_mm_ops *mm_ops)
+{
+        pgt->pkvm_mappings = RB_ROOT;
+        pgt->mmu = mmu;
+
+        return 0;
+}
+
+void pkvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
+{
+        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+        pkvm_handle_t handle = kvm->arch.pkvm.handle;
+        struct pkvm_mapping *mapping;
+        struct rb_node *node;
+
+        if (!handle)
+                return;
+
+        node = rb_first(&pgt->pkvm_mappings);
+        while (node) {
+                mapping = rb_entry(node, struct pkvm_mapping, node);
+                kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
+                node = rb_next(node);
+                rb_erase(&mapping->node, &pgt->pkvm_mappings);
+                kfree(mapping);
+        }
+}
+
+int pkvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                            u64 phys, enum kvm_pgtable_prot prot,
+                            void *mc, enum kvm_pgtable_walk_flags flags)
+{
+        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+        struct pkvm_mapping *mapping = NULL;
+        struct kvm_hyp_memcache *cache = mc;
+        u64 gfn = addr >> PAGE_SHIFT;
+        u64 pfn = phys >> PAGE_SHIFT;
+        int ret;
+
+        if (size != PAGE_SIZE)
+                return -EINVAL;
+
+        lockdep_assert_held_write(&kvm->mmu_lock);
+        ret = kvm_call_hyp_nvhe(__pkvm_host_share_guest, pfn, gfn, prot);
+        if (ret) {
+                /* Is the gfn already mapped due to a racing vCPU? */
+                if (ret == -EPERM)
+                        return -EAGAIN;
+        }
+
+        swap(mapping, cache->mapping);
+        mapping->gfn = gfn;
+        mapping->pfn = pfn;
+        WARN_ON(rb_find_add(&mapping->node, &pgt->pkvm_mappings, cmp_mappings));
+
+        return ret;
+}
+
+int pkvm_pgtable_stage2_unmap(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+        pkvm_handle_t handle = kvm->arch.pkvm.handle;
+        struct pkvm_mapping *mapping;
+        int ret = 0;
+
+        lockdep_assert_held_write(&kvm->mmu_lock);
+        for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
+                ret = kvm_call_hyp_nvhe(__pkvm_host_unshare_guest, handle, mapping->gfn);
+                if (WARN_ON(ret))
+                        break;
+                rb_erase(&mapping->node, &pgt->pkvm_mappings);
+                kfree(mapping);
+        }
+
+        return ret;
+}
+
+int pkvm_pgtable_stage2_wrprotect(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+        pkvm_handle_t handle = kvm->arch.pkvm.handle;
+        struct pkvm_mapping *mapping;
+        int ret = 0;
+
+        lockdep_assert_held(&kvm->mmu_lock);
+        for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping) {
+                ret = kvm_call_hyp_nvhe(__pkvm_host_wrprotect_guest, handle, mapping->gfn);
+                if (WARN_ON(ret))
+                        break;
+        }
+
+        return ret;
+}
+
+int pkvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size)
+{
+        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+        struct pkvm_mapping *mapping;
+
+        lockdep_assert_held(&kvm->mmu_lock);
+        for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+                __clean_dcache_guest_page(pfn_to_kaddr(mapping->pfn), PAGE_SIZE);
+
+        return 0;
+}
+
+bool pkvm_pgtable_stage2_test_clear_young(struct kvm_pgtable *pgt, u64 addr, u64 size, bool mkold)
+{
+        struct kvm *kvm = kvm_s2_mmu_to_kvm(pgt->mmu);
+        pkvm_handle_t handle = kvm->arch.pkvm.handle;
+        struct pkvm_mapping *mapping;
+        bool young = false;
+
+        lockdep_assert_held(&kvm->mmu_lock);
+        for_each_mapping_in_range_safe(pgt, addr, addr + size, mapping)
+                young |= kvm_call_hyp_nvhe(__pkvm_host_test_clear_young_guest, handle, mapping->gfn,
+                                           mkold);
+
+        return young;
+}
+
+int pkvm_pgtable_stage2_relax_perms(struct kvm_pgtable *pgt, u64 addr, enum kvm_pgtable_prot prot,
+                                    enum kvm_pgtable_walk_flags flags)
+{
+        return kvm_call_hyp_nvhe(__pkvm_host_relax_perms_guest, addr >> PAGE_SHIFT, prot);
+}
+
+void pkvm_pgtable_stage2_mkyoung(struct kvm_pgtable *pgt, u64 addr,
+                                 enum kvm_pgtable_walk_flags flags)
+{
+        WARN_ON(kvm_call_hyp_nvhe(__pkvm_host_mkyoung_guest, addr >> PAGE_SHIFT));
+}
+
+void pkvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, s8 level)
+{
+        WARN_ON_ONCE(1);
+}
+
+kvm_pte_t *pkvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt, u64 phys, s8 level,
+                                               enum kvm_pgtable_prot prot, void *mc, bool force_pte)
+{
+        WARN_ON_ONCE(1);
+        return NULL;
+}
+
+int pkvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+                              struct kvm_mmu_memory_cache *mc)
+{
+        WARN_ON_ONCE(1);
+        return -EINVAL;
+}
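
Since the rb-tree stands in for the stage-2 page-table, an IPA lookup reduces to a gfn search. The helper below is a hypothetical illustration built on find_first_mapping_node() and is not part of this commit.

static struct pkvm_mapping *pkvm_find_mapping(struct kvm_pgtable *pgt, u64 ipa)
{
        u64 gfn = ipa >> PAGE_SHIFT;
        struct rb_node *node = find_first_mapping_node(&pgt->pkvm_mappings, gfn);
        struct pkvm_mapping *mapping;

        if (!node)
                return NULL;

        /* find_first_mapping_node() may return a neighbour when gfn is absent. */
        mapping = rb_entry(node, struct pkvm_mapping, node);
        return mapping->gfn == gfn ? mapping : NULL;
}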
