Skip to content

Commit e94a7de

Browse files
Quentin Perret authored and Marc Zyngier committed
KVM: arm64: Move host page ownership tracking to the hyp vmemmap
We currently store part of the page-tracking state in PTE software bits for the host, guests and the hypervisor. This is sub-optimal when, e.g., sharing pages, as it forces us to break block mappings purely to support this software tracking. This causes an unnecessarily fragmented stage-2 page-table for the host, in particular when it shares pages with Secure, which can lead to measurable regressions. Moreover, having this state stored in the page-table forces us to do multiple costly walks on the page transition path, hence causing overhead.

In order to work around these problems, move the host-side page-tracking logic from SW bits in its stage-2 PTEs to the hypervisor's vmemmap.

Tested-by: Fuad Tabba <[email protected]>
Reviewed-by: Fuad Tabba <[email protected]>
Signed-off-by: Quentin Perret <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Marc Zyngier <[email protected]>
1 parent b35875d commit e94a7de

File tree

3 files changed

+84
-37
lines changed

3 files changed

+84
-37
lines changed

arch/arm64/kvm/hyp/include/nvhe/memory.h

Lines changed: 11 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
#include <linux/types.h>
99

1010
/*
11-
* SW bits 0-1 are reserved to track the memory ownership state of each page:
11+
* Bits 0-1 are reserved to track the memory ownership state of each page:
1212
* 00: The page is owned exclusively by the page-table owner.
1313
* 01: The page is owned by the page-table owner, but is shared
1414
* with another entity.
@@ -43,7 +43,9 @@ static inline enum pkvm_page_state pkvm_getstate(enum kvm_pgtable_prot prot)
4343
struct hyp_page {
4444
u16 refcount;
4545
u8 order;
46-
u8 reserved;
46+
47+
/* Host (non-meta) state. Guarded by the host stage-2 lock. */
48+
enum pkvm_page_state host_state : 8;
4749
};
4850

4951
extern u64 __hyp_vmemmap;
@@ -63,7 +65,13 @@ static inline phys_addr_t hyp_virt_to_phys(void *addr)
6365

6466
#define hyp_phys_to_pfn(phys) ((phys) >> PAGE_SHIFT)
6567
#define hyp_pfn_to_phys(pfn) ((phys_addr_t)((pfn) << PAGE_SHIFT))
66-
#define hyp_phys_to_page(phys) (&hyp_vmemmap[hyp_phys_to_pfn(phys)])
68+
69+
static inline struct hyp_page *hyp_phys_to_page(phys_addr_t phys)
70+
{
71+
BUILD_BUG_ON(sizeof(struct hyp_page) != sizeof(u32));
72+
return &hyp_vmemmap[hyp_phys_to_pfn(phys)];
73+
}
74+
6775
#define hyp_virt_to_page(virt) hyp_phys_to_page(__hyp_pa(virt))
6876
#define hyp_virt_to_pfn(virt) hyp_phys_to_pfn(__hyp_pa(virt))
6977

arch/arm64/kvm/hyp/nvhe/mem_protect.c

Lines changed: 70 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -201,8 +201,8 @@ static void *guest_s2_zalloc_page(void *mc)
201201

202202
memset(addr, 0, PAGE_SIZE);
203203
p = hyp_virt_to_page(addr);
204-
memset(p, 0, sizeof(*p));
205204
p->refcount = 1;
205+
p->order = 0;
206206

207207
return addr;
208208
}
@@ -268,6 +268,7 @@ int kvm_guest_prepare_stage2(struct pkvm_hyp_vm *vm, void *pgd)
268268

269269
void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
270270
{
271+
struct hyp_page *page;
271272
void *addr;
272273

273274
/* Dump all pgtable pages in the hyp_pool */
@@ -279,7 +280,9 @@ void reclaim_guest_pages(struct pkvm_hyp_vm *vm, struct kvm_hyp_memcache *mc)
279280
/* Drain the hyp_pool into the memcache */
280281
addr = hyp_alloc_pages(&vm->pool, 0);
281282
while (addr) {
282-
memset(hyp_virt_to_page(addr), 0, sizeof(struct hyp_page));
283+
page = hyp_virt_to_page(addr);
284+
page->refcount = 0;
285+
page->order = 0;
283286
push_hyp_memcache(mc, addr, hyp_virt_to_phys);
284287
WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(addr), 1));
285288
addr = hyp_alloc_pages(&vm->pool, 0);
@@ -382,19 +385,28 @@ bool addr_is_memory(phys_addr_t phys)
382385
return !!find_mem_range(phys, &range);
383386
}
384387

385-
static bool addr_is_allowed_memory(phys_addr_t phys)
388+
static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
389+
{
390+
return range->start <= addr && addr < range->end;
391+
}
392+
393+
static int check_range_allowed_memory(u64 start, u64 end)
386394
{
387395
struct memblock_region *reg;
388396
struct kvm_mem_range range;
389397

390-
reg = find_mem_range(phys, &range);
398+
/*
399+
* Callers can't check the state of a range that overlaps memory and
400+
* MMIO regions, so ensure [start, end[ is in the same kvm_mem_range.
401+
*/
402+
reg = find_mem_range(start, &range);
403+
if (!is_in_mem_range(end - 1, &range))
404+
return -EINVAL;
391405

392-
return reg && !(reg->flags & MEMBLOCK_NOMAP);
393-
}
406+
if (!reg || reg->flags & MEMBLOCK_NOMAP)
407+
return -EPERM;
394408

395-
static bool is_in_mem_range(u64 addr, struct kvm_mem_range *range)
396-
{
397-
return range->start <= addr && addr < range->end;
409+
return 0;
398410
}
399411

400412
static bool range_is_memory(u64 start, u64 end)
@@ -454,8 +466,10 @@ static int host_stage2_adjust_range(u64 addr, struct kvm_mem_range *range)
454466
if (kvm_pte_valid(pte))
455467
return -EAGAIN;
456468

457-
if (pte)
469+
if (pte) {
470+
WARN_ON(addr_is_memory(addr) && hyp_phys_to_page(addr)->host_state != PKVM_NOPAGE);
458471
return -EPERM;
472+
}
459473

460474
do {
461475
u64 granule = kvm_granule_size(level);
@@ -477,10 +491,33 @@ int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
477491
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
478492
}
479493

494+
static void __host_update_page_state(phys_addr_t addr, u64 size, enum pkvm_page_state state)
495+
{
496+
phys_addr_t end = addr + size;
497+
498+
for (; addr < end; addr += PAGE_SIZE)
499+
hyp_phys_to_page(addr)->host_state = state;
500+
}
501+
480502
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
481503
{
482-
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
483-
addr, size, &host_s2_pool, owner_id);
504+
int ret;
505+
506+
if (!addr_is_memory(addr))
507+
return -EPERM;
508+
509+
ret = host_stage2_try(kvm_pgtable_stage2_set_owner, &host_mmu.pgt,
510+
addr, size, &host_s2_pool, owner_id);
511+
if (ret)
512+
return ret;
513+
514+
/* Don't forget to update the vmemmap tracking for the host */
515+
if (owner_id == PKVM_ID_HOST)
516+
__host_update_page_state(addr, size, PKVM_PAGE_OWNED);
517+
else
518+
__host_update_page_state(addr, size, PKVM_NOPAGE);
519+
520+
return 0;
484521
}
485522

486523
static bool host_stage2_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
@@ -604,35 +641,38 @@ static int check_page_state_range(struct kvm_pgtable *pgt, u64 addr, u64 size,
604641
return kvm_pgtable_walk(pgt, addr, size, &walker);
605642
}
606643

607-
static enum pkvm_page_state host_get_page_state(kvm_pte_t pte, u64 addr)
608-
{
609-
if (!addr_is_allowed_memory(addr))
610-
return PKVM_NOPAGE;
611-
612-
if (!kvm_pte_valid(pte) && pte)
613-
return PKVM_NOPAGE;
614-
615-
return pkvm_getstate(kvm_pgtable_stage2_pte_prot(pte));
616-
}
617-
618644
static int __host_check_page_state_range(u64 addr, u64 size,
619645
enum pkvm_page_state state)
620646
{
621-
struct check_walk_data d = {
622-
.desired = state,
623-
.get_page_state = host_get_page_state,
624-
};
647+
u64 end = addr + size;
648+
int ret;
649+
650+
ret = check_range_allowed_memory(addr, end);
651+
if (ret)
652+
return ret;
625653

626654
hyp_assert_lock_held(&host_mmu.lock);
627-
return check_page_state_range(&host_mmu.pgt, addr, size, &d);
655+
for (; addr < end; addr += PAGE_SIZE) {
656+
if (hyp_phys_to_page(addr)->host_state != state)
657+
return -EPERM;
658+
}
659+
660+
return 0;
628661
}
629662

630663
static int __host_set_page_state_range(u64 addr, u64 size,
631664
enum pkvm_page_state state)
632665
{
633-
enum kvm_pgtable_prot prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, state);
666+
if (hyp_phys_to_page(addr)->host_state == PKVM_NOPAGE) {
667+
int ret = host_stage2_idmap_locked(addr, size, PKVM_HOST_MEM_PROT);
634668

635-
return host_stage2_idmap_locked(addr, size, prot);
669+
if (ret)
670+
return ret;
671+
}
672+
673+
__host_update_page_state(addr, size, state);
674+
675+
return 0;
636676
}
637677

638678
static int host_request_owned_transition(u64 *completer_addr,

arch/arm64/kvm/hyp/nvhe/setup.c

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -180,7 +180,6 @@ static void hpool_put_page(void *addr)
180180
static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
181181
enum kvm_pgtable_walk_flags visit)
182182
{
183-
enum kvm_pgtable_prot prot;
184183
enum pkvm_page_state state;
185184
phys_addr_t phys;
186185

@@ -203,16 +202,16 @@ static int fix_host_ownership_walker(const struct kvm_pgtable_visit_ctx *ctx,
203202
case PKVM_PAGE_OWNED:
204203
return host_stage2_set_owner_locked(phys, PAGE_SIZE, PKVM_ID_HYP);
205204
case PKVM_PAGE_SHARED_OWNED:
206-
prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_BORROWED);
205+
hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_BORROWED;
207206
break;
208207
case PKVM_PAGE_SHARED_BORROWED:
209-
prot = pkvm_mkstate(PKVM_HOST_MEM_PROT, PKVM_PAGE_SHARED_OWNED);
208+
hyp_phys_to_page(phys)->host_state = PKVM_PAGE_SHARED_OWNED;
210209
break;
211210
default:
212211
return -EINVAL;
213212
}
214213

215-
return host_stage2_idmap_locked(phys, PAGE_SIZE, prot);
214+
return 0;
216215
}
217216

218217
static int fix_hyp_pgtable_refcnt_walker(const struct kvm_pgtable_visit_ctx *ctx,

0 commit comments

Comments
 (0)