
Commit cf0c712

Author: Marc Zyngier (committed)
Merge branch kvm-arm64/mmu/el2-tracking into kvmarm-master/next
* kvm-arm64/mmu/el2-tracking: (25 commits)
  : Enable tracking of page sharing between host EL1 and EL2
  KVM: arm64: Minor optimization of range_is_memory
  KVM: arm64: Make hyp_panic() more robust when protected mode is enabled
  KVM: arm64: Return -EPERM from __pkvm_host_share_hyp()
  KVM: arm64: Make __pkvm_create_mappings static
  KVM: arm64: Restrict EL2 stage-1 changes in protected mode
  KVM: arm64: Refactor protected nVHE stage-1 locking
  KVM: arm64: Remove __pkvm_mark_hyp
  KVM: arm64: Mark host bss and rodata section as shared
  KVM: arm64: Enable retrieving protections attributes of PTEs
  KVM: arm64: Introduce addr_is_memory()
  KVM: arm64: Expose pkvm_hyp_id
  KVM: arm64: Expose host stage-2 manipulation helpers
  KVM: arm64: Add helpers to tag shared pages in SW bits
  KVM: arm64: Allow populating software bits
  KVM: arm64: Enable forcing page-level stage-2 mappings
  KVM: arm64: Tolerate re-creating hyp mappings to set software bits
  KVM: arm64: Don't overwrite software bits with owner id
  KVM: arm64: Rename KVM_PTE_LEAF_ATTR_S2_IGNORED
  KVM: arm64: Optimize host memory aborts
  KVM: arm64: Expose page-table helpers
  ...

Signed-off-by: Marc Zyngier <[email protected]>
2 parents 82f8d54 + 14ecf07 commit cf0c712

15 files changed, +618 −306 lines changed

arch/arm64/include/asm/kvm_asm.h

Lines changed: 2 additions & 3 deletions
@@ -59,12 +59,11 @@
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_save_aprs		13
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_restore_aprs		14
 #define __KVM_HOST_SMCCC_FUNC___pkvm_init			15
-#define __KVM_HOST_SMCCC_FUNC___pkvm_create_mappings		16
+#define __KVM_HOST_SMCCC_FUNC___pkvm_host_share_hyp		16
 #define __KVM_HOST_SMCCC_FUNC___pkvm_create_private_mapping	17
 #define __KVM_HOST_SMCCC_FUNC___pkvm_cpu_set_vector		18
 #define __KVM_HOST_SMCCC_FUNC___pkvm_prot_finalize		19
-#define __KVM_HOST_SMCCC_FUNC___pkvm_mark_hyp			20
-#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc			21
+#define __KVM_HOST_SMCCC_FUNC___kvm_adjust_pc			20
 
 #ifndef __ASSEMBLY__
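The __KVM_HOST_SMCCC_FUNC___* constants index the host-to-EL2 hypercalls dispatched by the nVHE handler: __pkvm_create_mappings becomes EL2-internal, and the new __pkvm_host_share_hyp call is how the host now shares a page with the hypervisor. A minimal sketch of a host-side caller, assuming a page-aligned kernel virtual address (the pfn-based convention matches this series, but treat the helper as illustrative, not the exact mainline call site):

	/*
	 * Illustrative sketch: ask EL2 to mark one host page as shared with
	 * the hypervisor. kvm_call_hyp_nvhe() marshals the arguments into an
	 * SMCCC call keyed by the __KVM_HOST_SMCCC_FUNC___* index above.
	 */
	static int share_page_with_hyp(void *addr)
	{
		phys_addr_t phys = __pa(addr);	/* addr must be page-aligned */

		return kvm_call_hyp_nvhe(__pkvm_host_share_hyp, __phys_to_pfn(phys));
	}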

arch/arm64/include/asm/kvm_pgtable.h

Lines changed: 103 additions & 47 deletions
@@ -25,6 +25,46 @@ static inline u64 kvm_get_parange(u64 mmfr0)
 
 typedef u64 kvm_pte_t;
 
+#define KVM_PTE_VALID			BIT(0)
+
+#define KVM_PTE_ADDR_MASK		GENMASK(47, PAGE_SHIFT)
+#define KVM_PTE_ADDR_51_48		GENMASK(15, 12)
+
+static inline bool kvm_pte_valid(kvm_pte_t pte)
+{
+	return pte & KVM_PTE_VALID;
+}
+
+static inline u64 kvm_pte_to_phys(kvm_pte_t pte)
+{
+	u64 pa = pte & KVM_PTE_ADDR_MASK;
+
+	if (PAGE_SHIFT == 16)
+		pa |= FIELD_GET(KVM_PTE_ADDR_51_48, pte) << 48;
+
+	return pa;
+}
+
+static inline u64 kvm_granule_shift(u32 level)
+{
+	/* Assumes KVM_PGTABLE_MAX_LEVELS is 4 */
+	return ARM64_HW_PGTABLE_LEVEL_SHIFT(level);
+}
+
+static inline u64 kvm_granule_size(u32 level)
+{
+	return BIT(kvm_granule_shift(level));
+}
+
+static inline bool kvm_level_supports_block_mapping(u32 level)
+{
+	/*
+	 * Reject invalid block mappings and don't bother with 4TB mappings for
+	 * 52-bit PAs.
+	 */
+	return !(level == 0 || (PAGE_SIZE != SZ_4K && level == 1));
+}
+
 /**
  * struct kvm_pgtable_mm_ops - Memory management callbacks.
  * @zalloc_page:	Allocate a single zeroed memory page.
@@ -75,53 +115,65 @@ enum kvm_pgtable_stage2_flags {
 	KVM_PGTABLE_S2_IDMAP			= BIT(1),
 };
 
-/**
- * struct kvm_pgtable - KVM page-table.
- * @ia_bits:		Maximum input address size, in bits.
- * @start_level:	Level at which the page-table walk starts.
- * @pgd:		Pointer to the first top-level entry of the page-table.
- * @mm_ops:		Memory management callbacks.
- * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
- */
-struct kvm_pgtable {
-	u32					ia_bits;
-	u32					start_level;
-	kvm_pte_t				*pgd;
-	struct kvm_pgtable_mm_ops		*mm_ops;
-
-	/* Stage-2 only */
-	struct kvm_s2_mmu			*mmu;
-	enum kvm_pgtable_stage2_flags		flags;
-};
-
 /**
  * enum kvm_pgtable_prot - Page-table permissions and attributes.
  * @KVM_PGTABLE_PROT_X:		Execute permission.
  * @KVM_PGTABLE_PROT_W:		Write permission.
  * @KVM_PGTABLE_PROT_R:		Read permission.
  * @KVM_PGTABLE_PROT_DEVICE:	Device attributes.
+ * @KVM_PGTABLE_PROT_SW0:	Software bit 0.
+ * @KVM_PGTABLE_PROT_SW1:	Software bit 1.
+ * @KVM_PGTABLE_PROT_SW2:	Software bit 2.
+ * @KVM_PGTABLE_PROT_SW3:	Software bit 3.
  */
 enum kvm_pgtable_prot {
 	KVM_PGTABLE_PROT_X			= BIT(0),
 	KVM_PGTABLE_PROT_W			= BIT(1),
 	KVM_PGTABLE_PROT_R			= BIT(2),
 
 	KVM_PGTABLE_PROT_DEVICE			= BIT(3),
+
+	KVM_PGTABLE_PROT_SW0			= BIT(55),
+	KVM_PGTABLE_PROT_SW1			= BIT(56),
+	KVM_PGTABLE_PROT_SW2			= BIT(57),
+	KVM_PGTABLE_PROT_SW3			= BIT(58),
 };
 
-#define PAGE_HYP		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
+#define KVM_PGTABLE_PROT_RW	(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_W)
+#define KVM_PGTABLE_PROT_RWX	(KVM_PGTABLE_PROT_RW | KVM_PGTABLE_PROT_X)
+
+#define PKVM_HOST_MEM_PROT	KVM_PGTABLE_PROT_RWX
+#define PKVM_HOST_MMIO_PROT	KVM_PGTABLE_PROT_RW
+
+#define PAGE_HYP		KVM_PGTABLE_PROT_RW
 #define PAGE_HYP_EXEC		(KVM_PGTABLE_PROT_R | KVM_PGTABLE_PROT_X)
 #define PAGE_HYP_RO		(KVM_PGTABLE_PROT_R)
 #define PAGE_HYP_DEVICE		(PAGE_HYP | KVM_PGTABLE_PROT_DEVICE)
 
+typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
+					   enum kvm_pgtable_prot prot);
+
 /**
- * struct kvm_mem_range - Range of Intermediate Physical Addresses
- * @start:	Start of the range.
- * @end:	End of the range.
+ * struct kvm_pgtable - KVM page-table.
+ * @ia_bits:		Maximum input address size, in bits.
+ * @start_level:	Level at which the page-table walk starts.
+ * @pgd:		Pointer to the first top-level entry of the page-table.
+ * @mm_ops:		Memory management callbacks.
+ * @mmu:		Stage-2 KVM MMU struct. Unused for stage-1 page-tables.
+ * @flags:		Stage-2 page-table flags.
+ * @force_pte_cb:	Function that returns true if page level mappings must
+ *			be used instead of block mappings.
  */
-struct kvm_mem_range {
-	u64 start;
-	u64 end;
+struct kvm_pgtable {
+	u32					ia_bits;
+	u32					start_level;
+	kvm_pte_t				*pgd;
+	struct kvm_pgtable_mm_ops		*mm_ops;
+
+	/* Stage-2 only */
+	struct kvm_s2_mmu			*mmu;
+	enum kvm_pgtable_stage2_flags		flags;
+	kvm_pgtable_force_pte_cb_t		force_pte_cb;
 };
 
 /**
@@ -216,21 +268,24 @@ int kvm_pgtable_hyp_map(struct kvm_pgtable *pgt, u64 addr, u64 size, u64 phys,
 u64 kvm_get_vtcr(u64 mmfr0, u64 mmfr1, u32 phys_shift);
 
 /**
- * kvm_pgtable_stage2_init_flags() - Initialise a guest stage-2 page-table.
+ * __kvm_pgtable_stage2_init() - Initialise a guest stage-2 page-table.
  * @pgt:	Uninitialised page-table structure to initialise.
  * @arch:	Arch-specific KVM structure representing the guest virtual
  *		machine.
  * @mm_ops:	Memory management callbacks.
  * @flags:	Stage-2 configuration flags.
+ * @force_pte_cb: Function that returns true if page level mappings must
+ *		be used instead of block mappings.
  *
  * Return: 0 on success, negative error code on failure.
  */
-int kvm_pgtable_stage2_init_flags(struct kvm_pgtable *pgt, struct kvm_arch *arch,
-				  struct kvm_pgtable_mm_ops *mm_ops,
-				  enum kvm_pgtable_stage2_flags flags);
+int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_arch *arch,
+			      struct kvm_pgtable_mm_ops *mm_ops,
+			      enum kvm_pgtable_stage2_flags flags,
+			      kvm_pgtable_force_pte_cb_t force_pte_cb);
 
 #define kvm_pgtable_stage2_init(pgt, arch, mm_ops) \
-	kvm_pgtable_stage2_init_flags(pgt, arch, mm_ops, 0)
+	__kvm_pgtable_stage2_init(pgt, arch, mm_ops, 0, NULL)
 
 /**
  * kvm_pgtable_stage2_destroy() - Destroy an unused guest stage-2 page-table.
@@ -374,7 +429,8 @@ kvm_pte_t kvm_pgtable_stage2_mkold(struct kvm_pgtable *pgt, u64 addr);
 * If there is a valid, leaf page-table entry used to translate @addr, then
 * relax the permissions in that entry according to the read, write and
 * execute permissions specified by @prot. No permissions are removed, and
- * TLB invalidation is performed after updating the entry.
+ * TLB invalidation is performed after updating the entry. Software bits cannot
+ * be set or cleared using kvm_pgtable_stage2_relax_perms().
 *
 * Return: 0 on success, negative error code on failure.
 */
@@ -453,22 +509,22 @@ int kvm_pgtable_get_leaf(struct kvm_pgtable *pgt, u64 addr,
 			 kvm_pte_t *ptep, u32 *level);
 
 /**
- * kvm_pgtable_stage2_find_range() - Find a range of Intermediate Physical
- *				     Addresses with compatible permission
- *				     attributes.
- * @pgt:	Page-table structure initialised by kvm_pgtable_stage2_init*().
- * @addr:	Address that must be covered by the range.
- * @prot:	Protection attributes that the range must be compatible with.
- * @range:	Range structure used to limit the search space at call time and
- *		that will hold the result.
+ * kvm_pgtable_stage2_pte_prot() - Retrieve the protection attributes of a
+ *				   stage-2 Page-Table Entry.
+ * @pte:	Page-table entry
 *
- * The offset of @addr within a page is ignored. An IPA is compatible with @prot
- * iff its corresponding stage-2 page-table entry has default ownership and, if
- * valid, is mapped with protection attributes identical to @prot.
+ * Return: protection attributes of the page-table entry in the enum
+ *	   kvm_pgtable_prot format.
+ */
+enum kvm_pgtable_prot kvm_pgtable_stage2_pte_prot(kvm_pte_t pte);
+
+/**
+ * kvm_pgtable_hyp_pte_prot() - Retrieve the protection attributes of a stage-1
+ *				Page-Table Entry.
+ * @pte:	Page-table entry
 *
- * Return: 0 on success, negative error code on failure.
+ * Return: protection attributes of the page-table entry in the enum
+ *	   kvm_pgtable_prot format.
 */
-int kvm_pgtable_stage2_find_range(struct kvm_pgtable *pgt, u64 addr,
-				  enum kvm_pgtable_prot prot,
-				  struct kvm_mem_range *range);
+enum kvm_pgtable_prot kvm_pgtable_hyp_pte_prot(kvm_pte_t pte);
 #endif /* __ARM64_KVM_PGTABLE_H__ */
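Two of the new hooks are worth illustrating. The software bits (SW0-SW3 sit in the architecturally-ignored PTE bits 55-58) are what the series uses to encode page-sharing state, and force_pte_cb lets the page-table owner veto block mappings so a later single-page state change never has to split a live block. A rough sketch under those assumptions; the demo_* names are illustrative, though the two-state encoding mirrors what the series adds to the nVHE mem_protect code:

	/* Sketch: encode page-sharing state in the stage-2 software bits. */
	enum demo_page_state {
		DEMO_PAGE_OWNED			= 0ULL,
		DEMO_PAGE_SHARED_OWNED		= KVM_PGTABLE_PROT_SW0,
		DEMO_PAGE_SHARED_BORROWED	= KVM_PGTABLE_PROT_SW1,
	};

	#define DEMO_PAGE_STATE_PROT_MASK	(KVM_PGTABLE_PROT_SW0 | KVM_PGTABLE_PROT_SW1)

	static enum demo_page_state demo_pte_state(kvm_pte_t pte)
	{
		/* The new accessor folds the SW bits into the returned prot. */
		return kvm_pgtable_stage2_pte_prot(pte) & DEMO_PAGE_STATE_PROT_MASK;
	}

	/*
	 * Sketch: a force_pte_cb in the spirit of the host stage-2 callback.
	 * Anything mapped with non-default permissions (e.g. carrying sharing
	 * state) is forced down to page granularity, so flipping one page's
	 * state later never requires breaking up a live block mapping.
	 */
	static bool demo_force_pte_cb(u64 addr, u64 end, enum kvm_pgtable_prot prot)
	{
		return prot != PKVM_HOST_MEM_PROT;
	}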

arch/arm64/kvm/Kconfig

Lines changed: 9 additions & 0 deletions
@@ -46,6 +46,15 @@ if KVM
 
 source "virt/kvm/Kconfig"
 
+config NVHE_EL2_DEBUG
+	bool "Debug mode for non-VHE EL2 object"
+	help
+	  Say Y here to enable the debug mode for the non-VHE KVM EL2 object.
+	  Failure reports will BUG() in the hypervisor. This is intended for
+	  local EL2 hypervisor development.
+
+	  If unsure, say N.
+
 endif # KVM
 
 endif # VIRTUALIZATION

arch/arm64/kvm/arm.c

Lines changed: 8 additions & 49 deletions
@@ -91,10 +91,14 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
 		kvm->arch.return_nisv_io_abort_to_user = true;
 		break;
 	case KVM_CAP_ARM_MTE:
-		if (!system_supports_mte() || kvm->created_vcpus)
-			return -EINVAL;
-		r = 0;
-		kvm->arch.mte_enabled = true;
+		mutex_lock(&kvm->lock);
+		if (!system_supports_mte() || kvm->created_vcpus) {
+			r = -EINVAL;
+		} else {
+			r = 0;
+			kvm->arch.mte_enabled = true;
+		}
+		mutex_unlock(&kvm->lock);
 		break;
 	default:
 		r = -EINVAL;
@@ -1946,62 +1950,17 @@ static void _kvm_host_prot_finalize(void *discard)
 	WARN_ON(kvm_call_hyp_nvhe(__pkvm_prot_finalize));
 }
 
-static inline int pkvm_mark_hyp(phys_addr_t start, phys_addr_t end)
-{
-	return kvm_call_hyp_nvhe(__pkvm_mark_hyp, start, end);
-}
-
-#define pkvm_mark_hyp_section(__section)		\
-	pkvm_mark_hyp(__pa_symbol(__section##_start),	\
-			__pa_symbol(__section##_end))
-
 static int finalize_hyp_mode(void)
 {
-	int cpu, ret;
-
 	if (!is_protected_kvm_enabled())
 		return 0;
 
-	ret = pkvm_mark_hyp_section(__hyp_idmap_text);
-	if (ret)
-		return ret;
-
-	ret = pkvm_mark_hyp_section(__hyp_text);
-	if (ret)
-		return ret;
-
-	ret = pkvm_mark_hyp_section(__hyp_rodata);
-	if (ret)
-		return ret;
-
 	/*
 	 * Exclude HYP BSS from kmemleak so that it doesn't get peeked
 	 * at, which would end badly once the section is inaccessible.
 	 * None of other sections should ever be introspected.
 	 */
 	kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
-	ret = pkvm_mark_hyp_section(__hyp_bss);
-	if (ret)
-		return ret;
-
-	ret = pkvm_mark_hyp(hyp_mem_base, hyp_mem_base + hyp_mem_size);
-	if (ret)
-		return ret;
-
-	for_each_possible_cpu(cpu) {
-		phys_addr_t start = virt_to_phys((void *)kvm_arm_hyp_percpu_base[cpu]);
-		phys_addr_t end = start + (PAGE_SIZE << nvhe_percpu_order());
-
-		ret = pkvm_mark_hyp(start, end);
-		if (ret)
-			return ret;
-
-		start = virt_to_phys((void *)per_cpu(kvm_arm_hyp_stack_page, cpu));
-		end = start + PAGE_SIZE;
-		ret = pkvm_mark_hyp(start, end);
-		if (ret)
-			return ret;
-	}
 
 	/*
 	 * Flip the static key upfront as that may no longer be possible
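The KVM_CAP_ARM_MTE hunk closes a race with vCPU creation: the generic KVM code only changes created_vcpus while holding kvm->lock, so taking the same mutex here makes the "no vCPUs yet" check and the mte_enabled write atomic with respect to it. A sketch of the ordering this relies on, simplified from the shape of kvm_vm_ioctl_create_vcpu() in virt/kvm/kvm_main.c (illustrative, not the exact code):

	/*
	 * Simplified shape of vCPU creation in generic KVM: the count only
	 * moves under kvm->lock, which is why the MTE cap handler above can
	 * trust it once it holds the same mutex.
	 */
	static int demo_create_vcpu(struct kvm *kvm)
	{
		mutex_lock(&kvm->lock);
		kvm->created_vcpus++;	/* ordered against the mte_enabled check */
		mutex_unlock(&kvm->lock);

		return 0;
	}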

arch/arm64/kvm/handle_exit.c

Lines changed: 14 additions & 9 deletions
@@ -292,11 +292,12 @@ void handle_exit_early(struct kvm_vcpu *vcpu, int exception_index)
 		kvm_handle_guest_serror(vcpu, kvm_vcpu_get_esr(vcpu));
 }
 
-void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
+void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr,
+					      u64 elr_virt, u64 elr_phys,
 					      u64 par, uintptr_t vcpu,
 					      u64 far, u64 hpfar) {
-	u64 elr_in_kimg = __phys_to_kimg(__hyp_pa(elr));
-	u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr;
+	u64 elr_in_kimg = __phys_to_kimg(elr_phys);
+	u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;
 	u64 mode = spsr & PSR_MODE_MASK;
 
 	/*
@@ -309,20 +310,24 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
 		kvm_err("Invalid host exception to nVHE hyp!\n");
 	} else if (ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
 		   (esr & ESR_ELx_BRK64_ISS_COMMENT_MASK) == BUG_BRK_IMM) {
-		struct bug_entry *bug = find_bug(elr_in_kimg);
 		const char *file = NULL;
 		unsigned int line = 0;
 
 		/* All hyp bugs, including warnings, are treated as fatal. */
-		if (bug)
-			bug_get_file_line(bug, &file, &line);
+		if (!is_protected_kvm_enabled() ||
+		    IS_ENABLED(CONFIG_NVHE_EL2_DEBUG)) {
+			struct bug_entry *bug = find_bug(elr_in_kimg);
+
+			if (bug)
+				bug_get_file_line(bug, &file, &line);
+		}
 
 		if (file)
 			kvm_err("nVHE hyp BUG at: %s:%u!\n", file, line);
 		else
-			kvm_err("nVHE hyp BUG at: %016llx!\n", elr + hyp_offset);
+			kvm_err("nVHE hyp BUG at: %016llx!\n", elr_virt + hyp_offset);
 	} else {
-		kvm_err("nVHE hyp panic at: %016llx!\n", elr + hyp_offset);
+		kvm_err("nVHE hyp panic at: %016llx!\n", elr_virt + hyp_offset);
 	}
 
 	/*
@@ -334,5 +339,5 @@ void __noreturn __cold nvhe_hyp_panic_handler(u64 esr, u64 spsr, u64 elr,
 	kvm_err("Hyp Offset: 0x%llx\n", hyp_offset);
 
 	panic("HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%016lx\n",
-	      spsr, elr, esr, far, hpfar, par, vcpu);
+	      spsr, elr_virt, esr, far, hpfar, par, vcpu);
 }
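The reason for passing both elr_virt and elr_phys: once protected mode is enabled, the host can no longer translate a hypervisor VA itself (the old __hyp_pa(elr) trick), so EL2 hands over the physical ELR directly. A small sketch of the address arithmetic, using only the definitions in the hunk above (the helper name is illustrative):

	/*
	 * Sketch of the symbolization arithmetic used above. Given:
	 *   hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt
	 * it follows that:
	 *   elr_virt + hyp_offset == elr_in_kimg - kaslr_offset()
	 * i.e. the printed PC is the un-randomized kernel-image address of
	 * the faulting hyp instruction, which can be looked up in vmlinux.
	 */
	static u64 demo_hyp_pc_for_vmlinux(u64 elr_virt, u64 elr_in_kimg)
	{
		u64 hyp_offset = elr_in_kimg - kaslr_offset() - elr_virt;

		return elr_virt + hyp_offset;	/* == elr_in_kimg - kaslr_offset() */
	}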
