
Commit 435a9f6

Merge branch kvm-arm64/shadow-mmu into kvmarm/next
* kvm-arm64/shadow-mmu:
  : Shadow stage-2 MMU support for NV, courtesy of Marc Zyngier
  :
  : Initial implementation of shadow stage-2 page tables to support a guest
  : hypervisor. In the author's words:
  :
  : So here's the 10000m (approximately 30000ft for those of you stuck
  : with the wrong units) view of what this is doing:
  :
  : - for each {VMID,VTTBR,VTCR} tuple the guest uses, we use a
  :   separate shadow s2_mmu context. This context has its own "real"
  :   VMID and a set of page tables that are the combination of the
  :   guest's S2 and the host S2, built dynamically one fault at a time.
  :
  : - these shadow S2 contexts are ephemeral, and behave exactly as
  :   TLBs. For all intent and purposes, they *are* TLBs, and we discard
  :   them pretty often.
  :
  : - TLB invalidation takes three possible paths:
  :
  :   * either this is an EL2 S1 invalidation, and we directly emulate
  :     it as early as possible
  :
  :   * or this is an EL1 S1 invalidation, and we need to apply it to
  :     the shadow S2s (plural!) that match the VMID set by the L1 guest
  :
  :   * or finally, this is affecting S2, and we need to teardown the
  :     corresponding part of the shadow S2s, which invalidates the TLBs
  KVM: arm64: nv: Truely enable nXS TLBI operations
  KVM: arm64: nv: Add handling of NXS-flavoured TLBI operations
  KVM: arm64: nv: Add handling of range-based TLBI operations
  KVM: arm64: nv: Add handling of outer-shareable TLBI operations
  KVM: arm64: nv: Invalidate TLBs based on shadow S2 TTL-like information
  KVM: arm64: nv: Tag shadow S2 entries with guest's leaf S2 level
  KVM: arm64: nv: Handle FEAT_TTL hinted TLB operations
  KVM: arm64: nv: Handle TLBI IPAS2E1{,IS} operations
  KVM: arm64: nv: Handle TLBI ALLE1{,IS} operations
  KVM: arm64: nv: Handle TLBI VMALLS12E1{,IS} operations
  KVM: arm64: nv: Handle TLB invalidation targeting L2 stage-1
  KVM: arm64: nv: Handle EL2 Stage-1 TLB invalidation
  KVM: arm64: nv: Add Stage-1 EL2 invalidation primitives
  KVM: arm64: nv: Unmap/flush shadow stage 2 page tables
  KVM: arm64: nv: Handle shadow stage 2 page faults
  KVM: arm64: nv: Implement nested Stage-2 page table walk logic
  KVM: arm64: nv: Support multiple nested Stage-2 mmu structures

Signed-off-by: Oliver Upton <[email protected]>
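To make the per-{VMID,VTTBR,VTCR} bookkeeping described above concrete, here is a minimal, hypothetical sketch of how a shadow stage-2 MMU might be matched against the guest hypervisor's current S2 configuration. The series declares lookup_s2_mmu() and the tlb_vttbr/tlb_vtcr fields visible in the diffs below; everything else in this sketch (the function name, the exact matching policy) is an assumption for illustration, not the actual implementation.

#include <asm/kvm_arm.h>
#include <asm/kvm_host.h>
#include <asm/kvm_mmu.h>

/*
 * Illustrative sketch only: find the shadow S2 MMU whose cached
 * {VTTBR, VTCR} matches what the L1 guest currently has programmed,
 * treating kvm->arch.nested_mmus[] as a simple linear cache. The series'
 * real lookup_s2_mmu() also recycles and rebuilds entries on a miss.
 */
static struct kvm_s2_mmu *sketch_lookup_s2_mmu(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	u64 vttbr = vcpu_read_sys_reg(vcpu, VTTBR_EL2);
	u64 vtcr = vcpu_read_sys_reg(vcpu, VTCR_EL2);
	size_t i;

	for (i = 0; i < kvm->arch.nested_mmus_size; i++) {
		struct kvm_s2_mmu *mmu = &kvm->arch.nested_mmus[i];

		/* tlb_vttbr caches the guest's VTTBR with CnP cleared */
		if (kvm_s2_mmu_valid(mmu) &&
		    mmu->tlb_vttbr == (vttbr & ~VTTBR_CNP_BIT) &&
		    mmu->tlb_vtcr == vtcr)
			return mmu;	/* behaves like a TLB hit */
	}

	/* Miss: the real code picks an unused slot and faults it in lazily. */
	return NULL;
}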
2 parents a35d5b2 + 3cfde36 commit 435a9f6


13 files changed (+1777 lines, -43 lines)


arch/arm64/include/asm/esr.h

Lines changed: 1 addition & 0 deletions
@@ -152,6 +152,7 @@
 #define ESR_ELx_Xs_MASK		(GENMASK_ULL(4, 0))
 
 /* ISS field definitions for exceptions taken in to Hyp */
+#define ESR_ELx_FSC_ADDRSZ	(0x00)
 #define ESR_ELx_CV		(UL(1) << 24)
 #define ESR_ELx_COND_SHIFT	(20)
 #define ESR_ELx_COND_MASK	(UL(0xF) << ESR_ELx_COND_SHIFT)

arch/arm64/include/asm/kvm_asm.h

Lines changed: 2 additions & 0 deletions
@@ -232,6 +232,8 @@ extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
 					phys_addr_t start, unsigned long pages);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
 
+extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
+
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);

arch/arm64/include/asm/kvm_host.h

Lines changed: 36 additions & 0 deletions
@@ -189,6 +189,33 @@ struct kvm_s2_mmu {
 	uint64_t split_page_chunk_size;
 
 	struct kvm_arch *arch;
+
+	/*
+	 * For a shadow stage-2 MMU, the virtual vttbr used by the
+	 * host to parse the guest S2.
+	 * This either contains:
+	 * - the virtual VTTBR programmed by the guest hypervisor with
+	 *   CnP cleared
+	 * - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid
+	 *
+	 * We also cache the full VTCR which gets used for TLB invalidation,
+	 * taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted
+	 * to be cached in a TLB" to the letter.
+	 */
+	u64	tlb_vttbr;
+	u64	tlb_vtcr;
+
+	/*
+	 * true when this represents a nested context where virtual
+	 * HCR_EL2.VM == 1
+	 */
+	bool	nested_stage2_enabled;
+
+	/*
+	 *  0: Nobody is currently using this, check vttbr for validity
+	 * >0: Somebody is actively using this.
+	 */
+	atomic_t refcnt;
 };
 
 struct kvm_arch_memory_slot {
@@ -256,6 +283,14 @@ struct kvm_arch {
 	 */
 	u64 fgu[__NR_FGT_GROUP_IDS__];
 
+	/*
+	 * Stage 2 paging state for VMs with nested S2 using a virtual
+	 * VMID.
+	 */
+	struct kvm_s2_mmu	*nested_mmus;
+	size_t			nested_mmus_size;
+	int			nested_mmus_next;
+
 	/* Interrupt controller */
 	struct vgic_dist	vgic;
 
@@ -1306,6 +1341,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
 void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu);
 
 int __init kvm_set_ipa_limit(void);
+u32 kvm_get_pa_bits(struct kvm *kvm);
 
 #define __KVM_HAVE_ARCH_VM_ALLOC
 struct kvm *kvm_arch_alloc_vm(void);
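The refcnt comment in the struct kvm_s2_mmu hunk above defines the lifetime rule for a shadow context: zero means the slot is unused (with tlb_vttbr then deciding whether its cached translation is still valid), non-zero means a vCPU currently has it loaded. A minimal, hypothetical sketch of that protocol, using only the fields introduced above; the sketch_* helper names are invented, and the series presumably drives this from kvm_vcpu_load_hw_mmu()/kvm_vcpu_put_hw_mmu() and the nested MMU allocator rather than helpers like these.

#include <linux/atomic.h>
#include <asm/kvm_host.h>
#include <asm/kvm_mmu.h>

/* Illustrative only: pin a shadow MMU while a vCPU is running on it. */
static void sketch_get_s2_mmu(struct kvm_s2_mmu *mmu)
{
	atomic_inc(&mmu->refcnt);
}

/* Drop the pin; the slot may then be recycled for another {VTTBR,VTCR}. */
static void sketch_put_s2_mmu(struct kvm_s2_mmu *mmu)
{
	atomic_dec(&mmu->refcnt);
}

/*
 * refcnt == 0 means nobody is on this context; whether it still caches a
 * usable translation is then decided by kvm_s2_mmu_valid() on tlb_vttbr.
 */
static bool sketch_s2_mmu_recyclable(struct kvm_s2_mmu *mmu)
{
	return atomic_read(&mmu->refcnt) == 0;
}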

arch/arm64/include/asm/kvm_mmu.h

Lines changed: 26 additions & 0 deletions
@@ -98,6 +98,7 @@ alternative_cb_end
 #include <asm/mmu_context.h>
 #include <asm/kvm_emulate.h>
 #include <asm/kvm_host.h>
+#include <asm/kvm_nested.h>
 
 void kvm_update_va_mask(struct alt_instr *alt,
 			__le32 *origptr, __le32 *updptr, int nr_inst);
@@ -165,6 +166,10 @@ int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size,
 int create_hyp_stack(phys_addr_t phys_addr, unsigned long *haddr);
 void __init free_hyp_pgds(void);
 
+void kvm_stage2_unmap_range(struct kvm_s2_mmu *mmu, phys_addr_t start, u64 size);
+void kvm_stage2_flush_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
+void kvm_stage2_wp_range(struct kvm_s2_mmu *mmu, phys_addr_t addr, phys_addr_t end);
+
 void stage2_unmap_vm(struct kvm *kvm);
 int kvm_init_stage2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu, unsigned long type);
 void kvm_uninit_stage2_mmu(struct kvm *kvm);
@@ -326,5 +331,26 @@ static inline struct kvm *kvm_s2_mmu_to_kvm(struct kvm_s2_mmu *mmu)
 {
 	return container_of(mmu->arch, struct kvm, arch);
 }
+
+static inline u64 get_vmid(u64 vttbr)
+{
+	return (vttbr & VTTBR_VMID_MASK(kvm_get_vmid_bits())) >>
+		VTTBR_VMID_SHIFT;
+}
+
+static inline bool kvm_s2_mmu_valid(struct kvm_s2_mmu *mmu)
+{
+	return !(mmu->tlb_vttbr & VTTBR_CNP_BIT);
+}
+
+static inline bool kvm_is_nested_s2_mmu(struct kvm *kvm, struct kvm_s2_mmu *mmu)
+{
+	/*
+	 * Be careful, mmu may not be fully initialised so do look at
+	 * *any* of its fields.
+	 */
+	return &kvm->arch.mmu != mmu;
+}
+
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */

arch/arm64/include/asm/kvm_nested.h

Lines changed: 127 additions & 0 deletions
@@ -5,6 +5,7 @@
 #include <linux/bitfield.h>
 #include <linux/kvm_host.h>
 #include <asm/kvm_emulate.h>
+#include <asm/kvm_pgtable.h>
 
 static inline bool vcpu_has_nv(const struct kvm_vcpu *vcpu)
 {
@@ -61,6 +62,125 @@ static inline u64 translate_ttbr0_el2_to_ttbr0_el1(u64 ttbr0)
 }
 
 extern bool forward_smc_trap(struct kvm_vcpu *vcpu);
+extern void kvm_init_nested(struct kvm *kvm);
+extern int kvm_vcpu_init_nested(struct kvm_vcpu *vcpu);
+extern void kvm_init_nested_s2_mmu(struct kvm_s2_mmu *mmu);
+extern struct kvm_s2_mmu *lookup_s2_mmu(struct kvm_vcpu *vcpu);
+
+union tlbi_info;
+
+extern void kvm_s2_mmu_iterate_by_vmid(struct kvm *kvm, u16 vmid,
+					const union tlbi_info *info,
+					void (*)(struct kvm_s2_mmu *,
+						 const union tlbi_info *));
+extern void kvm_vcpu_load_hw_mmu(struct kvm_vcpu *vcpu);
+extern void kvm_vcpu_put_hw_mmu(struct kvm_vcpu *vcpu);
+
+struct kvm_s2_trans {
+	phys_addr_t output;
+	unsigned long block_size;
+	bool writable;
+	bool readable;
+	int level;
+	u32 esr;
+	u64 upper_attr;
+};
+
+static inline phys_addr_t kvm_s2_trans_output(struct kvm_s2_trans *trans)
+{
+	return trans->output;
+}
+
+static inline unsigned long kvm_s2_trans_size(struct kvm_s2_trans *trans)
+{
+	return trans->block_size;
+}
+
+static inline u32 kvm_s2_trans_esr(struct kvm_s2_trans *trans)
+{
+	return trans->esr;
+}
+
+static inline bool kvm_s2_trans_readable(struct kvm_s2_trans *trans)
+{
+	return trans->readable;
+}
+
+static inline bool kvm_s2_trans_writable(struct kvm_s2_trans *trans)
+{
+	return trans->writable;
+}
+
+static inline bool kvm_s2_trans_executable(struct kvm_s2_trans *trans)
+{
+	return !(trans->upper_attr & BIT(54));
+}
+
+extern int kvm_walk_nested_s2(struct kvm_vcpu *vcpu, phys_addr_t gipa,
+			      struct kvm_s2_trans *result);
+extern int kvm_s2_handle_perm_fault(struct kvm_vcpu *vcpu,
+				    struct kvm_s2_trans *trans);
+extern int kvm_inject_s2_fault(struct kvm_vcpu *vcpu, u64 esr_el2);
+extern void kvm_nested_s2_wp(struct kvm *kvm);
+extern void kvm_nested_s2_unmap(struct kvm *kvm);
+extern void kvm_nested_s2_flush(struct kvm *kvm);
+
+unsigned long compute_tlb_inval_range(struct kvm_s2_mmu *mmu, u64 val);
+
+static inline bool kvm_supported_tlbi_s1e1_op(struct kvm_vcpu *vpcu, u32 instr)
+{
+	struct kvm *kvm = vpcu->kvm;
+	u8 CRm = sys_reg_CRm(instr);
+
+	if (!(sys_reg_Op0(instr) == TLBI_Op0 &&
+	      sys_reg_Op1(instr) == TLBI_Op1_EL1))
+		return false;
+
+	if (!(sys_reg_CRn(instr) == TLBI_CRn_XS ||
+	      (sys_reg_CRn(instr) == TLBI_CRn_nXS &&
+	       kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP))))
+		return false;
+
+	if (CRm == TLBI_CRm_nROS &&
+	    !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+		return false;
+
+	if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS ||
+	     CRm == TLBI_CRm_RNS) &&
+	    !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
+		return false;
+
+	return true;
+}
+
+static inline bool kvm_supported_tlbi_s1e2_op(struct kvm_vcpu *vpcu, u32 instr)
+{
+	struct kvm *kvm = vpcu->kvm;
+	u8 CRm = sys_reg_CRm(instr);
+
+	if (!(sys_reg_Op0(instr) == TLBI_Op0 &&
+	      sys_reg_Op1(instr) == TLBI_Op1_EL2))
+		return false;
+
+	if (!(sys_reg_CRn(instr) == TLBI_CRn_XS ||
+	      (sys_reg_CRn(instr) == TLBI_CRn_nXS &&
+	       kvm_has_feat(kvm, ID_AA64ISAR1_EL1, XS, IMP))))
+		return false;
+
+	if (CRm == TLBI_CRm_IPAIS || CRm == TLBI_CRm_IPAONS)
+		return false;
+
+	if (CRm == TLBI_CRm_nROS &&
+	    !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, OS))
+		return false;
+
+	if ((CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS ||
+	     CRm == TLBI_CRm_RNS) &&
+	    !kvm_has_feat(kvm, ID_AA64ISAR0_EL1, TLB, RANGE))
+		return false;
+
+	return true;
+}
 
 int kvm_init_nv_sysregs(struct kvm *kvm);

@@ -76,4 +196,11 @@ static inline bool kvm_auth_eretax(struct kvm_vcpu *vcpu, u64 *elr)
 }
 #endif
 
+#define KVM_NV_GUEST_MAP_SZ (KVM_PGTABLE_PROT_SW1 | KVM_PGTABLE_PROT_SW0)
+
+static inline u64 kvm_encode_nested_level(struct kvm_s2_trans *trans)
+{
+	return FIELD_PREP(KVM_NV_GUEST_MAP_SZ, trans->level);
+}
+
 #endif /* __ARM64_KVM_NESTED_H */
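The kvm_s2_trans accessors above are what a shadow stage-2 fault path consumes: walk the guest's own S2 for the faulting IPA, reflect a guest-visible fault back to virtual EL2 if the walk says so, and otherwise map the resulting range with permissions capped by the guest's S2. A rough, hypothetical sketch of that flow follows; the function name and the simplified error handling are assumptions, and the actual mapping step (done in the host's fault handler and tagged via kvm_encode_nested_level() so TTL-hinted TLBIs can find the entry) is elided.

#include <asm/kvm_emulate.h>
#include <asm/kvm_nested.h>

/* Illustrative only: resolve a faulting guest IPA through the guest's S2. */
static int sketch_shadow_s2_fault(struct kvm_vcpu *vcpu, phys_addr_t gipa)
{
	struct kvm_s2_trans trans = {};
	int ret;

	ret = kvm_walk_nested_s2(vcpu, gipa, &trans);
	if (ret)
		return ret;

	/* The guest's own S2 faults this access: forward it to virtual EL2. */
	if (kvm_s2_trans_esr(&trans))
		return kvm_inject_s2_fault(vcpu, kvm_s2_trans_esr(&trans));

	/*
	 * Otherwise kvm_s2_trans_output()/kvm_s2_trans_size() give the L1 PA
	 * range to shadow-map, and kvm_s2_trans_writable()/_executable() cap
	 * the permissions the shadow entry may grant.
	 */
	return 0;
}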

arch/arm64/include/asm/sysreg.h

Lines changed: 17 additions & 0 deletions
@@ -654,6 +654,23 @@
 #define OP_AT_S12E0W	sys_insn(AT_Op0, 4, AT_CRn, 8, 7)
 
 /* TLBI instructions */
+#define TLBI_Op0	1
+
+#define TLBI_Op1_EL1	0	/* Accessible from EL1 or higher */
+#define TLBI_Op1_EL2	4	/* Accessible from EL2 or higher */
+
+#define TLBI_CRn_XS	8	/* Extra Slow (the common one) */
+#define TLBI_CRn_nXS	9	/* not Extra Slow (which nobody uses) */
+
+#define TLBI_CRm_IPAIS	0	/* S2 Inner-Shareable */
+#define TLBI_CRm_nROS	1	/* non-Range, Outer-Sharable */
+#define TLBI_CRm_RIS	2	/* Range, Inner-Sharable */
+#define TLBI_CRm_nRIS	3	/* non-Range, Inner-Sharable */
+#define TLBI_CRm_IPAONS	4	/* S2 Outer and Non-Shareable */
+#define TLBI_CRm_ROS	5	/* Range, Outer-Sharable */
+#define TLBI_CRm_RNS	6	/* Range, Non-Sharable */
+#define TLBI_CRm_nRNS	7	/* non-Range, Non-Sharable */
+
 #define OP_TLBI_VMALLE1OS		sys_insn(1, 0, 8, 1, 0)
 #define OP_TLBI_VAE1OS			sys_insn(1, 0, 8, 1, 1)
 #define OP_TLBI_ASIDE1OS		sys_insn(1, 0, 8, 1, 2)
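These constants name the Op0/Op1/CRn/CRm fields of the TLBI encoding space so the nested code can classify a trapped TLBI by field, as kvm_supported_tlbi_s1e{1,2}_op() do, rather than enumerating every opcode. As a small hypothetical illustration of the decomposition: OP_TLBI_VAE1OS = sys_insn(1, 0, 8, 1, 1) splits into TLBI_Op0, TLBI_Op1_EL1, TLBI_CRn_XS and TLBI_CRm_nROS. The helper below is a sketch, not part of the series.

#include <linux/types.h>
#include <asm/sysreg.h>

/*
 * Illustrative only: true for range-based TLBIs in the EL1, regular
 * (non-nXS, CRn=8) encoding space, using the new field-level constants.
 */
static bool sketch_is_el1_range_tlbi(u32 instr)
{
	u8 CRm = sys_reg_CRm(instr);

	return sys_reg_Op0(instr) == TLBI_Op0 &&
	       sys_reg_Op1(instr) == TLBI_Op1_EL1 &&
	       sys_reg_CRn(instr) == TLBI_CRn_XS &&
	       (CRm == TLBI_CRm_RIS || CRm == TLBI_CRm_ROS ||
		CRm == TLBI_CRm_RNS);
}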

arch/arm64/kvm/arm.c

Lines changed: 11 additions & 0 deletions
@@ -179,6 +179,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	mutex_unlock(&kvm->lock);
 #endif
 
+	kvm_init_nested(kvm);
+
 	ret = kvm_share_hyp(kvm, kvm + 1);
 	if (ret)
 		return ret;
@@ -578,6 +580,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	struct kvm_s2_mmu *mmu;
 	int *last_ran;
 
+	if (vcpu_has_nv(vcpu))
+		kvm_vcpu_load_hw_mmu(vcpu);
+
 	mmu = vcpu->arch.hw_mmu;
 	last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
 
@@ -633,6 +638,8 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 	kvm_timer_vcpu_put(vcpu);
 	kvm_vgic_put(vcpu);
 	kvm_vcpu_pmu_restore_host(vcpu);
+	if (vcpu_has_nv(vcpu))
+		kvm_vcpu_put_hw_mmu(vcpu);
 	kvm_arm_vmid_clear_active();
 
 	vcpu_clear_on_unsupported_cpu(vcpu);
@@ -1491,6 +1498,10 @@ static int kvm_setup_vcpu(struct kvm_vcpu *vcpu)
 	if (kvm_vcpu_has_pmu(vcpu) && !kvm->arch.arm_pmu)
 		ret = kvm_arm_set_default_pmu(kvm);
 
+	/* Prepare for nested if required */
+	if (!ret && vcpu_has_nv(vcpu))
+		ret = kvm_vcpu_init_nested(vcpu);
+
 	return ret;
 }

arch/arm64/kvm/hyp/vhe/switch.c

Lines changed: 50 additions & 1 deletion
@@ -266,10 +266,59 @@ static void kvm_hyp_save_fpsimd_host(struct kvm_vcpu *vcpu)
 		__fpsimd_save_state(*host_data_ptr(fpsimd_state));
 }
 
+static bool kvm_hyp_handle_tlbi_el2(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+	int ret = -EINVAL;
+	u32 instr;
+	u64 val;
+
+	/*
+	 * Ideally, we would never trap on EL2 S1 TLB invalidations using
+	 * the EL1 instructions when the guest's HCR_EL2.{E2H,TGE}=={1,1}.
+	 * But "thanks" to FEAT_NV2, we don't trap writes to HCR_EL2,
+	 * meaning that we can't track changes to the virtual TGE bit. So we
+	 * have to leave HCR_EL2.TTLB set on the host. Oopsie...
+	 *
+	 * Try and handle these invalidation as quickly as possible, without
+	 * fully exiting. Note that we don't need to consider any forwarding
+	 * here, as having E2H+TGE set is the very definition of being
+	 * InHost.
+	 *
+	 * For the lesser hypervisors out there that have failed to get on
+	 * with the VHE program, we can also handle the nVHE style of EL2
+	 * invalidation.
+	 */
+	if (!(is_hyp_ctxt(vcpu)))
+		return false;
+
+	instr = esr_sys64_to_sysreg(kvm_vcpu_get_esr(vcpu));
+	val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
+
+	if ((kvm_supported_tlbi_s1e1_op(vcpu, instr) &&
+	     vcpu_el2_e2h_is_set(vcpu) && vcpu_el2_tge_is_set(vcpu)) ||
+	    kvm_supported_tlbi_s1e2_op(vcpu, instr))
+		ret = __kvm_tlbi_s1e2(NULL, val, instr);
+
+	if (ret)
+		return false;
+
+	__kvm_skip_instr(vcpu);
+
+	return true;
+}
+
+static bool kvm_hyp_handle_sysreg_vhe(struct kvm_vcpu *vcpu, u64 *exit_code)
+{
+	if (kvm_hyp_handle_tlbi_el2(vcpu, exit_code))
+		return true;
+
+	return kvm_hyp_handle_sysreg(vcpu, exit_code);
+}
+
 static const exit_handler_fn hyp_exit_handlers[] = {
 	[0 ... ESR_ELx_EC_MAX]		= NULL,
 	[ESR_ELx_EC_CP15_32]		= kvm_hyp_handle_cp15_32,
-	[ESR_ELx_EC_SYS64]		= kvm_hyp_handle_sysreg,
+	[ESR_ELx_EC_SYS64]		= kvm_hyp_handle_sysreg_vhe,
 	[ESR_ELx_EC_SVE]		= kvm_hyp_handle_fpsimd,
 	[ESR_ELx_EC_FP_ASIMD]		= kvm_hyp_handle_fpsimd,
 	[ESR_ELx_EC_IABT_LOW]		= kvm_hyp_handle_iabt_low,
