Skip to content

Commit 1c5a0b5

Browse files
committed
Merge tag 'kvmarm-6.11' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD
KVM/arm64 changes for 6.11 - Initial infrastructure for shadow stage-2 MMUs, as part of nested virtualization enablement - Support for userspace changes to the guest CTR_EL0 value, enabling (in part) migration of VMs between heterogeneous hardware - Fixes + improvements to pKVM's FF-A proxy, adding support for v1.1 of the protocol - FPSIMD/SVE support for nested, including merged trap configuration and exception routing - New command-line parameter to control the WFx trap behavior under KVM - Introduce kCFI hardening in the EL2 hypervisor - Fixes + cleanups for handling presence/absence of FEAT_TCRX - Miscellaneous fixes + documentation updates
2 parents c8b8b81 + bb032b2 commit 1c5a0b5

39 files changed

+2764
-380
lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2720,6 +2720,24 @@
27202720
[KVM,ARM,EARLY] Allow use of GICv4 for direct
27212721
injection of LPIs.
27222722

2723+
kvm-arm.wfe_trap_policy=
2724+
[KVM,ARM] Control when to set WFE instruction trap for
2725+
KVM VMs. Traps are allowed but not guaranteed by the
2726+
CPU architecture.
2727+
2728+
trap: set WFE instruction trap
2729+
2730+
notrap: clear WFE instruction trap
2731+
2732+
kvm-arm.wfi_trap_policy=
2733+
[KVM,ARM] Control when to set WFI instruction trap for
2734+
KVM VMs. Traps are allowed but not guaranteed by the
2735+
CPU architecture.
2736+
2737+
trap: set WFI instruction trap
2738+
2739+
notrap: clear WFI instruction trap
2740+
27232741
kvm_cma_resv_ratio=n [PPC,EARLY]
27242742
Reserves given percentage from system memory area for
27252743
contiguous memory allocation for KVM hash pagetable

Documentation/virt/kvm/api.rst

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -891,12 +891,12 @@ like this::
891891

892892
The irq_type field has the following values:
893893

894-
- irq_type[0]:
894+
- KVM_ARM_IRQ_TYPE_CPU:
895895
out-of-kernel GIC: irq_id 0 is IRQ, irq_id 1 is FIQ
896-
- irq_type[1]:
896+
- KVM_ARM_IRQ_TYPE_SPI:
897897
in-kernel GIC: SPI, irq_id between 32 and 1019 (incl.)
898898
(the vcpu_index field is ignored)
899-
- irq_type[2]:
899+
- KVM_ARM_IRQ_TYPE_PPI:
900900
in-kernel GIC: PPI, irq_id between 16 and 31 (incl.)
901901

902902
(The irq_id field thus corresponds nicely to the IRQ ID in the ARM GIC specs)
@@ -1927,7 +1927,7 @@ flags:
19271927

19281928
If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
19291929
for the device that wrote the MSI message. For PCI, this is usually a
1930-
BFD identifier in the lower 16 bits.
1930+
BDF identifier in the lower 16 bits.
19311931

19321932
On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
19331933
feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled,
@@ -2992,7 +2992,7 @@ flags:
29922992

29932993
If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
29942994
for the device that wrote the MSI message. For PCI, this is usually a
2995-
BFD identifier in the lower 16 bits.
2995+
BDF identifier in the lower 16 bits.
29962996

29972997
On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
29982998
feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled,

Documentation/virt/kvm/devices/arm-vgic.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Groups:
3131
KVM_VGIC_V2_ADDR_TYPE_CPU (rw, 64-bit)
3232
Base address in the guest physical address space of the GIC virtual cpu
3333
interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2.
34-
This address needs to be 4K aligned and the region covers 4 KByte.
34+
This address needs to be 4K aligned and the region covers 8 KByte.
3535

3636
Errors:
3737

MAINTAINERS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12077,6 +12077,8 @@ L: [email protected] (moderated for non-subscribers)
1207712077
1207812078
S: Maintained
1207912079
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
12080+
F: Documentation/virt/kvm/arm/
12081+
F: Documentation/virt/kvm/devices/arm*
1208012082
F: arch/arm64/include/asm/kvm*
1208112083
F: arch/arm64/include/uapi/asm/kvm*
1208212084
F: arch/arm64/kvm/

arch/arm64/include/asm/esr.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@
152152
#define ESR_ELx_Xs_MASK (GENMASK_ULL(4, 0))
153153

154154
/* ISS field definitions for exceptions taken in to Hyp */
155+
#define ESR_ELx_FSC_ADDRSZ (0x00)
155156
#define ESR_ELx_CV (UL(1) << 24)
156157
#define ESR_ELx_COND_SHIFT (20)
157158
#define ESR_ELx_COND_MASK (UL(0xF) << ESR_ELx_COND_SHIFT)
@@ -379,13 +380,24 @@
379380
#ifndef __ASSEMBLY__
380381
#include <asm/types.h>
381382

383+
static inline unsigned long esr_brk_comment(unsigned long esr)
384+
{
385+
return esr & ESR_ELx_BRK64_ISS_COMMENT_MASK;
386+
}
387+
382388
static inline bool esr_is_data_abort(unsigned long esr)
383389
{
384390
const unsigned long ec = ESR_ELx_EC(esr);
385391

386392
return ec == ESR_ELx_EC_DABT_LOW || ec == ESR_ELx_EC_DABT_CUR;
387393
}
388394

395+
static inline bool esr_is_cfi_brk(unsigned long esr)
396+
{
397+
return ESR_ELx_EC(esr) == ESR_ELx_EC_BRK64 &&
398+
(esr_brk_comment(esr) & ~CFI_BRK_IMM_MASK) == CFI_BRK_IMM_BASE;
399+
}
400+
389401
static inline bool esr_fsc_is_translation_fault(unsigned long esr)
390402
{
391403
/* Translation fault, level -1 */

arch/arm64/include/asm/kvm_arm.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,6 @@
102102
#define HCR_HOST_NVHE_PROTECTED_FLAGS (HCR_HOST_NVHE_FLAGS | HCR_TSC)
103103
#define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H)
104104

105-
#define HCRX_GUEST_FLAGS (HCRX_EL2_SMPME | HCRX_EL2_TCR2En)
106105
#define HCRX_HOST_FLAGS (HCRX_EL2_MSCEn | HCRX_EL2_TCR2En | HCRX_EL2_EnFPM)
107106

108107
/* TCR_EL2 Registers bits */

arch/arm64/include/asm/kvm_asm.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,8 @@ extern void __kvm_tlb_flush_vmid_range(struct kvm_s2_mmu *mmu,
232232
phys_addr_t start, unsigned long pages);
233233
extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
234234

235+
extern int __kvm_tlbi_s1e2(struct kvm_s2_mmu *mmu, u64 va, u64 sys_encoding);
236+
235237
extern void __kvm_timer_set_cntvoff(u64 cntvoff);
236238

237239
extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);

arch/arm64/include/asm/kvm_emulate.h

Lines changed: 64 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#ifndef __ARM64_KVM_EMULATE_H__
1212
#define __ARM64_KVM_EMULATE_H__
1313

14+
#include <linux/bitfield.h>
1415
#include <linux/kvm_host.h>
1516

1617
#include <asm/debug-monitors.h>
@@ -55,6 +56,14 @@ void kvm_emulate_nested_eret(struct kvm_vcpu *vcpu);
5556
int kvm_inject_nested_sync(struct kvm_vcpu *vcpu, u64 esr_el2);
5657
int kvm_inject_nested_irq(struct kvm_vcpu *vcpu);
5758

59+
static inline void kvm_inject_nested_sve_trap(struct kvm_vcpu *vcpu)
60+
{
61+
u64 esr = FIELD_PREP(ESR_ELx_EC_MASK, ESR_ELx_EC_SVE) |
62+
ESR_ELx_IL;
63+
64+
kvm_inject_nested_sync(vcpu, esr);
65+
}
66+
5867
#if defined(__KVM_VHE_HYPERVISOR__) || defined(__KVM_NVHE_HYPERVISOR__)
5968
static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
6069
{
@@ -69,39 +78,17 @@ static __always_inline bool vcpu_el1_is_32bit(struct kvm_vcpu *vcpu)
6978

7079
static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
7180
{
72-
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
73-
if (has_vhe() || has_hvhe())
74-
vcpu->arch.hcr_el2 |= HCR_E2H;
75-
if (cpus_have_final_cap(ARM64_HAS_RAS_EXTN)) {
76-
/* route synchronous external abort exceptions to EL2 */
77-
vcpu->arch.hcr_el2 |= HCR_TEA;
78-
/* trap error record accesses */
79-
vcpu->arch.hcr_el2 |= HCR_TERR;
80-
}
81+
if (!vcpu_has_run_once(vcpu))
82+
vcpu->arch.hcr_el2 = HCR_GUEST_FLAGS;
8183

82-
if (cpus_have_final_cap(ARM64_HAS_STAGE2_FWB)) {
83-
vcpu->arch.hcr_el2 |= HCR_FWB;
84-
} else {
85-
/*
86-
* For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
87-
* get set in SCTLR_EL1 such that we can detect when the guest
88-
* MMU gets turned on and do the necessary cache maintenance
89-
* then.
90-
*/
84+
/*
85+
* For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
86+
* get set in SCTLR_EL1 such that we can detect when the guest
87+
* MMU gets turned on and do the necessary cache maintenance
88+
* then.
89+
*/
90+
if (!cpus_have_final_cap(ARM64_HAS_STAGE2_FWB))
9191
vcpu->arch.hcr_el2 |= HCR_TVM;
92-
}
93-
94-
if (cpus_have_final_cap(ARM64_HAS_EVT) &&
95-
!cpus_have_final_cap(ARM64_MISMATCHED_CACHE_TYPE))
96-
vcpu->arch.hcr_el2 |= HCR_TID4;
97-
else
98-
vcpu->arch.hcr_el2 |= HCR_TID2;
99-
100-
if (vcpu_el1_is_32bit(vcpu))
101-
vcpu->arch.hcr_el2 &= ~HCR_RW;
102-
103-
if (kvm_has_mte(vcpu->kvm))
104-
vcpu->arch.hcr_el2 |= HCR_ATA;
10592
}
10693

10794
static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
@@ -660,4 +647,50 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu)
660647

661648
kvm_write_cptr_el2(val);
662649
}
650+
651+
/*
652+
* Returns a 'sanitised' view of CPTR_EL2, translating from nVHE to the VHE
653+
* format if E2H isn't set.
654+
*/
655+
static inline u64 vcpu_sanitised_cptr_el2(const struct kvm_vcpu *vcpu)
656+
{
657+
u64 cptr = __vcpu_sys_reg(vcpu, CPTR_EL2);
658+
659+
if (!vcpu_el2_e2h_is_set(vcpu))
660+
cptr = translate_cptr_el2_to_cpacr_el1(cptr);
661+
662+
return cptr;
663+
}
664+
665+
static inline bool ____cptr_xen_trap_enabled(const struct kvm_vcpu *vcpu,
666+
unsigned int xen)
667+
{
668+
switch (xen) {
669+
case 0b00:
670+
case 0b10:
671+
return true;
672+
case 0b01:
673+
return vcpu_el2_tge_is_set(vcpu) && !vcpu_is_el2(vcpu);
674+
case 0b11:
675+
default:
676+
return false;
677+
}
678+
}
679+
680+
#define __guest_hyp_cptr_xen_trap_enabled(vcpu, xen) \
681+
(!vcpu_has_nv(vcpu) ? false : \
682+
____cptr_xen_trap_enabled(vcpu, \
683+
SYS_FIELD_GET(CPACR_ELx, xen, \
684+
vcpu_sanitised_cptr_el2(vcpu))))
685+
686+
static inline bool guest_hyp_fpsimd_traps_enabled(const struct kvm_vcpu *vcpu)
687+
{
688+
return __guest_hyp_cptr_xen_trap_enabled(vcpu, FPEN);
689+
}
690+
691+
static inline bool guest_hyp_sve_traps_enabled(const struct kvm_vcpu *vcpu)
692+
{
693+
return __guest_hyp_cptr_xen_trap_enabled(vcpu, ZEN);
694+
}
695+
663696
#endif /* __ARM64_KVM_EMULATE_H__ */

arch/arm64/include/asm/kvm_host.h

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -189,6 +189,33 @@ struct kvm_s2_mmu {
189189
uint64_t split_page_chunk_size;
190190

191191
struct kvm_arch *arch;
192+
193+
/*
194+
* For a shadow stage-2 MMU, the virtual vttbr used by the
195+
* host to parse the guest S2.
196+
* This either contains:
197+
* - the virtual VTTBR programmed by the guest hypervisor with
198+
* CnP cleared
199+
* - The value 1 (VMID=0, BADDR=0, CnP=1) if invalid
200+
*
201+
* We also cache the full VTCR which gets used for TLB invalidation,
202+
* taking the ARM ARM's "Any of the bits in VTCR_EL2 are permitted
203+
* to be cached in a TLB" to the letter.
204+
*/
205+
u64 tlb_vttbr;
206+
u64 tlb_vtcr;
207+
208+
/*
209+
* true when this represents a nested context where virtual
210+
* HCR_EL2.VM == 1
211+
*/
212+
bool nested_stage2_enabled;
213+
214+
/*
215+
* 0: Nobody is currently using this, check vttbr for validity
216+
* >0: Somebody is actively using this.
217+
*/
218+
atomic_t refcnt;
192219
};
193220

194221
struct kvm_arch_memory_slot {
@@ -256,6 +283,14 @@ struct kvm_arch {
256283
*/
257284
u64 fgu[__NR_FGT_GROUP_IDS__];
258285

286+
/*
287+
* Stage 2 paging state for VMs with nested S2 using a virtual
288+
* VMID.
289+
*/
290+
struct kvm_s2_mmu *nested_mmus;
291+
size_t nested_mmus_size;
292+
int nested_mmus_next;
293+
259294
/* Interrupt controller */
260295
struct vgic_dist vgic;
261296

@@ -327,11 +362,11 @@ struct kvm_arch {
327362
* Atomic access to multiple idregs are guarded by kvm_arch.config_lock.
328363
*/
329364
#define IDREG_IDX(id) (((sys_reg_CRm(id) - 1) << 3) | sys_reg_Op2(id))
330-
#define IDX_IDREG(idx) sys_reg(3, 0, 0, ((idx) >> 3) + 1, (idx) & Op2_mask)
331-
#define IDREG(kvm, id) ((kvm)->arch.id_regs[IDREG_IDX(id)])
332365
#define KVM_ARM_ID_REG_NUM (IDREG_IDX(sys_reg(3, 0, 0, 7, 7)) + 1)
333366
u64 id_regs[KVM_ARM_ID_REG_NUM];
334367

368+
u64 ctr_el0;
369+
335370
/* Masks for VNCR-baked sysregs */
336371
struct kvm_sysreg_masks *sysreg_masks;
337372

@@ -423,6 +458,7 @@ enum vcpu_sysreg {
423458
MDCR_EL2, /* Monitor Debug Configuration Register (EL2) */
424459
CPTR_EL2, /* Architectural Feature Trap Register (EL2) */
425460
HACR_EL2, /* Hypervisor Auxiliary Control Register */
461+
ZCR_EL2, /* SVE Control Register (EL2) */
426462
TTBR0_EL2, /* Translation Table Base Register 0 (EL2) */
427463
TTBR1_EL2, /* Translation Table Base Register 1 (EL2) */
428464
TCR_EL2, /* Translation Control Register (EL2) */
@@ -867,6 +903,9 @@ struct kvm_vcpu_arch {
867903

868904
#define vcpu_sve_max_vq(vcpu) sve_vq_from_vl((vcpu)->arch.sve_max_vl)
869905

906+
#define vcpu_sve_zcr_elx(vcpu) \
907+
(unlikely(is_hyp_ctxt(vcpu)) ? ZCR_EL2 : ZCR_EL1)
908+
870909
#define vcpu_sve_state_size(vcpu) ({ \
871910
size_t __size_ret; \
872911
unsigned int __vcpu_vq; \
@@ -991,6 +1030,7 @@ static inline bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val)
9911030
case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break;
9921031
case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break;
9931032
case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break;
1033+
case ZCR_EL1: *val = read_sysreg_s(SYS_ZCR_EL12); break;
9941034
default: return false;
9951035
}
9961036

@@ -1036,6 +1076,7 @@ static inline bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg)
10361076
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break;
10371077
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break;
10381078
case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break;
1079+
case ZCR_EL1: write_sysreg_s(val, SYS_ZCR_EL12); break;
10391080
default: return false;
10401081
}
10411082

@@ -1145,7 +1186,7 @@ int __init populate_nv_trap_config(void);
11451186
bool lock_all_vcpus(struct kvm *kvm);
11461187
void unlock_all_vcpus(struct kvm *kvm);
11471188

1148-
void kvm_init_sysreg(struct kvm_vcpu *);
1189+
void kvm_calculate_traps(struct kvm_vcpu *vcpu);
11491190

11501191
/* MMIO helpers */
11511192
void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data);
@@ -1306,6 +1347,7 @@ void kvm_vcpu_load_vhe(struct kvm_vcpu *vcpu);
13061347
void kvm_vcpu_put_vhe(struct kvm_vcpu *vcpu);
13071348

13081349
int __init kvm_set_ipa_limit(void);
1350+
u32 kvm_get_pa_bits(struct kvm *kvm);
13091351

13101352
#define __KVM_HAVE_ARCH_VM_ALLOC
13111353
struct kvm *kvm_arch_alloc_vm(void);
@@ -1355,6 +1397,24 @@ static inline void kvm_hyp_reserve(void) { }
13551397
void kvm_arm_vcpu_power_off(struct kvm_vcpu *vcpu);
13561398
bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
13571399

1400+
static inline u64 *__vm_id_reg(struct kvm_arch *ka, u32 reg)
1401+
{
1402+
switch (reg) {
1403+
case sys_reg(3, 0, 0, 1, 0) ... sys_reg(3, 0, 0, 7, 7):
1404+
return &ka->id_regs[IDREG_IDX(reg)];
1405+
case SYS_CTR_EL0:
1406+
return &ka->ctr_el0;
1407+
default:
1408+
WARN_ON_ONCE(1);
1409+
return NULL;
1410+
}
1411+
}
1412+
1413+
#define kvm_read_vm_id_reg(kvm, reg) \
1414+
({ u64 __val = *__vm_id_reg(&(kvm)->arch, reg); __val; })
1415+
1416+
void kvm_set_vm_id_reg(struct kvm *kvm, u32 reg, u64 val);
1417+
13581418
#define __expand_field_sign_unsigned(id, fld, val) \
13591419
((u64)SYS_FIELD_VALUE(id, fld, val))
13601420

@@ -1371,7 +1431,7 @@ bool kvm_arm_vcpu_stopped(struct kvm_vcpu *vcpu);
13711431

13721432
#define get_idreg_field_unsigned(kvm, id, fld) \
13731433
({ \
1374-
u64 __val = IDREG((kvm), SYS_##id); \
1434+
u64 __val = kvm_read_vm_id_reg((kvm), SYS_##id); \
13751435
FIELD_GET(id##_##fld##_MASK, __val); \
13761436
})
13771437

0 commit comments

Comments
 (0)