Skip to content

Commit 0bc9a9e

Browse files
author
Marc Zyngier
committed
KVM: arm64: Work around x1e's CNTVOFF_EL2 bogosity
It appears that on Qualcomm's x1e CPU, CNTVOFF_EL2 doesn't really work, especially with HCR_EL2.E2H=1. A non-zero offset results in a screaming virtual timer interrupt, to the tune of a few 100k interrupts per second on a 4 vcpu VM. This is also evidenced by this CPU's inability to correctly run any of the timer selftests. The only case this doesn't break is when this register is set to 0, which breaks VM migration. When HCR_EL2.E2H=0, the timer seems to behave normally, and does not result in an interrupt storm. As a workaround, use the fact that this CPU implements FEAT_ECV, and trap all accesses to the virtual timer and counter, keeping CNTVOFF_EL2 set to zero, and emulate accesses to CVAL/TVAL/CTL and the counter itself, fixing up the timer to account for the missing offset. And if you think this is disgusting, you'd probably be right. Acked-by: Oliver Upton <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Marc Zyngier <[email protected]>
1 parent d1e37a5 commit 0bc9a9e

File tree

8 files changed

+90
-8
lines changed

8 files changed

+90
-8
lines changed

arch/arm64/include/asm/cputype.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@
122122
#define QCOM_CPU_PART_KRYO_3XX_SILVER 0x803
123123
#define QCOM_CPU_PART_KRYO_4XX_GOLD 0x804
124124
#define QCOM_CPU_PART_KRYO_4XX_SILVER 0x805
125+
#define QCOM_CPU_PART_ORYON_X1 0x001
125126

126127
#define NVIDIA_CPU_PART_DENVER 0x003
127128
#define NVIDIA_CPU_PART_CARMEL 0x004
@@ -198,6 +199,7 @@
198199
#define MIDR_QCOM_KRYO_3XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_3XX_SILVER)
199200
#define MIDR_QCOM_KRYO_4XX_GOLD MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_GOLD)
200201
#define MIDR_QCOM_KRYO_4XX_SILVER MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO_4XX_SILVER)
202+
#define MIDR_QCOM_ORYON_X1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_ORYON_X1)
201203
#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER)
202204
#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL)
203205
#define MIDR_FUJITSU_A64FX MIDR_CPU_MODEL(ARM_CPU_IMP_FUJITSU, FUJITSU_CPU_PART_A64FX)

arch/arm64/kernel/cpu_errata.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -786,6 +786,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
786786
ERRATA_MIDR_RANGE_LIST(erratum_ac03_cpu_38_list),
787787
},
788788
#endif
789+
{
790+
.desc = "Broken CNTVOFF_EL2",
791+
.capability = ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF,
792+
ERRATA_MIDR_RANGE_LIST(((const struct midr_range[]) {
793+
MIDR_ALL_VERSIONS(MIDR_QCOM_ORYON_X1),
794+
{}
795+
})),
796+
},
789797
{
790798
}
791799
};

arch/arm64/kernel/image-vars.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,9 @@ KVM_NVHE_ALIAS(__hyp_stub_vectors);
105105
KVM_NVHE_ALIAS(vgic_v2_cpuif_trap);
106106
KVM_NVHE_ALIAS(vgic_v3_cpuif_trap);
107107

108+
/* Static key which is set if CNTVOFF_EL2 is unusable */
109+
KVM_NVHE_ALIAS(broken_cntvoff_key);
110+
108111
/* EL2 exception handling */
109112
KVM_NVHE_ALIAS(__start___kvm_ex_table);
110113
KVM_NVHE_ALIAS(__stop___kvm_ex_table);

arch/arm64/kvm/arch_timer.c

Lines changed: 55 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ static u32 host_vtimer_irq_flags;
3030
static u32 host_ptimer_irq_flags;
3131

3232
static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
33+
DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);
3334

3435
static const u8 default_ppi[] = {
3536
[TIMER_PTIMER] = 30,
@@ -519,7 +520,12 @@ static void timer_save_state(struct arch_timer_context *ctx)
519520
case TIMER_VTIMER:
520521
case TIMER_HVTIMER:
521522
timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
522-
timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
523+
cval = read_sysreg_el0(SYS_CNTV_CVAL);
524+
525+
if (has_broken_cntvoff())
526+
cval -= timer_get_offset(ctx);
527+
528+
timer_set_cval(ctx, cval);
523529

524530
/* Disable the timer */
525531
write_sysreg_el0(0, SYS_CNTV_CTL);
@@ -624,8 +630,15 @@ static void timer_restore_state(struct arch_timer_context *ctx)
624630

625631
case TIMER_VTIMER:
626632
case TIMER_HVTIMER:
627-
set_cntvoff(timer_get_offset(ctx));
628-
write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
633+
cval = timer_get_cval(ctx);
634+
offset = timer_get_offset(ctx);
635+
if (has_broken_cntvoff()) {
636+
set_cntvoff(0);
637+
cval += offset;
638+
} else {
639+
set_cntvoff(offset);
640+
}
641+
write_sysreg_el0(cval, SYS_CNTV_CVAL);
629642
isb();
630643
write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
631644
break;
@@ -820,6 +833,13 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
820833
if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
821834
tpt = tpc = true;
822835

836+
/*
837+
* For the poor sods that could not correctly subtract one value
838+
* from another, trap the full virtual timer and counter.
839+
*/
840+
if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
841+
tvt = tvc = true;
842+
823843
/*
824844
* Apply the enable bits that the guest hypervisor has requested for
825845
* its own guest. We can only add traps that wouldn't have been set
@@ -1450,6 +1470,37 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
14501470
return 0;
14511471
}
14521472

1473+
/*
 * Enable the broken-CNTVOFF_EL2 workaround if this system needs it.
 *
 * Called from kvm_timer_hyp_init(). Takes no arguments and returns nothing;
 * its only effects are enabling the broken_cntvoff_key static key and
 * emitting a kvm_info() message.
 *
 * The key is enabled only when every one of these conditions holds:
 *  - the sanitised ID_AA64MMFR1_EL1.VH field is non-zero,
 *  - the sanitised ID_AA64MMFR4_EL1.E2H0 field is zero (the comment in the
 *    body treats this as FEAT_E2H0 being detected),
 *  - the sanitised ID_AA64MMFR0_EL1.ECV field is non-zero,
 *  - has_vhe() or has_hvhe() returns true,
 *  - cpus_have_final_cap() reports ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF.
 */
static void kvm_timer_handle_errata(void)
1474+
{
1475+
u64 mmfr0, mmfr1, mmfr4;
1476+
1477+
/*
1478+
* CNTVOFF_EL2 is broken on some implementations. For those, we trap
1479+
* all virtual timer/counter accesses, requiring FEAT_ECV.
1480+
*
1481+
* However, a hypervisor supporting nesting is likely to mitigate the
1482+
* erratum at L0, and not require other levels to mitigate it (which
1483+
* would otherwise be a terrible performance sink due to trap
1484+
* amplification).
1485+
*
1486+
* Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
1487+
* and that NV is likely not to (because of limitations of the
1488+
* architecture), only enable the workaround when FEAT_VHE and
1489+
* FEAT_E2H0 are both detected. Time will tell if this actually holds.
1490+
*/
1491+
mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
1492+
mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
1493+
mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
1494+
if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
1495+
!SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
1496+
SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
1497+
(has_vhe() || has_hvhe()) &&
1498+
cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
1499+
static_branch_enable(&broken_cntvoff_key);
1500+
kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
1501+
}
1502+
}
1503+
14531504
int __init kvm_timer_hyp_init(bool has_gic)
14541505
{
14551506
struct arch_timer_kvm_info *info;
@@ -1518,6 +1569,7 @@ int __init kvm_timer_hyp_init(bool has_gic)
15181569
goto out_free_vtimer_irq;
15191570
}
15201571

1572+
kvm_timer_handle_errata();
15211573
return 0;
15221574

15231575
out_free_ptimer_irq:

arch/arm64/kvm/hyp/nvhe/timer-sr.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,15 +22,16 @@ void __kvm_timer_set_cntvoff(u64 cntvoff)
2222
*/
2323
void __timer_disable_traps(struct kvm_vcpu *vcpu)
2424
{
25-
u64 val, shift = 0;
25+
u64 set, clr, shift = 0;
2626

2727
if (has_hvhe())
2828
shift = 10;
2929

3030
/* Allow physical timer/counter access for the host */
31-
val = read_sysreg(cnthctl_el2);
32-
val |= (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
33-
write_sysreg(val, cnthctl_el2);
31+
set = (CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN) << shift;
32+
clr = CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT;
33+
34+
sysreg_clear_set(cnthctl_el2, clr, set);
3435
}
3536

3637
/*
@@ -58,5 +59,12 @@ void __timer_enable_traps(struct kvm_vcpu *vcpu)
5859
set <<= 10;
5960
}
6061

62+
/*
63+
* Trap the virtual counter/timer if we have a broken cntvoff
64+
* implementation.
65+
*/
66+
if (has_broken_cntvoff())
67+
set |= CNTHCTL_EL1TVT | CNTHCTL_EL1TVCT;
68+
6169
sysreg_clear_set(cnthctl_el2, clr, set);
6270
}

arch/arm64/kvm/sys_regs.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1721,7 +1721,8 @@ static u64 __kvm_read_sanitised_id_reg(const struct kvm_vcpu *vcpu,
17211721
if (!vcpu_has_ptrauth(vcpu))
17221722
val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_APA3) |
17231723
ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_GPA3));
1724-
if (!cpus_have_final_cap(ARM64_HAS_WFXT))
1724+
if (!cpus_have_final_cap(ARM64_HAS_WFXT) ||
1725+
has_broken_cntvoff())
17251726
val &= ~ARM64_FEATURE_MASK(ID_AA64ISAR2_EL1_WFxT);
17261727
break;
17271728
case SYS_ID_AA64MMFR2_EL1:

arch/arm64/tools/cpucaps

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ WORKAROUND_CLEAN_CACHE
105105
WORKAROUND_DEVICE_LOAD_ACQUIRE
106106
WORKAROUND_NVIDIA_CARMEL_CNP
107107
WORKAROUND_QCOM_FALKOR_E1003
108+
WORKAROUND_QCOM_ORYON_CNTVOFF
108109
WORKAROUND_REPEAT_TLBI
109110
WORKAROUND_SPECULATIVE_AT
110111
WORKAROUND_SPECULATIVE_SSBS

include/kvm/arm_arch_timer.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,13 @@ void kvm_timer_cpu_down(void);
151151
/* CNTKCTL_EL1 valid bits as of DDI0487J.a */
152152
#define CNTKCTL_VALID_BITS (BIT(17) | GENMASK_ULL(9, 0))
153153

154+
DECLARE_STATIC_KEY_FALSE(broken_cntvoff_key);
155+
156+
/*
 * Report whether the broken_cntvoff_key static key is enabled.
 *
 * Returns the result of static_branch_unlikely() on that key, so the
 * "not broken" case is the one hinted as the expected path.
 */
static inline bool has_broken_cntvoff(void)
157+
{
158+
return static_branch_unlikely(&broken_cntvoff_key);
159+
}
160+
154161
static inline bool has_cntpoff(void)
155162
{
156163
return (has_vhe() && cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF));

0 commit comments

Comments
 (0)