@@ -30,6 +30,7 @@ static u32 host_vtimer_irq_flags;
 static u32 host_ptimer_irq_flags;

 static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
+DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);

 static const u8 default_ppi[] = {
         [TIMER_PTIMER]  = 30,
@@ -101,21 +102,6 @@ u64 timer_get_cval(struct arch_timer_context *ctxt)
         }
 }

-static u64 timer_get_offset(struct arch_timer_context *ctxt)
-{
-        u64 offset = 0;
-
-        if (!ctxt)
-                return 0;
-
-        if (ctxt->offset.vm_offset)
-                offset += *ctxt->offset.vm_offset;
-        if (ctxt->offset.vcpu_offset)
-                offset += *ctxt->offset.vcpu_offset;
-
-        return offset;
-}
-
 static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
 {
         struct kvm_vcpu *vcpu = ctxt->vcpu;
@@ -441,11 +427,30 @@ void kvm_timer_update_run(struct kvm_vcpu *vcpu)
                 regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
 }

+static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
+{
+        /*
+         * Paper over NV2 brokenness by publishing the interrupt status
+         * bit. This still results in a poor quality of emulation (guest
+         * writes will have no effect until the next exit).
+         *
+         * But hey, it's fast, right?
+         */
+        if (is_hyp_ctxt(ctx->vcpu) &&
+            (ctx == vcpu_vtimer(ctx->vcpu) || ctx == vcpu_ptimer(ctx->vcpu))) {
+                unsigned long val = timer_get_ctl(ctx);
+                __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
+                timer_set_ctl(ctx, val);
+        }
+}
+
 static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
                                  struct arch_timer_context *timer_ctx)
 {
         int ret;

+        kvm_timer_update_status(timer_ctx, new_level);
+
         timer_ctx->irq.level = new_level;
         trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
                                    timer_ctx->irq.level);
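Note on the __assign_bit() call in the hunk above: __ffs() turns the single-bit ARCH_TIMER_CTRL_IT_STAT mask into a bit index, and __assign_bit() writes `level` into that bit, so the guest-visible CTL value has its ISTATUS bit set or cleared to match the freshly computed interrupt line level. An open-coded equivalent, for illustration only (not part of the patch):

    /* Illustrative equivalent of the __assign_bit() line, assuming
     * ARCH_TIMER_CTRL_IT_STAT is a single-bit mask (which it is in the
     * arch timer register definitions).
     */
    u32 val = timer_get_ctl(ctx);

    if (level)
            val |= ARCH_TIMER_CTRL_IT_STAT;         /* publish ISTATUS */
    else
            val &= ~ARCH_TIMER_CTRL_IT_STAT;
    timer_set_ctl(ctx, val);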
@@ -471,6 +476,8 @@ static void timer_emulate(struct arch_timer_context *ctx)
                 return;
         }

+        kvm_timer_update_status(ctx, should_fire);
+
         /*
          * If the timer can fire now, we don't need to have a soft timer
          * scheduled for the future. If the timer cannot fire at all,
@@ -513,7 +520,12 @@ static void timer_save_state(struct arch_timer_context *ctx)
         case TIMER_VTIMER:
         case TIMER_HVTIMER:
                 timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
-                timer_set_cval(ctx, read_sysreg_el0(SYS_CNTV_CVAL));
+                cval = read_sysreg_el0(SYS_CNTV_CVAL);
+
+                if (has_broken_cntvoff())
+                        cval -= timer_get_offset(ctx);
+
+                timer_set_cval(ctx, cval);

                 /* Disable the timer */
                 write_sysreg_el0(0, SYS_CNTV_CTL);
@@ -618,8 +630,15 @@ static void timer_restore_state(struct arch_timer_context *ctx)

         case TIMER_VTIMER:
         case TIMER_HVTIMER:
-                set_cntvoff(timer_get_offset(ctx));
-                write_sysreg_el0(timer_get_cval(ctx), SYS_CNTV_CVAL);
+                cval = timer_get_cval(ctx);
+                offset = timer_get_offset(ctx);
+                if (has_broken_cntvoff()) {
+                        set_cntvoff(0);
+                        cval += offset;
+                } else {
+                        set_cntvoff(offset);
+                }
+                write_sysreg_el0(cval, SYS_CNTV_CVAL);
                 isb();
                 write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
                 break;
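Note on the two hunks above (timer_save_state() and timer_restore_state()): the virtual count is defined as CNTVCT = CNTPCT - CNTVOFF, so a guest deadline normally fires when CNTPCT - offset >= cval. When CNTVOFF_EL2 cannot be trusted and is forced to zero, the same deadline is reached by programming cval + offset into the hardware, and the offset is subtracted again on save so the guest-visible CVAL stays in virtual-count units. A sketch of the rebasing, with hypothetical helper names, for illustration only (not part of the patch):

    /*
     * The equivalence behind the +/- offset dance:
     *
     *   working CNTVOFF:      fires when CNTPCT - offset >= cval
     *   CNTVOFF forced to 0:  fires when CNTPCT >= cval + offset
     */
    static inline u64 guest_cval_to_hw(u64 cval, u64 offset)
    {
            return cval + offset;   /* what timer_restore_state() programs */
    }

    static inline u64 hw_cval_to_guest(u64 cval, u64 offset)
    {
            return cval - offset;   /* what timer_save_state() stores back */
    }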
@@ -762,7 +781,7 @@ static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,

 static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
 {
-        bool tpt, tpc;
+        bool tvt, tpt, tvc, tpc, tvt02, tpt02;
         u64 clr, set;

         /*
@@ -777,7 +796,29 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
          * within this function, reality kicks in and we start adding
          * traps based on emulation requirements.
          */
-        tpt = tpc = false;
+        tvt = tpt = tvc = tpc = false;
+        tvt02 = tpt02 = false;
+
+        /*
+         * NV2 badly breaks the timer semantics by redirecting accesses to
+         * the EL1 timer state to memory, so let's call ECV to the rescue if
+         * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
+         *
+         * The treatment varies slightly depending on whether we run an nVHE
+         * or a VHE guest: nVHE will use the _EL0 registers directly, while
+         * VHE will use the _EL02 accessors. This translates into different
+         * trap bits.
+         *
+         * None of the trapping is required when running in a non-HYP context,
+         * unless required by the L1 hypervisor settings once we advertise
+         * ECV+NV in the guest, or unless we need trapping for other reasons.
+         */
+        if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
+                if (vcpu_el2_e2h_is_set(vcpu))
+                        tvt02 = tpt02 = true;
+                else
+                        tvt = tpt = true;
+        }

         /*
          * We have two possibilities to deal with a physical offset:
@@ -792,10 +833,21 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
         if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
                 tpt = tpc = true;

+        /*
+         * For the poor sods that could not correctly subtract one value
+         * from another, trap the full virtual timer and counter.
+         */
+        if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
+                tvt = tvc = true;
+
         /*
          * Apply the enable bits that the guest hypervisor has requested for
          * its own guest. We can only add traps that wouldn't have been set
          * above.
+         * Implementation choices: we do not support NV when E2H=0 in the
+         * guest, and we don't support configurations where E2H is writable
+         * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
+         * not both). This simplifies the handling of the EL1NV* bits.
          */
         if (vcpu_has_nv(vcpu) && !is_hyp_ctxt(vcpu)) {
                 u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
@@ -806,6 +858,9 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)

                 tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
                 tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
+
+                tpt02 |= (val & CNTHCTL_EL1NVPCT);
+                tvt02 |= (val & CNTHCTL_EL1NVVCT);
         }

         /*
@@ -817,6 +872,10 @@ static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)

         assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
         assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
+        assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
+        assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
+        assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
+        assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);

         /* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
         sysreg_clear_set(cnthctl_el2, clr, set);
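Note on the argument order in the hunk above: the pre-existing calls pass (set, clr) because CNTHCTL_EL1PCEN/EL1PCTEN are enable bits, so trapping means clearing them, while the four new calls pass (clr, set) because EL1TVT/EL1TVCT/EL1NVVCT/EL1NVPCT are trap bits, so trapping means setting them. For reference, the assign_clear_set_bit() helper defined earlier in arch_timer.c looks roughly like this (quoted from memory, so treat as approximate):

    #define assign_clear_set_bit(_pred, _bit, _clr, _set)           \
            do {                                                    \
                    if (_pred)                                      \
                            (_set) |= (_bit);                       \
                    else                                            \
                            (_clr) |= (_bit);                       \
            } while (0)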
@@ -905,6 +964,54 @@ void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
                 kvm_timer_blocking(vcpu);
 }

+void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
+{
+        /*
+         * When NV2 is on, guest hypervisors have their EL1 timer register
+         * accesses redirected to the VNCR page. Any guest action taken on
+         * the timer is postponed until the next exit, leading to a very
+         * poor quality of emulation.
+         *
+         * This is an unmitigated disaster, only papered over by FEAT_ECV,
+         * which allows trapping of the timer registers even with NV2.
+         * Still, this is worse than FEAT_NV on its own. Meh.
+         */
+        if (!vcpu_el2_e2h_is_set(vcpu)) {
+                if (cpus_have_final_cap(ARM64_HAS_ECV))
+                        return;
+
+                /*
+                 * A non-VHE guest hypervisor doesn't have any direct access
+                 * to its timers: the EL2 registers trap (and the HW is
+                 * fully emulated), while the EL0 registers access memory
+                 * despite the access being notionally direct. Boo.
+                 *
+                 * We update the hardware timer registers with the
+                 * latest value written by the guest to the VNCR page
+                 * and let the hardware take care of the rest.
+                 */
+                write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CTL_EL0), SYS_CNTV_CTL);
+                write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTV_CVAL_EL0), SYS_CNTV_CVAL);
+                write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CTL_EL0), SYS_CNTP_CTL);
+                write_sysreg_el0(__vcpu_sys_reg(vcpu, CNTP_CVAL_EL0), SYS_CNTP_CVAL);
+        } else {
+                /*
+                 * For a VHE guest hypervisor, the EL2 state is directly
+                 * stored in the host EL1 timers, while the emulated EL0
+                 * state is stored in the VNCR page. The latter could have
+                 * been updated behind our back, and we must reset the
+                 * emulation of the timers.
+                 */
+                struct timer_map map;
+                get_timer_map(vcpu, &map);
+
+                soft_timer_cancel(&map.emul_vtimer->hrtimer);
+                soft_timer_cancel(&map.emul_ptimer->hrtimer);
+                timer_emulate(map.emul_vtimer);
+                timer_emulate(map.emul_ptimer);
+        }
+}
+
 /*
  * With a userspace irqchip we have to check if the guest de-asserted the
  * timer and if so, unmask the timer irq signal on the host interrupt
@@ -1363,6 +1470,37 @@ static int kvm_irq_init(struct arch_timer_kvm_info *info)
         return 0;
 }

+static void kvm_timer_handle_errata(void)
+{
+        u64 mmfr0, mmfr1, mmfr4;
+
+        /*
+         * CNTVOFF_EL2 is broken on some implementations. For those, we trap
+         * all virtual timer/counter accesses, requiring FEAT_ECV.
+         *
+         * However, a hypervisor supporting nesting is likely to mitigate the
+         * erratum at L0, and not require other levels to mitigate it (which
+         * would otherwise be a terrible performance sink due to trap
+         * amplification).
+         *
+         * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
+         * and that NV is likely not to (because of limitations of the
+         * architecture), only enable the workaround when FEAT_VHE and
+         * FEAT_E2H0 are both detected. Time will tell if this actually holds.
+         */
+        mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
+        mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
+        mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
+        if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
+            !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
+            SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
+            (has_vhe() || has_hvhe()) &&
+            cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
+                static_branch_enable(&broken_cntvoff_key);
+                kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
+        }
+}
+
 int __init kvm_timer_hyp_init(bool has_gic)
 {
         struct arch_timer_kvm_info *info;
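Note on the ID-register predicate in kvm_timer_handle_errata() above: as I read the ID register definitions, a zero ID_AA64MMFR4_EL1.E2H0 field means FEAT_E2H0 is implemented (HCR_EL2.E2H may be written to 0), while non-zero values mean E2H behaves as RES1; the negated SYS_FIELD_GET() therefore selects CPUs that implement FEAT_E2H0, which is what the comment's "FEAT_VHE and FEAT_E2H0 are both detected" refers to. An annotated reading, for illustration only (not part of the patch):

    /*
     * How the checks map onto features:
     *
     *   ID_AA64MMFR1_EL1.VH   != 0  ->  FEAT_VHE implemented
     *   ID_AA64MMFR4_EL1.E2H0 == 0  ->  FEAT_E2H0 implemented
     *   ID_AA64MMFR0_EL1.ECV  != 0  ->  FEAT_ECV implemented (needed for
     *                                   the extra timer traps)
     */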
@@ -1431,6 +1569,7 @@ int __init kvm_timer_hyp_init(bool has_gic)
                 goto out_free_vtimer_irq;
         }

+        kvm_timer_handle_errata();
         return 0;

 out_free_ptimer_irq: