 struct x86_pmu_capability __read_mostly kvm_pmu_cap;
 EXPORT_SYMBOL_GPL(kvm_pmu_cap);
 
+struct kvm_pmu_emulated_event_selectors __read_mostly kvm_pmu_eventsel;
+EXPORT_SYMBOL_GPL(kvm_pmu_eventsel);
+
 /* Precise Distribution of Instructions Retired (PDIR) */
 static const struct x86_cpu_id vmx_pebs_pdir_cpu[] = {
 	X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, NULL),
@@ -67,7 +70,7 @@ static const struct x86_cpu_id vmx_pebs_pdist_cpu[] = {
  * all perf counters (both gp and fixed). The mapping relationship
  * between pmc and perf counters is as the following:
  * * Intel: [0 .. KVM_INTEL_PMC_MAX_GENERIC-1] <=> gp counters
- *          [INTEL_PMC_IDX_FIXED .. INTEL_PMC_IDX_FIXED + 2] <=> fixed
+ *          [KVM_FIXED_PMC_BASE_IDX .. KVM_FIXED_PMC_BASE_IDX + 2] <=> fixed
  * * AMD:   [0 .. AMD64_NUM_COUNTERS-1] and, for families 15H
  *          and later, [0 .. AMD64_NUM_COUNTERS_CORE-1] <=> gp counters
  */
@@ -411,7 +414,7 @@ static bool is_gp_event_allowed(struct kvm_x86_pmu_event_filter *f,
 static bool is_fixed_event_allowed(struct kvm_x86_pmu_event_filter *filter,
 				   int idx)
 {
-	int fixed_idx = idx - INTEL_PMC_IDX_FIXED;
+	int fixed_idx = idx - KVM_FIXED_PMC_BASE_IDX;
 
 	if (filter->action == KVM_PMU_EVENT_DENY &&
 	    test_bit(fixed_idx, (ulong *)&filter->fixed_counter_bitmap))
@@ -441,11 +444,10 @@ static bool check_pmu_event_filter(struct kvm_pmc *pmc)
 static bool pmc_event_is_allowed(struct kvm_pmc *pmc)
 {
 	return pmc_is_globally_enabled(pmc) && pmc_speculative_in_use(pmc) &&
-	       static_call(kvm_x86_pmu_hw_event_available)(pmc) &&
 	       check_pmu_event_filter(pmc);
 }
 
-static void reprogram_counter(struct kvm_pmc *pmc)
+static int reprogram_counter(struct kvm_pmc *pmc)
 {
 	struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 	u64 eventsel = pmc->eventsel;
@@ -456,7 +458,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 	emulate_overflow = pmc_pause_counter(pmc);
 
 	if (!pmc_event_is_allowed(pmc))
-		goto reprogram_complete;
+		return 0;
 
 	if (emulate_overflow)
 		__kvm_perf_overflow(pmc, false);
@@ -466,7 +468,7 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 
 	if (pmc_is_fixed(pmc)) {
 		fixed_ctr_ctrl = fixed_ctrl_field(pmu->fixed_ctr_ctrl,
-						  pmc->idx - INTEL_PMC_IDX_FIXED);
+						  pmc->idx - KVM_FIXED_PMC_BASE_IDX);
 		if (fixed_ctr_ctrl & 0x1)
 			eventsel |= ARCH_PERFMON_EVENTSEL_OS;
 		if (fixed_ctr_ctrl & 0x2)
@@ -477,43 +479,45 @@ static void reprogram_counter(struct kvm_pmc *pmc)
 	}
 
 	if (pmc->current_config == new_config && pmc_resume_counter(pmc))
-		goto reprogram_complete;
+		return 0;
 
 	pmc_release_perf_event(pmc);
 
 	pmc->current_config = new_config;
 
-	/*
-	 * If reprogramming fails, e.g. due to contention, leave the counter's
-	 * regprogram bit set, i.e. opportunistically try again on the next PMU
-	 * refresh. Don't make a new request as doing so can stall the guest
-	 * if reprogramming repeatedly fails.
-	 */
-	if (pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
-				  (eventsel & pmu->raw_event_mask),
-				  !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
-				  !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
-				  eventsel & ARCH_PERFMON_EVENTSEL_INT))
-		return;
-
-reprogram_complete:
-	clear_bit(pmc->idx, (unsigned long *)&pmc_to_pmu(pmc)->reprogram_pmi);
+	return pmc_reprogram_counter(pmc, PERF_TYPE_RAW,
+				     (eventsel & pmu->raw_event_mask),
+				     !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
+				     !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
+				     eventsel & ARCH_PERFMON_EVENTSEL_INT);
 }
 
 void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 {
+	DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc;
 	int bit;
 
-	for_each_set_bit(bit, pmu->reprogram_pmi, X86_PMC_IDX_MAX) {
-		struct kvm_pmc *pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, bit);
+	bitmap_copy(bitmap, pmu->reprogram_pmi, X86_PMC_IDX_MAX);
 
-		if (unlikely(!pmc)) {
-			clear_bit(bit, pmu->reprogram_pmi);
-			continue;
-		}
+	/*
+	 * The reprogramming bitmap can be written asynchronously by something
+	 * other than the task that holds vcpu->mutex, take care to clear only
+	 * the bits that will actually be processed.
+	 */
+	BUILD_BUG_ON(sizeof(bitmap) != sizeof(atomic64_t));
+	atomic64_andnot(*(s64 *)bitmap, &pmu->__reprogram_pmi);
 
-		reprogram_counter(pmc);
+	kvm_for_each_pmc(pmu, pmc, bit, bitmap) {
+		/*
+		 * If reprogramming fails, e.g. due to contention, re-set the
+		 * reprogram bit, i.e. opportunistically try again on the
+		 * next PMU refresh.  Don't make a new request as doing so can
+		 * stall the guest if reprogramming repeatedly fails.
+		 */
+		if (reprogram_counter(pmc))
+			set_bit(pmc->idx, pmu->reprogram_pmi);
 	}
 
 	/*
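The reworked loop above boils down to: snapshot the pending bitmap, atomically clear only the snapshotted bits, process them, and re-set a bit whose counter could not be reprogrammed. Below is a minimal standalone C11 sketch of that pattern, not KVM code: `pending`, `process_one()` and `handle_pending()` are illustrative stand-ins, and a single 64-bit atomic word stands in for the kernel's bitmap/atomic64 pairing.

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static _Atomic uint64_t pending;	/* may be set asynchronously */

/* Stand-in for reprogram_counter(); pretend index 3 hits contention. */
static bool process_one(int idx)
{
	return idx != 3;
}

static void handle_pending(void)
{
	/* Snapshot the pending bits... */
	uint64_t snapshot = atomic_load(&pending);

	/* ...and clear only the snapshotted bits (the "andnot" step), so
	 * bits set concurrently after the snapshot are not lost. */
	atomic_fetch_and(&pending, ~snapshot);

	for (int idx = 0; idx < 64; idx++) {
		if (!(snapshot & (1ULL << idx)))
			continue;
		if (!process_one(idx))
			/* Failed: leave it pending for a later pass. */
			atomic_fetch_or(&pending, 1ULL << idx);
	}
}

int main(void)
{
	atomic_fetch_or(&pending, 0x0b);	/* indices 0, 1 and 3 pending */
	handle_pending();
	printf("still pending: %#llx\n",
	       (unsigned long long)atomic_load(&pending));
	return 0;
}
```

Running the sketch leaves only bit 3 set, mirroring how a failed reprogram is retried on the next pass without generating a new request.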
@@ -525,10 +529,20 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu)
 		kvm_pmu_cleanup(vcpu);
 }
 
-/* check if idx is a valid index to access PMU */
-bool kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx)
+int kvm_pmu_check_rdpmc_early(struct kvm_vcpu *vcpu, unsigned int idx)
 {
-	return static_call(kvm_x86_pmu_is_valid_rdpmc_ecx)(vcpu, idx);
+	/*
+	 * On Intel, VMX interception has priority over RDPMC exceptions that
+	 * aren't already handled by the emulator, i.e. there are no additional
+	 * checks needed for Intel PMUs.
+	 *
+	 * On AMD, _all_ exceptions on RDPMC have priority over SVM intercepts,
+	 * i.e. an invalid PMC results in a #GP, not #VMEXIT.
+	 */
+	if (!kvm_pmu_ops.check_rdpmc_early)
+		return 0;
+
+	return static_call(kvm_x86_pmu_check_rdpmc_early)(vcpu, idx);
 }
 
 bool is_vmware_backdoor_pmc(u32 pmc_idx)
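The new helper only calls into vendor code when the hook is implemented: Intel leaves `check_rdpmc_early` unset because VMX interception takes priority, while AMD provides it because a #GP must beat the SVM intercept. The userspace sketch below illustrates that optional-callback dispatch under stated assumptions; `pmu_ops` and the bounds check are simplified stand-ins for KVM's static_call machinery, and the counter limit is invented for the demo.

```c
#include <stdio.h>

struct pmu_ops {
	/* NULL when no early check is needed (the Intel-like case). */
	int (*check_rdpmc_early)(unsigned int idx);
};

/* Hypothetical AMD-style check: reject out-of-range PMC indices so the
 * caller can raise #GP before any intercept-style handling. */
static int amd_check_rdpmc_early(unsigned int idx)
{
	return idx >= 6 ? -1 : 0;	/* "6 counters" is an assumption */
}

static int check_rdpmc_early(const struct pmu_ops *ops, unsigned int idx)
{
	if (!ops->check_rdpmc_early)
		return 0;		/* no hook: nothing to do early */

	return ops->check_rdpmc_early(idx);
}

int main(void)
{
	const struct pmu_ops intel_like = { .check_rdpmc_early = NULL };
	const struct pmu_ops amd_like = { .check_rdpmc_early = amd_check_rdpmc_early };

	printf("intel-like, idx 9: %d\n", check_rdpmc_early(&intel_like, 9));
	printf("amd-like,   idx 9: %d\n", check_rdpmc_early(&amd_like, 9));
	return 0;
}
```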
@@ -567,10 +581,9 @@ static int kvm_pmu_rdpmc_vmware(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 
 int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned idx, u64 *data)
 {
-	bool fast_mode = idx & (1u << 31);
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
-	u64 mask = fast_mode ? ~0u : ~0ull;
+	u64 mask = ~0ull;
 
 	if (!pmu->version)
 		return 1;
@@ -716,11 +729,7 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
 
 	bitmap_zero(pmu->reprogram_pmi, X86_PMC_IDX_MAX);
 
-	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
-		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
-		if (!pmc)
-			continue;
-
+	kvm_for_each_pmc(pmu, pmc, i, pmu->all_valid_pmc_idx) {
 		pmc_stop_counter(pmc);
 		pmc->counter = 0;
 		pmc->emulated_counter = 0;
@@ -741,6 +750,8 @@ static void kvm_pmu_reset(struct kvm_vcpu *vcpu)
  */
 void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 {
+	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+
 	if (KVM_BUG_ON(kvm_vcpu_has_run(vcpu), vcpu->kvm))
 		return;
 
@@ -750,8 +761,22 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu)
 	 */
 	kvm_pmu_reset(vcpu);
 
-	bitmap_zero(vcpu_to_pmu(vcpu)->all_valid_pmc_idx, X86_PMC_IDX_MAX);
-	static_call(kvm_x86_pmu_refresh)(vcpu);
+	pmu->version = 0;
+	pmu->nr_arch_gp_counters = 0;
+	pmu->nr_arch_fixed_counters = 0;
+	pmu->counter_bitmask[KVM_PMC_GP] = 0;
+	pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
+	pmu->reserved_bits = 0xffffffff00200000ull;
+	pmu->raw_event_mask = X86_RAW_EVENT_MASK;
+	pmu->global_ctrl_mask = ~0ull;
+	pmu->global_status_mask = ~0ull;
+	pmu->fixed_ctr_ctrl_mask = ~0ull;
+	pmu->pebs_enable_mask = ~0ull;
+	pmu->pebs_data_cfg_mask = ~0ull;
+	bitmap_zero(pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+
+	if (vcpu->kvm->arch.enable_pmu)
+		static_call(kvm_x86_pmu_refresh)(vcpu);
 }
 
 void kvm_pmu_init(struct kvm_vcpu *vcpu)
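The refresh hunk above resets the vPMU to "nothing available" defaults (zero counters, all-ones reserved masks) before the vendor hook is allowed to narrow them, so a disabled or unsupported PMU is safe by construction. A small sketch of that reset-then-refine ordering follows; `vpmu_state`, `vendor_refresh()` and the counter count are made-up names for illustration, not KVM structures.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct vpmu_state {
	int nr_gp_counters;
	uint64_t global_ctrl_rsvd;	/* 1 bits are reserved/invalid */
};

/* Stand-in for the vendor refresh hook: expose 4 GP counters. */
static void vendor_refresh(struct vpmu_state *pmu)
{
	pmu->nr_gp_counters = 4;
	pmu->global_ctrl_rsvd = ~0ULL << pmu->nr_gp_counters;
}

static void pmu_refresh(struct vpmu_state *pmu, bool enable_pmu)
{
	/* First, reset to "no PMU": zero counters, everything reserved. */
	pmu->nr_gp_counters = 0;
	pmu->global_ctrl_rsvd = ~0ULL;

	/* Only then let vendor code carve out what is actually supported. */
	if (enable_pmu)
		vendor_refresh(pmu);
}

int main(void)
{
	struct vpmu_state pmu;

	pmu_refresh(&pmu, true);
	printf("enabled:  %d counters, reserved %#llx\n",
	       pmu.nr_gp_counters, (unsigned long long)pmu.global_ctrl_rsvd);

	pmu_refresh(&pmu, false);
	printf("disabled: %d counters, reserved %#llx\n",
	       pmu.nr_gp_counters, (unsigned long long)pmu.global_ctrl_rsvd);
	return 0;
}
```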
@@ -776,10 +801,8 @@ void kvm_pmu_cleanup(struct kvm_vcpu *vcpu)
 	bitmap_andnot(bitmask, pmu->all_valid_pmc_idx,
 		      pmu->pmc_in_use, X86_PMC_IDX_MAX);
 
-	for_each_set_bit(i, bitmask, X86_PMC_IDX_MAX) {
-		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
-
-		if (pmc && pmc->perf_event && !pmc_speculative_in_use(pmc))
+	kvm_for_each_pmc(pmu, pmc, i, bitmask) {
+		if (pmc->perf_event && !pmc_speculative_in_use(pmc))
 			pmc_stop_counter(pmc);
 	}
 
@@ -799,13 +822,6 @@ static void kvm_pmu_incr_counter(struct kvm_pmc *pmc)
 	kvm_pmu_request_counter_reprogram(pmc);
 }
 
-static inline bool eventsel_match_perf_hw_id(struct kvm_pmc *pmc,
-					     unsigned int perf_hw_id)
-{
-	return !((pmc->eventsel ^ perf_get_hw_event_config(perf_hw_id)) &
-		 AMD64_RAW_EVENT_MASK_NB);
-}
-
 static inline bool cpl_is_matched(struct kvm_pmc *pmc)
 {
 	bool select_os, select_user;
@@ -817,29 +833,56 @@ static inline bool cpl_is_matched(struct kvm_pmc *pmc)
 		select_user = config & ARCH_PERFMON_EVENTSEL_USR;
 	} else {
 		config = fixed_ctrl_field(pmc_to_pmu(pmc)->fixed_ctr_ctrl,
-					  pmc->idx - INTEL_PMC_IDX_FIXED);
+					  pmc->idx - KVM_FIXED_PMC_BASE_IDX);
 		select_os = config & 0x1;
 		select_user = config & 0x2;
 	}
 
+	/*
+	 * Skip the CPL lookup, which isn't free on Intel, if the result will
+	 * be the same regardless of the CPL.
+	 */
+	if (select_os == select_user)
+		return select_os;
+
 	return (static_call(kvm_x86_get_cpl)(pmc->vcpu) == 0) ? select_os : select_user;
 }
 
-void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 perf_hw_id)
+void kvm_pmu_trigger_event(struct kvm_vcpu *vcpu, u64 eventsel)
 {
+	DECLARE_BITMAP(bitmap, X86_PMC_IDX_MAX);
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
 	struct kvm_pmc *pmc;
 	int i;
 
-	for_each_set_bit(i, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX) {
-		pmc = static_call(kvm_x86_pmu_pmc_idx_to_pmc)(pmu, i);
+	BUILD_BUG_ON(sizeof(pmu->global_ctrl) * BITS_PER_BYTE != X86_PMC_IDX_MAX);
 
-		if (!pmc || !pmc_event_is_allowed(pmc))
+	if (!kvm_pmu_has_perf_global_ctrl(pmu))
+		bitmap_copy(bitmap, pmu->all_valid_pmc_idx, X86_PMC_IDX_MAX);
+	else if (!bitmap_and(bitmap, pmu->all_valid_pmc_idx,
+			     (unsigned long *)&pmu->global_ctrl, X86_PMC_IDX_MAX))
+		return;
+
+	kvm_for_each_pmc(pmu, pmc, i, bitmap) {
+		/*
+		 * Ignore checks for edge detect (all events currently emulated
+		 * by KVM are always rising edges), pin control (unsupported
+		 * by modern CPUs), and counter mask and its invert flag (KVM
+		 * doesn't emulate multiple events in a single clock cycle).
+		 *
+		 * Note, the uppermost nibble of AMD's mask overlaps Intel's
+		 * IN_TX (bit 32) and IN_TXCP (bit 33), as well as two reserved
+		 * bits (bits 35:34).  Checking the "in HLE/RTM transaction"
+		 * flags is correct as the vCPU can't be in a transaction if
+		 * KVM is emulating an instruction.  Checking the reserved bits
+		 * might be wrong if they are defined in the future, but so
+		 * could ignoring them, so do the simple thing for now.
+		 */
+		if (((pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB) ||
+		    !pmc_event_is_allowed(pmc) || !cpl_is_matched(pmc))
 			continue;
 
-		/* Ignore checks for edge detect, pin control, invert and CMASK bits */
-		if (eventsel_match_perf_hw_id(pmc, perf_hw_id) && cpl_is_matched(pmc))
-			kvm_pmu_incr_counter(pmc);
+		kvm_pmu_incr_counter(pmc);
 	}
 }
 EXPORT_SYMBOL_GPL(kvm_pmu_trigger_event);
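For reference, the `(pmc->eventsel ^ eventsel) & AMD64_RAW_EVENT_MASK_NB` test above compares only the event select and unit mask, while the enable, CPL, interrupt and cmask control bits are free to differ. The standalone sketch below mirrors that comparison; the mask constants follow the architectural PERFEVTSEL layout, but their names are local stand-ins, not the kernel's macros.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Local stand-ins for the relevant PERFEVTSEL fields. */
#define EVTSEL_EVENT_LO	0x00000000000000ffULL	/* event select [7:0]       */
#define EVTSEL_UMASK	0x000000000000ff00ULL	/* unit mask    [15:8]      */
#define EVTSEL_EVENT_HI	0x0000000f00000000ULL	/* AMD event select [35:32] */
#define EVENT_MASK_NB	(EVTSEL_EVENT_LO | EVTSEL_UMASK | EVTSEL_EVENT_HI)

#define EVTSEL_USR	(1ULL << 16)
#define EVTSEL_OS	(1ULL << 17)
#define EVTSEL_EN	(1ULL << 22)

/* True if both selectors name the same hardware event, regardless of the
 * enable/CPL/interrupt/cmask control bits. */
static bool same_hw_event(uint64_t a, uint64_t b)
{
	return !((a ^ b) & EVENT_MASK_NB);
}

int main(void)
{
	uint64_t instructions_retired = 0xc0;	/* event 0xC0, umask 0x00 */
	uint64_t programmed = 0xc0 | EVTSEL_USR | EVTSEL_OS | EVTSEL_EN;

	printf("match: %d\n", same_hw_event(programmed, instructions_retired));
	return 0;
}
```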