Skip to content

Commit 27461da

Browse files
Like Xu authored and Paolo Bonzini committed
KVM: x86/pmu: Support full width counting
Intel CPUs have a new alternative MSR range (starting from MSR_IA32_PMC0) for GP counters that allows writing the full counter width. Enable this range from a new capability bit (IA32_PERF_CAPABILITIES.FW_WRITE[bit 13]). The guest would query CPUID to get the counter width, and sign extends the counter values as needed. The traditional MSRs always limit to 32bit, even though the counter internally is larger (48 or 57 bits). When the new capability is set, use the alternative range which do not have these restrictions. This lowers the overhead of perf stat slightly because it has to do less interrupts to accumulate the counter value. Signed-off-by: Like Xu <[email protected]> Message-Id: <[email protected]> Signed-off-by: Paolo Bonzini <[email protected]>
1 parent cbd7175 commit 27461da

File tree

6 files changed

+66
-5
lines changed

6 files changed

+66
-5
lines changed

arch/x86/include/asm/kvm_host.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -601,6 +601,7 @@ struct kvm_vcpu_arch {
601601
u64 ia32_xss;
602602
u64 microcode_version;
603603
u64 arch_capabilities;
604+
u64 perf_capabilities;
604605

605606
/*
606607
* Paging state of the vcpu

arch/x86/kvm/cpuid.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -296,7 +296,7 @@ void kvm_set_cpu_caps(void)
296296
F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ |
297297
0 /* DS-CPL, VMX, SMX, EST */ |
298298
0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ |
299-
F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ |
299+
F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) |
300300
F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) |
301301
F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) |
302302
0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) |

arch/x86/kvm/vmx/capabilities.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ extern int __read_mostly pt_mode;
1818
#define PT_MODE_SYSTEM 0
1919
#define PT_MODE_HOST_GUEST 1
2020

21+
#define PMU_CAP_FW_WRITES (1ULL << 13)
22+
2123
struct nested_vmx_msrs {
2224
/*
2325
* We only store the "true" versions of the VMX capability MSRs. We
@@ -367,4 +369,13 @@ static inline bool vmx_pt_mode_is_host_guest(void)
367369
return pt_mode == PT_MODE_HOST_GUEST;
368370
}
369371

372+
static inline u64 vmx_get_perf_capabilities(void)
373+
{
374+
/*
375+
* Since counters are virtualized, KVM would support full
376+
* width counting unconditionally, even if the host lacks it.
377+
*/
378+
return PMU_CAP_FW_WRITES;
379+
}
380+
370381
#endif /* __KVM_X86_VMX_CAPS_H */

arch/x86/kvm/vmx/pmu_intel.c

Lines changed: 48 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
#include "nested.h"
1919
#include "pmu.h"
2020

21+
#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0)
22+
2123
static struct kvm_event_hw_type_mapping intel_arch_events[] = {
2224
/* Index must match CPUID 0x0A.EBX bit vector */
2325
[0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES },
@@ -150,6 +152,22 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
150152
return &counters[array_index_nospec(idx, num_counters)];
151153
}
152154

155+
static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu)
156+
{
157+
if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
158+
return false;
159+
160+
return vcpu->arch.perf_capabilities & PMU_CAP_FW_WRITES;
161+
}
162+
163+
static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr)
164+
{
165+
if (!fw_writes_is_enabled(pmu_to_vcpu(pmu)))
166+
return NULL;
167+
168+
return get_gp_pmc(pmu, msr, MSR_IA32_PMC0);
169+
}
170+
153171
static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
154172
{
155173
struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
@@ -162,10 +180,13 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
162180
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
163181
ret = pmu->version > 1;
164182
break;
183+
case MSR_IA32_PERF_CAPABILITIES:
184+
ret = guest_cpuid_has(vcpu, X86_FEATURE_PDCM);
185+
break;
165186
default:
166187
ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
167188
get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) ||
168-
get_fixed_pmc(pmu, msr);
189+
get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr);
169190
break;
170191
}
171192

@@ -203,8 +224,15 @@ static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
203224
case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
204225
msr_info->data = pmu->global_ovf_ctrl;
205226
return 0;
227+
case MSR_IA32_PERF_CAPABILITIES:
228+
if (!msr_info->host_initiated &&
229+
!guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
230+
return 1;
231+
msr_info->data = vcpu->arch.perf_capabilities;
232+
return 0;
206233
default:
207-
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
234+
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
235+
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
208236
u64 val = pmc_read_counter(pmc);
209237
msr_info->data =
210238
val & pmu->counter_bitmask[KVM_PMC_GP];
@@ -261,9 +289,22 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
261289
return 0;
262290
}
263291
break;
292+
case MSR_IA32_PERF_CAPABILITIES:
293+
if (!msr_info->host_initiated)
294+
return 1;
295+
if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ?
296+
(data & ~vmx_get_perf_capabilities()) : data)
297+
return 1;
298+
vcpu->arch.perf_capabilities = data;
299+
return 0;
264300
default:
265-
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) {
266-
if (!msr_info->host_initiated)
301+
if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) ||
302+
(pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) {
303+
if ((msr & MSR_PMC_FULL_WIDTH_BIT) &&
304+
(data & ~pmu->counter_bitmask[KVM_PMC_GP]))
305+
return 1;
306+
if (!msr_info->host_initiated &&
307+
!(msr & MSR_PMC_FULL_WIDTH_BIT))
267308
data = (s64)(s32)data;
268309
pmc->counter += data - pmc_read_counter(pmc);
269310
if (pmc->perf_event)
@@ -303,6 +344,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
303344
pmu->counter_bitmask[KVM_PMC_FIXED] = 0;
304345
pmu->version = 0;
305346
pmu->reserved_bits = 0xffffffff00200000ull;
347+
vcpu->arch.perf_capabilities = 0;
306348

307349
entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
308350
if (!entry)
@@ -315,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
315357
return;
316358

317359
perf_get_x86_pmu_capability(&x86_pmu);
360+
if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM))
361+
vcpu->arch.perf_capabilities = vmx_get_perf_capabilities();
318362

319363
pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters,
320364
x86_pmu.num_counters_gp);

arch/x86/kvm/vmx/vmx.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1788,6 +1788,9 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr)
17881788
if (!nested)
17891789
return 1;
17901790
return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data);
1791+
case MSR_IA32_PERF_CAPABILITIES:
1792+
msr->data = vmx_get_perf_capabilities();
1793+
return 0;
17911794
default:
17921795
return 1;
17931796
}

arch/x86/kvm/x86.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1253,6 +1253,7 @@ static const u32 emulated_msrs_all[] = {
12531253
MSR_IA32_TSC_ADJUST,
12541254
MSR_IA32_TSCDEADLINE,
12551255
MSR_IA32_ARCH_CAPABILITIES,
1256+
MSR_IA32_PERF_CAPABILITIES,
12561257
MSR_IA32_MISC_ENABLE,
12571258
MSR_IA32_MCG_STATUS,
12581259
MSR_IA32_MCG_CTL,
@@ -1319,6 +1320,7 @@ static const u32 msr_based_features_all[] = {
13191320
MSR_F10H_DECFG,
13201321
MSR_IA32_UCODE_REV,
13211322
MSR_IA32_ARCH_CAPABILITIES,
1323+
MSR_IA32_PERF_CAPABILITIES,
13221324
};
13231325

13241326
static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)];

0 commit comments

Comments (0)