// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2023, Tencent, Inc.
 */

#define _GNU_SOURCE /* for program_invocation_short_name */
#include <x86intrin.h>

#include "pmu.h"
#include "processor.h"

/* Number of LOOP instructions for the guest measurement payload. */
#define NUM_BRANCHES 10
/*
 * Number of "extra" instructions that will be counted, i.e. the number of
 * instructions that are needed to set up the loop and then disable the
 * counter. 2 MOV, 2 XOR, 1 WRMSR.
 */
#define NUM_EXTRA_INSNS 5
#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS)
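/*
 * Note, NUM_INSNS_RETIRED corresponds to the measured sequence in
 * __guest_test_arch_event(): the MOV that loads ECX, NUM_BRANCHES LOOP
 * instructions, the MOV that restores ECX, the two XORs, and the WRMSR
 * that disables the counter.
 */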

static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;

static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
						  void *guest_code,
						  uint8_t pmu_version,
						  uint64_t perf_capabilities)
{
	struct kvm_vm *vm;

	vm = vm_create_with_one_vcpu(vcpu, guest_code);
	vm_init_descriptor_tables(vm);
	vcpu_init_descriptor_tables(*vcpu);

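	/*
	 * Expose KVM's PMU version to the guest so that guest code can
	 * compute the effective version, i.e. the minimum of what KVM
	 * supports and what is enumerated to the guest via CPUID.
	 */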
	sync_global_to_guest(vm, kvm_pmu_version);

	/*
	 * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
	 * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
	 */
	if (kvm_has_perf_caps)
		vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);

	vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
	return vm;
}

static void run_vcpu(struct kvm_vcpu *vcpu)
{
	struct ucall uc;

	do {
		vcpu_run(vcpu);
		switch (get_ucall(vcpu, &uc)) {
		case UCALL_SYNC:
			break;
		case UCALL_ABORT:
			REPORT_GUEST_ASSERT(uc);
			break;
		case UCALL_PRINTF:
			pr_info("%s", uc.buffer);
			break;
		case UCALL_DONE:
			break;
		default:
			TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
		}
	} while (uc.cmd != UCALL_DONE);
}

static uint8_t guest_get_pmu_version(void)
{
	/*
	 * Return the effective PMU version, i.e. the minimum between what KVM
	 * supports and what is enumerated to the guest. The host deliberately
	 * advertises a PMU version to the guest beyond what is actually
	 * supported by KVM to verify KVM doesn't freak out and do something
	 * bizarre with an architecturally valid, but unsupported, version.
	 */
	return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
}

/*
 * If an architectural event is supported and guaranteed to generate at least
 * one "hit", assert that its count is non-zero. If an event isn't supported
 * or the test can't guarantee the associated action will occur, then all bets
 * are off regarding the count, i.e. no checks can be done.
 *
 * Sanity check that in all cases, the event doesn't count when it's disabled,
 * and that KVM correctly emulates the write of an arbitrary value.
 */
static void guest_assert_event_count(uint8_t idx,
				     struct kvm_x86_pmu_feature event,
				     uint32_t pmc, uint32_t pmc_msr)
{
	uint64_t count;

	count = _rdpmc(pmc);
	if (!this_pmu_has(event))
		goto sanity_checks;

	switch (idx) {
	case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
		GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
		break;
	case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
		GUEST_ASSERT_EQ(count, NUM_BRANCHES);
		break;
	case INTEL_ARCH_CPU_CYCLES_INDEX:
	case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
		GUEST_ASSERT_NE(count, 0);
		break;
	default:
		break;
	}

sanity_checks:
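	/*
	 * The counter was disabled by the caller before this function runs;
	 * verify that executing more LOOPs doesn't change the count, and
	 * that a write to the counter MSR is faithfully emulated, i.e. that
	 * the written value is read back.
	 */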
	__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
	GUEST_ASSERT_EQ(_rdpmc(pmc), count);

	wrmsr(pmc_msr, 0xdead);
	GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
}

static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
				    uint32_t pmc, uint32_t pmc_msr,
				    uint32_t ctrl_msr, uint64_t ctrl_msr_value)
{
	wrmsr(pmc_msr, 0);

	/*
	 * Enable and disable the PMC in a monolithic asm blob to ensure that
	 * the compiler can't insert _any_ code into the measured sequence.
	 * Note, ECX doesn't need to be clobbered as the input value, @ctrl_msr,
	 * is restored before the end of the sequence.
	 */
	__asm__ __volatile__("wrmsr\n\t"
			     "mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"
			     "loop .\n\t"
			     "mov %%edi, %%ecx\n\t"
			     "xor %%eax, %%eax\n\t"
			     "xor %%edx, %%edx\n\t"
			     "wrmsr\n\t"
			     :: "a"((uint32_t)ctrl_msr_value),
				"d"(ctrl_msr_value >> 32),
				"c"(ctrl_msr), "D"(ctrl_msr)
	);

	guest_assert_event_count(idx, event, pmc, pmc_msr);
}

static void guest_test_arch_event(uint8_t idx)
{
	const struct {
		struct kvm_x86_pmu_feature gp_event;
	} intel_event_to_feature[] = {
		[INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES },
		[INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED },
		[INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES },
		[INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES },
		[INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES },
		[INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED },
		[INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED },
		[INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS },
	};

	uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
	uint32_t pmu_version = guest_get_pmu_version();
	/* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
	bool guest_has_perf_global_ctrl = pmu_version >= 2;
	struct kvm_x86_pmu_feature gp_event;
	uint32_t base_pmc_msr;
	unsigned int i;

	/* The host side shouldn't invoke this without a guest PMU. */
	GUEST_ASSERT(pmu_version);

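	/*
	 * Use the full-width counter MSRs (MSR_IA32_PMCx) if full-width
	 * writes are enumerated via PERF_CAPABILITIES, else fall back to the
	 * legacy MSR_IA32_PERFCTRx MSRs.
	 */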
	if (this_cpu_has(X86_FEATURE_PDCM) &&
	    rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
		base_pmc_msr = MSR_IA32_PMC0;
	else
		base_pmc_msr = MSR_IA32_PERFCTR0;

	gp_event = intel_event_to_feature[idx].gp_event;
	GUEST_ASSERT_EQ(idx, gp_event.f.bit);

	GUEST_ASSERT(nr_gp_counters);

	for (i = 0; i < nr_gp_counters; i++) {
		uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
				    ARCH_PERFMON_EVENTSEL_ENABLE |
				    intel_pmu_arch_events[idx];

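		/*
		 * Start from a clean slate: zero the event selector, and for
		 * v2+ PMUs enable only the counter under test via
		 * PERF_GLOBAL_CTRL.
		 */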
		wrmsr(MSR_P6_EVNTSEL0 + i, 0);
		if (guest_has_perf_global_ctrl)
			wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));

		__guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
					MSR_P6_EVNTSEL0 + i, eventsel);
	}
}

static void guest_test_arch_events(void)
{
	uint8_t i;

	for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
		guest_test_arch_event(i);

	GUEST_DONE();
}

static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
			     uint8_t length, uint8_t unavailable_mask)
{
	struct kvm_vcpu *vcpu;
	struct kvm_vm *vm;

	/* Testing arch events requires a vPMU (there are no negative tests). */
	if (!pmu_version)
		return;

	vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
					 pmu_version, perf_capabilities);

	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
				length);
	vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
				unavailable_mask);

	run_vcpu(vcpu);

	kvm_vm_free(vm);
}

static void test_intel_counters(void)
{
	uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
	uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	unsigned int i;
	uint8_t v, j;
	uint32_t k;

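	/*
	 * Run every test with and without full-width writes, i.e. with the
	 * counters accessible only via the legacy MSRs and via MSR_IA32_PMCx
	 * when PMU_CAP_FW_WRITES is advertised in PERF_CAPABILITIES.
	 */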
	const uint64_t perf_caps[] = {
		0,
		PMU_CAP_FW_WRITES,
	};

	/*
	 * Test up to PMU v5, which is the current maximum version defined by
	 * Intel, i.e. the last version that is guaranteed to be backwards
	 * compatible with KVM's existing behavior.
	 */
	uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);

	/*
	 * Detect the existence of events that aren't supported by selftests.
	 * This will (obviously) fail any time the kernel adds support for a
	 * new event, but it's worth paying that price to keep the test fresh.
	 */
	TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
		    "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
		    nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));

	/*
	 * Force iterating over known arch events regardless of whether or not
	 * KVM/hardware supports a given event.
	 */
	nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);

	for (v = 0; v <= max_pmu_version; v++) {
		for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
			if (!kvm_has_perf_caps && perf_caps[i])
				continue;

			pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
				v, perf_caps[i]);
			/*
			 * To keep the total runtime reasonable, test every
			 * possible non-zero, non-reserved bitmap combination
			 * only with the native PMU version and the full bit
			 * vector length.
			 */
			if (v == pmu_version) {
				for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
					test_arch_events(v, perf_caps[i], nr_arch_events, k);
			}
			/*
			 * Test single bits for all PMU versions and lengths up
			 * to the number of events + 1 (to verify KVM doesn't do
			 * weird things if the guest length is greater than the
			 * host length). Explicitly test a mask of '0' and all
			 * ones, i.e. all events being available and unavailable.
			 */
			for (j = 0; j <= nr_arch_events + 1; j++) {
				test_arch_events(v, perf_caps[i], j, 0);
				test_arch_events(v, perf_caps[i], j, 0xff);

				for (k = 0; k < nr_arch_events; k++)
					test_arch_events(v, perf_caps[i], j, BIT(k));
			}
		}
	}
}

int main(int argc, char *argv[])
{
	TEST_REQUIRE(get_kvm_param_bool("enable_pmu"));

	TEST_REQUIRE(host_cpu_is_intel);
	TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
	TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);

	kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
	kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);

	test_intel_counters();

	return 0;
}