 #include "pmu.h"
 #include "processor.h"

-/* Number of LOOP instructions for the guest measurement payload. */
-#define NUM_BRANCHES		10
+/* Number of iterations of the loop for the guest measurement payload. */
+#define NUM_LOOPS		10
+
+/* Each iteration of the loop retires one branch instruction. */
+#define NUM_BRANCH_INSNS_RETIRED	(NUM_LOOPS)
+
+/*
+ * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
+ * 1 LOOP.
+ */
+#define NUM_INSNS_PER_LOOP	3
+
 /*
  * Number of "extra" instructions that will be counted, i.e. the number of
- * instructions that are needed to set up the loop and then disabled the
- * counter.  1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
+ * instructions that are needed to set up the loop and then disable the
+ * counter.  2 MOV, 2 XOR, 1 WRMSR.
  */
-#define NUM_EXTRA_INSNS		7
-#define NUM_INSNS_RETIRED	(NUM_BRANCHES + NUM_EXTRA_INSNS)
+#define NUM_EXTRA_INSNS		5
+
+/* Total number of instructions retired within the measured section. */
+#define NUM_INSNS_RETIRED	(NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+

 static uint8_t kvm_pmu_version;
 static bool kvm_has_perf_caps;
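
The new macros make the expected counts easy to verify by hand: 10 iterations of a 3-instruction loop body plus 5 setup/teardown instructions, i.e. 35 instructions and 10 branches retired per measurement. A compile-time-only sketch of that arithmetic (standalone, mirrors the values above, not part of the patch):

#include <assert.h>

#define NUM_LOOPS			10
#define NUM_BRANCH_INSNS_RETIRED	(NUM_LOOPS)
#define NUM_INSNS_PER_LOOP		3	/* CLFLUSH/CLFLUSHOPT/NOP + MFENCE + LOOP */
#define NUM_EXTRA_INSNS			5	/* 2 MOV + 2 XOR + 1 WRMSR */
#define NUM_INSNS_RETIRED		(NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)

/* 10 iterations * 3 instructions + 5 setup/teardown instructions = 35. */
static_assert(NUM_INSNS_RETIRED == 35, "unexpected instructions-retired count");
static_assert(NUM_BRANCH_INSNS_RETIRED == 10, "unexpected branches-retired count");
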
@@ -100,7 +113,7 @@ static void guest_assert_event_count(uint8_t idx,
 		GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
 		break;
 	case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
-		GUEST_ASSERT_EQ(count, NUM_BRANCHES);
+		GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
 		break;
 	case INTEL_ARCH_LLC_REFERENCES_INDEX:
 	case INTEL_ARCH_LLC_MISSES_INDEX:
@@ -120,7 +133,7 @@ static void guest_assert_event_count(uint8_t idx,
 	}

 sanity_checks:
-	__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+	__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
 	GUEST_ASSERT_EQ(_rdpmc(pmc), count);

 	wrmsr(pmc_msr, 0xdead);
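
The "loop ." idiom in the sanity check is a LOOP instruction that branches to itself: the compound literal seeds ECX with NUM_LOOPS, so the sequence retires exactly NUM_LOOPS branch instructions and then falls through, after which the RDPMC read verifies the stopped counter did not move. A minimal, hypothetical user-space illustration of the idiom (not from the patch):

#include <stdio.h>

#define NUM_LOOPS 10

int main(void)
{
	/*
	 * LOOP decrements ECX and branches back to itself until ECX reaches
	 * zero, i.e. this retires NUM_LOOPS branch instructions in place.
	 */
	int ecx = NUM_LOOPS;

	__asm__ __volatile__("loop ." : "+c"(ecx));
	printf("ECX after the self-branching loop: %d\n", ecx);	/* prints 0 */
	return 0;
}
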
@@ -134,8 +147,8 @@ static void guest_assert_event_count(uint8_t idx,
  * before the end of the sequence.
  *
  * If CLFLUSH{,OPT} is supported, flush the cacheline containing (at least) the
- * start of the loop to force LLC references and misses, i.e. to allow testing
- * that those events actually count.
+ * CLFLUSH{,OPT} instruction on each loop iteration to force LLC references and
+ * misses, i.e. to allow testing that those events actually count.
  *
  * If forced emulation is enabled (and specified), force emulation on a subset
  * of the measured code to verify that KVM correctly emulates instructions and
@@ -145,10 +158,11 @@ static void guest_assert_event_count(uint8_t idx,
 #define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP)				\
 do {										\
 	__asm__ __volatile__("wrmsr\n\t"					\
+			     " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t"	\
+			     "1:\n\t"						\
 			     clflush "\n\t"					\
 			     "mfence\n\t"					\
-			     "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t"	\
-			     FEP "loop .\n\t"					\
+			     FEP "loop 1b\n\t"					\
 			     FEP "mov %%edi, %%ecx\n\t"				\
 			     FEP "xor %%eax, %%eax\n\t"				\
 			     FEP "xor %%edx, %%edx\n\t"				\
@@ -163,9 +177,9 @@ do {										\
 	wrmsr(pmc_msr, 0);							\
 										\
 	if (this_cpu_has(X86_FEATURE_CLFLUSHOPT))				\
-		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP);	\
+		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP);	\
 	else if (this_cpu_has(X86_FEATURE_CLFLUSH))				\
-		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP);	\
+		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP);	\
 	else									\
 		GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP);		\
 										\
@@ -500,7 +514,7 @@ static void guest_test_fixed_counters(void)
 		wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
 		wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
 		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
-		__asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+		__asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
 		wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 		val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
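
For context on the fixed-counter programming above: each fixed counter owns a 4-bit control field in IA32_FIXED_CTR_CTRL and an enable bit at position 32 + i in IA32_PERF_GLOBAL_CTRL, which is what the helper macros encode. A sketch of plausible definitions (illustrative only; the selftests' pmu.h is authoritative):

#include <stdint.h>

#define BIT(n)		(1u << (n))
#define BIT_ULL(n)	(1ull << (n))

/* Count in ring 0 only for fixed counter 'i' (bit 1 would add user mode). */
#define FIXED_PMC_KERNEL		BIT(0)

/* Each fixed counter gets a 4-bit control field in IA32_FIXED_CTR_CTRL. */
#define FIXED_PMC_CTRL(i, ctrl)		((uint64_t)(ctrl) << ((i) * 4))

/* Fixed-counter enable bits start at bit 32 of IA32_PERF_GLOBAL_CTRL. */
#define FIXED_PMC_GLOBAL_CTRL_ENABLE(i)	BIT_ULL(32 + (i))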