Skip to content

Commit 4015350

Browse files
committed
Documentation/srso: Document a method for checking safe RET operates properly
Add a method to quickly verify whether safe RET operates properly on a given system using perf tool. Also, add a selftest which does the same thing. Signed-off-by: Borislav Petkov (AMD) <[email protected]> Link: https://lore.kernel.org/r/[email protected]
1 parent 225f2bd commit 4015350

File tree

3 files changed

+140
-1
lines changed

3 files changed

+140
-1
lines changed

Documentation/admin-guide/hw-vuln/srso.rst

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,3 +158,72 @@ poisoned BTB entry and using that safe one for all function returns.
158158
In older Zen1 and Zen2, this is accomplished using a reinterpretation
159159
technique similar to the Retbleed one: srso_untrain_ret() and
160160
srso_safe_ret().
161+
162+
Checking the safe RET mitigation actually works
163+
-----------------------------------------------
164+
165+
In case one wants to validate whether the SRSO safe RET mitigation works
166+
on a kernel, one could use two performance counters
167+
168+
* PMC_0xc8 - Count of RET/RET lw retired
169+
* PMC_0xc9 - Count of RET/RET lw retired mispredicted
170+
171+
and compare the number of RETs retired properly vs those retired
172+
mispredicted, in kernel mode. Another way of specifying those events
173+
is::
174+
175+
# perf list ex_ret_near_ret
176+
177+
List of pre-defined events (to be used in -e or -M):
178+
179+
core:
180+
ex_ret_near_ret
181+
[Retired Near Returns]
182+
ex_ret_near_ret_mispred
183+
[Retired Near Returns Mispredicted]
184+
185+
Either the command using the event mnemonics::
186+
187+
# perf stat -e ex_ret_near_ret:k -e ex_ret_near_ret_mispred:k sleep 10s
188+
189+
or using the raw PMC numbers::
190+
191+
# perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
192+
193+
should report almost the same count for both events. I.e., every RET retired should be
194+
mispredicted::
195+
196+
[root@brent: ~/kernel/linux/tools/perf> ./perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
197+
198+
Performance counter stats for 'sleep 10s':
199+
200+
137,167 cpu/event=0xc8,umask=0/k
201+
137,173 cpu/event=0xc9,umask=0/k
202+
203+
10.004110303 seconds time elapsed
204+
205+
0.000000000 seconds user
206+
0.004462000 seconds sys
207+
208+
vs the case when the mitigation is disabled (spec_rstack_overflow=off)
209+
or not functioning properly, which usually shows a much smaller number of
210+
mispredicted retired RETs vs the overall count of retired RETs during
211+
a workload::
212+
213+
[root@brent: ~/kernel/linux/tools/perf> ./perf stat -e cpu/event=0xc8,umask=0/k -e cpu/event=0xc9,umask=0/k sleep 10s
214+
215+
Performance counter stats for 'sleep 10s':
216+
217+
201,627 cpu/event=0xc8,umask=0/k
218+
4,074 cpu/event=0xc9,umask=0/k
219+
220+
10.003267252 seconds time elapsed
221+
222+
0.002729000 seconds user
223+
0.000000000 seconds sys
224+
225+
Also, there is a selftest which performs the above check. Go to
226+
tools/testing/selftests/x86/ and do::
227+
228+
make srso
229+
./srso

tools/testing/selftests/x86/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ all_32: $(BINARIES_32)
7777

7878
all_64: $(BINARIES_64)
7979

80-
EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
80+
EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64) srso
8181

8282
$(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h
8383
$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl -lm

tools/testing/selftests/x86/srso.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
#include <linux/perf_event.h>
3+
#include <cpuid.h>
4+
#include <errno.h>
5+
#include <stdio.h>
6+
#include <stdlib.h>
7+
#include <string.h>
8+
#include <sys/ioctl.h>
9+
#include <sys/syscall.h>
10+
#include <unistd.h>
11+
12+
/*
 * Issue the same perf ioctl request on both counter fds, first_fd before
 * second_fd. Returns 0 on success; on failure prints @what together with the
 * errno text and returns -1.
 */
static int ioctl_both(int first_fd, int second_fd, unsigned long request,
		      const char *what)
{
	if (ioctl(first_fd, request, 0) == -1 ||
	    ioctl(second_fd, request, 0) == -1) {
		perror(what);
		return -1;
	}

	return 0;
}

/*
 * Read one 64-bit counter value from a perf event fd into *val.
 * Returns 0 only if the full value was read; otherwise reports the error
 * (or the short read) on stderr and returns -1, leaving *val unspecified.
 */
static int read_counter(int fd, const char *what, long long *val)
{
	ssize_t nread = read(fd, val, sizeof(*val));

	if (nread == (ssize_t)sizeof(*val))
		return 0;

	if (nread < 0)
		perror(what);
	else
		fprintf(stderr, "%s: short read (%zd bytes)\n", what, nread);

	return -1;
}

/*
 * Count kernel-mode retired RETs (raw event 0xc8) and retired mispredicted
 * RETs (raw event 0xc9) for this process across a 10 second sleep and print
 * both counts, so that the user can compare them.
 *
 * Exits with EXIT_FAILURE if CPUID(1).EAX is outside the accepted range, if
 * a perf event cannot be opened, or if controlling/reading a counter fails;
 * returns 0 otherwise.
 */
int main(void)
{
	struct perf_event_attr ret_attr, mret_attr;
	long long count_rets = 0, count_rets_mispred = 0;
	int rrets_fd, mrrets_fd;
	unsigned int cpuid1_eax, b, c, d;
	int ret = EXIT_FAILURE;

	__cpuid(1, cpuid1_eax, b, c, d);

	/* Only the CPUID(1).EAX range reported as Zen[1-4] below is accepted. */
	if (cpuid1_eax < 0x00800f00 ||
	    cpuid1_eax > 0x00afffff) {
		fprintf(stderr, "This needs to run on a Zen[1-4] machine (CPUID(1).EAX: 0x%x). Exiting...\n", cpuid1_eax);
		exit(EXIT_FAILURE);
	}

	memset(&ret_attr, 0, sizeof(struct perf_event_attr));
	memset(&mret_attr, 0, sizeof(struct perf_event_attr));

	ret_attr.type = mret_attr.type = PERF_TYPE_RAW;
	ret_attr.size = mret_attr.size = sizeof(struct perf_event_attr);
	ret_attr.config = 0xc8;		/* RETs retired */
	mret_attr.config = 0xc9;	/* RETs retired mispredicted */
	/* Start disabled so that both counters are enabled back to back below. */
	ret_attr.disabled = mret_attr.disabled = 1;
	/* Count kernel mode only. */
	ret_attr.exclude_user = mret_attr.exclude_user = 1;
	ret_attr.exclude_hv = mret_attr.exclude_hv = 1;

	rrets_fd = syscall(SYS_perf_event_open, &ret_attr, 0, -1, -1, 0);
	if (rrets_fd == -1) {
		perror("opening retired RETs fd");
		exit(EXIT_FAILURE);
	}

	mrrets_fd = syscall(SYS_perf_event_open, &mret_attr, 0, -1, -1, 0);
	if (mrrets_fd == -1) {
		perror("opening retired mispredicted RETs fd");
		close(rrets_fd);
		exit(EXIT_FAILURE);
	}

	if (ioctl_both(rrets_fd, mrrets_fd, PERF_EVENT_IOC_RESET,
		       "resetting RET counters"))
		goto out;

	if (ioctl_both(rrets_fd, mrrets_fd, PERF_EVENT_IOC_ENABLE,
		       "enabling RET counters"))
		goto out;

	printf("Sleeping for 10 seconds\n");
	sleep(10);

	if (ioctl_both(rrets_fd, mrrets_fd, PERF_EVENT_IOC_DISABLE,
		       "disabling RET counters"))
		goto out;

	/* Never print counts that were not actually read. */
	if (read_counter(rrets_fd, "reading retired RETs count", &count_rets) ||
	    read_counter(mrrets_fd, "reading retired mispredicted RETs count",
			 &count_rets_mispred))
		goto out;

	printf("RETs: (%lld retired <-> %lld mispredicted)\n",
	       count_rets, count_rets_mispred);
	printf("SRSO Safe-RET mitigation works correctly if both counts are almost equal.\n");

	ret = EXIT_SUCCESS;

out:
	close(mrrets_fd);
	close(rrets_fd);

	return ret;
}

0 commit comments

Comments
 (0)