Skip to content

Commit d585951

Browse files
broonie authored and ctmarinas committed
kselftest/arm64: Include kernel mode NEON in fp-stress
Currently fp-stress only covers userspace use of floating point, it does not cover any kernel mode uses. Since currently kernel mode floating point usage can't be preempted and there are explicit preemption points in the existing implementations this isn't so important for fp-stress but when we re-add preemption it will be good to try to exercise it. When the arm64 accelerated crypto operations are implemented we can relatively straightforwardly trigger kernel mode floating point usage by using the crypto userspace API to hash data, using the splice() support in an effort to minimise copying. We use /proc/crypto to check which accelerated implementations are available, picking the first symmetric hash we find. We run the kernel mode test unconditionally, replacing the second copy of the FPSIMD testcase for systems with FPSIMD only. If we don't think there are any suitable kernel mode implementations we fall back to running another copy of fpsimd-stress. There are a number of issues with this approach, we don't actually verify that we are using an accelerated (or even CPU) implementation of the algorithm being tested and even with attempting to use splice() to minimise copying there are sizing limits on how much data gets spliced at once. Signed-off-by: Mark Brown <[email protected]> Link: https://lore.kernel.org/r/[email protected] Signed-off-by: Catalin Marinas <[email protected]>
1 parent 83a7eef commit d585951

File tree

4 files changed

+343
-9
lines changed

4 files changed

+343
-9
lines changed

tools/testing/selftests/arm64/fp/.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ fp-pidbench
22
fp-ptrace
33
fp-stress
44
fpsimd-test
5+
kernel-test
56
rdvl-sme
67
rdvl-sve
78
sve-probe-vls

tools/testing/selftests/arm64/fp/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ TEST_GEN_PROGS := \
1212
vec-syscfg \
1313
za-fork za-ptrace
1414
TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
15+
kernel-test \
1516
rdvl-sme rdvl-sve \
1617
sve-test \
1718
ssve-test \

tools/testing/selftests/arm64/fp/fp-stress.c

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,19 @@ static void start_fpsimd(struct child_data *child, int cpu, int copy)
319319
ksft_print_msg("Started %s\n", child->name);
320320
}
321321

322+
static void start_kernel(struct child_data *child, int cpu, int copy)
323+
{
324+
int ret;
325+
326+
ret = asprintf(&child->name, "KERNEL-%d-%d", cpu, copy);
327+
if (ret == -1)
328+
ksft_exit_fail_msg("asprintf() failed\n");
329+
330+
child_start(child, "./kernel-test");
331+
332+
ksft_print_msg("Started %s\n", child->name);
333+
}
334+
322335
static void start_sve(struct child_data *child, int vl, int cpu)
323336
{
324337
int ret;
@@ -438,7 +451,7 @@ int main(int argc, char **argv)
438451
int ret;
439452
int timeout = 10;
440453
int cpus, i, j, c;
441-
int sve_vl_count, sme_vl_count, fpsimd_per_cpu;
454+
int sve_vl_count, sme_vl_count;
442455
bool all_children_started = false;
443456
int seen_children;
444457
int sve_vls[MAX_VLS], sme_vls[MAX_VLS];
@@ -482,12 +495,7 @@ int main(int argc, char **argv)
482495
have_sme2 = false;
483496
}
484497

485-
/* Force context switching if we only have FPSIMD */
486-
if (!sve_vl_count && !sme_vl_count)
487-
fpsimd_per_cpu = 2;
488-
else
489-
fpsimd_per_cpu = 1;
490-
tests += cpus * fpsimd_per_cpu;
498+
tests += cpus * 2;
491499

492500
ksft_print_header();
493501
ksft_set_plan(tests);
@@ -542,8 +550,8 @@ int main(int argc, char **argv)
542550
tests);
543551

544552
for (i = 0; i < cpus; i++) {
545-
for (j = 0; j < fpsimd_per_cpu; j++)
546-
start_fpsimd(&children[num_children++], i, j);
553+
start_fpsimd(&children[num_children++], i, 0);
554+
start_kernel(&children[num_children++], i, 0);
547555

548556
for (j = 0; j < sve_vl_count; j++)
549557
start_sve(&children[num_children++], sve_vls[j], i);
Lines changed: 324 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,324 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Copyright (C) 2024 ARM Limited.
4+
*/
5+
6+
#define _GNU_SOURCE
7+
8+
#include <stdio.h>
9+
#include <stdlib.h>
10+
#include <stdbool.h>
11+
#include <errno.h>
12+
#include <fcntl.h>
13+
#include <signal.h>
14+
#include <string.h>
15+
#include <unistd.h>
16+
17+
#include <sys/socket.h>
18+
19+
#include <linux/kernel.h>
20+
#include <linux/if_alg.h>
21+
22+
#define DATA_SIZE (16 * 4096)
23+
24+
/* AF_ALG sockets: base is bound to the algorithm, sock is the accept()ed one */
static int base;
static int sock;

/* Size in bytes of the digest, parsed from "digestsize" in /proc/crypto */
static int digest_len;
/* Reference digest computed once at startup */
static char *ref;
/* Digest recomputed on every iteration and compared against ref */
static char *digest;
/* Algorithm name ("name" field of /proc/crypto) the socket is bound to */
static char *alg_name;

/* Describes the data buffer that gets hashed */
static struct iovec data_iov;
/* Pipe used to move the data to the socket: [0] read end, [1] write end */
static int zerocopy[2];
/* Number of SIGUSR2 signals handled so far */
static int sigs;
/* Number of digests successfully verified so far */
static int iter;
35+
36+
static void handle_exit_signal(int sig, siginfo_t *info, void *context)
37+
{
38+
printf("Terminated by signal %d, iterations=%d, signals=%d\n",
39+
sig, iter, sigs);
40+
exit(0);
41+
}
42+
43+
static void handle_kick_signal(int sig, siginfo_t *info, void *context)
44+
{
45+
sigs++;
46+
}
47+
48+
/*
 * Driver names (the "driver" field of /proc/crypto) that are treated as
 * hash implementations with kernel mode FP usage.  The table is read-only,
 * so both the pointers and the strings are const.
 */
static const char *const drivers[] = {
	"crct10dif-arm64-ce",
	/* "crct10dif-arm64-neon", - Same priority as generic */
	"sha1-ce",
	"sha224-arm64",
	"sha224-arm64-neon",
	"sha224-ce",
	"sha256-arm64",
	"sha256-arm64-neon",
	"sha256-ce",
	"sha384-ce",
	"sha512-ce",
	"sha3-224-ce",
	"sha3-256-ce",
	"sha3-384-ce",
	"sha3-512-ce",
	"sm3-ce",
	"sm3-neon",
};
67+
68+
static bool create_socket(void)
69+
{
70+
FILE *proc;
71+
struct sockaddr_alg addr;
72+
char buf[1024];
73+
char *c, *driver_name;
74+
bool is_shash, match;
75+
int ret, i;
76+
77+
ret = socket(AF_ALG, SOCK_SEQPACKET, 0);
78+
if (ret < 0) {
79+
if (errno == EAFNOSUPPORT) {
80+
printf("AF_ALG not supported\n");
81+
return false;
82+
}
83+
84+
printf("Failed to create AF_ALG socket: %s (%d)\n",
85+
strerror(errno), errno);
86+
return false;
87+
}
88+
base = ret;
89+
90+
memset(&addr, 0, sizeof(addr));
91+
addr.salg_family = AF_ALG;
92+
strncpy((char *)addr.salg_type, "hash", sizeof(addr.salg_type));
93+
94+
proc = fopen("/proc/crypto", "r");
95+
if (!proc) {
96+
printf("Unable to open /proc/crypto\n");
97+
return false;
98+
}
99+
100+
driver_name = NULL;
101+
is_shash = false;
102+
match = false;
103+
104+
/* Look through /proc/crypto for a driver with kernel mode FP usage */
105+
while (!match) {
106+
c = fgets(buf, sizeof(buf), proc);
107+
if (!c) {
108+
if (feof(proc)) {
109+
printf("Nothing found in /proc/crypto\n");
110+
return false;
111+
}
112+
continue;
113+
}
114+
115+
/* Algorithm descriptions are separated by a blank line */
116+
if (*c == '\n') {
117+
if (is_shash && driver_name) {
118+
for (i = 0; i < ARRAY_SIZE(drivers); i++) {
119+
if (strcmp(drivers[i],
120+
driver_name) == 0) {
121+
match = true;
122+
}
123+
}
124+
}
125+
126+
if (!match) {
127+
digest_len = 0;
128+
129+
free(driver_name);
130+
driver_name = NULL;
131+
132+
free(alg_name);
133+
alg_name = NULL;
134+
135+
is_shash = false;
136+
}
137+
continue;
138+
}
139+
140+
/* Remove trailing newline */
141+
c = strchr(buf, '\n');
142+
if (c)
143+
*c = '\0';
144+
145+
/* Find the field/value separator and start of the value */
146+
c = strchr(buf, ':');
147+
if (!c)
148+
continue;
149+
c += 2;
150+
151+
if (strncmp(buf, "digestsize", strlen("digestsize")) == 0)
152+
sscanf(c, "%d", &digest_len);
153+
154+
if (strncmp(buf, "name", strlen("name")) == 0)
155+
alg_name = strdup(c);
156+
157+
if (strncmp(buf, "driver", strlen("driver")) == 0)
158+
driver_name = strdup(c);
159+
160+
if (strncmp(buf, "type", strlen("type")) == 0)
161+
if (strncmp(c, "shash", strlen("shash")) == 0)
162+
is_shash = true;
163+
}
164+
165+
strncpy((char *)addr.salg_name, alg_name,
166+
sizeof(addr.salg_name) - 1);
167+
168+
ret = bind(base, (struct sockaddr *)&addr, sizeof(addr));
169+
if (ret < 0) {
170+
printf("Failed to bind %s: %s (%d)\n",
171+
addr.salg_name, strerror(errno), errno);
172+
return false;
173+
}
174+
175+
ret = accept(base, NULL, 0);
176+
if (ret < 0) {
177+
printf("Failed to accept %s: %s (%d)\n",
178+
addr.salg_name, strerror(errno), errno);
179+
return false;
180+
}
181+
182+
sock = ret;
183+
184+
ret = pipe(zerocopy);
185+
if (ret != 0) {
186+
printf("Failed to create zerocopy pipe: %s (%d)\n",
187+
strerror(errno), errno);
188+
return false;
189+
}
190+
191+
ref = malloc(digest_len);
192+
if (!ref) {
193+
printf("Failed to allocated %d byte reference\n", digest_len);
194+
return false;
195+
}
196+
197+
digest = malloc(digest_len);
198+
if (!digest) {
199+
printf("Failed to allocated %d byte digest\n", digest_len);
200+
return false;
201+
}
202+
203+
return true;
204+
}
205+
206+
static bool compute_digest(void *buf)
207+
{
208+
struct iovec iov;
209+
int ret, wrote;
210+
211+
iov = data_iov;
212+
while (iov.iov_len) {
213+
ret = vmsplice(zerocopy[1], &iov, 1, SPLICE_F_GIFT);
214+
if (ret < 0) {
215+
printf("Failed to send buffer: %s (%d)\n",
216+
strerror(errno), errno);
217+
return false;
218+
}
219+
220+
wrote = ret;
221+
ret = splice(zerocopy[0], NULL, sock, NULL, wrote, 0);
222+
if (ret < 0) {
223+
printf("Failed to splice buffer: %s (%d)\n",
224+
strerror(errno), errno);
225+
} else if (ret != wrote) {
226+
printf("Short splice: %d < %d\n", ret, wrote);
227+
}
228+
229+
iov.iov_len -= wrote;
230+
iov.iov_base += wrote;
231+
}
232+
233+
reread:
234+
ret = recv(sock, buf, digest_len, 0);
235+
if (ret == 0) {
236+
printf("No disgest returned\n");
237+
return false;
238+
}
239+
if (ret != digest_len) {
240+
if (errno == -EAGAIN)
241+
goto reread;
242+
printf("Failed to get digest: %s (%d)\n",
243+
strerror(errno), errno);
244+
return false;
245+
}
246+
247+
return true;
248+
}
249+
250+
int main(void)
251+
{
252+
char *data;
253+
struct sigaction sa;
254+
int ret;
255+
256+
/* Ensure we have unbuffered output */
257+
setvbuf(stdout, NULL, _IOLBF, 0);
258+
259+
/* The parent will communicate with us via signals */
260+
memset(&sa, 0, sizeof(sa));
261+
sa.sa_sigaction = handle_exit_signal;
262+
sa.sa_flags = SA_RESTART | SA_SIGINFO;
263+
sigemptyset(&sa.sa_mask);
264+
ret = sigaction(SIGTERM, &sa, NULL);
265+
if (ret < 0)
266+
printf("Failed to install SIGTERM handler: %s (%d)\n",
267+
strerror(errno), errno);
268+
269+
sa.sa_sigaction = handle_kick_signal;
270+
ret = sigaction(SIGUSR2, &sa, NULL);
271+
if (ret < 0)
272+
printf("Failed to install SIGUSR2 handler: %s (%d)\n",
273+
strerror(errno), errno);
274+
275+
data = malloc(DATA_SIZE);
276+
if (!data) {
277+
printf("Failed to allocate data buffer\n");
278+
return EXIT_FAILURE;
279+
}
280+
memset(data, 0, DATA_SIZE);
281+
282+
data_iov.iov_base = data;
283+
data_iov.iov_len = DATA_SIZE;
284+
285+
/*
286+
* If we can't create a socket assume it's a lack of system
287+
* support and fall back to a basic FPSIMD test for the
288+
* benefit of fp-stress.
289+
*/
290+
if (!create_socket()) {
291+
execl("./fpsimd-test", "./fpsimd-test", NULL);
292+
printf("Failed to fall back to fspimd-test: %d (%s)\n",
293+
errno, strerror(errno));
294+
return EXIT_FAILURE;
295+
}
296+
297+
/*
298+
* Compute a reference digest we hope is repeatable, we do
299+
* this at runtime partly to make it easier to play with
300+
* parameters.
301+
*/
302+
if (!compute_digest(ref)) {
303+
printf("Failed to compute reference digest\n");
304+
return EXIT_FAILURE;
305+
}
306+
307+
printf("AF_ALG using %s\n", alg_name);
308+
309+
while (true) {
310+
if (!compute_digest(digest)) {
311+
printf("Failed to coempute digest, iter=%d\n", iter);
312+
return EXIT_FAILURE;
313+
}
314+
315+
if (memcmp(ref, digest, digest_len) != 0) {
316+
printf("Digest mismatch, iter=%d\n", iter);
317+
return EXIT_FAILURE;
318+
}
319+
320+
iter++;
321+
}
322+
323+
return EXIT_FAILURE;
324+
}

0 commit comments

Comments
 (0)