Skip to content

Commit b920aa7

Browse files
hcahcazx2c4
authored and committed
s390/vdso: Wire up getrandom() vdso implementation
Provide the s390-specific vdso getrandom() architecture backend. The data required for _vdso_rng_data is placed within the _vdso_data vvar page, by using a hardcoded offset larger than vdso_data. As required, the chacha20 implementation does not write to the stack. The implementation follows more or less the arm64 implementation and makes use of vector instructions. It has a fallback to the getrandom() system call for machines where the vector facility is not installed. The check if the vector facility is installed, as well as an optimization for machines with the vector-enhancements facility 2, is implemented with alternatives, avoiding runtime checks. Note that __kernel_getrandom() is implemented without the vdso user wrapper which would set up a stack frame for odd cases (aka very old glibc variants) where the caller has not done that. All callers of __kernel_getrandom() are required to set up a stack frame, as the C ABI requires. The vdso testcases vdso_test_getrandom and vdso_test_chacha pass. Benchmark on a z16: $ ./vdso_test_getrandom bench-single vdso: 25000000 times in 0.493703559 seconds syscall: 25000000 times in 6.584025337 seconds Signed-off-by: Heiko Carstens <[email protected]> Reviewed-by: Harald Freudenberger <[email protected]> Signed-off-by: Jason A. Donenfeld <[email protected]>
1 parent c1ae1b4 commit b920aa7

File tree

12 files changed

+291
-9
lines changed

12 files changed

+291
-9
lines changed

arch/s390/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,7 @@ config S390
243243
select TRACE_IRQFLAGS_SUPPORT
244244
select TTY
245245
select USER_STACKTRACE_SUPPORT
246+
select VDSO_GETRANDOM
246247
select VIRT_CPU_ACCOUNTING
247248
select ZONE_DMA
248249
# Note: keep the above list sorted alphabetically

arch/s390/include/asm/fpu-insn-asm.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -407,6 +407,28 @@
407407
MRXBOPC 0, 0x0E, v1
408408
.endm
409409

410+
/* VECTOR STORE BYTE REVERSED ELEMENTS */
411+
.macro VSTBR vr1, disp, index="%r0", base, m
412+
VX_NUM v1, \vr1
413+
GR_NUM x2, \index
414+
GR_NUM b2, \base
415+
.word 0xE600 | ((v1&15) << 4) | (x2&15)
416+
.word (b2 << 12) | (\disp)
417+
MRXBOPC \m, 0x0E, v1
418+
.endm
419+
.macro VSTBRH vr1, disp, index="%r0", base
420+
VSTBR \vr1, \disp, \index, \base, 1
421+
.endm
422+
.macro VSTBRF vr1, disp, index="%r0", base
423+
VSTBR \vr1, \disp, \index, \base, 2
424+
.endm
425+
.macro VSTBRG vr1, disp, index="%r0", base
426+
VSTBR \vr1, \disp, \index, \base, 3
427+
.endm
428+
.macro VSTBRQ vr1, disp, index="%r0", base
429+
VSTBR \vr1, \disp, \index, \base, 4
430+
.endm
431+
410432
/* VECTOR STORE MULTIPLE */
411433
.macro VSTM vfrom, vto, disp, base, hint=3
412434
VX_NUM v1, \vfrom
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
3+
#ifndef __ASM_VDSO_GETRANDOM_H
4+
#define __ASM_VDSO_GETRANDOM_H
5+
6+
#ifndef __ASSEMBLY__
7+
8+
#include <vdso/datapage.h>
9+
#include <asm/vdso/vsyscall.h>
10+
#include <asm/syscall.h>
11+
#include <asm/unistd.h>
12+
#include <asm/page.h>
13+
14+
/**
15+
* getrandom_syscall - Invoke the getrandom() syscall.
16+
* @buffer: Destination buffer to fill with random bytes.
17+
* @len: Size of @buffer in bytes.
18+
* @flags: Zero or more GRND_* flags.
19+
* Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
20+
*/
21+
static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsigned int flags)
22+
{
23+
return syscall3(__NR_getrandom, (long)buffer, (long)len, (long)flags);
24+
}
25+
26+
static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
27+
{
28+
/*
29+
* The RNG data is in the real VVAR data page, but if a task belongs to a time namespace
30+
* then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_
31+
* PAGE_OFFSET points to the real VVAR page.
32+
*/
33+
if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS)
34+
return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
35+
return &_vdso_rng_data;
36+
}
37+
38+
#endif /* !__ASSEMBLY__ */
39+
40+
#endif /* __ASM_VDSO_GETRANDOM_H */

arch/s390/include/asm/vdso/vsyscall.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,21 @@
22
#ifndef __ASM_VDSO_VSYSCALL_H
33
#define __ASM_VDSO_VSYSCALL_H
44

5+
#define __VDSO_RND_DATA_OFFSET 768
6+
57
#ifndef __ASSEMBLY__
68

79
#include <linux/hrtimer.h>
810
#include <linux/timekeeper_internal.h>
911
#include <vdso/datapage.h>
1012
#include <asm/vdso.h>
13+
14+
enum vvar_pages {
15+
VVAR_DATA_PAGE_OFFSET,
16+
VVAR_TIMENS_PAGE_OFFSET,
17+
VVAR_NR_PAGES
18+
};
19+
1120
/*
1221
* Update the vDSO data page to keep in sync with kernel timekeeping.
1322
*/
@@ -18,6 +27,12 @@ static __always_inline struct vdso_data *__s390_get_k_vdso_data(void)
1827
}
1928
#define __arch_get_k_vdso_data __s390_get_k_vdso_data
2029

30+
static __always_inline struct vdso_rng_data *__s390_get_k_vdso_rnd_data(void)
31+
{
32+
return (void *)vdso_data + __VDSO_RND_DATA_OFFSET;
33+
}
34+
#define __arch_get_k_vdso_rng_data __s390_get_k_vdso_rnd_data
35+
2136
/* The asm-generic header needs to be included after the definitions above */
2237
#include <asm-generic/vdso/vsyscall.h>
2338

arch/s390/kernel/vdso.c

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include <linux/time_namespace.h>
2020
#include <linux/random.h>
2121
#include <vdso/datapage.h>
22+
#include <asm/vdso/vsyscall.h>
2223
#include <asm/alternative.h>
2324
#include <asm/vdso.h>
2425

@@ -31,12 +32,6 @@ static union vdso_data_store vdso_data_store __page_aligned_data;
3132

3233
struct vdso_data *vdso_data = vdso_data_store.data;
3334

34-
enum vvar_pages {
35-
VVAR_DATA_PAGE_OFFSET,
36-
VVAR_TIMENS_PAGE_OFFSET,
37-
VVAR_NR_PAGES,
38-
};
39-
4035
#ifdef CONFIG_TIME_NS
4136
struct vdso_data *arch_get_vdso_data(void *vvar_page)
4237
{

arch/s390/kernel/vdso64/Makefile

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,17 @@
33

44
# Include the generic Makefile to check the built vdso.
55
include $(srctree)/lib/vdso/Makefile
6-
obj-vdso64 = vdso_user_wrapper.o note.o
7-
obj-cvdso64 = vdso64_generic.o getcpu.o
6+
obj-vdso64 = vdso_user_wrapper.o note.o vgetrandom-chacha.o
7+
obj-cvdso64 = vdso64_generic.o getcpu.o vgetrandom.o
88
VDSO_CFLAGS_REMOVE := -pg $(CC_FLAGS_FTRACE) $(CC_FLAGS_EXPOLINE) $(CC_FLAGS_CHECK_STACK)
99
CFLAGS_REMOVE_getcpu.o = $(VDSO_CFLAGS_REMOVE)
10+
CFLAGS_REMOVE_vgetrandom.o = $(VDSO_CFLAGS_REMOVE)
1011
CFLAGS_REMOVE_vdso64_generic.o = $(VDSO_CFLAGS_REMOVE)
1112

13+
ifneq ($(c-getrandom-y),)
14+
CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
15+
endif
16+
1217
# Build rules
1318

1419
targets := $(obj-vdso64) $(obj-cvdso64) vdso64.so vdso64.so.dbg

arch/s390/kernel/vdso64/vdso.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,6 @@ int __s390_vdso_getcpu(unsigned *cpu, unsigned *node, struct getcpu_cache *unuse
1010
int __s390_vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz);
1111
int __s390_vdso_clock_gettime(clockid_t clock, struct __kernel_timespec *ts);
1212
int __s390_vdso_clock_getres(clockid_t clock, struct __kernel_timespec *ts);
13+
ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len);
1314

1415
#endif /* __ARCH_S390_KERNEL_VDSO64_VDSO_H */

arch/s390/kernel/vdso64/vdso64.lds.S

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
* library
55
*/
66

7+
#include <asm/vdso/vsyscall.h>
78
#include <asm/page.h>
89
#include <asm/vdso.h>
910

@@ -13,6 +14,7 @@ OUTPUT_ARCH(s390:64-bit)
1314
SECTIONS
1415
{
1516
PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
17+
PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
1618
#ifdef CONFIG_TIME_NS
1719
PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
1820
#endif
@@ -144,6 +146,7 @@ VERSION
144146
__kernel_restart_syscall;
145147
__kernel_rt_sigreturn;
146148
__kernel_sigreturn;
149+
__kernel_getrandom;
147150
local: *;
148151
};
149152
}
Lines changed: 185 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,185 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
3+
#include <linux/linkage.h>
4+
#include <asm/alternative.h>
5+
#include <asm/fpu-insn.h>
6+
7+
#define STATE0 %v0
8+
#define STATE1 %v1
9+
#define STATE2 %v2
10+
#define STATE3 %v3
11+
#define COPY0 %v4
12+
#define COPY1 %v5
13+
#define COPY2 %v6
14+
#define COPY3 %v7
15+
#define PERM4 %v16
16+
#define PERM8 %v17
17+
#define PERM12 %v18
18+
#define BEPERM %v19
19+
#define TMP0 %v20
20+
#define TMP1 %v21
21+
#define TMP2 %v22
22+
#define TMP3 %v23
23+
24+
.section .rodata
25+
26+
.balign 128
27+
.Lconstants:
28+
.long 0x61707865,0x3320646e,0x79622d32,0x6b206574 # endian-neutral
29+
.long 0x04050607,0x08090a0b,0x0c0d0e0f,0x00010203 # rotl 4 bytes
30+
.long 0x08090a0b,0x0c0d0e0f,0x00010203,0x04050607 # rotl 8 bytes
31+
.long 0x0c0d0e0f,0x00010203,0x04050607,0x08090a0b # rotl 12 bytes
32+
.long 0x03020100,0x07060504,0x0b0a0908,0x0f0e0d0c # byte swap
33+
34+
.text
35+
/*
36+
* s390 ChaCha20 implementation meant for vDSO. Produces a given positive
37+
* number of blocks of output with nonce 0, taking an input key and an 8-byte
38+
* counter. Does not spill to the stack.
39+
*
40+
* void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
41+
* const uint8_t *key,
42+
* uint32_t *counter,
43+
* size_t nblocks)
44+
*/
45+
SYM_FUNC_START(__arch_chacha20_blocks_nostack)
46+
larl %r1,.Lconstants
47+
48+
/* COPY0 = "expand 32-byte k" */
49+
VL COPY0,0,,%r1
50+
51+
/* PERM4-PERM12,BEPERM = byte selectors for VPERM */
52+
VLM PERM4,BEPERM,16,%r1
53+
54+
/* COPY1,COPY2 = key */
55+
VLM COPY1,COPY2,0,%r3
56+
57+
/* COPY3 = counter || zero nonce */
58+
lg %r3,0(%r4)
59+
VZERO COPY3
60+
VLVGG COPY3,%r3,0
61+
62+
lghi %r1,0
63+
.Lblock:
64+
VLR STATE0,COPY0
65+
VLR STATE1,COPY1
66+
VLR STATE2,COPY2
67+
VLR STATE3,COPY3
68+
69+
lghi %r0,10
70+
.Ldoubleround:
71+
/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
72+
VAF STATE0,STATE0,STATE1
73+
VX STATE3,STATE3,STATE0
74+
VERLLF STATE3,STATE3,16
75+
76+
/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
77+
VAF STATE2,STATE2,STATE3
78+
VX STATE1,STATE1,STATE2
79+
VERLLF STATE1,STATE1,12
80+
81+
/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
82+
VAF STATE0,STATE0,STATE1
83+
VX STATE3,STATE3,STATE0
84+
VERLLF STATE3,STATE3,8
85+
86+
/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
87+
VAF STATE2,STATE2,STATE3
88+
VX STATE1,STATE1,STATE2
89+
VERLLF STATE1,STATE1,7
90+
91+
/* STATE1[0,1,2,3] = STATE1[1,2,3,0] */
92+
VPERM STATE1,STATE1,STATE1,PERM4
93+
/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
94+
VPERM STATE2,STATE2,STATE2,PERM8
95+
/* STATE3[0,1,2,3] = STATE3[3,0,1,2] */
96+
VPERM STATE3,STATE3,STATE3,PERM12
97+
98+
/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 16) */
99+
VAF STATE0,STATE0,STATE1
100+
VX STATE3,STATE3,STATE0
101+
VERLLF STATE3,STATE3,16
102+
103+
/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 12) */
104+
VAF STATE2,STATE2,STATE3
105+
VX STATE1,STATE1,STATE2
106+
VERLLF STATE1,STATE1,12
107+
108+
/* STATE0 += STATE1, STATE3 = rotl32(STATE3 ^ STATE0, 8) */
109+
VAF STATE0,STATE0,STATE1
110+
VX STATE3,STATE3,STATE0
111+
VERLLF STATE3,STATE3,8
112+
113+
/* STATE2 += STATE3, STATE1 = rotl32(STATE1 ^ STATE2, 7) */
114+
VAF STATE2,STATE2,STATE3
115+
VX STATE1,STATE1,STATE2
116+
VERLLF STATE1,STATE1,7
117+
118+
/* STATE1[0,1,2,3] = STATE1[3,0,1,2] */
119+
VPERM STATE1,STATE1,STATE1,PERM12
120+
/* STATE2[0,1,2,3] = STATE2[2,3,0,1] */
121+
VPERM STATE2,STATE2,STATE2,PERM8
122+
/* STATE3[0,1,2,3] = STATE3[1,2,3,0] */
123+
VPERM STATE3,STATE3,STATE3,PERM4
124+
brctg %r0,.Ldoubleround
125+
126+
/* OUTPUT0 = STATE0 + COPY0 */
127+
VAF STATE0,STATE0,COPY0
128+
/* OUTPUT1 = STATE1 + COPY1 */
129+
VAF STATE1,STATE1,COPY1
130+
/* OUTPUT2 = STATE2 + COPY2 */
131+
VAF STATE2,STATE2,COPY2
132+
/* OUTPUT3 = STATE3 + COPY3 */
133+
VAF STATE3,STATE3,COPY3
134+
135+
/*
136+
* 32 bit wise little endian store to OUTPUT. If the vector
137+
* enhancement facility 2 is not installed use the slow path.
138+
*/
139+
ALTERNATIVE "brc 0xf,.Lstoreslow", "nop", ALT_FACILITY(148)
140+
VSTBRF STATE0,0,,%r2
141+
VSTBRF STATE1,16,,%r2
142+
VSTBRF STATE2,32,,%r2
143+
VSTBRF STATE3,48,,%r2
144+
.Lstoredone:
145+
146+
/* ++COPY3.COUNTER */
147+
/* alsih %r3,1 */
148+
.insn rilu,0xcc0a00000000,%r3,1
149+
alcr %r3,%r1
150+
VLVGG COPY3,%r3,0
151+
152+
/* OUTPUT += 64, --NBLOCKS */
153+
aghi %r2,64
154+
brctg %r5,.Lblock
155+
156+
/* COUNTER = COPY3.COUNTER */
157+
stg %r3,0(%r4)
158+
159+
/* Zero out potentially sensitive regs */
160+
VZERO STATE0
161+
VZERO STATE1
162+
VZERO STATE2
163+
VZERO STATE3
164+
VZERO COPY1
165+
VZERO COPY2
166+
167+
/* Early exit if TMP0-TMP3 have not been used */
168+
ALTERNATIVE "nopr", "br %r14", ALT_FACILITY(148)
169+
170+
VZERO TMP0
171+
VZERO TMP1
172+
VZERO TMP2
173+
VZERO TMP3
174+
175+
br %r14
176+
177+
.Lstoreslow:
178+
/* Convert STATE to little endian format and store to OUTPUT */
179+
VPERM TMP0,STATE0,STATE0,BEPERM
180+
VPERM TMP1,STATE1,STATE1,BEPERM
181+
VPERM TMP2,STATE2,STATE2,BEPERM
182+
VPERM TMP3,STATE3,STATE3,BEPERM
183+
VSTM TMP0,TMP3,0,%r2
184+
j .Lstoredone
185+
SYM_FUNC_END(__arch_chacha20_blocks_nostack)

arch/s390/kernel/vdso64/vgetrandom.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include <asm/facility.h>
4+
#include <uapi/asm-generic/errno.h>
5+
#include "vdso.h"
6+
7+
ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
8+
{
9+
if (test_facility(129))
10+
return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
11+
if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
12+
return -ENOSYS;
13+
return getrandom_syscall(buffer, len, flags);
14+
}

0 commit comments

Comments
 (0)