Skip to content

Commit ee0d030

Browse files
xry111palmer-dabbelt
authored andcommitted
RISC-V: vDSO: Wire up getrandom() vDSO implementation
Hook up the generic vDSO implementation to the generic vDSO getrandom implementation by providing the required __arch_chacha20_blocks_nostack and getrandom_syscall implementations. Also wire up the selftests. The benchmark result: vdso: 25000000 times in 2.466341333 seconds libc: 25000000 times in 41.447720005 seconds syscall: 25000000 times in 41.043926672 seconds vdso: 25000000 x 256 times in 162.286219353 seconds libc: 25000000 x 256 times in 2953.855018685 seconds syscall: 25000000 x 256 times in 2796.268546000 seconds [ alex: - Fix dynamic relocation - Squash Nathan's fix https://lore.kernel.org/all/[email protected]/ - Add comment from Loongarch ] Signed-off-by: Xi Ruoyao <[email protected]> Link: https://lore.kernel.org/r/[email protected] Tested-by: Alexandre Ghiti <[email protected]> Signed-off-by: Alexandre Ghiti <[email protected]> Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent a869b8c commit ee0d030

File tree

7 files changed

+308
-0
lines changed

7 files changed

+308
-0
lines changed

arch/riscv/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,6 +219,7 @@ config RISCV
219219
select THREAD_INFO_IN_TASK
220220
select TRACE_IRQFLAGS_SUPPORT
221221
select UACCESS_MEMCPY if !MMU
222+
select VDSO_GETRANDOM if HAVE_GENERIC_VDSO
222223
select USER_STACKTRACE_SUPPORT
223224
select ZONE_DMA32 if 64BIT
224225

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
/*
3+
* Copyright (C) 2025 Xi Ruoyao <[email protected]>. All Rights Reserved.
4+
*/
5+
#ifndef __ASM_VDSO_GETRANDOM_H
6+
#define __ASM_VDSO_GETRANDOM_H
7+
8+
#ifndef __ASSEMBLY__
9+
10+
#include <asm/unistd.h>
11+
12+
static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags)
13+
{
14+
register long ret asm("a0");
15+
register long nr asm("a7") = __NR_getrandom;
16+
register void *buffer asm("a0") = _buffer;
17+
register size_t len asm("a1") = _len;
18+
register unsigned int flags asm("a2") = _flags;
19+
20+
asm volatile ("ecall\n"
21+
: "+r" (ret)
22+
: "r" (nr), "r" (buffer), "r" (len), "r" (flags)
23+
: "memory");
24+
25+
return ret;
26+
}
27+
28+
#endif /* !__ASSEMBLY__ */
29+
30+
#endif /* __ASM_VDSO_GETRANDOM_H */

arch/riscv/kernel/vdso/Makefile

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,17 @@ vdso-syms += flush_icache
1313
vdso-syms += hwprobe
1414
vdso-syms += sys_hwprobe
1515

16+
ifdef CONFIG_VDSO_GETRANDOM
17+
vdso-syms += getrandom
18+
endif
19+
1620
# Files to link into the vdso
1721
obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
1822

23+
ifdef CONFIG_VDSO_GETRANDOM
24+
obj-vdso += vgetrandom-chacha.o
25+
endif
26+
1927
ccflags-y := -fno-stack-protector
2028
ccflags-y += -DDISABLE_BRANCH_PROFILING
2129
ccflags-y += -fno-builtin
@@ -24,6 +32,10 @@ ifneq ($(c-gettimeofday-y),)
2432
CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
2533
endif
2634

35+
ifneq ($(c-getrandom-y),)
36+
CFLAGS_getrandom.o += -fPIC -include $(c-getrandom-y)
37+
endif
38+
2739
CFLAGS_hwprobe.o += -fPIC
2840

2941
# Build rules
@@ -38,6 +50,7 @@ endif
3850

3951
# Disable -pg to prevent insert call site
4052
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
53+
CFLAGS_REMOVE_getrandom.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
4154
CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS)
4255

4356
# Force dependency

arch/riscv/kernel/vdso/getrandom.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// SPDX-License-Identifier: GPL-2.0-only
2+
/*
3+
* Copyright (C) 2025 Xi Ruoyao <[email protected]>. All Rights Reserved.
4+
*/
5+
#include <linux/types.h>
6+
7+
/*
 * vDSO entry point for getrandom(): hands all five arguments, unchanged and
 * in the same order, to __cvdso_getrandom() and returns its result.
 *
 * NOTE(review): __cvdso_getrandom() is not declared in this file; presumably
 * it comes from the source that the vDSO Makefile force-includes for
 * getrandom.o -- confirm.
 */
ssize_t __vdso_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
{
	ssize_t ret;

	ret = __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);

	return ret;
}

arch/riscv/kernel/vdso/vdso.lds.S

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,9 @@ VERSION
7979
__vdso_flush_icache;
8080
#ifndef COMPAT_VDSO
8181
__vdso_riscv_hwprobe;
82+
#endif
83+
#if defined(CONFIG_VDSO_GETRANDOM) && !defined(COMPAT_VDSO)
84+
__vdso_getrandom;
8285
#endif
8386
local: *;
8487
};
Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
/*
3+
* Copyright (C) 2025 Xi Ruoyao <[email protected]>. All Rights Reserved.
4+
*
5+
* Based on arch/loongarch/vdso/vgetrandom-chacha.S.
6+
*/
7+
8+
#include <asm/asm.h>
9+
#include <linux/linkage.h>
10+
11+
.text
12+
13+
/*
 * ROTRI rd, rs, imm
 *
 * Rotate the low 32 bits of \rs right by \imm bits and leave the result
 * in \rd.  Clobbers t0.
 *
 * The left-shifted part is parked in t0 before \rd is written, so \rd may
 * be the same register as \rs (which is how OP_4REG invokes this macro).
 * "32 - \imm" is used directly as a shift amount, so \imm must be non-zero.
 */
.macro	ROTRI	rd, rs, imm
	slliw	t0, \rs, 32 - \imm
	srliw	\rd, \rs, \imm
	or	\rd, t0, \rd
.endm
18+
19+
/*
 * OP_4REG op, d0, d1, d2, d3, s0, s1, s2, s3
 *
 * Apply "\op \dN, \dN, \sN" to four independent lanes (N = 0..3).
 * \op may be an instruction or another macro taking (rd, rs, operand);
 * \s0..\s3 are passed through verbatim, so they may be registers or
 * immediates depending on what \op accepts.
 */
.macro	OP_4REG	op, d0, d1, d2, d3, s0, s1, s2, s3
	\op	\d0, \d0, \s0
	\op	\d1, \d1, \s1
	\op	\d2, \d2, \s2
	\op	\d3, \d3, \s3
.endm
25+
26+
/*
 * Generate ChaCha20 blocks while keeping the whole cipher state in registers.
 *
 * a0: output bytes
 * a1: 32-byte key input
 * a2: 8-byte counter input/output
 * a3: number of 64-byte blocks to write to output
 *
 * a3 must be non-zero: the block loop only tests the count after a block
 * has been written.
 *
 * On return a0 has been advanced past the written blocks and a3 is zero.
 * a4-a7 and t0-t6 are used as scratch; s0-s11 are saved on entry and
 * restored on exit.  Uses 64-bit loads/stores (ld/sd) and the *w
 * instruction forms.
 */
SYM_FUNC_START(__arch_chacha20_blocks_nostack)

#define output		a0
#define key		a1
#define counter		a2
#define nblocks		a3
#define i		a4
#define state0		s0
#define state1		s1
#define state2		s2
#define state3		s3
#define state4		s4
#define state5		s5
#define state6		s6
#define state7		s7
#define state8		s8
#define state9		s9
#define state10		s10
#define state11		s11
#define state12		a5
#define state13		a6
#define state14		a7
#define state15		t1
#define cnt		t2
#define copy0		t3
#define copy1		t4
#define copy2		t5
#define copy3		t6

/* Packs to be used with OP_4REG */
#define line0		state0, state1, state2, state3
#define line1		state4, state5, state6, state7
#define line2		state8, state9, state10, state11
#define line3		state12, state13, state14, state15

/* Rows 1-3 rotated by 1, 2 and 3 lanes: the diagonals of the state */
#define line1_perm	state5, state6, state7, state4
#define line2_perm	state10, state11, state8, state9
#define line3_perm	state15, state12, state13, state14

#define copy		copy0, copy1, copy2, copy3

/* Right-rotate amounts, one per lane (ROTRI by 16/20/24/25) */
#define _16		16, 16, 16, 16
#define _20		20, 20, 20, 20
#define _24		24, 24, 24, 24
#define _25		25, 25, 25, 25

	/*
	 * The ABI requires s0-s11 saved.
	 * This does not violate the stack-less requirement: no sensitive data
	 * is spilled onto the stack.
	 */
	addi		sp, sp, -12*SZREG
	REG_S		s0,         (sp)
	REG_S		s1,    SZREG(sp)
	REG_S		s2,  2*SZREG(sp)
	REG_S		s3,  3*SZREG(sp)
	REG_S		s4,  4*SZREG(sp)
	REG_S		s5,  5*SZREG(sp)
	REG_S		s6,  6*SZREG(sp)
	REG_S		s7,  7*SZREG(sp)
	REG_S		s8,  8*SZREG(sp)
	REG_S		s9,  9*SZREG(sp)
	REG_S		s10, 10*SZREG(sp)
	REG_S		s11, 11*SZREG(sp)

	/* The 64-bit counter stays in a register until the final store */
	ld		cnt, (counter)

	/* copy[0,1,2,3] = "expand 32-byte k", kept for the final addition */
	li		copy0, 0x61707865
	li		copy1, 0x3320646e
	li		copy2, 0x79622d32
	li		copy3, 0x6b206574

.Lblock:
	/* state[0,1,2,3] = "expand 32-byte k" */
	mv		state0, copy0
	mv		state1, copy1
	mv		state2, copy2
	mv		state3, copy3

	/* state[4,5,..,11] = key */
	lw		state4,   (key)
	lw		state5,  4(key)
	lw		state6,  8(key)
	lw		state7,  12(key)
	lw		state8,  16(key)
	lw		state9,  20(key)
	lw		state10, 24(key)
	lw		state11, 28(key)

	/*
	 * state[12,13] = counter
	 * Only the low 32 bits of each register matter: every later consumer
	 * is a *w instruction or a 32-bit store.
	 */
	mv		state12, cnt
	srli		state13, cnt, 32

	/* state[14,15] = 0 */
	mv		state14, zero
	mv		state15, zero

	/* 10 iterations of (odd round + even round) */
	li		i, 10
.Lpermute:
	/* odd round */
	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _16

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _20

	OP_4REG	addw	line0, line1
	OP_4REG	xor	line3, line0
	OP_4REG	ROTRI	line3, _24

	OP_4REG	addw	line2, line3
	OP_4REG	xor	line1, line2
	OP_4REG	ROTRI	line1, _25

	/* even round */
	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _16

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _20

	OP_4REG	addw	line0, line1_perm
	OP_4REG	xor	line3_perm, line0
	OP_4REG	ROTRI	line3_perm, _24

	OP_4REG	addw	line2_perm, line3_perm
	OP_4REG	xor	line1_perm, line2_perm
	OP_4REG	ROTRI	line1_perm, _25

	addi		i, i, -1
	bnez		i, .Lpermute

	/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
	OP_4REG	addw	line0, copy
	sw		state0,   (output)
	sw		state1,  4(output)
	sw		state2,  8(output)
	sw		state3, 12(output)

	/* from now on state[0,1,2,3] are scratch registers */

	/* state[0,1,2,3] = lo(key) */
	lw		state0,   (key)
	lw		state1,  4(key)
	lw		state2,  8(key)
	lw		state3, 12(key)

	/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
	OP_4REG	addw	line1, line0
	sw		state4, 16(output)
	sw		state5, 20(output)
	sw		state6, 24(output)
	sw		state7, 28(output)

	/* state[0,1,2,3] = hi(key) */
	lw		state0, 16(key)
	lw		state1, 20(key)
	lw		state2, 24(key)
	lw		state3, 28(key)

	/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
	OP_4REG	addw	line2, line0
	sw		state8,  32(output)
	sw		state9,  36(output)
	sw		state10, 40(output)
	sw		state11, 44(output)

	/* output[12,13,14,15] = state[12,13,14,15] + [cnt_lo, cnt_hi, 0, 0] */
	addw		state12, state12, cnt
	srli		state0, cnt, 32
	addw		state13, state13, state0
	sw		state12, 48(output)
	sw		state13, 52(output)
	sw		state14, 56(output)
	sw		state15, 60(output)

	/* ++counter */
	addi		cnt, cnt, 1

	/* output += 64 */
	addi		output, output, 64
	/* --nblocks */
	addi		nblocks, nblocks, -1
	bnez		nblocks, .Lblock

	/* counter = [cnt_lo, cnt_hi] */
	sd		cnt, (counter)

	/*
	 * Zero out the potentially sensitive regs, in case nothing uses these
	 * again. At this point copy[0,1,2,3] only contain "expand 32-byte k"
	 * and state[0,...,11] live in s0-s11, which are restored in the
	 * epilogue, so only state[12,...,15] need zeroing, plus t0: it is
	 * the ROTRI scratch register and still holds bits of the last
	 * rotated state word.
	 */
	mv		state12, zero
	mv		state13, zero
	mv		state14, zero
	mv		state15, zero
	mv		t0, zero

	REG_L		s0,         (sp)
	REG_L		s1,    SZREG(sp)
	REG_L		s2,  2*SZREG(sp)
	REG_L		s3,  3*SZREG(sp)
	REG_L		s4,  4*SZREG(sp)
	REG_L		s5,  5*SZREG(sp)
	REG_L		s6,  6*SZREG(sp)
	REG_L		s7,  7*SZREG(sp)
	REG_L		s8,  8*SZREG(sp)
	REG_L		s9,  9*SZREG(sp)
	REG_L		s10, 10*SZREG(sp)
	REG_L		s11, 11*SZREG(sp)
	addi		sp, sp, 12*SZREG

	ret
SYM_FUNC_END(__arch_chacha20_blocks_nostack)

tools/testing/selftests/vDSO/vgetrandom-chacha.S

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@
1111
#include "../../../../arch/loongarch/vdso/vgetrandom-chacha.S"
1212
#elif defined(__powerpc__) || defined(__powerpc64__)
1313
#include "../../../../arch/powerpc/kernel/vdso/vgetrandom-chacha.S"
14+
#elif defined(__riscv) && __riscv_xlen == 64
15+
#include "../../../../arch/riscv/kernel/vdso/vgetrandom-chacha.S"
1416
#elif defined(__s390x__)
1517
#include "../../../../arch/s390/kernel/vdso64/vgetrandom-chacha.S"
1618
#elif defined(__x86_64__)

0 commit comments

Comments
 (0)