Skip to content

Commit 712676e

Browse files
zatrazz authored and zx2c4 committed
arm64: vDSO: Wire up getrandom() vDSO implementation
Hook up the generic vDSO implementation to the aarch64 vDSO data page. The _vdso_rng_data required data is placed within the _vdso_data vvar page, by using an offset larger than the vdso_data. The vDSO function requires a ChaCha20 implementation that does not write to the stack, and that can do an entire ChaCha20 permutation. The one provided uses NEON on the permute operation, with a fallback to the syscall for chips that do not support AdvSIMD. This also passes the vdso_test_chacha test along with vdso_test_getrandom. The vdso_test_getrandom bench-single result on Neoverse-N1 shows: vdso: 25000000 times in 0.783884250 seconds libc: 25000000 times in 8.780275399 seconds syscall: 25000000 times in 8.786581518 seconds A small fixup to arch/arm64/include/asm/mman.h was required to avoid pulling kernel code into the vDSO, similar to what's already done in arch/arm64/include/asm/rwonce.h. Signed-off-by: Adhemerval Zanella <[email protected]> Reviewed-by: Ard Biesheuvel <[email protected]> Acked-by: Will Deacon <[email protected]> Signed-off-by: Jason A. Donenfeld <[email protected]>
1 parent 2c2ca34 commit 712676e

File tree

12 files changed

+286
-16
lines changed

12 files changed

+286
-16
lines changed

arch/arm64/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -262,6 +262,7 @@ config ARM64
262262
select TRACE_IRQFLAGS_NMI_SUPPORT
263263
select HAVE_SOFTIRQ_ON_OWN_STACK
264264
select USER_STACKTRACE_SUPPORT
265+
select VDSO_GETRANDOM
265266
help
266267
ARM 64-bit (AArch64) Linux support.
267268

arch/arm64/include/asm/mman.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,11 @@
22
#ifndef __ASM_MMAN_H__
33
#define __ASM_MMAN_H__
44

5+
#include <uapi/asm/mman.h>
6+
7+
#ifndef BUILD_VDSO
58
#include <linux/compiler.h>
69
#include <linux/types.h>
7-
#include <uapi/asm/mman.h>
810

911
static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
1012
unsigned long pkey __always_unused)
@@ -60,4 +62,6 @@ static inline bool arch_validate_flags(unsigned long vm_flags)
6062
}
6163
#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags)
6264

65+
#endif /* !BUILD_VDSO */
66+
6367
#endif /* ! __ASM_MMAN_H__ */
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
/* SPDX-License-Identifier: GPL-2.0 */
2+
3+
#ifndef __ASM_VDSO_GETRANDOM_H
4+
#define __ASM_VDSO_GETRANDOM_H
5+
6+
#ifndef __ASSEMBLY__
7+
8+
#include <asm/unistd.h>
9+
#include <asm/vdso/vsyscall.h>
10+
#include <vdso/datapage.h>
11+
12+
/**
13+
* getrandom_syscall - Invoke the getrandom() syscall.
14+
* @buffer: Destination buffer to fill with random bytes.
15+
* @len: Size of @buffer in bytes.
16+
* @flags: Zero or more GRND_* flags.
17+
* Returns: The number of random bytes written to @buffer, or a negative value indicating an error.
18+
*/
19+
static __always_inline ssize_t getrandom_syscall(void *_buffer, size_t _len, unsigned int _flags)
20+
{
21+
/* arm64 syscall convention: arguments in x0-x2, syscall number in x8. */
register void *buffer asm ("x0") = _buffer;
22+
register size_t len asm ("x1") = _len;
23+
register unsigned int flags asm ("x2") = _flags;
24+
/* The syscall result comes back in x0, so "ret" deliberately aliases "buffer". */
register long ret asm ("x0");
25+
register long nr asm ("x8") = __NR_getrandom;
26+
27+
/* "memory" clobber: the kernel writes the random bytes into *_buffer. */
asm volatile(
28+
" svc #0\n"
29+
: "=r" (ret)
30+
: "r" (buffer), "r" (len), "r" (flags), "r" (nr)
31+
: "memory");
32+
33+
return ret;
34+
}
35+
36+
/* Return the userspace address of the vDSO RNG data within the real VVAR page. */
static __always_inline const struct vdso_rng_data *__arch_get_vdso_rng_data(void)
37+
{
38+
/*
39+
* The RNG data is in the real VVAR data page, but if a task belongs to a time namespace
40+
* then VVAR_DATA_PAGE_OFFSET points to the namespace-specific VVAR page and VVAR_TIMENS_
41+
* PAGE_OFFSET points to the real VVAR page.
42+
*/
43+
if (IS_ENABLED(CONFIG_TIME_NS) && _vdso_data->clock_mode == VDSO_CLOCKMODE_TIMENS)
44+
return (void *)&_vdso_rng_data + VVAR_TIMENS_PAGE_OFFSET * (1UL << CONFIG_PAGE_SHIFT);
45+
return &_vdso_rng_data;
46+
}
47+
48+
#endif /* !__ASSEMBLY__ */
49+
50+
#endif /* __ASM_VDSO_GETRANDOM_H */

arch/arm64/include/asm/vdso/vsyscall.h

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,19 @@
22
#ifndef __ASM_VDSO_VSYSCALL_H
33
#define __ASM_VDSO_VSYSCALL_H
44

5+
#define __VDSO_RND_DATA_OFFSET 480
6+
57
#ifndef __ASSEMBLY__
68

79
#include <linux/timekeeper_internal.h>
810
#include <vdso/datapage.h>
911

12+
enum vvar_pages {
13+
VVAR_DATA_PAGE_OFFSET,
14+
VVAR_TIMENS_PAGE_OFFSET,
15+
VVAR_NR_PAGES,
16+
};
17+
1018
#define VDSO_PRECISION_MASK ~(0xFF00ULL<<48)
1119

1220
extern struct vdso_data *vdso_data;
@@ -21,6 +29,13 @@ struct vdso_data *__arm64_get_k_vdso_data(void)
2129
}
2230
#define __arch_get_k_vdso_data __arm64_get_k_vdso_data
2331

32+
/*
 * Kernel-side accessor for the vDSO RNG data: it lives __VDSO_RND_DATA_OFFSET
 * bytes into the vDSO data page (matching the _vdso_rng_data PROVIDE() symbol
 * in the vDSO linker script).
 */
static __always_inline
33+
struct vdso_rng_data *__arm64_get_k_vdso_rnd_data(void)
34+
{
35+
return (void *)vdso_data + __VDSO_RND_DATA_OFFSET;
36+
}
37+
#define __arch_get_k_vdso_rng_data __arm64_get_k_vdso_rnd_data
38+
2439
static __always_inline
2540
void __arm64_update_vsyscall(struct vdso_data *vdata, struct timekeeper *tk)
2641
{

arch/arm64/kernel/vdso.c

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,12 +34,6 @@ enum vdso_abi {
3434
VDSO_ABI_AA32,
3535
};
3636

37-
enum vvar_pages {
38-
VVAR_DATA_PAGE_OFFSET,
39-
VVAR_TIMENS_PAGE_OFFSET,
40-
VVAR_NR_PAGES,
41-
};
42-
4337
struct vdso_abi_info {
4438
const char *name;
4539
const char *vdso_code_start;

arch/arm64/kernel/vdso/Makefile

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
# Include the generic Makefile to check the built vdso.
1010
include $(srctree)/lib/vdso/Makefile
1111

12-
obj-vdso := vgettimeofday.o note.o sigreturn.o
12+
obj-vdso := vgettimeofday.o note.o sigreturn.o vgetrandom.o vgetrandom-chacha.o
1313

1414
# Build rules
1515
targets := $(obj-vdso) vdso.so vdso.so.dbg
@@ -34,19 +34,28 @@ ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
3434
ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
3535

3636
# -Wmissing-prototypes and -Wmissing-declarations are removed from
37-
# the CFLAGS of vgettimeofday.c to make possible to build the
38-
# kernel with CONFIG_WERROR enabled.
39-
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
40-
$(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
41-
$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
42-
-Wmissing-prototypes -Wmissing-declarations
37+
# the CFLAGS to make possible to build the kernel with CONFIG_WERROR enabled.
38+
CC_FLAGS_REMOVE_VDSO := $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) \
39+
$(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) \
40+
$(CC_FLAGS_LTO) $(CC_FLAGS_CFI) \
41+
-Wmissing-prototypes -Wmissing-declarations
4342

44-
CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -fasynchronous-unwind-tables
43+
CC_FLAGS_ADD_VDSO := -O2 -mcmodel=tiny -fasynchronous-unwind-tables
44+
45+
CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_REMOVE_VDSO)
46+
CFLAGS_REMOVE_vgetrandom.o = $(CC_FLAGS_REMOVE_VDSO)
47+
48+
CFLAGS_vgettimeofday.o = $(CC_FLAGS_ADD_VDSO)
49+
CFLAGS_vgetrandom.o = $(CC_FLAGS_ADD_VDSO)
4550

4651
ifneq ($(c-gettimeofday-y),)
4752
CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
4853
endif
4954

55+
ifneq ($(c-getrandom-y),)
56+
CFLAGS_vgetrandom.o += -include $(c-getrandom-y)
57+
endif
58+
5059
targets += vdso.lds
5160
CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
5261

arch/arm64/kernel/vdso/vdso.lds.S

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,14 +11,17 @@
1111
#include <linux/const.h>
1212
#include <asm/page.h>
1313
#include <asm/vdso.h>
14+
#include <asm/vdso/vsyscall.h>
1415
#include <asm-generic/vmlinux.lds.h>
16+
#include <vdso/datapage.h>
1517

1618
OUTPUT_FORMAT("elf64-littleaarch64", "elf64-bigaarch64", "elf64-littleaarch64")
1719
OUTPUT_ARCH(aarch64)
1820

1921
SECTIONS
2022
{
2123
PROVIDE(_vdso_data = . - __VVAR_PAGES * PAGE_SIZE);
24+
PROVIDE(_vdso_rng_data = _vdso_data + __VDSO_RND_DATA_OFFSET);
2225
#ifdef CONFIG_TIME_NS
2326
PROVIDE(_timens_data = _vdso_data + PAGE_SIZE);
2427
#endif
@@ -102,6 +105,7 @@ VERSION
102105
__kernel_gettimeofday;
103106
__kernel_clock_gettime;
104107
__kernel_clock_getres;
108+
__kernel_getrandom;
105109
local: *;
106110
};
107111
}
Lines changed: 172 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,172 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include <linux/linkage.h>
4+
#include <asm/cache.h>
5+
#include <asm/assembler.h>
6+
7+
.text
8+
9+
#define state0 v0
10+
#define state1 v1
11+
#define state2 v2
12+
#define state3 v3
13+
#define copy0 v4
14+
#define copy0_q q4
15+
#define copy1 v5
16+
#define copy2 v6
17+
#define copy3 v7
18+
#define copy3_d d7
19+
#define one_d d16
20+
#define one_q q16
21+
#define one_v v16
22+
#define tmp v17
23+
#define rot8 v18
24+
25+
/*
26+
* ARM64 ChaCha20 implementation meant for vDSO. Produces a given positive
27+
* number of blocks of output with nonce 0, taking an input key and 8-bytes
28+
* counter. Importantly does not spill to the stack.
29+
*
30+
* This implementation avoids d8-d15 because they are callee-save in user
31+
* space.
32+
*
33+
* void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes,
34+
* const uint8_t *key,
35+
* uint32_t *counter,
36+
* size_t nblocks)
37+
*
38+
* x0: output bytes
39+
* x1: 32-byte key input
40+
* x2: 8-byte counter input/output
41+
* x3: number of 64-byte block to write to output
42+
*/
43+
SYM_FUNC_START(__arch_chacha20_blocks_nostack)
44+
45+
/* copy0 = "expand 32-byte k" */
46+
mov_q x8, 0x3320646e61707865
47+
mov_q x9, 0x6b20657479622d32
48+
mov copy0.d[0], x8
49+
mov copy0.d[1], x9
50+
51+
/* copy1,copy2 = key */
52+
ld1 { copy1.4s, copy2.4s }, [x1]
53+
/* copy3 = counter || zero nonce */
54+
ld1 { copy3.2s }, [x2]
55+
56+
movi one_v.2s, #1
57+
uzp1 one_v.4s, one_v.4s, one_v.4s
58+
59+
.Lblock:
60+
/* copy state to auxiliary vectors for the final add after the permute. */
61+
mov state0.16b, copy0.16b
62+
mov state1.16b, copy1.16b
63+
mov state2.16b, copy2.16b
64+
mov state3.16b, copy3.16b
65+
66+
mov w4, 20
67+
.Lpermute:
68+
/*
69+
* Permute one 64-byte block where the state matrix is stored in the four NEON
70+
* registers state0-state3. It performs matrix operations on four words in parallel,
71+
* but requires shuffling to rearrange the words after each round.
72+
*/
73+
74+
.Ldoubleround:
75+
/* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
76+
add state0.4s, state0.4s, state1.4s
77+
eor state3.16b, state3.16b, state0.16b
78+
rev32 state3.8h, state3.8h
79+
80+
/* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
81+
add state2.4s, state2.4s, state3.4s
82+
eor tmp.16b, state1.16b, state2.16b
83+
shl state1.4s, tmp.4s, #12
84+
sri state1.4s, tmp.4s, #20
85+
86+
/* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
87+
add state0.4s, state0.4s, state1.4s
88+
eor tmp.16b, state3.16b, state0.16b
89+
shl state3.4s, tmp.4s, #8
90+
sri state3.4s, tmp.4s, #24
91+
92+
/* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
93+
add state2.4s, state2.4s, state3.4s
94+
eor tmp.16b, state1.16b, state2.16b
95+
shl state1.4s, tmp.4s, #7
96+
sri state1.4s, tmp.4s, #25
97+
98+
/* state1[0,1,2,3] = state1[1,2,3,0] */
99+
ext state1.16b, state1.16b, state1.16b, #4
100+
/* state2[0,1,2,3] = state2[2,3,0,1] */
101+
ext state2.16b, state2.16b, state2.16b, #8
102+
/* state3[0,1,2,3] = state3[3,0,1,2] */
103+
ext state3.16b, state3.16b, state3.16b, #12
104+
105+
/* state0 += state1, state3 = rotl32(state3 ^ state0, 16) */
106+
add state0.4s, state0.4s, state1.4s
107+
eor state3.16b, state3.16b, state0.16b
108+
rev32 state3.8h, state3.8h
109+
110+
/* state2 += state3, state1 = rotl32(state1 ^ state2, 12) */
111+
add state2.4s, state2.4s, state3.4s
112+
eor tmp.16b, state1.16b, state2.16b
113+
shl state1.4s, tmp.4s, #12
114+
sri state1.4s, tmp.4s, #20
115+
116+
/* state0 += state1, state3 = rotl32(state3 ^ state0, 8) */
117+
add state0.4s, state0.4s, state1.4s
118+
eor tmp.16b, state3.16b, state0.16b
119+
shl state3.4s, tmp.4s, #8
120+
sri state3.4s, tmp.4s, #24
121+
122+
/* state2 += state3, state1 = rotl32(state1 ^ state2, 7) */
123+
add state2.4s, state2.4s, state3.4s
124+
eor tmp.16b, state1.16b, state2.16b
125+
shl state1.4s, tmp.4s, #7
126+
sri state1.4s, tmp.4s, #25
127+
128+
/* state1[0,1,2,3] = state1[3,0,1,2] */
129+
ext state1.16b, state1.16b, state1.16b, #12
130+
/* state2[0,1,2,3] = state2[2,3,0,1] */
131+
ext state2.16b, state2.16b, state2.16b, #8
132+
/* state3[0,1,2,3] = state3[1,2,3,0] */
133+
ext state3.16b, state3.16b, state3.16b, #4
134+
135+
subs w4, w4, #2
136+
b.ne .Ldoubleround
137+
138+
/* output0 = state0 + copy0 */
139+
add state0.4s, state0.4s, copy0.4s
140+
/* output1 = state1 + copy1 */
141+
add state1.4s, state1.4s, copy1.4s
142+
/* output2 = state2 + copy2 */
143+
add state2.4s, state2.4s, copy2.4s
144+
/* output3 = state3 + copy3 */
145+
add state3.4s, state3.4s, copy3.4s
146+
st1 { state0.16b - state3.16b }, [x0]
147+
148+
/*
149+
* ++copy3.counter, the 'add' clears the upper half of the SIMD register
150+
* which is the expected behaviour here.
151+
*/
152+
add copy3_d, copy3_d, one_d
153+
154+
/* output += 64, --nblocks */
155+
add x0, x0, 64
156+
subs x3, x3, #1
157+
b.ne .Lblock
158+
159+
/* counter = copy3.counter */
160+
st1 { copy3.2s }, [x2]
161+
162+
/* Zero out the potentially sensitive regs, in case nothing uses these again. */
163+
movi state0.16b, #0
164+
movi state1.16b, #0
165+
movi state2.16b, #0
166+
movi state3.16b, #0
167+
movi copy1.16b, #0
168+
movi copy2.16b, #0
169+
ret
170+
SYM_FUNC_END(__arch_chacha20_blocks_nostack)
171+
172+
emit_aarch64_feature_1_and

arch/arm64/kernel/vdso/vgetrandom.c

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
3+
#include <uapi/asm-generic/errno.h>
4+
5+
typeof(__cvdso_getrandom) __kernel_getrandom;
6+
7+
/*
 * vDSO getrandom() entry point.
 *
 * The generic __cvdso_getrandom() relies on the NEON ChaCha20 helper, so it
 * is only used when the CPU has FPSIMD; otherwise fall back to the
 * getrandom() syscall. The (opaque_len == ~0UL, all other args zero/NULL)
 * combination is treated specially and returns -ENOSYS instead of calling
 * the syscall — NOTE(review): this looks like the generic "query opaque
 * state size" probe, signalling that vDSO state is unsupported without
 * FPSIMD; confirm against lib/vdso/getrandom.c.
 */
ssize_t __kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, size_t opaque_len)
8+
{
9+
if (alternative_has_cap_likely(ARM64_HAS_FPSIMD))
10+
return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len);
11+
12+
if (unlikely(opaque_len == ~0UL && !buffer && !len && !flags))
13+
return -ENOSYS;
14+
return getrandom_syscall(buffer, len, flags);
15+
}

tools/arch/arm64/vdso

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
../../../arch/arm64/kernel/vdso

0 commit comments

Comments
 (0)