Skip to content

Commit 34e1a5d

Browse files
committed
Merge tag 'random-6.12-rc1-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/crng/random
Pull more random number generator updates from Jason Donenfeld:

 - Christophe realized that the LoongArch64 instructions could be scheduled more similarly to how GCC generates code, which Ruoyao implemented, for a 5% speedup from basically some rearrangements

 - An update to MAINTAINERS to match the right files

* tag 'random-6.12-rc1-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/crng/random:
  LoongArch: vDSO: Tune chacha implementation
  MAINTAINERS: make vDSO getrandom matches more generic
2 parents 9c44575 + 9805f39 commit 34e1a5d

File tree

2 files changed

+56
-41
lines changed

2 files changed

+56
-41
lines changed

MAINTAINERS

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -19344,10 +19344,7 @@ F: drivers/char/random.c
1934419344
F: include/linux/random.h
1934519345
F: include/uapi/linux/random.h
1934619346
F: drivers/virt/vmgenid.c
19347-
F: include/vdso/getrandom.h
19348-
F: lib/vdso/getrandom.c
19349-
F: arch/x86/entry/vdso/vgetrandom*
19350-
F: arch/x86/include/asm/vdso/getrandom*
19347+
N: ^.*/vdso/[^/]*getrandom[^/]+$
1935119348

1935219349
RAPIDIO SUBSYSTEM
1935319350
M: Matt Porter <[email protected]>

arch/loongarch/vdso/vgetrandom-chacha.S

Lines changed: 55 additions & 37 deletions
Original file line number | Diff line number | Diff line change
@@ -9,23 +9,11 @@
99

1010
.text
1111

12-
/* Salsa20 quarter-round */
13-
.macro QR a b c d
14-
add.w \a, \a, \b
15-
xor \d, \d, \a
16-
rotri.w \d, \d, 16
17-
18-
add.w \c, \c, \d
19-
xor \b, \b, \c
20-
rotri.w \b, \b, 20
21-
22-
add.w \a, \a, \b
23-
xor \d, \d, \a
24-
rotri.w \d, \d, 24
25-
26-
add.w \c, \c, \d
27-
xor \b, \b, \c
28-
rotri.w \b, \b, 25
12+
.macro OP_4REG op d0 d1 d2 d3 s0 s1 s2 s3
13+
\op \d0, \d0, \s0
14+
\op \d1, \d1, \s1
15+
\op \d2, \d2, \s2
16+
\op \d3, \d3, \s3
2917
.endm
3018

3119
/*
@@ -74,6 +62,23 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
7462
/* Reuse i as copy3 */
7563
#define copy3 i
7664

65+
/* Packs to be used with OP_4REG */
66+
#define line0 state0, state1, state2, state3
67+
#define line1 state4, state5, state6, state7
68+
#define line2 state8, state9, state10, state11
69+
#define line3 state12, state13, state14, state15
70+
71+
#define line1_perm state5, state6, state7, state4
72+
#define line2_perm state10, state11, state8, state9
73+
#define line3_perm state15, state12, state13, state14
74+
75+
#define copy copy0, copy1, copy2, copy3
76+
77+
#define _16 16, 16, 16, 16
78+
#define _20 20, 20, 20, 20
79+
#define _24 24, 24, 24, 24
80+
#define _25 25, 25, 25, 25
81+
7782
/*
7883
* The ABI requires s0-s9 saved, and sp aligned to 16-byte.
7984
* This does not violate the stack-less requirement: no sensitive data
@@ -126,16 +131,38 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
126131
li.w i, 10
127132
.Lpermute:
128133
/* odd round */
129-
QR state0, state4, state8, state12
130-
QR state1, state5, state9, state13
131-
QR state2, state6, state10, state14
132-
QR state3, state7, state11, state15
134+
OP_4REG add.w line0, line1
135+
OP_4REG xor line3, line0
136+
OP_4REG rotri.w line3, _16
137+
138+
OP_4REG add.w line2, line3
139+
OP_4REG xor line1, line2
140+
OP_4REG rotri.w line1, _20
141+
142+
OP_4REG add.w line0, line1
143+
OP_4REG xor line3, line0
144+
OP_4REG rotri.w line3, _24
145+
146+
OP_4REG add.w line2, line3
147+
OP_4REG xor line1, line2
148+
OP_4REG rotri.w line1, _25
133149

134150
/* even round */
135-
QR state0, state5, state10, state15
136-
QR state1, state6, state11, state12
137-
QR state2, state7, state8, state13
138-
QR state3, state4, state9, state14
151+
OP_4REG add.w line0, line1_perm
152+
OP_4REG xor line3_perm, line0
153+
OP_4REG rotri.w line3_perm, _16
154+
155+
OP_4REG add.w line2_perm, line3_perm
156+
OP_4REG xor line1_perm, line2_perm
157+
OP_4REG rotri.w line1_perm, _20
158+
159+
OP_4REG add.w line0, line1_perm
160+
OP_4REG xor line3_perm, line0
161+
OP_4REG rotri.w line3_perm, _24
162+
163+
OP_4REG add.w line2_perm, line3_perm
164+
OP_4REG xor line1_perm, line2_perm
165+
OP_4REG rotri.w line1_perm, _25
139166

140167
addi.w i, i, -1
141168
bnez i, .Lpermute
@@ -147,10 +174,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
147174
li.w copy3, 0x6b206574
148175

149176
/* output[0,1,2,3] = copy[0,1,2,3] + state[0,1,2,3] */
150-
add.w state0, state0, copy0
151-
add.w state1, state1, copy1
152-
add.w state2, state2, copy2
153-
add.w state3, state3, copy3
177+
OP_4REG add.w line0, copy
154178
st.w state0, output, 0
155179
st.w state1, output, 4
156180
st.w state2, output, 8
@@ -165,10 +189,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
165189
ld.w state3, key, 12
166190

167191
/* output[4,5,6,7] = state[0,1,2,3] + state[4,5,6,7] */
168-
add.w state4, state4, state0
169-
add.w state5, state5, state1
170-
add.w state6, state6, state2
171-
add.w state7, state7, state3
192+
OP_4REG add.w line1, line0
172193
st.w state4, output, 16
173194
st.w state5, output, 20
174195
st.w state6, output, 24
@@ -181,10 +202,7 @@ SYM_FUNC_START(__arch_chacha20_blocks_nostack)
181202
ld.w state3, key, 28
182203

183204
/* output[8,9,10,11] = state[0,1,2,3] + state[8,9,10,11] */
184-
add.w state8, state8, state0
185-
add.w state9, state9, state1
186-
add.w state10, state10, state2
187-
add.w state11, state11, state3
205+
OP_4REG add.w line2, line0
188206
st.w state8, output, 32
189207
st.w state9, output, 36
190208
st.w state10, output, 40

0 commit comments

Comments
 (0)