Skip to content

Commit c5674d0

Browse files
greentime authored and palmer-dabbelt committed
riscv: Add vector extension XOR implementation
This patch adds support for vector-optimized XOR; it has been tested in QEMU.

Co-developed-by: Han-Kuan Chen <[email protected]>
Signed-off-by: Han-Kuan Chen <[email protected]>
Signed-off-by: Greentime Hu <[email protected]>
Signed-off-by: Andy Chiu <[email protected]>
Tested-by: Björn Töpel <[email protected]>
Tested-by: Lad Prabhakar <[email protected]>
Link: https://lore.kernel.org/r/[email protected]
Signed-off-by: Palmer Dabbelt <[email protected]>
1 parent 956895b commit c5674d0

File tree

4 files changed

+168
-0
lines changed

4 files changed

+168
-0
lines changed

arch/riscv/include/asm/asm-prototypes.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,24 @@ long long __lshrti3(long long a, int b);
99
long long __ashrti3(long long a, int b);
1010
long long __ashlti3(long long a, int b);
1111

12+
#ifdef CONFIG_RISCV_ISA_V
/*
 * C prototypes for the vector XOR routines that are implemented in
 * assembly (arch/riscv/lib/xor.S) and exported from there with
 * EXPORT_SYMBOL().
 *
 * Each xor_regs_N_() takes a byte count and N buffers: p1 is both a
 * source and the destination, p2..pN are read-only sources.  The
 * assembly XORs the sources together bytewise and stores the result
 * to p1.  These routines use vector instructions directly; the visible
 * C callers (xor_vector_N() in asm/xor.h) bracket each call with
 * kernel_vector_begin()/kernel_vector_end().
 */
13+
14+
void xor_regs_2_(unsigned long bytes, unsigned long *__restrict p1,
15+
const unsigned long *__restrict p2);
16+
void xor_regs_3_(unsigned long bytes, unsigned long *__restrict p1,
17+
const unsigned long *__restrict p2,
18+
const unsigned long *__restrict p3);
19+
void xor_regs_4_(unsigned long bytes, unsigned long *__restrict p1,
20+
const unsigned long *__restrict p2,
21+
const unsigned long *__restrict p3,
22+
const unsigned long *__restrict p4);
23+
void xor_regs_5_(unsigned long bytes, unsigned long *__restrict p1,
24+
const unsigned long *__restrict p2,
25+
const unsigned long *__restrict p3,
26+
const unsigned long *__restrict p4,
27+
const unsigned long *__restrict p5);
28+
29+
#endif /* CONFIG_RISCV_ISA_V */
1230

1331
#define DECLARE_DO_ERROR_INFO(name) asmlinkage void name(struct pt_regs *regs)
1432

arch/riscv/include/asm/xor.h

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later */
2+
/*
3+
* Copyright (C) 2021 SiFive
4+
*/
5+
6+
#include <linux/hardirq.h>
7+
#include <asm-generic/xor.h>
8+
#ifdef CONFIG_RISCV_ISA_V
9+
#include <asm/vector.h>
10+
#include <asm/switch_to.h>
11+
#include <asm/asm-prototypes.h>
12+
13+
/*
 * xor_vector_2 - two-buffer XOR using the vector assembly helper.
 * @bytes: number of bytes to process
 * @p1:    first source; also receives the result
 * @p2:    second source (read-only)
 *
 * Calls xor_regs_2_() between kernel_vector_begin() and
 * kernel_vector_end().  NOTE(review): presumably the begin/end pair is
 * what makes the vector registers usable from kernel context - confirm
 * against <asm/vector.h>.
 */
static void xor_vector_2(unsigned long bytes, unsigned long *__restrict p1,
14+
const unsigned long *__restrict p2)
15+
{
16+
kernel_vector_begin();
17+
xor_regs_2_(bytes, p1, p2);
18+
kernel_vector_end();
19+
}
20+
21+
/*
 * xor_vector_3 - three-buffer XOR using the vector assembly helper.
 * @bytes: number of bytes to process
 * @p1:    first source; also receives the result
 * @p2:    second source (read-only)
 * @p3:    third source (read-only)
 *
 * Calls xor_regs_3_() between kernel_vector_begin() and
 * kernel_vector_end().  NOTE(review): presumably the begin/end pair is
 * what makes the vector registers usable from kernel context - confirm
 * against <asm/vector.h>.
 */
static void xor_vector_3(unsigned long bytes, unsigned long *__restrict p1,
22+
const unsigned long *__restrict p2,
23+
const unsigned long *__restrict p3)
24+
{
25+
kernel_vector_begin();
26+
xor_regs_3_(bytes, p1, p2, p3);
27+
kernel_vector_end();
28+
}
29+
30+
/*
 * xor_vector_4 - four-buffer XOR using the vector assembly helper.
 * @bytes: number of bytes to process
 * @p1:    first source; also receives the result
 * @p2:    second source (read-only)
 * @p3:    third source (read-only)
 * @p4:    fourth source (read-only)
 *
 * Calls xor_regs_4_() between kernel_vector_begin() and
 * kernel_vector_end().  NOTE(review): presumably the begin/end pair is
 * what makes the vector registers usable from kernel context - confirm
 * against <asm/vector.h>.
 */
static void xor_vector_4(unsigned long bytes, unsigned long *__restrict p1,
31+
const unsigned long *__restrict p2,
32+
const unsigned long *__restrict p3,
33+
const unsigned long *__restrict p4)
34+
{
35+
kernel_vector_begin();
36+
xor_regs_4_(bytes, p1, p2, p3, p4);
37+
kernel_vector_end();
38+
}
39+
40+
/*
 * xor_vector_5 - five-buffer XOR using the vector assembly helper.
 * @bytes: number of bytes to process
 * @p1:    first source; also receives the result
 * @p2:    second source (read-only)
 * @p3:    third source (read-only)
 * @p4:    fourth source (read-only)
 * @p5:    fifth source (read-only)
 *
 * Calls xor_regs_5_() between kernel_vector_begin() and
 * kernel_vector_end().  NOTE(review): presumably the begin/end pair is
 * what makes the vector registers usable from kernel context - confirm
 * against <asm/vector.h>.
 */
static void xor_vector_5(unsigned long bytes, unsigned long *__restrict p1,
41+
const unsigned long *__restrict p2,
42+
const unsigned long *__restrict p3,
43+
const unsigned long *__restrict p4,
44+
const unsigned long *__restrict p5)
45+
{
46+
kernel_vector_begin();
47+
xor_regs_5_(bytes, p1, p2, p3, p4, p5);
48+
kernel_vector_end();
49+
}
50+
51+
/*
 * XOR template named "rvv" whose 2- to 5-buffer entry points are the
 * xor_vector_N() wrappers defined in this header.  It is handed to
 * xor_speed() from XOR_TRY_TEMPLATES, and only when has_vector() is true.
 */
static struct xor_block_template xor_block_rvv = {
52+
.name = "rvv",
53+
.do_2 = xor_vector_2,
54+
.do_3 = xor_vector_3,
55+
.do_4 = xor_vector_4,
56+
.do_5 = xor_vector_5
57+
};
58+
59+
/*
 * Drop any earlier definition of XOR_TRY_TEMPLATES (presumably the one
 * from <asm-generic/xor.h>, included above - confirm) and redefine it so
 * that xor_speed() is run on the 8regs and 32regs templates
 * unconditionally, and on the rvv template only when has_vector()
 * returns true at run time.
 */
#undef XOR_TRY_TEMPLATES
60+
#define XOR_TRY_TEMPLATES \
61+
do { \
62+
xor_speed(&xor_block_8regs); \
63+
xor_speed(&xor_block_32regs); \
64+
if (has_vector()) { \
65+
xor_speed(&xor_block_rvv);\
66+
} \
67+
} while (0)
68+
#endif

arch/riscv/lib/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ lib-$(CONFIG_64BIT) += tishift.o
1111
lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
1212

1313
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
14+
lib-$(CONFIG_RISCV_ISA_V) += xor.o

arch/riscv/lib/xor.S

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
/* SPDX-License-Identifier: GPL-2.0-or-later */
2+
/*
3+
* Copyright (C) 2021 SiFive
4+
*/
5+
#include <linux/linkage.h>
6+
#include <linux/export.h>
7+
#include <asm/asm.h>
8+
9+
/*
 * xor_regs_2_(bytes, p1, p2): p1[i] = p1[i] ^ p2[i], processed bytewise.
 *
 * Register use (arguments per the C prototype and the RISC-V calling
 * convention):
 *   a0 - bytes still to process
 *   a1 - p1, read and written, advanced each pass
 *   a2 - p2, read only, advanced each pass
 *   a3 - vl returned by vsetvli = bytes handled in the current pass
 *
 * With m8 each of v0, v8, v16 names a group of eight vector registers.
 * The loop branches back to the function entry label, so vsetvli is
 * re-executed with the remaining count on every pass.
 * NOTE(review): the scalar updates are interleaved with the vector
 * instructions, presumably for scheduling - keep the order when editing.
 */
SYM_FUNC_START(xor_regs_2_)
10+
/* a3 = vl for a0 remaining 8-bit elements, LMUL = 8, tail/mask agnostic */
vsetvli a3, a0, e8, m8, ta, ma
11+
/* v0 = next vl bytes of p1 */
vle8.v v0, (a1)
12+
/* v8 = next vl bytes of p2 */
vle8.v v8, (a2)
13+
/* remaining -= vl */
sub a0, a0, a3
14+
/* v16 = p1 ^ p2 */
vxor.vv v16, v0, v8
15+
/* p2 += vl */
add a2, a2, a3
16+
/* store the result back over the p1 bytes just read */
vse8.v v16, (a1)
17+
/* p1 += vl */
add a1, a1, a3
18+
/* loop (via the entry label) until no bytes remain */
bnez a0, xor_regs_2_
19+
ret
20+
SYM_FUNC_END(xor_regs_2_)
21+
EXPORT_SYMBOL(xor_regs_2_)
22+
23+
/*
 * xor_regs_3_(bytes, p1, p2, p3): p1[i] = p1[i] ^ p2[i] ^ p3[i],
 * processed bytewise.
 *
 * Register use (arguments per the C prototype and the RISC-V calling
 * convention):
 *   a0 - bytes still to process
 *   a1 - p1, read and written, advanced each pass
 *   a2 - p2, read only, advanced each pass
 *   a3 - p3, read only, advanced each pass
 *   a4 - vl returned by vsetvli = bytes handled in the current pass
 *
 * With m8 each of v0, v8, v16 names a group of eight vector registers.
 * The loop branches back to the function entry label, so vsetvli is
 * re-executed with the remaining count on every pass.
 * NOTE(review): the scalar updates are interleaved with the vector
 * instructions, presumably for scheduling - keep the order when editing.
 */
SYM_FUNC_START(xor_regs_3_)
24+
/* a4 = vl for a0 remaining 8-bit elements, LMUL = 8, tail/mask agnostic */
vsetvli a4, a0, e8, m8, ta, ma
25+
/* v0 = next vl bytes of p1 */
vle8.v v0, (a1)
26+
/* v8 = next vl bytes of p2 */
vle8.v v8, (a2)
27+
/* remaining -= vl */
sub a0, a0, a4
28+
/* v0 = p1 ^ p2 */
vxor.vv v0, v0, v8
29+
/* v16 = next vl bytes of p3 */
vle8.v v16, (a3)
30+
/* p2 += vl */
add a2, a2, a4
31+
/* v16 = p1 ^ p2 ^ p3 */
vxor.vv v16, v0, v16
32+
/* p3 += vl */
add a3, a3, a4
33+
/* store the result back over the p1 bytes just read */
vse8.v v16, (a1)
34+
/* p1 += vl */
add a1, a1, a4
35+
/* loop (via the entry label) until no bytes remain */
bnez a0, xor_regs_3_
36+
ret
37+
SYM_FUNC_END(xor_regs_3_)
38+
EXPORT_SYMBOL(xor_regs_3_)
39+
40+
/*
 * xor_regs_4_(bytes, p1, p2, p3, p4):
 * p1[i] = p1[i] ^ p2[i] ^ p3[i] ^ p4[i], processed bytewise.
 *
 * Register use (arguments per the C prototype and the RISC-V calling
 * convention):
 *   a0 - bytes still to process
 *   a1 - p1, read and written, advanced each pass
 *   a2..a4 - p2..p4, read only, advanced each pass
 *   a5 - vl returned by vsetvli = bytes handled in the current pass
 *
 * With m8 each of v0, v8, v16, v24 names a group of eight vector
 * registers.  The loop branches back to the function entry label, so
 * vsetvli is re-executed with the remaining count on every pass.
 * NOTE(review): the scalar updates are interleaved with the vector
 * instructions, presumably for scheduling - keep the order when editing.
 */
SYM_FUNC_START(xor_regs_4_)
41+
/* a5 = vl for a0 remaining 8-bit elements, LMUL = 8, tail/mask agnostic */
vsetvli a5, a0, e8, m8, ta, ma
42+
/* v0 = next vl bytes of p1 */
vle8.v v0, (a1)
43+
/* v8 = next vl bytes of p2 */
vle8.v v8, (a2)
44+
/* remaining -= vl */
sub a0, a0, a5
45+
/* v0 = p1 ^ p2 */
vxor.vv v0, v0, v8
46+
/* v16 = next vl bytes of p3 */
vle8.v v16, (a3)
47+
/* p2 += vl */
add a2, a2, a5
48+
/* v0 = p1 ^ p2 ^ p3 */
vxor.vv v0, v0, v16
49+
/* v24 = next vl bytes of p4 */
vle8.v v24, (a4)
50+
/* p3 += vl */
add a3, a3, a5
51+
/* v16 = p1 ^ p2 ^ p3 ^ p4 */
vxor.vv v16, v0, v24
52+
/* p4 += vl */
add a4, a4, a5
53+
/* store the result back over the p1 bytes just read */
vse8.v v16, (a1)
54+
/* p1 += vl */
add a1, a1, a5
55+
/* loop (via the entry label) until no bytes remain */
bnez a0, xor_regs_4_
56+
ret
57+
SYM_FUNC_END(xor_regs_4_)
58+
EXPORT_SYMBOL(xor_regs_4_)
59+
60+
/*
 * xor_regs_5_(bytes, p1, p2, p3, p4, p5):
 * p1[i] = p1[i] ^ p2[i] ^ p3[i] ^ p4[i] ^ p5[i], processed bytewise.
 *
 * Register use (arguments per the C prototype and the RISC-V calling
 * convention):
 *   a0 - bytes still to process
 *   a1 - p1, read and written, advanced each pass
 *   a2..a5 - p2..p5, read only, advanced each pass
 *   a6 - vl returned by vsetvli = bytes handled in the current pass
 *
 * With m8 each of v0, v8, v16, v24 names a group of eight vector
 * registers; v8 is reused for the p5 load once the p2 data has been
 * folded into v0.  The loop branches back to the function entry label,
 * so vsetvli is re-executed with the remaining count on every pass.
 * NOTE(review): the scalar updates are interleaved with the vector
 * instructions, presumably for scheduling - keep the order when editing.
 */
SYM_FUNC_START(xor_regs_5_)
61+
/* a6 = vl for a0 remaining 8-bit elements, LMUL = 8, tail/mask agnostic */
vsetvli a6, a0, e8, m8, ta, ma
62+
/* v0 = next vl bytes of p1 */
vle8.v v0, (a1)
63+
/* v8 = next vl bytes of p2 */
vle8.v v8, (a2)
64+
/* remaining -= vl */
sub a0, a0, a6
65+
/* v0 = p1 ^ p2 */
vxor.vv v0, v0, v8
66+
/* v16 = next vl bytes of p3 */
vle8.v v16, (a3)
67+
/* p2 += vl */
add a2, a2, a6
68+
/* v0 = p1 ^ p2 ^ p3 */
vxor.vv v0, v0, v16
69+
/* v24 = next vl bytes of p4 */
vle8.v v24, (a4)
70+
/* p3 += vl */
add a3, a3, a6
71+
/* v0 = p1 ^ p2 ^ p3 ^ p4 */
vxor.vv v0, v0, v24
72+
/* v8 = next vl bytes of p5 (v8 no longer holds live p2 data) */
vle8.v v8, (a5)
73+
/* p4 += vl */
add a4, a4, a6
74+
/* v16 = p1 ^ p2 ^ p3 ^ p4 ^ p5 */
vxor.vv v16, v0, v8
75+
/* p5 += vl */
add a5, a5, a6
76+
/* store the result back over the p1 bytes just read */
vse8.v v16, (a1)
77+
/* p1 += vl */
add a1, a1, a6
78+
/* loop (via the entry label) until no bytes remain */
bnez a0, xor_regs_5_
79+
ret
80+
SYM_FUNC_END(xor_regs_5_)
81+
EXPORT_SYMBOL(xor_regs_5_)

0 commit comments

Comments
 (0)