Skip to content

Commit ba874ba

Browse files
sunyuechi and pablodelara
authored and committed
raid: R-V V pq_gen
banana_f3:
new: pq_gen_warm: runtime = 3062397 usecs, bandwidth 4737 MB in 3.0624 sec = 1546.92 MB/s
old: pq_gen_warm: runtime = 3005894 usecs, bandwidth 2851 MB in 3.0059 sec = 948.80 MB/s
Signed-off-by: sunyuechi <[email protected]>
1 parent b725bdd commit ba874ba

File tree

5 files changed

+225
-1
lines changed

5 files changed

+225
-1
lines changed

raid/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,12 @@
2929

3030
include raid/aarch64/Makefile.am
3131

32+
include raid/riscv64/Makefile.am
33+
3234
lsrc += raid/raid_base.c
3335

3436
lsrc_base_aliases += raid/raid_base_aliases.c
3537
lsrc_ppc64le += raid/raid_base_aliases.c
36-
lsrc_riscv64 += raid/raid_base_aliases.c
3738

3839
lsrc_x86_64 += \
3940
raid/xor_gen_sse.asm \

raid/riscv64/Makefile.am

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
########################################################################
2+
# Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
3+
#
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions
6+
# are met:
7+
# * Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# * Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in
11+
# the documentation and/or other materials provided with the
12+
# distribution.
13+
# * Neither the name of ISCAS nor the names of its
14+
# contributors may be used to endorse or promote products derived
15+
# from this software without specific prior written permission.
16+
#
17+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
########################################################################
29+
30+
# riscv64 sources for the raid unit: the C dispatcher, the multibinary
# interface stubs, and the RVV pq_gen kernel.
lsrc_riscv64 += raid/riscv64/raid_multibinary_riscv64_dispatcher.c
lsrc_riscv64 += raid/riscv64/raid_multibinary_riscv64.S
lsrc_riscv64 += raid/riscv64/raid_pq_gen_rvv.S
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/**********************************************************************
2+
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions
6+
are met:
7+
* Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
* Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
* Neither the name of ISCAS nor the names of its
14+
contributors may be used to endorse or promote products derived
15+
from this software without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
**********************************************************************/
29+
30+
#include "riscv64_multibinary.h"
31+
32+
/*
 * pq_gen: without RVV support at build time the interface is declared
 * against pq_gen_base; with HAVE_RVV it is declared through mbin_interface
 * (a dispatcher for pq_gen is defined with DEFINE_INTERFACE_DISPATCHER in
 * raid_multibinary_riscv64_dispatcher.c).
 */
#if !HAVE_RVV
mbin_interface_base pq_gen pq_gen_base
#else
mbin_interface pq_gen
#endif

/* The remaining raid interfaces are always declared against their base versions. */
mbin_interface_base pq_check pq_check_base
mbin_interface_base xor_gen xor_gen_base
mbin_interface_base xor_check xor_check_base
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
/**********************************************************************
2+
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions
6+
are met:
7+
* Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
* Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
* Neither the name of ISCAS nor the names of its
14+
contributors may be used to endorse or promote products derived
15+
from this software without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
**********************************************************************/
29+
#include "riscv64_multibinary.h"
30+
31+
extern int
32+
pq_gen_rvv(int vects, int len, void **array);
33+
extern int
34+
pq_gen_base(int vects, int len, void **array);
35+
36+
DEFINE_INTERFACE_DISPATCHER(pq_gen)
37+
{
38+
#if HAVE_RVV
39+
const unsigned long hwcap = getauxval(AT_HWCAP);
40+
if (hwcap & HWCAP_RV('V'))
41+
return pq_gen_rvv;
42+
else
43+
#endif
44+
return pq_gen_base;
45+
}

raid/riscv64/raid_pq_gen_rvv.S

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
/**********************************************************************
2+
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions
6+
are met:
7+
* Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
* Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
* Neither the name of ISCAS nor the names of its
14+
contributors may be used to endorse or promote products derived
15+
from this software without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
**********************************************************************/
29+
#if HAVE_RVV
/*
 * int pq_gen_rvv(int vects, int len, void **array)
 *
 * P/Q parity generation using the RISC-V Vector extension.
 *
 * In:   a0 = vects  number of pointers in array
 *       a1 = len    buffer length in bytes; data is processed in 8-byte
 *                   words (len / 8), any remaining tail bytes are not touched
 *       a2 = array  array[0 .. vects-3] are sources, array[vects-2] is the
 *                   P destination, array[vects-1] is the Q destination
 * Out:  a0 = 1 if vects < 4, otherwise 0 (also when len / 8 <= 0)
 *
 * Clobbers: a1, a3-a7, t0-t6, v0-v11, v16-v27, vl/vtype.
 *
 * Algorithm: P and Q are first initialised with src[vects-3]; then for each
 * j = vects-4 .. 0 the whole buffer is walked once, updating
 *     p ^= s
 *     q  = s ^ ((q << 1) & notbit0)
 *            ^ ((((q & bit7) << 1) - ((q & bit7) >> 7)) & gf8poly)
 * on 64-bit elements (eight bytes per element).
 */
        .text
        .option arch, +v
        .global pq_gen_rvv
        .type   pq_gen_rvv, %function
pq_gen_rvv:
        // Validate vects before the length short-circuit so that an invalid
        // vects is reported regardless of len.
        addi    a6, a0, -3                      // a6 = vects - 3: sources left after src[vects-3]
        blez    a6, .Lpq_ret_fail               // vects < 4
        // Arithmetic shift: len is a sign-extended int, so a negative len
        // stays negative instead of turning into a huge block count.
        srai    a1, a1, 3                       // blocks = len / 8
        blez    a1, .Lpq_ret_ok                 // blocks <= 0: nothing to do

        slli    t0, a0, 3                       // t0 = vects * 8
        add     t0, a2, t0                      // t0 = &array[vects]
        li      t1, 0x8080808080808080          // bit7
        li      t2, 0xfefefefefefefefe          // notbit0
        li      t3, 0x1d1d1d1d1d1d1d1d          // gf8poly
        ld      a3, -24(t0)                     // src[vects-3]
        ld      a4, -16(t0)                     // p
        ld      a5, -8(t0)                      // q
        mv      t6, a1                          // remaining blocks for the init pass
        mv      t5, a4                          // keep p base
        mv      a7, a5                          // keep q base

        // p = q = src[vects-3]
.Lpq_init:
        vsetvli t4, t6, e64, m4, ta, ma         // t4 = elements handled this pass
        vle64.v v0, (a3)
        vse64.v v0, (a4)                        // init p
        vse64.v v0, (a5)                        // init q
        sub     t6, t6, t4
        slli    t4, t4, 3                       // elements -> bytes
        add     a3, a3, t4
        add     a4, a4, t4
        add     a5, a5, t4
        bnez    t6, .Lpq_init

        // One full pass over the buffers per remaining source, j = vects-4 .. 0.
.Lpq_outer:
        mv      a4, t5                          // rewind p
        mv      a5, a7                          // rewind q
        mv      t6, a1                          // reload block count
        ld      a0, -32(t0)                     // src[j]

.Lpq_inner:
        vsetvli t4, t6, e64, m4, ta, ma
        vle64.v v8, (a0)                        // s
        vle64.v v0, (a4)                        // p
        vle64.v v4, (a5)                        // q
        vxor.vv v0, v0, v8                      // p ^= s
        vand.vx v20, v4, t1                     // q & bit7
        vsll.vi v24, v4, 1                      // q << 1
        vand.vx v24, v24, t2                    // (q << 1) & notbit0
        vsrl.vi v16, v20, 7                     // (q & bit7) >> 7
        vsll.vi v20, v20, 1                     // (q & bit7) << 1
        vsub.vv v20, v20, v16                   // ((q & bit7) << 1) - ((q & bit7) >> 7)
        vand.vx v20, v20, t3                    // ... & gf8poly
        vxor.vv v4, v24, v20                    // ((q << 1) & notbit0) ^ poly term
        vxor.vv v4, v4, v8                      // q = s ^ ...
        vse64.v v0, (a4)                        // store p
        vse64.v v4, (a5)                        // store q
        sub     t6, t6, t4                      // blocks left in this pass
        slli    t4, t4, 3                       // elements -> bytes
        add     a4, a4, t4                      // p += bytes
        add     a5, a5, t4                      // q += bytes
        add     a0, a0, t4                      // s += bytes
        bnez    t6, .Lpq_inner

        addi    a6, a6, -1                      // one source fewer to fold in
        addi    t0, t0, -8                      // step to the previous array slot
        bnez    a6, .Lpq_outer

.Lpq_ret_ok:
        li      a0, 0
        ret

.Lpq_ret_fail:
        li      a0, 1
        ret
        .size   pq_gen_rvv, .-pq_gen_rvv

#endif

0 commit comments

Comments
 (0)