Skip to content

Commit c5d75f1

Browse files
sunyuechi authored and pablodelara committed
erasure_code: R-V V gf_vect_dot_prod
banana_f3:
rvv: gf_vect_dot_prod_warm: runtime = 3062964 usecs, bandwidth 490 MB in 3.0630 sec = 160.25 MB/s
c: gf_vect_dot_prod_warm: runtime = 3000581 usecs, bandwidth 173 MB in 3.0006 sec = 57.69 MB/s

Signed-off-by: sunyuechi <[email protected]>
1 parent 4174804 commit c5d75f1

File tree

4 files changed

+140
-2
lines changed

4 files changed

+140
-2
lines changed

erasure_code/riscv64/Makefile.am

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,4 +30,5 @@
3030
lsrc_riscv64 += \
3131
erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c \
3232
erasure_code/riscv64/ec_multibinary_riscv64.S \
33-
erasure_code/riscv64/ec_gf_vect_mul_rvv.S
33+
erasure_code/riscv64/ec_gf_vect_mul_rvv.S \
34+
erasure_code/riscv64/ec_gf_vect_dot_prod_rvv.S
Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
/**********************************************************************
2+
Copyright (c) 2025 Institute of Software Chinese Academy of Sciences (ISCAS).
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions
6+
are met:
7+
* Redistributions of source code must retain the above copyright
8+
notice, this list of conditions and the following disclaimer.
9+
* Redistributions in binary form must reproduce the above copyright
10+
notice, this list of conditions and the following disclaimer in
11+
the documentation and/or other materials provided with the
12+
distribution.
13+
* Neither the name of ISCAS nor the names of its
14+
contributors may be used to endorse or promote products derived
15+
from this software without specific prior written permission.
16+
17+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+
**********************************************************************/
29+
#if HAVE_RVV
30+
#include "ec_table.S"
31+
.option arch, +v
32+
/*
 * void gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *v,
 *                           unsigned char **src, unsigned char *dest)
 *
 * For every byte position i in [0, len):
 *     dest[i] = XOR over j in [0, vlen) of mul(src[j][i], v[j * 32 + 1])
 * where mul(a, b) is 0 when either operand is 0 and otherwise
 * gff_base[gflog_base[a] + gflog_base[b]].
 * NOTE(review): this indexes gff_base with the unreduced sum of two logs,
 * so the table in ec_table.S must be large enough for that - confirm.
 *
 * ABI:  RISC-V LP64 calling convention, leaf function, no stack use.
 * In:   a0 = len, a1 = vlen, a2 = v, a3 = src, a4 = dest
 * Out:  dest[0 .. len-1] written; no return value.
 * Clobbers: a0-a4, a6, a7, t0-t6, v0, v8, v12, v16-v19, v20, v24, vl, vtype
 *
 * The j dimension is vectorised with SEW=8, LMUL=1/2 (VLMAX = VLEN/16):
 *   - fast path: vlen fits in one vector, so the coefficient logs are
 *     loaded once and reused for every i;
 *   - slow path: vlen is strip-mined, v20 carries the per-lane partial XOR
 *     across strips and is reduced once per i.
 */
.global gf_vect_dot_prod_rvv
.type gf_vect_dot_prod_rvv, %function
gf_vect_dot_prod_rvv:
        blez            a0, .Lgf_dp_done            // len <= 0: nothing to write
        // vlen <= 0: with vl == 0 the reduction below would be a no-op while
        // vmv.x.s still reads v8[0], so stale data would be stored. Write the
        // XOR of an empty set (0) instead.
        // NOTE(review): expected to match the scalar base routine - verify.
        blez            a1, .Lgf_dp_zero_fill

        lla             t0, gff_base                // t0 = antilog table
        lla             t1, gflog_base              // t1 = log table
        addi            a2, a2, 1                   // a2 = &v[1]; coefficient j is at v[j*32 + 1]
        vsetvli         zero, a1, e8, mf2, ta, ma
        vmv.v.i         v20, 0                      // per-lane XOR accumulator
        li              t2, 0                       // t2 = i, byte offset into every source buffer
        li              t3, 32                      // byte stride between coefficients in v
        csrr            t5, vlenb                   // VLEN / 8
        srli            t5, t5, 1                   // mf2: VLMAX = VLEN / 16
        blt             t5, a1, .Lgf_dp_slow        // vlen does not fit in one vector

        // ---- fast path: vl == vlen for the whole call ----------------------
        vlse8.v         v24, (a2), t3               // v24[j] = v[j*32 + 1]
        vmsne.vi        v12, v24, 0                 // v12[j] = (coefficient != 0)
        vluxei8.v       v24, (t1), v24              // v24[j] = gflog_base[coefficient]

.Lgf_dp_fast_loop:
        vsetvli         zero, zero, e8, mf2, ta, ma
        vle64.v         v16, (a3)                   // v16[j] = src[j] (pointer)
        vluxei64.v      v16, (t2), v16              // v16[j] = src[j][i]
        vmsne.vi        v0, v16, 0                  // src byte != 0
        vmand.mm        v0, v0, v12                 // active only if both operands are non-zero
        vluxei8.v       v16, (t1), v16, v0.t        // gflog_base[src[j][i]]
        vwaddu.vv       v8, v16, v24, v0.t          // 16-bit sum of the two logs
        vmv.v.i         v16, 0                      // inactive lanes contribute 0
        vsetvli         zero, zero, e8, mf2, ta, mu // mu: keep those zeros in masked-off lanes
        vluxei16.v      v16, (t0), v8, v0.t         // gff_base[log sum]
        vxor.vv         v20, v16, v20

        vmv.s.x         v8, zero
        vredxor.vs      v8, v20, v8                 // v8[0] = XOR of all lanes
        vmv.x.s         t5, v8
        addi            a0, a0, -1                  // one fewer output byte to produce
        sb              t5, (a4)
        addi            t2, t2, 1                   // next byte position in the sources
        vmv.v.i         v20, 0                      // reset accumulator for the next i
        addi            a4, a4, 1                   // next dest byte
        bnez            a0, .Lgf_dp_fast_loop
        ret

        // ---- slow path: strip-mine over vlen for each output byte ----------
.Lgf_dp_slow:
        mv              a7, a3                      // saved src
        mv              a6, a2                      // saved &v[1]
        mv              t4, a1                      // saved vlen

.Lgf_dp_slow_loop:
        vsetvli         t6, a1, e8, mf2, ta, ma     // t6 = sources handled in this strip
        vle64.v         v16, (a3)                   // v16[j] = src[j] (pointer)
        vluxei64.v      v16, (t2), v16              // v16[j] = src[j][i]
        vlse8.v         v24, (a2), t3               // v24[j] = v[j*32 + 1]
        vmsne.vi        v0, v16, 0                  // src byte != 0
        vmsne.vi        v12, v24, 0                 // coefficient != 0
        vmand.mm        v0, v0, v12                 // active only if both operands are non-zero
        vluxei8.v       v16, (t1), v16, v0.t        // gflog_base[src[j][i]]
        vluxei8.v       v24, (t1), v24, v0.t        // gflog_base[coefficient]
        vwaddu.vv       v8, v16, v24, v0.t          // 16-bit sum of the two logs
        vmv.v.i         v16, 0                      // inactive lanes contribute 0
        // tu (not ta): v20 holds partial results from earlier, possibly longer,
        // strips. Under a tail-agnostic policy the lanes past vl may be
        // overwritten with all 1s by the vxor below, corrupting the final
        // reduction when the last strip is shorter than the first.
        vsetvli         zero, zero, e8, mf2, tu, mu
        vluxei16.v      v16, (t0), v8, v0.t         // gff_base[log sum]
        vxor.vv         v20, v16, v20
        slli            t5, t6, 5
        add             a2, a2, t5                  // v   += 32 * strip length
        slli            t5, t6, 3
        add             a3, a3, t5                  // src += 8 * strip length (pointer size)
        sub             a1, a1, t6                  // sources still to process for this i
        bnez            a1, .Lgf_dp_slow_loop

        vsetvli         zero, t4, e8, mf2, ta, mu   // same vl as the first strip
        vmv.s.x         v8, zero
        vredxor.vs      v8, v20, v8                 // v8[0] = XOR of all lanes
        vmv.x.s         t5, v8
        addi            a0, a0, -1                  // one fewer output byte to produce
        mv              a3, a7                      // rewind src
        mv              a2, a6                      // rewind coefficients
        mv              a1, t4                      // rewind vlen
        addi            t2, t2, 1                   // next byte position in the sources
        sb              t5, (a4)
        vmv.v.i         v20, 0                      // reset accumulator for the next i
        addi            a4, a4, 1                   // next dest byte
        bnez            a0, .Lgf_dp_slow_loop
        ret

        // ---- vlen <= 0: every output byte is the empty XOR -----------------
.Lgf_dp_zero_fill:
        sb              zero, (a4)
        addi            a4, a4, 1
        addi            a0, a0, -1                  // a0 > 0 on entry, so this terminates
        bnez            a0, .Lgf_dp_zero_fill

.Lgf_dp_done:
        ret
.size gf_vect_dot_prod_rvv, .-gf_vect_dot_prod_rvv
119+
120+
#endif

erasure_code/riscv64/ec_multibinary_riscv64.S

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,12 @@
3131

3232
#if HAVE_RVV
3333
mbin_interface gf_vect_mul
34+
mbin_interface gf_vect_dot_prod
3435
#else
3536
mbin_interface_base gf_vect_mul gf_vect_mul_base
37+
mbin_interface_base gf_vect_dot_prod gf_vect_dot_prod_base
3638
#endif
3739

38-
mbin_interface_base gf_vect_dot_prod gf_vect_dot_prod_base
3940
mbin_interface_base ec_encode_data ec_encode_data_base
4041
mbin_interface_base ec_init_tables ec_init_tables_base
4142
mbin_interface_base ec_encode_data_update ec_encode_data_update_base

erasure_code/riscv64/ec_multibinary_riscv64_dispatcher.c

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ extern int
3232
gf_vect_mul_rvv(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
3333
extern int
3434
gf_vect_mul_base(int len, unsigned char *a, unsigned char *src, unsigned char *dest);
35+
extern void
36+
gf_vect_dot_prod_rvv(int len, int vlen, unsigned char *v, unsigned char **src, unsigned char *dest);
37+
extern void
38+
gf_vect_dot_prod_base(int len, int vlen, unsigned char *v, unsigned char **src,
39+
unsigned char *dest);
3540

3641
DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
3742
{
@@ -43,3 +48,14 @@ DEFINE_INTERFACE_DISPATCHER(gf_vect_mul)
4348
#endif
4449
return gf_vect_mul_base;
4550
}
51+
52+
DEFINE_INTERFACE_DISPATCHER(gf_vect_dot_prod)
53+
{
54+
#if HAVE_RVV
55+
const unsigned long hwcap = getauxval(AT_HWCAP);
56+
if (hwcap & HWCAP_RV('V'))
57+
return gf_vect_dot_prod_rvv;
58+
else
59+
#endif
60+
return gf_vect_dot_prod_base;
61+
}

0 commit comments

Comments
 (0)