Skip to content

Commit 64e23f0

Browse files
committed
erasure_code: add prefetch and vsetvli optimization
Signed-off-by: Shuo Lv <[email protected]>
1 parent 5e90721 commit 64e23f0

15 files changed

+116
-60
lines changed

configure.ac

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ AM_CONDITIONAL([CPU_PPC64LE], [test "$CPU" = "ppc64le"])
3838
AM_CONDITIONAL([CPU_RISCV64], [test "$CPU" = "riscv64"])
3939
AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"])
4040
AM_CONDITIONAL([HAVE_RVV], [false])
41+
AM_CONDITIONAL([HAVE_ZICBOP], [false])
4142

4243
# Check for programs
4344
AC_PROG_CC_STDC
@@ -70,9 +71,28 @@ case "${CPU}" in
7071
[AC_DEFINE([HAVE_RVV], [0], [Disable RVV instructions])
7172
AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
7273
)
74+
AC_MSG_CHECKING([Zicbop support])
75+
AC_COMPILE_IFELSE(
76+
[AC_LANG_PROGRAM([], [
77+
__asm__ volatile(
78+
".option arch, +zicbop\n"
79+
".insn i 0x0F, 0, x0, x0, 0x010" ::: "memory"
80+
);
81+
])],
82+
[AC_DEFINE([HAVE_ZICBOP], [1], [Enable Zicbop instructions])
83+
AM_CONDITIONAL([HAVE_ZICBOP], [true]) zicbop=yes],
84+
[AC_DEFINE([HAVE_ZICBOP], [0], [Disable Zicbop instructions])
85+
AM_CONDITIONAL([HAVE_ZICBOP], [false]) zicbop=no]
86+
)
87+
AC_MSG_RESULT([$zicbop])
7388
if test "x$rvv" = "xyes"; then
74-
CFLAGS+=" -march=rv64gcv"
75-
CCASFLAGS+=" -march=rv64gcv"
89+
if test "x$zicbop" = "xyes"; then
90+
CFLAGS+=" -march=rv64gcv_zicbop"
91+
CCASFLAGS+=" -march=rv64gcv_zicbop"
92+
else
93+
CFLAGS+=" -march=rv64gcv"
94+
CCASFLAGS+=" -march=rv64gcv"
95+
fi
7696
fi
7797
AC_MSG_RESULT([$rvv])
7898
;;

erasure_code/riscv64/gf_2vect_dot_prod_rvv.S

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -74,9 +74,10 @@ gf_2vect_dot_prod_rvv:
7474
li t6, 16
7575
blt x_len, t6, .return_fail
7676

77-
vsetvli a5, x0, e8, m1 /* Set vector length to maximum */
78-
77+
vsetvli a5, x0, e8, m1,ta,ma /* Set vector length to maximum */
7978
li x_pos, 0
79+
slli x_vec, x_vec, 3
80+
8081
ld x_dest1, 0(x_dest)
8182
ld x_dest2, 8(x_dest)
8283

@@ -92,15 +93,12 @@ gf_2vect_dot_prod_rvv:
9293

9394
/* gf_tbl base = (x_tbl + dest_idx * vec * 32); x_vec already holds vec * 8 (scaled above), so the shift below is only by 2 */
9495
mv x_tbl1, x_tbl /* reset x_tbl1 */
95-
slli t6, x_vec, 5
96+
slli t6, x_vec, 2
9697
add x_tbl2, x_tbl1, t6 /* reset x_tbl2 */
9798

9899
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
99100
.Llooprvv_vl_vects:
100101
/* load src data */
101-
slli a6, x_vec_i, 3
102-
add a6,x_src,a6
103-
ld x_ptr, 0(a6)
104102
add x_ptr,x_ptr,x_pos
105103

106104
vle8.v v_src, (x_ptr) /* load from: src base + pos offset */
@@ -120,6 +118,14 @@ gf_2vect_dot_prod_rvv:
120118
vle8.v v_gft2_hi, (x_tbl2)
121119
addi x_tbl2, x_tbl2, 16
122120

121+
prefetch.r 0(x_tbl1)
122+
prefetch.r 0(x_tbl2)
123+
124+
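/* calc for next: x_vec_i is a byte offset into the src pointer array (8 bytes per pointer); NOTE(review): the last pass loads from x_src + x_vec, one slot past vec pointers - confirm that read is safe */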
125+
addi x_vec_i, x_vec_i, 8 /* move x_vec_i to next */
126+
add a6,x_src,x_vec_i
127+
ld x_ptr, 0(a6)
128+
123129
/* dest 1 */
124130
/* table indexing, ie. gf(2^8) multiplication */
125131
vrgather.vv v26, v_gft1_lo, v_src_lo
@@ -134,8 +140,6 @@ gf_2vect_dot_prod_rvv:
134140
vxor.vv v_dest2, v_dest2, v26
135141
vxor.vv v_dest2, v_dest2, v27
136142

137-
/* calc for next */
138-
addi x_vec_i, x_vec_i, 1 /* move x_vec_i to next */
139143
blt x_vec_i, x_vec, .Llooprvv_vl_vects
140144
/* end of Loop 2 */
141145

erasure_code/riscv64/gf_2vect_mad_rvv.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ gf_2vect_mad_rvv:
7171
li t3, 16
7272
blt x_len, t3, .return_fail
7373

74-
vsetvli t4, x0, e8, m1
74+
vsetvli t4, x0, e8, m1,ta,ma
7575

7676
/* load table 1 */
7777
slli t3, x_vec_i, 5

erasure_code/riscv64/gf_3vect_dot_prod_rvv.S

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,11 @@ gf_3vect_dot_prod_rvv:
8484
sd s0, 0(sp)
8585
sd s1, 8(sp)
8686

87-
vsetvli a7, x0, e8, m1 /* Set vector length to maximum */
88-
87+
vsetvli a7, x0, e8, m1,ta,ma /* Set vector length to maximum */
8988
li x_pos, 0
90-
slli t_offset, x_vec, 5
89+
slli x_vec, x_vec, 3
90+
91+
slli t_offset, x_vec, 2
9192
ld x_dest1, 0(x_dest)
9293
ld x_dest2, 8(x_dest)
9394
ld x_dest3, 16(x_dest)
@@ -101,20 +102,19 @@ gf_3vect_dot_prod_rvv:
101102
vmv.v.i v_dest2, 0
102103
vmv.v.i v_dest3, 0
103104

105+
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
106+
li x_vec_i, 0
107+
/* load source pointer */
108+
ld x_ptr, 0(x_src)
109+
104110
/* Reset table pointers */
105111
mv x_tbl1, x_tbl
106112
add x_tbl2, x_tbl1, t_offset
107113
add x_tbl3, x_tbl2, t_offset
108114

109-
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
110-
li x_vec_i, 0
111115
.Lloop_rvv_vl_vects:
112-
/* Load source data */
113-
slli t0, x_vec_i, 3
114-
add t0,x_src,t0
115-
ld x_ptr, 0(t0)
116+
/* load source data */
116117
add x_ptr,x_ptr,x_pos
117-
118118
vle8.v v_src, (x_ptr)
119119

120120
/* Split 4-bit lo; 4-bit hi */
@@ -131,14 +131,22 @@ gf_3vect_dot_prod_rvv:
131131
vle8.v v_gft2_hi, (x_tbl2)
132132
addi x_tbl2, x_tbl2, 16
133133

134+
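/* Move to next source vector: x_vec_i is a byte offset (8 bytes per pointer) and the next pointer is loaded ahead of use; NOTE(review): the last pass loads from x_src + x_vec, one slot past vec pointers - confirm that read is safe */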
135+
addi x_vec_i, x_vec_i, 8
136+
add t0,x_src,x_vec_i
137+
ld x_ptr, 0(t0)
138+
139+
prefetch.r 0(x_tbl1)
140+
prefetch.r 0(x_tbl2)
134141

135142
/* Load next gf_table's */
136143
vle8.v v_gft3_lo, (x_tbl3)
137144
addi x_tbl3, x_tbl3, 16
138145
vle8.v v_gft3_hi, (x_tbl3)
139146
addi x_tbl3, x_tbl3, 16
147+
prefetch.r 0(x_tbl3)
140148

141-
/* dest 1 */
149+
/* dest 1 */
142150
vrgather.vv v26, v_gft1_lo, v_src_lo
143151
vrgather.vv v27, v_gft1_hi, v_src_hi
144152
vxor.vv v_dest1, v_dest1, v26
@@ -156,9 +164,6 @@ gf_3vect_dot_prod_rvv:
156164
vxor.vv v_dest3, v_dest3, v26
157165
vxor.vv v_dest3, v_dest3, v27
158166

159-
/* Move to next source vector */
160-
addi x_vec_i, x_vec_i, 1
161-
162167
/* Check if we have processed all vectors */
163168
blt x_vec_i, x_vec, .Lloop_rvv_vl_vects
164169

erasure_code/riscv64/gf_3vect_mad_rvv.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ gf_3vect_mad_rvv:
7575
li t4, 16
7676
blt x_len, t4, .return_fail
7777

78-
vsetvli t5, x0, e8, m1
78+
vsetvli t5, x0, e8, m1,ta,ma
7979

8080
/* Load table 1 */
8181
slli t4, x_vec_i, 5

erasure_code/riscv64/gf_4vect_dot_prod_rvv.S

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,13 @@ gf_4vect_dot_prod_rvv:
9191
sd s2, 16(sp)
9292
sd s3, 24(sp)
9393

94-
vsetvli t0, x0, e8, m1 /* Set vector length to maximum */
95-
94+
/* Set vector length to maximum (ta: tail-agnostic, ma: mask-agnostic) */
95+
vsetvli t0, x0, e8, m1,ta,ma
9696
li x_pos, 0
97-
slli t_offset, x_vec, 5
97+
98+
slli x_vec, x_vec, 3
99+
slli t_offset, x_vec, 2
100+
98101
ld x_dest1, 0(x_dest)
99102
ld x_dest2, 8(x_dest)
100103
ld x_dest3, 16(x_dest)
@@ -111,19 +114,20 @@ gf_4vect_dot_prod_rvv:
111114
vmv.v.i v_dest3, 0
112115
vmv.v.i v_dest4, 0
113116

117+
/* x_vec, number of source vectors (ie. data blocks) */
118+
li x_vec_i, 0
119+
120+
/* load source pointer */
121+
ld x_ptr, 0(x_src)
122+
114123
/* Reset table pointers */
115124
mv x_tbl1, x_tbl
116125
add x_tbl2, x_tbl1, t_offset
117126
add x_tbl3, x_tbl2, t_offset
118127
add x_tbl4, x_tbl3, t_offset
119128

120-
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
121-
li x_vec_i, 0
122129
.Lloop_rvv_vl_vects:
123130
/* Load source data */
124-
slli a6, x_vec_i, 3
125-
add a6,x_src,a6
126-
ld x_ptr, 0(a6)
127131
add x_ptr,x_ptr,x_pos
128132

129133
vle8.v v_src, (x_ptr)
@@ -142,6 +146,13 @@ gf_4vect_dot_prod_rvv:
142146
vle8.v v_gft2_hi, (x_tbl2)
143147
addi x_tbl2, x_tbl2, 16
144148

149+
prefetch.r 0(x_tbl1)
150+
prefetch.r 0(x_tbl2)
151+
152+
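/* Move to next source vector: x_vec_i is a byte offset (8 bytes per pointer) and the next pointer is loaded ahead of use; NOTE(review): the last pass loads from x_src + x_vec, one slot past vec pointers - confirm that read is safe */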
153+
addi x_vec_i, x_vec_i, 8
154+
add a6,x_src,x_vec_i
155+
ld x_ptr, 0(a6)
145156

146157
/* Load next gf_table's */
147158
vle8.v v_gft3_lo, (x_tbl3)
@@ -153,6 +164,8 @@ gf_4vect_dot_prod_rvv:
153164
addi x_tbl4, x_tbl4, 16
154165
vle8.v v_gft4_hi, (x_tbl4)
155166
addi x_tbl4, x_tbl4, 16
167+
prefetch.r 0(x_tbl3)
168+
prefetch.r 0(x_tbl4)
156169

157170
/* dest 1 */
158171
vrgather.vv v26, v_gft1_lo, v_src_lo
@@ -178,9 +191,6 @@ gf_4vect_dot_prod_rvv:
178191
vxor.vv v_dest4, v_dest4, v26
179192
vxor.vv v_dest4, v_dest4, v27
180193

181-
/* Move to next source vector */
182-
addi x_vec_i, x_vec_i, 1
183-
184194
/* Check if we have processed all vectors */
185195
blt x_vec_i, x_vec, .Lloop_rvv_vl_vects
186196

@@ -198,7 +208,7 @@ gf_4vect_dot_prod_rvv:
198208
j .Lloop_rvv_vl
199209

200210
.return_pass:
201-
/* restore callee-saved registers */
211+
/* restore callee-saved registers */
202212
ld s0, 0(sp)
203213
ld s1, 8(sp)
204214
ld s2, 16(sp)

erasure_code/riscv64/gf_4vect_mad_rvv.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ gf_4vect_mad_rvv:
7979
li t5, 16
8080
blt x_len, t5, .return_fail
8181

82-
vsetvli t6, x0, e8, m1
82+
vsetvli t6, x0, e8, m1,ta,ma
8383

8484
/* load table 1 */
8585
slli t5, x_vec_i, 5

erasure_code/riscv64/gf_5vect_dot_prod_rvv.S

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,7 @@ gf_5vect_dot_prod_rvv:
9595
sd s4, 32(sp)
9696
sd s5, 40(sp)
9797

98-
vsetvli a5, x0, e8, m1
99-
98+
vsetvli a5, x0, e8, m1,ta,ma
10099
/* Initialize position */
101100
li x_pos, 0
102101

@@ -151,6 +150,9 @@ gf_5vect_dot_prod_rvv:
151150
vle8.v v_gft2_hi, (x_tbl2)
152151
addi x_tbl2, x_tbl2, 16
153152

153+
prefetch.r 0(x_tbl1)
154+
prefetch.r 0(x_tbl2)
155+
154156
/* Move to next source vector */
155157
addi x_vec_i, x_vec_i, 1
156158

@@ -171,6 +173,9 @@ gf_5vect_dot_prod_rvv:
171173
vle8.v v_gft4_hi, (x_tbl4)
172174
addi x_tbl4, x_tbl4, 16
173175

176+
prefetch.r 0(x_tbl3)
177+
prefetch.r 0(x_tbl4)
178+
174179
/* dest 2 */
175180
vrgather.vv v26, v_gft2_lo, v_src_lo
176181
vrgather.vv v27, v_gft2_hi, v_src_hi
@@ -188,6 +193,7 @@ gf_5vect_dot_prod_rvv:
188193
addi x_tbl5, x_tbl5, 16
189194
vle8.v v_gft5_hi, (x_tbl5)
190195
addi x_tbl5, x_tbl5, 16
196+
prefetch.r 0(x_tbl5)
191197

192198
/* dest 4 */
193199
vrgather.vv v26, v_gft4_lo, v_src_lo

erasure_code/riscv64/gf_5vect_mad_rvv.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ gf_5vect_mad_rvv:
8383
li t6, 16
8484
blt x_len, t6, .return_fail
8585

86-
vsetvli a7, x0, e8, m1
86+
vsetvli a7, x0, e8, m1,ta,ma
8787

8888
/* Load table 1 */
8989
slli a6, x_vec_i, 5

0 commit comments

Comments
 (0)