Skip to content

Commit 2f626d5

Browse files
committed
erasure_code: add prefetch optimization
Signed-off-by: Shuo Lv <[email protected]>
1 parent 5e41e31 commit 2f626d5

File tree

7 files changed

+91
-2
lines changed

7 files changed

+91
-2
lines changed

configure.ac

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ AM_CONDITIONAL([CPU_PPC64LE], [test "$CPU" = "ppc64le"])
3838
AM_CONDITIONAL([CPU_RISCV64], [test "$CPU" = "riscv64"])
3939
AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"])
4040
AM_CONDITIONAL([HAVE_RVV], [false])
41+
AM_CONDITIONAL([HAVE_ZICBOP], [false])
4142

4243
# Check for programs
4344
AC_PROG_CC_STDC
@@ -70,10 +71,29 @@ case "${CPU}" in
7071
[AC_DEFINE([HAVE_RVV], [0], [Disable RVV instructions])
7172
AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
7273
)
74+
AC_MSG_CHECKING([Zicbop support])
75+
AC_COMPILE_IFELSE(
76+
[AC_LANG_PROGRAM([], [
77+
__asm__ volatile(
78+
".option arch, +zicbop\n"
79+
".insn i 0x0F, 0, x0, x0, 0x010" ::: "memory"
80+
);
81+
])],
82+
[AC_DEFINE([HAVE_ZICBOP], [1], [Enable Zicbop instructions])
83+
AM_CONDITIONAL([HAVE_ZICBOP], [true]) zicbop=yes],
84+
[AC_DEFINE([HAVE_ZICBOP], [0], [Disable Zicbop instructions])
85+
AM_CONDITIONAL([HAVE_ZICBOP], [false]) zicbop=no]
86+
)
87+
AC_MSG_RESULT([$zicbop])
7388
AC_MSG_RESULT([$zicbop])
7489
if test "x$rvv" = "xyes"; then
75-
CFLAGS+=" -march=rv64gcv"
76-
CCASFLAGS+=" -march=rv64gcv"
90+
if test "x$zicbop" = "xyes"; then
91+
CFLAGS+=" -march=rv64gcv_zicbop"
92+
CCASFLAGS+=" -march=rv64gcv_zicbop"
93+
else
94+
CFLAGS+=" -march=rv64gcv"
95+
CCASFLAGS+=" -march=rv64gcv"
96+
fi
7797
fi
7898
AC_MSG_RESULT([$rvv])
7999
;;

erasure_code/riscv64/gf_2vect_dot_prod_rvv.S

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,11 @@ gf_2vect_dot_prod_rvv:
118118
vle8.v v_gft2_hi, (x_tbl2)
119119
addi x_tbl2, x_tbl2, 16
120120

121+
#if HAVE_ZICBOP
122+
prefetch.r 0(x_tbl1)
123+
prefetch.r 0(x_tbl2)
124+
#endif
125+
121126
/* calc for next */
122127
addi x_vec_i, x_vec_i, 8 /* move x_vec_i to next */
123128
add a6,x_src,x_vec_i

erasure_code/riscv64/gf_3vect_dot_prod_rvv.S

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,12 +136,21 @@ gf_3vect_dot_prod_rvv:
136136
add t0,x_src,x_vec_i
137137
ld x_ptr, 0(t0)
138138

139+
#if HAVE_ZICBOP
140+
prefetch.r 0(x_tbl1)
141+
prefetch.r 0(x_tbl2)
142+
#endif
143+
139144
/* Load next gf_tables */
140145
vle8.v v_gft3_lo, (x_tbl3)
141146
addi x_tbl3, x_tbl3, 16
142147
vle8.v v_gft3_hi, (x_tbl3)
143148
addi x_tbl3, x_tbl3, 16
144149

150+
#if HAVE_ZICBOP
151+
prefetch.r 0(x_tbl3)
152+
#endif
153+
145154
/* dest 1 */
146155
vrgather.vv v26, v_gft1_lo, v_src_lo
147156
vrgather.vv v27, v_gft1_hi, v_src_hi

erasure_code/riscv64/gf_4vect_dot_prod_rvv.S

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,11 @@ gf_4vect_dot_prod_rvv:
146146
vle8.v v_gft2_hi, (x_tbl2)
147147
addi x_tbl2, x_tbl2, 16
148148

149+
#if HAVE_ZICBOP
150+
prefetch.r 0(x_tbl1)
151+
prefetch.r 0(x_tbl2)
152+
#endif
153+
149154
/* Move to next source vector */
150155
addi x_vec_i, x_vec_i, 8
151156
add a6,x_src,x_vec_i
@@ -162,6 +167,11 @@ gf_4vect_dot_prod_rvv:
162167
vle8.v v_gft4_hi, (x_tbl4)
163168
addi x_tbl4, x_tbl4, 16
164169

170+
#if HAVE_ZICBOP
171+
prefetch.r 0(x_tbl3)
172+
prefetch.r 0(x_tbl4)
173+
#endif
174+
165175
/* dest 1 */
166176
vrgather.vv v26, v_gft1_lo, v_src_lo
167177
vrgather.vv v27, v_gft1_hi, v_src_hi

erasure_code/riscv64/gf_5vect_dot_prod_rvv.S

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,11 @@ gf_5vect_dot_prod_rvv:
150150
vle8.v v_gft2_hi, (x_tbl2)
151151
addi x_tbl2, x_tbl2, 16
152152

153+
#if HAVE_ZICBOP
154+
prefetch.r 0(x_tbl1)
155+
prefetch.r 0(x_tbl2)
156+
#endif
157+
153158
/* Move to next source vector */
154159
addi x_vec_i, x_vec_i, 1
155160

@@ -170,6 +175,11 @@ gf_5vect_dot_prod_rvv:
170175
vle8.v v_gft4_hi, (x_tbl4)
171176
addi x_tbl4, x_tbl4, 16
172177

178+
#if HAVE_ZICBOP
179+
prefetch.r 0(x_tbl3)
180+
prefetch.r 0(x_tbl4)
181+
#endif
182+
173183
/* dest 2 */
174184
vrgather.vv v26, v_gft2_lo, v_src_lo
175185
vrgather.vv v27, v_gft2_hi, v_src_hi
@@ -188,6 +198,10 @@ gf_5vect_dot_prod_rvv:
188198
vle8.v v_gft5_hi, (x_tbl5)
189199
addi x_tbl5, x_tbl5, 16
190200

201+
#if HAVE_ZICBOP
202+
prefetch.r 0(x_tbl5)
203+
#endif
204+
191205
/* dest 4 */
192206
vrgather.vv v26, v_gft4_lo, v_src_lo
193207
vrgather.vv v27, v_gft4_hi, v_src_hi

erasure_code/riscv64/gf_6vect_dot_prod_rvv.S

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,11 @@ gf_6vect_dot_prod_rvv:
166166
vle8.v v_gft2_hi, (x_tbl2)
167167
addi x_tbl2, x_tbl2, 16
168168

169+
#if HAVE_ZICBOP
170+
prefetch.r 0(x_tbl1)
171+
prefetch.r 0(x_tbl2)
172+
#endif
173+
169174
/* load next source pointer */
170175
addi x_vec_i, x_vec_i,8
171176
add a6,x_src,x_vec_i
@@ -181,6 +186,11 @@ gf_6vect_dot_prod_rvv:
181186
vle8.v v_gft4_hi, (x_tbl4)
182187
addi x_tbl4, x_tbl4, 16
183188

189+
#if HAVE_ZICBOP
190+
prefetch.r 0(x_tbl3)
191+
prefetch.r 0(x_tbl4)
192+
#endif
193+
184194
vle8.v v_gft5_lo, (x_tbl5)
185195
addi x_tbl5, x_tbl5, 16
186196
vle8.v v_gft5_hi, (x_tbl5)
@@ -191,6 +201,11 @@ gf_6vect_dot_prod_rvv:
191201
vle8.v v_gft6_hi, (x_tbl6)
192202
addi x_tbl6, x_tbl6, 16
193203

204+
#if HAVE_ZICBOP
205+
prefetch.r 0(x_tbl5)
206+
prefetch.r 0(x_tbl6)
207+
#endif
208+
194209
/* dest 1 */
195210
vrgather.vv v26, v_gft1_lo, v_src_lo
196211
vrgather.vv v27, v_gft1_hi, v_src_hi

erasure_code/riscv64/gf_7vect_dot_prod_rvv.S

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,11 @@ gf_7vect_dot_prod_rvv:
180180
vle8.v v_gft2_hi, (x_tbl2)
181181
addi x_tbl2, x_tbl2, 16
182182

183+
#if HAVE_ZICBOP
184+
prefetch.r 0(x_tbl1)
185+
prefetch.r 0(x_tbl2)
186+
#endif
187+
183188
vle8.v v_gft3_lo, (x_tbl3)
184189
addi x_tbl3, x_tbl3, 16
185190
vle8.v v_gft3_hi, (x_tbl3)
@@ -190,6 +195,11 @@ gf_7vect_dot_prod_rvv:
190195
vle8.v v_gft4_hi, (x_tbl4)
191196
addi x_tbl4, x_tbl4, 16
192197

198+
#if HAVE_ZICBOP
199+
prefetch.r 0(x_tbl3)
200+
prefetch.r 0(x_tbl4)
201+
#endif
202+
193203
vle8.v v_gft5_lo, (x_tbl5)
194204
addi x_tbl5, x_tbl5, 16
195205
vle8.v v_gft5_hi, (x_tbl5)
@@ -205,6 +215,12 @@ gf_7vect_dot_prod_rvv:
205215
vle8.v v_gft7_hi, (x_tbl7)
206216
addi x_tbl7, x_tbl7, 16
207217

218+
#if HAVE_ZICBOP
219+
prefetch.r 0(x_tbl5)
220+
prefetch.r 0(x_tbl6)
221+
prefetch.r 0(x_tbl7)
222+
#endif
223+
208224
/* dest 1 */
209225
vrgather.vv v26, v_gft1_lo, v_src_lo
210226
vrgather.vv v27, v_gft1_hi, v_src_hi

0 commit comments

Comments
 (0)