Skip to content

Commit 3b900a8

Browse files
committed
erasure_code: add prefetch and vsetvli optimization
1 parent 5e90721 commit 3b900a8

15 files changed

+74
-30
lines changed

configure.ac

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ case "${CPU}" in
7171
AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
7272
)
7373
if test "x$rvv" = "xyes"; then
74-
CFLAGS+=" -march=rv64gcv"
75-
CCASFLAGS+=" -march=rv64gcv"
74+
CFLAGS+=" -march=rv64gcv_zicbop"
75+
CCASFLAGS+=" -march=rv64gcv_zicbop"
7676
fi
7777
AC_MSG_RESULT([$rvv])
7878
;;

erasure_code/riscv64/gf_2vect_dot_prod_rvv.S

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,7 @@ gf_2vect_dot_prod_rvv:
7474
li t6, 16
7575
blt x_len, t6, .return_fail
7676

77-
vsetvli a5, x0, e8, m1 /* Set vector length to maximum */
78-
77+
vsetvli a5, x0, e8, m1,ta,ma /* Set vector length to maximum */
7978
li x_pos, 0
8079
ld x_dest1, 0(x_dest)
8180
ld x_dest2, 8(x_dest)
@@ -120,6 +119,9 @@ gf_2vect_dot_prod_rvv:
120119
vle8.v v_gft2_hi, (x_tbl2)
121120
addi x_tbl2, x_tbl2, 16
122121

122+
prefetch.r 0(x_tbl1)
123+
prefetch.r 0(x_tbl2)
124+
123125
/* dest 1 */
124126
/* table indexing, ie. gf(2^8) multiplication */
125127
vrgather.vv v26, v_gft1_lo, v_src_lo

erasure_code/riscv64/gf_2vect_mad_rvv.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ gf_2vect_mad_rvv:
7171
li t3, 16
7272
blt x_len, t3, .return_fail
7373

74-
vsetvli t4, x0, e8, m1
74+
vsetvli t4, x0, e8, m1,ta,ma
7575

7676
/* load table 1 */
7777
slli t3, x_vec_i, 5

erasure_code/riscv64/gf_3vect_dot_prod_rvv.S

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,8 +84,7 @@ gf_3vect_dot_prod_rvv:
8484
sd s0, 0(sp)
8585
sd s1, 8(sp)
8686

87-
vsetvli a7, x0, e8, m1 /* Set vector length to maximum */
88-
87+
vsetvli a7, x0, e8, m1,ta,ma /* Set vector length to maximum */
8988
li x_pos, 0
9089
slli t_offset, x_vec, 5
9190
ld x_dest1, 0(x_dest)
@@ -131,14 +130,17 @@ gf_3vect_dot_prod_rvv:
131130
vle8.v v_gft2_hi, (x_tbl2)
132131
addi x_tbl2, x_tbl2, 16
133132

133+
prefetch.r 0(x_tbl1)
134+
prefetch.r 0(x_tbl2)
134135

135136
/* Load next gf_table's */
136137
vle8.v v_gft3_lo, (x_tbl3)
137138
addi x_tbl3, x_tbl3, 16
138139
vle8.v v_gft3_hi, (x_tbl3)
139140
addi x_tbl3, x_tbl3, 16
141+
prefetch.r 0(x_tbl2)
140142

141-
/* dest 1 */
143+
/* dest 1 */
142144
vrgather.vv v26, v_gft1_lo, v_src_lo
143145
vrgather.vv v27, v_gft1_hi, v_src_hi
144146
vxor.vv v_dest1, v_dest1, v26

erasure_code/riscv64/gf_3vect_mad_rvv.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ gf_3vect_mad_rvv:
7575
li t4, 16
7676
blt x_len, t4, .return_fail
7777

78-
vsetvli t5, x0, e8, m1
78+
vsetvli t5, x0, e8, m1,ta,ma
7979

8080
/* Load table 1 */
8181
slli t4, x_vec_i, 5

erasure_code/riscv64/gf_4vect_dot_prod_rvv.S

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,8 +91,8 @@ gf_4vect_dot_prod_rvv:
9191
sd s2, 16(sp)
9292
sd s3, 24(sp)
9393

94-
vsetvli t0, x0, e8, m1 /* Set vector length to maximum */
95-
94+
# vsetvli t0, x0, e8, m1 /* Set vector length to maximum */
95+
vsetvli t0, x0, e8, m1,ta,ma
9696
li x_pos, 0
9797
slli t_offset, x_vec, 5
9898
ld x_dest1, 0(x_dest)
@@ -142,6 +142,8 @@ gf_4vect_dot_prod_rvv:
142142
vle8.v v_gft2_hi, (x_tbl2)
143143
addi x_tbl2, x_tbl2, 16
144144

145+
prefetch.r 0(x_tbl1)
146+
prefetch.r 0(x_tbl2)
145147

146148
/* Load next gf_table's */
147149
vle8.v v_gft3_lo, (x_tbl3)
@@ -153,6 +155,8 @@ gf_4vect_dot_prod_rvv:
153155
addi x_tbl4, x_tbl4, 16
154156
vle8.v v_gft4_hi, (x_tbl4)
155157
addi x_tbl4, x_tbl4, 16
158+
prefetch.r 0(x_tbl3)
159+
prefetch.r 0(x_tbl4)
156160

157161
/* dest 1 */
158162
vrgather.vv v26, v_gft1_lo, v_src_lo

erasure_code/riscv64/gf_4vect_mad_rvv.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ gf_4vect_mad_rvv:
7979
li t5, 16
8080
blt x_len, t5, .return_fail
8181

82-
vsetvli t6, x0, e8, m1
82+
vsetvli t6, x0, e8, m1,ta,ma
8383

8484
/* load table 1 */
8585
slli t5, x_vec_i, 5

erasure_code/riscv64/gf_5vect_dot_prod_rvv.S

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,8 +95,7 @@ gf_5vect_dot_prod_rvv:
9595
sd s4, 32(sp)
9696
sd s5, 40(sp)
9797

98-
vsetvli a5, x0, e8, m1
99-
98+
vsetvli a5, x0, e8, m1,ta,ma
10099
/* Initialize position */
101100
li x_pos, 0
102101

@@ -151,6 +150,9 @@ gf_5vect_dot_prod_rvv:
151150
vle8.v v_gft2_hi, (x_tbl2)
152151
addi x_tbl2, x_tbl2, 16
153152

153+
prefetch.r 0(x_tbl1)
154+
prefetch.r 0(x_tbl2)
155+
154156
/* Move to next source vector */
155157
addi x_vec_i, x_vec_i, 1
156158

@@ -171,6 +173,9 @@ gf_5vect_dot_prod_rvv:
171173
vle8.v v_gft4_hi, (x_tbl4)
172174
addi x_tbl4, x_tbl4, 16
173175

176+
prefetch.r 0(x_tbl3)
177+
prefetch.r 0(x_tbl4)
178+
174179
/* dest 2 */
175180
vrgather.vv v26, v_gft2_lo, v_src_lo
176181
vrgather.vv v27, v_gft2_hi, v_src_hi
@@ -188,6 +193,7 @@ gf_5vect_dot_prod_rvv:
188193
addi x_tbl5, x_tbl5, 16
189194
vle8.v v_gft5_hi, (x_tbl5)
190195
addi x_tbl5, x_tbl5, 16
196+
prefetch.r 0(x_tbl5)
191197

192198
/* dest 4 */
193199
vrgather.vv v26, v_gft4_lo, v_src_lo

erasure_code/riscv64/gf_5vect_mad_rvv.S

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ gf_5vect_mad_rvv:
8383
li t6, 16
8484
blt x_len, t6, .return_fail
8585

86-
vsetvli a7, x0, e8, m1
86+
vsetvli a7, x0, e8, m1,ta,ma
8787

8888
/* Load table 1 */
8989
slli a6, x_vec_i, 5

erasure_code/riscv64/gf_6vect_dot_prod_rvv.S

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ gf_6vect_dot_prod_rvv:
102102
sd s7, 56(sp)
103103

104104
li t0, 0x0F
105-
vsetvli a5, x0, e8, m1
105+
vsetvli a5, x0, e8, m1,ta,ma
106106

107107
/* initialize position */
108108
li x_pos, 0
@@ -167,6 +167,14 @@ gf_6vect_dot_prod_rvv:
167167
addi x_tbl2, x_tbl2, 16
168168
vle8.v v_gft2_hi, (x_tbl2)
169169
addi x_tbl2, x_tbl2, 16
170+
prefetch.r 0(x_tbl1)
171+
prefetch.r 0(x_tbl2)
172+
173+
/* dest 1 */
174+
vrgather.vv v26, v_gft1_lo, v_src_lo
175+
vrgather.vv v27, v_gft1_hi, v_src_hi
176+
vxor.vv v_dest1, v_dest1, v26
177+
vxor.vv v_dest1, v_dest1, v27
170178

171179
vle8.v v_gft3_lo, (x_tbl3)
172180
addi x_tbl3, x_tbl3, 16
@@ -178,6 +186,21 @@ gf_6vect_dot_prod_rvv:
178186
vle8.v v_gft4_hi, (x_tbl4)
179187
addi x_tbl4, x_tbl4, 16
180188

189+
prefetch.r 0(x_tbl3)
190+
prefetch.r 0(x_tbl4)
191+
192+
/* dest 2 */
193+
vrgather.vv v26, v_gft2_lo, v_src_lo
194+
vrgather.vv v27, v_gft2_hi, v_src_hi
195+
vxor.vv v_dest2, v_dest2, v26
196+
vxor.vv v_dest2, v_dest2, v27
197+
198+
/* GF multiplication and accumulation for dest3 */
199+
vrgather.vv v26, v_gft3_lo, v_src_lo
200+
vrgather.vv v27, v_gft3_hi, v_src_hi
201+
vxor.vv v_dest3, v_dest3, v26
202+
vxor.vv v_dest3, v_dest3, v27
203+
181204
vle8.v v_gft5_lo, (x_tbl5)
182205
addi x_tbl5, x_tbl5, 16
183206
vle8.v v_gft5_hi, (x_tbl5)
@@ -188,25 +211,27 @@ gf_6vect_dot_prod_rvv:
188211
vle8.v v_gft6_hi, (x_tbl6)
189212
addi x_tbl6, x_tbl6, 16
190213

214+
prefetch.r 0(x_tbl5)
215+
prefetch.r 0(x_tbl6)
191216

192217
/* dest 1 */
193-
vrgather.vv v26, v_gft1_lo, v_src_lo
218+
/* vrgather.vv v26, v_gft1_lo, v_src_lo
194219
vrgather.vv v27, v_gft1_hi, v_src_hi
195220
vxor.vv v_dest1, v_dest1, v26
196221
vxor.vv v_dest1, v_dest1, v27
197-
222+
*/
198223
/* dest 2 */
199-
vrgather.vv v26, v_gft2_lo, v_src_lo
224+
/* vrgather.vv v26, v_gft2_lo, v_src_lo
200225
vrgather.vv v27, v_gft2_hi, v_src_hi
201226
vxor.vv v_dest2, v_dest2, v26
202227
vxor.vv v_dest2, v_dest2, v27
203-
228+
*/
204229
/* GF multiplication and accumulation for dest3 */
205-
vrgather.vv v26, v_gft3_lo, v_src_lo
230+
/* vrgather.vv v26, v_gft3_lo, v_src_lo
206231
vrgather.vv v27, v_gft3_hi, v_src_hi
207232
vxor.vv v_dest3, v_dest3, v26
208233
vxor.vv v_dest3, v_dest3, v27
209-
234+
*/
210235
/* GF multiplication and accumulation for dest4 */
211236
vrgather.vv v26, v_gft4_lo, v_src_lo
212237
vrgather.vv v27, v_gft4_hi, v_src_hi

0 commit comments

Comments
 (0)