Skip to content

Commit 339b646

Browse files
committed
erasure_code: add prefetch optimization
Signed-off-by: Shuo Lv <[email protected]>
1 parent 5e90721 commit 339b646

8 files changed

+103
-47
lines changed

configure.ac

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ AM_CONDITIONAL([CPU_PPC64LE], [test "$CPU" = "ppc64le"])
3838
AM_CONDITIONAL([CPU_RISCV64], [test "$CPU" = "riscv64"])
3939
AM_CONDITIONAL([CPU_UNDEFINED], [test "x$CPU" = "x"])
4040
AM_CONDITIONAL([HAVE_RVV], [false])
41+
AM_CONDITIONAL([HAVE_ZICBOP], [false])
4142

4243
# Check for programs
4344
AC_PROG_CC_STDC
@@ -70,9 +71,28 @@ case "${CPU}" in
7071
[AC_DEFINE([HAVE_RVV], [0], [Disable RVV instructions])
7172
AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
7273
)
74+
AC_MSG_CHECKING([Zicbop support])
75+
AC_COMPILE_IFELSE(
76+
[AC_LANG_PROGRAM([], [
77+
__asm__ volatile(
78+
".option arch, +zicbop\n"
79+
".insn i 0x0F, 0, x0, x0, 0x010" ::: "memory"
80+
);
81+
])],
82+
[AC_DEFINE([HAVE_ZICBOP], [1], [Enable Zicbop instructions])
83+
AM_CONDITIONAL([HAVE_ZICBOP], [true]) zicbop=yes],
84+
[AC_DEFINE([HAVE_ZICBOP], [0], [Disable Zicbop instructions])
85+
AM_CONDITIONAL([HAVE_ZICBOP], [false]) zicbop=no]
86+
)
87+
AC_MSG_RESULT([$zicbop])
7388
if test "x$rvv" = "xyes"; then
74-
CFLAGS+=" -march=rv64gcv"
75-
CCASFLAGS+=" -march=rv64gcv"
89+
if test "x$zicbop" = "xyes"; then
90+
CFLAGS+=" -march=rv64gcv_zicbop"
91+
CCASFLAGS+=" -march=rv64gcv_zicbop"
92+
else
93+
CFLAGS+=" -march=rv64gcv"
94+
CCASFLAGS+=" -march=rv64gcv"
95+
fi
7696
fi
7797
AC_MSG_RESULT([$rvv])
7898
;;

erasure_code/riscv64/gf_2vect_dot_prod_rvv.S

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ gf_2vect_dot_prod_rvv:
7575
blt x_len, t6, .return_fail
7676

7777
vsetvli a5, x0, e8, m1 /* Set vector length to maximum */
78-
7978
li x_pos, 0
79+
slli x_vec, x_vec, 3
80+
8081
ld x_dest1, 0(x_dest)
8182
ld x_dest2, 8(x_dest)
8283

@@ -92,15 +93,12 @@ gf_2vect_dot_prod_rvv:
9293

9394
/* gf_tbl base = (x_tbl + dest_idx * x_vec * 4); x_vec was already shifted left by 3, so this is still 32 table bytes per source vector */
9495
mv x_tbl1, x_tbl /* reset x_tbl1 */
95-
slli t6, x_vec, 5
96+
slli t6, x_vec, 2
9697
add x_tbl2, x_tbl1, t6 /* reset x_tbl2 */
9798

9899
/* Loop 2: over the source vectors (ie. data blocks); x_vec_i and x_vec are byte offsets into the x_src pointer array (8 bytes per vector) */
99100
.Llooprvv_vl_vects:
100101
/* load src data */
101-
slli a6, x_vec_i, 3
102-
add a6,x_src,a6
103-
ld x_ptr, 0(a6)
104102
add x_ptr,x_ptr,x_pos
105103

106104
vle8.v v_src, (x_ptr) /* load from: src base + pos offset */
@@ -120,6 +118,14 @@ gf_2vect_dot_prod_rvv:
120118
vle8.v v_gft2_hi, (x_tbl2)
121119
addi x_tbl2, x_tbl2, 16
122120

121+
prefetch.r 0(x_tbl1)
122+
prefetch.r 0(x_tbl2)
123+
124+
/* calc for next */
125+
addi x_vec_i, x_vec_i, 8 /* move x_vec_i to next */
126+
add a6,x_src,x_vec_i
127+
ld x_ptr, 0(a6)
128+
123129
/* dest 1 */
124130
/* table indexing, ie. gf(2^8) multiplication */
125131
vrgather.vv v26, v_gft1_lo, v_src_lo
@@ -134,8 +140,6 @@ gf_2vect_dot_prod_rvv:
134140
vxor.vv v_dest2, v_dest2, v26
135141
vxor.vv v_dest2, v_dest2, v27
136142

137-
/* calc for next */
138-
addi x_vec_i, x_vec_i, 1 /* move x_vec_i to next */
139143
blt x_vec_i, x_vec, .Llooprvv_vl_vects
140144
/* end of Loop 2 */
141145

erasure_code/riscv64/gf_3vect_dot_prod_rvv.S

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,10 @@ gf_3vect_dot_prod_rvv:
8585
sd s1, 8(sp)
8686

8787
vsetvli a7, x0, e8, m1 /* Set vector length to maximum */
88-
8988
li x_pos, 0
90-
slli t_offset, x_vec, 5
89+
slli x_vec, x_vec, 3
90+
91+
slli t_offset, x_vec, 2
9192
ld x_dest1, 0(x_dest)
9293
ld x_dest2, 8(x_dest)
9394
ld x_dest3, 16(x_dest)
@@ -101,20 +102,19 @@ gf_3vect_dot_prod_rvv:
101102
vmv.v.i v_dest2, 0
102103
vmv.v.i v_dest3, 0
103104

105+
/* Loop 2: over the source vectors (ie. data blocks); x_vec_i and x_vec are byte offsets into the x_src pointer array (8 bytes per vector) */
106+
li x_vec_i, 0
107+
/* load source pointer */
108+
ld x_ptr, 0(x_src)
109+
104110
/* Reset table pointers */
105111
mv x_tbl1, x_tbl
106112
add x_tbl2, x_tbl1, t_offset
107113
add x_tbl3, x_tbl2, t_offset
108114

109-
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
110-
li x_vec_i, 0
111115
.Lloop_rvv_vl_vects:
112-
/* Load source data */
113-
slli t0, x_vec_i, 3
114-
add t0,x_src,t0
115-
ld x_ptr, 0(t0)
116+
/* load source data */
116117
add x_ptr,x_ptr,x_pos
117-
118118
vle8.v v_src, (x_ptr)
119119

120120
/* Split 4-bit lo; 4-bit hi */
@@ -131,14 +131,22 @@ gf_3vect_dot_prod_rvv:
131131
vle8.v v_gft2_hi, (x_tbl2)
132132
addi x_tbl2, x_tbl2, 16
133133

134+
/* Move to next source vector */
135+
addi x_vec_i, x_vec_i, 8
136+
add t0,x_src,x_vec_i
137+
ld x_ptr, 0(t0)
138+
139+
prefetch.r 0(x_tbl1)
140+
prefetch.r 0(x_tbl2)
134141

135142
/* Load next gf_table's */
136143
vle8.v v_gft3_lo, (x_tbl3)
137144
addi x_tbl3, x_tbl3, 16
138145
vle8.v v_gft3_hi, (x_tbl3)
139146
addi x_tbl3, x_tbl3, 16
147+
prefetch.r 0(x_tbl3)
140148

141-
/* dest 1 */
149+
/* dest 1 */
142150
vrgather.vv v26, v_gft1_lo, v_src_lo
143151
vrgather.vv v27, v_gft1_hi, v_src_hi
144152
vxor.vv v_dest1, v_dest1, v26
@@ -156,9 +164,6 @@ gf_3vect_dot_prod_rvv:
156164
vxor.vv v_dest3, v_dest3, v26
157165
vxor.vv v_dest3, v_dest3, v27
158166

159-
/* Move to next source vector */
160-
addi x_vec_i, x_vec_i, 1
161-
162167
/* Check if we have processed all vectors */
163168
blt x_vec_i, x_vec, .Lloop_rvv_vl_vects
164169

erasure_code/riscv64/gf_4vect_dot_prod_rvv.S

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,13 @@ gf_4vect_dot_prod_rvv:
9191
sd s2, 16(sp)
9292
sd s3, 24(sp)
9393

94-
vsetvli t0, x0, e8, m1 /* Set vector length to maximum */
95-
94+
/* Set vector length to maximum */
95+
vsetvli t0, x0, e8, m1
9696
li x_pos, 0
97-
slli t_offset, x_vec, 5
97+
98+
slli x_vec, x_vec, 3
99+
slli t_offset, x_vec, 2
100+
98101
ld x_dest1, 0(x_dest)
99102
ld x_dest2, 8(x_dest)
100103
ld x_dest3, 16(x_dest)
@@ -111,19 +114,20 @@ gf_4vect_dot_prod_rvv:
111114
vmv.v.i v_dest3, 0
112115
vmv.v.i v_dest4, 0
113116

117+
/* Loop 2: over the source vectors (ie. data blocks); x_vec_i and x_vec are byte offsets into the x_src pointer array (8 bytes per vector) */
118+
li x_vec_i, 0
119+
120+
/* load source pointer */
121+
ld x_ptr, 0(x_src)
122+
114123
/* Reset table pointers */
115124
mv x_tbl1, x_tbl
116125
add x_tbl2, x_tbl1, t_offset
117126
add x_tbl3, x_tbl2, t_offset
118127
add x_tbl4, x_tbl3, t_offset
119128

120-
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
121-
li x_vec_i, 0
122129
.Lloop_rvv_vl_vects:
123130
/* Load source data */
124-
slli a6, x_vec_i, 3
125-
add a6,x_src,a6
126-
ld x_ptr, 0(a6)
127131
add x_ptr,x_ptr,x_pos
128132

129133
vle8.v v_src, (x_ptr)
@@ -142,6 +146,13 @@ gf_4vect_dot_prod_rvv:
142146
vle8.v v_gft2_hi, (x_tbl2)
143147
addi x_tbl2, x_tbl2, 16
144148

149+
prefetch.r 0(x_tbl1)
150+
prefetch.r 0(x_tbl2)
151+
152+
/* Move to next source vector */
153+
addi x_vec_i, x_vec_i, 8
154+
add a6,x_src,x_vec_i
155+
ld x_ptr, 0(a6)
145156

146157
/* Load next gf_table's */
147158
vle8.v v_gft3_lo, (x_tbl3)
@@ -153,6 +164,8 @@ gf_4vect_dot_prod_rvv:
153164
addi x_tbl4, x_tbl4, 16
154165
vle8.v v_gft4_hi, (x_tbl4)
155166
addi x_tbl4, x_tbl4, 16
167+
prefetch.r 0(x_tbl3)
168+
prefetch.r 0(x_tbl4)
156169

157170
/* dest 1 */
158171
vrgather.vv v26, v_gft1_lo, v_src_lo
@@ -178,9 +191,6 @@ gf_4vect_dot_prod_rvv:
178191
vxor.vv v_dest4, v_dest4, v26
179192
vxor.vv v_dest4, v_dest4, v27
180193

181-
/* Move to next source vector */
182-
addi x_vec_i, x_vec_i, 1
183-
184194
/* Check if we have processed all vectors */
185195
blt x_vec_i, x_vec, .Lloop_rvv_vl_vects
186196

@@ -198,7 +208,7 @@ gf_4vect_dot_prod_rvv:
198208
j .Lloop_rvv_vl
199209

200210
.return_pass:
201-
/* restore callee-saved registers */
211+
/* restore callee-saved registers */
202212
ld s0, 0(sp)
203213
ld s1, 8(sp)
204214
ld s2, 16(sp)

erasure_code/riscv64/gf_5vect_dot_prod_rvv.S

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ gf_5vect_dot_prod_rvv:
9696
sd s5, 40(sp)
9797

9898
vsetvli a5, x0, e8, m1
99-
10099
/* Initialize position */
101100
li x_pos, 0
102101

@@ -151,6 +150,9 @@ gf_5vect_dot_prod_rvv:
151150
vle8.v v_gft2_hi, (x_tbl2)
152151
addi x_tbl2, x_tbl2, 16
153152

153+
prefetch.r 0(x_tbl1)
154+
prefetch.r 0(x_tbl2)
155+
154156
/* Move to next source vector */
155157
addi x_vec_i, x_vec_i, 1
156158

@@ -171,6 +173,9 @@ gf_5vect_dot_prod_rvv:
171173
vle8.v v_gft4_hi, (x_tbl4)
172174
addi x_tbl4, x_tbl4, 16
173175

176+
prefetch.r 0(x_tbl3)
177+
prefetch.r 0(x_tbl4)
178+
174179
/* dest 2 */
175180
vrgather.vv v26, v_gft2_lo, v_src_lo
176181
vrgather.vv v27, v_gft2_hi, v_src_hi
@@ -188,6 +193,7 @@ gf_5vect_dot_prod_rvv:
188193
addi x_tbl5, x_tbl5, 16
189194
vle8.v v_gft5_hi, (x_tbl5)
190195
addi x_tbl5, x_tbl5, 16
196+
prefetch.r 0(x_tbl5)
191197

192198
/* dest 4 */
193199
vrgather.vv v26, v_gft4_lo, v_src_lo

erasure_code/riscv64/gf_6vect_dot_prod_rvv.S

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ gf_6vect_dot_prod_rvv:
107107
/* initialize position */
108108
li x_pos, 0
109109

110+
slli x_vec, x_vec, 3
111+
110112
/* load destination pointers */
111113
ld x_dest1, 0(x14) # a4 is also x14
112114
ld x_dest2, 8(x_dest)
@@ -136,7 +138,7 @@ gf_6vect_dot_prod_rvv:
136138
/* initialize table pointers */
137139
/* gf_tbl base = (x_tbl + dest_idx * x_vec * 4); x_vec was already shifted left by 3, so this is still 32 table bytes per source vector */
138140
mv x_tbl1, x_tbl
139-
slli t0, x_vec, 5
141+
slli t0, x_vec, 2
140142
add x_tbl2, x_tbl1, t0
141143
add x_tbl3, x_tbl2, t0
142144
add x_tbl4, x_tbl3, t0
@@ -145,11 +147,7 @@ gf_6vect_dot_prod_rvv:
145147

146148
.Llooprvv_vl_vects:
147149
/* load source data */
148-
slli a6, x_vec_i, 3
149-
add a6,x_src,a6
150-
ld x_ptr, 0(a6)
151150
add x_ptr,x_ptr,x_pos
152-
153151
vle8.v v_src, (x_ptr)
154152

155153

@@ -167,6 +165,13 @@ gf_6vect_dot_prod_rvv:
167165
addi x_tbl2, x_tbl2, 16
168166
vle8.v v_gft2_hi, (x_tbl2)
169167
addi x_tbl2, x_tbl2, 16
168+
prefetch.r 0(x_tbl1)
169+
prefetch.r 0(x_tbl2)
170+
171+
/* load next source pointer */
172+
addi x_vec_i, x_vec_i,8
173+
add a6,x_src,x_vec_i
174+
ld x_ptr, 0(a6)
170175

171176
vle8.v v_gft3_lo, (x_tbl3)
172177
addi x_tbl3, x_tbl3, 16
@@ -178,6 +183,9 @@ gf_6vect_dot_prod_rvv:
178183
vle8.v v_gft4_hi, (x_tbl4)
179184
addi x_tbl4, x_tbl4, 16
180185

186+
prefetch.r 0(x_tbl3)
187+
prefetch.r 0(x_tbl4)
188+
181189
vle8.v v_gft5_lo, (x_tbl5)
182190
addi x_tbl5, x_tbl5, 16
183191
vle8.v v_gft5_hi, (x_tbl5)
@@ -188,6 +196,8 @@ gf_6vect_dot_prod_rvv:
188196
vle8.v v_gft6_hi, (x_tbl6)
189197
addi x_tbl6, x_tbl6, 16
190198

199+
prefetch.r 0(x_tbl5)
200+
prefetch.r 0(x_tbl6)
191201

192202
/* dest 1 */
193203
vrgather.vv v26, v_gft1_lo, v_src_lo
@@ -225,10 +235,6 @@ gf_6vect_dot_prod_rvv:
225235
vxor.vv v_dest6, v_dest6, v26
226236
vxor.vv v_dest6, v_dest6, v27
227237

228-
229-
/* load next source pointer */
230-
addi x_vec_i, x_vec_i,1
231-
232238
/* check if we have processed all vectors */
233239
blt x_vec_i, x_vec, .Llooprvv_vl_vects
234240

erasure_code/riscv64/gf_7vect_dot_prod_rvv.S

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ gf_7vect_dot_prod_rvv:
113113
sd s8, 64(sp)
114114

115115
vsetvli t0, x0, e8, m1
116-
117116
/* initialize position */
118117
li x_pos, 0
119118

@@ -181,6 +180,9 @@ gf_7vect_dot_prod_rvv:
181180
vle8.v v_gft2_hi, (x_tbl2)
182181
addi x_tbl2, x_tbl2, 16
183182

183+
prefetch.r 0(x_tbl1)
184+
prefetch.r 0(x_tbl2)
185+
184186
vle8.v v_gft3_lo, (x_tbl3)
185187
addi x_tbl3, x_tbl3, 16
186188
vle8.v v_gft3_hi, (x_tbl3)
@@ -191,6 +193,8 @@ gf_7vect_dot_prod_rvv:
191193
vle8.v v_gft4_hi, (x_tbl4)
192194
addi x_tbl4, x_tbl4, 16
193195

196+
prefetch.r 0(x_tbl3)
197+
prefetch.r 0(x_tbl4)
194198
vle8.v v_gft5_lo, (x_tbl5)
195199
addi x_tbl5, x_tbl5, 16
196200
vle8.v v_gft5_hi, (x_tbl5)
@@ -205,7 +209,9 @@ gf_7vect_dot_prod_rvv:
205209
addi x_tbl7, x_tbl7, 16
206210
vle8.v v_gft7_hi, (x_tbl7)
207211
addi x_tbl7, x_tbl7, 16
208-
212+
prefetch.r 0(x_tbl5)
213+
prefetch.r 0(x_tbl6)
214+
prefetch.r 0(x_tbl7)
209215

210216
/* dest 1 */
211217
vrgather.vv v26, v_gft1_lo, v_src_lo

0 commit comments

Comments
 (0)