@@ -102,7 +102,7 @@ gf_6vect_dot_prod_rvv:
102
102
sd s7, 56 (sp)
103
103
104
104
li t0, 0x0F
105
- vsetvli a5, x0, e8, m1
105
+ vsetvli a5, x0, e8, m1,ta,ma
106
106
107
107
/* initialize position */
108
108
li x_pos, 0
@@ -167,6 +167,14 @@ gf_6vect_dot_prod_rvv:
167
167
addi x_tbl2, x_tbl2, 16
168
168
vle8.v v_gft2_hi, (x_tbl2)
169
169
addi x_tbl2, x_tbl2, 16
170
+ prefetch .r 0 (x_tbl1)
171
+ prefetch .r 0 (x_tbl2)
172
+
173
+ /* dest 1 */
174
+ vrgather.vv v26, v_gft1_lo, v_src_lo
175
+ vrgather.vv v27, v_gft1_hi, v_src_hi
176
+ vxor.vv v_dest1, v_dest1, v26
177
+ vxor.vv v_dest1, v_dest1, v27
170
178
171
179
vle8.v v_gft3_lo, (x_tbl3)
172
180
addi x_tbl3, x_tbl3, 16
@@ -178,6 +186,21 @@ gf_6vect_dot_prod_rvv:
178
186
vle8.v v_gft4_hi, (x_tbl4)
179
187
addi x_tbl4, x_tbl4, 16
180
188
189
+ prefetch .r 0 (x_tbl3)
190
+ prefetch .r 0 (x_tbl4)
191
+
192
+ /* dest 2 */
193
+ vrgather.vv v26, v_gft2_lo, v_src_lo
194
+ vrgather.vv v27, v_gft2_hi, v_src_hi
195
+ vxor.vv v_dest2, v_dest2, v26
196
+ vxor.vv v_dest2, v_dest2, v27
197
+
198
+ /* GF multiplication and accumulation for dest3 */
199
+ vrgather.vv v26, v_gft3_lo, v_src_lo
200
+ vrgather.vv v27, v_gft3_hi, v_src_hi
201
+ vxor.vv v_dest3, v_dest3, v26
202
+ vxor.vv v_dest3, v_dest3, v27
203
+
181
204
vle8.v v_gft5_lo, (x_tbl5)
182
205
addi x_tbl5, x_tbl5, 16
183
206
vle8.v v_gft5_hi, (x_tbl5)
@@ -188,25 +211,27 @@ gf_6vect_dot_prod_rvv:
188
211
vle8.v v_gft6_hi, (x_tbl6)
189
212
addi x_tbl6, x_tbl6, 16
190
213
214
+ prefetch .r 0 (x_tbl5)
215
+ prefetch .r 0 (x_tbl6)
191
216
192
217
/* dest 1 */
193
- vrgather.vv v26, v_gft1_lo, v_src_lo
218
+ /* vrgather.vv v26, v_gft1_lo, v_src_lo
194
219
vrgather.vv v27, v_gft1_hi, v_src_hi
195
220
vxor.vv v_dest1, v_dest1, v26
196
221
vxor.vv v_dest1, v_dest1, v27
197
-
222
+ */
198
223
/* dest 2 */
199
- vrgather.vv v26, v_gft2_lo, v_src_lo
224
+ /* vrgather.vv v26, v_gft2_lo, v_src_lo
200
225
vrgather.vv v27, v_gft2_hi, v_src_hi
201
226
vxor.vv v_dest2, v_dest2, v26
202
227
vxor.vv v_dest2, v_dest2, v27
203
-
228
+ */
204
229
/* GF multiplication and accumulation for dest3 */
205
- vrgather.vv v26, v_gft3_lo, v_src_lo
230
+ /* vrgather.vv v26, v_gft3_lo, v_src_lo
206
231
vrgather.vv v27, v_gft3_hi, v_src_hi
207
232
vxor.vv v_dest3, v_dest3, v26
208
233
vxor.vv v_dest3, v_dest3, v27
209
-
234
+ */
210
235
/* GF multiplication and accumulation for dest4 */
211
236
vrgather.vv v26, v_gft4_lo, v_src_lo
212
237
vrgather.vv v27, v_gft4_hi, v_src_hi
0 commit comments