Skip to content

Commit be89c37

Browse files
committed
erasure_code: add optimization implementation
Signed-off-by: Shuo Lv <[email protected]>
1 parent 5e90721 commit be89c37

8 files changed

+50
-48
lines changed

configure.ac

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,10 @@ case "${CPU}" in
7070
[AC_DEFINE([HAVE_RVV], [0], [Disable RVV instructions])
7171
AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
7272
)
73+
AC_MSG_RESULT([$zicbop])
7374
if test "x$rvv" = "xyes"; then
74-
CFLAGS+=" -march=rv64gcv"
75-
CCASFLAGS+=" -march=rv64gcv"
75+
CFLAGS+=" -march=rv64gcv"
76+
CCASFLAGS+=" -march=rv64gcv"
7677
fi
7778
AC_MSG_RESULT([$rvv])
7879
;;

erasure_code/riscv64/gf_2vect_dot_prod_rvv.S

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,9 @@ gf_2vect_dot_prod_rvv:
7575
blt x_len, t6, .return_fail
7676

7777
vsetvli a5, x0, e8, m1 /* Set vector length to maximum */
78-
7978
li x_pos, 0
79+
slli x_vec, x_vec, 3
80+
8081
ld x_dest1, 0(x_dest)
8182
ld x_dest2, 8(x_dest)
8283

@@ -92,15 +93,12 @@ gf_2vect_dot_prod_rvv:
9293

9394
/* gf_tbl base = (x_tbl + dest_idx * vec * 32); x_vec already holds vec * 8, so x_vec << 2 == vec * 32 */
9495
mv x_tbl1, x_tbl /* reset x_tbl1 */
95-
slli t6, x_vec, 5
96+
slli t6, x_vec, 2
9697
add x_tbl2, x_tbl1, t6 /* reset x_tbl2 */
9798

9899
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
99100
.Llooprvv_vl_vects:
100101
/* load src data */
101-
slli a6, x_vec_i, 3
102-
add a6,x_src,a6
103-
ld x_ptr, 0(a6)
104102
add x_ptr,x_ptr,x_pos
105103

106104
vle8.v v_src, (x_ptr) /* load from: src base + pos offset */
@@ -120,6 +118,11 @@ gf_2vect_dot_prod_rvv:
120118
vle8.v v_gft2_hi, (x_tbl2)
121119
addi x_tbl2, x_tbl2, 16
122120

121+
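/* Fetch the pointer for the next source vector; x_vec_i is a byte offset into x_src (8 bytes per pointer).
 * NOTE(review): on the last iteration this loads the slot just past the last pointer used - confirm that read is safe. */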
122+
addi x_vec_i, x_vec_i, 8 /* move x_vec_i to next */
123+
add a6,x_src,x_vec_i
124+
ld x_ptr, 0(a6)
125+
123126
/* dest 1 */
124127
/* table indexing, ie. gf(2^8) multiplication */
125128
vrgather.vv v26, v_gft1_lo, v_src_lo
@@ -134,8 +137,6 @@ gf_2vect_dot_prod_rvv:
134137
vxor.vv v_dest2, v_dest2, v26
135138
vxor.vv v_dest2, v_dest2, v27
136139

137-
/* calc for next */
138-
addi x_vec_i, x_vec_i, 1 /* move x_vec_i to next */
139140
blt x_vec_i, x_vec, .Llooprvv_vl_vects
140141
/* end of Loop 2 */
141142

erasure_code/riscv64/gf_3vect_dot_prod_rvv.S

Lines changed: 14 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,10 @@ gf_3vect_dot_prod_rvv:
8585
sd s1, 8(sp)
8686

8787
vsetvli a7, x0, e8, m1 /* Set vector length to maximum */
88-
8988
li x_pos, 0
90-
slli t_offset, x_vec, 5
89+
slli x_vec, x_vec, 3
90+
91+
slli t_offset, x_vec, 2
9192
ld x_dest1, 0(x_dest)
9293
ld x_dest2, 8(x_dest)
9394
ld x_dest3, 16(x_dest)
@@ -101,20 +102,19 @@ gf_3vect_dot_prod_rvv:
101102
vmv.v.i v_dest2, 0
102103
vmv.v.i v_dest3, 0
103104

105+
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
106+
li x_vec_i, 0
107+
/* load source pointer */
108+
ld x_ptr, 0(x_src)
109+
104110
/* Reset table pointers */
105111
mv x_tbl1, x_tbl
106112
add x_tbl2, x_tbl1, t_offset
107113
add x_tbl3, x_tbl2, t_offset
108114

109-
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
110-
li x_vec_i, 0
111115
.Lloop_rvv_vl_vects:
112-
/* Load source data */
113-
slli t0, x_vec_i, 3
114-
add t0,x_src,t0
115-
ld x_ptr, 0(t0)
116+
/* load source data */
116117
add x_ptr,x_ptr,x_pos
117-
118118
vle8.v v_src, (x_ptr)
119119

120120
/* Split 4-bit lo; 4-bit hi */
@@ -131,14 +131,18 @@ gf_3vect_dot_prod_rvv:
131131
vle8.v v_gft2_hi, (x_tbl2)
132132
addi x_tbl2, x_tbl2, 16
133133

134+
/* Move to next source vector */
135+
addi x_vec_i, x_vec_i, 8
136+
add t0,x_src,x_vec_i
137+
ld x_ptr, 0(t0)
134138

135139
/* Load next gf_table's */
136140
vle8.v v_gft3_lo, (x_tbl3)
137141
addi x_tbl3, x_tbl3, 16
138142
vle8.v v_gft3_hi, (x_tbl3)
139143
addi x_tbl3, x_tbl3, 16
140144

141-
/* dest 1 */
145+
/* dest 1 */
142146
vrgather.vv v26, v_gft1_lo, v_src_lo
143147
vrgather.vv v27, v_gft1_hi, v_src_hi
144148
vxor.vv v_dest1, v_dest1, v26
@@ -156,9 +160,6 @@ gf_3vect_dot_prod_rvv:
156160
vxor.vv v_dest3, v_dest3, v26
157161
vxor.vv v_dest3, v_dest3, v27
158162

159-
/* Move to next source vector */
160-
addi x_vec_i, x_vec_i, 1
161-
162163
/* Check if we have processed all vectors */
163164
blt x_vec_i, x_vec, .Lloop_rvv_vl_vects
164165

erasure_code/riscv64/gf_4vect_dot_prod_rvv.S

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,13 @@ gf_4vect_dot_prod_rvv:
9191
sd s2, 16(sp)
9292
sd s3, 24(sp)
9393

94-
vsetvli t0, x0, e8, m1 /* Set vector length to maximum */
95-
94+
# vsetvli t0, x0, e8, m1 /* Set vector length to maximum */
95+
vsetvli t0, x0, e8, m1
9696
li x_pos, 0
97-
slli t_offset, x_vec, 5
97+
98+
slli x_vec, x_vec, 3
99+
slli t_offset, x_vec, 2
100+
98101
ld x_dest1, 0(x_dest)
99102
ld x_dest2, 8(x_dest)
100103
ld x_dest3, 16(x_dest)
@@ -111,19 +114,20 @@ gf_4vect_dot_prod_rvv:
111114
vmv.v.i v_dest3, 0
112115
vmv.v.i v_dest4, 0
113116

117+
/* x_vec, number of source vectors (ie. data blocks) */
118+
li x_vec_i, 0
119+
120+
/* load source pointer */
121+
ld x_ptr, 0(x_src)
122+
114123
/* Reset table pointers */
115124
mv x_tbl1, x_tbl
116125
add x_tbl2, x_tbl1, t_offset
117126
add x_tbl3, x_tbl2, t_offset
118127
add x_tbl4, x_tbl3, t_offset
119128

120-
/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
121-
li x_vec_i, 0
122129
.Lloop_rvv_vl_vects:
123130
/* Load source data */
124-
slli a6, x_vec_i, 3
125-
add a6,x_src,a6
126-
ld x_ptr, 0(a6)
127131
add x_ptr,x_ptr,x_pos
128132

129133
vle8.v v_src, (x_ptr)
@@ -142,6 +146,10 @@ gf_4vect_dot_prod_rvv:
142146
vle8.v v_gft2_hi, (x_tbl2)
143147
addi x_tbl2, x_tbl2, 16
144148

149+
/* Move to next source vector */
150+
addi x_vec_i, x_vec_i, 8
151+
add a6,x_src,x_vec_i
152+
ld x_ptr, 0(a6)
145153

146154
/* Load next gf_table's */
147155
vle8.v v_gft3_lo, (x_tbl3)
@@ -178,9 +186,6 @@ gf_4vect_dot_prod_rvv:
178186
vxor.vv v_dest4, v_dest4, v26
179187
vxor.vv v_dest4, v_dest4, v27
180188

181-
/* Move to next source vector */
182-
addi x_vec_i, x_vec_i, 1
183-
184189
/* Check if we have processed all vectors */
185190
blt x_vec_i, x_vec, .Lloop_rvv_vl_vects
186191

@@ -198,7 +203,7 @@ gf_4vect_dot_prod_rvv:
198203
j .Lloop_rvv_vl
199204

200205
.return_pass:
201-
/* restore callee-saved registers */
206+
/* restore callee-saved registers */
202207
ld s0, 0(sp)
203208
ld s1, 8(sp)
204209
ld s2, 16(sp)

erasure_code/riscv64/gf_5vect_dot_prod_rvv.S

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,6 @@ gf_5vect_dot_prod_rvv:
9696
sd s5, 40(sp)
9797

9898
vsetvli a5, x0, e8, m1
99-
10099
/* Initialize position */
101100
li x_pos, 0
102101

erasure_code/riscv64/gf_6vect_dot_prod_rvv.S

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,8 @@ gf_6vect_dot_prod_rvv:
107107
/* initialize position */
108108
li x_pos, 0
109109

110+
slli x_vec, x_vec, 3
111+
110112
/* load destination pointers */
111113
ld x_dest1, 0(x14) # a4 is also x14
112114
ld x_dest2, 8(x_dest)
@@ -136,7 +138,7 @@ gf_6vect_dot_prod_rvv:
136138
/* initialize table pointers */
137139
/* gf_tbl base = (x_tbl + dest_idx * vec * 32); x_vec already holds vec * 8, so x_vec << 2 == vec * 32 */
138140
mv x_tbl1, x_tbl
139-
slli t0, x_vec, 5
141+
slli t0, x_vec, 2
140142
add x_tbl2, x_tbl1, t0
141143
add x_tbl3, x_tbl2, t0
142144
add x_tbl4, x_tbl3, t0
@@ -145,11 +147,7 @@ gf_6vect_dot_prod_rvv:
145147

146148
.Llooprvv_vl_vects:
147149
/* load source data */
148-
slli a6, x_vec_i, 3
149-
add a6,x_src,a6
150-
ld x_ptr, 0(a6)
151150
add x_ptr,x_ptr,x_pos
152-
153151
vle8.v v_src, (x_ptr)
154152

155153

@@ -168,6 +166,11 @@ gf_6vect_dot_prod_rvv:
168166
vle8.v v_gft2_hi, (x_tbl2)
169167
addi x_tbl2, x_tbl2, 16
170168

169+
/* load next source pointer */
170+
addi x_vec_i, x_vec_i,8
171+
add a6,x_src,x_vec_i
172+
ld x_ptr, 0(a6)
173+
171174
vle8.v v_gft3_lo, (x_tbl3)
172175
addi x_tbl3, x_tbl3, 16
173176
vle8.v v_gft3_hi, (x_tbl3)
@@ -188,7 +191,6 @@ gf_6vect_dot_prod_rvv:
188191
vle8.v v_gft6_hi, (x_tbl6)
189192
addi x_tbl6, x_tbl6, 16
190193

191-
192194
/* dest 1 */
193195
vrgather.vv v26, v_gft1_lo, v_src_lo
194196
vrgather.vv v27, v_gft1_hi, v_src_hi
@@ -225,10 +227,6 @@ gf_6vect_dot_prod_rvv:
225227
vxor.vv v_dest6, v_dest6, v26
226228
vxor.vv v_dest6, v_dest6, v27
227229

228-
229-
/* load next source pointer */
230-
addi x_vec_i, x_vec_i,1
231-
232230
/* check if we have processed all vectors */
233231
blt x_vec_i, x_vec, .Llooprvv_vl_vects
234232

erasure_code/riscv64/gf_7vect_dot_prod_rvv.S

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,6 @@ gf_7vect_dot_prod_rvv:
113113
sd s8, 64(sp)
114114

115115
vsetvli t0, x0, e8, m1
116-
117116
/* initialize position */
118117
li x_pos, 0
119118

@@ -206,7 +205,6 @@ gf_7vect_dot_prod_rvv:
206205
vle8.v v_gft7_hi, (x_tbl7)
207206
addi x_tbl7, x_tbl7, 16
208207

209-
210208
/* dest 1 */
211209
vrgather.vv v26, v_gft1_lo, v_src_lo
212210
vrgather.vv v27, v_gft1_hi, v_src_hi

erasure_code/riscv64/gf_vect_dot_prod_rvv.S

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,6 @@ gf_vect_dot_prod_rvv:
6363
blt a0, t4, .return_fail
6464

6565
vsetvli t5, zero, e8, m1 # Set vector length to maximum
66-
6766
# Initialize pos = 0
6867
li t2, 0
6968

0 commit comments

Comments
 (0)