Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -71,8 +71,8 @@ case "${CPU}" in
AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no]
)
if test "x$rvv" = "xyes"; then
CFLAGS+=" -march=rv64gcv"
CCASFLAGS+=" -march=rv64gcv"
CFLAGS+=" -march=rv64gcv"
CCASFLAGS+=" -march=rv64gcv"
fi
AC_MSG_RESULT([$rvv])
;;
Expand Down
23 changes: 12 additions & 11 deletions erasure_code/riscv64/gf_2vect_dot_prod_rvv.S
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,10 @@ gf_2vect_dot_prod_rvv:
li t6, 16
blt x_len, t6, .return_fail

vsetvli a5, x0, e8, m1 /* Set vector length to maximum */

vsetvli a5, x0, e8, m1, ta, ma /* Set vector length to maximum */
li x_pos, 0
slli x_vec, x_vec, 3

ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)

Expand All @@ -92,16 +93,13 @@ gf_2vect_dot_prod_rvv:

/* gf_tbl base = (x_tbl + dest_idx * x_vec * 32) */
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this 32 be 4 now, since you are shifting left 2 instead of 5 bits?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

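In total, the table offset still needs a left shift of 5 bits; it is now split into two steps: `slli x_vec, x_vec, 3` followed by `slli t6, x_vec, 2`. You are right that, because x_vec is already scaled by 8 at this point, the 32 in the comment should be 4. We could add an extra comment to explain this.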

mv x_tbl1, x_tbl /* reset x_tbl1 */
slli t6, x_vec, 5
slli t6, x_vec, 2
add x_tbl2, x_tbl1, t6 /* reset x_tbl2 */

/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
.Llooprvv_vl_vects:
/* load src data */
slli a6, x_vec_i, 3
add a6,x_src,a6
ld x_ptr, 0(a6)
add x_ptr,x_ptr,x_pos
add x_ptr, x_ptr, x_pos

vle8.v v_src, (x_ptr) /* load from: src base + pos offset */
/* split 4-bit lo; 4-bit hi */
Expand All @@ -120,6 +118,11 @@ gf_2vect_dot_prod_rvv:
vle8.v v_gft2_hi, (x_tbl2)
addi x_tbl2, x_tbl2, 16

/* calc for next */
addi x_vec_i, x_vec_i, 8 /* move x_vec_i to next */
add a6, x_src, x_vec_i
ld x_ptr, 0(a6)

/* dest 1 */
/* table indexing, ie. gf(2^8) multiplication */
vrgather.vv v26, v_gft1_lo, v_src_lo
Expand All @@ -134,16 +137,14 @@ gf_2vect_dot_prod_rvv:
vxor.vv v_dest2, v_dest2, v26
vxor.vv v_dest2, v_dest2, v27

/* calc for next */
addi x_vec_i, x_vec_i, 1 /* move x_vec_i to next */
blt x_vec_i, x_vec, .Llooprvv_vl_vects
/* end of Loop 2 */

/* store dest data */
vse8.v v_dest1, (x_dest1)
vse8.v v_dest2, (x_dest2)
add x_dest1,x_dest1,a5
add x_dest2,x_dest2,a5
add x_dest1, x_dest1, a5
add x_dest2, x_dest2, a5

/* increment one vector length */
add x_pos, x_pos, a5
Expand Down
2 changes: 1 addition & 1 deletion erasure_code/riscv64/gf_2vect_mad_rvv.S
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ gf_2vect_mad_rvv:
li t3, 16
blt x_len, t3, .return_fail

vsetvli t4, x0, e8, m1
vsetvli t4, x0, e8, m1, ta, ma

/* load table 1 */
slli t3, x_vec_i, 5
Expand Down
38 changes: 19 additions & 19 deletions erasure_code/riscv64/gf_3vect_dot_prod_rvv.S
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,6 @@
#define x_dest3 a5
#define t_offset a6


/* vectors */
#define v_src v1
#define v_src_lo v2
Expand All @@ -84,10 +83,11 @@ gf_3vect_dot_prod_rvv:
sd s0, 0(sp)
sd s1, 8(sp)

vsetvli a7, x0, e8, m1 /* Set vector length to maximum */

vsetvli a7, x0, e8, m1, ta, ma /* Set vector length to maximum */
li x_pos, 0
slli t_offset, x_vec, 5
slli x_vec, x_vec, 3

slli t_offset, x_vec, 2
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The commit message should include the reason for changing the implementation.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated.

ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
Expand All @@ -101,20 +101,19 @@ gf_3vect_dot_prod_rvv:
vmv.v.i v_dest2, 0
vmv.v.i v_dest3, 0

/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
li x_vec_i, 0
/* load source pointer */
ld x_ptr, 0(x_src)

/* Reset table pointers */
mv x_tbl1, x_tbl
add x_tbl2, x_tbl1, t_offset
add x_tbl3, x_tbl2, t_offset

/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
li x_vec_i, 0
.Lloop_rvv_vl_vects:
/* Load source data */
slli t0, x_vec_i, 3
add t0,x_src,t0
ld x_ptr, 0(t0)
add x_ptr,x_ptr,x_pos

/* load source data */
add x_ptr, x_ptr, x_pos
vle8.v v_src, (x_ptr)

/* Split 4-bit lo; 4-bit hi */
Expand All @@ -131,14 +130,18 @@ gf_3vect_dot_prod_rvv:
vle8.v v_gft2_hi, (x_tbl2)
addi x_tbl2, x_tbl2, 16

/* Move to next source vector */
addi x_vec_i, x_vec_i, 8
add t0, x_src, x_vec_i
ld x_ptr, 0(t0)

/* Load next gf_table's */
vle8.v v_gft3_lo, (x_tbl3)
addi x_tbl3, x_tbl3, 16
vle8.v v_gft3_hi, (x_tbl3)
addi x_tbl3, x_tbl3, 16

/* dest 1 */
/* dest 1 */
vrgather.vv v26, v_gft1_lo, v_src_lo
vrgather.vv v27, v_gft1_hi, v_src_hi
vxor.vv v_dest1, v_dest1, v26
Expand All @@ -156,19 +159,16 @@ gf_3vect_dot_prod_rvv:
vxor.vv v_dest3, v_dest3, v26
vxor.vv v_dest3, v_dest3, v27

/* Move to next source vector */
addi x_vec_i, x_vec_i, 1

/* Check if we have processed all vectors */
blt x_vec_i, x_vec, .Lloop_rvv_vl_vects

/* Store destination data */
vse8.v v_dest1, (x_dest1)
vse8.v v_dest2, (x_dest2)
vse8.v v_dest3, (x_dest3)
add x_dest1,x_dest1, a7
add x_dest2,x_dest2, a7
add x_dest3,x_dest3, a7
add x_dest1, x_dest1, a7
add x_dest2, x_dest2, a7
add x_dest3, x_dest3, a7

add x_pos, x_pos, a7
j .Lloop_rvv_vl
Expand Down
2 changes: 1 addition & 1 deletion erasure_code/riscv64/gf_3vect_mad_rvv.S
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ gf_3vect_mad_rvv:
li t4, 16
blt x_len, t4, .return_fail

vsetvli t5, x0, e8, m1
vsetvli t5, x0, e8, m1, ta, ma

/* Load table 1 */
slli t4, x_vec_i, 5
Expand Down
38 changes: 21 additions & 17 deletions erasure_code/riscv64/gf_4vect_dot_prod_rvv.S
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,12 @@ gf_4vect_dot_prod_rvv:
sd s2, 16(sp)
sd s3, 24(sp)

vsetvli t0, x0, e8, m1 /* Set vector length to maximum */

vsetvli t0, x0, e8, m1, ta, ma
li x_pos, 0
slli t_offset, x_vec, 5

slli x_vec, x_vec, 3
slli t_offset, x_vec, 2

ld x_dest1, 0(x_dest)
ld x_dest2, 8(x_dest)
ld x_dest3, 16(x_dest)
Expand All @@ -111,20 +113,21 @@ gf_4vect_dot_prod_rvv:
vmv.v.i v_dest3, 0
vmv.v.i v_dest4, 0

/* x_vec, number of source vectors (ie. data blocks) */
li x_vec_i, 0

/* load source pointer */
ld x_ptr, 0(x_src)

/* Reset table pointers */
mv x_tbl1, x_tbl
add x_tbl2, x_tbl1, t_offset
add x_tbl3, x_tbl2, t_offset
add x_tbl4, x_tbl3, t_offset

/* Loop 2: x_vec, number of source vectors (ie. data blocks) */
li x_vec_i, 0
.Lloop_rvv_vl_vects:
/* Load source data */
slli a6, x_vec_i, 3
add a6,x_src,a6
ld x_ptr, 0(a6)
add x_ptr,x_ptr,x_pos
add x_ptr, x_ptr, x_pos

vle8.v v_src, (x_ptr)

Expand All @@ -142,6 +145,10 @@ gf_4vect_dot_prod_rvv:
vle8.v v_gft2_hi, (x_tbl2)
addi x_tbl2, x_tbl2, 16

/* Move to next source vector */
addi x_vec_i, x_vec_i, 8
add a6, x_src, x_vec_i
ld x_ptr, 0(a6)

/* Load next gf_table's */
vle8.v v_gft3_lo, (x_tbl3)
Expand Down Expand Up @@ -178,9 +185,6 @@ gf_4vect_dot_prod_rvv:
vxor.vv v_dest4, v_dest4, v26
vxor.vv v_dest4, v_dest4, v27

/* Move to next source vector */
addi x_vec_i, x_vec_i, 1

/* Check if we have processed all vectors */
blt x_vec_i, x_vec, .Lloop_rvv_vl_vects

Expand All @@ -189,16 +193,16 @@ gf_4vect_dot_prod_rvv:
vse8.v v_dest2, (x_dest2)
vse8.v v_dest3, (x_dest3)
vse8.v v_dest4, (x_dest4)
add x_dest1,x_dest1, t0
add x_dest2,x_dest2, t0
add x_dest3,x_dest3, t0
add x_dest4,x_dest4, t0
add x_dest1, x_dest1, t0
add x_dest2, x_dest2, t0
add x_dest3, x_dest3, t0
add x_dest4, x_dest4, t0
/* Increment position */
add x_pos, x_pos, t0
j .Lloop_rvv_vl

.return_pass:
/* restore callee-saved registers */
/* restore callee-saved registers */
ld s0, 0(sp)
ld s1, 8(sp)
ld s2, 16(sp)
Expand Down
2 changes: 1 addition & 1 deletion erasure_code/riscv64/gf_4vect_mad_rvv.S
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ gf_4vect_mad_rvv:
li t5, 16
blt x_len, t5, .return_fail

vsetvli t6, x0, e8, m1
vsetvli t6, x0, e8, m1, ta, ma

/* load table 1 */
slli t5, x_vec_i, 5
Expand Down
16 changes: 7 additions & 9 deletions erasure_code/riscv64/gf_5vect_dot_prod_rvv.S
Original file line number Diff line number Diff line change
Expand Up @@ -95,8 +95,7 @@ gf_5vect_dot_prod_rvv:
sd s4, 32(sp)
sd s5, 40(sp)

vsetvli a5, x0, e8, m1

vsetvli a5, x0, e8, m1, ta, ma
/* Initialize position */
li x_pos, 0

Expand Down Expand Up @@ -131,7 +130,7 @@ gf_5vect_dot_prod_rvv:
.Llooprvv_vl_vects:
/* Load source data */
slli a6, x_vec_i, 3
add a6,x_src,a6
add a6, x_src, a6
ld x_ptr, 0(a6)
add x_ptr, x_ptr, x_pos
vle8.v v_src, (x_ptr)
Expand Down Expand Up @@ -204,19 +203,18 @@ gf_5vect_dot_prod_rvv:
/* Check if we have processed all vectors */
blt x_vec_i, x_vec, .Llooprvv_vl_vects


vse8.v v_dest1, (x_dest1)
vse8.v v_dest2, (x_dest2)
vse8.v v_dest3, (x_dest3)
vse8.v v_dest4, (x_dest4)
vse8.v v_dest5, (x_dest5)

/* Store destination data */
add x_dest1,x_dest1,a5
add x_dest2,x_dest2,a5
add x_dest3,x_dest3,a5
add x_dest4,x_dest4,a5
add x_dest5,x_dest5,a5
add x_dest1, x_dest1, a5
add x_dest2, x_dest2, a5
add x_dest3, x_dest3, a5
add x_dest4, x_dest4, a5
add x_dest5, x_dest5, a5

/* Increment position */
add x_pos, x_pos, a5
Expand Down
2 changes: 1 addition & 1 deletion erasure_code/riscv64/gf_5vect_mad_rvv.S
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ gf_5vect_mad_rvv:
li t6, 16
blt x_len, t6, .return_fail

vsetvli a7, x0, e8, m1
vsetvli a7, x0, e8, m1, ta, ma

/* Load table 1 */
slli a6, x_vec_i, 5
Expand Down
Loading
Loading