From fd94836bb845a9143e35f12ec9414cfaac9c73a3 Mon Sep 17 00:00:00 2001 From: lvshuo Date: Tue, 29 Jul 2025 20:49:07 +0800 Subject: [PATCH 1/2] erasure_code: add optimization implementation reduce one slli instructions and remove the dependence between vle8.v and ld instructions gf5 and gf7 are not modified, +5 and +7 are not used in actual scenarios. Signed-off-by: Shuo Lv --- configure.ac | 4 +-- erasure_code/riscv64/gf_2vect_dot_prod_rvv.S | 15 +++++----- erasure_code/riscv64/gf_3vect_dot_prod_rvv.S | 27 +++++++++--------- erasure_code/riscv64/gf_4vect_dot_prod_rvv.S | 29 ++++++++++++-------- erasure_code/riscv64/gf_5vect_dot_prod_rvv.S | 3 +- erasure_code/riscv64/gf_6vect_dot_prod_rvv.S | 18 ++++++------ erasure_code/riscv64/gf_7vect_dot_prod_rvv.S | 4 +-- erasure_code/riscv64/gf_vect_dot_prod_rvv.S | 1 - 8 files changed, 51 insertions(+), 50 deletions(-) diff --git a/configure.ac b/configure.ac index 47f34278..a425c607 100644 --- a/configure.ac +++ b/configure.ac @@ -71,8 +71,8 @@ case "${CPU}" in AM_CONDITIONAL([HAVE_RVV], [false]) rvv=no] ) if test "x$rvv" = "xyes"; then - CFLAGS+=" -march=rv64gcv" - CCASFLAGS+=" -march=rv64gcv" + CFLAGS+=" -march=rv64gcv" + CCASFLAGS+=" -march=rv64gcv" fi AC_MSG_RESULT([$rvv]) ;; diff --git a/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S index 1f015952..11f7be33 100644 --- a/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S @@ -75,8 +75,9 @@ gf_2vect_dot_prod_rvv: blt x_len, t6, .return_fail vsetvli a5, x0, e8, m1 /* Set vector length to maximum */ - li x_pos, 0 + slli x_vec, x_vec, 3 + ld x_dest1, 0(x_dest) ld x_dest2, 8(x_dest) @@ -92,15 +93,12 @@ gf_2vect_dot_prod_rvv: /* gf_tbl base = (x_tbl + dest_idx * x_vec * 32) */ mv x_tbl1, x_tbl /* reset x_tbl1 */ - slli t6, x_vec, 5 + slli t6, x_vec, 2 add x_tbl2, x_tbl1, t6 /* reset x_tbl2 */ /* Loop 2: x_vec, number of source vectors (ie. data blocks) */ .Llooprvv_vl_vects: /* load src data */ - slli a6, x_vec_i, 3 - add a6,x_src,a6 - ld x_ptr, 0(a6) add x_ptr,x_ptr,x_pos vle8.v v_src, (x_ptr) /* load from: src base + pos offset */ @@ -120,6 +118,11 @@ gf_2vect_dot_prod_rvv: vle8.v v_gft2_hi, (x_tbl2) addi x_tbl2, x_tbl2, 16 + /* calc for next */ + addi x_vec_i, x_vec_i, 8 /* move x_vec_i to next */ + add a6, x_src, x_vec_i + ld x_ptr, 0(a6) + /* dest 1 */ /* table indexing, ie. gf(2^8) multiplication */ vrgather.vv v26, v_gft1_lo, v_src_lo @@ -134,8 +137,6 @@ gf_2vect_dot_prod_rvv: vxor.vv v_dest2, v_dest2, v26 vxor.vv v_dest2, v_dest2, v27 - /* calc for next */ - addi x_vec_i, x_vec_i, 1 /* move x_vec_i to next */ blt x_vec_i, x_vec, .Llooprvv_vl_vects /* end of Loop 2 */ diff --git a/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S index c617ab3e..2bf17dc3 100644 --- a/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S @@ -85,9 +85,10 @@ gf_3vect_dot_prod_rvv: sd s1, 8(sp) vsetvli a7, x0, e8, m1 /* Set vector length to maximum */ - li x_pos, 0 - slli t_offset, x_vec, 5 + slli x_vec, x_vec, 3 + + slli t_offset, x_vec, 2 ld x_dest1, 0(x_dest) ld x_dest2, 8(x_dest) ld x_dest3, 16(x_dest) @@ -101,20 +102,19 @@ gf_3vect_dot_prod_rvv: vmv.v.i v_dest2, 0 vmv.v.i v_dest3, 0 + /* Loop 2: x_vec, number of source vectors (ie. 
data blocks) */ + li x_vec_i, 0 + /* load source pointer */ + ld x_ptr, 0(x_src) + /* Reset table pointers */ mv x_tbl1, x_tbl add x_tbl2, x_tbl1, t_offset add x_tbl3, x_tbl2, t_offset - /* Loop 2: x_vec, number of source vectors (ie. data blocks) */ - li x_vec_i, 0 .Lloop_rvv_vl_vects: - /* Load source data */ - slli t0, x_vec_i, 3 - add t0,x_src,t0 - ld x_ptr, 0(t0) + /* load source data */ add x_ptr,x_ptr,x_pos - vle8.v v_src, (x_ptr) /* Split 4-bit lo; 4-bit hi */ @@ -131,6 +131,10 @@ gf_3vect_dot_prod_rvv: vle8.v v_gft2_hi, (x_tbl2) addi x_tbl2, x_tbl2, 16 + /* Move to next source vector */ + addi x_vec_i, x_vec_i, 8 + add t0, x_src, x_vec_i + ld x_ptr, 0(t0) /* Load next gf_table's */ vle8.v v_gft3_lo, (x_tbl3) @@ -138,7 +142,7 @@ gf_3vect_dot_prod_rvv: vle8.v v_gft3_hi, (x_tbl3) addi x_tbl3, x_tbl3, 16 -/* dest 1 */ + /* dest 1 */ vrgather.vv v26, v_gft1_lo, v_src_lo vrgather.vv v27, v_gft1_hi, v_src_hi vxor.vv v_dest1, v_dest1, v26 @@ -156,9 +160,6 @@ gf_3vect_dot_prod_rvv: vxor.vv v_dest3, v_dest3, v26 vxor.vv v_dest3, v_dest3, v27 - /* Move to next source vector */ - addi x_vec_i, x_vec_i, 1 - /* Check if we have processed all vectors */ blt x_vec_i, x_vec, .Lloop_rvv_vl_vects diff --git a/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S index ace146dc..c4f18a05 100644 --- a/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S @@ -91,10 +91,13 @@ gf_4vect_dot_prod_rvv: sd s2, 16(sp) sd s3, 24(sp) - vsetvli t0, x0, e8, m1 /* Set vector length to maximum */ - + # vsetvli t0, x0, e8, m1 /* Set vector length to maximum */ + vsetvli t0, x0, e8, m1 li x_pos, 0 - slli t_offset, x_vec, 5 + + slli x_vec, x_vec, 3 + slli t_offset, x_vec, 2 + ld x_dest1, 0(x_dest) ld x_dest2, 8(x_dest) ld x_dest3, 16(x_dest) @@ -111,19 +114,20 @@ gf_4vect_dot_prod_rvv: vmv.v.i v_dest3, 0 vmv.v.i v_dest4, 0 + /* x_vec, number of source vectors (ie. data blocks) */ + li x_vec_i, 0 + + /* load source pointer */ + ld x_ptr, 0(x_src) + /* Reset table pointers */ mv x_tbl1, x_tbl add x_tbl2, x_tbl1, t_offset add x_tbl3, x_tbl2, t_offset add x_tbl4, x_tbl3, t_offset - /* Loop 2: x_vec, number of source vectors (ie. 
data blocks) */ - li x_vec_i, 0 .Lloop_rvv_vl_vects: /* Load source data */ - slli a6, x_vec_i, 3 - add a6,x_src,a6 - ld x_ptr, 0(a6) add x_ptr,x_ptr,x_pos vle8.v v_src, (x_ptr) @@ -142,6 +146,10 @@ gf_4vect_dot_prod_rvv: vle8.v v_gft2_hi, (x_tbl2) addi x_tbl2, x_tbl2, 16 + /* Move to next source vector */ + addi x_vec_i, x_vec_i, 8 + add a6, x_src, x_vec_i + ld x_ptr, 0(a6) /* Load next gf_table's */ vle8.v v_gft3_lo, (x_tbl3) @@ -178,9 +186,6 @@ gf_4vect_dot_prod_rvv: vxor.vv v_dest4, v_dest4, v26 vxor.vv v_dest4, v_dest4, v27 - /* Move to next source vector */ - addi x_vec_i, x_vec_i, 1 - /* Check if we have processed all vectors */ blt x_vec_i, x_vec, .Lloop_rvv_vl_vects @@ -198,7 +203,7 @@ gf_4vect_dot_prod_rvv: j .Lloop_rvv_vl .return_pass: -/* restore callee-saved registers */ + /* restore callee-saved registers */ ld s0, 0(sp) ld s1, 8(sp) ld s2, 16(sp) diff --git a/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S index 0b5cf3ee..d26b320b 100644 --- a/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S @@ -96,7 +96,6 @@ gf_5vect_dot_prod_rvv: sd s5, 40(sp) vsetvli a5, x0, e8, m1 - /* Initialize position */ li x_pos, 0 @@ -131,7 +130,7 @@ gf_5vect_dot_prod_rvv: .Llooprvv_vl_vects: /* Load source data */ slli a6, x_vec_i, 3 - add a6,x_src,a6 + add a6, x_src, a6 ld x_ptr, 0(a6) add x_ptr, x_ptr, x_pos vle8.v v_src, (x_ptr) diff --git a/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S index 6cc9a168..f96032d9 100644 --- a/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S @@ -107,6 +107,8 @@ gf_6vect_dot_prod_rvv: /* initialize position */ li x_pos, 0 + slli x_vec, x_vec, 3 + /* load destination pointers */ ld x_dest1, 0(x14) # a4 is also x14 ld x_dest2, 8(x_dest) @@ -136,7 +138,7 @@ gf_6vect_dot_prod_rvv: /* initialize table pointers */ /* gf_tbl base = (x_tbl + dest_idx * x_vec * 32) */ mv x_tbl1, x_tbl - slli t0, x_vec, 5 + slli t0, x_vec, 2 add x_tbl2, x_tbl1, t0 add x_tbl3, x_tbl2, t0 add x_tbl4, x_tbl3, t0 @@ -145,11 +147,7 @@ gf_6vect_dot_prod_rvv: .Llooprvv_vl_vects: /* load source data */ - slli a6, x_vec_i, 3 - add a6,x_src,a6 - ld x_ptr, 0(a6) add x_ptr,x_ptr,x_pos - vle8.v v_src, (x_ptr) @@ -168,6 +166,11 @@ gf_6vect_dot_prod_rvv: vle8.v v_gft2_hi, (x_tbl2) addi x_tbl2, x_tbl2, 16 + /* load next source pointer */ + addi x_vec_i, x_vec_i,8 + add a6, x_src, x_vec_i + ld x_ptr, 0(a6) + vle8.v v_gft3_lo, (x_tbl3) addi x_tbl3, x_tbl3, 16 vle8.v v_gft3_hi, (x_tbl3) @@ -188,7 +191,6 @@ gf_6vect_dot_prod_rvv: vle8.v v_gft6_hi, (x_tbl6) addi x_tbl6, x_tbl6, 16 - /* dest 1 */ vrgather.vv v26, v_gft1_lo, v_src_lo vrgather.vv v27, v_gft1_hi, v_src_hi @@ -225,10 +227,6 @@ gf_6vect_dot_prod_rvv: vxor.vv v_dest6, v_dest6, v26 vxor.vv v_dest6, v_dest6, v27 - - /* load next source pointer */ - addi x_vec_i, x_vec_i,1 - /* check if we have processed all vectors */ blt x_vec_i, x_vec, .Llooprvv_vl_vects diff --git a/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S index d4cc1d72..facc5187 100644 --- a/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S @@ -113,7 +113,6 @@ gf_7vect_dot_prod_rvv: sd s8, 64(sp) vsetvli t0, x0, e8, m1 - /* initialize position */ li x_pos, 0 @@ -160,7 +159,7 @@ gf_7vect_dot_prod_rvv: .Llooprvv_vl_vects: /* load source data */ slli a5, x_vec_i, 3 - add a5,x_src,a5 + add a5, x_src, a5 ld x_ptr, 0(a5) add 
x_ptr,x_ptr,x_pos @@ -206,7 +205,6 @@ gf_7vect_dot_prod_rvv: vle8.v v_gft7_hi, (x_tbl7) addi x_tbl7, x_tbl7, 16 - /* dest 1 */ vrgather.vv v26, v_gft1_lo, v_src_lo vrgather.vv v27, v_gft1_hi, v_src_hi diff --git a/erasure_code/riscv64/gf_vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_vect_dot_prod_rvv.S index 471b65ed..9f379ee3 100644 --- a/erasure_code/riscv64/gf_vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_vect_dot_prod_rvv.S @@ -63,7 +63,6 @@ gf_vect_dot_prod_rvv: blt a0, t4, .return_fail vsetvli t5, zero, e8, m1 # Set vector length to maximum - # Initialize pos = 0 li t2, 0 From 757020acbe0ef51596f81852fa33f44298eed6ef Mon Sep 17 00:00:00 2001 From: lvshuo Date: Thu, 28 Aug 2025 16:43:19 +0800 Subject: [PATCH 2/2] erasure_code: set vsetvli to default parameter and add space Signed-off-by: Shuo Lv --- erasure_code/riscv64/gf_2vect_dot_prod_rvv.S | 8 ++++---- erasure_code/riscv64/gf_2vect_mad_rvv.S | 2 +- erasure_code/riscv64/gf_3vect_dot_prod_rvv.S | 11 +++++------ erasure_code/riscv64/gf_3vect_mad_rvv.S | 2 +- erasure_code/riscv64/gf_4vect_dot_prod_rvv.S | 13 ++++++------- erasure_code/riscv64/gf_4vect_mad_rvv.S | 2 +- erasure_code/riscv64/gf_5vect_dot_prod_rvv.S | 13 ++++++------- erasure_code/riscv64/gf_5vect_mad_rvv.S | 2 +- erasure_code/riscv64/gf_6vect_dot_prod_rvv.S | 19 +++++++++---------- erasure_code/riscv64/gf_6vect_mad_rvv.S | 2 +- erasure_code/riscv64/gf_7vect_dot_prod_rvv.S | 20 +++++++++----------- erasure_code/riscv64/gf_vect_dot_prod_rvv.S | 3 +-- erasure_code/riscv64/gf_vect_mad_rvv.S | 2 +- erasure_code/riscv64/gf_vect_mul_rvv.S | 4 ++-- 14 files changed, 48 insertions(+), 55 deletions(-) diff --git a/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S index 11f7be33..338b70c8 100644 --- a/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_2vect_dot_prod_rvv.S @@ -74,7 +74,7 @@ gf_2vect_dot_prod_rvv: li t6, 16 blt x_len, t6, .return_fail - vsetvli a5, x0, e8, m1 /* Set vector length to maximum */ + vsetvli a5, x0, e8, m1, ta, ma /* Set vector length to maximum */ li x_pos, 0 slli x_vec, x_vec, 3 @@ -99,7 +99,7 @@ gf_2vect_dot_prod_rvv: /* Loop 2: x_vec, number of source vectors (ie. 
data blocks) */ .Llooprvv_vl_vects: /* load src data */ - add x_ptr,x_ptr,x_pos + add x_ptr, x_ptr, x_pos vle8.v v_src, (x_ptr) /* load from: src base + pos offset */ /* split 4-bit lo; 4-bit hi */ @@ -143,8 +143,8 @@ gf_2vect_dot_prod_rvv: /* store dest data */ vse8.v v_dest1, (x_dest1) vse8.v v_dest2, (x_dest2) - add x_dest1,x_dest1,a5 - add x_dest2,x_dest2,a5 + add x_dest1, x_dest1, a5 + add x_dest2, x_dest2, a5 /* increment one vector length */ add x_pos, x_pos, a5 diff --git a/erasure_code/riscv64/gf_2vect_mad_rvv.S b/erasure_code/riscv64/gf_2vect_mad_rvv.S index fb90f3a9..fd569e57 100644 --- a/erasure_code/riscv64/gf_2vect_mad_rvv.S +++ b/erasure_code/riscv64/gf_2vect_mad_rvv.S @@ -71,7 +71,7 @@ gf_2vect_mad_rvv: li t3, 16 blt x_len, t3, .return_fail - vsetvli t4, x0, e8, m1 + vsetvli t4, x0, e8, m1, ta, ma /* load table 1 */ slli t3, x_vec_i, 5 diff --git a/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S index 2bf17dc3..6975c087 100644 --- a/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_3vect_dot_prod_rvv.S @@ -57,7 +57,6 @@ #define x_dest3 a5 #define t_offset a6 - /* vectors */ #define v_src v1 #define v_src_lo v2 @@ -84,7 +83,7 @@ gf_3vect_dot_prod_rvv: sd s0, 0(sp) sd s1, 8(sp) - vsetvli a7, x0, e8, m1 /* Set vector length to maximum */ + vsetvli a7, x0, e8, m1, ta, ma /* Set vector length to maximum */ li x_pos, 0 slli x_vec, x_vec, 3 @@ -114,7 +113,7 @@ gf_3vect_dot_prod_rvv: .Lloop_rvv_vl_vects: /* load source data */ - add x_ptr,x_ptr,x_pos + add x_ptr, x_ptr, x_pos vle8.v v_src, (x_ptr) /* Split 4-bit lo; 4-bit hi */ @@ -167,9 +166,9 @@ gf_3vect_dot_prod_rvv: vse8.v v_dest1, (x_dest1) vse8.v v_dest2, (x_dest2) vse8.v v_dest3, (x_dest3) - add x_dest1,x_dest1, a7 - add x_dest2,x_dest2, a7 - add x_dest3,x_dest3, a7 + add x_dest1, x_dest1, a7 + add x_dest2, x_dest2, a7 + add x_dest3, x_dest3, a7 add x_pos, x_pos, a7 j .Lloop_rvv_vl diff --git a/erasure_code/riscv64/gf_3vect_mad_rvv.S b/erasure_code/riscv64/gf_3vect_mad_rvv.S index 8d33471c..920a0217 100644 --- a/erasure_code/riscv64/gf_3vect_mad_rvv.S +++ b/erasure_code/riscv64/gf_3vect_mad_rvv.S @@ -75,7 +75,7 @@ gf_3vect_mad_rvv: li t4, 16 blt x_len, t4, .return_fail - vsetvli t5, x0, e8, m1 + vsetvli t5, x0, e8, m1, ta, ma /* Load table 1 */ slli t4, x_vec_i, 5 diff --git a/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S index c4f18a05..0a627e56 100644 --- a/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_4vect_dot_prod_rvv.S @@ -91,8 +91,7 @@ gf_4vect_dot_prod_rvv: sd s2, 16(sp) sd s3, 24(sp) - # vsetvli t0, x0, e8, m1 /* Set vector length to maximum */ - vsetvli t0, x0, e8, m1 + vsetvli t0, x0, e8, m1, ta, ma li x_pos, 0 slli x_vec, x_vec, 3 @@ -128,7 +127,7 @@ gf_4vect_dot_prod_rvv: .Lloop_rvv_vl_vects: /* Load source data */ - add x_ptr,x_ptr,x_pos + add x_ptr, x_ptr, x_pos vle8.v v_src, (x_ptr) @@ -194,10 +193,10 @@ gf_4vect_dot_prod_rvv: vse8.v v_dest2, (x_dest2) vse8.v v_dest3, (x_dest3) vse8.v v_dest4, (x_dest4) - add x_dest1,x_dest1, t0 - add x_dest2,x_dest2, t0 - add x_dest3,x_dest3, t0 - add x_dest4,x_dest4, t0 + add x_dest1, x_dest1, t0 + add x_dest2, x_dest2, t0 + add x_dest3, x_dest3, t0 + add x_dest4, x_dest4, t0 /* Increment position */ add x_pos, x_pos, t0 j .Lloop_rvv_vl diff --git a/erasure_code/riscv64/gf_4vect_mad_rvv.S b/erasure_code/riscv64/gf_4vect_mad_rvv.S index 48b35eae..3c98bc7f 100644 --- a/erasure_code/riscv64/gf_4vect_mad_rvv.S +++ 
b/erasure_code/riscv64/gf_4vect_mad_rvv.S @@ -79,7 +79,7 @@ gf_4vect_mad_rvv: li t5, 16 blt x_len, t5, .return_fail - vsetvli t6, x0, e8, m1 + vsetvli t6, x0, e8, m1, ta, ma /* load table 1 */ slli t5, x_vec_i, 5 diff --git a/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S index d26b320b..629ef314 100644 --- a/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_5vect_dot_prod_rvv.S @@ -95,7 +95,7 @@ gf_5vect_dot_prod_rvv: sd s4, 32(sp) sd s5, 40(sp) - vsetvli a5, x0, e8, m1 + vsetvli a5, x0, e8, m1, ta, ma /* Initialize position */ li x_pos, 0 @@ -203,7 +203,6 @@ gf_5vect_dot_prod_rvv: /* Check if we have processed all vectors */ blt x_vec_i, x_vec, .Llooprvv_vl_vects - vse8.v v_dest1, (x_dest1) vse8.v v_dest2, (x_dest2) vse8.v v_dest3, (x_dest3) @@ -211,11 +210,11 @@ gf_5vect_dot_prod_rvv: vse8.v v_dest5, (x_dest5) /* Store destination data */ - add x_dest1,x_dest1,a5 - add x_dest2,x_dest2,a5 - add x_dest3,x_dest3,a5 - add x_dest4,x_dest4,a5 - add x_dest5,x_dest5,a5 + add x_dest1, x_dest1, a5 + add x_dest2, x_dest2, a5 + add x_dest3, x_dest3, a5 + add x_dest4, x_dest4, a5 + add x_dest5, x_dest5, a5 /* Increment position */ add x_pos, x_pos, a5 diff --git a/erasure_code/riscv64/gf_5vect_mad_rvv.S b/erasure_code/riscv64/gf_5vect_mad_rvv.S index 57227ed2..b4f5954e 100644 --- a/erasure_code/riscv64/gf_5vect_mad_rvv.S +++ b/erasure_code/riscv64/gf_5vect_mad_rvv.S @@ -83,7 +83,7 @@ gf_5vect_mad_rvv: li t6, 16 blt x_len, t6, .return_fail - vsetvli a7, x0, e8, m1 + vsetvli a7, x0, e8, m1, ta, ma /* Load table 1 */ slli a6, x_vec_i, 5 diff --git a/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S index f96032d9..f5789fe8 100644 --- a/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_6vect_dot_prod_rvv.S @@ -102,7 +102,7 @@ gf_6vect_dot_prod_rvv: sd s7, 56(sp) li t0, 0x0F - vsetvli a5, x0, e8, m1 + vsetvli a5, x0, e8, m1, ta, ma /* initialize position */ li x_pos, 0 @@ -147,10 +147,9 @@ gf_6vect_dot_prod_rvv: .Llooprvv_vl_vects: /* load source data */ - add x_ptr,x_ptr,x_pos + add x_ptr, x_ptr, x_pos vle8.v v_src, (x_ptr) - /* split 4-bit lo; 4-bit hi */ vand.vi v_src_lo, v_src, 0x0F vsrl.vi v_src_hi, v_src, 4 @@ -167,7 +166,7 @@ gf_6vect_dot_prod_rvv: addi x_tbl2, x_tbl2, 16 /* load next source pointer */ - addi x_vec_i, x_vec_i,8 + addi x_vec_i, x_vec_i, 8 add a6, x_src, x_vec_i ld x_ptr, 0(a6) @@ -238,12 +237,12 @@ gf_6vect_dot_prod_rvv: vse8.v v_dest5, (x_dest5) # x_dest5 vse8.v v_dest6, (x_dest6) # x_dest6 - add x_dest1,x_dest1, a5 - add x_dest2,x_dest2, a5 - add x_dest3,x_dest3, a5 - add x_dest4,x_dest4, a5 - add x_dest5,x_dest5, a5 - add x_dest6,x_dest6, a5 + add x_dest1, x_dest1, a5 + add x_dest2, x_dest2, a5 + add x_dest3, x_dest3, a5 + add x_dest4, x_dest4, a5 + add x_dest5, x_dest5, a5 + add x_dest6, x_dest6, a5 /* increment position */ add x_pos, x_pos, a5 diff --git a/erasure_code/riscv64/gf_6vect_mad_rvv.S b/erasure_code/riscv64/gf_6vect_mad_rvv.S index 95d4a666..dbe093a0 100644 --- a/erasure_code/riscv64/gf_6vect_mad_rvv.S +++ b/erasure_code/riscv64/gf_6vect_mad_rvv.S @@ -91,7 +91,7 @@ gf_6vect_mad_rvv: addi sp, sp, -16 sd s8, 0(sp) - vsetvli a6, x0, e8, m1 + vsetvli a6, x0, e8, m1, ta, ma /* Load table 1 */ slli s8, x_vec_i, 5 diff --git a/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S index facc5187..bf668716 100644 --- a/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S +++ 
b/erasure_code/riscv64/gf_7vect_dot_prod_rvv.S @@ -94,7 +94,6 @@ #define v_gft7_lo v23 #define v_gft7_hi v24 - gf_7vect_dot_prod_rvv: /* less than 16 bytes, return_fail */ li t0, 16 @@ -112,7 +111,7 @@ gf_7vect_dot_prod_rvv: sd s7, 56(sp) sd s8, 64(sp) - vsetvli t0, x0, e8, m1 + vsetvli t0, x0, e8, m1, ta, ma /* initialize position */ li x_pos, 0 @@ -161,7 +160,7 @@ gf_7vect_dot_prod_rvv: slli a5, x_vec_i, 3 add a5, x_src, a5 ld x_ptr, 0(a5) - add x_ptr,x_ptr,x_pos + add x_ptr, x_ptr, x_pos vle8.v v_src, (x_ptr) @@ -241,7 +240,6 @@ gf_7vect_dot_prod_rvv: vxor.vv v_dest6, v_dest6, v26 vxor.vv v_dest6, v_dest6, v27 - /* GF multiplication and accumulation for dest7 */ vrgather.vv v26, v_gft7_lo, v_src_lo vrgather.vv v27, v_gft7_hi, v_src_hi @@ -261,13 +259,13 @@ gf_7vect_dot_prod_rvv: vse8.v v_dest6, (x_dest6) vse8.v v_dest7, (x_dest7) - add x_dest1,x_dest1, t0 - add x_dest2,x_dest2, t0 - add x_dest3,x_dest3, t0 - add x_dest4,x_dest4, t0 - add x_dest5,x_dest5, t0 - add x_dest6,x_dest6, t0 - add x_dest7,x_dest7, t0 + add x_dest1, x_dest1, t0 + add x_dest2, x_dest2, t0 + add x_dest3, x_dest3, t0 + add x_dest4, x_dest4, t0 + add x_dest5, x_dest5, t0 + add x_dest6, x_dest6, t0 + add x_dest7, x_dest7, t0 /* increment one vector length */ add x_pos, x_pos, t0 diff --git a/erasure_code/riscv64/gf_vect_dot_prod_rvv.S b/erasure_code/riscv64/gf_vect_dot_prod_rvv.S index 9f379ee3..a4af6d7c 100644 --- a/erasure_code/riscv64/gf_vect_dot_prod_rvv.S +++ b/erasure_code/riscv64/gf_vect_dot_prod_rvv.S @@ -27,7 +27,6 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ######################################################################## - # RISC-V RVV implementation of gf_vect_dot_prod_rvv # Function: gf_vect_dot_prod_rvv @@ -62,7 +61,7 @@ gf_vect_dot_prod_rvv: li t4, 16 blt a0, t4, .return_fail - vsetvli t5, zero, e8, m1 # Set vector length to maximum + vsetvli t5, zero, e8, m1, ta, ma # Set vector length to maximum # Initialize pos = 0 li t2, 0 diff --git a/erasure_code/riscv64/gf_vect_mad_rvv.S b/erasure_code/riscv64/gf_vect_mad_rvv.S index 2c9aeb86..bb900514 100644 --- a/erasure_code/riscv64/gf_vect_mad_rvv.S +++ b/erasure_code/riscv64/gf_vect_mad_rvv.S @@ -65,7 +65,7 @@ gf_vect_mad_rvv: li t1, 16 blt x_len, t1, .return_fail - vsetvli t2, x0, e8, m1 + vsetvli t2, x0, e8, m1, ta, ma /* x_tbl += x_vec_i * 2^5 */ slli t1, x_vec_i, 5 diff --git a/erasure_code/riscv64/gf_vect_mul_rvv.S b/erasure_code/riscv64/gf_vect_mul_rvv.S index 92a8982f..aa72d1d4 100644 --- a/erasure_code/riscv64/gf_vect_mul_rvv.S +++ b/erasure_code/riscv64/gf_vect_mul_rvv.S @@ -67,7 +67,7 @@ gf_vect_mul_rvv: andi x_tmp, x_len, 0x1F bnez x_tmp, .return_fail - vsetvli t6, x0, e8, m1 + vsetvli t6, x0, e8, m1, ta, ma /* Load pre-calculated constants into v_gft1_lo and v_gft1_hi */ vle8.v v_gft1_lo, (x_tbl) @@ -79,7 +79,7 @@ gf_vect_mul_rvv: .Llooprvv_vl: /* Load source data into v_src */ - add x_ptr,x_src,x_pos + add x_ptr, x_src, x_pos vle8.v v_src, (x_ptr) /* Split 4-bit lo and 4-bit hi */
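
For readers of the series, here is a minimal before/after sketch of the inner-loop restructuring the first patch applies, abridged from gf_2vect_dot_prod_rvv.S. It is an illustration only, not part of either patch: the gf-table loads, the vrgather/vxor GF(2^8) math, the destination stores, and the outer length loop are elided, and register/label names follow that file's #defines.

    /* before: the source-pointer load sits directly in front of the
       vector load that consumes it, and the index is scaled every pass */
    .Llooprvv_vl_vects:
        slli    a6, x_vec_i, 3          /* scale element index to a byte offset */
        add     a6, x_src, a6
        ld      x_ptr, 0(a6)            /* load src[i] pointer ...              */
        add     x_ptr, x_ptr, x_pos
        vle8.v  v_src, (x_ptr)          /* ... and immediately consume it       */
        /* ... gf-table loads, vrgather/vxor accumulate ... */
        addi    x_vec_i, x_vec_i, 1
        blt     x_vec_i, x_vec, .Llooprvv_vl_vects

    /* after: x_vec and x_vec_i are kept as byte offsets (one slli hoisted out of
       the loop), and the next iteration's pointer is loaded early so the ld can
       complete before the next vle8.v needs its result */
        slli    x_vec, x_vec, 3         /* done once, before the loop */
        ld      x_ptr, 0(x_src)         /* pointer for the first iteration */
    .Llooprvv_vl_vects:
        add     x_ptr, x_ptr, x_pos
        vle8.v  v_src, (x_ptr)
        /* ... gf-table loads ... */
        addi    x_vec_i, x_vec_i, 8     /* byte-offset increment replaces the slli */
        add     a6, x_src, x_vec_i
        ld      x_ptr, 0(a6)            /* fetch the next src pointer while the GF math runs */
        /* ... vrgather/vxor accumulate ... */
        blt     x_vec_i, x_vec, .Llooprvv_vl_vects

Because x_vec is pre-scaled by 8, the per-destination table stride becomes slli t6, x_vec, 2 (8 * 4 = 32 bytes per source vector), which is why those shifts change from 5 to 2 throughout the series.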