Skip to content

Bump xuantie toolchains #5393

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/c910v.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ jobs:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
env:
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1698113812618
toolchain_file_name: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0-20231018.tar.gz
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1749714096626
toolchain_file_name: Xuantie-900-gcc-linux-6.6.0-glibc-x86_64-V3.1.0-20250522.tar.gz
strategy:
fail-fast: false
matrix:
Expand Down Expand Up @@ -77,7 +77,7 @@ jobs:
run: |
wget ${xuetie_toolchain}/${toolchain_file_name}
tar -xvf ${toolchain_file_name} -C /opt
export PATH="/opt/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0/bin:$PATH"
export PATH="/opt/Xuantie-900-gcc-linux-6.6.0-glibc-x86_64-V3.1.0/bin:$PATH"

make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

Expand Down
2 changes: 1 addition & 1 deletion Makefile.prebuild
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ TARGET_FLAGS = -mips64r6
endif

ifeq ($(TARGET), C910V)
TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d
TARGET_FLAGS = -march=rv64gc_zfh_xtheadc_xtheadvector -mabi=lp64d
endif

ifeq ($(TARGET), CK860FV)
Expand Down
4 changes: 2 additions & 2 deletions Makefile.riscv64
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
ifeq ($(CORE), C910V)
CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920
FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static
CCOMMON_OPT += -march=rv64imafdc_zfh_xtheadc_xtheadvector -mabi=lp64d -mtune=c920
FCOMMON_OPT += -march=rv64imafdc_zfh_xtheadc_xtheadvector -mabi=lp64d -mtune=c920 -static
endif
ifeq ($(CORE), x280)
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d
Expand Down
2 changes: 1 addition & 1 deletion common_macro.h
Original file line number Diff line number Diff line change
Expand Up @@ -2709,7 +2709,7 @@
#ifndef ASSEMBLER
#if !defined(DYNAMIC_ARCH) \
&& (defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) \
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) || defined(ARCH_ALPHA))
|| defined(ARCH_LOONGARCH64) || defined(ARCH_E2K) || defined(ARCH_ALPHA) || defined(ARCH_RISCV64))
extern BLASLONG gemm_offset_a;
extern BLASLONG gemm_offset_b;
extern BLASLONG bgemm_p;
Expand Down
6 changes: 0 additions & 6 deletions common_riscv64.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,13 +93,7 @@ static inline int blas_quickdivide(blasint x, blasint y){
# include <riscv_vector.h>
#endif

#if defined( __riscv_xtheadc ) && defined( __riscv_v ) && ( __riscv_v <= 7000 )
// t-head toolchain uses obsolete rvv intrinsics, can't build for C910V without this
#define RISCV_0p10_INTRINSICS
#define RISCV_RVV(x) x
#else
#define RISCV_RVV(x) __riscv_ ## x
#endif

#if defined(C910V) || defined(RISCV64_ZVL256B)
# if !defined(DOUBLE)
Expand Down
126 changes: 63 additions & 63 deletions kernel/riscv64/dsdot_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,114 +37,114 @@ double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
vfloat32m2_t vx, vy;
unsigned int gvl = 0;
vfloat64m1_t v_res, v_z0;
gvl = vsetvlmax_e64m1();
v_res = vfmv_v_f_f64m1(0, gvl);
v_z0 = vfmv_v_f_f64m1(0, gvl);
gvl = __riscv_vsetvlmax_e64m1();
v_res = __riscv_vfmv_v_f_f64m1(0, gvl);
v_z0 = __riscv_vfmv_v_f_f64m1(0, gvl);

if(inc_x == 1 && inc_y == 1){
gvl = vsetvl_e64m4(n);
vr = vfmv_v_f_f64m4(0, gvl);
gvl = __riscv_vsetvl_e64m4(n);
vr = __riscv_vfmv_v_f_f64m4(0, gvl);
for(i=0,j=0; i<n/gvl; i++){
vx = vle32_v_f32m2(&x[j], gvl);
vy = vle32_v_f32m2(&y[j], gvl);
vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
vx = __riscv_vle32_v_f32m2(&x[j], gvl);
vy = __riscv_vle32_v_f32m2(&y[j], gvl);
vr = __riscv_vfwmacc_vv_f64m4(vr, vx, vy, gvl);
j += gvl;
}
if(j > 0){
v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
dot += (double)vfmv_f_s_f64m1_f64(v_res);
v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);
}
//tail
if(j < n){
gvl = vsetvl_e64m4(n-j);
vx = vle32_v_f32m2(&x[j], gvl);
vy = vle32_v_f32m2(&y[j], gvl);
vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
//vr = vfdot_vv_f32m2(vx, vy, gvl);
vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
dot += (double)vfmv_f_s_f64m1_f64(v_res);
gvl = __riscv_vsetvl_e64m4(n-j);
vx = __riscv_vle32_v_f32m2(&x[j], gvl);
vy = __riscv_vle32_v_f32m2(&y[j], gvl);
vfloat64m4_t vz = __riscv_vfmv_v_f_f64m4(0, gvl);
//vr = __riscv_vfdot_vv_f32m2(vx, vy, gvl);
vr = __riscv_vfwmacc_vv_f64m4(vz, vx, vy, gvl);
v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);
}
}else if(inc_y == 1){
gvl = vsetvl_e64m4(n);
vr = vfmv_v_f_f64m4(0, gvl);
gvl = __riscv_vsetvl_e64m4(n);
vr = __riscv_vfmv_v_f_f64m4(0, gvl);
int stride_x = inc_x * sizeof(FLOAT);
for(i=0,j=0; i<n/gvl; i++){
vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
vy = vle32_v_f32m2(&y[j], gvl);
vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
vx = __riscv_vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
vy = __riscv_vle32_v_f32m2(&y[j], gvl);
vr = __riscv_vfwmacc_vv_f64m4(vr, vx, vy, gvl);
j += gvl;
}
if(j > 0){
v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
dot += (double)vfmv_f_s_f64m1_f64(v_res);
v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);

}
//tail
if(j < n){
gvl = vsetvl_e64m4(n-j);
vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
vy = vle32_v_f32m2(&y[j], gvl);
vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
//vr = vfdot_vv_f32m2(vx, vy, gvl);
vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
dot += (double)vfmv_f_s_f64m1_f64(v_res);
gvl = __riscv_vsetvl_e64m4(n-j);
vx = __riscv_vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
vy = __riscv_vle32_v_f32m2(&y[j], gvl);
vfloat64m4_t vz = __riscv_vfmv_v_f_f64m4(0, gvl);
//vr = __riscv_vfdot_vv_f32m2(vx, vy, gvl);
vr = __riscv_vfwmacc_vv_f64m4(vz, vx, vy, gvl);
v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);

}
}else if(inc_x == 1){
gvl = vsetvl_e64m4(n);
vr = vfmv_v_f_f64m4(0, gvl);
gvl = __riscv_vsetvl_e64m4(n);
vr = __riscv_vfmv_v_f_f64m4(0, gvl);
int stride_y = inc_y * sizeof(FLOAT);
for(i=0,j=0; i<n/gvl; i++){
vx = vle32_v_f32m2(&x[j], gvl);
vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
vx = __riscv_vle32_v_f32m2(&x[j], gvl);
vy = __riscv_vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
vr = __riscv_vfwmacc_vv_f64m4(vr, vx, vy, gvl);
j += gvl;
}
if(j > 0){
v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
dot += (double)vfmv_f_s_f64m1_f64(v_res);
v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);

}
//tail
if(j < n){
gvl = vsetvl_e64m4(n-j);
vx = vle32_v_f32m2(&x[j], gvl);
vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
//vr = vfdot_vv_f32m2(vx, vy, gvl);
vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
dot += (double)vfmv_f_s_f64m1_f64(v_res);
gvl = __riscv_vsetvl_e64m4(n-j);
vx = __riscv_vle32_v_f32m2(&x[j], gvl);
vy = __riscv_vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
vfloat64m4_t vz = __riscv_vfmv_v_f_f64m4(0, gvl);
//vr = __riscv_vfdot_vv_f32m2(vx, vy, gvl);
vr = __riscv_vfwmacc_vv_f64m4(vz, vx, vy, gvl);
v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);

}
}else{
gvl = vsetvl_e64m4(n);
vr = vfmv_v_f_f64m4(0, gvl);
gvl = __riscv_vsetvl_e64m4(n);
vr = __riscv_vfmv_v_f_f64m4(0, gvl);
int stride_x = inc_x * sizeof(FLOAT);
int stride_y = inc_y * sizeof(FLOAT);
for(i=0,j=0; i<n/gvl; i++){
vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
vr = vfwmacc_vv_f64m4(vr, vx, vy, gvl);
vx = __riscv_vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
vy = __riscv_vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
vr = __riscv_vfwmacc_vv_f64m4(vr, vx, vy, gvl);
j += gvl;
}
if(j > 0){
v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
dot += (double)vfmv_f_s_f64m1_f64(v_res);
v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);

}
//tail
if(j < n){
gvl = vsetvl_e64m4(n-j);
vx = vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
vy = vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
vfloat64m4_t vz = vfmv_v_f_f64m4(0, gvl);
//vr = vfdot_vv_f32m2(vx, vy, gvl);
vr = vfwmacc_vv_f64m4(vz, vx, vy, gvl);
v_res = vfredusum_vs_f64m4_f64m1(v_res, vr, v_z0, gvl);
dot += (double)vfmv_f_s_f64m1_f64(v_res);
gvl = __riscv_vsetvl_e64m4(n-j);
vx = __riscv_vlse32_v_f32m2(&x[j*inc_x], stride_x, gvl);
vy = __riscv_vlse32_v_f32m2(&y[j*inc_y], stride_y, gvl);
vfloat64m4_t vz = __riscv_vfmv_v_f_f64m4(0, gvl);
//vr = __riscv_vfdot_vv_f32m2(vx, vy, gvl);
vr = __riscv_vfwmacc_vv_f64m4(vz, vx, vy, gvl);
v_res = __riscv_vfredusum_vs_f64m4_f64m1(vr, v_z0, gvl);
dot += (double)__riscv_vfmv_f_s_f64m1_f64(v_res);

}
}
Expand Down
Loading