diff --git a/Makefile.riscv64 b/Makefile.riscv64 index cbabcadab7..cffc77e803 100644 --- a/Makefile.riscv64 +++ b/Makefile.riscv64 @@ -6,23 +6,20 @@ ifeq ($(CORE), x280) CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static endif -ifeq ($(CORE), RISCV64_ZVL256B) +RISCV64_OPT = rv64imafdcv ifeq ($(BUILD_HFLOAT16), 1) -CCOMMON_OPT += -march=rv64imafdcv_zvl256b_zvfh_zfh -mabi=lp64d -FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d -else -CCOMMON_OPT += -march=rv64imafdcv_zvl256b -mabi=lp64d -FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d +RISCV64_OPT := $(RISCV64_OPT)_zvfh_zfh endif +ifeq ($(BUILD_BFLOAT16), 1) +RISCV64_OPT := $(RISCV64_OPT)_zfbfmin_zvfbfmin_zvfbfwma endif -ifeq ($(CORE), RISCV64_ZVL128B) -ifeq ($(BUILD_HFLOAT16), 1) -CCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d -FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d -else -CCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d -FCOMMON_OPT += -march=rv64imafdcv -mabi=lp64d +ifeq ($(CORE), RISCV64_ZVL256B) +CCOMMON_OPT += -march=$(RISCV64_OPT)_zvl256b -mabi=lp64d +FCOMMON_OPT += -march=$(RISCV64_OPT) -mabi=lp64d endif +ifeq ($(CORE), RISCV64_ZVL128B) +CCOMMON_OPT += -march=$(RISCV64_OPT) -mabi=lp64d +FCOMMON_OPT += -march=$(RISCV64_OPT) -mabi=lp64d endif ifeq ($(CORE), RISCV64_GENERIC) CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d diff --git a/kernel/riscv64/KERNEL.RISCV64_ZVL128B b/kernel/riscv64/KERNEL.RISCV64_ZVL128B index 03c8a4c95f..8fa59ee507 100644 --- a/kernel/riscv64/KERNEL.RISCV64_ZVL128B +++ b/kernel/riscv64/KERNEL.RISCV64_ZVL128B @@ -256,7 +256,7 @@ ifndef ZGEMM_BETA ZGEMM_BETA = zgemm_beta_rvv.c endif -ifeq ($(BUILD_BFLOAT16), 1) +ifeq ($(BUILD_HFLOAT16), 1) SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl128b.c SHGEMMONCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_N).c SHGEMMOTCOPY = ../generic/gemm_tcopy_$(SHGEMM_UNROLL_N).c @@ -267,5 +267,16 @@ SHGEMM_BETA = gemm_beta_rvv.c endif endif +ifeq ($(BUILD_BFLOAT16), 1) +SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_zvl128b.c +SBGEMMONCOPY = ../generic/gemm_ncopy_$(SBGEMM_UNROLL_N).c +SBGEMMOTCOPY = ../generic/gemm_tcopy_$(SBGEMM_UNROLL_N).c +SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX) +SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX) +ifndef SBGEMM_BETA +SBGEMM_BETA = gemm_beta_rvv.c +endif +endif + DOMATCOPY_CT = omatcopy_ct_rvv.c SOMATCOPY_CT = omatcopy_ct_rvv.c diff --git a/kernel/riscv64/KERNEL.RISCV64_ZVL256B b/kernel/riscv64/KERNEL.RISCV64_ZVL256B index d42379e165..df1307da64 100644 --- a/kernel/riscv64/KERNEL.RISCV64_ZVL256B +++ b/kernel/riscv64/KERNEL.RISCV64_ZVL256B @@ -223,7 +223,7 @@ DOMATCOPY_CT = omatcopy_ct_rvv.c SOMATCOPY_CT = omatcopy_ct_rvv.c -ifeq ($(BUILD_BFLOAT16), 1) +ifeq ($(BUILD_HFLOAT16), 1) SHGEMMKERNEL = shgemm_kernel_$(SHGEMM_UNROLL_M)x$(SHGEMM_UNROLL_N)_zvl256b.c ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N)) SHGEMMINCOPY = ../generic/gemm_ncopy_$(SHGEMM_UNROLL_M).c @@ -240,5 +240,22 @@ SHGEMM_BETA = gemm_beta_rvv.c endif endif +ifeq ($(BUILD_BFLOAT16), 1) +SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_zvl256b.c +ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N)) +SBGEMMINCOPY = ../generic/gemm_ncopy_$(SBGEMM_UNROLL_M).c +SBGEMMITCOPY = ../generic/gemm_tcopy_$(SBGEMM_UNROLL_M).c +SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX) +SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif +SBGEMMONCOPY = ../generic/gemm_ncopy_$(SBGEMM_UNROLL_N).c +SBGEMMOTCOPY = ../generic/gemm_tcopy_$(SBGEMM_UNROLL_N).c +SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX) +SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX) +ifndef SBGEMM_BETA +SBGEMM_BETA = gemm_beta_rvv.c +endif +endif + SAXPBYKERNEL = axpby_vector_v2.c DAXPBYKERNEL = axpby_vector_v2.c diff --git a/kernel/riscv64/sbgemm_kernel_16x8_zvl256b.c b/kernel/riscv64/sbgemm_kernel_16x8_zvl256b.c new file mode 100644 index 0000000000..bded873b8e --- /dev/null +++ b/kernel/riscv64/sbgemm_kernel_16x8_zvl256b.c @@ -0,0 +1,851 @@ +#include "common.h" +#include + +int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, FLOAT *C, BLASLONG ldc) +{ + BLASLONG gvl = 0; + BLASLONG m_top = 0; + BLASLONG n_top = 0; + + // -- MAIN PASS + for (BLASLONG j=0; j + +int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, IFLOAT *A, IFLOAT *B, FLOAT *C, BLASLONG ldc) +{ + BLASLONG gvl = 0; + BLASLONG m_top = 0; + BLASLONG n_top = 0; + + // -- MAIN PASS + for (BLASLONG j=0; j