Skip to content

Commit c00afc8

Browse files
committed
Add vectorized packing routines and use them for ZVL128B and ZVL256B. They can be more than 3x faster than the generic scalar functions.
1 parent 9a64b32 commit c00afc8

File tree

6 files changed

+729
-1
lines changed

6 files changed

+729
-1
lines changed

kernel/riscv64/KERNEL.RISCV64_ZVL128B

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,27 +96,47 @@ CGEMVTKERNEL = zgemv_t_rvv.c
9696
ZGEMVTKERNEL = zgemv_t_rvv.c
9797

9898
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_zvl128b.c
# Choose the SGEMM ONCOPY/OTCOPY packing sources from SGEMM_UNROLL_N:
# $(filter ...) is non-empty only when the unroll factor is 4, 8 or 16, in
# which case the *_rvv.c sources are used; any other value keeps the
# sources under ../generic.
99+
ifneq ($(filter $(SGEMM_UNROLL_N),4 8 16),)
100+
SGEMMONCOPY = gemm_ncopy_$(SGEMM_UNROLL_N)_rvv.c
101+
SGEMMOTCOPY = gemm_tcopy_$(SGEMM_UNROLL_N)_rvv.c
102+
else
99103
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
100104
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
105+
endif
101106
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
102107
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
103108

# The INCOPY/ITCOPY variables are only set when the M and N unroll factors
# differ. Their sources are chosen from SGEMM_UNROLL_M with the same
# 4/8/16 test: *_rvv.c on a match, ../generic otherwise.
104109
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
110+
ifneq ($(filter $(SGEMM_UNROLL_M),4 8 16),)
111+
SGEMMINCOPY = gemm_ncopy_$(SGEMM_UNROLL_M)_rvv.c
112+
SGEMMITCOPY = gemm_tcopy_$(SGEMM_UNROLL_M)_rvv.c
113+
else
105114
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
106115
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
116+
endif
107117
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
108118
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
109119
endif
110120

111121
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N)_zvl128b.c
# Choose the DGEMM ONCOPY/OTCOPY packing sources from DGEMM_UNROLL_N:
# $(filter ...) is non-empty only when the unroll factor is 4, 8 or 16, in
# which case the *_rvv.c sources are used; any other value keeps the
# sources under ../generic.
122+
ifneq ($(filter $(DGEMM_UNROLL_N),4 8 16),)
123+
DGEMMONCOPY = gemm_ncopy_$(DGEMM_UNROLL_N)_rvv.c
124+
DGEMMOTCOPY = gemm_tcopy_$(DGEMM_UNROLL_N)_rvv.c
125+
else
112126
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
113127
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
128+
endif
114129
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
115130
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
116131

# The INCOPY/ITCOPY variables are only set when the M and N unroll factors
# differ. Their sources are chosen from DGEMM_UNROLL_M with the same
# 4/8/16 test: *_rvv.c on a match, ../generic otherwise.
117132
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
133+
ifneq ($(filter $(DGEMM_UNROLL_M),4 8 16),)
134+
DGEMMINCOPY = gemm_ncopy_$(DGEMM_UNROLL_M)_rvv.c
135+
DGEMMITCOPY = gemm_tcopy_$(DGEMM_UNROLL_M)_rvv.c
136+
else
118137
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
119138
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
139+
endif
120140
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
121141
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
122142
endif
@@ -255,4 +275,4 @@ SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
255275
ifndef SHGEMM_BETA
256276
SHGEMM_BETA = gemm_beta_rvv.c
257277
endif
258-
endif
278+
endif

kernel/riscv64/KERNEL.RISCV64_ZVL256B

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,25 +96,45 @@ CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N)_zvl256b.c
9696
ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N)_zvl256b.c
9797

9898
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N)_zvl256b.c
# Choose the SGEMM ONCOPY/OTCOPY packing sources from SGEMM_UNROLL_N:
# $(filter ...) is non-empty only when the unroll factor is 4, 8 or 16, in
# which case the *_rvv.c sources are used; any other value keeps the
# sources under ../generic.
99+
ifneq ($(filter $(SGEMM_UNROLL_N),4 8 16),)
100+
SGEMMONCOPY = gemm_ncopy_$(SGEMM_UNROLL_N)_rvv.c
101+
SGEMMOTCOPY = gemm_tcopy_$(SGEMM_UNROLL_N)_rvv.c
102+
else
99103
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
100104
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
105+
endif
101106
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
102107
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
# The INCOPY/ITCOPY variables are only set when the M and N unroll factors
# differ. Their sources are chosen from SGEMM_UNROLL_M with the same
# 4/8/16 test: *_rvv.c on a match, ../generic otherwise.
103108
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
109+
ifneq ($(filter $(SGEMM_UNROLL_M),4 8 16),)
110+
SGEMMINCOPY = gemm_ncopy_$(SGEMM_UNROLL_M)_rvv.c
111+
SGEMMITCOPY = gemm_tcopy_$(SGEMM_UNROLL_M)_rvv.c
112+
else
104113
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
105114
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
115+
endif
106116
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
107117
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
108118
endif
109119

110120
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N)_zvl256b.c
# Choose the DGEMM ONCOPY/OTCOPY packing sources from DGEMM_UNROLL_N:
# $(filter ...) is non-empty only when the unroll factor is 4, 8 or 16, in
# which case the *_rvv.c sources are used; any other value keeps the
# sources under ../generic.
121+
ifneq ($(filter $(DGEMM_UNROLL_N),4 8 16),)
122+
DGEMMONCOPY = gemm_ncopy_$(DGEMM_UNROLL_N)_rvv.c
123+
DGEMMOTCOPY = gemm_tcopy_$(DGEMM_UNROLL_N)_rvv.c
124+
else
111125
DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
112126
DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
127+
endif
113128
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
114129
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
# The INCOPY/ITCOPY variables are only set when the M and N unroll factors
# differ. Their sources are chosen from DGEMM_UNROLL_M with the same
# 4/8/16 test: *_rvv.c on a match, ../generic otherwise.
115130
ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
131+
ifneq ($(filter $(DGEMM_UNROLL_M),4 8 16),)
132+
DGEMMINCOPY = gemm_ncopy_$(DGEMM_UNROLL_M)_rvv.c
133+
DGEMMITCOPY = gemm_tcopy_$(DGEMM_UNROLL_M)_rvv.c
134+
else
116135
DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
117136
DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
137+
endif
118138
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
119139
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
120140
endif

0 commit comments

Comments
 (0)