Skip to content

Commit 54d321d

Browse files
authored
Merge pull request #3466 from rafaelcfsousa/rafael/small_matrix_p10
[POWER] Add small matrix for sgemm/dgemm on Power10
2 parents c248442 + c78fdcc commit 54d321d

11 files changed

+9008
-0
lines changed

Makefile.system

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -256,6 +256,8 @@ endif
256256
#For small matrix optimization
257257
ifeq ($(ARCH), x86_64)
258258
SMALL_MATRIX_OPT = 1
259+
else ifeq ($(CORE), POWER10)
260+
SMALL_MATRIX_OPT = 1
259261
endif
260262
ifeq ($(SMALL_MATRIX_OPT), 1)
261263
CCOMMON_OPT += -DSMALL_MATRIX_OPT

kernel/power/KERNEL.POWER10

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,16 @@ SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
3232
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
3333
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
3434

35+
SGEMM_SMALL_M_PERMIT = gemm_small_kernel_permit_power10.c
36+
SGEMM_SMALL_K_NN = sgemm_small_kernel_nn_power10.c
37+
SGEMM_SMALL_K_B0_NN = sgemm_small_kernel_nn_power10.c
38+
SGEMM_SMALL_K_NT = sgemm_small_kernel_nt_power10.c
39+
SGEMM_SMALL_K_B0_NT = sgemm_small_kernel_nt_power10.c
40+
SGEMM_SMALL_K_TN = sgemm_small_kernel_tn_power10.c
41+
SGEMM_SMALL_K_B0_TN = sgemm_small_kernel_tn_power10.c
42+
SGEMM_SMALL_K_TT = sgemm_small_kernel_tt_power10.c
43+
SGEMM_SMALL_K_B0_TT = sgemm_small_kernel_tt_power10.c
44+
3545
DGEMMKERNEL = dgemm_kernel_power10.c
3646
DGEMMINCOPY =
3747
DGEMMITCOPY =
@@ -42,6 +52,16 @@ DGEMMITCOPYOBJ =
4252
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
4353
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
4454

55+
DGEMM_SMALL_M_PERMIT = gemm_small_kernel_permit_power10.c
56+
DGEMM_SMALL_K_NT = dgemm_small_kernel_nt_power10.c
57+
DGEMM_SMALL_K_B0_NT = dgemm_small_kernel_nt_power10.c
58+
DGEMM_SMALL_K_NN = dgemm_small_kernel_nn_power10.c
59+
DGEMM_SMALL_K_B0_NN = dgemm_small_kernel_nn_power10.c
60+
DGEMM_SMALL_K_TT = dgemm_small_kernel_tt_power10.c
61+
DGEMM_SMALL_K_B0_TT = dgemm_small_kernel_tt_power10.c
62+
DGEMM_SMALL_K_TN = dgemm_small_kernel_tn_power10.c
63+
DGEMM_SMALL_K_B0_TN = dgemm_small_kernel_tn_power10.c
64+
4565
CGEMMKERNEL = cgemm_kernel_power10.S
4666
#CGEMMKERNEL = cgemm_kernel_8x4_power8.S
4767
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c

0 commit comments

Comments (0)