Skip to content

Commit 213c0e7

Browse files
committed
Added special unrolled, vectorized versions of "Solve" for specific sizes
to DTRSM and STRSM, to improve performance on Power9 and Power10.
1 parent 5e81e81 commit 213c0e7

File tree

6 files changed

+4244
-16
lines changed

6 files changed

+4244
-16
lines changed

kernel/power/KERNEL.POWER10

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -63,15 +63,15 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
6363
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
6464
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
6565

66-
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
67-
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
68-
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
69-
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
70-
71-
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
72-
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
73-
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
74-
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
66+
STRSMKERNEL_LN = trsm_kernel_LN_power10.c
67+
STRSMKERNEL_LT = trsm_kernel_LT_power10.c
68+
STRSMKERNEL_RN = trsm_kernel_RN_power10.c
69+
STRSMKERNEL_RT = trsm_kernel_RT_power10.c
70+
71+
DTRSMKERNEL_LN = trsm_kernel_LN_power10.c
72+
DTRSMKERNEL_LT = trsm_kernel_LT_power10.c
73+
DTRSMKERNEL_RN = trsm_kernel_RN_power10.c
74+
DTRSMKERNEL_RT = trsm_kernel_RT_power10.c
7575

7676
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
7777
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c

kernel/power/KERNEL.POWER9

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -52,15 +52,15 @@ ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
5252
ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX)
5353
ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX)
5454

55-
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
56-
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
57-
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
58-
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
55+
STRSMKERNEL_LN = trsm_kernel_LN_power10.c
56+
STRSMKERNEL_LT = trsm_kernel_LT_power10.c
57+
STRSMKERNEL_RN = trsm_kernel_RN_power10.c
58+
STRSMKERNEL_RT = trsm_kernel_RT_power10.c
5959

60-
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
60+
DTRSMKERNEL_LN = trsm_kernel_LN_power10.c
6161
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S
62-
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
63-
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
62+
DTRSMKERNEL_RN = trsm_kernel_RN_power10.c
63+
DTRSMKERNEL_RT = trsm_kernel_RT_power10.c
6464

6565
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
6666
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c

0 commit comments

Comments
 (0)