Skip to content

Commit a8f62a3

Browse files
committed
fix UNROLL_MN and add to targets for SVE
1 parent 774267f commit a8f62a3

File tree

3 files changed

+31
-26
lines changed

3 files changed

+31
-26
lines changed

kernel/arm64/KERNEL.A64FX

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -114,8 +114,8 @@ DSDOTKERNEL = dot.S
114114
DGEMM_BETA = dgemm_beta.S
115115
SGEMM_BETA = sgemm_beta.S
116116

117-
SGEMMKERNEL = sgemm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
118-
STRMMKERNEL = strmm_kernel_8x$(SGEMM_UNROLL_N).S
117+
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
118+
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
119119

120120
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
121121
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
@@ -127,6 +127,11 @@ SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
127127
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
128128
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
129129

130+
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
131+
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
132+
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
133+
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
134+
130135
SSYMMUCOPY_M = symm_ucopy_sve.c
131136
SSYMMLCOPY_M = symm_lcopy_sve.c
132137

kernel/arm64/KERNEL.ARMV8SVE

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -114,35 +114,27 @@ DSDOTKERNEL = dot.S
114114
DGEMM_BETA = dgemm_beta.S
115115
SGEMM_BETA = sgemm_beta.S
116116

117-
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
118-
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
119-
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
120-
ifeq ($(SGEMM_UNROLL_M), 16)
121-
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
122-
else
123-
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
124-
endif
125-
ifeq ($(SGEMM_UNROLL_M), 4)
126-
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
127-
else
128-
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
129-
endif
117+
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
118+
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
119+
120+
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
121+
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
122+
SGEMMONCOPY = sgemm_ncopy_$(DGEMM_UNROLL_N).S
123+
SGEMMOTCOPY = sgemm_tcopy_$(DGEMM_UNROLL_N).S
124+
130125
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
131126
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
132-
endif
133-
ifeq ($(SGEMM_UNROLL_N), 16)
134-
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
135-
else
136-
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
137-
endif
138-
ifeq ($(SGEMM_UNROLL_N), 4)
139-
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
140-
else
141-
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
142-
endif
143127
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
144128
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
145129

130+
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
131+
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
132+
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
133+
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
134+
135+
SSYMMUCOPY_M = symm_ucopy_sve.c
136+
SSYMMLCOPY_M = symm_lcopy_sve.c
137+
146138
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
147139
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
148140

param.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3296,14 +3296,22 @@ is a big desktop or server with abundant cache rather than a phone or embedded d
32963296

32973297
#elif defined(ARMV8SVE) || defined(A64FX)
32983298

3299+
/* When all BLAS3 routines are implemented with SVE, SGEMM_DEFAULT_UNROLL_M should be "sve_vl".
3300+
Until then, just keep it different from SGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
32993301
#define SGEMM_DEFAULT_UNROLL_M 4
33003302
#define SGEMM_DEFAULT_UNROLL_N 8
3303+
/* SGEMM_UNROLL_MN is calculated as max(SGEMM_UNROLL_M, SGEMM_UNROLL_N)
3304+
* Since we don't define SGEMM_UNROLL_M correctly we have to manually set this macro.
3305+
* If SVE size is ever more than 1024, this should be increased also. */
3306+
#define SGEMM_DEFAULT_UNROLL_MN 32
33013307

33023308
/* When all BLAS3 routines are implemented with SVE, DGEMM_DEFAULT_UNROLL_M should be "sve_vl".
33033309
Until then, just keep it different from DGEMM_DEFAULT_UNROLL_N to keep copy routines in both directions separated. */
33043310
#define DGEMM_DEFAULT_UNROLL_M 2
33053311
#define DGEMM_DEFAULT_UNROLL_N 8
33063312

3313+
#define DGEMM_DEFAULT_UNROLL_MN 32
3314+
33073315
#define CGEMM_DEFAULT_UNROLL_M 8
33083316
#define CGEMM_DEFAULT_UNROLL_N 4
33093317

0 commit comments

Comments
 (0)