Skip to content

Commit 697e275

Browse files
authored
Merge pull request #3464 from binebrank/arm_sve_sgemm
Add sgemm part for Arm SVE
2 parents f7f7fea + a8f62a3 commit 697e275

15 files changed

+3969
-56
lines changed

kernel/Makefile.L3

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1483,29 +1483,61 @@ $(KDIR)xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XTRSMKERNEL_RT) $(XT
14831483
$(CC) -c $(CFLAGS) -DTRSMKERNEL -DCOMPLEX -DXDOUBLE -UUPPER -DRT -DCONJ $< -o $@
14841484

14851485

1486+
# strmm_iunucopy / strmm_iunncopy objects.
# If STRMMUNCOPY_M is set, both objects are compiled from
# $(KERNELDIR)/$(STRMMUNCOPY_M); otherwise they are compiled from
# generic/trmm_uncopy_$(SGEMM_UNROLL_M).c. Both branches use the same flags
# (DOUBLE, COMPLEX, OUTER and LOWER undefined); the "u" object adds -DUNIT and
# the "n" object uses -UUNIT.
ifdef STRMMUNCOPY_M
1487+
$(KDIR)strmm_iunucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMUNCOPY_M)
1488+
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
1489+
1490+
$(KDIR)strmm_iunncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMUNCOPY_M)
1491+
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@
1492+
else
14861493
$(KDIR)strmm_iunucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_M).c
14871494
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
14881495

14891496
$(KDIR)strmm_iunncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_M).c
14901497
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@
1498+
endif
1499+
1500+
# strmm_ilnucopy / strmm_ilnncopy objects.
# If STRMMLNCOPY_M is set, both objects are compiled from
# $(KERNELDIR)/$(STRMMLNCOPY_M); otherwise they are compiled from
# generic/trmm_lncopy_$(SGEMM_UNROLL_M).c. Both branches pass -DLOWER with
# DOUBLE, COMPLEX and OUTER undefined; the "u" object adds -DUNIT and the
# "n" object uses -UUNIT.
ifdef STRMMLNCOPY_M
1501+
$(KDIR)strmm_ilnucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMLNCOPY_M)
1502+
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@
14911503

1504+
$(KDIR)strmm_ilnncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMLNCOPY_M)
1505+
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
1506+
else
14921507
$(KDIR)strmm_ilnucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(SGEMM_UNROLL_M).c
14931508
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@
14941509

14951510
$(KDIR)strmm_ilnncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_lncopy_$(SGEMM_UNROLL_M).c
14961511
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
1512+
endif
14971513

1514+
# strmm_iutucopy / strmm_iutncopy objects.
# If STRMMUTCOPY_M is set, both objects are compiled from
# $(KERNELDIR)/$(STRMMUTCOPY_M); otherwise they are compiled from
# generic/trmm_utcopy_$(SGEMM_UNROLL_M).c. Both branches use the same flags
# (DOUBLE, COMPLEX, OUTER and LOWER undefined); the "u" object adds -DUNIT and
# the "n" object uses -UUNIT.
ifdef STRMMUTCOPY_M
1515+
$(KDIR)strmm_iutucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMUTCOPY_M)
1516+
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
1517+
1518+
$(KDIR)strmm_iutncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMUTCOPY_M)
1519+
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@
1520+
else
14981521
$(KDIR)strmm_iutucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(SGEMM_UNROLL_M).c
14991522
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -DUNIT $< -o $@
15001523

15011524
$(KDIR)strmm_iutncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_utcopy_$(SGEMM_UNROLL_M).c
15021525
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER -UUNIT $< -o $@
1526+
endif
1527+
1528+
# strmm_iltucopy / strmm_iltncopy objects.
# If STRMMLTCOPY_M is set, both objects are compiled from
# $(KERNELDIR)/$(STRMMLTCOPY_M); otherwise they are compiled from
# generic/trmm_ltcopy_$(SGEMM_UNROLL_M).c. Both branches pass -DLOWER with
# DOUBLE, COMPLEX and OUTER undefined; the "u" object adds -DUNIT and the
# "n" object uses -UUNIT.
ifdef STRMMLTCOPY_M
1529+
$(KDIR)strmm_iltucopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMLTCOPY_M)
1530+
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@
15031531

1532+
$(KDIR)strmm_iltncopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(STRMMLTCOPY_M)
1533+
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
1534+
else
15041535
$(KDIR)strmm_iltucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_M).c
15051536
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -DUNIT $< -o $@
15061537

15071538
$(KDIR)strmm_iltncopy$(TSUFFIX).$(SUFFIX) : generic/trmm_ltcopy_$(SGEMM_UNROLL_M).c
15081539
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER -UUNIT $< -o $@
1540+
endif
15091541

15101542
# strmm_ounucopy object: always compiled from the generic source selected by
# SGEMM_UNROLL_N, with -DOUTER and -DUNIT (no kernel-specific override here).
$(KDIR)strmm_ounucopy$(TSUFFIX).$(SUFFIX) : generic/trmm_uncopy_$(SGEMM_UNROLL_N).c
15111543
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -ULOWER -DUNIT $< -o $@
@@ -1809,11 +1841,21 @@ $(KDIR)ssymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(SGEMM_UNROLL_N).
18091841
# ssymm_oltcopy object: compiled from the generic source selected by
# SGEMM_UNROLL_N, with -DOUTER and -DLOWER.
$(KDIR)ssymm_oltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(SGEMM_UNROLL_N).c
18101842
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -DOUTER -DLOWER $< -o $@
18111843

1844+
# ssymm_iutcopy object.
# If SSYMMUCOPY_M is set, the object is compiled from
# $(KERNELDIR)/$(SSYMMUCOPY_M); otherwise from
# generic/symm_ucopy_$(SGEMM_UNROLL_M).c. The compiler flags are identical in
# both branches (DOUBLE, COMPLEX, OUTER and LOWER undefined).
ifdef SSYMMUCOPY_M
1845+
$(KDIR)ssymm_iutcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYMMUCOPY_M)
1846+
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@
1847+
else
18121848
$(KDIR)ssymm_iutcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(SGEMM_UNROLL_M).c
18131849
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -ULOWER $< -o $@
1850+
endif
18141851

1852+
# ssymm_iltcopy object.
# If SSYMMLCOPY_M is set, the object is compiled from
# $(KERNELDIR)/$(SSYMMLCOPY_M); otherwise from
# generic/symm_lcopy_$(SGEMM_UNROLL_M).c. The compiler flags are identical in
# both branches (-DLOWER with DOUBLE, COMPLEX and OUTER undefined).
ifdef SSYMMLCOPY_M
1853+
$(KDIR)ssymm_iltcopy$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SSYMMLCOPY_M)
1854+
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@
1855+
else
18151856
$(KDIR)ssymm_iltcopy$(TSUFFIX).$(SUFFIX) : generic/symm_lcopy_$(SGEMM_UNROLL_M).c
18161857
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -UDOUBLE -UCOMPLEX -UOUTER -DLOWER $< -o $@
1858+
endif
18171859

18181860
# dsymm_outcopy object: compiled from the generic source selected by
# DGEMM_UNROLL_N, with -DDOUBLE and -DOUTER.
$(KDIR)dsymm_outcopy$(TSUFFIX).$(SUFFIX) : generic/symm_ucopy_$(DGEMM_UNROLL_N).c
18191861
$(CC) -c $(CFLAGS) $(NO_UNINITIALIZED_WARN) -DDOUBLE -UCOMPLEX -DOUTER -ULOWER $< -o $@

kernel/arm64/KERNEL.A64FX

Lines changed: 15 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -114,35 +114,26 @@ DSDOTKERNEL = dot.S
114114
DGEMM_BETA = dgemm_beta.S
115115
SGEMM_BETA = sgemm_beta.S
116116

117-
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
118-
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
119-
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
120-
ifeq ($(SGEMM_UNROLL_M), 16)
121-
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
122-
else
123-
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
124-
endif
125-
ifeq ($(SGEMM_UNROLL_M), 4)
126-
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
127-
else
128-
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
129-
endif
117+
# SGEMM/STRMM kernel sources: the file names embed SGEMM_UNROLL_N only
# (the "v2x"/"v1x" prefix is fixed text, not derived from SGEMM_UNROLL_M).
# The IN/IT copy sources are fixed file names with no unroll factor in them.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
118+
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
119+
120+
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
121+
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
122+
# Pick the ON/OT copy sources by SGEMM_UNROLL_N, the same variable that
# SGEMMKERNEL and STRMMKERNEL are named after, rather than by DGEMM_UNROLL_N,
# so the selection stays correct if the two unroll factors ever differ.
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
123+
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
124+
130125
# Object file names for the IN/IT copy routines, built from TSUFFIX and SUFFIX.
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
131126
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
132-
endif
133-
ifeq ($(SGEMM_UNROLL_N), 16)
134-
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
135-
else
136-
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
137-
endif
138-
ifeq ($(SGEMM_UNROLL_N), 4)
139-
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
140-
else
141-
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
142-
endif
143127
# Object file names for the ON/OT copy routines, built from TSUFFIX and SUFFIX.
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
144128
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
145129

130+
# Override sources for the strmm_i* and ssymm_i* copy objects. kernel/Makefile.L3
# tests each of these variables with ifdef and, when set, compiles the
# corresponding objects from $(KERNELDIR)/<value> instead of the generic
# SGEMM_UNROLL_M-sized source.
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
131+
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
132+
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
133+
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
134+
135+
SSYMMUCOPY_M = symm_ucopy_sve.c
136+
SSYMMLCOPY_M = symm_lcopy_sve.c
146137

147138
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
148139
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S

kernel/arm64/KERNEL.ARMV8SVE

Lines changed: 16 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -114,35 +114,27 @@ DSDOTKERNEL = dot.S
114114
DGEMM_BETA = dgemm_beta.S
115115
SGEMM_BETA = sgemm_beta.S
116116

117-
SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
118-
STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
119-
ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
120-
ifeq ($(SGEMM_UNROLL_M), 16)
121-
SGEMMITCOPY = sgemm_tcopy_$(SGEMM_UNROLL_M).S
122-
else
123-
SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
124-
endif
125-
ifeq ($(SGEMM_UNROLL_M), 4)
126-
SGEMMINCOPY = sgemm_ncopy_$(SGEMM_UNROLL_M).S
127-
else
128-
SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
129-
endif
117+
# SGEMM/STRMM kernel sources: the file names embed SGEMM_UNROLL_N only
# (the "v2x"/"v1x" prefix is fixed text, not derived from SGEMM_UNROLL_M).
# The IN/IT copy sources are fixed file names with no unroll factor in them.
SGEMMKERNEL = sgemm_kernel_sve_v2x$(SGEMM_UNROLL_N).S
118+
STRMMKERNEL = strmm_kernel_sve_v1x$(SGEMM_UNROLL_N).S
119+
120+
SGEMMINCOPY = sgemm_ncopy_sve_v1.c
121+
SGEMMITCOPY = sgemm_tcopy_sve_v1.c
122+
# Pick the ON/OT copy sources by SGEMM_UNROLL_N, the same variable that
# SGEMMKERNEL and STRMMKERNEL are named after, rather than by DGEMM_UNROLL_N,
# so the selection stays correct if the two unroll factors ever differ.
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
123+
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
124+
130125
# Object file names for the IN/IT copy routines, built from TSUFFIX and SUFFIX.
SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX)
131126
SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX)
132-
endif
133-
ifeq ($(SGEMM_UNROLL_N), 16)
134-
SGEMMOTCOPY = sgemm_tcopy_$(SGEMM_UNROLL_N).S
135-
else
136-
SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
137-
endif
138-
ifeq ($(SGEMM_UNROLL_N), 4)
139-
SGEMMONCOPY = sgemm_ncopy_$(SGEMM_UNROLL_N).S
140-
else
141-
SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
142-
endif
143127
# Object file names for the ON/OT copy routines, built from TSUFFIX and SUFFIX.
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
144128
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
145129

130+
# Override sources for the strmm_i* and ssymm_i* copy objects. kernel/Makefile.L3
# tests each of these variables with ifdef and, when set, compiles the
# corresponding objects from $(KERNELDIR)/<value> instead of the generic
# SGEMM_UNROLL_M-sized source.
STRMMUNCOPY_M = trmm_uncopy_sve_v1.c
131+
STRMMLNCOPY_M = trmm_lncopy_sve_v1.c
132+
STRMMUTCOPY_M = trmm_utcopy_sve_v1.c
133+
STRMMLTCOPY_M = trmm_ltcopy_sve_v1.c
134+
135+
SSYMMUCOPY_M = symm_ucopy_sve.c
136+
SSYMMLCOPY_M = symm_lcopy_sve.c
137+
146138
DGEMMKERNEL = dgemm_kernel_sve_v2x$(DGEMM_UNROLL_N).S
147139
DTRMMKERNEL = dtrmm_kernel_sve_v1x$(DGEMM_UNROLL_N).S
148140

0 commit comments

Comments
 (0)