Skip to content

Commit 9cac379

Browse files
authored
Merge pull request #103 from xianyi/develop
rebase
2 parents 89eea6b + 5b9ebe4 commit 9cac379

File tree

6 files changed

+52
-10
lines changed

6 files changed

+52
-10
lines changed

cmake/prebuild.cmake

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -416,6 +416,29 @@ endif ()
416416
set(ZGEMM_UNROLL_M 4)
417417
set(ZGEMM_UNROLL_N 4)
418418
set(SYMV_P 16)
419+
elseif ("${TCORE}" STREQUAL "VORTEX")
# VORTEX target: appends the ARMV8 marker plus the L1/L2 and DTB #defines to
# ${TARGET_CONF_TEMP}, then sets the S/D/C/ZGEMM unroll factors and SYMV_P.
# NOTE(review): L2_SIZE 5262144 is not a power of two, and it differs from the
# -DL2_SIZE=262144 passed for FORCE_VORTEX in getarch.c (L2_ASSOCIATIVE is 8
# here versus 32 there) -- confirm which values are intended.
420+
file(APPEND ${TARGET_CONF_TEMP}
421+
"#define ARMV8\n"
422+
"#define L1_CODE_SIZE\t32768\n"
423+
"#define L1_CODE_LINESIZE\t64\n"
424+
"#define L1_CODE_ASSOCIATIVE\t4\n"
425+
"#define L1_DATA_SIZE\t32768\n"
426+
"#define L1_DATA_LINESIZE\t64\n"
427+
"#define L1_DATA_ASSOCIATIVE\t4\n"
428+
"#define L2_SIZE\t5262144\n"
429+
"#define L2_LINESIZE\t64\n"
430+
"#define L2_ASSOCIATIVE\t8\n"
431+
"#define DTB_DEFAULT_ENTRIES\t64\n"
432+
"#define DTB_SIZE\t4096\n")
433+
set(SGEMM_UNROLL_M 16)
434+
set(SGEMM_UNROLL_N 4)
435+
set(DGEMM_UNROLL_M 8)
436+
set(DGEMM_UNROLL_N 4)
437+
set(CGEMM_UNROLL_M 8)
438+
set(CGEMM_UNROLL_N 4)
439+
set(ZGEMM_UNROLL_M 4)
440+
set(ZGEMM_UNROLL_N 4)
441+
set(SYMV_P 16)
419442
elseif ("${TCORE}" STREQUAL "POWER6")
420443
file(APPEND ${TARGET_CONF_TEMP}
421444
"#define L1_DATA_SIZE 32768\n"

cpuid_arm64.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ void get_cpuconfig(void)
424424
sysctlbyname("hw.l1dcachesize",&value,&length,NULL,0);
425425
printf("#define L1_DATA_SIZE %d \n",value);
426426
sysctlbyname("hw.l2dcachesize",&value,&length,NULL,0);
427-
printf("#define L2_DATA_SIZE %d \n",value);
427+
printf("#define L2_SIZE %d \n",value);
428428
break;
429429
#endif
430430
}

exports/Makefile

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,10 @@ dll : ../$(LIBDLLNAME)
120120
-Wl,--whole-archive ../$(LIBNAME) -Wl,--no-whole-archive $(FEXTRALIB) $(EXTRALIB)
121121

122122
$(LIBPREFIX).def : gensymbol
123-
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
123+
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
124124

125125
libgoto_hpl.def : gensymbol
126-
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
126+
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
127127

128128
ifeq ($(OSNAME), Darwin)
129129
INTERNALNAME = $(LIBPREFIX).$(MAJOR_VERSION).dylib
@@ -258,16 +258,16 @@ static : ../$(LIBNAME)
258258
rm -f goto.$(SUFFIX)
259259

260260
osx.def : gensymbol ../Makefile.system ../getarch.c
261-
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
261+
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
262262

263263
aix.def : gensymbol ../Makefile.system ../getarch.c
264-
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
264+
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
265265

266266
objcopy.def : gensymbol ../Makefile.system ../getarch.c
267-
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
267+
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
268268

269269
objconv.def : gensymbol ../Makefile.system ../getarch.c
270-
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16)> $(@F)
270+
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_BFLOAT16) $(BUILD_SINGLE) $(BUILD_DOUBLE) $(BUILD_COMPLEX) $(BUILD_COMPLEX16) > $(@F)
271271

272272
test : linktest.c
273273
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.

exports/gensymbol

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@
5050
zomatcopy, zimatcopy,dzamax,dzamin,dzasum,dznrm2,
5151
zgeadd, dzsum);
5252

53-
@cblasobjs = (lsame, xerbla);
53+
@blasobjs = (lsame, xerbla);
5454
@halfblasobjs = (sbgemm, sbdot, sbstobf16, sbdtobf16, sbf16tos, dbf16tod);
5555
@cblasobjsc = (
5656
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
@@ -92,7 +92,7 @@
9292
cblas_izamax, cblas_izamin, cblas_izmin, cblas_izmax, cblas_dzsum,cblas_zimatcopy,cblas_zomatcopy
9393
);
9494

95-
@cblasobjs = ( cblas_xerbla );
95+
@cblasobjs = ( cblas_xerbla );
9696

9797
@halfcblasobjs = (cblas_sbgemm, cblas_sbdot, cblas_sbstobf16, cblas_sbdtobf16, cblas_sbf16tos, cblas_dbf16tod);
9898

@@ -3600,6 +3600,7 @@ if ($ARGV[13] == 1) {
36003600
@lapack2objs = (@lapack2objs, @lapack2objss);
36013601
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_s);
36023602
@lapackeobjs = (@lapackeobjs, @lapackeobjss);
3603+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2s);
36033604
}
36043605
if ($ARGV[14] == 1) {
36053606
@blasobjs = (@blasobjs, @blasobjsd);
@@ -3608,6 +3609,7 @@ if ($ARGV[14] == 1) {
36083609
@lapack2objs = (@lapack2objs, @lapack2objsd);
36093610
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_d);
36103611
@lapackeobjs = (@lapackeobjs, @lapackeobjsd);
3612+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2d);
36113613
}
36123614
if ($ARGV[15] == 1) {
36133615
@blasobjs = (@blasobjs, @blasobjsc);
@@ -3618,6 +3620,7 @@ if ($ARGV[15] == 1) {
36183620
# Extend @lapack2objs with @lapack2objsc and @lapack2objszc. The previous
# spelling "@lapac2objszc" did not match the @lapack2objszc array that the
# $ARGV[16] branch appends, so that list was not being added here.
@lapack2objs = (@lapack2objs, @lapack2objsc, @lapack2objszc);
36193621
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_c);
36203622
@lapackeobjs = (@lapackeobjs, @lapackeobjsc);
3623+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2sc, @lapackobjs2c);
36213624
}
36223625
if ($ARGV[16] == 1) {
36233626
@blasobjs = (@blasobjs, @blasobjsz);
@@ -3628,6 +3631,7 @@ if ($ARGV[16] == 1) {
36283631
@lapack2objs = (@lapack2objs, @lapack2objsz, @lapack2objszc);
36293632
@lapack_embeded_underscore_objs = (@lapack_embeded_underscore_objs, @lapack_embeded_underscore_objs_z);
36303633
@lapackeobjs = (@lapackeobjs, @lapackeobjsz);
3634+
@lapackobjs2 = (@lapackobjs2, @lapackobjs2dz, @lapackobjs2z);
36313635
}
36323636
if ($ARGV[8] == 1) {
36333637
#ONLY_CBLAS=1

getarch.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12221222
#else
12231223
#endif
12241224

1225+
/*
 * Forced-target settings for VORTEX: when FORCE_VORTEX is defined this sets
 * FORCE, the ARCHITECTURE / SUBARCHITECTURE / SUBDIRNAME strings, the
 * ARCHCONFIG string of -D options, and the LIBNAME / CORENAME strings.
 * NOTE(review): -DL2_SIZE=262144 and -DL2_ASSOCIATIVE=32 differ from the
 * values written for VORTEX in cmake/prebuild.cmake (L2_SIZE 5262144,
 * L2_ASSOCIATIVE 8) -- confirm which set is intended.
 */
#ifdef FORCE_VORTEX
1226+
#define FORCE
1227+
#define ARCHITECTURE "ARM64"
1228+
#define SUBARCHITECTURE "VORTEX"
1229+
#define SUBDIRNAME "arm64"
1230+
#define ARCHCONFIG "-DVORTEX " \
1231+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
1232+
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
1233+
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
1234+
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
1235+
#define LIBNAME "vortex"
1236+
#define CORENAME "VORTEX"
1237+
#endif
1238+
12251239
#ifdef FORCE_ZARCH_GENERIC
12261240
#define FORCE
12271241
#define ARCHITECTURE "ZARCH"

kernel/x86_64/sgemm_direct_skylakex.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1-
#if defined(SKYLAKEX) || defined (COOPERLAKE)
21
/* the direct sgemm code written by Arjan van der Ven */
32
#include <immintrin.h>
43
#include "common.h"
4+
5+
#if defined(SKYLAKEX) || defined (COOPERLAKE)
56
/*
67
* "Direct sgemm" code. This code operates directly on the inputs and outputs
78
* of the sgemm call, avoiding the copies, memory realignments and threading,

0 commit comments

Comments
 (0)