Skip to content

Commit 525db54

Browse files
authored
Merge pull request #74 from xianyi/develop
rebase
2 parents d6b6e5c + cb097be commit 525db54

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+771
-823
lines changed

Makefile.arm64

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,16 @@ CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
5656
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
5757
endif
5858

59+
ifeq ($(CORE), THUNDERX3T110)
60+
ifeq ($(GCCVERSIONGTEQ10), 1)
61+
CCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
62+
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
63+
else
64+
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
65+
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
66+
endif
67+
endif
68+
5969
ifeq ($(GCCVERSIONGTEQ9), 1)
6070
ifeq ($(CORE), TSV110)
6171
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110

Makefile.power

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -11,34 +11,34 @@ endif
1111

1212
ifeq ($(CORE), POWER10)
1313
ifeq ($(USE_OPENMP), 1)
14-
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
15-
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
14+
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp
15+
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -DUSE_OPENMP -fno-fast-math -fopenmp
1616
else
17-
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -malign-power -fno-fast-math
18-
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -malign-power -fno-fast-math
17+
COMMON_OPT += -Ofast -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
18+
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
1919
endif
2020
endif
2121

2222
ifeq ($(CORE), POWER9)
2323
ifeq ($(USE_OPENMP), 1)
2424
ifneq ($(C_COMPILER), PGI)
25-
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
25+
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp
2626
else
2727
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align -DUSE_OPENMP -mp
2828
endif
2929
ifneq ($(F_COMPILER), PGI)
30-
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
30+
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -DUSE_OPENMP -fno-fast-math -fopenmp
3131
else
3232
FCOMMON_OPT += -O2 -Mrecursive -DUSE_OPENMP -mp
3333
endif
3434
else
3535
ifneq ($(C_COMPILER), PGI)
36-
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math
36+
CCOMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
3737
else
3838
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
3939
endif
4040
ifneq ($(F_COMPILER), PGI)
41-
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math
41+
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -fno-fast-math
4242
else
4343
FCOMMON_OPT += -O2 -Mrecursive
4444
endif
@@ -48,26 +48,26 @@ endif
4848
ifeq ($(CORE), POWER8)
4949
ifeq ($(USE_OPENMP), 1)
5050
ifneq ($(C_COMPILER), PGI)
51-
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
51+
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -DUSE_OPENMP -fno-fast-math -fopenmp
5252
else
5353
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align -DUSE_OPENMP -mp
5454
endif
5555
ifneq ($(F_COMPILER), PGI)
56-
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp
56+
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -DUSE_OPENMP -fno-fast-math -fopenmp
5757
else
5858
FCOMMON_OPT += -O2 -Mrecursive -DUSE_OPENMP -mp
5959
endif
6060
else
6161
ifneq ($(C_COMPILER), PGI)
62-
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math
62+
CCOMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
6363
else
6464
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
6565
endif
6666
ifneq ($(F_COMPILER), PGI)
6767
ifeq ($(OSNAME), AIX)
68-
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
68+
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
6969
else
70-
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math
70+
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
7171
endif
7272
else
7373
FCOMMON_OPT += -O2 -Mrecursive

Makefile.system

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ DYNAMIC_CORE += THUNDERX
578578
DYNAMIC_CORE += THUNDERX2T99
579579
DYNAMIC_CORE += TSV110
580580
DYNAMIC_CORE += EMAG8180
581+
DYNAMIC_CORE += THUNDERX3T110
581582
endif
582583

583584
ifeq ($(ARCH), zarch)
@@ -617,7 +618,6 @@ DYNAMIC_CORE += POWER8
617618
ifneq ($(C_COMPILER), GCC)
618619
DYNAMIC_CORE += POWER9
619620
DYNAMIC_CORE += POWER10
620-
override LDFLAGS += -Wl,-no-power10-stubs
621621
endif
622622
ifeq ($(C_COMPILER), GCC)
623623
ifeq ($(GCCVERSIONGT5), 1)
@@ -627,11 +627,9 @@ $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
627627
endif
628628
ifeq ($(GCCVERSIONGTEQ11), 1)
629629
DYNAMIC_CORE += POWER10
630-
override LDFLAGS += -Wl,-no-power10-stubs
631630
else ifeq ($(GCCVERSIONGTEQ10), 1)
632631
ifeq ($(GCCMINORVERSIONGTEQ2), 1)
633632
DYNAMIC_CORE += POWER10
634-
override LDFLAGS += -Wl,-no-power10-stubs
635633
endif
636634
else
637635
$(info, OpenBLAS: Your gcc version is too old to build the POWER10 kernels.)
@@ -1241,7 +1239,9 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
12411239

12421240
include $(TOPDIR)/Makefile.$(ARCH)
12431241

1242+
ifneq ($(C_COMPILER), PGI)
12441243
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
1244+
endif
12451245
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"
12461246

12471247
ifeq ($(CORE), PPC440)

README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ You can download them from [file hosting on sourceforge.net](https://sourceforge
2828
## Installation from Source
2929

3030
Download from project homepage, https://xianyi.github.com/OpenBLAS/, or check out the code
31-
using Git from https://github.com/xianyi/OpenBLAS.git.
31+
using Git from https://github.com/xianyi/OpenBLAS.git. (If you want the most up-to-date version, be
32+
sure to use the develop branch - master is several years out of date due to a change of maintainership.)
3233
Buildtime parameters can be chosen in Makefile.rule, see there for a short description of each option.
3334
Most can also be given directly on the make or cmake command line.
3435

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ FALKOR
9696
THUNDERX
9797
THUNDERX2T99
9898
TSV110
99+
THUNDERX3T110
99100

100101
9.System Z:
101102
ZARCH_GENERIC

cmake/arch.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ endif ()
4545

4646
if (DYNAMIC_ARCH)
4747
if (ARM64)
48-
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1)
48+
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180 NEOVERSEN1 THUNDERX3T110)
4949
endif ()
5050

5151
if (POWER)

cmake/prebuild.cmake

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,8 +195,13 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
195195
"#define HAVE_VFP\n"
196196
"#define HAVE_NEON\n"
197197
"#define ARMV8\n")
198+
if ("${TCORE}" STREQUAL "CORTEXA57")
198199
set(SGEMM_UNROLL_M 16)
199200
set(SGEMM_UNROLL_N 4)
201+
else ()
202+
set(SGEMM_UNROLL_M 8)
203+
set(SGEMM_UNROLL_N 8)
204+
endif ()
200205
set(DGEMM_UNROLL_M 8)
201206
set(DGEMM_UNROLL_N 4)
202207
set(CGEMM_UNROLL_M 8)
@@ -338,6 +343,33 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
338343
set(ZGEMM_UNROLL_M 4)
339344
set(ZGEMM_UNROLL_N 4)
340345
set(SYMV_P 16)
346+
elseif ("${TCORE}" STREQUAL "THUNDERX3T110")
347+
file(APPEND ${TARGET_CONF_TEMP}
348+
"#define THUNDERX3T110\n"
349+
"#define L1_CODE_SIZE\t65536\n"
350+
"#define L1_CODE_LINESIZE\t64\n"
351+
"#define L1_CODE_ASSOCIATIVE\t8\n"
352+
"#define L1_DATA_SIZE\t32768\n" # 32 KiB L1D; keeps cross-compile config identical to cpuid_arm64.c
353+
"#define L1_DATA_LINESIZE\t64\n"
354+
"#define L1_DATA_ASSOCIATIVE\t8\n"
355+
"#define L2_SIZE\t524288\n"
356+
"#define L2_LINESIZE\t64\n"
357+
"#define L2_ASSOCIATIVE\t8\n"
358+
"#define L3_SIZE\t94371840\n"
359+
"#define L3_LINESIZE\t64\n"
360+
"#define L3_ASSOCIATIVE\t32\n"
361+
"#define DTB_DEFAULT_ENTRIES\t64\n"
362+
"#define DTB_SIZE\t4096\n"
363+
"#define ARMV8\n")
364+
set(SGEMM_UNROLL_M 16)
365+
set(SGEMM_UNROLL_N 4)
366+
set(DGEMM_UNROLL_M 8)
367+
set(DGEMM_UNROLL_N 4)
368+
set(CGEMM_UNROLL_M 8)
369+
set(CGEMM_UNROLL_N 4)
370+
set(ZGEMM_UNROLL_M 4)
371+
set(ZGEMM_UNROLL_N 4)
372+
set(SYMV_P 16)
341373
elseif ("${TCORE}" STREQUAL "TSV110")
342374
file(APPEND ${TARGET_CONF_TEMP}
343375
"#define ARMV8\n"

cpuid_arm64.c

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
// Cavium
4141
#define CPU_THUNDERX 7
4242
#define CPU_THUNDERX2T99 8
43+
#define CPU_THUNDERX3T110 12
4344
//Hisilicon
4445
#define CPU_TSV110 9
4546
// Ampere
@@ -57,7 +58,8 @@ static char *cpuname[] = {
5758
"THUNDERX2T99",
5859
"TSV110",
5960
"EMAG8180",
60-
"NEOVERSEN1"
61+
"NEOVERSEN1",
62+
"THUNDERX3T110"
6163
};
6264

6365
static char *cpuname_lower[] = {
@@ -72,7 +74,8 @@ static char *cpuname_lower[] = {
7274
"thunderx2t99",
7375
"tsv110",
7476
"emag8180",
75-
"neoversen1"
77+
"neoversen1",
78+
"thunderx3t110"
7679
};
7780

7881
int get_feature(char *search)
@@ -158,6 +161,8 @@ int detect(void)
158161
return CPU_THUNDERX;
159162
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af"))
160163
return CPU_THUNDERX2T99;
164+
else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0b8"))
165+
return CPU_THUNDERX3T110;
161166
// HiSilicon
162167
else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01"))
163168
return CPU_TSV110;
@@ -372,7 +377,25 @@ void get_cpuconfig(void)
372377
printf("#define L2_LINESIZE 64\n");
373378
printf("#define DTB_DEFAULT_ENTRIES 64\n");
374379
printf("#define DTB_SIZE 4096\n");
380+
break;
375381

382+
case CPU_THUNDERX3T110:
383+
printf("#define THUNDERX3T110 \n");
384+
printf("#define L1_CODE_SIZE 65536 \n");
385+
printf("#define L1_CODE_LINESIZE 64 \n");
386+
printf("#define L1_CODE_ASSOCIATIVE 8 \n");
387+
printf("#define L1_DATA_SIZE 32768 \n");
388+
printf("#define L1_DATA_LINESIZE 64 \n");
389+
printf("#define L1_DATA_ASSOCIATIVE 8 \n");
390+
printf("#define L2_SIZE 524288 \n");
391+
printf("#define L2_LINESIZE 64 \n");
392+
printf("#define L2_ASSOCIATIVE 8 \n");
393+
printf("#define L3_SIZE 94371840 \n");
394+
printf("#define L3_LINESIZE 64 \n");
395+
printf("#define L3_ASSOCIATIVE 32 \n");
396+
printf("#define DTB_DEFAULT_ENTRIES 64 \n");
397+
printf("#define DTB_SIZE 4096 \n");
398+
break;
376399
}
377400
get_cpucount();
378401
}

cpuid_x86.c

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1454,10 +1454,11 @@ int get_cpuname(void){
14541454
return CPUTYPE_OPTERON;
14551455
case 1:
14561456
case 3:
1457-
case 7:
1458-
case 10:
1457+
// case 7:
1458+
// case 10:
14591459
return CPUTYPE_BARCELONA;
14601460
case 5:
1461+
case 7:
14611462
return CPUTYPE_BOBCAT;
14621463
case 6:
14631464
switch (model) {
@@ -1507,6 +1508,8 @@ int get_cpuname(void){
15071508
// AMD Ryzen
15081509
case 8:
15091510
// AMD Ryzen2
1511+
default:
1512+
// Matisse/Renoir and other recent Ryzen2
15101513
if(support_avx())
15111514
#ifndef NO_AVX2
15121515
return CPUTYPE_ZEN;
@@ -1516,6 +1519,16 @@ int get_cpuname(void){
15161519
else
15171520
return CPUTYPE_BARCELONA;
15181521
}
1522+
break;
1523+
case 10: // Zen3
1524+
if(support_avx())
1525+
#ifndef NO_AVX2
1526+
return CPUTYPE_ZEN;
1527+
#else
1528+
return CPUTYPE_SANDYBRIDGE; // Zen is closer in architecture to Sandy Bridge than to Excavator
1529+
#endif
1530+
else
1531+
return CPUTYPE_BARCELONA;
15191532
}
15201533
break;
15211534
}
@@ -2107,7 +2120,7 @@ int get_coretype(void){
21072120
return CORE_PILEDRIVER;
21082121
else
21092122
return CORE_BARCELONA; //OS don't support AVX.
2110-
case 5: // New EXCAVATOR
2123+
case 5: // New EXCAVATOR
21112124
if(support_avx())
21122125
return CORE_EXCAVATOR;
21132126
else
@@ -2135,12 +2148,14 @@ int get_coretype(void){
21352148
}
21362149
break;
21372150
}
2138-
} else if (exfamily == 8) {
2151+
} else if (exfamily == 8 || exfamily == 10) {
21392152
switch (model) {
21402153
case 1:
21412154
// AMD Ryzen
21422155
case 8:
2143-
// Ryzen 2
2156+
// Ryzen 2
2157+
default:
2158+
// Matisse,Renoir Ryzen2 models
21442159
if(support_avx())
21452160
#ifndef NO_AVX2
21462161
return CORE_ZEN;

driver/others/dynamic.c

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -656,7 +656,7 @@ static gotoblas_t *get_coretype(void){
656656
if ((exfamily == 0) || (exfamily == 2)) {
657657
if (ecx & (1 << 0)) return &gotoblas_OPTERON_SSE3;
658658
else return &gotoblas_OPTERON;
659-
} else if (exfamily == 5) {
659+
} else if (exfamily == 5 || exfamily == 7) {
660660
return &gotoblas_BOBCAT;
661661
} else if (exfamily == 6) {
662662
if(model == 1){
@@ -710,24 +710,32 @@ static gotoblas_t *get_coretype(void){
710710
}
711711
}
712712
} else if (exfamily == 8) {
713-
if (model == 1 || model == 8) {
713+
/* if (model == 1 || model == 8) */ {
714714
if(support_avx())
715715
return &gotoblas_ZEN;
716716
else{
717717
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
718718
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
719719
}
720720
}
721-
} else if (exfamily == 9) {
721+
} else if (exfamily == 9) {
722722
if(support_avx())
723723
return &gotoblas_ZEN;
724724
else{
725725
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
726726
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
727-
}
727+
}
728+
} else if (exfamily == 10) {
729+
if(support_avx())
730+
return &gotoblas_ZEN;
731+
else{
732+
openblas_warning(FALLBACK_VERBOSE, BARCELONA_FALLBACK);
733+
return &gotoblas_BARCELONA; //OS doesn't support AVX. Use old kernels.
734+
}
728735
}else {
729736
return &gotoblas_BARCELONA;
730737
}
738+
731739
}
732740
}
733741

0 commit comments

Comments
 (0)