Skip to content

Commit 9ee21a0

Browse files
authored
Merge pull request #2780 from Guobing-Chen/CPL_build_support
Enable COOPERLAKE build target
2 parents 75eeb26 + bd3207b commit 9ee21a0

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

62 files changed

+336
-76
lines changed

Makefile.system

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ endif
8888
ifeq ($(TARGET), SKYLAKEX)
8989
GETARCH_FLAGS := -DFORCE_NEHALEM
9090
endif
91+
ifeq ($(TARGET), COOPERLAKE)
92+
GETARCH_FLAGS := -DFORCE_NEHALEM
93+
endif
9194
ifeq ($(TARGET), SANDYBRIDGE)
9295
GETARCH_FLAGS := -DFORCE_NEHALEM
9396
endif
@@ -130,6 +133,9 @@ endif
130133
ifeq ($(TARGET_CORE), SKYLAKEX)
131134
GETARCH_FLAGS := -DFORCE_NEHALEM
132135
endif
136+
ifeq ($(TARGET_CORE), COOPERLAKE)
137+
GETARCH_FLAGS := -DFORCE_NEHALEM
138+
endif
133139
ifeq ($(TARGET_CORE), SANDYBRIDGE)
134140
GETARCH_FLAGS := -DFORCE_NEHALEM
135141
endif
@@ -553,7 +559,7 @@ DYNAMIC_CORE += HASWELL ZEN
553559
endif
554560
ifneq ($(NO_AVX512), 1)
555561
ifneq ($(NO_AVX2), 1)
556-
DYNAMIC_CORE += SKYLAKEX
562+
DYNAMIC_CORE += SKYLAKEX COOPERLAKE
557563
endif
558564
endif
559565
endif

Makefile.x86_64

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,32 @@ endif
2727
endif
2828
endif
2929

30+
# Compiler flags for a static (non-DYNAMIC_ARCH) COOPERLAKE build with AVX512
# enabled.
ifeq ($(CORE), COOPERLAKE)
ifndef DYNAMIC_ARCH
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# -march=cooperlake was added in GCC 10.1. Accept 10.x with x >= 1 and every
# later major release; testing only "major >= 10 and minor >= 1" would reject
# versions such as 11.0.
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 1)
GCC_HAS_COOPERLAKE := $(GCCVERSIONGTEQ11)
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
GCC_HAS_COOPERLAKE := 1
endif
ifeq ($(GCC_HAS_COOPERLAKE), 1)
CCOMMON_OPT += -march=cooperlake
FCOMMON_OPT += -march=cooperlake
else
# GCC too old for cooperlake: fall back to the AVX512 baseline, matching the
# else-branch of the COOPERLAKE block in cmake/cc.cmake.
CCOMMON_OPT += -march=skylake-avx512
FCOMMON_OPT += -march=skylake-avx512
endif
endif
# Windows-like targets: disable asynchronous unwind tables.
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
endif
55+
3056
ifeq ($(CORE), HASWELL)
3157
ifndef DYNAMIC_ARCH
3258
ifndef NO_AVX2

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ SANDYBRIDGE
2222
HASWELL
2323
SKYLAKEX
2424
ATOM
25+
COOPERLAKE
2526

2627
b)AMD CPU:
2728
ATHLON

cmake/arch.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ if (DYNAMIC_ARCH)
7676
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
7777
endif ()
7878
if (NOT NO_AVX512)
79-
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
79+
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX COOPERLAKE)
8080
string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
81-
endif ()
81+
endif ()
8282
if (DYNAMIC_LIST)
8383
set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST})
8484
endif ()

cmake/cc.cmake

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,16 @@ if (${CORE} STREQUAL "SKYLAKEX")
103103
endif ()
104104
endif ()
105105
endif ()
106+
107+
# Static (non-DYNAMIC_ARCH) COOPERLAKE build with AVX512 enabled: append the
# -march flag that the C compiler is able to accept to CCOMMON_OPT.
if ("${CORE}" STREQUAL "COOPERLAKE")
  if (NOT DYNAMIC_ARCH)
    if (NOT NO_AVX512)
      # Strip the trailing newline so the version string compares cleanly.
      execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                      OUTPUT_VARIABLE GCC_VERSION
                      OUTPUT_STRIP_TRAILING_WHITESPACE)
      # Depending on how GCC was configured, -dumpversion may print only the
      # major version (e.g. "10"), which would compare as older than 10.1.
      # In that case ask for the full major.minor.patch version instead.
      if ("${GCC_VERSION}" MATCHES "^[0-9]+$")
        execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpfullversion
                        OUTPUT_VARIABLE GCC_VERSION
                        OUTPUT_STRIP_TRAILING_WHITESPACE)
      endif ()
      # -march=cooperlake needs GCC 10.1 or newer; otherwise use the AVX512
      # baseline. Quoting keeps if() well-formed when the query printed nothing.
      if (NOT "${GCC_VERSION}" VERSION_LESS "10.1")
        set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake")
      else ()
        set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
      endif ()
    endif ()
  endif ()
endif ()

cmake/system.cmake

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ endif ()
3333
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
3434
message(STATUS "Compiling a ${BINARY}-bit binary.")
3535
set(NO_AVX 1)
36-
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
36+
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE")
3737
set(TARGET "NEHALEM")
3838
endif ()
3939
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
@@ -45,6 +45,18 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
4545
endif ()
4646

4747
if (DEFINED TARGET)
48+
# COOPERLAKE target with AVX512 enabled: append the -march flag that the C
# compiler is able to accept to KERNEL_DEFINITIONS.
if ("${TARGET}" STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
  # Strip the trailing newline so the version string compares cleanly.
  execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                  OUTPUT_VARIABLE GCC_VERSION
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Depending on how GCC was configured, -dumpversion may print only the major
  # version (e.g. "10"), which would compare as older than 10.1. In that case
  # ask for the full major.minor.patch version instead.
  if ("${GCC_VERSION}" MATCHES "^[0-9]+$")
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpfullversion
                    OUTPUT_VARIABLE GCC_VERSION
                    OUTPUT_STRIP_TRAILING_WHITESPACE)
  endif ()
  # -march=cooperlake needs GCC 10.1 or newer; otherwise use the AVX512
  # baseline. Quoting keeps if() well-formed when the query printed nothing.
  # NOTE(review): the version query is GCC-specific and no compiler-ID guard
  # is applied here - confirm the intended behaviour for other compilers.
  if (NOT "${GCC_VERSION}" VERSION_LESS "10.1")
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
  else ()
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
  endif ()
endif ()
4860
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
4961
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
5062
endif()

cpuid.h

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@
118118
#define CORE_ZEN 27
119119
#define CORE_SKYLAKEX 28
120120
#define CORE_DHYANA 29
121+
#define CORE_COOPERLAKE 30
121122

122123
#define HAVE_SSE (1 << 0)
123124
#define HAVE_SSE2 (1 << 1)
@@ -137,11 +138,12 @@
137138
#define HAVE_MISALIGNSSE (1 << 15)
138139
#define HAVE_128BITFPU (1 << 16)
139140
#define HAVE_FASTMOVU (1 << 17)
140-
#define HAVE_AVX (1 << 18)
141-
#define HAVE_FMA4 (1 << 19)
142-
#define HAVE_FMA3 (1 << 20)
143-
#define HAVE_AVX512VL (1 << 21)
144-
#define HAVE_AVX2 (1 << 22)
141+
#define HAVE_AVX (1 << 18)
142+
#define HAVE_FMA4 (1 << 19)
143+
#define HAVE_FMA3 (1 << 20)
144+
#define HAVE_AVX512VL (1 << 21)
145+
#define HAVE_AVX2 (1 << 22)
146+
#define HAVE_AVX512BF16 (1 << 23)
145147

146148
#define CACHE_INFO_L1_I 1
147149
#define CACHE_INFO_L1_D 2
@@ -218,7 +220,8 @@ typedef struct {
218220
#define CPUTYPE_ZEN 51
219221
#define CPUTYPE_SKYLAKEX 52
220222
#define CPUTYPE_DHYANA 53
223+
#define CPUTYPE_COOPERLAKE 54
221224

222-
#define CPUTYPE_HYGON_UNKNOWN 54
225+
#define CPUTYPE_HYGON_UNKNOWN 99
223226

224227
#endif

cpuid_x86.c

Lines changed: 32 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,22 @@ int support_avx512(){
249249
#endif
250250
}
251251

252+
/*
 * Report whether AVX512_BF16 can be used.
 *
 * Returns 1 when support_avx512() succeeds and CPUID leaf 7, sub-leaf 1
 * reports EAX bit 5 set; returns 0 otherwise. Always returns 0 when the
 * file is compiled with NO_AVX or NO_AVX512 defined.
 */
int support_avx512_bf16(){
#if defined(NO_AVX) || defined(NO_AVX512)
  return 0;
#else
  int eax, ebx, ecx, edx;

  /* BF16 is only reported on top of working AVX512 support */
  if (!support_avx512())
    return 0;

  cpuid_count(7, 1, &eax, &ebx, &ecx, &edx);

  /* CPUID.7.1:EAX[bit 5] is the AVX512_BF16 flag */
  return (eax & (1 << 5)) ? 1 : 0;
#endif
}
252268

253269
int get_vendor(void){
254270
int eax, ebx, ecx, edx;
@@ -335,6 +351,7 @@ int get_cputype(int gettype){
335351
if (support_avx()) feature |= HAVE_AVX;
336352
if (support_avx2()) feature |= HAVE_AVX2;
337353
if (support_avx512()) feature |= HAVE_AVX512VL;
354+
if (support_avx512_bf16()) feature |= HAVE_AVX512BF16;
338355
if ((ecx & (1 << 12)) != 0) feature |= HAVE_FMA3;
339356
#endif
340357

@@ -1337,6 +1354,8 @@ int get_cpuname(void){
13371354
return CPUTYPE_NEHALEM;
13381355
case 5:
13391356
// Skylake X
1357+
if(support_avx512_bf16())
1358+
return CPUTYPE_COOPERLAKE;
13401359
if(support_avx512())
13411360
return CPUTYPE_SKYLAKEX;
13421361
if(support_avx2())
@@ -1677,7 +1696,8 @@ static char *cpuname[] = {
16771696
"EXCAVATOR",
16781697
"ZEN",
16791698
"SKYLAKEX",
1680-
"DHYANA"
1699+
"DHYANA",
1700+
"COOPERLAKE"
16811701
};
16821702

16831703
static char *lowercpuname[] = {
@@ -1733,7 +1753,8 @@ static char *lowercpuname[] = {
17331753
"excavator",
17341754
"zen",
17351755
"skylakex",
1736-
"dhyana"
1756+
"dhyana",
1757+
"cooperlake"
17371758
};
17381759

17391760
static char *corename[] = {
@@ -1766,7 +1787,8 @@ static char *corename[] = {
17661787
"EXCAVATOR",
17671788
"ZEN",
17681789
"SKYLAKEX",
1769-
"DHYANA"
1790+
"DHYANA",
1791+
"COOPERLAKE"
17701792
};
17711793

17721794
static char *corename_lower[] = {
@@ -1799,7 +1821,8 @@ static char *corename_lower[] = {
17991821
"excavator",
18001822
"zen",
18011823
"skylakex",
1802-
"dhyana"
1824+
"dhyana",
1825+
"cooperlake"
18031826
};
18041827

18051828

@@ -2007,7 +2030,9 @@ int get_coretype(void){
20072030
case 5:
20082031
// Skylake X
20092032
#ifndef NO_AVX512
2010-
return CORE_SKYLAKEX;
2033+
if(support_avx512_bf16())
2034+
return CORE_COOPERLAKE;
2035+
return CORE_SKYLAKEX;
20112036
#else
20122037
if(support_avx())
20132038
#ifndef NO_AVX2
@@ -2276,6 +2301,7 @@ void get_cpuconfig(void){
22762301
if (features & HAVE_AVX ) printf("#define HAVE_AVX\n");
22772302
if (features & HAVE_AVX2 ) printf("#define HAVE_AVX2\n");
22782303
if (features & HAVE_AVX512VL ) printf("#define HAVE_AVX512VL\n");
2304+
if (features & HAVE_AVX512BF16 ) printf("#define HAVE_AVX512BF16\n");
22792305
if (features & HAVE_3DNOWEX) printf("#define HAVE_3DNOWEX\n");
22802306
if (features & HAVE_3DNOW) printf("#define HAVE_3DNOW\n");
22812307
if (features & HAVE_FMA4 ) printf("#define HAVE_FMA4\n");
@@ -2346,6 +2372,7 @@ void get_sse(void){
23462372
if (features & HAVE_AVX ) printf("HAVE_AVX=1\n");
23472373
if (features & HAVE_AVX2 ) printf("HAVE_AVX2=1\n");
23482374
if (features & HAVE_AVX512VL ) printf("HAVE_AVX512VL=1\n");
2375+
if (features & HAVE_AVX512BF16 ) printf("HAVE_AVX512BF16=1\n");
23492376
if (features & HAVE_3DNOWEX) printf("HAVE_3DNOWEX=1\n");
23502377
if (features & HAVE_3DNOW) printf("HAVE_3DNOW=1\n");
23512378
if (features & HAVE_FMA4 ) printf("HAVE_FMA4=1\n");

driver/level3/level3.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -333,7 +333,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
333333
#else
334334
for(jjs = js; jjs < js + min_j; jjs += min_jj){
335335
min_jj = min_j + js - jjs;
336-
#ifdef SKYLAKEX
336+
#if defined(SKYLAKEX) || defined(COOPERLAKE)
337337
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve best performance */
338338
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
339339
#else

driver/level3/level3_thread.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
367367
/* Split local region of B into parts */
368368
for(jjs = js; jjs < MIN(n_to, js + div_n); jjs += min_jj){
369369
min_jj = MIN(n_to, js + div_n) - jjs;
370-
#ifdef SKYLAKEX
370+
#if defined(SKYLAKEX) || defined(COOPERLAKE)
371371
/* the current AVX512 s/d/c/z GEMM kernel requires n>=6*GEMM_UNROLL_N to achieve the best performance */
372372
if (min_jj >= 6*GEMM_UNROLL_N) min_jj = 6*GEMM_UNROLL_N;
373373
#else

0 commit comments

Comments
 (0)