Skip to content

Commit fb3d80c

Browse files
authored
Merge pull request #78 from xianyi/develop
rebase
2 parents 9f0ef9c + 9ee21a0 commit fb3d80c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

71 files changed

+450
-91
lines changed

CMakeLists.txt

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -389,11 +389,9 @@ if(NOT NO_LAPACKE)
389389
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
390390
endif()
391391

392-
include(FindPkgConfig QUIET)
393-
if(PKG_CONFIG_FOUND)
394-
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
395-
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
396-
endif()
392+
# Install pkg-config files
393+
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
394+
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)
397395

398396

399397
# GNUInstallDirs "DATADIR" wrong here; CMake search path wants "share".

Makefile.system

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,9 @@ endif
8888
ifeq ($(TARGET), SKYLAKEX)
8989
GETARCH_FLAGS := -DFORCE_NEHALEM
9090
endif
91+
ifeq ($(TARGET), COOPERLAKE)
92+
GETARCH_FLAGS := -DFORCE_NEHALEM
93+
endif
9194
ifeq ($(TARGET), SANDYBRIDGE)
9295
GETARCH_FLAGS := -DFORCE_NEHALEM
9396
endif
@@ -130,6 +133,9 @@ endif
130133
ifeq ($(TARGET_CORE), SKYLAKEX)
131134
GETARCH_FLAGS := -DFORCE_NEHALEM
132135
endif
136+
ifeq ($(TARGET_CORE), COOPERLAKE)
137+
GETARCH_FLAGS := -DFORCE_NEHALEM
138+
endif
133139
ifeq ($(TARGET_CORE), SANDYBRIDGE)
134140
GETARCH_FLAGS := -DFORCE_NEHALEM
135141
endif
@@ -553,7 +559,7 @@ DYNAMIC_CORE += HASWELL ZEN
553559
endif
554560
ifneq ($(NO_AVX512), 1)
555561
ifneq ($(NO_AVX2), 1)
556-
DYNAMIC_CORE += SKYLAKEX
562+
DYNAMIC_CORE += SKYLAKEX COOPERLAKE
557563
endif
558564
endif
559565
endif

Makefile.x86_64

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,32 @@ endif
2727
endif
2828
endif
2929

30+
# Compiler flags for a static (non-DYNAMIC_ARCH) COOPERLAKE build with AVX512
# enabled: choose the -march value for GCC and add the unwind-table
# workaround on Cygwin and on Windows/GCC.
ifeq ($(CORE), COOPERLAKE)
ifndef DYNAMIC_ARCH
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# cooperlake support was added in GCC 10.1. Accept 10.x with x >= 1 and every
# major version from 11 on; the minor number must not be tested on its own,
# otherwise an 11.0 / 12.0 compiler would be rejected.
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
GCCVERSIONGTEQ11 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 11)
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 1)
# Three digits: (major >= 11)(major >= 10)(minor >= 1).
#   011 -> 10.x with x >= 1, 110 -> major >= 11 with minor 0, 111 -> major >= 11 with minor >= 1
GCCCOOPERLAKECHECK := $(GCCVERSIONGTEQ11)$(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1)
ifneq ($(filter 011 110 111, $(GCCCOOPERLAKECHECK)),)
CCOMMON_OPT += -march=cooperlake
FCOMMON_OPT += -march=cooperlake
else
# GCC too old for -march=cooperlake: fall back to skylake-avx512, the same
# fallback the CMake build applies in cmake/cc.cmake.
CCOMMON_OPT += -march=skylake-avx512
FCOMMON_OPT += -march=skylake-avx512
endif
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
endif
55+
3056
ifeq ($(CORE), HASWELL)
3157
ifndef DYNAMIC_ARCH
3258
ifndef NO_AVX2

TargetList.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ SANDYBRIDGE
2222
HASWELL
2323
SKYLAKEX
2424
ATOM
25+
COOPERLAKE
2526

2627
b)AMD CPU:
2728
ATHLON

cmake/arch.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,9 @@ if (DYNAMIC_ARCH)
7676
set(DYNAMIC_CORE ${DYNAMIC_CORE} HASWELL ZEN)
7777
endif ()
7878
if (NOT NO_AVX512)
79-
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX)
79+
set(DYNAMIC_CORE ${DYNAMIC_CORE} SKYLAKEX COOPERLAKE)
8080
string(REGEX REPLACE "-march=native" "" CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
81-
endif ()
81+
endif ()
8282
if (DYNAMIC_LIST)
8383
set(DYNAMIC_CORE PRESCOTT ${DYNAMIC_LIST})
8484
endif ()

cmake/cc.cmake

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,3 +103,16 @@ if (${CORE} STREQUAL "SKYLAKEX")
103103
endif ()
104104
endif ()
105105
endif ()
106+
107+
# Static (non-DYNAMIC_ARCH) COOPERLAKE build with AVX512 enabled: append the
# matching -march flag to CCOMMON_OPT. -march=cooperlake is used when the
# compiler reports version 10.1 or newer, -march=skylake-avx512 otherwise.
if (${CORE} STREQUAL "COOPERLAKE")
  if (NOT DYNAMIC_ARCH)
    if (NOT NO_AVX512)
      # Strip the trailing newline so GCC_VERSION is a clean version string.
      execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                      OUTPUT_VARIABLE GCC_VERSION
                      OUTPUT_STRIP_TRAILING_WHITESPACE)
      # Quoted so an empty probe result cannot produce a malformed if();
      # "NOT ... VERSION_LESS" is the same test as "greater than or equal".
      if (NOT "${GCC_VERSION}" VERSION_LESS 10.1)
        # No "=" here: set(VAR = "...") would store the list "=;<flags>".
        set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake")
      else ()
        set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
      endif ()
    endif ()
  endif ()
endif ()

cmake/system.cmake

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ endif ()
3333
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
3434
message(STATUS "Compiling a ${BINARY}-bit binary.")
3535
set(NO_AVX 1)
36-
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX")
36+
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE" OR ${TARGET} STREQUAL "SKYLAKEX" OR ${TARGET} STREQUAL "COOPERLAKE")
3737
set(TARGET "NEHALEM")
3838
endif ()
3939
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER" OR ${TARGET} STREQUAL "ZEN")
@@ -45,6 +45,18 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
4545
endif ()
4646

4747
if (DEFINED TARGET)
48+
# COOPERLAKE target with AVX512 enabled: append the -march flag used for the
# kernels to KERNEL_DEFINITIONS. -march=cooperlake is used when the compiler
# reports version 10.1 or newer, -march=skylake-avx512 otherwise.
# NOTE(review): the version probe is not restricted to GCC; a compiler-ID
# guard was sketched here earlier but never enabled - confirm whether
# non-GNU compilers need separate handling.
if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
  # Strip the trailing newline so GCC_VERSION is a clean version string.
  execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
                  OUTPUT_VARIABLE GCC_VERSION
                  OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Quoted so an empty probe result cannot produce a malformed if();
  # "NOT ... VERSION_LESS" is the same test as "greater than or equal".
  if (NOT "${GCC_VERSION}" VERSION_LESS 10.1)
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
  else ()
    set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
  endif ()
endif ()
4860
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
4961
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
5062
endif()

common_level3.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,12 @@ __global__ void cuda_dgemm_kernel(int, int, int, double *, double *, double *);
4747
extern "C" {
4848
#endif
4949

50-
extern void sgemm_kernel_direct(BLASLONG M, BLASLONG N, BLASLONG K,
50+
void sgemm_direct(BLASLONG M, BLASLONG N, BLASLONG K,
5151
float * A, BLASLONG strideA,
5252
float * B, BLASLONG strideB,
5353
float * R, BLASLONG strideR);
5454

55-
extern int sgemm_kernel_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
55+
int sgemm_direct_performant(BLASLONG M, BLASLONG N, BLASLONG K);
5656

5757

5858
int shgemm_beta(BLASLONG, BLASLONG, BLASLONG, float,

common_param.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,11 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
175175
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
176176
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
177177

178+
#ifdef ARCH_X86_64
179+
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
180+
int (*sgemm_direct_performant) (BLASLONG M, BLASLONG N, BLASLONG K);
181+
#endif
182+
178183
int (*sgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG);
179184
int (*sgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
180185

common_s.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@
4545
#define SSYMV_THREAD_U ssymv_thread_U
4646
#define SSYMV_THREAD_L ssymv_thread_L
4747

48+
49+
#define SGEMM_DIRECT_PERFORMANT sgemm_direct_performant
50+
#define SGEMM_DIRECT sgemm_direct
51+
4852
#define SGEMM_ONCOPY sgemm_oncopy
4953
#define SGEMM_OTCOPY sgemm_otcopy
5054

@@ -204,6 +208,14 @@
204208
#define SSYMV_THREAD_U ssymv_thread_U
205209
#define SSYMV_THREAD_L ssymv_thread_L
206210

211+
#ifdef ARCH_X86_64
212+
#define SGEMM_DIRECT_PERFORMANT gotoblas -> sgemm_direct_performant
213+
#define SGEMM_DIRECT gotoblas -> sgemm_direct
214+
#else
215+
#define SGEMM_DIRECT_PERFORMANT sgemm_direct_performant
216+
#define SGEMM_DIRECT sgemm_direct
217+
#endif
218+
207219
#define SGEMM_ONCOPY gotoblas -> sgemm_oncopy
208220
#define SGEMM_OTCOPY gotoblas -> sgemm_otcopy
209221
#define SGEMM_INCOPY gotoblas -> sgemm_incopy

0 commit comments

Comments
 (0)