Skip to content

Commit 756802d

Browse files
authored
Merge pull request #2890 from martin-frbg/s-d-sum
Revert special handling of Windows xNRM2 and enable C+intrinsics kern…
2 parents 01492de + 75e3a92 commit 756802d

File tree

4 files changed

+29
-12
lines changed

4 files changed

+29
-12
lines changed

Makefile.x86_64

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ ifdef HAVE_SSE3
1212
ifndef DYNAMIC_ARCH
1313
CCOMMON_OPT += -msse3
1414
FCOMMON_OPT += -msse3
15+
ifdef HAVE_SSSE3
16+
CCOMMON_OPT += -mssse3
17+
FCOMMON_OPT += -mssse3
18+
endif
1519
endif
1620
endif
1721

@@ -60,7 +64,7 @@ endif
6064
endif
6165
endif
6266

63-
ifeq ($(CORE), HASWELL)
67+
ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
6468
ifndef DYNAMIC_ARCH
6569
ifndef NO_AVX2
6670
ifeq ($(C_COMPILER), GCC)

cmake/cc.cmake

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -109,10 +109,25 @@ if (${CORE} STREQUAL "COOPERLAKE")
109109
if (NOT NO_AVX512)
110110
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
111111
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
112-
set (CCOMMON_OPT = "${CCOMMON_OPT} -march=cooperlake")
112+
set (CCOMMON_OPT "${CCOMMON_OPT} -march=cooperlake")
113113
else ()
114114
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
115115
endif()
116116
endif ()
117117
endif ()
118118
endif ()
119+
120+
# Non-DYNAMIC_ARCH builds only: append one SIMD option to CCOMMON_OPT for each
# HAVE_* capability variable that evaluates true (-mavx2, -mavx, -msse3,
# -mssse3). The four checks are independent of each other, so several options
# may be appended in a single configure run.
# NOTE(review): no compiler-ID guard is visible around this block in this
# hunk -- confirm these "-m" options are only reached with compilers that
# accept them.
if (NOT DYNAMIC_ARCH)
121+
if (HAVE_AVX2)
122+
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx2")
123+
endif ()
124+
if (HAVE_AVX)
125+
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx")
126+
endif ()
127+
if (HAVE_SSE3)
128+
set (CCOMMON_OPT "${CCOMMON_OPT} -msse3")
129+
endif ()
130+
if (HAVE_SSSE3)
131+
set (CCOMMON_OPT "${CCOMMON_OPT} -mssse3")
132+
endif ()
133+
endif()

kernel/Makefile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@ include $(TOPDIR)/Makefile.system
88
ifdef HAVE_SSE3
99
CFLAGS += -msse3
1010
endif
11+
# Add -mssse3 to CFLAGS when HAVE_SSSE3 is defined.
ifdef HAVE_SSSE3
12+
CFLAGS += -mssse3
13+
endif
1114

1215
ifeq ($(C_COMPILER), GCC)
1316
GCCVERSIONGTEQ9 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 9)
@@ -41,8 +44,8 @@ ifdef NO_AVX2
4144
endif
4245

4346
ifdef TARGET_CORE
44-
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO NEHALEM BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
45-
override CFLAGS += -msse3
47+
ifeq ($(TARGET_CORE), $(filter $(TARGET_CORE),PRESCOTT CORE2 PENRYN DUNNINGTON ATOM NANO SANDYBRIDGE HASWELL NEHALEM ZEN BARCELONA BOBCAT BULLDOZER PILEDRIVER EXCAVATOR STEAMROLLER OPTERON_SSE3))
48+
override CFLAGS += -msse3 -mssse3
4649
endif
4750
ifeq ($(TARGET_CORE), COOPERLAKE)
4851
override CFLAGS += -DBUILD_KERNEL -DTABLE_NAME=gotoblas_$(TARGET_CORE)

kernel/x86_64/KERNEL

Lines changed: 3 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -259,12 +259,8 @@ SNRM2KERNEL = nrm2_sse.S
259259
endif
260260

261261
ifndef DNRM2KERNEL
262-
ifeq ($(OSNAME),WINNT)
263-
DNRM2KERNEL = ../arm/nrm2.c
264-
else
265262
DNRM2KERNEL = nrm2.S
266263
endif
267-
endif
268264

269265
ifndef QNRM2KERNEL
270266
QNRM2KERNEL = nrm2.S
@@ -275,12 +271,8 @@ CNRM2KERNEL = znrm2_sse.S
275271
endif
276272

277273
ifndef ZNRM2KERNEL
278-
ifeq ($(OSNAME),WINNT)
279-
ZNRM2KERNEL = ../arm/znrm2.c
280-
else
281274
ZNRM2KERNEL = znrm2.S
282275
endif
283-
endif
284276

285277
ifndef XNRM2KERNEL
286278
XNRM2KERNEL = znrm2.S
@@ -486,3 +478,6 @@ XTRSMKERNEL_RN = xtrsm_kernel_LT_1x1.S
486478
XTRSMKERNEL_RT = xtrsm_kernel_LT_1x1.S
487479

488480
XGEMM3MKERNEL = xgemm3m_kernel_2x2.S
481+
482+
# Point both SSUMKERNEL and DSUMKERNEL at the same C source, ../arm/sum.c.
# These are unconditional assignments, unlike the "ifndef"-guarded kernel
# defaults visible earlier in this file's hunks.
SSUMKERNEL = ../arm/sum.c
483+
DSUMKERNEL = ../arm/sum.c

0 commit comments

Comments
 (0)