Skip to content

Commit 2e99e26

Browse files
authored
Add workaround for gcc 4.6 miscompiling assembly kernels with -mavx
1 parent 52ed274 commit 2e99e26

File tree

4 files changed

+72
-0
lines changed

4 files changed

+72
-0
lines changed

Makefile.system

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1561,6 +1561,7 @@ export KERNELDIR
15611561
export FUNCTION_PROFILE
15621562
export TARGET_CORE
15631563
export NO_AVX512
1564+
export NO_AVX2
15641565
export BUILD_BFLOAT16
15651566

15661567
export SBGEMM_UNROLL_M

Makefile.x86_64

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,14 +20,18 @@ ifdef HAVE_SSE4_1
2020
CCOMMON_OPT += -msse4.1
2121
FCOMMON_OPT += -msse4.1
2222
endif
23+
# Only pass -mavx to the C and Fortran compilers when OLDGCC is not set.
# OLDGCC=1 is written out by c_check when `gcc -dumpversion` reports 4.6 or
# older (workaround for gcc 4.6 miscompiling assembly kernels with -mavx).
ifndef OLDGCC
2324
ifdef HAVE_AVX
2425
CCOMMON_OPT += -mavx
2526
FCOMMON_OPT += -mavx
2627
endif
28+
endif
29+
# Only pass -mavx2 when AVX2 has not been disabled. NO_AVX2=1 is written out
# by c_check together with OLDGCC=1 when the detected GCC is 4.6 or older.
ifndef NO_AVX2
2730
ifdef HAVE_AVX2
2831
CCOMMON_OPT += -mavx2
2932
FCOMMON_OPT += -mavx2
3033
endif
34+
endif
3135
ifdef HAVE_FMA3
3236
CCOMMON_OPT += -mfma
3337
FCOMMON_OPT += -mfma

c_check

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,16 @@ $architecture = zarch if ($data =~ /ARCH_ZARCH/);
229229
$binformat = bin32;
230230
$binformat = bin64 if ($data =~ /BINARY_64/);
231231

232+
233+
# Detect GCC releases up to and including 4.6 on x86/x86_64 so the build can
# work around them miscompiling the assembly kernels with -mavx.
#
# Sets two flags that are later written to the generated makefile fragment:
#   $no_avx2 - 1 when AVX2 code paths must be disabled, otherwise 0
#   $oldgcc  - 1 when the compiler is GCC <= 4.6, otherwise 0
#
# Both flags are initialised unconditionally so they are always defined,
# including for non-GCC compilers and non-x86 targets.
$no_avx2 = 0;
$oldgcc = 0;
if ($compiler eq "GCC" && (($architecture eq "x86") || ($architecture eq "x86_64"))) {
    # Keep the version text in its own lexical instead of overwriting $data,
    # which up to this point holds the text matched against the ARCH_* and
    # BINARY_64 markers (later code may still want to inspect it).
    my $gcc_version = `$compiler_name -dumpversion`;

    # -dumpversion prints e.g. "4.6.3", "4.7" or just "9". Compare major and
    # minor as integers rather than numifying the whole string: a float
    # comparison treats empty output (command failed) as 0, i.e. "old", and
    # would read a two-digit minor such as "4.10" as 4.1.
    if (defined $gcc_version && $gcc_version =~ /^\s*(\d+)(?:\.(\d+))?/) {
        my $major = $1;
        my $minor = defined $2 ? $2 : 0;
        if ($major < 4 || ($major == 4 && $minor <= 6)) {
            $no_avx2 = 1;
            $oldgcc = 1;
        }
    }
    # Unparseable output leaves both flags at 0: without a known version there
    # is no evidence that the workaround is needed.
}
232242
$no_avx512= 0;
233243
if (($architecture eq "x86") || ($architecture eq "x86_64")) {
234244
eval "use File::Temp qw(tempfile)";
@@ -368,6 +378,8 @@ print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
368378
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
369379
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
370380
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
381+
print MAKEFILE "NO_AVX2=1\n" if $no_avx2 eq 1;
382+
print MAKEFILE "OLDGCC=1\n" if $oldgcc eq 1;
371383

372384
$os =~ tr/[a-z]/[A-Z]/;
373385
$architecture =~ tr/[a-z]/[A-Z]/;

getarch.c

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
326326
#define FORCE
327327
#define FORCE_INTEL
328328
#define ARCHITECTURE "X86"
329+
#ifdef NO_AVX2
330+
#define SUBARCHITECTURE "SANDYBRIDGE"
331+
#define ARCHCONFIG "-DSANDYBRIDGE " \
332+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
333+
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
334+
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
335+
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX"
336+
#define LIBNAME "sandybridge"
337+
#define CORENAME "SANDYBRIDGE"
338+
#else
329339
#define SUBARCHITECTURE "HASWELL"
330340
#define ARCHCONFIG "-DHASWELL " \
331341
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
@@ -336,6 +346,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
336346
#define LIBNAME "haswell"
337347
#define CORENAME "HASWELL"
338348
#endif
349+
#endif
339350

340351
#ifdef FORCE_SKYLAKEX
341352
#ifdef NO_AVX512
@@ -551,6 +562,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
551562
#define FORCE
552563
#define FORCE_INTEL
553564
#define ARCHITECTURE "X86"
565+
#ifdef NO_AVX2
566+
#define SUBARCHITECTURE "SANDYBRIDGE"
567+
#define ARCHCONFIG "-DSANDYBRIDGE " \
568+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
569+
"-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
570+
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
571+
"-DHAVE_CMOV -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 -DHAVE_AVX"
572+
#define LIBNAME "sandybridge"
573+
#define CORENAME "SANDYBRIDGE"
574+
#else
554575
#define SUBARCHITECTURE "ZEN"
555576
#define ARCHCONFIG "-DZEN " \
556577
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
@@ -565,6 +586,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
565586
#define LIBNAME "zen"
566587
#define CORENAME "ZEN"
567588
#endif
589+
#endif
568590

569591

570592
#ifdef FORCE_SSE_GENERIC
@@ -983,6 +1005,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
9831005
#else
9841006
#endif
9851007

1008+
/* Forced target RISCV64_GENERIC: when FORCE_RISCV64_GENERIC is defined, mark
   the configuration as forced (FORCE) and set the architecture, sub-directory,
   library and core name strings. ARCHCONFIG carries the fixed -D flags for
   this target (L1 data size 32768 with line size 32, L2 size 1048576 with
   line size 32 and associativity 4, 128 default DTB entries, DTB size 4096).
   The empty #else branch follows the pattern of the preceding section. */
#ifdef FORCE_RISCV64_GENERIC
1009+
#define FORCE
1010+
#define ARCHITECTURE "RISCV64"
1011+
#define SUBARCHITECTURE "RISCV64_GENERIC"
1012+
#define SUBDIRNAME "riscv64"
1013+
#define ARCHCONFIG "-DRISCV64_GENERIC " \
1014+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
1015+
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
1016+
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
1017+
#define LIBNAME "riscv64_generic"
1018+
#define CORENAME "RISCV64_GENERIC"
1019+
#else
1020+
#endif
1021+
9861022
#ifdef FORCE_CORTEXA15
9871023
#define FORCE
9881024
#define ARCHITECTURE "ARM"
@@ -1268,6 +1304,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
12681304
#define CORENAME "Z14"
12691305
#endif
12701306

1307+
/* Forced target C910V: when FORCE_C910V is defined, mark the configuration as
   forced (FORCE) and set the architecture ("RISCV64"), sub-directory
   ("riscv64"), library and core name strings. ARCHCONFIG carries the fixed -D
   flags for this target (L1 data size 32768 with line size 32, L2 size
   1048576 with line size 32 and associativity 4, 128 default DTB entries,
   DTB size 4096). The #else branch is intentionally empty. */
#ifdef FORCE_C910V
1308+
#define FORCE
1309+
#define ARCHITECTURE "RISCV64"
1310+
#define SUBARCHITECTURE "C910V"
1311+
#define SUBDIRNAME "riscv64"
1312+
#define ARCHCONFIG "-DC910V " \
1313+
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=32 " \
1314+
"-DL2_SIZE=1048576 -DL2_LINESIZE=32 " \
1315+
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
1316+
#define LIBNAME "c910v"
1317+
#define CORENAME "C910V"
1318+
#else
1319+
#endif
1320+
1321+
12711322
#ifndef FORCE
12721323

12731324
#ifdef USER_TARGET
@@ -1322,6 +1373,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
13221373
#define OPENBLAS_SUPPORTED
13231374
#endif
13241375

1376+
/* RISC-V hosts: include cpuid_riscv64.c (presumably the CPU identification
   code for this architecture) and flag the platform as supported, as the
   neighbouring per-architecture sections (e.g. __arm__) do after their own
   cpuid include. NOTE(review): confirm cpuid_riscv64.c does not rely on
   OPENBLAS_SUPPORTED being left undefined. */
#ifdef __riscv
#include "cpuid_riscv64.c"
#define OPENBLAS_SUPPORTED
#endif
1379+
13251380
#ifdef __arm__
13261381
#include "cpuid_arm.c"
13271382
#define OPENBLAS_SUPPORTED

0 commit comments

Comments
 (0)