@@ -9,11 +9,10 @@ ifndef TOPDIR
99TOPDIR = .
1010endif
1111
12- # If ARCH is not set, we use the host system's architecture for getarch compile options.
13- ifndef ARCH
12+ # We need to use the host system's architecture for getarch compile options, especially when cross-compiling.
1413HOSTARCH := $(shell uname -m)
15- else
16- HOSTARCH = $(ARCH)
14+ ifeq ($(HOSTARCH), amd64)
15+ HOSTARCH=x86_64
1716endif
1817
1918# Catch conflicting usage of ARCH in some BSD environments
@@ -102,7 +101,7 @@ GETARCH_FLAGS += -DUSER_TARGET
102101ifeq ($(TARGET), GENERIC)
103102ifeq ($(DYNAMIC_ARCH), 1)
104103override NO_EXPRECISION=1
105- export NO_EXPRECiSION
104+ export NO_EXPRECISION
106105endif
107106endif
108107endif
@@ -119,6 +118,9 @@ endif
119118ifeq ($(TARGET), COOPERLAKE)
120119GETARCH_FLAGS := -DFORCE_NEHALEM
121120endif
121+ ifeq ($(TARGET), SAPPHIRERAPIDS)
122+ GETARCH_FLAGS := -DFORCE_NEHALEM
123+ endif
122124ifeq ($(TARGET), SANDYBRIDGE)
123125GETARCH_FLAGS := -DFORCE_NEHALEM
124126endif
@@ -143,8 +145,13 @@ endif
143145ifeq ($(TARGET), POWER8)
144146GETARCH_FLAGS := -DFORCE_POWER6
145147endif
148+ ifeq ($(TARGET), POWER9)
149+ GETARCH_FLAGS := -DFORCE_POWER6
150+ endif
151+ ifeq ($(TARGET), POWER10)
152+ GETARCH_FLAGS := -DFORCE_POWER6
153+ endif
146154endif
147-
148155
149156#TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1.
150157#
@@ -164,6 +171,9 @@ endif
164171ifeq ($(TARGET_CORE), COOPERLAKE)
165172GETARCH_FLAGS := -DFORCE_NEHALEM
166173endif
174+ ifeq ($(TARGET_CORE), SAPPHIRERAPIDS)
175+ GETARCH_FLAGS := -DFORCE_NEHALEM
176+ endif
167177ifeq ($(TARGET_CORE), SANDYBRIDGE)
168178GETARCH_FLAGS := -DFORCE_NEHALEM
169179endif
@@ -251,6 +261,8 @@ endif
251261#For small matrix optimization
252262ifeq ($(ARCH), x86_64)
253263SMALL_MATRIX_OPT = 1
264+ else ifeq ($(CORE), POWER10)
265+ SMALL_MATRIX_OPT = 1
254266endif
255267ifeq ($(SMALL_MATRIX_OPT), 1)
256268CCOMMON_OPT += -DSMALL_MATRIX_OPT
@@ -260,6 +272,10 @@ endif
260272ifndef GOTOBLAS_MAKEFILE
261273export GOTOBLAS_MAKEFILE = 1
262274
275+ # Determine if the assembler is GNU Assembler
276+ HAVE_GAS := $(shell $(AS) -v < /dev/null 2>&1 | grep GNU 2>&1 >/dev/null ; echo $$?)
277+ GETARCH_FLAGS += -DHAVE_GAS=$(HAVE_GAS)
278+
263279# Generating Makefile.conf and config.h
264280DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" HOST_CFLAGS="$(GETARCH_FLAGS)" CFLAGS="$(CFLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all)
265281
307323SMP = 1
308324endif
309325else
310- ifeq ($(NUM_THREAD), 1)
326+ ifeq ($(NUM_THREADS), 1)
311327SMP =
312328else
313329SMP = 1
@@ -892,15 +908,25 @@ endif
892908
893909ifeq ($(C_COMPILER), PGI)
894910PGCVERSIONGT20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` \> 20)
895- PGCVERSIONGTEQ20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` \>= 20)
896- PGCMINORVERSIONGE11 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -c 4-5` == 11)
911+ PGCVERSIONEQ20 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` == 20)
912+ PGCMINORVERSIONGE11 := $(shell expr `$(CC) --version|sed -n "2p" |cut -d "-" -f 1 | sed -e "s/[^0-9.]//g" |cut -c 4-5` \>= 11)
897913PGCVERSIONCHECK := $(PGCVERSIONGT20)$(PGCVERSIONEQ20)$(PGCMINORVERSIONGE11)
898- ifeq ($(PGCVERSIONCHECK), $(filter $(PGCVERSIONCHECK), 110 111 011))
914+ ifeq ($(PGCVERSIONCHECK), $(filter $(PGCVERSIONCHECK), 100 101 011))
899915NEWPGI := 1
916+ PGCVERSIONGT21 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` \> 21)
917+ PGCVERSIONEQ21 := $(shell expr `$(CC) --version|sed -n "2p" |sed -e "s/[^0-9.]//g" |cut -d "." -f 1` == 21)
918+ PGCVERSIONCHECK2 := $(PGCVERSIONGT21)$(PGCVERSIONEQ21)$(PGCMINORVERSIONGE11)
919+ ifeq ($(PGCVERSIONCHECK2), $(filter $(PGCVERSIONCHECK2), 100 101 011))
920+ NEWPGI2 := 1
921+ endif
900922endif
901923ifdef BINARY64
902924ifeq ($(ARCH), x86_64)
925+ ifneq ($(NEWPGI2),1)
903926CCOMMON_OPT += -tp p7-64
927+ else
928+ CCOMMON_OPT += -tp px
929+ endif
904930ifneq ($(NEWPGI),1)
905931CCOMMON_OPT += -D__MMX__ -Mnollvm
906932endif
@@ -915,7 +941,11 @@ endif
915941endif
916942endif
917943else
944+ ifneq ($(NEWPGI2),1)
918945CCOMMON_OPT += -tp p7
946+ else
947+ CCOMMON_OPT += -tp px
948+ endif
919949endif
920950endif
921951
@@ -1092,8 +1122,12 @@ FCOMMON_OPT += -i8
10921122endif
10931123endif
10941124ifeq ($(ARCH), x86_64)
1125+ ifneq ($(NEWPGI2),1)
10951126FCOMMON_OPT += -tp p7-64
10961127else
1128+ FCOMMON_OPT += -tp px
1129+ endif
1130+ else
10971131ifeq ($(ARCH), power)
10981132ifeq ($(CORE), POWER6)
10991133$(warning NVIDIA HPC compilers do not support POWER6.)
@@ -1643,8 +1677,10 @@ export HAVE_VFP
16431677export HAVE_VFPV3
16441678export HAVE_VFPV4
16451679export HAVE_NEON
1646- export HAVE_MSA
1647- export MSA_FLAGS
1680+ ifndef NO_MSA
1681+ export HAVE_MSA
1682+ export MSA_FLAGS
1683+ endif
16481684export KERNELDIR
16491685export FUNCTION_PROFILE
16501686export TARGET_CORE
0 commit comments