Skip to content

Commit a0e4fb3

Browse files
authored
Merge pull request #3061 from martin-frbg/arm64-pgi
Support NVIDIA HPC SDK on ARM64
2 parents 2c445be + 89ae305 commit a0e4fb3

File tree

9 files changed: 50 additions and 10 deletions.

Makefile.arm64

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
1+
ifneq ($(C_COMPILER), PGI)
22
ifeq ($(CORE), ARMV8)
33
CCOMMON_OPT += -march=armv8-a
44
FCOMMON_OPT += -march=armv8-a
@@ -77,4 +77,4 @@ CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
7777
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
7878
endif
7979
endif
80-
80+
endif

cmake/utils.cmake

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,9 @@ macro(ParseMakefileVars MAKEFILE_IN)
7474
string(REGEX MATCH "ifneq \\(\\$\\(([_A-Z]+)\\),[ \t]*([0-9_A-Z]+)\\)" line_match "${makefile_line}")
7575
if (NOT "${line_match}" STREQUAL "")
7676
# message(STATUS "IFNEQ: ${line_match} first: ${CMAKE_MATCH_1} second: ${CMAKE_MATCH_2}")
77+
if ( ${CMAKE_MATCH_1} STREQUAL C_COMPILER)
78+
set (CMAKE_MATCH_1 CMAKE_C_COMPILER)
79+
endif ()
7780
if (NOT ( ${${CMAKE_MATCH_1}} STREQUAL ${CMAKE_MATCH_2}))
7881
# message (STATUS "condition is true")
7982
set (IfElse 1)

common_arm64.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3939

4040
#define INLINE inline
4141

42-
#ifdef F_INTERFACE_FLANG
42+
#if defined( F_INTERFACE_FLANG) || defined(F_INTERFACE_PGI)
4343
#define RETURN_BY_STACK
4444
#else
4545
#define RETURN_BY_COMPLEX

kernel/arm/zdot.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
4848

4949
dot[0]=0.0;
5050
dot[1]=0.0;
51-
#if !defined(__PPC__) && !defined(__SunOS)
51+
#if !defined(__PPC__) && !defined(__SunOS) && !defined(__PGI)
5252
CREAL(result) = 0.0 ;
5353
CIMAG(result) = 0.0 ;
5454
#else
@@ -73,7 +73,7 @@ OPENBLAS_COMPLEX_FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLA
7373
i++ ;
7474

7575
}
76-
#if !defined(__PPC__) && !defined(__SunOS)
76+
#if !defined(__PPC__) && !defined(__SunOS) && !defined(__PGI)
7777
CREAL(result) = dot[0];
7878
CIMAG(result) = dot[1];
7979
#else

kernel/arm64/KERNEL.ARMV8

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,9 +97,18 @@ CNRM2KERNEL = znrm2.S
9797
ZNRM2KERNEL = znrm2.S
9898

9999
DDOTKERNEL = dot.S
100+
ifneq ($(C_COMPILER), PGI)
100101
SDOTKERNEL = ../generic/dot.c
102+
else
103+
SDOTKERNEL = dot.S
104+
endif
105+
ifneq ($(C_COMPILER), PGI)
101106
CDOTKERNEL = zdot.S
102107
ZDOTKERNEL = zdot.S
108+
else
109+
CDOTKERNEL = ../arm/zdot.c
110+
ZDOTKERNEL = ../arm/zdot.c
111+
endif
103112
DSDOTKERNEL = dot.S
104113

105114
DGEMM_BETA = dgemm_beta.S

kernel/arm64/KERNEL.CORTEXA53

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,11 +96,20 @@ DNRM2KERNEL = nrm2.S
9696
CNRM2KERNEL = znrm2.S
9797
ZNRM2KERNEL = znrm2.S
9898

99-
DDOTKERNEL = dot.S
100-
SDOTKERNEL = ../generic/dot.c
101-
CDOTKERNEL = zdot.S
102-
ZDOTKERNEL = zdot.S
103-
DSDOTKERNEL = dot.S
99+
ifneq ($(C_COMPILER), PGI)
100+
SDOTKERNEL = ../generic/dot.c
101+
else
102+
SDOTKERNEL = dot.S
103+
endif
104+
DDOTKERNEL = dot.S
105+
ifneq ($(C_COMPILER), PGI)
106+
CDOTKERNEL = zdot.S
107+
ZDOTKERNEL = zdot.S
108+
else
109+
CDOTKERNEL = ../arm/zdot.c
110+
ZDOTKERNEL = ../arm/zdot.c
111+
endif
112+
DSDOTKERNEL = dot.S
104113

105114
DGEMM_BETA = dgemm_beta.S
106115
SGEMM_BETA = sgemm_beta.S

kernel/arm64/KERNEL.CORTEXA57

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,19 @@ DCOPYKERNEL = copy.S
7070
CCOPYKERNEL = copy.S
7171
ZCOPYKERNEL = copy.S
7272

73+
ifneq ($(C_COMPILER), PGI)
7374
SDOTKERNEL = ../generic/dot.c
75+
else
76+
SDOTKERNEL = dot.S
77+
endif
7478
DDOTKERNEL = dot.S
79+
ifneq ($(C_COMPILER), PGI)
7580
CDOTKERNEL = zdot.S
7681
ZDOTKERNEL = zdot.S
82+
else
83+
CDOTKERNEL = ../arm/zdot.c
84+
ZDOTKERNEL = ../arm/zdot.c
85+
endif
7786
DSDOTKERNEL = dot.S
7887

7988
SNRM2KERNEL = nrm2.S

kernel/arm64/KERNEL.THUNDERX

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,13 @@ ZCOPYKERNEL = copy.S
4747

4848
SDOTKERNEL = dot_thunderx.c
4949
DDOTKERNEL = ddot_thunderx.c
50+
ifneq ($(C_COMPILER), PGI)
5051
CDOTKERNEL = zdot.S
5152
ZDOTKERNEL = zdot.S
53+
else
54+
CDOTKERNEL = ../arm/zdot.c
55+
ZDOTKERNEL = ../arm/zdot.c
56+
endif
5257
DSDOTKERNEL = dot.S
5358

5459
SNRM2KERNEL = nrm2.S

kernel/arm64/KERNEL.TSV110

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,13 @@ ZCOPYKERNEL = copy.S
7272

7373
SDOTKERNEL = dot.S
7474
DDOTKERNEL = dot.S
75+
ifneq ($(C_COMPILER), PGI)
7576
CDOTKERNEL = zdot.S
7677
ZDOTKERNEL = zdot.S
78+
else
79+
CDOTKERNEL = ../arm/zdot.c
80+
ZDOTKERNEL = ../arm/zdot.c
81+
endif
7782
DSDOTKERNEL = dot.S
7883

7984
SNRM2KERNEL = nrm2.S

0 commit comments

Comments
 (0)