Skip to content

Commit 0f27a03

Browse files
authored
Add workaround for NVIDIA HPC mishandling of the asm DOT kernels
1 parent c2a8ebf commit 0f27a03

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

kernel/arm64/KERNEL.CORTEXA57

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -70,10 +70,19 @@ DCOPYKERNEL = copy.S
7070
CCOPYKERNEL = copy.S
7171
ZCOPYKERNEL = copy.S
7272

73+
ifneq ($(C_COMPILER), PGI)
7374
SDOTKERNEL = ../generic/dot.c
75+
else
76+
SDOTKERNEL = dot.S
77+
endif
7478
DDOTKERNEL = dot.S
79+
ifneq ($(C_COMPILER), PGI)
7580
CDOTKERNEL = zdot.S
7681
ZDOTKERNEL = zdot.S
82+
else
83+
CDOTKERNEL = ../arm/zdot.c
84+
ZDOTKERNEL = ../arm/zdot.c
85+
endif
7786
DSDOTKERNEL = dot.S
7887

7988
SNRM2KERNEL = nrm2.S

0 commit comments

Comments (0)