Skip to content

Commit c2a8ebf

Browse files
authored
Add workaround for NVIDIA HPC mishandling of the asm DOT kernels
1 parent 43aac5b commit c2a8ebf

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

kernel/arm64/KERNEL.ARMV8

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -97,9 +97,18 @@ CNRM2KERNEL = znrm2.S
9797
ZNRM2KERNEL = znrm2.S
9898

9999
DDOTKERNEL = dot.S
# Pick the SDOT kernel source according to the C compiler:
#   - C_COMPILER other than PGI: the generic C implementation, ../generic/dot.c
#   - C_COMPILER equal to PGI:   dot.S (the same file DDOTKERNEL uses)
# NOTE(review): the commit title calls this a workaround for NVIDIA HPC
# mishandling the asm DOT kernels, yet this branch moves PGI *onto* dot.S
# rather than away from it -- confirm that direction is intended.
# Presumably C_COMPILER == PGI is how the NVIDIA HPC compilers are identified;
# verify against wherever C_COMPILER is set.
100+
ifneq ($(C_COMPILER), PGI)
100101
SDOTKERNEL = ../generic/dot.c
102+
else
103+
SDOTKERNEL = dot.S
104+
endif
# Pick the CDOT and ZDOT kernel source according to the C compiler. Both
# variables always point at the same file:
#   - C_COMPILER other than PGI: zdot.S
#   - C_COMPILER equal to PGI:   the C implementation ../arm/zdot.c
# Per the commit title, this is a workaround for NVIDIA HPC mishandling the
# asm DOT kernels. Presumably C_COMPILER == PGI is how the NVIDIA HPC
# compilers are identified; verify against wherever C_COMPILER is set.
105+
ifneq ($(C_COMPILER), PGI)
101106
CDOTKERNEL = zdot.S
102107
ZDOTKERNEL = zdot.S
108+
else
# PGI only: use the C source instead of zdot.S.
109+
CDOTKERNEL = ../arm/zdot.c
110+
ZDOTKERNEL = ../arm/zdot.c
111+
endif
103112
DSDOTKERNEL = dot.S
104113

105114
DGEMM_BETA = dgemm_beta.S

0 commit comments

Comments
 (0)