Skip to content

Commit 3559c5d

Browse files
authored
Merge pull request #3048 from martin-frbg/issue2998
Temporarily revert to the old NRM2 kernels for ThunderX2/3 and NeoverseN1
2 parents 85e5165 + 8631e29 commit 3559c5d

File tree

4 files changed: 19 additions and 16 deletions.

kernel/arm64/KERNEL.NEOVERSEN1

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,10 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
9191
ICAMAXKERNEL = izamax_thunderx2t99.c
9292
IZAMAXKERNEL = izamax_thunderx2t99.c
9393

94-
SNRM2KERNEL = scnrm2_thunderx2t99.c
95-
DNRM2KERNEL = dznrm2_thunderx2t99.c
96-
CNRM2KERNEL = scnrm2_thunderx2t99.c
97-
ZNRM2KERNEL = dznrm2_thunderx2t99.c
# NRM2 kernel selection after this change: SNRM2 and DNRM2 use nrm2.S, CNRM2 and
# ZNRM2 use znrm2.S, replacing the *_thunderx2t99.c kernels (described by the
# commit title as a temporary revert to the old NRM2 kernels).
94+
SNRM2KERNEL = nrm2.S
95+
DNRM2KERNEL = nrm2.S
96+
CNRM2KERNEL = znrm2.S
97+
ZNRM2KERNEL = znrm2.S
9898

9999
DDOTKERNEL = dot_thunderx2t99.c
100100
SDOTKERNEL = dot_thunderx2t99.c

kernel/arm64/KERNEL.THUNDERX2T99

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,12 +153,12 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
153153
ICAMAXKERNEL = izamax_thunderx2t99.c
154154
IZAMAXKERNEL = izamax_thunderx2t99.c
155155

156-
SNRM2KERNEL = scnrm2_thunderx2t99.c
157-
CNRM2KERNEL = scnrm2_thunderx2t99.c
# NRM2 kernel selection (temporary revert to the old kernels, per the commit
# title). nrm2.S serves the real variants (SNRM2, DNRM2) and znrm2.S serves the
# complex variants (CNRM2, ZNRM2), matching KERNEL.NEOVERSEN1 and
# KERNEL.THUNDERX3T110. Note the entries below are listed in S, C, D, Z order,
# so the kernels must be paired by type, not by position.
156+
SNRM2KERNEL = nrm2.S
157+
CNRM2KERNEL = znrm2.S
158158
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
159159
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
160-
DNRM2KERNEL = dznrm2_thunderx2t99.c
161-
ZNRM2KERNEL = dznrm2_thunderx2t99.c
160+
DNRM2KERNEL = nrm2.S
161+
ZNRM2KERNEL = znrm2.S
162162

163163

164164
DDOTKERNEL = dot_thunderx2t99.c

kernel/arm64/KERNEL.THUNDERX3T110

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -153,13 +153,16 @@ IDAMAXKERNEL = iamax_thunderx2t99.c
153153
ICAMAXKERNEL = izamax_thunderx2t99.c
154154
IZAMAXKERNEL = izamax_thunderx2t99.c
155155

156-
SNRM2KERNEL = scnrm2_thunderx2t99.c
157-
CNRM2KERNEL = scnrm2_thunderx2t99.c
158-
#DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
159-
#ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
160-
DNRM2KERNEL = dznrm2_thunderx2t99.c
161-
ZNRM2KERNEL = dznrm2_thunderx2t99.c
162-
# NRM2 kernel selection after this change: the previous *_thunderx2t99.c
# assignments are kept below as comments, and the active assignments use
# nrm2.S for SNRM2/DNRM2 and znrm2.S for CNRM2/ZNRM2 (described by the commit
# title as a temporary revert to the old NRM2 kernels).
156+
#SNRM2KERNEL = scnrm2_thunderx2t99.c
157+
#CNRM2KERNEL = scnrm2_thunderx2t99.c
158+
##DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
159+
##ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
160+
#DNRM2KERNEL = dznrm2_thunderx2t99.c
161+
#ZNRM2KERNEL = dznrm2_thunderx2t99.c
162+
SNRM2KERNEL = nrm2.S
163+
DNRM2KERNEL = nrm2.S
164+
CNRM2KERNEL = znrm2.S
165+
ZNRM2KERNEL = znrm2.S
163166

164167
DDOTKERNEL = dot_thunderx2t99.c
165168
SDOTKERNEL = dot_thunderx2t99.c

kernel/x86_64/srot.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ static void srot_kernel(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT c, FLOAT s)
1313
{
1414
BLASLONG i = 0;
1515

16-
#if V_SIMD && (defined(HAVE_FMA3) || V_SIMD > 128)
16+
#if V_SIMD && !defined(C_PGI) && (defined(HAVE_FMA3) || V_SIMD > 128)
1717
const int vstep = v_nlanes_f32;
1818
const int unrollx4 = n & (-vstep * 4);
1919
const int unrollx = n & -vstep;

0 commit comments

Comments
 (0)