Skip to content

Commit ef23240

Browse files
authored
Merge pull request #4177 from martin-frbg/issue4176
Fix ZAXPY calls with INCX=0 on pre-AVX x86_64 and add utest
2 parents c2f4bdb + 862d06a commit ef23240

File tree

2 files changed

+41
-2
lines changed

2 files changed

+41
-2
lines changed

kernel/x86_64/zaxpy_sse2.S

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1418,10 +1418,10 @@
14181418
movq M, %rax
14191419
//If incx==0 && incy==0, skip the unrolled loop and jump to .L58.
14201420
cmpq $0, INCX
1421-
je .L58
1421+
jne .L59
14221422
cmpq $0, INCY
14231423
je .L58
1424-
1424+
.L59:
14251425
sarq $3, %rax
14261426
jle .L55
14271427

utest/test_axpy.c

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,26 @@ CTEST(axpy,zaxpy_inc_0)
7474
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
7575
}
7676
}
77+
78+
CTEST(axpy,zaxpy_incx_0)
79+
{
80+
blasint i;
81+
blasint N=4,incX=0,incY=1;
82+
double a[2]={0.25,0.5};
83+
double x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
84+
double y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
85+
double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
86+
double y2[]={0.75,5.25,4.75,9.25,0.75,5.25,4.75,9.25};
87+
88+
//OpenBLAS
89+
BLASFUNC(zaxpy)(&N,a,x1,&incX,y1,&incY);
90+
91+
for(i=0; i<2*N; i++){
92+
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS);
93+
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
94+
}
95+
}
96+
7797
#endif
7898

7999
#ifdef BUILD_SINGLE
@@ -116,5 +136,24 @@ CTEST(axpy,caxpy_inc_0)
116136
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
117137
}
118138
}
139+
140+
CTEST(axpy,caxpy_incx_0)
141+
{
142+
blasint i;
143+
blasint N=4,incX=0,incY=1;
144+
float a[2]={0.25,0.5};
145+
float x1[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
146+
float y1[]={2.0,4.0,6.0,8.0,2.0,4.0,6.0,8.0};
147+
double x2[]={1.0,3.0,5.0,7.0,1.0,3.0,5.0,7.0};
148+
double y2[]={0.75,5.25,4.75,9.25,0.75,5.25,4.75,9.25};
149+
150+
//OpenBLAS
151+
BLASFUNC(caxpy)(&N,a,x1,&incX,y1,&incY);
152+
153+
for(i=0; i<2*N; i++){
154+
ASSERT_DBL_NEAR_TOL(x2[i], x1[i], DOUBLE_EPS);
155+
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS);
156+
}
157+
}
119158
#endif
120159

0 commit comments

Comments
 (0)