Skip to content

Commit 043128c

Browse files
authored
Merge pull request #3029 from RajalakshmiSR/axpyp10
POWER10: Improve axpy performance
2 parents (3331ca4 + 346e30a); commit 043128c

File tree

3 files changed

+38
-17
lines changed

3 files changed

+38
-17
lines changed

kernel/power/caxpy_microk_power10.c

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -112,10 +112,14 @@ static void caxpy_kernel_8 (long n, float *x, float *y,
112112
"xvmaddasp 38, 58, 33 \n\t"
113113
"xvmaddasp 39, 59, 33 \n\t"
114114

115-
"stxvp 48, 0(%4) \n\t"
116-
"stxvp 50, 32(%4) \n\t"
117-
"stxvp 34, 64(%4) \n\t"
118-
"stxvp 38, 96(%4) \n\t"
115+
"stxv 49, 0(%4) \n\t"
116+
"stxv 48, 16(%4) \n\t"
117+
"stxv 51, 32(%4) \n\t"
118+
"stxv 50, 48(%4) \n\t"
119+
"stxv 35, 64(%4) \n\t"
120+
"stxv 34, 80(%4) \n\t"
121+
"stxv 39, 96(%4) \n\t"
122+
"stxv 38, 112(%4) \n\t"
119123

120124
"addi %4, %4, 128 \n\t"
121125
"xxperm 52, 40, %x10 \n\t" // exchange real and imag part
@@ -159,10 +163,14 @@ static void caxpy_kernel_8 (long n, float *x, float *y,
159163
"xvmaddasp 38, 58, 33 \n\t"
160164
"xvmaddasp 39, 59, 33 \n\t"
161165

162-
"stxvp 48, 0(%4) \n\t"
163-
"stxvp 50, 32(%4) \n\t"
164-
"stxvp 34, 64(%4) \n\t"
165-
"stxvp 38, 96(%4) \n\t"
166+
"stxv 49, 0(%4) \n\t"
167+
"stxv 48, 16(%4) \n\t"
168+
"stxv 51, 32(%4) \n\t"
169+
"stxv 50, 48(%4) \n\t"
170+
"stxv 35, 64(%4) \n\t"
171+
"stxv 34, 80(%4) \n\t"
172+
"stxv 39, 96(%4) \n\t"
173+
"stxv 38, 112(%4) \n\t"
166174

167175
"#n=%1 x=%5=%2 y=%0=%3 alpha=(%7,%8) mvecp=%6=%9 ytmp=%4\n"
168176
:

kernel/power/daxpy_power10.c

Lines changed: 12 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -66,12 +66,19 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
6666
if ( (inc_x == 1) && (inc_y == 1) )
6767
{
6868

69-
BLASLONG n1 = n & -16;
69+
if ( n >= 16 )
70+
{
71+
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3;
72+
for (i = 0; i < align; i++) {
73+
y[i] += da * x[i] ;
74+
}
75+
}
76+
BLASLONG n1 = (n-i) & -16;
77+
if ( n1 )
78+
daxpy_kernel_8(n1, &x[i], &y[i], da);
79+
80+
i += n1;
7081

71-
if ( n1 )
72-
daxpy_kernel_8(n1, x, y, da);
73-
74-
i = n1;
7582
while(i < n)
7683
{
7784

kernel/power/saxpy_power10.c

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -64,12 +64,18 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
6464
if ( (inc_x == 1) && (inc_y == 1) )
6565
{
6666

67-
BLASLONG n1 = n & -64;
68-
67+
if ( n >= 64 )
68+
{
69+
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7;
70+
for (i = 0; i < align; i++) {
71+
y[i] += da * x[i] ;
72+
}
73+
}
74+
BLASLONG n1 = (n-i) & -64;
6975
if ( n1 )
70-
saxpy_kernel_64(n1, x, y, da);
76+
saxpy_kernel_64(n1, &x[i], &y[i], da);
7177

72-
i = n1;
78+
i += n1;
7379
while(i < n)
7480
{
7581

0 commit comments

Comments
 (0)