Skip to content

Commit e6c0e39

Browse files
authored
Optimize Zgemv
1 parent 453bfa7 commit e6c0e39

26 files changed

+2866
-1713
lines changed

cpuid_zarch.c

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,9 @@
2727

2828
#include <string.h>
2929

30-
#define CPU_GENERIC 0
31-
#define CPU_Z13 1
32-
#define CPU_Z14 2
30+
#define CPU_GENERIC 0
31+
#define CPU_Z13 1
32+
#define CPU_Z14 2
3333

3434
static char *cpuname[] = {
3535
"ZARCH_GENERIC",
@@ -112,7 +112,7 @@ void get_cpuconfig(void)
112112
printf("#define Z13\n");
113113
printf("#define DTB_DEFAULT_ENTRIES 64\n");
114114
break;
115-
case CPU_Z14:
115+
case CPU_Z14:
116116
printf("#define Z14\n");
117117
printf("#define DTB_DEFAULT_ENTRIES 64\n");
118118
break;

kernel/zarch/KERNEL.Z13

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -74,12 +74,12 @@ ZSWAPKERNEL = zswap.c
7474
SGEMVNKERNEL = ../arm/gemv_n.c
7575
DGEMVNKERNEL = dgemv_n_4.c
7676
CGEMVNKERNEL = ../arm/zgemv_n.c
77-
ZGEMVNKERNEL = ../arm/zgemv_n.c
77+
ZGEMVNKERNEL = zgemv_n_4.c
7878

7979
SGEMVTKERNEL = ../arm/gemv_t.c
8080
DGEMVTKERNEL = dgemv_t_4.c
8181
CGEMVTKERNEL = ../arm/zgemv_t.c
82-
ZGEMVTKERNEL = ../arm/zgemv_t.c
82+
ZGEMVTKERNEL = zgemv_t_4.c
8383

8484
STRMMKERNEL = strmm8x4V.S
8585
DTRMMKERNEL = trmm8x4V.S

kernel/zarch/KERNEL.Z14

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -73,13 +73,13 @@ ZSWAPKERNEL = zswap.c
7373

7474
SGEMVNKERNEL = sgemv_n_4.c
7575
DGEMVNKERNEL = dgemv_n_4.c
76-
CGEMVNKERNEL = ../arm/zgemv_n.c
77-
ZGEMVNKERNEL = ../arm/zgemv_n.c
76+
CGEMVNKERNEL = cgemv_n_4.c
77+
ZGEMVNKERNEL = zgemv_n_4.c
7878

7979
SGEMVTKERNEL = sgemv_t_4.c
8080
DGEMVTKERNEL = dgemv_t_4.c
81-
CGEMVTKERNEL = ../arm/zgemv_t.c
82-
ZGEMVTKERNEL = ../arm/zgemv_t.c
81+
CGEMVTKERNEL = cgemv_t_4.c
82+
ZGEMVTKERNEL = zgemv_t_4.c
8383

8484
STRMMKERNEL = strmm8x4V.S
8585
DTRMMKERNEL = trmm8x4V.S

kernel/zarch/camax.c

Lines changed: 24 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -198,7 +198,7 @@ static FLOAT camax_kernel_32(BLASLONG n, FLOAT *x)
198198

199199
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
200200
BLASLONG i = 0;
201-
BLASLONG j = 0;
201+
BLASLONG ix = 0;
202202
FLOAT maxf = 0.0;
203203
BLASLONG inc_x2;
204204

@@ -216,53 +216,55 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
216216
else
217217
{
218218
maxf=CABS1(x,0);
219+
ix += 2;
219220
i++;
220221
}
221222

222223
while (i < n) {
223-
if (ABS(x[i*2]) > maxf) {
224-
maxf = ABS(x[i*2]);
224+
if (CABS1(x,ix) > maxf) {
225+
maxf = CABS1(x,ix);
225226
}
227+
ix += 2;
226228
i++;
227229
}
228230
return (maxf);
229231

230232
} else {
231233

232-
inc_x2 = 2 * inc_x;
233234
maxf=CABS1(x,0);
234-
i += inc_x2;
235-
j++;
235+
inc_x2 = 2 * inc_x;
236+
ix += inc_x2;
237+
i++;
236238

237239
BLASLONG n1 = (n - 1) & -4;
238-
while (j < n1) {
240+
while (i < n1) {
239241

240-
if (CABS1(x,i) > maxf) {
241-
maxf = CABS1(x,i);
242+
if (CABS1(x,ix) > maxf) {
243+
maxf = CABS1(x,ix);
242244
}
243-
if (CABS1(x,i+inc_x2) > maxf) {
244-
maxf = CABS1(x,i+inc_x2);
245+
if (CABS1(x,ix+inc_x2) > maxf) {
246+
maxf = CABS1(x,ix+inc_x2);
245247
}
246-
if (CABS1(x,i+inc_x2*2) > maxf) {
247-
maxf = CABS1(x,i+inc_x2*2);
248+
if (CABS1(x,ix+inc_x2*2) > maxf) {
249+
maxf = CABS1(x,ix+inc_x2*2);
248250
}
249-
if (CABS1(x,i+inc_x2*3) > maxf) {
250-
maxf = CABS1(x,i+inc_x2*3);
251+
if (CABS1(x,ix+inc_x2*3) > maxf) {
252+
maxf = CABS1(x,ix+inc_x2*3);
251253
}
252254

253-
i += inc_x2 * 4;
255+
ix += inc_x2 * 4;
254256

255-
j += 4;
257+
i += 4;
256258

257259
}
258260

259261

260-
while (j < n) {
261-
if (CABS1(x,i) > maxf) {
262-
maxf = CABS1(x,i);
262+
while (i < n) {
263+
if (CABS1(x,ix) > maxf) {
264+
maxf = CABS1(x,ix);
263265
}
264-
i += inc_x2;
265-
j++;
266+
ix += inc_x2;
267+
i++;
266268
}
267269
return (maxf);
268270
}

kernel/zarch/camin.c

Lines changed: 24 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -198,7 +198,7 @@ static FLOAT camin_kernel_32(BLASLONG n, FLOAT *x)
198198

199199
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
200200
BLASLONG i = 0;
201-
BLASLONG j = 0;
201+
BLASLONG ix = 0;
202202
FLOAT minf = 0.0;
203203
BLASLONG inc_x2;
204204

@@ -216,53 +216,55 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x) {
216216
else
217217
{
218218
minf=CABS1(x,0);
219+
ix += 2;
219220
i++;
220221
}
221222

222223
while (i < n) {
223-
if (ABS(x[i*2]) < minf) {
224-
minf = ABS(x[i*2]);
224+
if (CABS1(x,ix) < minf) {
225+
minf = CABS1(x,ix);
225226
}
227+
ix += 2;
226228
i++;
227229
}
228230
return (minf);
229231

230232
} else {
231233

232-
inc_x2 = 2 * inc_x;
233234
minf=CABS1(x,0);
234-
i += inc_x2;
235-
j++;
235+
inc_x2 = 2 * inc_x;
236+
ix += inc_x2;
237+
i++;
236238

237239
BLASLONG n1 = (n - 1) & -4;
238-
while (j < n1) {
240+
while (i < n1) {
239241

240-
if (CABS1(x,i) < minf) {
241-
minf = CABS1(x,i);
242+
if (CABS1(x,ix) < minf) {
243+
minf = CABS1(x,ix);
242244
}
243-
if (CABS1(x,i+inc_x2) < minf) {
244-
minf = CABS1(x,i+inc_x2);
245+
if (CABS1(x,ix+inc_x2) < minf) {
246+
minf = CABS1(x,ix+inc_x2);
245247
}
246-
if (CABS1(x,i+inc_x2*2) < minf) {
247-
minf = CABS1(x,i+inc_x2*2);
248+
if (CABS1(x,ix+inc_x2*2) < minf) {
249+
minf = CABS1(x,ix+inc_x2*2);
248250
}
249-
if (CABS1(x,i+inc_x2*3) < minf) {
250-
minf = CABS1(x,i+inc_x2*3);
251+
if (CABS1(x,ix+inc_x2*3) < minf) {
252+
minf = CABS1(x,ix+inc_x2*3);
251253
}
252254

253-
i += inc_x2 * 4;
255+
ix += inc_x2 * 4;
254256

255-
j += 4;
257+
i += 4;
256258

257259
}
258260

259261

260-
while (j < n) {
261-
if (CABS1(x,i) < minf) {
262-
minf = CABS1(x,i);
262+
while (i < n) {
263+
if (CABS1(x,ix) < minf) {
264+
minf = CABS1(x,ix);
263265
}
264-
i += inc_x2;
265-
j++;
266+
ix += inc_x2;
267+
i++;
266268
}
267269
return (minf);
268270
}

kernel/zarch/caxpy.c

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -110,15 +110,15 @@ static void caxpy_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
110110
"agfi %%r1,128 \n\t"
111111
"brctg %%r0,0b "
112112
:
113-
:"r"(n),"ZR"((const FLOAT (*)[n * 2])x),"ZR"((FLOAT (*)[n * 2])y),"a"(alpha)
113+
:"r"(n),"ZR"((const FLOAT (*)[n * 2])x),"ZR"((FLOAT (*)[n * 2])y),"ZQ"((const FLOAT (*)[2])alpha)
114114
:"memory","cc","r0","r1","v0","v1","v16","v17","v18","v19","v20","v21","v22","v23","v24","v25","v26","v27","v28","v29","v30","v31"
115115
);
116116
}
117117

118118
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) {
119119
BLASLONG i = 0;
120120
BLASLONG ix = 0, iy = 0;
121-
FLOAT da[2];
121+
FLOAT da[2] __attribute__ ((aligned(16)));
122122

123123
if (n <= 0) return (0);
124124

0 commit comments

Comments
 (0)