Skip to content

Commit 2befb60

Browse files
committed
LoongArch64: Update LA264
All LAPACK tests passed.
1 parent d220347 commit 2befb60

File tree

4 files changed

+35
-118
lines changed

4 files changed

+35
-118
lines changed

kernel/Makefile.L3

Lines changed: 16 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,97 +1110,51 @@ else
11101110
$(CC) $(CFLAGS) -c -DTRMMKERNEL -UDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
11111111
endif
11121112

1113-
$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
1114-
ifeq ($(OS), AIX)
1115-
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_ln.s
1116-
$(M4_AIX) ztrmm_kernel_ln.s > ztrmm_kernel_ln_nomacros.s
1117-
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_ln_nomacros.s -o $@
1118-
rm ztrmm_kernel_ln.s ztrmm_kernel_ln_nomacros.s
1119-
else ifeq ($(CORE), SANDYBRIDGE)
1113+
$(KDIR)ztrmm_kernel_LN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
1114+
ifeq ($(CORE),SANDYBRIDGE)
11201115
$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
11211116
else
11221117
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -UCONJ -DNN $< -o $@
11231118
endif
11241119

1125-
$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
1126-
ifeq ($(OS), AIX)
1127-
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_lt.s
1128-
$(M4_AIX) ztrmm_kernel_lt.s > ztrmm_kernel_lt_nomacros.s
1129-
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_lt_nomacros.s -o $@
1130-
rm ztrmm_kernel_lt.s ztrmm_kernel_lt_nomacros.s
1131-
else ifeq ($(CORE), SANDYBRIDGE)
1120+
$(KDIR)ztrmm_kernel_LT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
1121+
ifeq ($(CORE),SANDYBRIDGE)
11321122
$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
11331123
else
11341124
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -UCONJ -DNN $< -o $@
11351125
endif
1136-
1137-
$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
1138-
ifeq ($(OS), AIX)
1139-
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lr.s
1140-
$(M4_AIX) ztrmm_kernel_lr.s > ztrmm_kernel_lr_nomacros.s
1141-
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN ztrmm_kernel_lr_nomacros.s -o $@
1142-
rm ztrmm_kernel_lr.s ztrmm_kernel_lr_nomacros.s
1143-
else ifeq ($(CORE), SANDYBRIDGE)
1126+
$(KDIR)ztrmm_kernel_LR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
1127+
ifeq ($(CORE),SANDYBRIDGE)
11441128
$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
11451129
else
11461130
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -UTRANSA -DCONJ -DCN $< -o $@
11471131
endif
1148-
1149-
$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
1150-
ifeq ($(OS), AIX)
1151-
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o - > ztrmm_kernel_lc.s
1152-
$(M4_AIX) ztrmm_kernel_lc.s >ztrmm_kernel_lc_nomacros.s
1153-
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN ztrmm_kernel_lc_nomacros.s -o $@
1154-
rm ztrmm_kernel_lc.s ztrmm_kernel_lc_nomacros.s
1155-
else ifeq ($(CORE), SANDYBRIDGE)
1132+
$(KDIR)ztrmm_kernel_LC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
1133+
ifeq ($(CORE),SANDYBRIDGE)
11561134
$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
11571135
else
11581136
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -DLEFT -DTRANSA -DCONJ -DCN $< -o $@
11591137
endif
1160-
1161-
$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
1162-
ifeq ($(OS), AIX)
1163-
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rn.s
1164-
$(M4_AIX) ztrmm_kernel_rn.s > ztrmm_kernel_rn_nomacros.s
1165-
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN ztrmm_kernel_rn_nomacros.s -o $@
1166-
rm ztrmm_kernel_rn.s ztrmm_kernel_rn_nomacros.s
1167-
else ifeq ($(CORE), SANDYBRIDGE)
1138+
$(KDIR)ztrmm_kernel_RN$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
1139+
ifeq ($(CORE),SANDYBRIDGE)
11681140
$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
11691141
else
11701142
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -UCONJ -DNN $< -o $@
11711143
endif
1172-
1173-
$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
1174-
ifeq ($(OS), AIX)
1175-
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o - > ztrmm_kernel_rt.s
1176-
$(M4_AIX) ztrmm_kernel_rt.s > ztrmm_kernel_rt_nomacros.s
1177-
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN ztrmm_kernel_rt_nomacros.s -o $@
1178-
rm ztrmm_kernel_rt.s ztrmm_kernel_rt_nomacros.s
1179-
else ifeq ($(CORE), SANDYBRIDGE)
1144+
$(KDIR)ztrmm_kernel_RT$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
1145+
ifeq ($(CORE),SANDYBRIDGE)
11801146
$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
11811147
else
11821148
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -UCONJ -DNN $< -o $@
11831149
endif
1184-
1185-
$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
1186-
ifeq ($(OS), AIX)
1187-
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rr.s
1188-
$(M4_AIX) ztrmm_kernel_rr.s > ztrmm_kernel_rr_nomacros.s
1189-
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC ztrmm_kernel_rr_nomacros.s -o $@
1190-
rm ztrmm_kernel_rr.s ztrmm_kernel_rr_nomacros.s
1191-
else ifeq ($(CORE), SANDYBRIDGE)
1150+
$(KDIR)ztrmm_kernel_RR$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
1151+
ifeq ($(CORE),SANDYBRIDGE)
11921152
$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
11931153
else
11941154
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -UTRANSA -DCONJ -DNC $< -o $@
11951155
endif
1196-
1197-
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZTRMMKERNEL)
1198-
ifeq ($(OS), AIX)
1199-
$(CC) $(CFLAGS) -S -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o - > ztrmm_kernel_rc.s
1200-
$(M4_AIX) ztrmm_kernel_rc.s > ztrmm_kernel_rc_nomacros.s
1201-
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC ztrmm_kernel_rc_nomacros.s -o $@
1202-
rm ztrmm_kernel_rc.s ztrmm_kernel_rc_nomacros.s
1203-
else ifeq ($(CORE), SANDYBRIDGE)
1156+
$(KDIR)ztrmm_kernel_RC$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(ZGEMMKERNEL)
1157+
ifeq ($(CORE),SANDYBRIDGE)
12041158
$(CC) $(filter-out -mavx,$(CFLAGS)) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@
12051159
else
12061160
$(CC) $(CFLAGS) -c -DTRMMKERNEL -DDOUBLE -DCOMPLEX -ULEFT -DTRANSA -DCONJ -DNC $< -o $@

kernel/arm/zscal.c

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -38,12 +38,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3838
#include<stdio.h>
3939
#include "common.h"
4040

41-
#if defined(DOUBLE)
42-
int ztemp_k(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2);
43-
#else
44-
int ctemp_k(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2);
45-
#endif
46-
4741
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
4842
{
4943
BLASLONG i=0;
@@ -58,23 +52,6 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F
5852
inc_x2 = 2 * inc_x;
5953
FLOAT *xtemp0 = (FLOAT*)malloc(inc_x2 * sizeof(FLOAT) * n);
6054
FLOAT *xtemp1 = (FLOAT*)malloc(inc_x2 * sizeof(FLOAT) * n);
61-
#if defined(DOUBLE)
62-
#else
63-
for ( i=0; i<n; i++ )
64-
{
65-
xtemp0[ip0] = x[ip0];
66-
xtemp0[ip0+1] = x[ip0+1];
67-
xtemp1[ip0] = x[ip0];
68-
xtemp1[ip0+1] = x[ip0+1];
69-
ip0 += inc_x2;
70-
}
71-
#endif
72-
73-
#if defined(DOUBLE)
74-
ztemp_k(n, dummy0, dummy1, da_r, da_i, xtemp1, inc_x, y, inc_y, dummy, dummy2);
75-
#else
76-
ctemp_k(n, dummy0, dummy1, da_r, da_i, xtemp1, inc_x, y, inc_y, dummy, dummy2);
77-
#endif
7855

7956
for ( i=0; i<n; i++ )
8057
{
@@ -112,20 +89,6 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, F
11289
ip += inc_x2;
11390
}
11491

115-
#if defined(DOUBLE)
116-
#else
117-
ip=0;
118-
for (i = 0; i<n; i++) {
119-
if(abs(xtemp1[ip] - x[ip]) > 0.000001 || abs(xtemp1[ip+1] - x[ip+1]) > 0.000001){
120-
printf("real= %f, c= %f, lsx= %f, da_r= %f, da_i= %f, i= %d, n= %d, inc_x= %d\n ", xtemp0[ip], x[ip], xtemp1[ip], da_r, da_i, i, n, inc_x);
121-
printf("image= %f, c= %f, lsx= %f, da_r= %f, da_i= %f, i= %d, n= %d, inc_x2= %d\n ", xtemp0[ip+1], x[ip+1], xtemp1[ip+1], da_r, da_i, i, n, inc_x2);
122-
break;
123-
}
124-
ip += inc_x2;
125-
}
126-
#endif
127-
free(xtemp0);
128-
free(xtemp1);
12992
return(0);
13093

13194
}

kernel/loongarch64/KERNEL.LA264

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ ZDOTKERNEL = cdot_lsx.S
88

99
SSCALKERNEL = scal_lsx.S
1010
DSCALKERNEL = scal_lsx.S
11-
CSCALKERNEL = cscal_lsx.S
12-
ZSCALKERNEL = cscal_lsx.S
11+
#CSCALKERNEL = cscal_lsx.S
12+
#ZSCALKERNEL = cscal_lsx.S
1313

1414
#SAMAXKERNEL = amax_lsx.S
1515
#DAMAXKERNEL = amax_lsx.S
@@ -62,8 +62,8 @@ ZSCALKERNEL = cscal_lsx.S
6262
#ZDOTKERNEL = ../arm/zdot.c
6363
#SSCALKERNEL = ../arm/scal.c
6464
#DSCALKERNEL = ../arm/scal.c
65-
#CSCALKERNEL = ../arm/zscal.c
66-
#ZSCALKERNEL = ../arm/zscal.c
65+
CSCALKERNEL = ../arm/zscal.c
66+
ZSCALKERNEL = ../arm/zscal.c
6767
SAMAXKERNEL = ../arm/amax.c
6868
DAMAXKERNEL = ../arm/amax.c
6969
CAMAXKERNEL = ../arm/zamax.c
@@ -243,23 +243,23 @@ CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
243243
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
244244

245245
#disici
246-
#ZGEMVNKERNEL = zgemv_n_2_lsx.S
247-
#ZGEMVTKERNEL = zgemv_t_2_lsx.S
248-
ZGEMVNKERNEL = ../arm/zgemv_n.c
249-
ZGEMVTKERNEL = ../arm/zgemv_t.c
246+
ZGEMVNKERNEL = zgemv_n_2_lsx.S
247+
ZGEMVTKERNEL = zgemv_t_2_lsx.S
248+
#ZGEMVNKERNEL = ../arm/zgemv_n.c
249+
#ZGEMVTKERNEL = ../arm/zgemv_t.c
250250

251-
#ZGEMMKERNEL = zgemm_kernel_4x4_lsx.S
252-
#ZGEMMONCOPY = zgemm_ncopy_4_lsx.S
253-
#ZGEMMOTCOPY = zgemm_tcopy_4_lsx.S
254-
#ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
255-
#ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
256-
257-
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
258-
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
259-
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
251+
ZGEMMKERNEL = zgemm_kernel_4x4_lsx.S
252+
ZGEMMONCOPY = zgemm_ncopy_4_lsx.S
253+
ZGEMMOTCOPY = zgemm_tcopy_4_lsx.S
260254
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
261255
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
262256

257+
#ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
258+
#ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
259+
#ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
260+
#ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
261+
#ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
262+
#
263263
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
264264
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
265265
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c

param.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2905,8 +2905,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29052905
#define CGEMM_DEFAULT_UNROLL_M 2
29062906
#define CGEMM_DEFAULT_UNROLL_N 2
29072907

2908-
#define ZGEMM_DEFAULT_UNROLL_M 2
2909-
#define ZGEMM_DEFAULT_UNROLL_N 2
2908+
#define ZGEMM_DEFAULT_UNROLL_M 4
2909+
#define ZGEMM_DEFAULT_UNROLL_N 4
29102910

29112911
#define SGEMM_DEFAULT_P 128
29122912
#define DGEMM_DEFAULT_P 128

0 commit comments

Comments
 (0)