Skip to content

Commit 3c61387

Browse files
committed
LoongArch64: Fixed swap_lsx.S
1 parent 29d974c commit 3c61387

File tree

6 files changed

+227
-289
lines changed

6 files changed

+227
-289
lines changed

kernel/Makefile.L3

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
USE_GEMM3M = 0
22
OS := $(shell uname)
33

4+
USE_TRMM = 1
5+
46
ifeq ($(ARCH), x86)
57
USE_GEMM3M = 1
68
endif

kernel/loongarch64/KERNEL

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -235,7 +235,7 @@ ifndef ZGEMM3MKERNEL
235235
ZGEMM3MKERNEL = zgemm3m_kernel.S
236236
endif
237237

238-
DSDOTKERNEL = dot.S
238+
#DSDOTKERNEL = dot.S
239239

240240
ifndef SROTMKERNEL
241241
SROTMKERNEL = ../generic/rotm.c

kernel/loongarch64/KERNEL.LA264

Lines changed: 208 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -5,142 +5,288 @@ DSDOTKERNEL = dot_lsx.S
55
DDOTKERNEL = dot_lsx.S
66
CDOTKERNEL = cdot_lsx.S
77
ZDOTKERNEL = cdot_lsx.S
8+
#SDOTKERNEL = ../generic/dot.c
9+
#DDOTKERNEL = ../arm/dot.c
10+
#CDOTKERNEL = ../arm/zdot.c
11+
#ZDOTKERNEL = ../arm/zdot.c
812

913
SSCALKERNEL = scal_lsx.S
1014
DSCALKERNEL = scal_lsx.S
1115
CSCALKERNEL = cscal_lsx.S
1216
ZSCALKERNEL = cscal_lsx.S
17+
#SSCALKERNEL = ../arm/scal.c
18+
#DSCALKERNEL = ../arm/scal.c
19+
#CSCALKERNEL = ../arm/zscal.c
20+
#ZSCALKERNEL = ../arm/zscal.c
1321

1422
SAMAXKERNEL = amax_lsx.S
1523
DAMAXKERNEL = amax_lsx.S
1624
CAMAXKERNEL = camax_lsx.S
1725
ZAMAXKERNEL = camax_lsx.S
26+
#SAMAXKERNEL = ../arm/amax.c
27+
#DAMAXKERNEL = ../arm/amax.c
28+
#CAMAXKERNEL = ../arm/zamax.c
29+
#ZAMAXKERNEL = ../arm/zamax.c
1830

1931
SAMINKERNEL = amin_lsx.S
2032
DAMINKERNEL = amin_lsx.S
2133
CAMINKERNEL = camin_lsx.S
2234
ZAMINKERNEL = camin_lsx.S
35+
#SAMINKERNEL = ../arm/amin.c
36+
#DAMINKERNEL = ../arm/amin.c
37+
#CAMINKERNEL = ../arm/zamin.c
38+
#ZAMINKERNEL = ../arm/zamin.c
2339

2440
SMAXKERNEL = max_lsx.S
2541
DMAXKERNEL = max_lsx.S
42+
#SMAXKERNEL = ../arm/max.c
43+
#DMAXKERNEL = ../arm/max.c
2644

2745
SMINKERNEL = min_lsx.S
2846
DMINKERNEL = min_lsx.S
47+
#SMINKERNEL = ../arm/min.c
48+
#DMINKERNEL = ../arm/min.c
2949

3050
ISMAXKERNEL = imax_lsx.S
3151
IDMAXKERNEL = imax_lsx.S
52+
#ISMAXKERNEL = ../arm/imax.c
53+
#IDMAXKERNEL = ../arm/imax.c
3254

3355
ISMINKERNEL = imin_lsx.S
3456
IDMINKERNEL = imin_lsx.S
57+
#ISMINKERNEL = ../arm/imin.c
58+
#IDMINKERNEL = ../arm/imin.c
3559

3660
ISAMAXKERNEL = iamax_lsx.S
3761
IDAMAXKERNEL = iamax_lsx.S
3862
ICAMAXKERNEL = icamax_lsx.S
3963
IZAMAXKERNEL = icamax_lsx.S
64+
#ISAMAXKERNEL = ../arm/iamax.c
65+
#IDAMAXKERNEL = ../arm/iamax.c
66+
#ICAMAXKERNEL = ../arm/izamax.c
67+
#IZAMAXKERNEL = ../arm/izamax.c
4068

4169
ISAMINKERNEL = iamin_lsx.S
4270
IDAMINKERNEL = iamin_lsx.S
4371
ICAMINKERNEL = icamin_lsx.S
4472
IZAMINKERNEL = icamin_lsx.S
73+
#ISAMINKERNEL = ../arm/iamin.c
74+
#IDAMINKERNEL = ../arm/iamin.c
75+
#ICAMINKERNEL = ../arm/izamin.c
76+
#IZAMINKERNEL = ../arm/izamin.c
4577

4678
SCOPYKERNEL = copy_lsx.S
4779
DCOPYKERNEL = copy_lsx.S
4880
CCOPYKERNEL = ccopy_lsx.S
4981
ZCOPYKERNEL = ccopy_lsx.S
82+
#SCOPYKERNEL = ../arm/copy.c
83+
#DCOPYKERNEL = ../arm/copy.c
84+
#CCOPYKERNEL = ../arm/zcopy.c
85+
#ZCOPYKERNEL = ../arm/zcopy.c
5086

5187
SSWAPKERNEL = swap_lsx.S
5288
DSWAPKERNEL = swap_lsx.S
89+
#SSWAPKERNEL = ../arm/swap.c
90+
#DSWAPKERNEL = ../arm/swap.c
91+
92+
#####################################
93+
94+
#SAXPYKERNEL = axpy_lsx.S
95+
#DAXPYKERNEL = axpy_lsx.S
96+
#CAXPYKERNEL = caxpy_lsx.S
97+
#ZAXPYKERNEL = caxpy_lsx.S
98+
SAXPYKERNEL = ../arm/axpy.c
99+
DAXPYKERNEL = ../arm/axpy.c
100+
CAXPYKERNEL = ../arm/zaxpy.c
101+
ZAXPYKERNEL = ../arm/zaxpy.c
53102

54-
SAXPYKERNEL = axpy_lsx.S
55-
DAXPYKERNEL = axpy_lsx.S
56-
CAXPYKERNEL = caxpy_lsx.S
57-
ZAXPYKERNEL = caxpy_lsx.S
58103

59104
SAXPBYKERNEL = axpby_lsx.S
60105
DAXPBYKERNEL = axpby_lsx.S
61106
CAXPBYKERNEL = caxpby_lsx.S
62107
ZAXPBYKERNEL = caxpby_lsx.S
63108

64-
SSUMKERNEL = sum_lsx.S
65-
DSUMKERNEL = sum_lsx.S
109+
#SSUMKERNEL = sum_lsx.S
110+
#DSUMKERNEL = sum_lsx.S
111+
SSUMKERNEL = ../arm/sum.c
112+
DSUMKERNEL = ../arm/sum.c
113+
66114

67-
SASUMKERNEL = asum_lsx.S
68-
DASUMKERNEL = asum_lsx.S
69-
CASUMKERNEL = casum_lsx.S
70-
ZASUMKERNEL = casum_lsx.S
115+
#SASUMKERNEL = asum_lsx.S
116+
#DASUMKERNEL = asum_lsx.S
117+
#CASUMKERNEL = casum_lsx.S
118+
#ZASUMKERNEL = casum_lsx.S
119+
SASUMKERNEL = ../arm/asum.c
120+
DASUMKERNEL = ../arm/asum.c
121+
CASUMKERNEL = ../arm/zasum.c
122+
ZASUMKERNEL = ../arm/zasum.c
71123

72124
SROTKERNEL = rot_lsx.S
73125
DROTKERNEL = rot_lsx.S
74126
CROTKERNEL = crot_lsx.S
75127
ZROTKERNEL = crot_lsx.S
76-
77-
SNRM2KERNEL = snrm2_lsx.S
78-
DNRM2KERNEL = dnrm2_lsx.S
79-
CNRM2KERNEL = cnrm2_lsx.S
80-
ZNRM2KERNEL = znrm2_lsx.S
81-
82-
CSWAPKERNEL = cswap_lsx.S
83-
ZSWAPKERNEL = cswap_lsx.S
84-
85-
CSUMKERNEL = csum_lsx.S
86-
ZSUMKERNEL = csum_lsx.S
87-
88-
SGEMVNKERNEL = sgemv_n_lsx.S
89-
SGEMVTKERNEL = sgemv_t_lsx.S
90-
91-
SSYMV_U_KERNEL = ssymv_U_lsx.S
92-
SSYMV_L_KERNEL = ssymv_L_lsx.S
93-
94-
DGEMVNKERNEL = dgemv_n_lsx.S
95-
DGEMVTKERNEL = dgemv_t_lsx.S
96-
97-
DSYMV_U_KERNEL = dsymv_U_lsx.S
98-
DSYMV_L_KERNEL = dsymv_L_lsx.S
99-
100-
DGEMMKERNEL = dgemm_kernel_8x4.S
101-
DGEMMINCOPY = dgemm_ncopy_8_lsx.S
102-
DGEMMITCOPY = dgemm_tcopy_8_lsx.S
103-
DGEMMONCOPY = dgemm_ncopy_4_lsx.S
104-
DGEMMOTCOPY = dgemm_tcopy_4_lsx.S
105-
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
106-
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
107-
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
108-
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
128+
#SROTKERNEL = ../arm/rot.c
129+
#DROTKERNEL = ../arm/rot.c
130+
#CROTKERNEL = ../arm/zrot.c
131+
#ZROTKERNEL = ../arm/zrot.c
132+
133+
#SNRM2KERNEL = snrm2_lsx.S
134+
#DNRM2KERNEL = dnrm2_lsx.S
135+
#CNRM2KERNEL = cnrm2_lsx.S
136+
#ZNRM2KERNEL = znrm2_lsx.S
137+
SNRM2KERNEL = ../arm/nrm2.c
138+
DNRM2KERNEL = ../arm/nrm2.c
139+
CNRM2KERNEL = ../arm/znrm2.c
140+
ZNRM2KERNEL = ../arm/znrm2.c
141+
142+
143+
#######################################
144+
#CSWAPKERNEL = cswap_lsx.S
145+
#ZSWAPKERNEL = cswap_lsx.S
146+
CSWAPKERNEL = ../arm/zswap.c
147+
ZSWAPKERNEL = ../arm/zswap.c
148+
149+
#CSUMKERNEL = csum_lsx.S
150+
#ZSUMKERNEL = csum_lsx.S
151+
CSUMKERNEL = ../arm/zsum.c
152+
ZSUMKERNEL = ../arm/zsum.c
153+
154+
#SGEMVNKERNEL = sgemv_n_lsx.S
155+
#SGEMVTKERNEL = sgemv_t_lsx.S
156+
SGEMVNKERNEL = ../arm/gemv_n.c
157+
SGEMVTKERNEL = ../arm/gemv_t.c
158+
159+
#SSYMV_U_KERNEL = ssymv_U_lsx.S
160+
#SSYMV_L_KERNEL = ssymv_L_lsx.S
161+
SSYMV_U_KERNEL = ../generic/symv_k.c
162+
SSYMV_L_KERNEL = ../generic/symv_k.c
163+
164+
#DGEMVNKERNEL = dgemv_n_lsx.S
165+
#DGEMVTKERNEL = dgemv_t_lsx.S
166+
DGEMVNKERNEL = ../arm/gemv_n.c
167+
DGEMVTKERNEL = ../arm/gemv_t.c
168+
169+
#DSYMV_U_KERNEL = dsymv_U_lsx.S
170+
#DSYMV_L_KERNEL = dsymv_L_lsx.S
171+
DSYMV_U_KERNEL = ../generic/symv_k.c
172+
DSYMV_L_KERNEL = ../generic/symv_k.c
173+
174+
#DGEMMKERNEL = dgemm_kernel_8x4.S
175+
#DGEMMINCOPY = dgemm_ncopy_8_lsx.S
176+
#DGEMMITCOPY = dgemm_tcopy_8_lsx.S
177+
#DGEMMONCOPY = dgemm_ncopy_4_lsx.S
178+
#DGEMMOTCOPY = dgemm_tcopy_4_lsx.S
179+
#DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
180+
#DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
181+
#DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
182+
#DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
109183

110184
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
111185
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
112186
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
113187
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
114188

115-
CGEMVNKERNEL = cgemv_n_4_lsx.S
116-
CGEMVTKERNEL = cgemv_t_4_lsx.S
117-
118-
CGEMMKERNEL = cgemm_kernel_8x4_lsx.S
119-
CGEMMINCOPY = cgemm_ncopy_8_lsx.S
120-
CGEMMITCOPY = cgemm_tcopy_8_lsx.S
121-
CGEMMONCOPY = cgemm_ncopy_4_lsx.S
122-
CGEMMOTCOPY = cgemm_tcopy_4_lsx.S
123-
CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
124-
CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
125-
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
126-
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
189+
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
190+
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
191+
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
192+
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
193+
194+
#CGEMVNKERNEL = cgemv_n_4_lsx.S
195+
#CGEMVTKERNEL = cgemv_t_4_lsx.S
196+
CGEMVTKERNEL = ../arm/zgemv_t.c
197+
CGEMVNKERNEL = ../arm/zgemv_n.c
198+
199+
#CGEMMKERNEL = cgemm_kernel_8x4_lsx.S
200+
#CGEMMINCOPY = cgemm_ncopy_8_lsx.S
201+
#CGEMMITCOPY = cgemm_tcopy_8_lsx.S
202+
#CGEMMONCOPY = cgemm_ncopy_4_lsx.S
203+
#CGEMMOTCOPY = cgemm_tcopy_4_lsx.S
204+
#CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX)
205+
#CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX)
206+
#CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
207+
#CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
127208

128209
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
129210
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
130211
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
131212
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
132213

133-
ZGEMVNKERNEL = zgemv_n_2_lsx.S
134-
ZGEMVTKERNEL = zgemv_t_2_lsx.S
214+
#ZGEMVNKERNEL = zgemv_n_2_lsx.S
215+
#ZGEMVTKERNEL = zgemv_t_2_lsx.S
216+
ZGEMVTKERNEL = ../arm/zgemv_t.c
217+
ZGEMVNKERNEL = ../arm/zgemv_n.c
135218

136-
ZGEMMKERNEL = zgemm_kernel_4x4_lsx.S
137-
ZGEMMONCOPY = zgemm_ncopy_4_lsx.S
138-
ZGEMMOTCOPY = zgemm_tcopy_4_lsx.S
139-
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
140-
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
219+
#ZGEMMKERNEL = zgemm_kernel_4x4_lsx.S
220+
#ZGEMMONCOPY = zgemm_ncopy_4_lsx.S
221+
#ZGEMMOTCOPY = zgemm_tcopy_4_lsx.S
222+
#ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
223+
#ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
141224

142225
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
143226
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
144227
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
145228
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
229+
230+
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
231+
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c
232+
233+
QSYMV_U_KERNEL = ../generic/symv_k.c
234+
QSYMV_L_KERNEL = ../generic/symv_k.c
235+
CSYMV_U_KERNEL = ../generic/zsymv_k.c
236+
CSYMV_L_KERNEL = ../generic/zsymv_k.c
237+
ZSYMV_U_KERNEL = ../generic/zsymv_k.c
238+
ZSYMV_L_KERNEL = ../generic/zsymv_k.c
239+
XSYMV_U_KERNEL = ../generic/zsymv_k.c
240+
XSYMV_L_KERNEL = ../generic/zsymv_k.c
241+
242+
ZHEMV_U_KERNEL = ../generic/zhemv_k.c
243+
ZHEMV_L_KERNEL = ../generic/zhemv_k.c
244+
245+
LSAME_KERNEL = ../generic/lsame.c
246+
SCABS_KERNEL = ../generic/cabs.c
247+
DCABS_KERNEL = ../generic/cabs.c
248+
QCABS_KERNEL = ../generic/cabs.c
249+
250+
251+
ifndef SROTMKERNEL
252+
SROTMKERNEL = ../generic/rotm.c
253+
endif
254+
255+
ifndef DROTMKERNEL
256+
DROTMKERNEL = ../generic/rotm.c
257+
endif
258+
259+
ifndef QROTMKERNEL
260+
QROTMKERNEL = ../generic/rotm.c
261+
endif
262+
263+
STRMMKERNEL = ../generic/trmmkernel_2x2.c
264+
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
265+
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
266+
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
267+
268+
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
269+
SGEMMONCOPY = ../generic/gemm_ncopy_2.c
270+
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
271+
SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
272+
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)
273+
274+
DGEMMKERNEL = ../generic/gemmkernel_2x2.c
275+
DGEMMONCOPY = ../generic/gemm_ncopy_2.c
276+
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
277+
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
278+
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
279+
280+
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c
281+
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c
282+
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
283+
CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX)
284+
CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX)
285+
286+
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c
287+
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c
288+
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
289+
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
290+
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
291+
146292
endif

0 commit comments

Comments
 (0)