1
- SAMAXKERNEL = amax.S
2
- DAMAXKERNEL = amax.S
3
- CAMAXKERNEL = zamax.S
4
- ZAMAXKERNEL = zamax.S
5
-
6
1
SAMINKERNEL = ../arm/amin.c
7
2
DAMINKERNEL = ../arm/amin.c
8
3
CAMINKERNEL = ../arm/zamin.c
@@ -14,11 +9,6 @@ DMAXKERNEL = ../arm/max.c
14
9
SMINKERNEL = ../arm/min.c
15
10
DMINKERNEL = ../arm/min.c
16
11
17
- ISAMAXKERNEL = iamax.S
18
- IDAMAXKERNEL = iamax.S
19
- ICAMAXKERNEL = izamax.S
20
- IZAMAXKERNEL = izamax.S
21
-
22
12
ISAMINKERNEL = ../arm/iamin.c
23
13
IDAMINKERNEL = ../arm/iamin.c
24
14
ICAMINKERNEL = ../arm/izamin.c
@@ -30,33 +20,35 @@ IDMAXKERNEL = ../arm/imax.c
30
20
ISMINKERNEL = ../arm/imin.c
31
21
IDMINKERNEL = ../arm/imin.c
32
22
33
- SASUMKERNEL = asum.S
34
- DASUMKERNEL = asum.S
35
- CASUMKERNEL = casum.S
36
- ZASUMKERNEL = zasum.S
23
+ STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
24
+ STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
25
+ STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
26
+ STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
37
27
38
- SAXPYKERNEL = axpy.S
39
- DAXPYKERNEL = axpy.S
40
- CAXPYKERNEL = zaxpy.S
41
- ZAXPYKERNEL = zaxpy.S
28
+ DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
29
+ DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
30
+ DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
31
+ DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
42
32
43
- SCOPYKERNEL = copy.S
44
- DCOPYKERNEL = copy.S
45
- CCOPYKERNEL = copy.S
46
- ZCOPYKERNEL = copy.S
33
+ CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
34
+ CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
35
+ CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
36
+ CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
47
37
48
- SDOTKERNEL = dot.S
49
- DDOTKERNEL = dot.S
50
- CDOTKERNEL = zdot.S
51
- ZDOTKERNEL = zdot.S
52
- DSDOTKERNEL = dot.S
38
+ ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
39
+ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
40
+ ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
41
+ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
53
42
54
- ifneq ($(OS_DARWIN)$(CROSS),11)
55
- SNRM2KERNEL = nrm2.S
56
- DNRM2KERNEL = nrm2.S
57
- CNRM2KERNEL = znrm2.S
58
- ZNRM2KERNEL = znrm2.S
59
- endif
43
+ SAMAXKERNEL = amax.S
44
+ DAMAXKERNEL = amax.S
45
+ CAMAXKERNEL = zamax.S
46
+ ZAMAXKERNEL = zamax.S
47
+
48
+ SAXPYKERNEL = axpy.S
49
+ DAXPYKERNEL = daxpy_thunderx2t99.S
50
+ CAXPYKERNEL = zaxpy.S
51
+ ZAXPYKERNEL = zaxpy.S
60
52
61
53
SROTKERNEL = rot.S
62
54
DROTKERNEL = rot.S
@@ -68,11 +60,6 @@ DSCALKERNEL = scal.S
68
60
CSCALKERNEL = zscal.S
69
61
ZSCALKERNEL = zscal.S
70
62
71
- SSWAPKERNEL = swap.S
72
- DSWAPKERNEL = swap.S
73
- CSWAPKERNEL = swap.S
74
- ZSWAPKERNEL = swap.S
75
-
76
63
SGEMVNKERNEL = gemv_n.S
77
64
DGEMVNKERNEL = gemv_n.S
78
65
CGEMVNKERNEL = zgemv_n.S
@@ -83,18 +70,137 @@ DGEMVTKERNEL = gemv_t.S
83
70
CGEMVTKERNEL = zgemv_t.S
84
71
ZGEMVTKERNEL = zgemv_t.S
85
72
86
- STRMMKERNEL = ../generic/trmmkernel_4x4.c
73
+
74
+ SASUMKERNEL = sasum_thunderx2t99.c
75
+ DASUMKERNEL = dasum_thunderx2t99.c
76
+ CASUMKERNEL = casum_thunderx2t99.c
77
+ ZASUMKERNEL = zasum_thunderx2t99.c
78
+
79
+ SCOPYKERNEL = copy_thunderx2t99.c
80
+ DCOPYKERNEL = copy_thunderx2t99.c
81
+ CCOPYKERNEL = copy_thunderx2t99.c
82
+ ZCOPYKERNEL = copy_thunderx2t99.c
83
+
84
+ SSWAPKERNEL = swap_thunderx2t99.S
85
+ DSWAPKERNEL = swap_thunderx2t99.S
86
+ CSWAPKERNEL = swap_thunderx2t99.S
87
+ ZSWAPKERNEL = swap_thunderx2t99.S
88
+
89
+ ISAMAXKERNEL = iamax_thunderx2t99.c
90
+ IDAMAXKERNEL = iamax_thunderx2t99.c
91
+ ICAMAXKERNEL = izamax_thunderx2t99.c
92
+ IZAMAXKERNEL = izamax_thunderx2t99.c
93
+
94
+ ifneq ($(OS_DARWIN)$(CROSS),11)
95
+ SNRM2KERNEL = scnrm2_thunderx2t99.c
96
+ CNRM2KERNEL = scnrm2_thunderx2t99.c
97
+ #DNRM2KERNEL = dznrm2_thunderx2t99_fast.c
98
+ #ZNRM2KERNEL = dznrm2_thunderx2t99_fast.c
99
+ DNRM2KERNEL = dznrm2_thunderx2t99.c
100
+ ZNRM2KERNEL = dznrm2_thunderx2t99.c
101
+ endif
102
+
103
+ DDOTKERNEL = dot_thunderx2t99.c
104
+ SDOTKERNEL = dot_thunderx2t99.c
105
+ CDOTKERNEL = zdot_thunderx2t99.c
106
+ ZDOTKERNEL = zdot_thunderx2t99.c
107
+ DSDOTKERNEL = dot.S
108
+
109
+ ifneq ($(OS_DARWIN)$(CROSS),11)
110
+
111
+ SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
112
+ STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
113
+ ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N))
114
+ SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c
115
+ SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c
116
+ SGEMMINCOPYOBJ = sgemm_incopy.o
117
+ SGEMMITCOPYOBJ = sgemm_itcopy.o
118
+ endif
119
+ SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c
120
+ SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c
121
+ SGEMMONCOPYOBJ = sgemm_oncopy.o
122
+ SGEMMOTCOPYOBJ = sgemm_otcopy.o
123
+
124
+ DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
125
+ DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
126
+
127
+ ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N))
128
+
129
+ ifeq ($(DGEMM_UNROLL_M), 8)
130
+ DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S
131
+ DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S
132
+ else
133
+ DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c
134
+ DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c
135
+ endif
136
+
137
+ DGEMMINCOPYOBJ = dgemm_incopy.o
138
+ DGEMMITCOPYOBJ = dgemm_itcopy.o
139
+ endif
140
+
141
+ ifeq ($(DGEMM_UNROLL_N), 4)
142
+ DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S
143
+ DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S
144
+ else
145
+ DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c
146
+ DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c
147
+ endif
148
+
149
+ DGEMMONCOPYOBJ = dgemm_oncopy.o
150
+ DGEMMOTCOPYOBJ = dgemm_otcopy.o
151
+
152
+ CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
153
+ CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S
154
+ ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N))
155
+ CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c
156
+ CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c
157
+ CGEMMINCOPYOBJ = cgemm_incopy.o
158
+ CGEMMITCOPYOBJ = cgemm_itcopy.o
159
+ endif
160
+ CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c
161
+ CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c
162
+ CGEMMONCOPYOBJ = cgemm_oncopy.o
163
+ CGEMMOTCOPYOBJ = cgemm_otcopy.o
164
+
165
+ ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
166
+ ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S
167
+ ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N))
168
+ ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c
169
+ ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c
170
+ ZGEMMINCOPYOBJ = zgemm_incopy.o
171
+ ZGEMMITCOPYOBJ = zgemm_itcopy.o
172
+ endif
173
+ ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c
174
+ ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
175
+ ZGEMMONCOPYOBJ = zgemm_oncopy.o
176
+ ZGEMMOTCOPYOBJ = zgemm_otcopy.o
177
+
178
+ ifeq ($(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N), 8x4)
179
+ DGEMMKERNEL = dgemm_kernel_8x4_thunderx2t99.S
180
+ endif
181
+
182
+ ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 16x4)
183
+ SGEMMKERNEL = sgemm_kernel_16x4_thunderx2t99.S
184
+ endif
185
+
186
+ ifeq ($(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N), 8x4)
187
+ CGEMMKERNEL = cgemm_kernel_8x4_thunderx2t99.S
188
+ endif
189
+
190
+ ifeq ($(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N), 4x4)
191
+ ZGEMMKERNEL = zgemm_kernel_4x4_thunderx2t99.S
192
+ endif
193
+
194
+ else
195
+
196
+ STRMMKERNEL = ../generic/trmmkernel_2x2.c
87
197
DTRMMKERNEL = ../generic/trmmkernel_2x2.c
88
198
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
89
199
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
90
200
91
- ifneq ($(OS_DARWIN)$(CROSS),11)
92
- SGEMMKERNEL = sgemm_kernel_4x4.S
93
- else
94
201
SGEMMKERNEL = ../generic/gemmkernel_2x2.c
95
- endif
96
- SGEMMONCOPY = ../generic/gemm_ncopy_4.c
97
- SGEMMOTCOPY = ../generic/gemm_tcopy_4.c
202
+ SGEMMONCOPY = ../generic/gemm_ncopy_2.c
203
+ SGEMMOTCOPY = ../generic/gemm_tcopy_2.c
98
204
SGEMMONCOPYOBJ = sgemm_oncopy.o
99
205
SGEMMOTCOPYOBJ = sgemm_otcopy.o
100
206
@@ -116,26 +222,4 @@ ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c
116
222
ZGEMMONCOPYOBJ = zgemm_oncopy.o
117
223
ZGEMMOTCOPYOBJ = zgemm_otcopy.o
118
224
119
- STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
120
- STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
121
- STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
122
- STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
123
-
124
- DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
125
- DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
126
- DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
127
- DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
128
-
129
- CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
130
- CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
131
- CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
132
- CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
133
-
134
- ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
135
- ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
136
- ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
137
- ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c
138
-
139
-
140
-
141
-
225
+ endif
0 commit comments