Skip to content

Commit f088374

Browse files
authored
Merge pull request #96 from xianyi/develop
rebase
2 parents e396ec8 + 1c0b03e commit f088374

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+2676
-934
lines changed

Makefile.system

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ ifndef TOPDIR
99
TOPDIR = .
1010
endif
1111

12-
# If ARCH is not set, we use the host system's architecture for getarch compile options.
12+
# If ARCH is not set, we use the host system's architecture for getarch compile options.
1313
ifndef ARCH
1414
HOSTARCH := $(shell uname -m)
1515
else
@@ -73,6 +73,18 @@ endif
7373
#
7474
# Beginning of system configuration
7575
#
76+
# Default precision selection.
# When none of BUILD_SINGLE, BUILD_DOUBLE, BUILD_COMPLEX and BUILD_COMPLEX16
# is set to exactly 1, all four are switched on.  `override` is used so the
# assignments also take effect over values passed on the make command line.
ifneq ($(BUILD_SINGLE),1)
  ifneq ($(BUILD_DOUBLE),1)
    ifneq ($(BUILD_COMPLEX),1)
      ifneq ($(BUILD_COMPLEX16),1)
        override BUILD_SINGLE = 1
        override BUILD_DOUBLE = 1
        override BUILD_COMPLEX = 1
        override BUILD_COMPLEX16 = 1
      endif
    endif
  endif
endif
7688

7789
ifndef HOSTCC
7890
HOSTCC = $(CC)
@@ -1224,16 +1236,16 @@ ifeq ($(BUILD_HALF), 1)
12241236
CCOMMON_OPT += -DBUILD_HALF
12251237
endif
12261238
ifeq ($(BUILD_SINGLE), 1)
1227-
CCOMMON_OPT += -DBUILD_SINGLE
1239+
CCOMMON_OPT += -DBUILD_SINGLE=1
12281240
endif
12291241
ifeq ($(BUILD_DOUBLE), 1)
1230-
CCOMMON_OPT += -DBUILD_DOUBLE
1242+
CCOMMON_OPT += -DBUILD_DOUBLE=1
12311243
endif
12321244
ifeq ($(BUILD_COMPLEX), 1)
1233-
CCOMMON_OPT += -DBUILD_COMPLEX
1245+
CCOMMON_OPT += -DBUILD_COMPLEX=1
12341246
endif
12351247
ifeq ($(BUILD_COMPLEX16), 1)
1236-
CCOMMON_OPT += -DBUILD_COMPLEX16
1248+
CCOMMON_OPT += -DBUILD_COMPLEX16=1
12371249
endif
12381250

12391251
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"

Makefile.x86_64

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,11 @@ endif
99
endif
1010

1111
# Add -msse3 to the C and Fortran common options only when HAVE_SSE3 is
# defined and DYNAMIC_ARCH is not.
ifndef DYNAMIC_ARCH
ifdef HAVE_SSE3
CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3
endif
endif
1517

1618
ifeq ($(CORE), SKYLAKEX)
1719
ifndef DYNAMIC_ARCH

common_param.h

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
167167
float (*snrm2_k) (BLASLONG, float *, BLASLONG);
168168
float (*sasum_k) (BLASLONG, float *, BLASLONG);
169169
#endif
170+
170171
#if BUILD_SINGLE
171172
float (*ssum_k) (BLASLONG, float *, BLASLONG);
172173
#endif
@@ -188,13 +189,15 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
188189
int (*sgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
189190
int (*sgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
190191
#endif
191-
#if BUILD_SINGLE
192+
193+
#if BUILD_SINGLE
192194
int (*sger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
193195

194196
int (*ssymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
195197
int (*ssymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
196198
#endif
197199

200+
198201
#if (BUILD_SINGLE) || (BUILD_DOUBLE) || (BUILD_COMPLEX)
199202
#ifdef ARCH_X86_64
200203
void (*sgemm_direct) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG , float *, BLASLONG , float * , BLASLONG);
@@ -210,6 +213,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG);
210213
int (*sgemm_oncopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
211214
int (*sgemm_otcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *);
212215
#endif
216+
213217
#if (BUILD_SINGLE) || (BUILD_DOUBLE)
214218
int (*strsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
215219
int (*strsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG);
@@ -304,12 +308,14 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
304308
int (*dgemv_n) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
305309
int (*dgemv_t) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
306310
#endif
311+
307312
#if BUILD_DOUBLE
308313
int (*dger_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
309314

310315
int (*dsymv_L) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
311316
int (*dsymv_U) (BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
312317
#endif
318+
313319
#if (BUILD_DOUBLE) || (BUILD_COMPLEX16)
314320
int (*dgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG);
315321
int (*dgemm_beta )(BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
@@ -319,6 +325,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG);
319325
int (*dgemm_oncopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
320326
int (*dgemm_otcopy )(BLASLONG, BLASLONG, double *, BLASLONG, double *);
321327
#endif
328+
322329
#if BUILD_DOUBLE
323330
int (*dtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
324331
int (*dtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, double, double *, double *, double *, BLASLONG, BLASLONG);
@@ -466,6 +473,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG);
466473

467474
#endif
468475

476+
469477
#if (BUILD_COMPLEX) || (BUILD_COMPLEX16)
470478
int cgemm_p, cgemm_q, cgemm_r;
471479
int cgemm_unroll_m, cgemm_unroll_n, cgemm_unroll_mn;
@@ -644,6 +652,7 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG);
644652
int (*claswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *);
645653
#endif
646654

655+
647656
#if BUILD_COMPLEX16
648657
int zgemm_p, zgemm_q, zgemm_r;
649658
int zgemm_unroll_m, zgemm_unroll_n, zgemm_unroll_mn;
@@ -982,6 +991,7 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
982991
void (*init)(void);
983992

984993
int snum_opt, dnum_opt, qnum_opt;
994+
985995
#if BUILD_SINGLE
986996
int (*saxpby_k) (BLASLONG, float, float*, BLASLONG,float, float*, BLASLONG);
987997
#endif
@@ -995,14 +1005,14 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
9951005
int (*zaxpby_k) (BLASLONG, double, double, double*, BLASLONG,double,double, double*, BLASLONG);
9961006
#endif
9971007

998-
#if BUILD_SINGLE
1008+
#if BUILD_SINGLE
9991009
int (*somatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
10001010
int (*somatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
10011011
int (*somatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
10021012
int (*somatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG, float*, BLASLONG);
10031013
#endif
10041014

1005-
#if BUILD_DOUBLE
1015+
#if BUILD_DOUBLE
10061016
int (*domatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
10071017
int (*domatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);
10081018
int (*domatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG, double*, BLASLONG);

ctest/Makefile

Lines changed: 115 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -46,56 +46,155 @@ else
4646
all :: all1 all2 all3
4747
endif
4848

49-
all1: xscblat1 xdcblat1 xccblat1 xzcblat1
49+
# Level-1 CBLAS testers: one executable per enabled precision
# (BUILD_SINGLE / BUILD_DOUBLE / BUILD_COMPLEX / BUILD_COMPLEX16).
ifeq ($(BUILD_SINGLE),1)
all1targets += xscblat1
endif
ifeq ($(BUILD_DOUBLE),1)
all1targets += xdcblat1
endif
ifeq ($(BUILD_COMPLEX),1)
all1targets += xccblat1
endif
ifeq ($(BUILD_COMPLEX16),1)
all1targets += xzcblat1
endif

all1: $(all1targets)
# The testers are executed only when CROSS is not defined.  Each enabled
# tester is run with the thread-count variable set to 2: OMP_NUM_THREADS
# when USE_OPENMP is 1, OPENBLAS_NUM_THREADS otherwise.
ifndef CROSS
ifeq ($(BUILD_SINGLE),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xscblat1
else
	OPENBLAS_NUM_THREADS=2 ./xscblat1
endif
endif
ifeq ($(BUILD_DOUBLE),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xdcblat1
else
	OPENBLAS_NUM_THREADS=2 ./xdcblat1
endif
endif
ifeq ($(BUILD_COMPLEX),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xccblat1
else
	OPENBLAS_NUM_THREADS=2 ./xccblat1
endif
endif
ifeq ($(BUILD_COMPLEX16),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xzcblat1
else
	OPENBLAS_NUM_THREADS=2 ./xzcblat1
endif
endif
endif
93+
94+
# Level-2 CBLAS testers: one executable per enabled precision
# (BUILD_SINGLE / BUILD_DOUBLE / BUILD_COMPLEX / BUILD_COMPLEX16).
ifeq ($(BUILD_SINGLE),1)
all2targets += xscblat2
endif
ifeq ($(BUILD_DOUBLE),1)
all2targets += xdcblat2
endif
ifeq ($(BUILD_COMPLEX),1)
all2targets += xccblat2
endif
ifeq ($(BUILD_COMPLEX16),1)
all2targets += xzcblat2
endif

all2: $(all2targets)
# The testers are executed only when CROSS is not defined.  Each enabled
# tester reads its input file (sin2, din2, cin2, zin2) on stdin and is run
# with the thread-count variable set to 2: OMP_NUM_THREADS when USE_OPENMP
# is 1, OPENBLAS_NUM_THREADS otherwise.
ifndef CROSS
ifeq ($(BUILD_SINGLE),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xscblat2 < sin2
else
	OPENBLAS_NUM_THREADS=2 ./xscblat2 < sin2
endif
endif
ifeq ($(BUILD_DOUBLE),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xdcblat2 < din2
else
	OPENBLAS_NUM_THREADS=2 ./xdcblat2 < din2
endif
endif
ifeq ($(BUILD_COMPLEX),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xccblat2 < cin2
else
	OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2
endif
endif
ifeq ($(BUILD_COMPLEX16),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xzcblat2 < zin2
else
	OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2
endif
endif
endif
138+
139+
140+
# Level-3 CBLAS testers: one executable per enabled precision
# (BUILD_SINGLE / BUILD_DOUBLE / BUILD_COMPLEX / BUILD_COMPLEX16).
ifeq ($(BUILD_SINGLE),1)
all3targets += xscblat3
endif
ifeq ($(BUILD_DOUBLE),1)
all3targets += xdcblat3
endif
ifeq ($(BUILD_COMPLEX),1)
all3targets += xccblat3
endif
ifeq ($(BUILD_COMPLEX16),1)
all3targets += xzcblat3
endif

all3: $(all3targets)
# The testers are executed only when CROSS is not defined.  Each enabled
# tester reads its input file (sin3, din3, cin3, zin3) on stdin and is run
# with the thread-count variable set to 2: OMP_NUM_THREADS when USE_OPENMP
# is 1, OPENBLAS_NUM_THREADS otherwise.
ifndef CROSS
ifeq ($(BUILD_SINGLE),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xscblat3 < sin3
else
	OPENBLAS_NUM_THREADS=2 ./xscblat3 < sin3
endif
endif
ifeq ($(BUILD_DOUBLE),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xdcblat3 < din3
else
	OPENBLAS_NUM_THREADS=2 ./xdcblat3 < din3
endif
endif
ifeq ($(BUILD_COMPLEX),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xccblat3 < cin3
else
	OPENBLAS_NUM_THREADS=2 ./xccblat3 < cin3
endif
endif
ifeq ($(BUILD_COMPLEX16),1)
ifeq ($(USE_OPENMP), 1)
	OMP_NUM_THREADS=2 ./xzcblat3 < zin3
else
	OPENBLAS_NUM_THREADS=2 ./xzcblat3 < zin3
endif
endif
endif
92184

93185
# GEMM3M variants of the complex level-3 CBLAS testers.
#
# Only the testers whose precision is enabled are listed as prerequisites:
# the link rules for xccblat3_3m and xzcblat3_3m exist only under
# BUILD_COMPLEX=1 and BUILD_COMPLEX16=1 respectively, so an unconditional
# prerequisite list fails with "No rule to make target" in a restricted
# build.
ifeq ($(BUILD_COMPLEX16),1)
all3_3mtargets += xzcblat3_3m
endif
ifeq ($(BUILD_COMPLEX),1)
all3_3mtargets += xccblat3_3m
endif

all3_3m: $(all3_3mtargets)
# As for all1/all2/all3, the testers are executed only when CROSS is not
# defined.  xccblat3_3m is guarded by BUILD_COMPLEX in both branches (the
# OpenMP branch previously tested BUILD_SINGLE, which could run a tester
# that was never built, or skip one that was).
ifndef CROSS
ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_COMPLEX),1)
	OMP_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
endif
ifeq ($(BUILD_COMPLEX16),1)
	OMP_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
else
ifeq ($(BUILD_COMPLEX),1)
	OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m
endif
ifeq ($(BUILD_COMPLEX16),1)
	OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m
endif
endif
endif
@@ -115,21 +214,30 @@ endif
115214
endif
116215
endif
117216

217+
ifeq ($(BUILD_SINGLE),1)
# Single real: link the level-1/2/3 testers with $(FC).  These rules are
# defined only when BUILD_SINGLE is 1.
xscblat1: $(stestl1o) c_sblat1.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_sblat1.o $(stestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)

xscblat2: $(stestl2o) c_sblat2.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_sblat2.o $(stestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)

xscblat3: $(stestl3o) c_sblat3.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_sblat3.o $(stestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
228+
229+
ifeq ($(BUILD_DOUBLE),1)
# Double real: link the level-1/2/3 testers with $(FC).  These rules are
# defined only when BUILD_DOUBLE is 1.
xdcblat1: $(dtestl1o) c_dblat1.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_dblat1.o $(dtestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)

xdcblat2: $(dtestl2o) c_dblat2.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_dblat2.o $(dtestl2o) $(LIB) $(EXTRALIB) $(CEXTRALIB)

xdcblat3: $(dtestl3o) c_dblat3.o $(TOPDIR)/$(LIBNAME)
	$(FC) $(FLDFLAGS) -o $@ c_dblat3.o $(dtestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
endif
132238

239+
240+
ifeq ($(BUILD_COMPLEX),1)
133241
# Single complex
134242
xccblat1: $(ctestl1o) c_cblat1.o $(TOPDIR)/$(LIBNAME)
135243
$(FC) $(FLDFLAGS) -o xccblat1 c_cblat1.o $(ctestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
@@ -140,7 +248,10 @@ xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME)
140248

141249
xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME)
142250
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
251+
endif
143252

253+
254+
ifeq ($(BUILD_COMPLEX16),1)
144255
# Double complex
145256
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME)
146257
$(FC) $(FLDFLAGS) -o xzcblat1 c_zblat1.o $(ztestl1o) $(LIB) $(EXTRALIB) $(CEXTRALIB)
@@ -152,6 +263,6 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME)
152263

153264
xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME)
154265
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB)
155-
266+
endif
156267

157268
include $(TOPDIR)/Makefile.tail

driver/level2/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,19 @@ foreach (float_type ${FLOAT_TYPES})
197197
endif ()
198198
endforeach ()
199199

200+
# In a threaded build, when a complex precision is enabled without its real
# counterpart, still generate the gemv_thread_n / gemv_thread_t objects for
# that real type (SINGLE for BUILD_COMPLEX, DOUBLE for BUILD_COMPLEX16).
# NOTE(review): presumably the complex code paths reference these real-type
# helpers -- confirm against the callers.
if (USE_THREAD)
  if (BUILD_COMPLEX AND NOT BUILD_SINGLE)
    GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "SINGLE")
    GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "SINGLE")
  endif ()
  if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE)
    GenerateNamedObjects("gemv_thread.c" "" "gemv_thread_n" false "" "" false "DOUBLE")
    GenerateNamedObjects("gemv_thread.c" "TRANSA" "gemv_thread_t" false "" "" false "DOUBLE")
  endif ()
endif ()
212+
200213
if (USE_THREAD)
201214
GenerateCombinationObjects("${UL_SMP_SOURCES}" "LOWER" "U" "" 2)
202215
endif ()

0 commit comments

Comments
 (0)