Skip to content

Commit 5dd14e3

Browse files
authored
Make building the bfloat16 functions conditional on option BUILD_HALF (#2590)
* Make building the bfloat16 BLAS functions conditional on BUILD_HALF
* Pass the BUILD_HALF option to gensymbol
* Pass BUILD_HALF as a compiler define for DYNAMIC_ARCH builds
1 parent a54e35e commit 5dd14e3

File tree

12 files changed

+120
-25
lines changed

12 files changed

+120
-25
lines changed

CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,13 @@ if (NOT NO_LAPACK)
8686
list(APPEND SUBDIRS lapack)
8787
endif ()
8888

89+
if (NOT DEFINED BUILD_HALF)
90+
set (BUILD_HALF false)
91+
endif ()
8992
# set which float types we want to build for
9093
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
9194
# if none are defined, build for all
92-
set(BUILD_HALF true)
95+
# set(BUILD_HALF true)
9396
set(BUILD_SINGLE true)
9497
set(BUILD_DOUBLE true)
9598
set(BUILD_COMPLEX true)
@@ -121,7 +124,7 @@ if (BUILD_COMPLEX16)
121124
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
122125
endif ()
123126

124-
if (BUILD_SINGLE OR BUILD_HALF)
127+
if (BUILD_HALF)
125128
message(STATUS "Building Half Precision")
126129
list(APPEND FLOAT_TYPES "HALF") # defines nothing
127130
endif ()

Makefile.rule

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,9 @@ COMMON_PROF = -pg
273273
#
274274
# CPP_THREAD_SAFETY_TEST = 1
275275

276+
277+
# If you want to enable the experimental BFLOAT16 support, uncomment the BUILD_HALF setting below.
278+
# BUILD_HALF = 1
276279
#
277280
# End of user configuration
278281
#

Makefile.system

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1124,6 +1124,10 @@ ifeq ($(USE_TLS), 1)
11241124
CCOMMON_OPT += -DUSE_TLS
11251125
endif
11261126

1127+
ifeq ($(BUILD_HALF), 1)
1128+
CCOMMON_OPT += -DBUILD_HALF
1129+
endif
1130+
11271131
CCOMMON_OPT += -DVERSION=\"$(VERSION)\"
11281132

11291133
ifndef SYMBOLPREFIX
@@ -1395,6 +1399,7 @@ export KERNELDIR
13951399
export FUNCTION_PROFILE
13961400
export TARGET_CORE
13971401
export NO_AVX512
1402+
export BUILD_HALF
13981403

13991404
export SHGEMM_UNROLL_M
14001405
export SHGEMM_UNROLL_N

cmake/kernel.cmake

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ macro(SetDefaultL1)
113113
set(ZSUMKERNEL zsum.S)
114114
set(QSUMKERNEL sum.S)
115115
set(XSUMKERNEL zsum.S)
116+
if (BUILD_HALF)
116117
set(SHAMINKERNEL ../arm/amin.c)
117118
set(SHAMAXKERNEL ../arm/amax.c)
118119
set(SHMAXKERNEL ../arm/max.c)
@@ -131,6 +132,7 @@ macro(SetDefaultL1)
131132
set(SHNRM2KERNEL ../arm/nrm2.c)
132133
set(SHSUMKERNEL ../arm/sum.c)
133134
set(SHSWAPKERNEL ../arm/swap.c)
135+
endif ()
134136
endmacro ()
135137

136138
macro(SetDefaultL2)
@@ -179,17 +181,19 @@ macro(SetDefaultL2)
179181
set(XHEMV_L_KERNEL ../generic/zhemv_k.c)
180182
set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
181183
set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
184+
if (BUILD_HALF)
182185
set(SHGEMVNKERNEL ../arm/gemv_n.c)
183186
set(SHGEMVTKERNEL ../arm/gemv_t.c)
184187
set(SHGERKERNEL ../generic/ger.c)
185-
188+
endif ()
186189
endmacro ()
187190

188191
macro(SetDefaultL3)
189192
set(SGEADD_KERNEL ../generic/geadd.c)
190193
set(DGEADD_KERNEL ../generic/geadd.c)
191194
set(CGEADD_KERNEL ../generic/zgeadd.c)
192195
set(ZGEADD_KERNEL ../generic/zgeadd.c)
196+
if (BUILD_HALF)
193197
set(SHGEADD_KERNEL ../generic/geadd.c)
194198
set(SHGEMMKERNEL ../generic/gemmkernel_2x2.c)
195199
set(SHGEMM_BETA ../generic/gemm_beta.c)
@@ -201,6 +205,6 @@ macro(SetDefaultL3)
201205
set(SHGEMMITCOPYOBJ shgemm_itcopy.o)
202206
set(SHGEMMONCOPYOBJ shgemm_oncopy.o)
203207
set(SHGEMMOTCOPYOBJ shgemm_otcopy.o)
204-
208+
endif ()
205209

206210
endmacro ()

common_param.h

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ typedef struct {
4747
int dtb_entries;
4848
int offsetA, offsetB, align;
4949

50-
#if 1
50+
#ifdef BUILD_HALF
5151
int shgemm_p, shgemm_q, shgemm_r;
5252
int shgemm_unroll_m, shgemm_unroll_n, shgemm_unroll_mn;
5353

@@ -1002,12 +1002,14 @@ extern gotoblas_t *gotoblas;
10021002

10031003
#define HAVE_EX_L2 gotoblas -> exclusive_cache
10041004

1005+
#ifdef BUILD_HALF
10051006
#define SHGEMM_P gotoblas -> shgemm_p
10061007
#define SHGEMM_Q gotoblas -> shgemm_q
10071008
#define SHGEMM_R gotoblas -> shgemm_r
10081009
#define SHGEMM_UNROLL_M gotoblas -> shgemm_unroll_m
10091010
#define SHGEMM_UNROLL_N gotoblas -> shgemm_unroll_n
10101011
#define SHGEMM_UNROLL_MN gotoblas -> shgemm_unroll_mn
1012+
#endif
10111013

10121014
#define SGEMM_P gotoblas -> sgemm_p
10131015
#define SGEMM_Q gotoblas -> sgemm_q
@@ -1086,6 +1088,7 @@ extern gotoblas_t *gotoblas;
10861088
#define HAVE_EX_L2 0
10871089
#endif
10881090

1091+
#ifdef BUILD_HALF
10891092
#define SHGEMM_P SHGEMM_DEFAULT_P
10901093
#define SHGEMM_Q SHGEMM_DEFAULT_Q
10911094
#define SHGEMM_R SHGEMM_DEFAULT_R
@@ -1096,6 +1099,7 @@ extern gotoblas_t *gotoblas;
10961099
#else
10971100
#define SHGEMM_UNROLL_MN MAX((SHGEMM_UNROLL_M), (SHGEMM_UNROLL_N))
10981101
#endif
1102+
#endif
10991103

11001104
#define SGEMM_P SGEMM_DEFAULT_P
11011105
#define SGEMM_Q SGEMM_DEFAULT_Q
@@ -1330,31 +1334,31 @@ extern gotoblas_t *gotoblas;
13301334
#endif
13311335

13321336
#ifndef SHGEMM_DEFAULT_R
1333-
#define SHGEMM_DEFAULT_R (((BUFFER_SIZE - ((SHGEMM_DEFAULT_P * SHGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SHGEMM_DEFAULT_Q * 4) - 15) & ~15)
1337+
#define SHGEMM_DEFAULT_R (((BUFFER_SIZE - ((SHGEMM_DEFAULT_P * SHGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SHGEMM_DEFAULT_Q * 4) - 15) & ~15UL)
13341338
#endif
13351339

13361340
#ifndef SGEMM_DEFAULT_R
1337-
#define SGEMM_DEFAULT_R (((BUFFER_SIZE - ((SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SGEMM_DEFAULT_Q * 4) - 15) & ~15)
1341+
#define SGEMM_DEFAULT_R (((BUFFER_SIZE - ((SGEMM_DEFAULT_P * SGEMM_DEFAULT_Q * 4 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (SGEMM_DEFAULT_Q * 4) - 15) & ~15UL)
13381342
#endif
13391343

13401344
#ifndef DGEMM_DEFAULT_R
1341-
#define DGEMM_DEFAULT_R (((BUFFER_SIZE - ((DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (DGEMM_DEFAULT_Q * 8) - 15) & ~15)
1345+
#define DGEMM_DEFAULT_R (((BUFFER_SIZE - ((DGEMM_DEFAULT_P * DGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (DGEMM_DEFAULT_Q * 8) - 15) & ~15UL)
13421346
#endif
13431347

13441348
#ifndef QGEMM_DEFAULT_R
1345-
#define QGEMM_DEFAULT_R (((BUFFER_SIZE - ((QGEMM_DEFAULT_P * QGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (QGEMM_DEFAULT_Q * 16) - 15) & ~15)
1349+
#define QGEMM_DEFAULT_R (((BUFFER_SIZE - ((QGEMM_DEFAULT_P * QGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (QGEMM_DEFAULT_Q * 16) - 15) & ~15UL)
13461350
#endif
13471351

13481352
#ifndef CGEMM_DEFAULT_R
1349-
#define CGEMM_DEFAULT_R (((BUFFER_SIZE - ((CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (CGEMM_DEFAULT_Q * 8) - 15) & ~15)
1353+
#define CGEMM_DEFAULT_R (((BUFFER_SIZE - ((CGEMM_DEFAULT_P * CGEMM_DEFAULT_Q * 8 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (CGEMM_DEFAULT_Q * 8) - 15) & ~15UL)
13501354
#endif
13511355

13521356
#ifndef ZGEMM_DEFAULT_R
1353-
#define ZGEMM_DEFAULT_R (((BUFFER_SIZE - ((ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (ZGEMM_DEFAULT_Q * 16) - 15) & ~15)
1357+
#define ZGEMM_DEFAULT_R (((BUFFER_SIZE - ((ZGEMM_DEFAULT_P * ZGEMM_DEFAULT_Q * 16 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (ZGEMM_DEFAULT_Q * 16) - 15) & ~15UL)
13541358
#endif
13551359

13561360
#ifndef XGEMM_DEFAULT_R
1357-
#define XGEMM_DEFAULT_R (((BUFFER_SIZE - ((XGEMM_DEFAULT_P * XGEMM_DEFAULT_Q * 32 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (XGEMM_DEFAULT_Q * 32) - 15) & ~15)
1361+
#define XGEMM_DEFAULT_R (((BUFFER_SIZE - ((XGEMM_DEFAULT_P * XGEMM_DEFAULT_Q * 32 + GEMM_DEFAULT_OFFSET_A + GEMM_DEFAULT_ALIGN) & ~GEMM_DEFAULT_ALIGN)) / (XGEMM_DEFAULT_Q * 32) - 15) & ~15UL)
13581362
#endif
13591363

13601364
#ifndef SNUMOPT

driver/level3/Makefile

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,10 @@ ifeq ($(ARCH), MIPS)
1919
USE_GEMM3M = 1
2020
endif
2121

22+
ifeq ($(BUILD_HALF),1)
2223
SHBLASOBJS += shgemm_nn.$(SUFFIX) shgemm_nt.$(SUFFIX) shgemm_tn.$(SUFFIX) shgemm_tt.$(SUFFIX)
24+
endif
25+
2326
SBLASOBJS += \
2427
sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) \
2528
strmm_LNUU.$(SUFFIX) strmm_LNUN.$(SUFFIX) strmm_LNLU.$(SUFFIX) strmm_LNLN.$(SUFFIX) \
@@ -204,8 +207,9 @@ COMMONOBJS += gemm_thread_m.$(SUFFIX) gemm_thread_n.$(SUFFIX) gemm_thread_mn.$(
204207
COMMONOBJS += syrk_thread.$(SUFFIX)
205208

206209
ifndef USE_SIMPLE_THREADED_LEVEL3
207-
210+
ifeq ($(BUILD_HALF),1)
208211
SHBLASOBJS += shgemm_thread_nn.$(SUFFIX) shgemm_thread_nt.$(SUFFIX) shgemm_thread_tn.$(SUFFIX) shgemm_thread_tt.$(SUFFIX)
212+
endif
209213
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX)
210214
DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
211215
QBLASOBJS += qgemm_thread_nn.$(SUFFIX) qgemm_thread_nt.$(SUFFIX) qgemm_thread_tn.$(SUFFIX) qgemm_thread_tt.$(SUFFIX)

exports/Makefile

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ ifndef BUILD_LAPACK_DEPRECATED
3030
BUILD_LAPACK_DEPRECATED = 0
3131
endif
3232

33+
ifndef BUILD_HALF
34+
BUILD_HALF = 0
35+
endif
36+
3337
ifeq ($(OSNAME), WINNT)
3438
ifeq ($(F_COMPILER), GFORTRAN)
3539
ifndef ONLY_CBLAS
@@ -234,23 +238,23 @@ static : ../$(LIBNAME)
234238
rm -f goto.$(SUFFIX)
235239

236240
osx.def : gensymbol ../Makefile.system ../getarch.c
237-
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
241+
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
238242

239243
aix.def : gensymbol ../Makefile.system ../getarch.c
240-
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
244+
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
241245

242246
objcopy.def : gensymbol ../Makefile.system ../getarch.c
243-
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
247+
perl ./gensymbol objcopy $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
244248

245249
objconv.def : gensymbol ../Makefile.system ../getarch.c
246-
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > $(@F)
250+
perl ./gensymbol objconv $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > $(@F)
247251

248252
test : linktest.c
249253
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK.
250254
rm -f linktest
251255

252256
linktest.c : gensymbol ../Makefile.system ../getarch.c
253-
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) > linktest.c
257+
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) $(ONLY_CBLAS) "$(SYMBOLPREFIX)" "$(SYMBOLSUFFIX)" $(BUILD_LAPACK_DEPRECATED) $(BUILD_HALF) > linktest.c
254258

255259
clean ::
256260
@rm -f *.def *.dylib __.SYMDEF* *.renamed

exports/gensymbol

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@
3030
icamax,icamin,idamax,idamin,idmax,idmin,isamax,isamin,ismax,ismin,
3131
izamax,izamin,lsame,samax,samin,sasum,saxpy,scabs1,scamax,
3232
scamin,scasum,scnrm2,scopy,sdot,sdsdot,sgbmv,sgemm,sgemv,sger,
33-
shgemm, smax,smin,snrm2,
33+
smax,smin,snrm2,
3434
srot,srotg,srotm,srotmg,ssbmv,sscal,sspmv,sspr2,sspr,sswap,
3535
ssymm,ssymv,ssyr2,ssyr2k,ssyr,ssyrk,stbmv,stbsv,stpmv,stpsv,
3636
strmm,strmv,strsm,strsv,zaxpy,zcopy,zdotc,zdotu,zdrot,
@@ -51,6 +51,7 @@
5151
zimatcopy,
5252
);
5353

54+
@halfblasobjs = (shgemm);
5455
@cblasobjs = (
5556
cblas_caxpy, cblas_ccopy, cblas_cdotc, cblas_cdotu, cblas_cgbmv, cblas_cgemm, cblas_cgemv,
5657
cblas_cgerc, cblas_cgeru, cblas_chbmv, cblas_chemm, cblas_chemv, cblas_cher2, cblas_cher2k,
@@ -67,7 +68,7 @@
6768
cblas_isamax, cblas_izamax,
6869
cblas_sasum, cblas_saxpy,
6970
cblas_scasum, cblas_scnrm2, cblas_scopy, cblas_sdot, cblas_sdsdot, cblas_sgbmv, cblas_sgemm,
70-
cblas_sgemv, cblas_sger, cblas_shgemm, cblas_snrm2, cblas_srot, cblas_srotg,
71+
cblas_sgemv, cblas_sger, cblas_snrm2, cblas_srot, cblas_srotg,
7172
cblas_srotm, cblas_srotmg, cblas_ssbmv, cblas_sscal, cblas_sspmv, cblas_sspr2, cblas_sspr,
7273
cblas_sswap, cblas_ssymm, cblas_ssymv, cblas_ssyr2, cblas_ssyr2k, cblas_ssyr, cblas_ssyrk,
7374
cblas_stbmv, cblas_stbsv, cblas_stpmv, cblas_stpsv, cblas_strmm, cblas_strmv, cblas_strsm,
@@ -83,6 +84,8 @@
8384
cblas_sgeadd, cblas_dgeadd,cblas_cgeadd, cblas_zgeadd
8485
);
8586

87+
@halfcblasobjs = (cblas_shgemm);
88+
8689
@exblasobjs = (
8790
qamax,qamin,qasum,qaxpy,qcabs1,qcopy,qdot,qgbmv,qgemm,
8891
qgemv,qger,qmax,qmin,
@@ -3454,6 +3457,10 @@ use File::Spec;
34543457
use File::Basename;
34553458
my $dirname = File::Spec->catfile(dirname(dirname(File::Spec->rel2abs(__FILE__))), "lapack-netlib");
34563459

3460+
if ($ARGV[12] == 1) {
3461+
@blasobjs = (@blasobjs, @halfblasobjs);
3462+
@cblasobjs = (@cblasobjs, @halfcblasobjs);
3463+
}
34573464
if ($ARGV[8] == 1) {
34583465
#ONLY_CBLAS=1
34593466
@underscore_objs = (@misc_underscore_objs);

interface/Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,9 @@ SBLAS3OBJS = \
4646
somatcopy.$(SUFFIX) simatcopy.$(SUFFIX)\
4747
sgeadd.$(SUFFIX)
4848

49+
ifeq ($(BUILD_HALF),1)
4950
SHBLAS3OBJS = shgemm.$(SUFFIX)
51+
endif
5052

5153
DBLAS1OBJS = \
5254
daxpy.$(SUFFIX) dswap.$(SUFFIX) \
@@ -278,7 +280,9 @@ CSBLAS3OBJS = \
278280
cblas_ssyrk.$(SUFFIX) cblas_ssyr2k.$(SUFFIX) cblas_somatcopy.$(SUFFIX) cblas_simatcopy.$(SUFFIX)\
279281
cblas_sgeadd.$(SUFFIX)
280282

283+
ifeq ($(BUILD_HALF),1)
281284
CSHBLAS3OBJS = cblas_shgemm.$(SUFFIX)
285+
endif
282286

283287
CDBLAS1OBJS = \
284288
cblas_idamax.$(SUFFIX) cblas_idamin.$(SUFFIX) cblas_dasum.$(SUFFIX) cblas_daxpy.$(SUFFIX) \
@@ -1214,8 +1218,10 @@ zhpr2.$(SUFFIX) zhpr2.$(PSUFFIX) : zhpr2.c
12141218
xhpr2.$(SUFFIX) xhpr2.$(PSUFFIX) : zhpr2.c
12151219
$(CC) -c $(CFLAGS) $< -o $(@F)
12161220

1221+
ifeq ($(BUILD_HALF),1)
12171222
shgemm.$(SUFFIX) shgemm.$(PSUFFIX) : gemm.c ../param.h
12181223
$(CC) -c $(CFLAGS) $< -o $(@F)
1224+
endif
12191225

12201226
sgemm.$(SUFFIX) sgemm.$(PSUFFIX) : gemm.c ../param.h
12211227
$(CC) -c $(CFLAGS) $< -o $(@F)
@@ -1778,8 +1784,10 @@ cblas_zhemv.$(SUFFIX) cblas_zhemv.$(PSUFFIX) : zhemv.c
17781784
cblas_sgemm.$(SUFFIX) cblas_sgemm.$(PSUFFIX) : gemm.c ../param.h
17791785
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
17801786

1787+
ifeq ($(BUILD_HALF),1)
17811788
cblas_shgemm.$(SUFFIX) cblas_shgemm.$(PSUFFIX) : gemm.c ../param.h
17821789
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)
1790+
endif
17831791

17841792
cblas_dgemm.$(SUFFIX) cblas_dgemm.$(PSUFFIX) : gemm.c ../param.h
17851793
$(CC) -DCBLAS -c $(CFLAGS) $< -o $(@F)

kernel/CMakeLists.txt

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,11 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS)
137137
foreach (float_type SINGLE DOUBLE HALF)
138138
string(SUBSTRING ${float_type} 0 1 float_char)
139139
if (${float_type} STREQUAL "HALF")
140-
set (float_char "SH")
140+
if (NOT ${BUILD_HALF})
141+
continue ()
142+
else ()
143+
set (float_char "SH")
144+
endif ()
141145
endif ()
142146
GenerateNamedObjects("${KERNELDIR}/${${float_char}GEMMKERNEL}" "" "gemm_kernel" false "" "" false ${float_type})
143147
endforeach()

0 commit comments

Comments
 (0)