@@ -80,24 +80,24 @@ SGEMMDIRECTPERFORMANT = sgemm_direct_performant.c
80
80
endif
81
81
endif
82
82
83
- ifeq ($(BUILD_HALF), 1)
84
- ifndef SHGEMMKERNEL
85
- SHGEMM_BETA = ../generic/gemm_beta.c
86
- SHGEMMKERNEL = ../generic/gemmkernel_2x2.c
87
- SHGEMMINCOPY = ../generic/gemm_ncopy_2.c
88
- SHGEMMITCOPY = ../generic/gemm_tcopy_2.c
89
- SHGEMMONCOPY = ../generic/gemm_ncopy_2.c
90
- SHGEMMOTCOPY = ../generic/gemm_tcopy_2.c
91
- SHGEMMINCOPYOBJ = shgemm_incopy$(TSUFFIX).$(SUFFIX)
92
- SHGEMMITCOPYOBJ = shgemm_itcopy$(TSUFFIX).$(SUFFIX)
93
- SHGEMMONCOPYOBJ = shgemm_oncopy$(TSUFFIX).$(SUFFIX)
94
- SHGEMMOTCOPYOBJ = shgemm_otcopy$(TSUFFIX).$(SUFFIX)
83
+ ifeq ($(BUILD_BFLOAT16), 1)
84
+ ifndef SBGEMMKERNEL
85
+ SBGEMM_BETA = ../generic/gemm_beta.c
86
+ SBGEMMKERNEL = ../generic/gemmkernel_2x2.c
87
+ SBGEMMINCOPY = ../generic/gemm_ncopy_2.c
88
+ SBGEMMITCOPY = ../generic/gemm_tcopy_2.c
89
+ SBGEMMONCOPY = ../generic/gemm_ncopy_2.c
90
+ SBGEMMOTCOPY = ../generic/gemm_tcopy_2.c
91
+ SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX)
92
+ SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
93
+ SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
94
+ SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
95
95
endif
96
96
97
97
SHKERNELOBJS += \
98
- shgemm_kernel$(TSUFFIX).$(SUFFIX) \
99
- $(SHGEMMINCOPYOBJ) $(SHGEMMITCOPYOBJ) \
100
- $(SHGEMMONCOPYOBJ) $(SHGEMMOTCOPYOBJ)
98
+ sbgemm_kernel$(TSUFFIX).$(SUFFIX) \
99
+ $(SBGEMMINCOPYOBJ) $(SBGEMMITCOPYOBJ) \
100
+ $(SBGEMMONCOPYOBJ) $(SBGEMMOTCOPYOBJ)
101
101
endif
102
102
103
103
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE),$(BUILD_COMPLEX))" ""
@@ -149,7 +149,7 @@ XKERNELOBJS += \
149
149
$(XGEMMINCOPYOBJ) $(XGEMMITCOPYOBJ) \
150
150
$(XGEMMONCOPYOBJ) $(XGEMMOTCOPYOBJ)
151
151
152
- ifeq ($(BUILD_HALF),1)
152
+ ifeq ($(BUILD_BFLOAT16),1)
153
153
SHBLASOBJS += $(SHKERNELOBJS)
154
154
endif
155
155
SBLASOBJS += $(SKERNELOBJS)
@@ -159,8 +159,8 @@ CBLASOBJS += $(CKERNELOBJS)
159
159
ZBLASOBJS += $(ZKERNELOBJS)
160
160
XBLASOBJS += $(XKERNELOBJS)
161
161
162
- ifeq ($(BUILD_HALF),1)
163
- SHBLASOBJS += shgemm_beta$(TSUFFIX).$(SUFFIX)
162
+ ifeq ($(BUILD_BFLOAT16),1)
163
+ SHBLASOBJS += sbgemm_beta$(TSUFFIX).$(SUFFIX)
164
164
endif
165
165
166
166
ifneq "$(or $(BUILD_SINGLE),$(BUILD_DOUBLE))" ""
@@ -492,11 +492,11 @@ ZBLASOBJS += \
492
492
zgeadd_k$(TSUFFIX).$(SUFFIX)
493
493
endif
494
494
495
- ifeq ($(BUILD_HALF), 1)
496
- SHGEMMINCOPYOBJ_P = $(SHGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
497
- SHGEMMITCOPYOBJ_P = $(SHGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
498
- SHGEMMONCOPYOBJ_P = $(SHGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
499
- SHGEMMOTCOPYOBJ_P = $(SHGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
495
+ ifeq ($(BUILD_BFLOAT16), 1)
496
+ SBGEMMINCOPYOBJ_P = $(SBGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
497
+ SBGEMMITCOPYOBJ_P = $(SBGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
498
+ SBGEMMONCOPYOBJ_P = $(SBGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
499
+ SBGEMMOTCOPYOBJ_P = $(SBGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
500
500
endif
501
501
502
502
SGEMMINCOPYOBJ_P = $(SGEMMINCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
@@ -524,9 +524,9 @@ XGEMMITCOPYOBJ_P = $(XGEMMITCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
524
524
XGEMMONCOPYOBJ_P = $(XGEMMONCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
525
525
XGEMMOTCOPYOBJ_P = $(XGEMMOTCOPYOBJ:.$(SUFFIX)=.$(PSUFFIX))
526
526
527
- ifeq ($(BUILD_HALF),1)
528
- $(KDIR)shgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
529
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
527
+ ifeq ($(BUILD_BFLOAT16),1)
528
+ $(KDIR)sbgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA)
529
+ $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
530
530
endif
531
531
532
532
$(KDIR)sgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
@@ -548,35 +548,35 @@ $(KDIR)xgemm_beta$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)
548
548
$(CC) $(CFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
549
549
550
550
551
- ifeq ($(BUILD_HALF), 1)
551
+ ifeq ($(BUILD_BFLOAT16), 1)
552
552
553
- $(KDIR)$(SHGEMMONCOPYOBJ) : $(KERNELDIR)/$(SHGEMMONCOPY)
554
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
553
+ $(KDIR)$(SBGEMMONCOPYOBJ) : $(KERNELDIR)/$(SBGEMMONCOPY)
554
+ $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
555
555
556
- $(KDIR)$(SHGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SHGEMMOTCOPY)
556
+ $(KDIR)$(SBGEMMOTCOPYOBJ) : $(KERNELDIR)/$(SBGEMMOTCOPY)
557
557
558
558
ifeq ($(OS), AIX)
559
- $(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemmotcopy.s
560
- m4 shgemmotcopy.s > shgemmotcopy_nomacros.s
561
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmotcopy_nomacros.s -o $@
562
- rm shgemmotcopy.s shgemmotcopy_nomacros.s
559
+ $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmotcopy.s
560
+ m4 sbgemmotcopy.s > sbgemmotcopy_nomacros.s
561
+ $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmotcopy_nomacros.s -o $@
562
+ rm sbgemmotcopy.s sbgemmotcopy_nomacros.s
563
563
else
564
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
564
+ $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
565
565
endif
566
566
567
- ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
567
+ ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
568
568
569
- $(KDIR)$(SHGEMMINCOPYOBJ) : $(KERNELDIR)/$(SHGEMMINCOPY)
570
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
569
+ $(KDIR)$(SBGEMMINCOPYOBJ) : $(KERNELDIR)/$(SBGEMMINCOPY)
570
+ $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
571
571
572
- $(KDIR)$(SHGEMMITCOPYOBJ) : $(KERNELDIR)/$(SHGEMMITCOPY)
572
+ $(KDIR)$(SBGEMMITCOPYOBJ) : $(KERNELDIR)/$(SBGEMMITCOPY)
573
573
ifeq ($(OS), AIX)
574
- $(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemmitcopy.s
575
- m4 shgemmitcopy.s > shgemmitcopy_nomacros.s
576
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemmitcopy_nomacros.s -o $@
577
- rm shgemmitcopy.s shgemmitcopy_nomacros.s
574
+ $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemmitcopy.s
575
+ m4 sbgemmitcopy.s > sbgemmitcopy_nomacros.s
576
+ $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemmitcopy_nomacros.s -o $@
577
+ rm sbgemmitcopy.s sbgemmitcopy_nomacros.s
578
578
else
579
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
579
+ $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
580
580
endif
581
581
582
582
endif
@@ -746,16 +746,16 @@ $(KDIR)sgemm_direct$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SGEMMDIRECTKERNEL)
746
746
$(CC) $(CFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
747
747
endif
748
748
749
- ifeq ($(BUILD_HALF), 1)
749
+ ifeq ($(BUILD_BFLOAT16), 1)
750
750
751
- $(KDIR)shgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
751
+ $(KDIR)sbgemm_kernel$(TSUFFIX).$(SUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
752
752
ifeq ($(OS), AIX)
753
- $(CC) $(CFLAGS) -S -DHALF -UDOUBLE -UCOMPLEX $< -o - > shgemm_kernel$(TSUFFIX).s
754
- m4 shgemm_kernel$(TSUFFIX).s > shgemm_kernel$(TSUFFIX)_nomacros.s
755
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX shgemm_kernel$(TSUFFIX)_nomacros.s -o $@
756
- rm shgemm_kernel$(TSUFFIX).s shgemm_kernel$(TSUFFIX)_nomacros.s
753
+ $(CC) $(CFLAGS) -S -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o - > sbgemm_kernel$(TSUFFIX).s
754
+ m4 sbgemm_kernel$(TSUFFIX).s > sbgemm_kernel$(TSUFFIX)_nomacros.s
755
+ $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX sbgemm_kernel$(TSUFFIX)_nomacros.s -o $@
756
+ rm sbgemm_kernel$(TSUFFIX).s sbgemm_kernel$(TSUFFIX)_nomacros.s
757
757
else
758
- $(CC) $(CFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
758
+ $(CC) $(CFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
759
759
endif
760
760
endif
761
761
@@ -2375,9 +2375,9 @@ $(KDIR)xtrsm_oltncopy$(TSUFFIX).$(SUFFIX) : generic/ztrsm_ltcopy_$(XGEMM_UNROLL_
2375
2375
$(KDIR)sgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMM_BETA)
2376
2376
$(CC) $(PFLAGS) -c -UDOUBLE -UCOMPLEX $< -o $@
2377
2377
2378
- ifeq ($(BUILD_HALF),1)
2379
- $(KDIR)shgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMM_BETA)
2380
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
2378
+ ifeq ($(BUILD_BFLOAT16),1)
2379
+ $(KDIR)sbgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMM_BETA)
2380
+ $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
2381
2381
endif
2382
2382
2383
2383
$(KDIR)dgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DGEMM_BETA)
@@ -2396,19 +2396,19 @@ $(KDIR)xgemm_beta$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(XGEMM_BETA)
2396
2396
$(CC) $(PFLAGS) -c -DXDOUBLE -DCOMPLEX $< -o $@
2397
2397
2398
2398
2399
- ifeq ($(BUILD_HALF), 1)
2400
- $(SHGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMONCOPY)
2401
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
2399
+ ifeq ($(BUILD_BFLOAT16), 1)
2400
+ $(SBGEMMONCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMONCOPY)
2401
+ $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
2402
2402
2403
- $(SHGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMOTCOPY)
2404
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
2403
+ $(SBGEMMOTCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMOTCOPY)
2404
+ $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
2405
2405
2406
- ifneq ($(SHGEMM_UNROLL_M), $(SHGEMM_UNROLL_N))
2407
- $(SHGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMINCOPY)
2408
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
2406
+ ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
2407
+ $(SBGEMMINCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMINCOPY)
2408
+ $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
2409
2409
2410
- $(SHGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SHGEMMITCOPY)
2411
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
2410
+ $(SBGEMMITCOPYOBJ_P) : $(KERNELDIR)/$(SBGEMMITCOPY)
2411
+ $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
2412
2412
2413
2413
endif
2414
2414
endif
@@ -2518,9 +2518,9 @@ endif
2518
2518
endif
2519
2519
2520
2520
2521
- ifeq ($(BUILD_HALF), 1)
2522
- $(KDIR)shgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SHGEMMKERNEL) $(SHGEMMDEPEND)
2523
- $(CC) $(PFLAGS) -c -DHALF -UDOUBLE -UCOMPLEX $< -o $@
2521
+ ifeq ($(BUILD_BFLOAT16), 1)
2522
+ $(KDIR)sbgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SBGEMMKERNEL) $(SBGEMMDEPEND)
2523
+ $(CC) $(PFLAGS) -c -DBFLOAT16 -UDOUBLE -UCOMPLEX $< -o $@
2524
2524
endif
2525
2525
2526
2526
$(KDIR)sgemm_kernel$(TSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SGEMMKERNEL) $(SGEMMDEPEND)
0 commit comments