Skip to content

Commit 0e11537

Browse files
authored
Merge pull request #5357 from Mousius/bgemm-init
Add infrastructure for BGEMM
2 parents 98aefb7 + 66d9185 commit 0e11537

32 files changed

+1028
-70
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ test/ZBLAT3.SUMM
8282
test/ZBLAT3_3M.SUMM
8383
test/SHBLAT3.SUMM
8484
test/SBBLAT3.SUMM
85+
test/BBLAT3.SUMM
8586
test/cblat1
8687
test/cblat2
8788
test/cblat3
@@ -96,6 +97,7 @@ test/sblat3
9697
test/sblat3_3m
9798
test/test_shgemm
9899
test/test_sbgemm
100+
test/test_bgemm
99101
test/zblat1
100102
test/zblat2
101103
test/zblat3

CONTRIBUTORS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@ In chronological order:
251251
252252
* [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1
253253
* [2025-02-27] Add sbgemv_n_neon kernel
254+
* [2025-05-17] Impl prototype of BGEMM inferface
254255

255256
* Abhishek Kumar <https://github.com/abhishek-iitmadras>
256257
* [2025-04-22] Optimise dot kernel for NEOVERSE V1

Makefile.system

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -276,14 +276,14 @@ SMALL_MATRIX_OPT = 1
276276
endif
277277
ifeq ($(ARCH), arm64)
278278
GEMM_GEMV_FORWARD = 1
279-
GEMM_GEMV_FORWARD_BF16 = 1
279+
SBGEMM_GEMV_FORWARD = 1
280280
endif
281281
ifeq ($(ARCH), riscv)
282282
GEMM_GEMV_FORWARD = 1
283283
endif
284284
ifeq ($(ARCH), power)
285285
GEMM_GEMV_FORWARD = 1
286-
GEMM_GEMV_FORWARD_BF16 = 1
286+
SBGEMM_GEMV_FORWARD = 1
287287
endif
288288

289289
ifeq ($(SMALL_MATRIX_OPT), 1)
@@ -293,8 +293,8 @@ ifneq ($(ONLY_CBLAS), 1)
293293
ifeq ($(GEMM_GEMV_FORWARD), 1)
294294
CCOMMON_OPT += -DGEMM_GEMV_FORWARD
295295
endif
296-
ifeq ($(GEMM_GEMV_FORWARD_BF16), 1)
297-
CCOMMON_OPT += -DGEMM_GEMV_FORWARD_BF16
296+
ifeq ($(SBGEMM_GEMV_FORWARD), 1)
297+
CCOMMON_OPT += -DSBGEMM_GEMV_FORWARD
298298
endif
299299
endif
300300

@@ -1905,6 +1905,8 @@ export BUILD_HFLOAT16
19051905
export NO_LSX
19061906
export NO_LASX
19071907

1908+
export BGEMM_UNROLL_M
1909+
export BGEMM_UNROLL_N
19081910
export SBGEMM_UNROLL_M
19091911
export SBGEMM_UNROLL_N
19101912
export SHGEMM_UNROLL_M

Makefile.tail

Lines changed: 33 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,32 @@
1+
###############################################################################
2+
# Copyright (c) 2025, The OpenBLAS Project
3+
# All rights reserved.
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are
6+
# met:
7+
# 1. Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# 2. Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in
11+
# the documentation and/or other materials provided with the
12+
# distribution.
13+
# 3. Neither the name of the OpenBLAS project nor the names of
14+
# its contributors may be used to endorse or promote products
15+
# derived from this software without specific prior written permission.
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
# POSSIBILITY OF SUCH DAMAGE.
27+
###############################################################################
28+
29+
BBLASOBJS_P = $(BBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
130
SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
231
SHBLASPBJS_P = $(SHBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
332
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
@@ -12,8 +41,8 @@ COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))
1241

1342
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))
1443

15-
BLASOBJS = $(SHBLASOBJS) $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
16-
BLASOBJS_P = $(SHBLASPBJS_P) $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
44+
BLASOBJS = $(SHBLASOBJS) $(BBLASOBJS) $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
45+
BLASOBJS_P = $(SHBLASPBJS_P) $(BBLASOBJS_P) $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)
1746

1847
ifdef EXPRECISION
1948
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
@@ -26,6 +55,7 @@ BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
2655
endif
2756

2857
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHFLOAT16 -UDOUBLE -UCOMPLEX
58+
$(BBLASOBJS) $(BBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX
2959
$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
3060
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
3161
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
@@ -36,6 +66,7 @@ $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
3666
$(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
3767

3868
$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
69+
$(BBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
3970
$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
4071
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
4172
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)

cblas.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,31 @@
1+
/***************************************************************************
2+
* Copyright (c) 2025, The OpenBLAS Project
3+
* All rights reserved.
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions are
6+
* met:
7+
* 1. Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* 2. Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in
11+
* the documentation and/or other materials provided with the
12+
* distribution.
13+
* 3. Neither the name of the OpenBLAS project nor the names of
14+
* its contributors may be used to endorse or promote products
15+
* derived from this software without specific prior written permission.
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
* ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
* POSSIBILITY OF SUCH DAMAGE.
27+
* *****************************************************************************/
28+
129
#ifndef CBLAS_H
230
#define CBLAS_H
331

@@ -441,6 +469,8 @@ void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPE
441469
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
442470
void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);
443471

472+
void cblas_bgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
473+
OPENBLAS_CONST bfloat16 alpha, OPENBLAS_CONST bfloat16 *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST bfloat16 beta, bfloat16 *C, OPENBLAS_CONST blasint ldc);
444474
void cblas_sbgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
445475
OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
446476
void cblas_sbgemm_batch(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransA_array, OPENBLAS_CONST enum CBLAS_TRANSPOSE * TransB_array, OPENBLAS_CONST blasint * M_array, OPENBLAS_CONST blasint * N_array, OPENBLAS_CONST blasint * K_array,

cmake/kernel.cmake

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,31 @@
1+
###############################################################################
2+
# Copyright (c) 2025, The OpenBLAS Project
3+
# All rights reserved.
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are
6+
# met:
7+
# 1. Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# 2. Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in
11+
# the documentation and/or other materials provided with the
12+
# distribution.
13+
# 3. Neither the name of the OpenBLAS project nor the names of
14+
# its contributors may be used to endorse or promote products
15+
# derived from this software without specific prior written permission.
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
# POSSIBILITY OF SUCH DAMAGE.
27+
###############################################################################
28+
129
# helper functions for the kernel CMakeLists.txt
230

331
function(SetFallback KERNEL SOURCE_PATH)
@@ -206,6 +234,16 @@ macro(SetDefaultL3)
206234
SetFallback(ZGEADD_KERNEL ../generic/zgeadd.c)
207235
if (BUILD_BFLOAT16)
208236
SetFallback(SHGEADD_KERNEL ../generic/geadd.c)
237+
SetFallback(BGEMMKERNEL ../generic/gemmkernel_2x2.c)
238+
SetFallback(BGEMM_BETA ../generic/gemm_beta.c)
239+
SetFallback(BGEMMINCOPY ../generic/gemm_ncopy_2.c)
240+
SetFallback(BGEMMITCOPY ../generic/gemm_tcopy_2.c)
241+
SetFallback(BGEMMONCOPY ../generic/gemm_ncopy_2.c)
242+
SetFallback(BGEMMOTCOPY ../generic/gemm_tcopy_2.c)
243+
SetFallback(BGEMMINCOPYOBJ bgemm_incopy.o)
244+
SetFallback(BGEMMITCOPYOBJ bgemm_itcopy.o)
245+
SetFallback(BGEMMONCOPYOBJ bgemm_oncopy.o)
246+
SetFallback(BGEMMOTCOPYOBJ bgemm_otcopy.o)
209247
SetFallback(SBGEMMKERNEL ../generic/gemmkernel_2x2.c)
210248
SetFallback(SBGEMM_BETA ../generic/gemm_beta.c)
211249
SetFallback(SBGEMMINCOPY ../generic/gemm_ncopy_2.c)

cmake/prebuild.cmake

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,31 @@
1+
###############################################################################
2+
# Copyright (c) 2025, The OpenBLAS Project
3+
# All rights reserved.
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are
6+
# met:
7+
# 1. Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# 2. Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in
11+
# the documentation and/or other materials provided with the
12+
# distribution.
13+
# 3. Neither the name of the OpenBLAS project nor the names of
14+
# its contributors may be used to endorse or promote products
15+
# derived from this software without specific prior written permission.
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
# POSSIBILITY OF SUCH DAMAGE.
27+
###############################################################################
28+
129
##
230
## Author: Hank Anderson <[email protected]>
331
## Description: Ported from OpenBLAS/Makefile.prebuild
@@ -131,6 +159,8 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
131159
set(HAVE_SSE2 1)
132160
set(HAVE_SSE3 1)
133161
set(HAVE_SSSE3 1)
162+
set(BGEMM_UNROLL_M 8)
163+
set(BGEMM_UNROLL_N 4)
134164
set(SBGEMM_UNROLL_M 8)
135165
set(SBGEMM_UNROLL_N 4)
136166
set(SGEMM_UNROLL_M 8)

cmake/system.cmake

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -425,8 +425,8 @@ endif ()
425425
if (GEMM_GEMV_FORWARD AND NOT ONLY_CBLAS)
426426
set(CCOMMON_OPT "${CCOMMON_OPT} -DGEMM_GEMV_FORWARD")
427427
endif ()
428-
if (GEMM_GEMV_FORWARD_BF16 AND NOT ONLY_CBLAS)
429-
set(CCOMMON_OPT "${CCOMMON_OPT} -DGEMM_GEMV_FORWARD_BF16")
428+
if (SBGEMM_GEMV_FORWARD AND NOT ONLY_CBLAS)
429+
set(CCOMMON_OPT "${CCOMMON_OPT} -DSBGEMM_GEMV_FORWARD")
430430
endif ()
431431
if (SMALL_MATRIX_OPT)
432432
set(CCOMMON_OPT "${CCOMMON_OPT} -DSMALL_MATRIX_OPT")

cmake/utils.cmake

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,31 @@
1+
###############################################################################
2+
# Copyright (c) 2025, The OpenBLAS Project
3+
# All rights reserved.
4+
# Redistribution and use in source and binary forms, with or without
5+
# modification, are permitted provided that the following conditions are
6+
# met:
7+
# 1. Redistributions of source code must retain the above copyright
8+
# notice, this list of conditions and the following disclaimer.
9+
# 2. Redistributions in binary form must reproduce the above copyright
10+
# notice, this list of conditions and the following disclaimer in
11+
# the documentation and/or other materials provided with the
12+
# distribution.
13+
# 3. Neither the name of the OpenBLAS project nor the names of
14+
# its contributors may be used to endorse or promote products
15+
# derived from this software without specific prior written permission.
16+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17+
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19+
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
20+
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21+
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22+
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23+
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25+
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26+
# POSSIBILITY OF SUCH DAMAGE.
27+
###############################################################################
28+
129
# Functions to help with the OpenBLAS build
230

331
# Reads string from getarch into CMake vars. Format of getarch vars is VARNAME=VALUE
@@ -347,7 +375,7 @@ function(GenerateNamedObjects sources_in)
347375
if (NOT no_float_type)
348376
string(SUBSTRING ${float_type} 0 1 float_char)
349377
string(TOLOWER ${float_char} float_char)
350-
if (${float_type} STREQUAL "BFLOAT16")
378+
if (${float_type} STREQUAL "BFLOAT16" AND NOT "${defines_in}" MATCHES "BGEMM")
351379
set (float_char "sb")
352380
endif ()
353381
endif ()

common.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/*********************************************************************/
22
/* Copyright 2009, 2010 The University of Texas at Austin. */
3+
/* Copyright 2025 The OpenBLAS Project. */
34
/* All rights reserved. */
45
/* */
56
/* Redistribution and use in source and binary forms, with or */
@@ -317,7 +318,11 @@ typedef int blasint;
317318
#elif defined(BFLOAT16)
318319
#define IFLOAT bfloat16
319320
#define XFLOAT IFLOAT
320-
#define FLOAT float
321+
#ifdef BGEMM
322+
#define FLOAT bfloat16
323+
#else
324+
#define FLOAT float
325+
#endif
321326
#define SIZE 2
322327
#define BASE_SHIFT 1
323328
#define ZBASE_SHIFT 2

0 commit comments

Comments
 (0)