Skip to content

Commit ea2faf0

Browse files
committed
Add optimized BGEMM for NEOVERSEN2 target

This reuses the existing NEOVERSEN2 8x4 `sbgemm` kernel to implement `bgemm`.
1 parent a4f4662 commit ea2faf0

File tree

7 files changed: 189 additions and 118 deletions.

kernel/arm64/KERNEL.NEOVERSEN2

Lines changed: 16 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -188,6 +188,20 @@ ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c
188188
ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX)
189189
ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX)
190190

191+
ifeq ($(BUILD_BFLOAT16), 1)
192+
BGEMM_BETA = sbgemm_beta_neoversen2.c
193+
BGEMMKERNEL = sbgemm_kernel_$(BGEMM_UNROLL_M)x$(BGEMM_UNROLL_N)_neoversen2.c
194+
BGEMMINCOPY = sbgemm_ncopy_$(BGEMM_UNROLL_M)_neoversen2.c
195+
BGEMMITCOPY = sbgemm_tcopy_$(BGEMM_UNROLL_M)_neoversen2.c
196+
BGEMMONCOPY = sbgemm_ncopy_$(BGEMM_UNROLL_N)_neoversen2.c
197+
BGEMMOTCOPY = sbgemm_tcopy_$(BGEMM_UNROLL_N)_neoversen2.c
198+
BGEMMINCOPYOBJ = bgemm_incopy$(TSUFFIX).$(SUFFIX)
199+
BGEMMITCOPYOBJ = bgemm_itcopy$(TSUFFIX).$(SUFFIX)
200+
BGEMMONCOPYOBJ = bgemm_oncopy$(TSUFFIX).$(SUFFIX)
201+
BGEMMOTCOPYOBJ = bgemm_otcopy$(TSUFFIX).$(SUFFIX)
202+
BGEMVTKERNEL = sbgemv_t_bfdot.c
203+
BGEMVNKERNEL = bgemv_n_sve_v3x4.c
204+
191205
SBGEMM_BETA = sbgemm_beta_neoversen2.c
192206
SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_neoversen2.c
193207
SBGEMMINCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_M)_neoversen2.c
@@ -199,4 +213,5 @@ SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
199213
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
200214
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
201215
SBGEMVTKERNEL = sbgemv_t_bfdot.c
202-
SBGEMVNKERNEL = sbgemv_n_neon.c
216+
SBGEMVNKERNEL = sbgemv_n_neon.c
217+
endif

kernel/arm64/KERNEL.NEOVERSEV2

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1 @@
1-
include $(KERNELDIR)/KERNEL.ARMV8SVE
2-
3-
ifeq ($(BUILD_BFLOAT16), 1)
4-
SBGEMVTKERNEL = sbgemv_t_bfdot.c
5-
SBGEMVNKERNEL = sbgemv_n_neon.c
6-
endif
1+
include $(KERNELDIR)/KERNEL.NEOVERSEN2

kernel/arm64/sbgemm_kernel_8x4_neoversen2.c

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/***************************************************************************
2-
* Copyright (c) 2022, The OpenBLAS Project
2+
* Copyright (c) 2022,2025 The OpenBLAS Project
33
* All rights reserved.
44
* Redistribution and use in source and binary forms, with or without
55
* modification, are permitted provided that the following conditions are
@@ -33,13 +33,23 @@
3333
#define ALPHA_ONE
3434
#include "sbgemm_kernel_8x4_neoversen2_impl.c"
3535
#undef ALPHA_ONE
36+
#undef UPDATE_C
3637
#include "sbgemm_kernel_8x4_neoversen2_impl.c"
3738

3839
int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT *A, IFLOAT *B,
3940
FLOAT *C, BLASLONG ldc) {
40-
if (alpha == 1.0f)
41-
return sbgemm_kernel_neoversen2_alpha_one(m, n, k, alpha, A, B, C, ldc);
41+
#ifdef BGEMM
42+
bfloat16_t alpha_bf16;
43+
memcpy(&alpha_bf16, &alpha, sizeof(bfloat16_t));
44+
float alpha_f32 = vcvtah_f32_bf16(alpha_bf16);
45+
#else
46+
float alpha_f32 = alpha;
47+
#endif
48+
49+
if (alpha_f32 == 1.0f)
50+
return gemm_kernel_neoversen2_alpha_one(m, n, k, alpha, A, B, C, ldc);
4251
else
43-
return sbgemm_kernel_neoversen2_alpha(m, n, k, alpha, A, B, C, ldc);
52+
return gemm_kernel_neoversen2_alpha(m, n, k, alpha, A, B, C, ldc);
53+
4454
return 0;
4555
}

0 commit comments

Comments
 (0)