Skip to content

Commit 5e43ba9

Browse files
authored
Merge pull request #5419 from Mousius/bgemm-optimisation
Optimize SBGEMM / BGEMM for NEOVERSEV1 further
2 parents 75c6ab4 + 5f47b87 commit 5e43ba9

13 files changed: +1017 / -1417 lines changed

kernel/arm64/KERNEL.NEOVERSEV1

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -34,31 +34,31 @@ SGEMVTKERNEL = gemv_t_sve_v1x3.c
3434
DGEMVTKERNEL = gemv_t_sve_v1x3.c
3535
ifeq ($(BUILD_BFLOAT16), 1)
3636
BGEMM_BETA = bgemm_beta_neon.c
37-
BGEMMKERNEL = bgemm_kernel_$(BGEMM_UNROLL_M)x$(BGEMM_UNROLL_N)_neoversev1.c
37+
BGEMMKERNEL = bgemm_kernel_2vlx4_neoversev1.c
3838
ifneq ($(BGEMM_UNROLL_M), $(BGEMM_UNROLL_N))
39-
BGEMMINCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_M)_neoversev1.c
40-
BGEMMITCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_M)_neoversev1.c
39+
BGEMMINCOPY = bgemm_ncopy_2vl_neoversev1.c
40+
BGEMMITCOPY = bgemm_tcopy_2vl_neoversev1.c
4141
BGEMMINCOPYOBJ = bgemm_incopy$(TSUFFIX).$(SUFFIX)
4242
BGEMMITCOPYOBJ = bgemm_itcopy$(TSUFFIX).$(SUFFIX)
4343
endif
44-
BGEMMONCOPY = sbgemm_ncopy_$(BGEMM_UNROLL_N)_neoversev1.c
45-
BGEMMOTCOPY = sbgemm_tcopy_$(BGEMM_UNROLL_N)_neoversev1.c
44+
BGEMMONCOPY = bgemm_ncopy_4_neoversev1.c
45+
BGEMMOTCOPY = bgemm_tcopy_4_neoversev1.c
4646
BGEMMONCOPYOBJ = bgemm_oncopy$(TSUFFIX).$(SUFFIX)
4747
BGEMMOTCOPYOBJ = bgemm_otcopy$(TSUFFIX).$(SUFFIX)
4848

4949
BGEMVTKERNEL = sbgemv_t_bfdot.c
5050
BGEMVNKERNEL = bgemv_n_sve_v3x4.c
5151

5252
SBGEMM_BETA = sbgemm_beta_neoversev1.c
53-
SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_neoversev1.c
53+
SBGEMMKERNEL = bgemm_kernel_2vlx4_neoversev1.c
5454
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))
55-
SBGEMMINCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_M)_neoversev1.c
56-
SBGEMMITCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_M)_neoversev1.c
55+
SBGEMMINCOPY = bgemm_ncopy_2vl_neoversev1.c
56+
SBGEMMITCOPY = bgemm_tcopy_2vl_neoversev1.c
5757
SBGEMMINCOPYOBJ = sbgemm_incopy$(TSUFFIX).$(SUFFIX)
5858
SBGEMMITCOPYOBJ = sbgemm_itcopy$(TSUFFIX).$(SUFFIX)
5959
endif
60-
SBGEMMONCOPY = sbgemm_ncopy_$(SBGEMM_UNROLL_N)_neoversev1.c
61-
SBGEMMOTCOPY = sbgemm_tcopy_$(SBGEMM_UNROLL_N)_neoversev1.c
60+
SBGEMMONCOPY = bgemm_ncopy_4_neoversev1.c
61+
SBGEMMOTCOPY = bgemm_tcopy_4_neoversev1.c
6262
SBGEMMONCOPYOBJ = sbgemm_oncopy$(TSUFFIX).$(SUFFIX)
6363
SBGEMMOTCOPYOBJ = sbgemm_otcopy$(TSUFFIX).$(SUFFIX)
6464

kernel/arm64/bgemm_kernel_4x4_neoversev1.c renamed to kernel/arm64/bgemm_kernel_2vlx4_neoversev1.c

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -32,20 +32,26 @@
3232
#include "common.h"
3333

3434
#define ALPHA_ONE
35-
#include "bgemm_kernel_4x4_neoversev1_impl.c"
35+
#include "bgemm_kernel_2vlx4_neoversev1_impl.c"
3636
#undef ALPHA_ONE
3737
#undef UPDATE_C
38-
#include "bgemm_kernel_4x4_neoversev1_impl.c"
38+
#undef UPDATE_C2
39+
#undef UPDATE_C1
40+
#include "bgemm_kernel_2vlx4_neoversev1_impl.c"
3941

4042
int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha, IFLOAT *A, IFLOAT *B,
4143
FLOAT *C, BLASLONG ldc) {
44+
#ifdef BGEMM
4245
bfloat16_t alpha_bf16;
4346
memcpy(&alpha_bf16, &alpha, sizeof(bfloat16_t));
4447
float alpha_f32 = vcvtah_f32_bf16(alpha_bf16);
48+
#else
49+
float alpha_f32 = alpha;
50+
#endif
4551

4652
if (alpha_f32 == 1.0f)
47-
return bgemm_kernel_neoversev1_alpha_one(m, n, k, alpha, A, B, C, ldc);
53+
return bgemm_kernel_neoversev1_alpha_one(m, n, k, alpha_f32, A, B, C, ldc);
4854
else
49-
return bgemm_kernel_neoversev1_alpha(m, n, k, alpha, A, B, C, ldc);
55+
return bgemm_kernel_neoversev1_alpha(m, n, k, alpha_f32, A, B, C, ldc);
5056
return 0;
5157
}

0 commit comments

Comments (0)