Skip to content

Commit 392d381

Browse files
authored
Merge pull request #5394 from Mousius/optimize-bgemv
Optimized BGEMV for NEOVERSEV1 target
2 parents f4caa61 + 2c3cdaf commit 392d381

File tree

7 files changed

+426
-42
lines changed

7 files changed

+426
-42
lines changed

Makefile.system

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,7 @@ endif
277277
ifeq ($(ARCH), arm64)
278278
GEMM_GEMV_FORWARD = 1
279279
SBGEMM_GEMV_FORWARD = 1
280+
BGEMM_GEMV_FORWARD = 1
280281
endif
281282
ifeq ($(ARCH), riscv)
282283
GEMM_GEMV_FORWARD = 1
@@ -296,6 +297,9 @@ endif
296297
ifeq ($(SBGEMM_GEMV_FORWARD), 1)
297298
CCOMMON_OPT += -DSBGEMM_GEMV_FORWARD
298299
endif
300+
ifeq ($(BGEMM_GEMV_FORWARD), 1)
301+
CCOMMON_OPT += -DBGEMM_GEMV_FORWARD
302+
endif
299303
endif
300304

301305
# This operation is expensive, so execution should be once.

benchmark/Makefile

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ GOTO_LAPACK_TARGETS=
8484
endif
8585

8686
ifeq ($(BUILD_BFLOAT16),1)
87-
GOTO_BFLOAT_TARGETS=bgemm.goto sbgemm.goto
87+
GOTO_BFLOAT_TARGETS=bgemm.goto sbgemm.goto bgemv.goto sbgemv.goto
8888
else
8989
GOTO_BFLOAT_TARGETS=
9090
endif
@@ -667,6 +667,10 @@ bgemm.goto : bgemm.$(SUFFIX) ../$(LIBNAME)
667667
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
668668
sbgemm.goto : sbgemm.$(SUFFIX) ../$(LIBNAME)
669669
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
670+
bgemv.goto : bgemv.$(SUFFIX) ../$(LIBNAME)
671+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
672+
sbgemv.goto : sbgemv.$(SUFFIX) ../$(LIBNAME)
673+
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm
670674
endif
671675

672676
ifeq ($(BUILD_HFLOAT16),1)
@@ -3146,6 +3150,13 @@ dgemv.$(SUFFIX) : gemv.c
31463150
cgemv.$(SUFFIX) : gemv.c
31473151
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^
31483152

3153+
ifeq ($(BUILD_BFLOAT16),1)
3154+
bgemv.$(SUFFIX) : gemv.c
3155+
$(CC) $(CFLAGS) -c -DBFLOAT16 -DBGEMM -UCOMPLEX -UDOUBLE -o $(@F) $^
3156+
sbgemv.$(SUFFIX) : gemv.c
3157+
$(CC) $(CFLAGS) -c -DBFLOAT16 -UCOMPLEX -UDOUBLE -o $(@F) $^
3158+
endif
3159+
31493160
zgemv.$(SUFFIX) : gemv.c
31503161
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^
31513162

benchmark/gemv.c

Lines changed: 25 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/***************************************************************************
2-
Copyright (c) 2014, The OpenBLAS Project
2+
Copyright (c) 2014, 2025 The OpenBLAS Project
33
All rights reserved.
44
Redistribution and use in source and binary forms, with or without
55
modification, are permitted provided that the following conditions are
@@ -34,6 +34,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
3434

3535
#ifdef DOUBLE
3636
#define GEMV BLASFUNC(dgemv)
37+
#elif defined(BFLOAT16) && defined(BGEMM)
38+
#define GEMV BLASFUNC(bgemv)
39+
#elif defined(BFLOAT16)
40+
#define GEMV BLASFUNC(sbgemv)
41+
#undef IFLOAT
42+
#define IFLOAT bfloat16
3743
#else
3844
#define GEMV BLASFUNC(sgemv)
3945
#endif
@@ -49,9 +55,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4955
#endif
5056
int main(int argc, char *argv[]){
5157

52-
FLOAT *a, *x, *y;
53-
FLOAT alpha[] = {1.0, 1.0};
54-
FLOAT beta [] = {1.0, 0.0};
58+
IFLOAT *a, *x;
59+
FLOAT *y;
60+
#ifdef BGEMM
61+
blasint one=1;
62+
blasint two=2;
63+
float alpha_in[] = {1.0, 0.0};
64+
float beta_in[] = {0.0, 0.0};
65+
FLOAT alpha[2], beta[2];
66+
sbstobf16_(&two, alpha_in, &one, alpha, &one);
67+
sbstobf16_(&two, beta_in, &one, beta, &one);
68+
#else
69+
FLOAT alpha[] = {1.0, 0.0};
70+
FLOAT beta [] = {0.0, 0.0};
71+
#endif
5572
char trans='N';
5673
blasint m, i, j;
5774
blasint inc_x=1,inc_y=1;
@@ -97,11 +114,11 @@ int main(int argc, char *argv[]){
97114

98115
fprintf(stderr, "From : %3d To : %3d Step = %3d Trans = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,trans,inc_x,inc_y,loops);
99116

100-
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * tomax * tomax * COMPSIZE)) == NULL){
117+
if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * tomax * tomax * COMPSIZE)) == NULL){
101118
fprintf(stderr,"Out of Memory!!\n");exit(1);
102119
}
103120

104-
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * tomax * abs(inc_x) * COMPSIZE)) == NULL){
121+
if (( x = (IFLOAT *)malloc(sizeof(IFLOAT) * tomax * abs(inc_x) * COMPSIZE)) == NULL){
105122
fprintf(stderr,"Out of Memory!!\n");exit(1);
106123
}
107124

@@ -125,15 +142,15 @@ int main(int argc, char *argv[]){
125142
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
126143
for(j = 0; j < m; j++){
127144
for(i = 0; i < n * COMPSIZE; i++){
128-
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
145+
a[(long)i + (long)j * (long)m * COMPSIZE] = ((IFLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
129146
}
130147
}
131148

132149
for (l=0; l<loops; l++)
133150
{
134151

135152
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
136-
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
153+
x[i] = ((IFLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
137154
}
138155

139156
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){

kernel/arm64/KERNEL.NEOVERSEV1

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,9 @@ BGEMMOTCOPY = sbgemm_tcopy_$(BGEMM_UNROLL_N)_neoversev1.c
4646
BGEMMONCOPYOBJ = bgemm_oncopy$(TSUFFIX).$(SUFFIX)
4747
BGEMMOTCOPYOBJ = bgemm_otcopy$(TSUFFIX).$(SUFFIX)
4848

49+
BGEMVTKERNEL = sbgemv_t_bfdot.c
50+
BGEMVNKERNEL = bgemv_n_sve_v3x4.c
51+
4952
SBGEMM_BETA = sbgemm_beta_neoversev1.c
5053
SBGEMMKERNEL = sbgemm_kernel_$(SBGEMM_UNROLL_M)x$(SBGEMM_UNROLL_N)_neoversev1.c
5154
ifneq ($(SBGEMM_UNROLL_M), $(SBGEMM_UNROLL_N))

0 commit comments

Comments
 (0)