Skip to content

Commit 39fd29f

Browse files
author
Chip Kerchner
committed
Minor improvement and turn off BF16 GEMV forwarding by default.
1 parent 8541b25 commit 39fd29f

File tree

3 files changed

+18 additions
-18 deletions
(36 lines changed)

3 files changed

+18
-18
lines changed

Makefile.system

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -282,7 +282,6 @@ GEMM_GEMV_FORWARD = 1
282282
endif
283283
ifeq ($(ARCH), power)
284284
GEMM_GEMV_FORWARD = 1
285-
GEMM_GEMV_FORWARD_BF16 = 1
286285
endif
287286

288287
ifeq ($(SMALL_MATRIX_OPT), 1)

kernel/power/sbgemv_n.c

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -31,8 +31,10 @@ static void BF16GEMV_N_beta(BLASLONG n, FLOAT *output_vector, FLOAT *input_vecto
3131
{
3232
if (beta == 0) {
3333
memset(output_vector, 0, sizeof(FLOAT) * n);
34-
} else if ((output_vector != input_vector) && (beta == 1)) {
35-
memcpy(output_vector, input_vector, sizeof(FLOAT) * n);
34+
} else if (beta == 1) {
35+
if (output_vector != input_vector) {
36+
memcpy(output_vector, input_vector, sizeof(FLOAT) * n);
37+
}
3638
} else {
3739
vec_f32 b = { beta, beta, beta, beta };
3840

test/compare_sgemm_sbgemm.c

Lines changed: 14 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -205,15 +205,14 @@ main (int argc, char *argv[])
205205
for (l = 0; l < 2; l++) { // l = 1 to test inc_x & inc_y not equal to one.
206206
for (x = 1; x <= loop; x++)
207207
{
208-
m = l + 1;
209-
k = (x == 0) ? 0 : m;
208+
k = (x == 0) ? 0 : l + 1;
210209
float *A = (float *)malloc_safe(x * x * sizeof(FLOAT));
211-
float *B = (float *)malloc_safe(x * sizeof(FLOAT) * m);
212-
float *C = (float *)malloc_safe(x * sizeof(FLOAT) * m);
210+
float *B = (float *)malloc_safe(x * sizeof(FLOAT) << l);
211+
float *C = (float *)malloc_safe(x * sizeof(FLOAT) << l);
213212
bfloat16_bits *AA = (bfloat16_bits *)malloc_safe(x * x * sizeof(bfloat16_bits));
214-
bfloat16_bits *BB = (bfloat16_bits *)malloc_safe(x * sizeof(bfloat16_bits) * m);
213+
bfloat16_bits *BB = (bfloat16_bits *)malloc_safe(x * sizeof(bfloat16_bits) << l);
215214
float *DD = (float *)malloc_safe(x * sizeof(FLOAT));
216-
float *CC = (float *)malloc_safe(x * sizeof(FLOAT) * m);
215+
float *CC = (float *)malloc_safe(x * sizeof(FLOAT) << l);
217216
if ((A == NULL) || (B == NULL) || (C == NULL) || (AA == NULL) || (BB == NULL) ||
218217
(DD == NULL) || (CC == NULL))
219218
return 1;
@@ -228,9 +227,9 @@ main (int argc, char *argv[])
228227
sbstobf16_(&one, &A[j*x+i], &one, &atmp, &one);
229228
AA[j * x + i].v = atmp;
230229
}
231-
B[j*m] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
232-
sbstobf16_(&one, &B[j*m], &one, &btmp, &one);
233-
BB[j*m].v = btmp;
230+
B[j << l] = ((FLOAT) rand () / (FLOAT) RAND_MAX) + 0.5;
231+
sbstobf16_(&one, &B[j << l], &one, &btmp, &one);
232+
BB[j << l].v = btmp;
234233
}
235234
for (y = 0; y < 2; y++)
236235
{
@@ -240,25 +239,25 @@ main (int argc, char *argv[])
240239
transA = 'T';
241240
}
242241

243-
memset(CC, 0, x * m * sizeof(FLOAT));
242+
memset(CC, 0, x * sizeof(FLOAT) << l);
244243
memset(DD, 0, x * sizeof(FLOAT));
245-
memset(C, 0, x * m * sizeof(FLOAT));
244+
memset(C, 0, x * sizeof(FLOAT) << l);
246245

247246
SGEMV (&transA, &x, &x, &alpha, A, &x, B, &k, &beta, C, &k);
248247
SBGEMV (&transA, &x, &x, &alpha, (bfloat16*) AA, &x, (bfloat16*) BB, &k, &beta, CC, &k);
249248

250249
for (j = 0; j < x; j++)
251250
for (i = 0; i < x; i++)
252251
if (transA == 'N') {
253-
DD[i] += float16to32 (AA[j * x + i]) * float16to32 (BB[j*m]);
252+
DD[i] += float16to32 (AA[j * x + i]) * float16to32 (BB[j << l]);
254253
} else if (transA == 'T') {
255-
DD[j] += float16to32 (AA[j * x + i]) * float16to32 (BB[i*m]);
254+
DD[j] += float16to32 (AA[j * x + i]) * float16to32 (BB[i << l]);
256255
}
257256

258257
for (j = 0; j < x; j++) {
259-
if (fabs (CC[j*m] - C[j*m]) > 1.0)
258+
if (fabs (CC[j << l] - C[j << l]) > 1.0)
260259
ret++;
261-
if (fabs (CC[j*m] - DD[j]) > 1.0)
260+
if (fabs (CC[j << l] - DD[j]) > 1.0)
262261
ret++;
263262
}
264263
}

0 commit comments

Comments
 (0)