Commit 6c9ebf4

llamafile: fix fp32 miscalculation when activating fp16
Signed-off-by: Aaron Teo <[email protected]>
1 parent 77ad802 · commit 6c9ebf4

ggml/src/ggml-cpu/llamafile/sgemm.cpp (5 additions, 3 deletions)

@@ -3458,10 +3458,12 @@ bool llamafile_sgemm(const struct ggml_compute_params * params, int64_t m, int64
             return tb.matmul(m, n);
         }
 #elif defined(__VXE__) || defined(__VXE2__)
-        if (Btype == GGML_TYPE_F32) {
-            tinyBLAS<4, float32x4_t, float32x4_t, ggml_fp16_t, float, float> tb{ params,
+        if (n < 4)
+            return false;
+        if (Btype == GGML_TYPE_F16) {
+            tinyBLAS<4, float32x4_t, float32x4_t, ggml_fp16_t, ggml_fp16_t, float> tb{ params,
                 k, (const ggml_fp16_t *)A, lda,
-                (const float *)B, ldb,
+                (const ggml_fp16_t *)B, ldb,
                 (float *)C, ldc};
             return tb.matmul(m, n);
         }
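
For context (not part of the commit): the miscalculation named in the title is the classic failure mode of reading half-precision storage through a float pointer, which is what the old VXE path did when B actually held fp16 data. The sketch below is a minimal, self-contained C++ illustration of that failure mode, assuming B holds IEEE-754 binary16 values; the half_to_float helper is hypothetical and for demonstration only, not the conversion routine ggml uses.

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Illustrative IEEE-754 binary16 -> binary32 conversion (handles zero,
// subnormal, normal, inf/NaN). Hypothetical helper, for demonstration only.
static float half_to_float(uint16_t h) {
    const uint32_t sign = (uint32_t)(h >> 15) << 31;
    const uint32_t exp  = (h >> 10) & 0x1F;
    uint32_t       mant = h & 0x3FF;
    uint32_t       bits;

    if (exp == 0) {
        if (mant == 0) {
            bits = sign;                          // signed zero
        } else {
            int e = -1;                           // normalize a subnormal half
            do { mant <<= 1; e++; } while ((mant & 0x400) == 0);
            mant &= 0x3FF;
            bits = sign | ((uint32_t)(127 - 15 - e) << 23) | (mant << 13);
        }
    } else if (exp == 31) {
        bits = sign | 0x7F800000u | (mant << 13); // inf / NaN
    } else {
        bits = sign | ((exp - 15 + 127) << 23) | (mant << 13);
    }

    float f;
    std::memcpy(&f, &bits, sizeof f);
    return f;
}

int main() {
    // Two fp16 values of 1.0 (0x3C00) stored back to back, as a B column might be.
    uint16_t b_fp16[2] = { 0x3C00, 0x3C00 };

    // Wrong: reinterpret the fp16 bytes as a single fp32 value.
    float wrong;
    std::memcpy(&wrong, b_fp16, sizeof wrong);

    // Right: convert each fp16 element to fp32 before using it.
    float right = half_to_float(b_fp16[0]);

    std::printf("reinterpreted as fp32: %g\n", wrong);   // garbage, not 1.0
    std::printf("converted from fp16:  %g\n", right);    // 1.0
}
```

This mirrors the change above: when Btype is fp16, the VXE path now instantiates tinyBLAS with ggml_fp16_t for B and casts B to const ggml_fp16_t *, instead of treating the same bytes as const float *.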

0 commit comments