Skip to content

Commit e604b63

Browse files
committed
use fma instead of dot to fix Nvidia and Apple performance issues
1 parent be5a0a8 commit e604b63

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-vulkan/vulkan-shaders/mul_mm.comp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -357,7 +357,7 @@ void main() {
357 357
[[unroll]] for (uint cc = 0; cc < TN; cc++) {
358 358
[[unroll]] for (uint cr = 0; cr < TM; cr++) {
359 359
const uint sums_idx = (wsic * TN + cc) * (WMITER * TM) + wsir * TM + cr;
360-
sums[sums_idx] += dot(ACC_TYPE_VEC2(cache_a[wsir * TM + cr]), ACC_TYPE_VEC2(cache_b[cc]));
360+
sums[sums_idx] = fma(ACC_TYPE(cache_a[wsir * TM + cr].x), ACC_TYPE(cache_b[cc].x), fma(ACC_TYPE(cache_a[wsir * TM + cr].y), ACC_TYPE(cache_b[cc].y), sums[sums_idx]));
361 361
}
362 362
}
363 363
}

0 commit comments

Comments
 (0)