Skip to content

Commit 710dfc4

Browse files
CUDA: fix half2 -> half conversion for HIP (ggml-org#15529)
1 parent 611f419 commit 710dfc4

File tree

1 file changed

+1
-1
lines changed

1 file changed

+1
-1
lines changed

ggml/src/ggml-cuda/fattn-tile-f16.cu

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -258,7 +258,7 @@ static __global__ void flash_attn_tile_ext_f16(
 const half val = hexp(sink - kqmax[j0/nwarps]);
 kqsum[j0/nwarps] = kqsum[j0/nwarps] * KQ_max_scale;
 if (threadIdx.x == 0) {
-    kqsum[j0/nwarps].x = __hadd(kqsum[j0/nwarps].x, val);
+    kqsum[j0/nwarps].x = __hadd(__low2half(kqsum[j0/nwarps]), val);
 }
 
 #pragma unroll

0 commit comments

Comments
 (0)