Skip to content

Commit be5f499

Browse files
committed
add
1 parent 1eeec1c commit be5f499

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

ggml/src/ggml-cuda/fattn.cu

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ static void ggml_cuda_flash_attn_ext_vec_f16(ggml_backend_cuda_context & ctx, gg
163 163

164 164
FATTN_VEC_F16_CASE( 64, GGML_TYPE_F16, GGML_TYPE_F16)
165 165
FATTN_VEC_F16_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16)
166 +
FATTN_VEC_F16_CASE(192, GGML_TYPE_F16, GGML_TYPE_F16)
166 167
FATTN_VEC_F16_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16)
167 168
#endif // GGML_CUDA_FA_ALL_QUANTS
168 169

@@ -238,6 +239,7 @@ static void ggml_cuda_flash_attn_ext_vec_f32(ggml_backend_cuda_context & ctx, gg
238 239

239 240
FATTN_VEC_F32_CASE( 64, GGML_TYPE_F16, GGML_TYPE_F16)
240 241
FATTN_VEC_F32_CASE(128, GGML_TYPE_F16, GGML_TYPE_F16)
242 +
FATTN_VEC_F32_CASE(192, GGML_TYPE_F16, GGML_TYPE_F16)
241 243
FATTN_VEC_F32_CASE(256, GGML_TYPE_F16, GGML_TYPE_F16)
242 244
#endif // GGML_CUDA_FA_ALL_QUANTS
243 245

0 commit comments

Comments
 (0)