Skip to content

Commit 0629437

Browse files
committed
cuda : add TODO about KV padding requirement
1 parent 6669297 commit 0629437

File tree

1 file changed

+6
-0
lines changed

1 file changed

+6
-0
lines changed

ggml/src/ggml-cuda/fattn.cu

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,12 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
208208

209209
const int cc = ggml_cuda_info().devices[device].cc;
210210

211+
// TODO: temporary restriction - no kernel is selected unless K->ne[1] is a multiple of FATTN_KQ_STRIDE; remove once support is extended
212+
// https://github.com/ggml-org/llama.cpp/pull/16148#issuecomment-3343525206
213+
if (K->ne[1] % FATTN_KQ_STRIDE != 0) {
214+
return BEST_FATTN_KERNEL_NONE;
215+
}
216+
211217
switch (K->ne[0]) {
212218
case 64:
213219
case 128:

0 commit comments

Comments
 (0)