@@ -1237,10 +1237,12 @@ static __device__ __forceinline__ void flash_attn_ext_f16_process_tile(
12371237 }
12381238#else
12391239 GGML_UNUSED (Q_f2); GGML_UNUSED (K_h2); GGML_UNUSED (V_h2);
1240- GGML_UNUSED (mask_h2); GGML_UNUSED (dstk); GGML_UNUSED (dstk_fixup);
1240+ GGML_UNUSED (mask_h2); GGML_UNUSED (sinks_f);
1241+ GGML_UNUSED (dstk); GGML_UNUSED (dstk_fixup);
12411242 GGML_UNUSED (scale); GGML_UNUSED (slope); GGML_UNUSED (logit_softcap);
1242- GGML_UNUSED (ne01); GGML_UNUSED (ne02); GGML_UNUSED (stride_Q1);
1243- GGML_UNUSED (stride_Q2); GGML_UNUSED (stride_K); GGML_UNUSED (stride_V); GGML_UNUSED (stride_mask);
1243+ GGML_UNUSED (ne01); GGML_UNUSED (ne02);
1244+ GGML_UNUSED (stride_Q1); GGML_UNUSED (stride_Q2);
1245+ GGML_UNUSED (stride_K); GGML_UNUSED (stride_V); GGML_UNUSED (stride_mask);
12441246 GGML_UNUSED (jt); GGML_UNUSED (kb0_start); GGML_UNUSED (kb0_stop);
12451247 NO_DEVICE_CODE;
12461248#endif // TURING_MMA_AVAILABLE
@@ -1395,8 +1397,8 @@ static __global__ void flash_attn_ext_f16(
13951397 (Q_f2, K_h2, V_h2, mask_h2, sinks_f, dstk, dst_meta, scale, slope, logit_softcap,
13961398 ne01, ne02, stride_Q1, stride_Q2, stride_K, stride_V, stride_mask, jt, kb0_start_kernel, kb0_stop_kernel);
13971399#else
1398- GGML_UNUSED (Q); GGML_UNUSED (K); GGML_UNUSED (V); GGML_UNUSED (mask); GGML_UNUSED (sinks);
1399- GGML_UNUSED (dst); GGML_UNUSED (dst_meta);
1400+ GGML_UNUSED (Q); GGML_UNUSED (K); GGML_UNUSED (V); GGML_UNUSED (mask);
1401+ GGML_UNUSED (sinks); GGML_UNUSED (KV_max); GGML_UNUSED ( dst); GGML_UNUSED (dst_meta);
14001402 GGML_UNUSED (scale); GGML_UNUSED (max_bias); GGML_UNUSED (m0); GGML_UNUSED (m1);
14011403 GGML_UNUSED (n_head_log2); GGML_UNUSED (logit_softcap);
14021404 GGML_UNUSED (ne00); GGML_UNUSED (ne01); GGML_UNUSED (ne02); GGML_UNUSED (ne03);
0 commit comments