fixed incorrect padding for flash attn with swa

LostRuins · LostRuins · commit 989f9e6b98b4 · 2025-06-30T20:32:14.000+08:00
diff --git a/src/llama-kv-cache-unified-iswa.cpp b/src/llama-kv-cache-unified-iswa.cpp
@@ -31,6 +31,7 @@ llama_kv_cache_unified_iswa::llama_kv_cache_unified_iswa(
 
     //kcpp: pad the swa kv cache as well, similar to extra_context_handle_fragmentation
     size_swa += 32;
+    size_swa = GGML_PAD(size_swa, n_pad);
 
     // when using full-size SWA cache, we set the SWA cache size to be equal to the base cache size
     if (swa_full) {