
Commit acfbbc7

cont : simplify SWA mask condition
ggml-ci
1 parent e05055f commit acfbbc7


2 files changed: +2 −5 lines changed


src/llama-hparams.h

Lines changed: 1 addition & 1 deletion
@@ -102,7 +102,7 @@ struct llama_hparams {
     // Sliding Window Attention (SWA)
     llama_swa_type swa_type = LLAMA_SWA_TYPE_STANDARD;
 
-    uint32_t n_swa = 0; // sliding window attention (SWA)
+    uint32_t n_swa = 0; // the size of the sliding window (0 - no SWA)
     uint32_t n_swa_pattern = 1; // by default, all layers use non-sliding-window attention
 
     // for State Space Models
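
The updated comment pins down the meaning of n_swa: it is the window size in tokens, and 0 disables SWA. The sketch below is illustrative only and is not code from this commit; it assumes the common interpretation of n_swa_pattern in which, for a pattern of N, every N-th layer falls back to full attention and the remaining layers use a window of n_swa tokens, so the default n_swa_pattern = 1 leaves all layers on full attention. The helper name layer_uses_swa is hypothetical.

// Illustrative sketch (assumed helper, not part of this commit): decide
// whether layer `il` uses sliding-window attention given the two fields above.
#include <cstdint>
#include <cstdio>

static bool layer_uses_swa(uint32_t il, uint32_t n_swa, uint32_t n_swa_pattern) {
    if (n_swa == 0 || n_swa_pattern == 0) {
        return false; // n_swa == 0 means SWA is disabled entirely
    }
    // with n_swa_pattern == 1 this is never true, so all layers use
    // non-sliding-window attention (the default noted in the comment above)
    return il % n_swa_pattern < n_swa_pattern - 1;
}

int main() {
    // e.g. a pattern of 6: layers 0..4 use the window, layer 5 is full attention
    for (uint32_t il = 0; il < 6; ++il) {
        printf("layer %u: %s\n", il, layer_uses_swa(il, 1024, 6) ? "SWA" : "full");
    }
    return 0;
}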

src/llama-kv-cache.cpp

Lines changed: 1 addition & 4 deletions
@@ -687,10 +687,7 @@ void llama_kv_cache_unified::set_input_kq_mask(ggml_tensor * dst, const llama_ub
             {
                 const llama_pos pos_chunk_start = (pos / hparams.n_swa) * hparams.n_swa;
 
-                // TODO: should this be simply:
-                //       if (cells[i].pos < pos_chunk_start) {
-                //
-                if (cells[i].pos < pos_chunk_start || pos < pos_chunk_start) {
+                if (cells[i].pos < pos_chunk_start) {
                     f = -INFINITY;
                 }
             } break;
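
Because pos_chunk_start is derived from the query position itself (floor division of pos by n_swa, scaled back up), it can never exceed pos for non-negative positions, so the dropped "pos < pos_chunk_start" test was always false and the mask reduces to the single comparison kept above. A small standalone sketch of that condition follows; the names cell_pos and query_pos are illustrative, not the repository's variables, and only the arithmetic mirrors the diff.

// Standalone sketch of the chunked-SWA mask condition kept by this commit.
#include <cassert>
#include <cstdint>

using llama_pos = int32_t;

// true  -> the cached token lies before the query's chunk and is masked (-INFINITY)
// false -> the cached token remains visible to the query
static bool chunk_masked(llama_pos cell_pos, llama_pos query_pos, uint32_t n_swa) {
    const llama_pos pos_chunk_start = (query_pos / (llama_pos) n_swa) * (llama_pos) n_swa;
    return cell_pos < pos_chunk_start;
}

int main() {
    // with n_swa = 4, a query at position 6 lives in the chunk [4, 8)
    assert( chunk_masked(3, 6, 4)); // previous chunk          -> masked
    assert(!chunk_masked(4, 6, 4)); // start of the same chunk -> visible
    assert(!chunk_masked(6, 6, 4)); // the query position      -> visible
    return 0;
}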

0 commit comments
