Skip to content

Commit 1b1e7cd

Browse files
fsx950223 authored and Doug Lehr committed
add sink arg
Signed-off-by: fsx950223 <[email protected]>
1 parent a07ff61 commit 1b1e7cd

File tree

1 file changed

+2
-4
lines changed

1 file changed

+2
-4
lines changed

vllm/v1/attention/backends/rocm_aiter_fa.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -289,10 +289,6 @@ def build(self,
289289
cu_seq_lens = None
290290
num_actual_kv_tokens = 0
291291

292-
def schedule(batch_size, cu_query_lens, max_query_len, seqlens,
293-
max_seq_len, causal):
294-
return None
295-
296292
use_cascade = common_prefix_len > 0
297293

298294
attn_metadata = AiterFlashAttentionMetadata(
@@ -385,6 +381,7 @@ def __init__(
385381
logits_soft_cap: Optional[float] = None,
386382
attn_type: AttentionType = AttentionType.DECODER,
387383
kv_sharing_target_layer_name: Optional[int] = None,
384+
sinks: Optional[torch.Tensor] = None,
388385
) -> None:
389386
self.num_heads = num_heads
390387
self.head_size = head_size
@@ -414,6 +411,7 @@ def __init__(
414411
"encoder/decoder cross-attention "
415412
"are not implemented for "
416413
"FlashAttentionImpl")
414+
self.sinks = sinks
417415

418416
def forward(
419417
self,

0 commit comments

Comments (0)