Skip to content

Commit 1b1e7cd

Browse files
fsx950223 authored and Doug Lehr committed
add sink arg
Signed-off-by: fsx950223 <[email protected]>
1 parent a07ff61 commit 1b1e7cd

File tree

1 file changed

+2
-4
lines changed

1 file changed

+2
-4
lines changed

vllm/v1/attention/backends/rocm_aiter_fa.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -289,10 +289,6 @@ def build(self,
289289
cu_seq_lens = None
290290
num_actual_kv_tokens = 0
291291

292-
def schedule(batch_size, cu_query_lens, max_query_len, seqlens,
293-
max_seq_len, causal):
294-
return None
295-
296292
use_cascade = common_prefix_len > 0
297293

298294
attn_metadata = AiterFlashAttentionMetadata(
@@ -385,6 +381,7 @@ def __init__(
385381
logits_soft_cap: Optional[float] = None,
386382
attn_type: AttentionType = AttentionType.DECODER,
387383
kv_sharing_target_layer_name: Optional[int] = None,
384+
sinks: Optional[torch.Tensor] = None,
388385
) -> None:
389386
self.num_heads = num_heads
390387
self.head_size = head_size
@@ -414,6 +411,7 @@ def __init__(
414411
"encoder/decoder cross-attention "
415412
"are not implemented for "
416413
"FlashAttentionImpl")
414+
self.sinks = sinks
417415

418416
def forward(
419417
self,

0 commit comments

Comments (0)