1 parent 0c43220 commit 0ad18c3
vllm_ascend/attention/utils.py
@@ -1,5 +1,5 @@
 from dataclasses import dataclass
-from typing import Any, Optional
+from typing import Any

 import torch

@@ -36,7 +36,7 @@ class AscendCommonAttentionMetadata:

     slot_mapping_cpu: torch.Tensor

-    actual_seq_lengths_q: Optional[list[int]]
+    actual_seq_lengths_q: list[int]

     positions: torch.Tensor = None

@@ -70,7 +70,7 @@ class TorchairCommonAttentionMetadata:

     decode_token_per_req: int

-    actual_seq_lengths_q: Optional[list[int]] = None

     attn_mask: torch.Tensor = None
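The effect of this commit is that actual_seq_lengths_q on AscendCommonAttentionMetadata is now a plain list[int] instead of Optional[list[int]], so callers must always supply a list. Below is a minimal sketch of how the affected dataclass might look and be constructed after the change; it includes only the fields visible in this diff, and the constructed values are hypothetical, not taken from the repository.

# Minimal sketch, assuming only the fields visible in the diff.
# The real AscendCommonAttentionMetadata in vllm_ascend/attention/utils.py
# has additional fields that are omitted here.
from dataclasses import dataclass

import torch


@dataclass
class AscendCommonAttentionMetadata:
    slot_mapping_cpu: torch.Tensor
    actual_seq_lengths_q: list[int]  # was Optional[list[int]] before this commit
    positions: torch.Tensor = None


# Hypothetical usage: the query sequence lengths must now be passed as a list,
# never as None.
metadata = AscendCommonAttentionMetadata(
    slot_mapping_cpu=torch.zeros(8, dtype=torch.int32),
    actual_seq_lengths_q=[4, 4],
)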