File tree Expand file tree Collapse file tree 1 file changed +0
-26
lines changed Expand file tree Collapse file tree 1 file changed +0
-26
lines changed Original file line number Diff line number Diff line change @@ -53,32 +53,6 @@ class AscendCommonAttentionMetadata:
53
53
graph_pad_size : int = - 1
54
54
55
55
56
- @dataclass
57
- class TorchairCommonAttentionMetadata :
58
- """
59
- Per-batch attention metadata, shared across layers and backends.
60
- AttentionMetadataBuilder instances use it to construct per-layer metadata.
61
-
62
- For many of the tensors we keep both GPU and CPU versions.
63
- """
64
-
65
- num_reqs : int
66
- """Number of requests"""
67
-
68
- num_actual_tokens : int
69
- """Total number of tokens in batch"""
70
-
71
- decode_token_per_req : int
72
-
73
- actual_seq_lengths_q : list [int ]
74
-
75
- attn_mask : torch .Tensor = None
76
-
77
- spec_attn_mask : torch .Tensor = None
78
-
79
- graph_pad_size : int = - 1
80
-
81
-
82
56
def split_decodes_and_prefills (
83
57
common_attn_metadata : AscendCommonAttentionMetadata ,
84
58
decode_threshold : int = 1 ,
You can’t perform that action at this time.
0 commit comments