1 file changed: +5 -3 lines changed
Original file line number | Diff line number | Diff line change
1
1
# SPDX-License-Identifier: Apache-2.0
2
2
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
3
3
import functools
4
- from typing import List , Optional
4
+ from typing import ClassVar , List , Optional
5
5
6
6
import torch
7
7
12
12
from vllm .config import CacheConfig
13
13
from vllm .model_executor .layers .quantization import QuantizationConfig
14
14
from vllm .v1 .attention .backends .utils import (
15
- CommonAttentionMetadata , make_local_attention_virtual_batches ,
16
- subclass_attention_backend )
15
+ AttentionCGSupport , CommonAttentionMetadata ,
16
+ make_local_attention_virtual_batches , subclass_attention_backend )
17
17
18
18
from ..layer import Attention
19
19
@@ -29,6 +29,8 @@ def create_chunked_local_attention_backend(
29
29
underlying_builder = underlying_attn_backend .get_builder_cls ()
30
30
31
31
class ChunkedLocalAttentionBuilder (underlying_builder ): # type: ignore
32
+ cudagraph_support : ClassVar [AttentionCGSupport ] = \
33
+ AttentionCGSupport .NEVER
32
34
33
35
def build (self ,
34
36
common_prefix_len : int ,
You can’t perform that action at this time.
0 commit comments