Skip to content

Commit 4134312

Browse files
[BugFix] ChunkedLocalAttention is currently not CG compatible (vllm-project#26034)
Signed-off-by: Lucas Wilkinson <[email protected]>
1 parent da554f9 commit 4134312

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

vllm/attention/layers/chunked_local_attention.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33
import functools
4-
from typing import List, Optional
4+
from typing import ClassVar, List, Optional
55

66
import torch
77

@@ -12,8 +12,8 @@
1212
from vllm.config import CacheConfig
1313
from vllm.model_executor.layers.quantization import QuantizationConfig
1414
from vllm.v1.attention.backends.utils import (
15-
CommonAttentionMetadata, make_local_attention_virtual_batches,
16-
subclass_attention_backend)
15+
AttentionCGSupport, CommonAttentionMetadata,
16+
make_local_attention_virtual_batches, subclass_attention_backend)
1717

1818
from ..layer import Attention
1919

@@ -29,6 +29,8 @@ def create_chunked_local_attention_backend(
2929
underlying_builder = underlying_attn_backend.get_builder_cls()
3030

3131
class ChunkedLocalAttentionBuilder(underlying_builder): # type: ignore
32+
cudagraph_support: ClassVar[AttentionCGSupport] = \
33+
AttentionCGSupport.NEVER
3234

3335
def build(self,
3436
common_prefix_len: int,

0 commit comments

Comments (0)