Skip to content
5 changes: 2 additions & 3 deletions vllm_ascend/attention/attention_v1.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@

from dataclasses import dataclass
from enum import Enum
- from typing import ClassVar

import torch
import torch_npu
Expand Down Expand Up @@ -213,7 +212,7 @@ class AscendAttentionMetadataBuilder(AttentionMetadataBuilder[AscendMetadata]):
# Does this backend/builder reorder the batch?
# If not, set this to None. Otherwise set it to the query
# length that will be pulled into the front of the batch.
- reorder_batch_threshold: ClassVar[int] = 1
+ reorder_batch_threshold: int = 1

def __init__(
self,
Expand Down Expand Up @@ -242,7 +241,7 @@ def __init__(
got {self.decode_threshold}"
)

- AscendAttentionMetadataBuilder.reorder_batch_threshold = self.decode_threshold
+ self.reorder_batch_threshold = self.decode_threshold

scheduler_config = vllm_config.scheduler_config
self.chunked_prefill_enabled = scheduler_config.enable_chunked_prefill
Expand Down
7 changes: 0 additions & 7 deletions vllm_ascend/attention/context_parallel/attention_cp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
# This file is a part of the vllm-ascend project.
#

- from typing import ClassVar
-
import numpy as np
import torch
import torch.distributed as dist
Expand Down Expand Up @@ -61,11 +59,6 @@ class AscendAttentionCPMetadataBuilder(AscendAttentionMetadataBuilder):
Extends AscendAttentionMetadataBuilder with PCP/DCP metadata handling.
"""

- # Does this backend/builder reorder the batch?
- # If not, set this to None. Otherwise set it to the query
- # length that will be pulled into the front of the batch.
- reorder_batch_threshold: ClassVar[int] = 1
-
def __init__(
self,
kv_cache_spec: AttentionSpec,
Expand Down
Loading