|
20 | 20 |
|
21 | 21 | from vllm.config import SchedulerConfig |
22 | 22 |
|
23 | | -MAX_INT = 2147483647 |
24 | 23 |
|
25 | 24 |
|
26 | 25 | @dataclass |
27 | 26 | class AscendSchedulerConfig(SchedulerConfig): |
28 | 27 | enable_chunked_prefill: bool = False |
29 | | - max_long_partial_prefill_tokens: int = MAX_INT |
30 | | - ascend_long_prefill_token_threshold: int = MAX_INT |
| 28 | + max_long_partial_prefills: int = 1 |
| 29 | + long_prefill_token_threshold: int = 0 |
31 | 30 | policy: str = "fcfs" |
32 | 31 | scheduler_cls: Union[str, Type[object]] = ( |
33 | 32 | "vllm_ascend.core.scheduler.AscendScheduler") |
@@ -71,27 +70,27 @@ def __post_init__(self) -> None: |
71 | 70 | "max_num_batched_tokens and makes vLLM reject longer " |
72 | 71 | "sequences. Please increase max_num_batched_tokens or " |
73 | 72 | "decrease max_model_len.") |
74 | | - # concurrent partial prefills. Default is inf |
75 | | - if self.max_long_partial_prefill_tokens is None: |
76 | | - self.max_long_partial_prefill_tokens = MAX_INT |
77 | | - self.ascend_long_prefill_token_threshold = MAX_INT |
| 73 | + # concurrent partial prefills. Default is 1 meaning not enabled. |
| 74 | + if self.max_long_partial_prefills is None: |
| 75 | + self.max_long_partial_prefills = 1 |
| 76 | + self.long_prefill_token_threshold = 0 |
78 | 77 |
|
79 | | - if self.ascend_long_prefill_token_threshold is None or \ |
80 | | - self.ascend_long_prefill_token_threshold <= 0: |
| 78 | + if self.long_prefill_token_threshold is None or \ |
| 79 | + self.long_prefill_token_threshold <= 0: |
81 | 80 | if self.max_model_len is None: |
82 | | - self.ascend_long_prefill_token_threshold = MAX_INT |
| 81 | + self.long_prefill_token_threshold = 0 |
83 | 82 | else: |
84 | | - self.ascend_long_prefill_token_threshold = \ |
| 83 | + self.long_prefill_token_threshold = \ |
85 | 84 | max(1, int(self.max_model_len * 0.04)) |
86 | 85 |
|
87 | | - if self.max_long_partial_prefill_tokens < 0: |
| 86 | + if self.max_long_partial_prefills < 0: |
88 | 87 | raise ValueError( |
89 | | - f"max_long_partial_prefill_tokens must be non-negative, but got " |
90 | | - f"{self.max_long_partial_prefill_tokens}") |
91 | | - if self.ascend_long_prefill_token_threshold < 0: |
| 88 | + f"max_long_partial_prefills must be non-negative, but got " |
| 89 | + f"{self.max_long_partial_prefills}") |
| 90 | + if self.long_prefill_token_threshold < 0: |
92 | 91 | raise ValueError( |
93 | | - f"ascend_long_prefill_token_threshold must be non-negative, but got " |
94 | | - f"{self.ascend_long_prefill_token_threshold}") |
| 92 | + f"long_prefill_token_threshold must be non-negative, but got " |
| 93 | + f"{self.long_prefill_token_threshold}") |
95 | 94 |
|
96 | 95 | if self.policy != "fcfs": |
97 | 96 | raise NotImplementedError( |
|
0 commit comments