|
20 | 20 |
|
21 | 21 | from vllm.config import SchedulerConfig
|
22 | 22 |
|
| 23 | +MAX_INT = 2147483647 |
23 | 24 |
|
24 | 25 | @dataclass
|
25 | 26 | class AscendSchedulerConfig(SchedulerConfig):
|
26 | 27 | enable_chunked_prefill: bool = False
|
27 |
| - max_long_partial_prefills: Optional[Union[int, float]] = None |
28 |
| - long_prefill_token_threshold: Optional[Union[int, float]] = None |
| 28 | + max_long_partial_prefills: int = MAX_INT |
| 29 | + long_prefill_token_threshold: int = MAX_INT |
29 | 30 | policy: str = "fcfs"
|
30 | 31 | num_scheduler_steps: int = 1
|
31 | 32 | scheduler_cls: Union[str, Type[object]] = (
|
@@ -71,20 +72,24 @@ def __post_init__(self) -> None:
|
71 | 72 | "decrease max_model_len.")
|
72 | 73 | # concurrent partial prefills. Default is inf
|
73 | 74 | if self.max_long_partial_prefills is None:
|
74 |
| - self.max_long_partial_prefills = float('inf') |
75 |
| - self.long_prefill_token_threshold = float('inf') |
76 |
| - else: |
77 |
| - if self.long_prefill_token_threshold is None: |
| 75 | + self.max_long_partial_prefills = MAX_INT |
| 76 | + self.long_prefill_token_threshold = MAX_INT |
| 77 | + |
| 78 | + if self.long_prefill_token_threshold is None or \ |
| 79 | + self.long_prefill_token_threshold <= 0: |
| 80 | + if self.max_model_len is None: |
| 81 | + self.long_prefill_token_threshold = MAX_INT |
| 82 | + else: |
78 | 83 | self.long_prefill_token_threshold = \
|
79 | 84 | max(1, int(self.max_model_len * 0.04))
|
80 | 85 |
|
81 |
| - if self.max_long_partial_prefills <= 0: |
| 86 | + if self.max_long_partial_prefills < 0: |
82 | 87 | raise ValueError(
|
83 |
| - f"max_long_partial_prefills must be positive, but got " |
| 88 | + f"max_long_partial_prefills must be non-negative, but got " |
84 | 89 | f"{self.max_long_partial_prefills}")
|
85 |
| - if self.long_prefill_token_threshold <= 0: |
| 90 | + if self.long_prefill_token_threshold < 0: |
86 | 91 | raise ValueError(
|
87 |
| - f"long_prefill_token_threshold must be positive, but got " |
| 92 | + f"long_prefill_token_threshold must be non-negative, but got " |
88 | 93 | f"{self.long_prefill_token_threshold}")
|
89 | 94 |
|
90 | 95 | if self.policy != "fcfs":
|
|
0 commit comments