|
20 | 20 |
|
21 | 21 | from vllm.config import SchedulerConfig
|
22 | 22 |
|
| 23 | +MAX_INT = 2147483647 |
23 | 24 |
|
24 | 25 | @dataclass
|
25 | 26 | class AscendSchedulerConfig(SchedulerConfig):
|
26 | 27 | enable_chunked_prefill: bool = False
|
27 |
| - max_long_partial_prefills: Optional[Union[int, float]] = None |
28 |
| - long_prefill_token_threshold: Optional[Union[int, float]] = None |
| 28 | + max_long_partial_prefills: int = MAX_INT |
| 29 | + long_prefill_token_threshold: int = MAX_INT |
29 | 30 | policy: str = "fcfs"
|
30 | 31 | num_scheduler_steps: int = 1
|
31 | 32 | scheduler_cls: Union[str, Type[object]] = (
|
@@ -61,20 +62,24 @@ def __post_init__(self) -> None:
|
61 | 62 | self.chunked_prefill_enabled = self.enable_chunked_prefill
|
62 | 63 | # concurrent partial prefills. Default is MAX_INT (effectively unlimited)
|
63 | 64 | if self.max_long_partial_prefills is None:
|
64 |
| - self.max_long_partial_prefills = float('inf') |
65 |
| - self.long_prefill_token_threshold = float('inf') |
66 |
| - else: |
67 |
| - if self.long_prefill_token_threshold is None: |
| 65 | + self.max_long_partial_prefills = MAX_INT |
| 66 | + self.long_prefill_token_threshold = MAX_INT |
| 67 | + |
| 68 | + if self.long_prefill_token_threshold is None or \ |
| 69 | + self.long_prefill_token_threshold <= 0: |
| 70 | + if self.max_model_len is None: |
| 71 | + self.long_prefill_token_threshold = MAX_INT |
| 72 | + else: |
68 | 73 | self.long_prefill_token_threshold = \
|
69 | 74 | max(1, int(self.max_model_len * 0.04))
|
70 | 75 |
|
71 |
| - if self.max_long_partial_prefills <= 0: |
| 76 | + if self.max_long_partial_prefills < 0: |
72 | 77 | raise ValueError(
|
73 |
| - f"max_long_partial_prefills must be positive, but got " |
| 78 | + f"max_long_partial_prefills must be non-negative, but got " |
74 | 79 | f"{self.max_long_partial_prefills}")
|
75 |
| - if self.long_prefill_token_threshold <= 0: |
| 80 | + if self.long_prefill_token_threshold < 0: |
76 | 81 | raise ValueError(
|
77 |
| - f"long_prefill_token_threshold must be positive, but got " |
| 82 | + f"long_prefill_token_threshold must be non-negative, but got " |
78 | 83 | f"{self.long_prefill_token_threshold}")
|
79 | 84 |
|
80 | 85 | if self.policy != "fcfs":
|
|
0 commit comments