[CI] Add some compatibility code to pass CI

Csrayz · Csrayz · commit 7730ee878f75 · 2025-08-25T11:50:49.000+08:00
Signed-off-by: Csrayz <jover@cmbchina.com>
diff --git a/vllm_ascend/core/schedule_config.py b/vllm_ascend/core/schedule_config.py
@@ -20,12 +20,13 @@
 
 from vllm.config import SchedulerConfig
 
+MAX_INT = 2147483647
 
 @dataclass
 class AscendSchedulerConfig(SchedulerConfig):
     enable_chunked_prefill: bool = False
-    max_long_partial_prefills: Optional[Union[int, float]] = None
-    long_prefill_token_threshold: Optional[Union[int, float]] = None
+    max_long_partial_prefills: int = MAX_INT
+    long_prefill_token_threshold: int = MAX_INT
     policy: str = "fcfs"
     num_scheduler_steps: int = 1
     scheduler_cls: Union[str, Type[object]] = (
@@ -71,20 +72,24 @@ def __post_init__(self) -> None:
                 "decrease max_model_len.")
         # concurrent partial prefills. Default is inf
         if self.max_long_partial_prefills is None:
-            self.max_long_partial_prefills = float('inf')
-            self.long_prefill_token_threshold = float('inf')
-        else:
-            if self.long_prefill_token_threshold is None:
+            self.max_long_partial_prefills = MAX_INT
+            self.long_prefill_token_threshold = MAX_INT
+
+        if self.long_prefill_token_threshold is None or \
+            self.long_prefill_token_threshold <= 0:
+            if self.max_model_len is None:
+                self.long_prefill_token_threshold = MAX_INT
+            else:
                 self.long_prefill_token_threshold = \
                     max(1, int(self.max_model_len * 0.04))
 
-        if self.max_long_partial_prefills <= 0:
+        if self.max_long_partial_prefills < 0:
             raise ValueError(
-                f"max_long_partial_prefills must be positive, but got "
+                f"max_long_partial_prefills must be non-negative, but got "
                 f"{self.max_long_partial_prefills}")
-        if self.long_prefill_token_threshold <= 0:
+        if self.long_prefill_token_threshold < 0:
             raise ValueError(
-                f"long_prefill_token_threshold must be positive, but got "
+                f"long_prefill_token_threshold must be non-negative, but got "
                 f"{self.long_prefill_token_threshold}")
 
         if self.policy != "fcfs":
diff --git a/vllm_ascend/core/scheduler.py b/vllm_ascend/core/scheduler.py
@@ -87,8 +87,12 @@ def schedule(self) -> SchedulerOutput:
 
         # Skip long prompt requests in prefill stage.
         # long_prefill_budget is float('inf') if not use.
-        long_prefill_budget = self.vllm_config.scheduler_config.max_long_partial_prefills
-        long_prefill_token_threshold = self.vllm_config.scheduler_config.long_prefill_token_threshold
+        if self.vllm_config.scheduler_config.long_prefill_token_threshold == 0:
+            long_prefill_budget = float('inf')
+            long_prefill_token_threshold = float('inf')
+        else:
+            long_prefill_budget = self.vllm_config.scheduler_config.max_long_partial_prefills
+            long_prefill_token_threshold = self.vllm_config.scheduler_config.long_prefill_token_threshold
 
         # Schedule prefill requests first.
         while self.waiting and token_budget > 0: