precommit fixes

Patryk999 · Patryk999 · commit 7874a610789b · 2025-11-17T16:52:38.000Z
Signed-off-by: Patryk Saffer <patryk.saffer99@gmail.com>
diff --git a/tests/v1/e2e/test_eplb_offline.py b/tests/v1/e2e/test_eplb_offline.py
@@ -58,7 +58,7 @@ def test_eplb_model(
         # Load EPLB statistics from disk
         eplb_config_load = EPLBConfig(
             load_initial_load_window=True,
-            load_path="/tmp/global_expert_load_window_i0.safetensors"
+            load_path="/tmp/global_expert_load_window_i0.safetensors",
         )
         llm = LLM(eplb_config=eplb_config_load, **llm_args)
         llm.generate(test_prompts, sampling_config)
diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py
@@ -620,10 +620,13 @@ def _verify_args(self) -> Self:
             raise ValueError(
                 "Unable to use nsight profiling unless workers run with Ray."
             )
-        
-        if self.eplb_config.load_initial_load_window and self.eplb_config.load_path is None:
+        if (
+            self.eplb_config.load_initial_load_window
+            and self.eplb_config.load_path is None
+        ):
             raise ValueError(
-                "load_initial_load_window is set to True, but load_path is not provided."
+                "load_initial_load_window is set to True, "
+                "but load_path is not provided."
             )
         if self.eplb_config.save_load_window and self.eplb_config.save_dir is None:
             raise ValueError(
diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py
@@ -354,7 +354,10 @@ def add_model(
         )
 
         eplb_step_interval = self.parallel_config.eplb_config.step_interval
-        if self.parallel_config.eplb_config.load_initial_load_window or self.parallel_config.eplb_config.save_load_window:
+        if (
+            self.parallel_config.eplb_config.load_initial_load_window
+            or self.parallel_config.eplb_config.save_load_window
+        ):
             self.expert_rearrangement_step = 0
         else:
             # Set the initial progress of rearrangement to 3/4
@@ -567,6 +570,7 @@ def rearrange(
             logger.info("Rearranging experts %s...", "(profile)" if is_profile else "")
 
         if load_initial_load_window:
+            assert self.parallel_config.eplb_config.load_path is not None
             global_expert_load_windows = load_eplb_state(
                 self.parallel_config.eplb_config.load_path,
                 list(self.model_states.values()),
@@ -577,7 +581,8 @@ def rearrange(
             global_expert_load_windows = []
             should_save_eplb_state = (
                 self.parallel_config.eplb_config.save_load_window
-                and not is_profile and not load_initial_load_window
+                and not is_profile
+                and not load_initial_load_window
             )
             if not execute_shuffle:
                 num_models = torch.tensor(
@@ -621,6 +626,7 @@ def rearrange(
                 global_expert_load_windows.append(global_expert_load_window)
 
             if is_main_rank and should_save_eplb_state:
+                assert self.parallel_config.eplb_config.save_dir is not None
                 save_eplb_state(
                     global_expert_load_windows,
                     self.parallel_config.eplb_config.save_dir,
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py
@@ -10,7 +10,6 @@
 from collections.abc import Callable
 from dataclasses import MISSING, dataclass, fields, is_dataclass
 from itertools import permutations
-from pathlib import Path
 from types import UnionType
 from typing import (
     TYPE_CHECKING,
diff --git a/vllm/model_executor/layers/fused_moe/fused_moe_method_base.py b/vllm/model_executor/layers/fused_moe/fused_moe_method_base.py
@@ -105,6 +105,7 @@ def apply(
         apply_router_weight_on_input: bool = False,
         activation: str = "silu",
         enable_eplb: bool = False,
+        eplb_static: bool = False,
         expert_load_view: torch.Tensor | None = None,
         logical_to_physical_map: torch.Tensor | None = None,
         logical_replica_count: torch.Tensor | None = None,
diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
@@ -1045,6 +1045,7 @@ def apply(
             indices_type=self.topk_indices_dtype,
             num_fused_shared_experts=layer.num_fused_shared_experts,
             enable_eplb=enable_eplb,
+            eplb_static=eplb_static,
             expert_map=expert_map,
             expert_load_view=expert_load_view,
             logical_to_physical_map=logical_to_physical_map,
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
@@ -2100,7 +2100,8 @@ def eplb_step(self, is_dummy: bool = False, is_profile: bool = False) -> None:
         """
         if not self.parallel_config.enable_eplb:
             return
-        if is_profile and self.parallel_config.eplb_config.load_initial_load_window: # Maybe we can get rid of that
+        if (self.parallel_config.eplb_config.load_initial_load_window and
+            is_profile):
             return
         if (
             self.parallel_config.eplb_config.static

Original file line number	Diff line number	Diff line change
`@@ -58,7 +58,7 @@ def test_eplb_model(`
`58`	`58`	`# Load EPLB statistics from disk`
`59`	`59`	`eplb_config_load = EPLBConfig(`
`60`	`60`	`load_initial_load_window=True,`
`61`		`- load_path="/tmp/global_expert_load_window_i0.safetensors"`
	`61`	`+ load_path="/tmp/global_expert_load_window_i0.safetensors",`
`62`	`62`	`)`
`63`	`63`	`llm = LLM(eplb_config=eplb_config_load, **llm_args)`
`64`	`64`	`llm.generate(test_prompts, sampling_config)`