Skip to content

Commit 7874a61

Browse files
committed
precommit fixes
Signed-off-by: Patryk Saffer <[email protected]>
1 parent 3f245da commit 7874a61

File tree

7 files changed

+19
-8
lines changed

7 files changed

+19
-8
lines changed

tests/v1/e2e/test_eplb_offline.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def test_eplb_model(
5858
# Load EPLB statistics from disk
5959
eplb_config_load = EPLBConfig(
6060
load_initial_load_window=True,
61-
load_path="/tmp/global_expert_load_window_i0.safetensors"
61+
load_path="/tmp/global_expert_load_window_i0.safetensors",
6262
)
6363
llm = LLM(eplb_config=eplb_config_load, **llm_args)
6464
llm.generate(test_prompts, sampling_config)

vllm/config/parallel.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -620,10 +620,13 @@ def _verify_args(self) -> Self:
620620
raise ValueError(
621621
"Unable to use nsight profiling unless workers run with Ray."
622622
)
623-
624-
if self.eplb_config.load_initial_load_window and self.eplb_config.load_path is None:
623+
if (
624+
self.eplb_config.load_initial_load_window
625+
and self.eplb_config.load_path is None
626+
):
625627
raise ValueError(
626-
"load_initial_load_window is set to True, but load_path is not provided."
628+
"load_initial_load_window is set to True, "
629+
"but load_path is not provided."
627630
)
628631
if self.eplb_config.save_load_window and self.eplb_config.save_dir is None:
629632
raise ValueError(

vllm/distributed/eplb/eplb_state.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -354,7 +354,10 @@ def add_model(
354354
)
355355

356356
eplb_step_interval = self.parallel_config.eplb_config.step_interval
357-
if self.parallel_config.eplb_config.load_initial_load_window or self.parallel_config.eplb_config.save_load_window:
357+
if (
358+
self.parallel_config.eplb_config.load_initial_load_window
359+
or self.parallel_config.eplb_config.save_load_window
360+
):
358361
self.expert_rearrangement_step = 0
359362
else:
360363
# Set the initial progress of rearrangement to 3/4
@@ -567,6 +570,7 @@ def rearrange(
567570
logger.info("Rearranging experts %s...", "(profile)" if is_profile else "")
568571

569572
if load_initial_load_window:
573+
assert self.parallel_config.eplb_config.load_path is not None
570574
global_expert_load_windows = load_eplb_state(
571575
self.parallel_config.eplb_config.load_path,
572576
list(self.model_states.values()),
@@ -577,7 +581,8 @@ def rearrange(
577581
global_expert_load_windows = []
578582
should_save_eplb_state = (
579583
self.parallel_config.eplb_config.save_load_window
580-
and not is_profile and not load_initial_load_window
584+
and not is_profile
585+
and not load_initial_load_window
581586
)
582587
if not execute_shuffle:
583588
num_models = torch.tensor(
@@ -621,6 +626,7 @@ def rearrange(
621626
global_expert_load_windows.append(global_expert_load_window)
622627

623628
if is_main_rank and should_save_eplb_state:
629+
assert self.parallel_config.eplb_config.save_dir is not None
624630
save_eplb_state(
625631
global_expert_load_windows,
626632
self.parallel_config.eplb_config.save_dir,

vllm/engine/arg_utils.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from collections.abc import Callable
1111
from dataclasses import MISSING, dataclass, fields, is_dataclass
1212
from itertools import permutations
13-
from pathlib import Path
1413
from types import UnionType
1514
from typing import (
1615
TYPE_CHECKING,

vllm/model_executor/layers/fused_moe/fused_moe_method_base.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ def apply(
105105
apply_router_weight_on_input: bool = False,
106106
activation: str = "silu",
107107
enable_eplb: bool = False,
108+
eplb_static: bool = False,
108109
expert_load_view: torch.Tensor | None = None,
109110
logical_to_physical_map: torch.Tensor | None = None,
110111
logical_replica_count: torch.Tensor | None = None,

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1045,6 +1045,7 @@ def apply(
10451045
indices_type=self.topk_indices_dtype,
10461046
num_fused_shared_experts=layer.num_fused_shared_experts,
10471047
enable_eplb=enable_eplb,
1048+
eplb_static=eplb_static,
10481049
expert_map=expert_map,
10491050
expert_load_view=expert_load_view,
10501051
logical_to_physical_map=logical_to_physical_map,

vllm/v1/worker/gpu_model_runner.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2100,7 +2100,8 @@ def eplb_step(self, is_dummy: bool = False, is_profile: bool = False) -> None:
21002100
"""
21012101
if not self.parallel_config.enable_eplb:
21022102
return
2103-
if is_profile and self.parallel_config.eplb_config.load_initial_load_window: # Maybe we can get rid of that
2103+
if (self.parallel_config.eplb_config.load_initial_load_window and
2104+
is_profile):
21042105
return
21052106
if (
21062107
self.parallel_config.eplb_config.static

0 commit comments

Comments
 (0)