Skip to content

Commit ecec01d

Browse files
committed
more fixes
Signed-off-by: Patryk Saffer <[email protected]>
1 parent 783ca91 commit ecec01d

File tree

26 files changed

+45
-53
lines changed

26 files changed

+45
-53
lines changed

vllm/config/parallel.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -63,17 +63,14 @@ class EPLBConfig:
6363
This is turned off by default since it will cause communication overhead.
6464
"""
6565

66+
load_initial_load_window: bool = False
67+
save_load_window: bool = False
68+
static: bool = False
6669
save_dir: Path | None = None
6770
"""Directory to save expert load balance metrics."""
6871
load_path: Path | None = None
6972
"""Path to load expert load balance metrics."""
7073

71-
@property
72-
def record_metrics(self) -> bool:
73-
return self.save_dir is not None or (
74-
self.save_dir is None and self.load_path is None
75-
)
76-
7774

7875
@config
7976
@dataclass

vllm/distributed/eplb/eplb_state.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -352,10 +352,8 @@ def add_model(
352352
device=self.device,
353353
)
354354

355-
eplb_load_path = self.parallel_config.eplb_config.load_path
356-
eplb_save_dir = self.parallel_config.eplb_config.save_dir
357355
eplb_step_interval = self.parallel_config.eplb_config.step_interval
358-
if eplb_load_path is not None or eplb_save_dir is not None:
356+
if self.parallel_config.eplb_config.load_initial_load_window or self.parallel_config.eplb_config.save_load_window:
359357
self.expert_rearrangement_step = 0
360358
else:
361359
# Set the initial progress of rearrangement to 3/4
@@ -579,7 +577,7 @@ def rearrange(
579577
# Map the physical expert load to global logical experts
580578
global_expert_load_windows = []
581579
should_save_eplb_state = (
582-
self.parallel_config.eplb_config.save_dir is not None
580+
self.parallel_config.eplb_config.save_load_window
583581
and not is_profile
584582
and self.expert_rearrangement_step > 0
585583
)

vllm/engine/arg_utils.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -415,8 +415,6 @@ class EngineArgs:
415415
eplb_window_size: int = EPLBConfig.window_size
416416
eplb_step_interval: int = EPLBConfig.step_interval
417417
eplb_log_balancedness: bool = EPLBConfig.log_balancedness
418-
eplb_save_dir: Path | None = EPLBConfig.save_dir
419-
eplb_load_path: Path | None = EPLBConfig.load_path
420418
max_parallel_loading_workers: int | None = (
421419
ParallelConfig.max_parallel_loading_workers
422420
)

vllm/model_executor/layers/fused_moe/fused_moe.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1246,7 +1246,7 @@ def eplb_map_to_physical_and_record(
12461246
expert_load_view: torch.Tensor,
12471247
logical_to_physical_map: torch.Tensor,
12481248
logical_replica_count: torch.Tensor,
1249-
eplb_record_metrics: bool = False,
1249+
eplb_static: bool = False,
12501250
indices_type: torch.dtype | None = None,
12511251
) -> torch.Tensor:
12521252
"""
@@ -1288,7 +1288,7 @@ def eplb_map_to_physical_and_record(
12881288

12891289
topk_ids = physical_ids
12901290

1291-
if eplb_record_metrics:
1291+
if eplb_static:
12921292
# 2. Record expert load metrics.
12931293

12941294
# TODO(bowen): When using `FusedMoEModularKernel`, this

vllm/model_executor/layers/fused_moe/fused_moe_modular_method.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ def apply(
100100
apply_router_weight_on_input: bool = False,
101101
activation: str = "silu",
102102
enable_eplb: bool = False,
103-
eplb_record_metrics: bool = False,
103+
eplb_static: bool = False,
104104
expert_load_view: torch.Tensor | None = None,
105105
logical_to_physical_map: torch.Tensor | None = None,
106106
logical_replica_count: torch.Tensor | None = None,
@@ -134,7 +134,7 @@ def apply(
134134
e_score_correction_bias=e_score_correction_bias,
135135
indices_type=self.topk_indices_dtype,
136136
enable_eplb=enable_eplb,
137-
eplb_record_metrics=eplb_record_metrics,
137+
eplb_static=eplb_static,
138138
expert_map=expert_map,
139139
expert_load_view=expert_load_view,
140140
logical_to_physical_map=logical_to_physical_map,

vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -316,7 +316,7 @@ def __init__(
316316
activation: str = "silu",
317317
is_act_and_mul: bool = True,
318318
enable_eplb: bool = False,
319-
eplb_record_metrics: bool = False,
319+
eplb_static: bool = False,
320320
num_redundant_experts: int = 0,
321321
has_bias: bool = False,
322322
is_sequence_parallel=False,
@@ -398,7 +398,7 @@ def __init__(
398398
self.layer_name = prefix
399399

400400
self.enable_eplb = enable_eplb
401-
self.eplb_record_metrics = eplb_record_metrics
401+
self.eplb_static = eplb_static
402402
self.expert_load_view: torch.Tensor | None = None
403403
self.logical_to_physical_map: torch.Tensor | None = None
404404
self.logical_replica_count: torch.Tensor | None = None
@@ -1320,7 +1320,7 @@ def select_experts(
13201320
e_score_correction_bias: torch.Tensor | None = None,
13211321
indices_type: torch.dtype | None = None,
13221322
enable_eplb: bool = False,
1323-
eplb_record_metrics: bool = False,
1323+
eplb_static: bool = False,
13241324
expert_map: torch.Tensor | None = None,
13251325
expert_load_view: torch.Tensor | None = None,
13261326
logical_to_physical_map: torch.Tensor | None = None,
@@ -1423,7 +1423,7 @@ def select_experts(
14231423
topk_ids=topk_ids,
14241424
expert_load_view=expert_load_view,
14251425
logical_to_physical_map=logical_to_physical_map,
1426-
eplb_record_metrics=eplb_record_metrics,
1426+
eplb_static=eplb_static,
14271427
logical_replica_count=logical_replica_count,
14281428
indices_type=indices_type,
14291429
)
@@ -1610,7 +1610,7 @@ def process_chunk(chunk_start, chunk_end, skip_result_store=False):
16101610
e_score_correction_bias=self.e_score_correction_bias,
16111611
activation=self.activation,
16121612
enable_eplb=self.enable_eplb,
1613-
eplb_record_metrics=self.eplb_record_metrics,
1613+
eplb_static=self.eplb_static,
16141614
expert_load_view=self.expert_load_view,
16151615
logical_to_physical_map=self.logical_to_physical_map,
16161616
logical_replica_count=self.logical_replica_count,
@@ -1779,7 +1779,7 @@ def forward_impl(
17791779
activation=self.activation,
17801780
apply_router_weight_on_input=self.apply_router_weight_on_input,
17811781
enable_eplb=self.enable_eplb,
1782-
eplb_record_metrics=self.eplb_record_metrics,
1782+
eplb_static=self.eplb_static,
17831783
expert_load_view=self.expert_load_view,
17841784
logical_to_physical_map=self.logical_to_physical_map,
17851785
logical_replica_count=self.logical_replica_count,

vllm/model_executor/layers/quantization/awq_marlin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -614,7 +614,7 @@ def apply(
614614
apply_router_weight_on_input: bool = False,
615615
activation: str = "silu",
616616
enable_eplb: bool = False,
617-
eplb_record_metrics: bool = False,
617+
eplb_static: bool = False,
618618
expert_load_view: torch.Tensor | None = None,
619619
logical_to_physical_map: torch.Tensor | None = None,
620620
logical_replica_count: torch.Tensor | None = None,

vllm/model_executor/layers/quantization/bitsandbytes.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -512,7 +512,7 @@ def apply(
512512
apply_router_weight_on_input: bool = False,
513513
activation: str = "silu",
514514
enable_eplb: bool = False,
515-
eplb_record_metrics: bool = False,
515+
eplb_static: bool = False,
516516
expert_load_view: torch.Tensor | None = None,
517517
logical_to_physical_map: torch.Tensor | None = None,
518518
logical_replica_count: torch.Tensor | None = None,

vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ def apply(
439439
apply_router_weight_on_input: bool = False,
440440
activation: str = "silu",
441441
enable_eplb: bool = False,
442-
eplb_record_metrics: bool = False,
442+
eplb_static: bool = False,
443443
expert_load_view: torch.Tensor | None = None,
444444
logical_to_physical_map: torch.Tensor | None = None,
445445
logical_replica_count: torch.Tensor | None = None,
@@ -1019,7 +1019,7 @@ def apply(
10191019
apply_router_weight_on_input: bool = False,
10201020
activation: str = "silu",
10211021
enable_eplb: bool = False,
1022-
eplb_record_metrics: bool = False,
1022+
eplb_static: bool = False,
10231023
expert_load_view: torch.Tensor | None = None,
10241024
logical_to_physical_map: torch.Tensor | None = None,
10251025
logical_replica_count: torch.Tensor | None = None,
@@ -1288,7 +1288,7 @@ def apply(
12881288
apply_router_weight_on_input: bool = False,
12891289
activation: str = "silu",
12901290
enable_eplb: bool = False,
1291-
eplb_record_metrics: bool = False,
1291+
eplb_static: bool = False,
12921292
expert_load_view: torch.Tensor | None = None,
12931293
logical_to_physical_map: torch.Tensor | None = None,
12941294
logical_replica_count: torch.Tensor | None = None,
@@ -1650,7 +1650,7 @@ def apply(
16501650
apply_router_weight_on_input: bool = False,
16511651
activation: str = "silu",
16521652
enable_eplb: bool = False,
1653-
eplb_record_metrics: bool = False,
1653+
eplb_static: bool = False,
16541654
expert_load_view: torch.Tensor | None = None,
16551655
logical_to_physical_map: torch.Tensor | None = None,
16561656
logical_replica_count: torch.Tensor | None = None,
@@ -1914,7 +1914,7 @@ def apply(
19141914
apply_router_weight_on_input: bool = False,
19151915
activation: str = "silu",
19161916
enable_eplb: bool = False,
1917-
eplb_record_metrics: bool = False,
1917+
eplb_static: bool = False,
19181918
expert_load_view: torch.Tensor | None = None,
19191919
logical_to_physical_map: torch.Tensor | None = None,
19201920
logical_replica_count: torch.Tensor | None = None,
@@ -2238,7 +2238,7 @@ def apply(
22382238
apply_router_weight_on_input: bool = False,
22392239
activation: str = "silu",
22402240
enable_eplb: bool = False,
2241-
eplb_record_metrics: bool = False,
2241+
eplb_static: bool = False,
22422242
expert_load_view: torch.Tensor | None = None,
22432243
logical_to_physical_map: torch.Tensor | None = None,
22442244
logical_replica_count: torch.Tensor | None = None,

vllm/model_executor/layers/quantization/experts_int8.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def apply(
154154
apply_router_weight_on_input: bool = False,
155155
activation: str = "silu",
156156
enable_eplb: bool = False,
157-
eplb_record_metrics: bool = False,
157+
eplb_static: bool = False,
158158
expert_load_view: torch.Tensor | None = None,
159159
logical_to_physical_map: torch.Tensor | None = None,
160160
logical_replica_count: torch.Tensor | None = None,

0 commit comments

Comments (0)