Skip to content

Commit 10518bd

Browse files
committed
add check to assert we are using deepep_low_latency
Signed-off-by: Sage Moore <[email protected]>
1 parent e42c0e7 commit 10518bd

File tree

1 file changed

+9
-1
lines changed

1 file changed

+9
-1
lines changed

vllm/config/__init__.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3711,7 +3711,15 @@ def __post_init__(self):
3711 3711
"Compilation level should be CompilationLevel.PIECEWISE "\
3712 3712
"when cudagraph_mode piecewise cudagraphs is used, "\
3713 3713
f"cudagraph_mode={self.compilation_config.cudagraph_mode}"
3714-
3714+
3715+
if self.parallel_config.enable_microbatching:
3716+
a2a_backend = envs.VLLM_ALL2ALL_BACKEND
3717+
assert a2a_backend == "deepep_low_latency", \
3718+
"Microbatching currently only supports the deepep_low_latency "\
3719+
f"all2all backend. {a2a_backend} is not supported. To fix set "\
3720+
"the VLLM_ALL2ALL_BACKEND environment variable to "\
3721+
"deepep_low_latency and install the DeepEP kerenls."
3722+
3715 3723
if not self.instance_id:
3716 3724
self.instance_id = random_uuid()[:5]
3717 3725

0 commit comments

Comments
 (0)