Add check asserting the deepep_low_latency all2all backend is used when microbatching is enabled

SageMoore · SageMoore · commit 10518bd25e1d · 2025-09-03T21:15:59.000Z
Signed-off-by: Sage Moore <sage@neuralmagic.com>
diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py
@@ -3711,7 +3711,15 @@ def __post_init__(self):
                     "Compilation level should be CompilationLevel.PIECEWISE "\
                     "when cudagraph_mode piecewise cudagraphs is used, "\
                     f"cudagraph_mode={self.compilation_config.cudagraph_mode}"
-
+        
+        if self.parallel_config.enable_microbatching:
+            a2a_backend = envs.VLLM_ALL2ALL_BACKEND
+            assert a2a_backend == "deepep_low_latency", \
+            "Microbatching currently only supports the deepep_low_latency "\
+            f"all2all backend. {a2a_backend} is not supported. To fix set "\
+            "the VLLM_ALL2ALL_BACKEND environment variable to "\
+            "deepep_low_latency and install the DeepEP kerenls."
+ 
         if not self.instance_id:
             self.instance_id = random_uuid()[:5]