Skip to content

Commit 0c54343

Browse files
committed
remove enable_async_comms
Signed-off-by: Sage Moore <[email protected]>
1 parent 4718a2d commit 0c54343

File tree

4 files changed

+1
-14
lines changed

4 files changed

+1
-14
lines changed

vllm/compilation/ubatch_wrapper.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -209,9 +209,7 @@ def _make_ubatch_metadata(self, ubatch_slices, attn_metadata, input_ids,
209209
compute_stream=compute_stream,
210210
forward_contexts=forward_contexts,
211211
ready_barrier=self.ready_barrier,
212-
device=self.device,
213-
enable_async_comms=self.vllm_config.parallel_config.
214-
enable_async_comms)
212+
device=self.device)
215213

216214
ubatch_metadata: list[UbatchMetadata] = []
217215
for i, ubatch_slice in enumerate(ubatch_slices):

vllm/config/parallel.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -135,9 +135,6 @@ class ParallelConfig:
135135
request is greater than this threshold, microbatching will be used.
136136
Otherwise, the request will be processed in a single batch."""
137137

138-
enable_async_comms: bool = False
139-
"""enable async comms"""
140-
141138
ray_workers_use_nsight: bool = False
142139
"""Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler."""
143140

vllm/engine/arg_utils.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -317,7 +317,6 @@ class EngineArgs:
317317
enable_expert_parallel: bool = ParallelConfig.enable_expert_parallel
318318
enable_microbatching: bool = ParallelConfig.enable_microbatching
319319
microbatching_token_threshold: int = ParallelConfig.microbatching_token_threshold
320-
enable_async_comms: bool = ParallelConfig.enable_async_comms
321320
eplb_config: EPLBConfig = get_field(ParallelConfig, "eplb_config")
322321
enable_eplb: bool = ParallelConfig.enable_eplb
323322
num_redundant_experts: int = EPLBConfig.num_redundant_experts
@@ -683,8 +682,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
683682
**parallel_kwargs["enable_microbatching"])
684683
parallel_group.add_argument("--microbatching-token-threshold",
685684
**parallel_kwargs["microbatching_token_threshold"])
686-
parallel_group.add_argument("--enable-async-comms",
687-
**parallel_kwargs["enable_async_comms"])
688685
parallel_group.add_argument("--enable-eplb",
689686
**parallel_kwargs["enable_eplb"])
690687
parallel_group.add_argument("--eplb-config",
@@ -1307,7 +1304,6 @@ def create_engine_config(
13071304
enable_expert_parallel=self.enable_expert_parallel,
13081305
enable_microbatching=self.enable_microbatching,
13091306
microbatching_token_threshold=self.microbatching_token_threshold,
1310-
enable_async_comms=self.enable_async_comms,
13111307
enable_eplb=self.enable_eplb,
13121308
eplb_config=self.eplb_config,
13131309
max_parallel_loading_workers=self.max_parallel_loading_workers,

vllm/v1/worker/ubatching.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ def __init__(self,
2727
cpu_signal_event: threading.Event,
2828
gpu_comm_done_event: torch.cuda.Event,
2929
gpu_compute_done_event: torch.cuda.Event,
30-
enable_async_comms: bool,
3130
schedule: str = "default"):
3231
self.id = id
3332
self.comm_stream = comm_stream
@@ -39,7 +38,6 @@ def __init__(self,
3938
self.current_stream = compute_stream
4039
self.gpu_comm_done_event = gpu_comm_done_event
4140
self.gpu_compute_done_event = gpu_compute_done_event
42-
self.enable_async_comms = enable_async_comms
4341
self.schedule = schedule
4442
self.recv_hook = None
4543

@@ -175,7 +173,6 @@ def make_ubatch_contexts(
175173
forward_contexts: list[ForwardContext],
176174
ready_barrier: threading.Barrier,
177175
device: Optional[torch.device] = None,
178-
enable_async_comms: bool = False,
179176
schedule: str = "default",
180177
) -> list[UBatchContext]:
181178
assert num_micro_batches == 2, "only been tested with 2 micro-batches"
@@ -206,7 +203,6 @@ def make_ubatch_contexts(
206203
num_micro_batches],
207204
gpu_comm_done_event=gpu_comm_done_events[i],
208205
gpu_compute_done_event=gpu_compute_done_events[i],
209-
enable_async_comms=enable_async_comms,
210206
schedule=schedule)
211207
ctxs.append(ctx)
212208

0 commit comments

Comments (0)