vllm/model_executor/layers/fused_moe/runner: 1 file changed (+1, -7 lines)
```diff
@@ -147,14 +147,8 @@ def _run_in_aux_stream(
     ) -> torch.Tensor:
         # TODO: assert that maybe_setup_shared_experts_stream has been called.

-        # Run shared experts in parallel on a separate stream
-        # NOTE: We start the separate stream here and mark the
-        # sync end point immediately after it is done. This is
-        # important to avoid excessive stream allocations by the cuda
-        # graph replay later.
+        # Run shared experts in parallel on a separate stream.
         with torch.cuda.stream(self._stream):
-            # Note that hidden_states clone() is necessary here to avoid
-            # conflict with the main stream
             output = self._layer(shared_experts_input)
         current_stream().wait_stream(self._stream)
```