Skip to content

Commit e2fa100

Browse files
[FEAT] Add custom allreduce from AITER to vllm and (#629)
control it by the env flag VLLM_ROCM_USE_AITER_CUSTOM_ALL_REDUCE (default: True) Signed-off-by: zejunchen-zejun <zejun.chen@amd.com>
1 parent ea7d874 commit e2fa100

File tree

2 files changed

+23
-2
lines changed

2 files changed

+23
-2
lines changed

vllm/distributed/device_communicators/cuda_communicator.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# SPDX-License-Identifier: Apache-2.0
22
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
33

4+
from functools import cache
45
from typing import Optional, Union
56

67
import torch
@@ -15,6 +16,14 @@
1516
logger = init_logger(__name__)
1617

1718

@cache
def is_rocm_aiter_custom_allreduce_enabled() -> bool:
    """Return True when AITER's custom allreduce should be used.

    Requires all three conditions: running on a ROCm platform, the
    VLLM_ROCM_USE_AITER flag, and the
    VLLM_ROCM_USE_AITER_CUSTOM_ALL_REDUCE flag. The result is cached
    after the first call, so the environment is consulted only once.
    """
    # Short-circuit: env flags are only read when actually on ROCm.
    return (current_platform.is_rocm()
            and envs.VLLM_ROCM_USE_AITER
            and envs.VLLM_ROCM_USE_AITER_CUSTOM_ALL_REDUCE)
25+
26+
1827
class CudaCommunicator(DeviceCommunicatorBase):
1928

2029
def __init__(self,
@@ -38,8 +47,12 @@ def __init__(self,
3847
self.use_custom_allreduce = use_custom_allreduce
3948

4049
# lazy import to avoid documentation build error
41-
from vllm.distributed.device_communicators.custom_all_reduce import (
42-
CustomAllreduce)
50+
if is_rocm_aiter_custom_allreduce_enabled():
51+
from aiter.dist.custom_all_reduce import CustomAllreduce
52+
logger.info("Using aiter.dist.custom_all_reduce for ROCm platform")
53+
else:
54+
from vllm.distributed.device_communicators.custom_all_reduce import ( # noqa: E501
55+
CustomAllreduce)
4356
from vllm.distributed.device_communicators.pynccl import (
4457
PyNcclCommunicator)
4558
from vllm.distributed.device_communicators.quick_all_reduce import (

vllm/envs.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@
9595
VLLM_ROCM_USE_AITER_MLA: bool = True
9696
VLLM_ROCM_USE_AITER_MHA: bool = True
9797
VLLM_ROCM_USE_AITER_ROPE: bool = False
98+
VLLM_ROCM_USE_AITER_CUSTOM_ALL_REDUCE: bool = True
9899
VLLM_ROCM_USE_SKINNY_GEMM: bool = True
99100
VLLM_ROCM_FP8_PADDING: bool = True
100101
VLLM_ROCM_MOE_PADDING: bool = True
@@ -709,6 +710,13 @@ def get_vllm_port() -> Optional[int]:
709710
lambda: (os.getenv("VLLM_ROCM_USE_AITER_ROPE", "False").lower() in
710711
("true", "1")),
711712

713+
# Whether to use aiter custom allreduce for ROCm platform.
714+
# Enabled by default; set to False to fall back to vLLM's built-in custom allreduce.
715+
"VLLM_ROCM_USE_AITER_CUSTOM_ALL_REDUCE":
716+
lambda:
717+
(os.getenv("VLLM_ROCM_USE_AITER_CUSTOM_ALL_REDUCE", "True").lower() in
718+
("true", "1")),
719+
712720
# use rocm skinny gemms
713721
"VLLM_ROCM_USE_SKINNY_GEMM":
714722
lambda: (os.getenv("VLLM_ROCM_USE_SKINNY_GEMM", "True").lower() in

0 commit comments

Comments (0)