Commit 0189214

Add alternative comm backend for mnnvl

1 parent 111e3d4 commit 0189214

2 files changed: +67, -3 lines

flashinfer/comm/mnnvl.py

Lines changed: 58 additions & 0 deletions
@@ -16,6 +16,8 @@
 import ctypes
 import logging
 import os
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
 import platform
 import sys
 from typing import Any, Dict, List, Optional
@@ -220,6 +222,51 @@ def set_mpi_comm(cls, new_comm: MPI.Intracomm):
     def __getattr__(self, name):
         return getattr(self._comm, name)
 
+
+class CommBackend(ABC):
+    """Abstract communication backend interface."""
+
+    @abstractmethod
+    def Get_rank(self) -> int: ...
+
+    @abstractmethod
+    def Get_size(self) -> int: ...
+
+    @abstractmethod
+    def allgather(self, data: int) -> List[int]: ...
+
+    @abstractmethod
+    def allgather_bytes(self, data): ...
+
+    @abstractmethod
+    def Split(self, color: int, key: int) -> "CommBackend": ...
+
+
+class LegacyMPIBackend(CommBackend):
+    """Adapter exposing the original MpiComm singleton through CommBackend."""
+
+    def __init__(self):
+        self._mpicomm = MpiComm()
+
+    def Get_rank(self) -> int:
+        return self._mpicomm.Get_rank()
+
+    def Get_size(self) -> int:
+        return self._mpicomm.Get_size()
+
+    def allgather(self, data: int) -> List[int]:
+        return self._mpicomm.allgather(data)
+
+    def allgather_bytes(self, data):
+        # MPI allgather handles arbitrary picklable payloads, so bytes reuse it.
+        return self._mpicomm.allgather(data)
+
+    def Split(self, color: int, key: int) -> "CommBackend":
+        # Delegate the split to the underlying MpiComm, then return a fresh adapter.
+        self._mpicomm.Split(color, key)
+        return LegacyMPIBackend()
+
+
+@dataclass
+class MnnvlConfig:
+    """Configuration for MNNVL memory management."""
+
+    comm_backend: Optional[CommBackend] = None
+    allocation_granularity: int = 0
+    fabric_page_size: int = 1 << 29  # 512 MB
+
+
 class MnnvlMemory:  # type: ignore[no-redef]
     initialized: bool = False
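Because the memory setup now talks to the abstract CommBackend rather than MPI directly, other transports can be plugged in. The sketch below is illustrative only and not part of this commit: it assumes the CommBackend ABC from the hunk above, assumes torch.distributed has already been initialized by the caller, and the class name TorchDistBackend and its Split strategy (which only splits the default process group) are made up for the example.

# Illustrative sketch, not part of this commit: a torch.distributed-based
# CommBackend. Assumes dist.init_process_group() has already been called and
# that Split is only ever invoked on the default (world) process group.
from typing import Any, List, Optional

import torch.distributed as dist


class TorchDistBackend(CommBackend):  # CommBackend is the ABC added above
    def __init__(self, group: Optional[dist.ProcessGroup] = None):
        self._group = group  # None means the default (world) group

    def Get_rank(self) -> int:
        return dist.get_rank(group=self._group)

    def Get_size(self) -> int:
        return dist.get_world_size(group=self._group)

    def allgather(self, data: int) -> List[int]:
        out: List[Any] = [None] * self.Get_size()
        dist.all_gather_object(out, data, group=self._group)
        return out

    def allgather_bytes(self, data):
        # all_gather_object pickles arbitrary payloads, so bytes work as-is.
        out: List[Any] = [None] * self.Get_size()
        dist.all_gather_object(out, data, group=self._group)
        return out

    def Split(self, color: int, key: int) -> "CommBackend":
        # Mimic MPI_Comm_split: every rank contributes (color, key, rank); ranks
        # sharing a color form one group, ordered by key. torch.distributed
        # requires all ranks to call new_group() for every group, in the same order.
        world = dist.get_world_size()
        contribs: List[Any] = [None] * world
        dist.all_gather_object(contribs, (color, key, dist.get_rank()))
        my_group = None
        for c in sorted({c for c, _, _ in contribs}):
            ranks = [r for cc, k, r in sorted(contribs) if cc == c]
            group = dist.new_group(ranks=ranks)
            if c == color:
                my_group = group
        return TorchDistBackend(my_group)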

@@ -275,6 +322,17 @@ def initialize():
         pynvml.nvmlInit()
         MnnvlMemory.initialized = True
 
+    @staticmethod
+    def set_comm(mapping: Mapping, config: Optional[MnnvlConfig] = None):
+        # Fall back to the MPI adapter when no explicit config is supplied.
+        MnnvlMemory._config = config or MnnvlConfig(comm_backend=LegacyMPIBackend())
+        comm0 = MnnvlMemory._config.comm_backend
+        # Group ranks by (pp_rank, cp_rank); order each group by tp_rank.
+        comm = comm0.Split(
+            mapping.pp_rank * mapping.cp_size + mapping.cp_rank, mapping.tp_rank
+        )
+        MnnvlMemory.comm = comm
+
     @staticmethod
     def get_comm(mapping: Mapping):
         if MnnvlMemory.comm is not None:
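set_comm groups ranks by their pipeline- and context-parallel coordinates (the Split color) and orders each group by tensor-parallel rank (the key), so every resulting communicator spans exactly one TP group. A small self-contained illustration of that arithmetic, with parallelism sizes chosen only for the example:

# Illustration of set_comm's Split arguments; sizes are made up for the example.
pp_size, cp_size, tp_size = 2, 2, 4

groups = {}
for pp_rank in range(pp_size):
    for cp_rank in range(cp_size):
        for tp_rank in range(tp_size):
            color = pp_rank * cp_size + cp_rank  # same color -> same communicator
            key = tp_rank                        # ordering within the communicator
            groups.setdefault(color, []).append(key)

print(groups)
# {0: [0, 1, 2, 3], 1: [0, 1, 2, 3], 2: [0, 1, 2, 3], 3: [0, 1, 2, 3]}
# Four TP communicators, one per (pp_rank, cp_rank) pair, each ordered by tp_rank.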

flashinfer/comm/trtllm_alltoall.py

Lines changed: 9 additions & 3 deletions
@@ -26,7 +26,7 @@
 from ..jit import gen_jit_spec
 from ..utils import register_custom_op
 from .mapping import Mapping
-from .mnnvl import MnnvlMemory
+from .mnnvl import MnnvlConfig, MnnvlMemory
 
 
 def gen_comm_alltoall_module() -> JitSpec:
@@ -389,27 +389,33 @@ class MnnvlMoe:
     moe_mapping: Mapping = None
 
     @staticmethod
-    def get_moe_workspaces(mapping: Mapping):
+    def get_moe_workspaces(mapping: Mapping, config: Optional[MnnvlConfig] = None):
         if MnnvlMoe.moe_workspace is not None:
             assert mapping == MnnvlMoe.moe_mapping, "only one moe mapping supported now"
             return MnnvlMoe.moe_workspace_tensor
 
         MnnvlMoe.moe_mapping = mapping
         workspace_size_per_rank = get_moe_commworkspace_size_per_rank(mapping.tp_size)
+        if config:
+            MnnvlMemory.set_comm(mapping, config)
         MnnvlMemory.initialize()
         MnnvlMoe.moe_workspace = MnnvlMemory(mapping, workspace_size_per_rank)
         MnnvlMoe.moe_workspace_tensor = MnnvlMoe.moe_workspace.as_torch_strided_tensor(
             torch.uint64
         )
         return MnnvlMoe.moe_workspace_tensor
 
     @staticmethod
-    def get_moe_prepare_workspace(mapping: Mapping):
+    def get_moe_prepare_workspace(mapping: Mapping, config: Optional[MnnvlConfig] = None):
         if MnnvlMoe.moe_prepare_workspace_tensor is not None:
             assert mapping == MnnvlMoe.moe_mapping, "only one moe mapping supported now"
             return MnnvlMoe.moe_prepare_workspace_tensor
         workspace_size_per_rank = get_moe_prepare_workspace_size_per_rank(
             mapping.tp_size
         )
+        if config:
+            MnnvlMemory.set_comm(mapping, config)
         MnnvlMemory.initialize()
         MnnvlMoe.moe_prepare_workspace = MnnvlMemory(mapping, workspace_size_per_rank)
         MnnvlMoe.moe_prepare_workspace_tensor = (
             MnnvlMoe.moe_prepare_workspace.as_torch_strided_tensor(torch.uint64)
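The optional config threads a custom backend into workspace construction: when present, MnnvlMemory.set_comm runs before MnnvlMemory.initialize(); when absent, the legacy MPI path is untouched. A hypothetical call site, reusing the TorchDistBackend sketch from the mnnvl.py section (that class and the pre-built mapping object are assumptions for the example):

# Hypothetical call site; TorchDistBackend is the earlier sketch, not part of this commit.
from flashinfer.comm.mnnvl import MnnvlConfig
from flashinfer.comm.trtllm_alltoall import MnnvlMoe

cfg = MnnvlConfig(
    comm_backend=TorchDistBackend(),   # any CommBackend implementation
    allocation_granularity=0,
    fabric_page_size=1 << 29,          # 512 MB, the dataclass default
)

# `mapping` is an existing flashinfer.comm.mapping.Mapping for this job.
workspace_tensor = MnnvlMoe.get_moe_workspaces(mapping, cfg)
prepare_tensor = MnnvlMoe.get_moe_prepare_workspace(mapping, cfg)

# Omitting cfg keeps the original behavior: MnnvlMemory falls back to the MPI path.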
