Commit 28eb1d8

cyx-6 and yzh119 authored

Fix sphinx error (#1380)
## 📌 Description

## 🔍 Related Issues

## 🚀 Pull Request Checklist

Thank you for contributing to FlashInfer! Before we review your pull request, please make sure the following items are complete.

### ✅ Pre-commit Checks

- [x] I have installed `pre-commit` by running `pip install pre-commit` (or used your preferred method).
- [x] I have installed the hooks with `pre-commit install`.
- [x] I have run the hooks manually with `pre-commit run --all-files` and fixed any reported issues.

> If you are unsure about how to set up `pre-commit`, see [the pre-commit documentation](https://pre-commit.com/).

## 🧪 Tests

- [x] Tests have been added or updated as needed.
- [x] All tests are passing (`unittest`, etc.).

## Reviewer Notes

---------

Co-authored-by: Yaxing Cai <[email protected]>
Co-authored-by: Zihao Ye <[email protected]>
1 parent fdfe07a commit 28eb1d8

File tree

6 files changed (+18, -137 lines)


.github/workflows/build-doc.yml

Lines changed: 5 additions & 0 deletions

```diff
@@ -27,6 +27,11 @@ jobs:
         with:
           submodules: recursive

+      - name: Setup Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.12'
+
       - name: Configuring build Environment
         run: |
           sudo apt-get update
```

docs/api/comm.rst

Lines changed: 0 additions & 130 deletions

The file was deleted in its entirety. Its former contents:

```rst
.. _apicomm:

flashinfer.comm
===============

.. currentmodule:: flashinfer.comm

This module provides communication primitives and utilities for distributed computing, including CUDA IPC, AllReduce operations, and memory management utilities.

CUDA IPC Utilities
------------------

.. autosummary::
   :toctree: ../generated

   CudaRTLibrary
   create_shared_buffer
   free_shared_buffer

DLPack Utilities
----------------

.. autosummary::
   :toctree: ../generated

   pack_strided_memory

Mapping Utilities
-----------------

.. autosummary::
   :toctree: ../generated

   Mapping

TensorRT-LLM AllReduce
----------------------

Types and Enums
~~~~~~~~~~~~~~~~

.. autosummary::
   :toctree: ../generated

   AllReduceFusionOp
   AllReduceFusionPattern
   AllReduceStrategyConfig
   AllReduceStrategyType
   FP4QuantizationSFLayout

Core Operations
~~~~~~~~~~~~~~~

.. autosummary::
   :toctree: ../generated

   trtllm_allreduce_fusion
   trtllm_custom_all_reduce
   trtllm_moe_allreduce_fusion
   trtllm_moe_finalize_allreduce_fusion

Workspace Management
~~~~~~~~~~~~~~~~~~~~

.. autosummary::
   :toctree: ../generated

   trtllm_create_ipc_workspace_for_all_reduce
   trtllm_create_ipc_workspace_for_all_reduce_fusion
   trtllm_destroy_ipc_workspace_for_all_reduce
   trtllm_destroy_ipc_workspace_for_all_reduce_fusion

Initialization and Utilities
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

.. autosummary::
   :toctree: ../generated

   trtllm_lamport_initialize
   trtllm_lamport_initialize_all
   compute_fp4_swizzled_layout_sf_size

vLLM AllReduce
--------------

.. autosummary::
   :toctree: ../generated

   vllm_all_reduce
   vllm_dispose
   vllm_init_custom_ar
   vllm_register_buffer
   vllm_register_graph_buffers
   vllm_get_graph_buffer_ipc_meta
   vllm_meta_size

MNNVL (Multi-Node NVLink)
-------------------------

.. currentmodule:: flashinfer.comm.mnnvl

Core Classes
~~~~~~~~~~~~

.. autosummary::
   :toctree: ../generated

   MnnvlMemory
   McastGPUBuffer

Utility Functions
~~~~~~~~~~~~~~~~~

.. autosummary::
   :toctree: ../generated

   create_tensor_from_cuda_memory
   alloc_and_copy_to_cuda

TensorRT-LLM MNNVL AllReduce
----------------------------

.. currentmodule:: flashinfer.comm.trtllm_mnnvl_ar

.. autosummary::
   :toctree: ../generated

   trtllm_mnnvl_all_reduce
   trtllm_mnnvl_fused_allreduce_rmsnorm
   mpi_barrier
```

docs/build_docs.sh

Lines changed: 3 additions & 1 deletion

```diff
@@ -1,8 +1,10 @@
 #!/bin/bash
+set -eo pipefail
+set -x
 echo "Building FlashInfer documentation..."

 make clean
-make html
+make SPHINXOPTS='-T -v' html

 # Add RunLLM widget to generated HTML files
 echo "Adding RunLLM widget to documentation..."
```

docs/conf.py

Lines changed: 5 additions & 1 deletion

```diff
@@ -1,5 +1,6 @@
 import os
 import sys
+import warnings
 from pathlib import Path

 # import tlcpack_sphinx_addon
@@ -12,13 +13,16 @@
 # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

 root = Path(__file__).parents[1].resolve()
-sys.path.append(str(root))
+sys.path.insert(0, str(root))
 os.environ["BUILD_DOC"] = "1"
 autodoc_mock_imports = [
     "torch",
     "triton",
     "flashinfer._build_meta",
     "cuda",
+    "numpy",
+    "einops",
+    "mpi4py",
 ]

 project = "FlashInfer"
```
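Two things happen here: `sys.path.insert(0, ...)` makes the in-repo package shadow any installed copy, and the extended `autodoc_mock_imports` list lets Sphinx import `flashinfer` without `numpy`, `einops`, or `mpi4py` installed by substituting a mock module for each name. A minimal sketch of the idea, approximated with `unittest.mock` (Sphinx uses its own mock-module machinery internally, so this is illustrative only):

```python
# Illustrative approximation of autodoc_mock_imports: register a stand-in
# module for each heavy dependency before the documented package is imported.
import sys
from unittest import mock

for name in ["torch", "triton", "cuda", "numpy", "einops", "mpi4py"]:
    sys.modules[name] = mock.MagicMock()  # attribute access now always succeeds

# flashinfer would now import even with the real dependencies absent,
# assuming the repo root is on sys.path (as conf.py arranges above).
import flashinfer
```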

docs/index.rst

Lines changed: 0 additions & 1 deletion

```diff
@@ -30,7 +30,6 @@ FlashInfer is a library and kernel generator for Large Language Models that prov
    api/attention
    api/gemm
    api/fused_moe
-   api/comm
    api/cascade
    api/sparse
    api/page
```

flashinfer/comm/cuda_ipc.py

Lines changed: 5 additions & 4 deletions

```diff
@@ -68,6 +68,8 @@ def find_loaded_library(lib_name) -> Optional[str]:


 class CudaRTLibrary:
+    """CudaRTLibrary"""
+
     exported_functions = [
         # cudaError_t cudaSetDevice ( int device )
         Function("cudaSetDevice", cudaError_t, [ctypes.c_int]),
@@ -195,10 +197,6 @@ def cudaIpcOpenMemHandle(self, handle: cudaIpcMemHandle_t) -> ctypes.c_void_p:
 def create_shared_buffer(
     size_in_bytes: int, group: Optional[ProcessGroup] = None
 ) -> List[int]:
-    """
-    Creates a shared buffer and returns a list of pointers
-    representing the buffer on all processes in the group.
-    """
     """
     Creates a shared buffer and returns a list of pointers
     representing the buffer on all processes in the group.
@@ -228,6 +226,9 @@ def create_shared_buffer(
 def free_shared_buffer(
     pointers: List[int], group: Optional[ProcessGroup] = None
 ) -> None:
+    """
+    Frees a shared buffer.
+    """
     if group is None:
         group = dist.group.WORLD
     rank = dist.get_rank(group=group)
```
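The edits above remove a duplicated docstring from `create_shared_buffer` and give `free_shared_buffer` and `CudaRTLibrary` docstrings so autodoc has something to render. For context, a hedged usage sketch of the documented pair, assuming `torch.distributed` is initialized with one CUDA device per rank (the multi-process launch itself is not shown):

```python
# Usage sketch only; signatures follow the diff above: create_shared_buffer
# returns one IPC device pointer per rank, free_shared_buffer releases them.
import torch.distributed as dist
from flashinfer.comm import create_shared_buffer, free_shared_buffer

dist.init_process_group(backend="nccl")
try:
    pointers = create_shared_buffer(1 << 20)  # a 1 MiB buffer shared via CUDA IPC
    ...  # exchange data through the per-rank device pointers
    free_shared_buffer(pointers)
finally:
    dist.destroy_process_group()
```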
