
Commit c7ddca4

add guards for pplx import
Signed-off-by: Bill Nell <[email protected]>
1 parent 448658a commit c7ddca4

File tree

3 files changed: 34 additions, 11 deletions

tests/kernels/moe/test_pplx_moe.py
vllm/distributed/parallel_state.py
vllm/model_executor/layers/fused_moe/layer.py


tests/kernels/moe/test_pplx_moe.py

Lines changed: 17 additions & 4 deletions
@@ -10,10 +10,16 @@
 
 import pytest
 import torch
-from pplx_kernels import AllToAll
-from pplx_kernels.nvshmem import (nvshmem_alloc_empty_unique_id,
-                                  nvshmem_finalize, nvshmem_get_unique_id,
-                                  nvshmem_init)
+
+try:
+    from pplx_kernels import AllToAll
+    from pplx_kernels.nvshmem import (nvshmem_alloc_empty_unique_id,
+                                      nvshmem_finalize, nvshmem_get_unique_id,
+                                      nvshmem_init)
+    has_pplx = True
+except ImportError as ex:
+    has_pplx = False
+
 from torch.multiprocessing import (
     spawn)  # pyright: ignore[reportPrivateImportUsage]
 from typing_extensions import Concatenate, ParamSpec
@@ -45,6 +51,11 @@
     reason="Requires multi-node environment",
 )
 
+requires_pplx = pytest.mark.skipif(
+    not has_pplx,
+    reason="Requires PPLX kernels",
+)
+
 
 @dataclasses.dataclass
 class ProcessGroupInfo:
@@ -420,6 +431,7 @@ def _pplx_dispatch_combine(
 @pytest.mark.parametrize("topk", TOP_KS)
 @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
 @pytest.mark.parametrize("world_dp_size", [[2, 1]]) #, [[4, 2]])
+@pytest.mark.skipif(not has_pplx, reason="PPLX kernels not available.")
 def test_pplx_dispatch_combine(
     m: int,
     n: int,
@@ -543,6 +555,7 @@ def _pplx_moe(
 @pytest.mark.parametrize("topk", TOP_KS)
 @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16])
 @pytest.mark.parametrize("world_dp_size", [[2, 1]]) #, [4, 2]])
+@pytest.mark.skipif(not has_pplx, reason="PPLX kernels not available.")
 def test_pplx_moe(
     m: int,
     n: int,
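
The requires_pplx marker defined above and the inline pytest.mark.skipif(not has_pplx, ...) decorators on the two tests express the same optional-dependency skip. A minimal standalone sketch of that pattern (the test name and body below are hypothetical, not part of this commit):

import importlib.util

import pytest

# Presence check that never imports the optional package.
has_pplx = importlib.util.find_spec("pplx_kernels") is not None

requires_pplx = pytest.mark.skipif(
    not has_pplx,
    reason="Requires PPLX kernels",
)


@requires_pplx
def test_pplx_alltoall_importable():
    # Hypothetical placeholder: runs only when pplx_kernels is installed;
    # otherwise pytest reports the test as skipped.
    from pplx_kernels import AllToAll  # noqa: F401

Either form (the reusable marker or the inline skipif) gives the same skip behavior; the marker just avoids repeating the condition on every test.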

vllm/distributed/parallel_state.py

Lines changed: 8 additions & 4 deletions
@@ -23,6 +23,7 @@
 """
 import contextlib
 import gc
+import importlib
 import pickle
 import weakref
 from collections import namedtuple
@@ -34,9 +35,6 @@
 
 import torch
 import torch.distributed
-from pplx_kernels.nvshmem import (nvshmem_alloc_empty_unique_id,
-                                  nvshmem_finalize, nvshmem_get_unique_id,
-                                  nvshmem_init)
 from torch.distributed import Backend, ProcessGroup
 
 import vllm.envs as envs
@@ -920,7 +918,12 @@ def init_distributed_environment(
 
 @run_once
 def pplx_init(rank, world_size):
-    if world_size > 1:
+    has_pplx = importlib.util.find_spec("pplx_kernels") is not None
+
+    if has_pplx and world_size > 1:
+        from pplx_kernels.nvshmem import (nvshmem_alloc_empty_unique_id,
+                                          nvshmem_get_unique_id,
+                                          nvshmem_init)
         try:
             global PPLX_DID_INIT
             logger.debug(f"PPLX_INIT {rank} {world_size}")
@@ -940,6 +943,7 @@ def pplx_init(rank, world_size):
 def pplx_finalize():
     global PPLX_DID_INIT
     if PPLX_DID_INIT:
+        from pplx_kernels.nvshmem import nvshmem_finalize
         nvshmem_finalize()
 
 
vllm/model_executor/layers/fused_moe/layer.py

Lines changed: 9 additions & 3 deletions
@@ -1,13 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 
+import importlib
 import threading
 import weakref
 from abc import abstractmethod
 from dataclasses import dataclass
 from enum import Enum
 from typing import Callable, List, Optional, Tuple
 
-import pplx_kernels as pplx # TODO: guard this
 import torch
 import torch.nn.functional as F
 from torch.nn.parameter import UninitializedParameter
@@ -27,14 +27,17 @@
 from vllm.platforms.interface import CpuArchEnum
 from vllm.utils import direct_register_custom_op
 
+has_pplx = importlib.util.find_spec("pplx_kernels") is not None
+
 if current_platform.is_cuda_alike():
     from .dispatch_combine import StandardDispatchCombine
     from .fused_batched_moe import BatchedDispatchCombine, BatchedTritonExperts
     from .fused_moe import TritonExperts, fused_experts
     from .modular_kernel import (FusedMoEModularKernel,
                                  FusedMoEPermuteExpertsUnpermute,
                                  FusedMoEQuantizeDispatchCombine)
-    from .pplx_dispatch_combine import PplxDispatchCombine
+    if has_pplx:
+        from .pplx_dispatch_combine import PplxDispatchCombine
 else:
     fused_experts = None  # type: ignore
 if current_platform.is_tpu():
@@ -115,6 +118,9 @@ def __init__(self):
         self._lock = threading.RLock()  # Reentrant lock for thread safety
 
     def get_or_create(self, **kwargs):
+        assert has_pplx
+        import pplx_kernels as pplx
+
         # Create a hashable key from the kwargs
         key = tuple(sorted((k, v) for k, v in kwargs.items()))
 
@@ -625,7 +631,7 @@ def __init__(
         dispatch_combine: FusedMoEQuantizeDispatchCombine = None
 
         # TODO: move to method?
-        if self.dp_size > 1:
+        if self.dp_size > 1 and has_pplx:
             logger.info("using pplx dispatch")
             max_num_tokens = MOE_DP_CHUNK_SIZE # // moe.dp_size
             world_size = moe.ep_size
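
In this file the guard appears at three levels: the module-level has_pplx flag, the conditional import of PplxDispatchCombine, and the lazy import pplx_kernels as pplx behind an assert in get_or_create. A reduced sketch of the selection logic from the last hunk (a standalone illustration with a hypothetical helper, not the actual FusedMoE constructor):

import importlib.util

has_pplx = importlib.util.find_spec("pplx_kernels") is not None


def select_dispatch_combine(dp_size: int) -> str:
    # Mirrors the guarded branch above: the pplx dispatch path is chosen only
    # when data parallelism is in use AND pplx_kernels is installed; otherwise
    # the standard dispatch/combine path is used.
    if dp_size > 1 and has_pplx:
        return "pplx"      # vLLM would construct PplxDispatchCombine here
    return "standard"      # vLLM would fall back to StandardDispatchCombine


# Example: on a machine without pplx_kernels this prints "standard".
print(select_dispatch_combine(dp_size=2))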
