Commit 19b5a99

Using python package for platform detection
1 parent 27b6a31 commit 19b5a99

File tree

3 files changed: 62 additions & 59 deletions

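The commit replaces torch_tensorrt's Platform enum (and a CUDA device-name probe) with Python's standard-library platform module. As a quick illustration of the three stdlib calls the new code relies on (example outputs below are typical values, not taken from this commit):

# Illustrative only: the stdlib calls the commit switches to.
import platform

print(platform.system())   # e.g. "Linux" or "Windows"
print(platform.machine())  # e.g. "x86_64" or "aarch64"
print(platform.release())  # kernel release; on Jetson/Tegra boards it typically contains "tegra"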

.github/workflows/build-test-linux-aarch64.yml

Lines changed: 35 additions & 0 deletions
@@ -356,6 +356,41 @@ jobs:
       python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml .
       popd

+  tests-py-distributed:
+    name: Test dynamo distributed [Python]
+    needs: [filter-matrix, build]
+    if: false
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - repository: pytorch/tensorrt
+            package-name: torch_tensorrt
+            pre-script: packaging/pre_build_script.sh
+            post-script: packaging/post_build_script.sh
+            smoke-test-script: packaging/smoke_test_script.sh
+    uses: ./.github/workflows/linux-test.yml
+    with:
+      job-name: tests-py-dynamo-distributed
+      repository: "pytorch/tensorrt"
+      ref: ""
+      test-infra-repository: pytorch/test-infra
+      test-infra-ref: main
+      build-matrix: ${{ needs.filter-matrix.outputs.matrix }}
+      pre-script: ${{ matrix.pre-script }}
+      script: |
+        set -euo pipefail
+        export USE_HOST_DEPS=1
+        export CI_BUILD=1
+        export USE_TRTLLM_PLUGINS=1
+        dnf install -y mpich mpich-devel openmpi openmpi-devel
+        pushd .
+        cd tests/py
+        cd dynamo
+        python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_distributed_test_results.xml distributed/test_nccl_ops.py
+        popd
+
+
 concurrency:
   group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }}
   cancel-in-progress: true
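The new tests-py-distributed job is gated with `if: false`, so it is defined but disabled for now. For local experimentation, a rough Python equivalent of its test step might look like the sketch below (illustrative, not part of the commit; assumes a repo checkout with tests/py/dynamo present and the MPI/TRT-LLM prerequisites already installed):

# Rough local equivalent of the job's test step; illustrative sketch only.
import os
import subprocess

env = dict(os.environ, USE_HOST_DEPS="1", CI_BUILD="1", USE_TRTLLM_PLUGINS="1")
subprocess.run(
    ["python", "-m", "pytest", "-ra", "distributed/test_nccl_ops.py"],
    cwd="tests/py/dynamo",  # matches the pushd/cd sequence in the job script
    env=env,
    check=True,
)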

py/torch_tensorrt/dynamo/utils.py

Lines changed: 26 additions & 54 deletions
@@ -5,6 +5,7 @@
 import getpass
 import logging
 import os
+import platform
 import tempfile
 import urllib.request
 import warnings
@@ -29,7 +30,7 @@
 from torch._subclasses.fake_tensor import FakeTensor
 from torch.fx.experimental.proxy_tensor import unset_fake_temporarily
 from torch_tensorrt._Device import Device
-from torch_tensorrt._enums import Platform, dtype
+from torch_tensorrt._enums import dtype
 from torch_tensorrt._features import ENABLED_FEATURES
 from torch_tensorrt._Input import Input
 from torch_tensorrt._version import __tensorrt_llm_version__
@@ -101,37 +102,6 @@ class Frameworks(Enum):
 }


-def unified_dtype_converter(
-    dtype: Union[TRTDataType, torch.dtype, np.dtype], to: Frameworks
-) -> Union[np.dtype, torch.dtype, TRTDataType]:
-    """
-    Convert TensorRT, Numpy, or Torch data types to any other of those data types.
-
-    Args:
-        dtype (TRTDataType, torch.dtype, np.dtype): A TensorRT, Numpy, or Torch data type.
-        to (Frameworks): The framework to convert the data type to.
-
-    Returns:
-        The equivalent data type in the requested framework.
-    """
-    assert to in Frameworks, f"Expected valid Framework for translation, got {to}"
-    trt_major_version = int(trt.__version__.split(".")[0])
-    if dtype in (np.int8, torch.int8, trt.int8):
-        return DataTypeEquivalence[trt.int8][to]
-    elif trt_major_version >= 7 and dtype in (np.bool_, torch.bool, trt.bool):
-        return DataTypeEquivalence[trt.bool][to]
-    elif dtype in (np.int32, torch.int32, trt.int32):
-        return DataTypeEquivalence[trt.int32][to]
-    elif dtype in (np.int64, torch.int64, trt.int64):
-        return DataTypeEquivalence[trt.int64][to]
-    elif dtype in (np.float16, torch.float16, trt.float16):
-        return DataTypeEquivalence[trt.float16][to]
-    elif dtype in (np.float32, torch.float32, trt.float32):
-        return DataTypeEquivalence[trt.float32][to]
-    else:
-        raise TypeError("%s is not a supported dtype" % dtype)
-
-
 def deallocate_module(module: torch.fx.GraphModule, delete_module: bool = True) -> None:
     """
     This is a helper function to delete the instance of module. We first move it to CPU and then
@@ -870,29 +840,33 @@ def is_tegra_platform() -> bool:
     return False


-def is_platform_supported_for_trtllm(platform: str) -> bool:
+def is_platform_supported_for_trtllm() -> bool:
     """
-    Checks if the current platform supports TensorRT-LLM plugins for NCCL backend
+    Checks if the current platform supports TensorRT-LLM plugins for the NCCL backend.
+
     Returns:
-        bool: True if the platform supports TensorRT-LLM plugins for NCCL backend, False otherwise.
-    Note:
-        TensorRT-LLM plugins for NCCL backend are not supported on:
-        - Windows platforms
-        - Orin, Xavier, or Tegra devices (aarch64 architecture)
+        bool: True if supported, False otherwise.

+    Unsupported:
+        - Windows platforms
+        - Jetson/Orin/Xavier (aarch64 architecture + 'tegra' in platform release)
     """
-    if "windows" in platform:
+    system = platform.system().lower()
+    machine = platform.machine().lower()
+    release = platform.release().lower()
+
+    if "windows" in system:
         logger.info(
-            "TensorRT-LLM plugins for NCCL backend are not supported on Windows"
+            "TensorRT-LLM plugins for NCCL backend are not supported on Windows."
         )
         return False
-    if torch.cuda.is_available():
-        device_name = torch.cuda.get_device_name().lower()
-        if any(keyword in device_name for keyword in ["orin", "xavier", "tegra"]):
-            return False
+
+    if machine == "aarch64" and "tegra" in release:
         logger.info(
-            "TensorRT-LLM plugins for NCCL backend are not supported on Jetson devices"
+            "TensorRT-LLM plugins for NCCL backend are not supported on Jetson/Orin/Xavier (Tegra) devices."
         )
+        return False
+
     return True


@@ -905,7 +879,7 @@ def _extracted_dir_trtllm(platform: str) -> Path:
     return _cache_root() / "trtllm" / f"{__tensorrt_llm_version__}_{platform}"


-def download_and_get_plugin_lib_path(platform: str) -> Optional[str]:
+def download_and_get_plugin_lib_path() -> Optional[str]:
     """
     Returns the path to the TensorRT‑LLM shared library, downloading and extracting if necessary.

@@ -919,12 +893,13 @@ def download_and_get_plugin_lib_path(platform: str) -> Optional[str]:
         f"tensorrt_llm-{__tensorrt_llm_version__}-{_WHL_CPYTHON_VERSION}-"
         f"{_WHL_CPYTHON_VERSION}-{platform}.whl"
     )
+    platform_system = platform.system().lower()
     wheel_path = _cache_root() / wheel_filename
-    extract_dir = _extracted_dir_trtllm(platform)
+    extract_dir = _extracted_dir_trtllm(platform_system)
     # else will never be met though
     lib_filename = (
         "libnvinfer_plugin_tensorrt_llm.so"
-        if "linux" in platform
+        if "linux" in platform_system
         else "libnvinfer_plugin_tensorrt_llm.dll"
     )
     # eg: /tmp/torch_tensorrt_<username>/trtllm/0.17.0.post1_linux_x86_64/tensorrt_llm/libs/libnvinfer_plugin_tensorrt_llm.so
@@ -1057,10 +1032,7 @@ def load_tensorrt_llm_for_nccl() -> bool:
     Returns:
         bool: True if the plugin was successfully loaded and initialized, False otherwise.
     """
-    # Check platform compatibility first
-    platform = Platform.current_platform()
-    platform = str(platform).lower()
-    if not is_platform_supported_for_trtllm(platform):
+    if not is_platform_supported_for_trtllm():
         return False
     plugin_lib_path = os.environ.get("TRTLLM_PLUGINS_PATH")

@@ -1080,6 +1052,6 @@ def load_tensorrt_llm_for_nccl() -> bool:
             )
             return False

-        plugin_lib_path = download_and_get_plugin_lib_path(platform)
+        plugin_lib_path = download_and_get_plugin_lib_path()
         return load_and_initialize_trtllm_plugin(plugin_lib_path)  # type: ignore[arg-type]
     return False
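Taken together, the rewritten gate reduces to a small, self-contained predicate. A condensed sketch of what is_platform_supported_for_trtllm now does (mirroring the diff above, with logging omitted and a hypothetical standalone name):

# Condensed sketch of the new platform gate; logging omitted, name is illustrative.
import platform

def trtllm_nccl_platform_ok() -> bool:
    if "windows" in platform.system().lower():
        return False  # TensorRT-LLM plugins for NCCL are not supported on Windows
    if platform.machine().lower() == "aarch64" and "tegra" in platform.release().lower():
        return False  # Jetson/Orin/Xavier (Tegra) devices are unsupported
    return True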

tests/py/dynamo/distributed/test_nccl_ops.py

Lines changed: 1 addition & 5 deletions
@@ -8,7 +8,6 @@
 from distributed_utils import set_environment_variables_pytest
 from parameterized import parameterized
 from torch.testing._internal.common_utils import run_tests
-from torch_tensorrt._enums import Platform
 from torch_tensorrt.dynamo.utils import is_platform_supported_for_trtllm


@@ -42,12 +41,9 @@ def forward(self, x):
         return torch.ops._c10d_functional.wait_tensor(out)


-platform_str = str(Platform.current_platform()).lower()
-
-
 class TestNcclOpsConverter(DispatchTestCase):
     @unittest.skipIf(
-        not is_platform_supported_for_trtllm(platform_str),
+        not is_platform_supported_for_trtllm(),
         "Skipped on Windows, Jetson: NCCL backend is not supported.",
     )
     @classmethod
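Because the helper is now argument-free, the gate can be applied without precomputing a platform string at import time. A hypothetical pytest-style equivalent of the skip above, shown purely for illustration (not part of the commit):

# Hypothetical pytest-style variant of the same gate.
import pytest
from torch_tensorrt.dynamo.utils import is_platform_supported_for_trtllm

pytestmark = pytest.mark.skipif(
    not is_platform_supported_for_trtllm(),
    reason="Skipped on Windows, Jetson: NCCL backend is not supported.",
)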
