From e1a173196ae201a66a6fe0c3b8b7d43218e04a9a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 6 Aug 2025 11:31:35 -0700 Subject: [PATCH 01/12] initial check in for tensorrt rtx --- .bazelrc | 2 + .github/scripts/install-tensorrt-rtx.sh | 39 ++ .github/scripts/install-torch-tensorrt.sh | 12 + .../build-test-linux-aarch64-jetpack.yml | 4 +- .../workflows/build-test-linux-aarch64.yml | 4 +- .github/workflows/build-test-linux-x86_64.yml | 9 +- .../workflows/build-test-linux-x86_64_rtx.yml | 360 ++++++++++++++++ .github/workflows/build-test-windows.yml | 2 +- .github/workflows/build-test-windows_rtx.yml | 341 +++++++++++++++ ...nux_aarch64.yml => build_wheels_linux.yml} | 66 +-- .github/workflows/build_wheels_windows.yml | 394 ++++++++++++++++++ .github/workflows/linux-test.yml | 9 +- .github/workflows/windows-test.yml | 7 +- BUILD.bazel | 13 + MODULE.bazel | 17 + core/BUILD | 38 +- core/conversion/BUILD | 38 +- core/conversion/conversion.cpp | 2 +- core/conversion/conversionctx/BUILD | 38 +- .../conversionctx/ConversionCtx.cpp | 14 +- core/conversion/conversionctx/ConversionCtx.h | 2 + core/conversion/converters/BUILD | 76 +++- .../converters/impl/quantization.cpp | 11 +- core/conversion/evaluators/BUILD | 38 +- core/conversion/tensorcontainer/BUILD | 38 +- core/conversion/var/BUILD | 38 +- core/ir/BUILD | 38 +- core/lowering/BUILD | 38 +- core/lowering/passes/BUILD | 13 +- core/partitioning/BUILD | 38 +- core/partitioning/partitioningctx/BUILD | 38 +- core/partitioning/partitioninginfo/BUILD | 38 +- core/partitioning/segmentedblock/BUILD | 38 +- core/plugins/BUILD | 74 +++- core/runtime/BUILD | 38 +- core/util/BUILD | 48 ++- core/util/logging/BUILD | 35 +- core/util/trt_util.h | 2 +- cpp/BUILD | 39 +- cpp/CMakeLists.txt | 2 - cpp/bin/torchtrtc/BUILD | 31 +- cpp/bin/torchtrtc/fileio.h | 2 + cpp/bin/torchtrtc/main.cpp | 10 +- cpp/bin/torchtrtc/parser_util.h | 2 + cpp/include/torch_tensorrt/torch_tensorrt.h | 4 +- cpp/src/compile_spec.cpp | 3 +- 
dev_dep_versions.yml | 1 + docsrc/getting_started/tensorrt_rtx.rst | 65 +++ packaging/pre_build_script.sh | 7 + packaging/pre_build_script_windows.sh | 7 + packaging/smoke_test_windows.py | 5 +- py/torch_tensorrt/__init__.py | 81 +--- py/torch_tensorrt/_utils.py | 32 ++ .../csrc/register_tensorrt_classes.cpp | 2 + py/torch_tensorrt/csrc/tensorrt_classes.cpp | 3 +- py/torch_tensorrt/csrc/tensorrt_classes.h | 10 +- py/torch_tensorrt/csrc/torch_tensorrt_py.cpp | 8 +- .../dynamo/conversion/_TRTInterpreter.py | 28 +- .../dynamo/conversion/aten_ops_converters.py | 41 +- .../conversion/impl/normalization/ops.py | 5 +- .../dynamo/conversion/impl/quantize.py | 12 + .../dynamo/conversion/impl/unsqueeze.py | 20 +- .../runtime/_PythonTorchTensorRTModule.py | 3 +- py/torch_tensorrt/dynamo/utils.py | 3 +- .../fx/converters/acc_ops_converters.py | 13 +- py/torch_tensorrt/fx/fx2trt.py | 8 +- .../test/converters/acc_op/test_dequantize.py | 4 +- .../fx/test/converters/acc_op/test_pad.py | 4 +- .../acc_op/test_quantize_per_tensor.py | 4 +- .../converters/aten_op/test_reshape_aten.py | 7 +- py/torch_tensorrt/fx/tools/common_fx2trt.py | 3 +- py/torch_tensorrt/fx/utils.py | 10 +- py/torch_tensorrt/trt_alias.py | 159 +++++++ pyproject_rtx.toml.temp | 358 ++++++++++++++++ setup.py | 61 ++- tests/py/core/test_classes.py | 4 +- .../py/dynamo/conversion/test_nonzero_aten.py | 19 + tests/py/dynamo/models/test_models_export.py | 7 + .../runtime/test_000_compilation_settings.py | 3 +- tests/util/BUILD | 24 +- third_party/tensorrt_rtx/archive/BUILD | 68 +++ third_party/tensorrt_rtx/local/BUILD | 80 ++++ toolchains/ci_workspaces/MODULE.bazel.tmpl | 17 + toolchains/dep_collection/defs.bzl | 2 +- tools/debug/engine_visualization/__init__.py | 0 85 files changed, 2969 insertions(+), 362 deletions(-) create mode 100644 .github/scripts/install-tensorrt-rtx.sh create mode 100644 .github/workflows/build-test-linux-x86_64_rtx.yml create mode 100644 .github/workflows/build-test-windows_rtx.yml rename 
.github/workflows/{build_wheels_linux_aarch64.yml => build_wheels_linux.yml} (86%) create mode 100644 .github/workflows/build_wheels_windows.yml create mode 100644 docsrc/getting_started/tensorrt_rtx.rst create mode 100644 py/torch_tensorrt/trt_alias.py create mode 100644 pyproject_rtx.toml.temp create mode 100644 third_party/tensorrt_rtx/archive/BUILD create mode 100644 third_party/tensorrt_rtx/local/BUILD create mode 100644 tools/debug/engine_visualization/__init__.py diff --git a/.bazelrc b/.bazelrc index 03aa1d718e..8422bb9cf7 100644 --- a/.bazelrc +++ b/.bazelrc @@ -39,6 +39,8 @@ build:cxx11_abi --define=abi=cxx11_abi build:jetpack --//toolchains/dep_collection:compute_libs=jetpack +build:rtx --//toolchains/dep_collection:compute_libs=rtx + build:ci_testing --define=torchtrt_src=prebuilt --cxxopt="-DDISABLE_TEST_IN_CI" --action_env "NVIDIA_TF32_OVERRIDE=0" build:use_precompiled_torchtrt --define=torchtrt_src=prebuilt diff --git a/.github/scripts/install-tensorrt-rtx.sh b/.github/scripts/install-tensorrt-rtx.sh new file mode 100644 index 0000000000..bb44681607 --- /dev/null +++ b/.github/scripts/install-tensorrt-rtx.sh @@ -0,0 +1,39 @@ + +install_tensorrt_rtx() { + if [[ ${USE_RTX} == true ]]; then + install_wheel_or_not=${1:-false} + echo "It is the tensorrt-rtx build, install tensorrt-rtx with install_wheel_or_not:${install_wheel_or_not}" + PLATFORM=$(python -c "import sys; print(sys.platform)") + echo "PLATFORM: $PLATFORM" + # PYTHON_VERSION is always set in the CI environment, add this check for local testing + if [ -z "$PYTHON_VERSION" ]; then + echo "Error: PYTHON_VERSION environment variable is not set or empty. 
example format: export PYTHON_VERSION=3.11" + exit 1 + fi + + # python version is like 3.11, we need to convert it to cp311 + CPYTHON_TAG="cp${PYTHON_VERSION//./}" + if [[ ${PLATFORM} == win32 ]]; then + curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip -o TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip + unzip TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip + rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib + export LD_LIBRARY_PATH=${rtx_lib_dir}:$LD_LIBRARY_PATH + echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" + if [[ ${install_wheel_or_not} == true ]]; then + pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-win_amd64.whl + fi + else + curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz -o TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz + tar -xzf TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz + rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib + export LD_LIBRARY_PATH=${rtx_lib_dir}:$LD_LIBRARY_PATH + echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" + if [[ ${install_wheel_or_not} == true ]]; then + pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-linux_x86_64.whl + fi + fi + else + echo "It is the standard tensorrt build, skip install tensorrt-rtx" + fi + +} \ No newline at end of file diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh index 94de5f022a..49a367b832 100755 --- a/.github/scripts/install-torch-tensorrt.sh +++ b/.github/scripts/install-torch-tensorrt.sh @@ -21,6 +21,12 @@ pip uninstall -y torch torchvision pip install --force-reinstall --pre ${TORCHVISION} --index-url ${INDEX_URL} pip install --force-reinstall --pre ${TORCH} --index-url ${INDEX_URL} +if [[ ${USE_RTX} == true ]]; then + source .github/scripts/install-tensorrt-rtx.sh + # tensorrt-rtx is not publicly available, so we need to install 
the wheel from the tar ball + install_wheel_or_not=true + install_tensorrt_rtx ${install_wheel_or_not} +fi # Install Torch-TensorRT if [[ ${PLATFORM} == win32 ]]; then @@ -29,4 +35,10 @@ else pip install /opt/torch-tensorrt-builds/torch_tensorrt*.whl fi +if [[ ${USE_RTX} == true ]]; then + # currently tensorrt is installed automatically by install torch-tensorrt since it is a dependency of torch-tensorrt in pyproject.toml + # so we need to uninstall it to avoid conflict + pip uninstall -y tensorrt tensorrt_cu12 tensorrt_cu12_bindings tensorrt_cu12_libs +fi + echo -e "Running test script"; diff --git a/.github/workflows/build-test-linux-aarch64-jetpack.yml b/.github/workflows/build-test-linux-aarch64-jetpack.yml index e4e4fbfc30..33c3612d84 100644 --- a/.github/workflows/build-test-linux-aarch64-jetpack.yml +++ b/.github/workflows/build-test-linux-aarch64-jetpack.yml @@ -66,8 +66,8 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package - uses: ./.github/workflows/build_wheels_linux_aarch64.yml + name: Build torch-tensorrt whl package for aarch64-jetpack + uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} ref: "" diff --git a/.github/workflows/build-test-linux-aarch64.yml b/.github/workflows/build-test-linux-aarch64.yml index 1f83a51287..34b3e4fa34 100644 --- a/.github/workflows/build-test-linux-aarch64.yml +++ b/.github/workflows/build-test-linux-aarch64.yml @@ -62,8 +62,8 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package - uses: ./.github/workflows/build_wheels_linux_aarch64.yml + name: Build torch-tensorrt whl package for aarch64 + uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} ref: "" diff --git 
a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml index 51f3730d02..4b18ef559d 100644 --- a/.github/workflows/build-test-linux-x86_64.yml +++ b/.github/workflows/build-test-linux-x86_64.yml @@ -13,6 +13,7 @@ on: - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ workflow_dispatch: + jobs: generate-matrix: uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main @@ -60,8 +61,8 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package - uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main + name: Build torch-tensorrt whl package for x86_64 + uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} ref: "" @@ -74,6 +75,8 @@ jobs: package-name: ${{ matrix.package-name }} smoke-test-script: ${{ matrix.smoke-test-script }} trigger-event: ${{ github.event_name }} + architecture: "x86_64" + use-rtx: false tests-py-torchscript-fe: name: Test torchscript frontend [Python] @@ -338,5 +341,5 @@ jobs: popd concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} cancel-in-progress: true diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml new file mode 100644 index 0000000000..ab7c1ec9f2 --- /dev/null +++ b/.github/workflows/build-test-linux-x86_64_rtx.yml @@ -0,0 +1,360 @@ +name: Build and test Linux x86_64 wheels(RTX) + +on: + pull_request: + push: + branches: + - main + - nightly + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on 
release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: linux + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-rocm: false + with-cpu: false + + filter-matrix: + needs: [generate-matrix] + outputs: + matrix: ${{ steps.generate.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - uses: actions/checkout@v4 + with: + repository: pytorch/tensorrt + - name: Generate matrix + id: generate + run: | + set -eou pipefail + MATRIX_BLOB=${{ toJSON(needs.generate-matrix.outputs.matrix) }} + MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --matrix "${MATRIX_BLOB}")" + echo "${MATRIX_BLOB}" + echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" + + build: + needs: filter-matrix + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + pre-script: packaging/pre_build_script.sh + env-var-script: packaging/env_vars.txt + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + package-name: torch_tensorrt + name: Build torch-tensorrt-rtx whl package for x86_64 + uses: ./.github/workflows/build_wheels_linux.yml + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + env-var-script: ${{ matrix.env-var-script }} + post-script: ${{ matrix.post-script }} + package-name: ${{ matrix.package-name }} + smoke-test-script: ${{ matrix.smoke-test-script }} + trigger-event: ${{ github.event_name }} + architecture: "x86_64" + use-rtx: true + + tests-py-torchscript-fe: + name: Test torchscript 
frontend [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-torchscript-fe + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/modules + python hub.py + popd + pushd . + cd tests/py/ts + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ + popd + + tests-py-dynamo-converters: + name: Test dynamo converters [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-converters + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . 
+ cd tests/py/dynamo + export FORCE_TENSORRT_RTX=1 + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin_with_attrs.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_flashinfer_rmsnorm.py + popd + + tests-py-dynamo-fe: + name: Test dynamo frontend [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-fe + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . 
+ cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ + popd + + tests-py-dynamo-serde: + name: Test dynamo export serde [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-serde + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/reexport_test_results.xml --ir dynamo models/test_reexport.py + popd + + tests-py-torch-compile-be: + name: Test torch compile backend [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-torch-compile-be + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd 
. + cd tests/py + cd dynamo + python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + popd + + tests-py-dynamo-core: + name: Test dynamo core [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-core + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . 
+ cd tests/py + cd dynamo + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + popd + + tests-py-dynamo-cudagraphs: + name: Test dynamo cudagraphs [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-dynamo-cudagraphs + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . 
+ cd tests/py + cd dynamo + nvidia-smi + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py || true + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py || true + popd + + tests-py-core: + name: Test core [Python] + needs: [filter-matrix, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + pre-script: packaging/pre_build_script.sh + post-script: packaging/post_build_script.sh + smoke-test-script: packaging/smoke_test_script.sh + uses: ./.github/workflows/linux-test.yml + with: + job-name: tests-py-core + repository: "pytorch/tensorrt" + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.filter-matrix.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py/core + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . 
+ popd + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-rtx-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + cancel-in-progress: true diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index ad8ae83846..2d402a8799 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -71,7 +71,7 @@ jobs: smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt name: Build torch-tensorrt whl package - uses: pytorch/test-infra/.github/workflows/build_wheels_windows.yml@main + uses: ./.github/workflows/build_wheels_windows.yml with: repository: ${{ matrix.repository }} ref: "" diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml new file mode 100644 index 0000000000..31078c39b7 --- /dev/null +++ b/.github/workflows/build-test-windows_rtx.yml @@ -0,0 +1,341 @@ +name: Build and test Windows wheels(RTX) + +on: + pull_request: + push: + branches: + - main + - nightly + - release/* + tags: + # NOTE: Binary build pipelines should only get triggered on release candidate builds + # Release candidate tags look like: v1.11.0-rc1 + - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+ + workflow_dispatch: + +jobs: + generate-matrix: + uses: pytorch/test-infra/.github/workflows/generate_binary_build_matrix.yml@main + with: + package-type: wheel + os: windows + test-infra-repository: pytorch/test-infra + test-infra-ref: main + with-rocm: false + with-cpu: false + + filter-matrix: + needs: [generate-matrix] + outputs: + matrix: ${{ steps.generate.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - uses: actions/checkout@v4 + with: + repository: pytorch/tensorrt + - name: Generate matrix + id: generate + run: | + set -eou pipefail + MATRIX_BLOB=${{ 
toJSON(needs.generate-matrix.outputs.matrix) }} + MATRIX_BLOB="$(python3 .github/scripts/filter-matrix.py --matrix "${MATRIX_BLOB}")" + echo "${MATRIX_BLOB}" + echo "matrix=${MATRIX_BLOB}" >> "${GITHUB_OUTPUT}" + + substitute-runner: + needs: filter-matrix + outputs: + matrix: ${{ steps.substitute.outputs.matrix }} + runs-on: ubuntu-latest + steps: + - name: Substitute runner + id: substitute + run: | + echo matrix="$(echo '${{ needs.filter-matrix.outputs.matrix }}' | sed -e 's/windows.g4dn.xlarge/windows.g5.4xlarge.nvidia.gpu/g')" >> ${GITHUB_OUTPUT} + + build: + needs: substitute-runner + permissions: + id-token: write + contents: read + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + pre-script: packaging/pre_build_script_windows.sh + env-script: packaging/vc_env_helper.bat + smoke-test-script: packaging/smoke_test_windows.py + package-name: torch_tensorrt + name: Build torch-tensorrt-rtx whl package + uses: ./.github/workflows/build_wheels_windows.yml + with: + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: ${{ matrix.pre-script }} + env-script: ${{ matrix.env-script }} + smoke-test-script: ${{ matrix.smoke-test-script }} + package-name: ${{ matrix.package-name }} + trigger-event: ${{ github.event_name }} + wheel-build-params: "--use-rtx" + use-rtx: true + timeout: 120 + + tests-py-torchscript-fe: + name: Test torchscript frontend [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-torchscript-fe + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: 
packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/modules + python hub.py + popd + pushd . + cd tests/py/ts + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_api_test_results.xml api/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_models_test_results.xml models/ + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/ts_integrations_test_results.xml integrations/ + popd + + tests-py-dynamo-converters: + name: Test dynamo converters [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-converters + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ + popd + + tests-py-dynamo-fe: + name: Test dynamo frontend [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-fe + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . 
+ cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dyn_models_export.xml --ir dynamo models/ + popd + + tests-py-dynamo-serde: + name: Test dynamo export serde [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-serde + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/export_serde_test_results.xml --ir dynamo models/test_export_serde.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/reexport_test_results.xml --ir dynamo models/test_reexport.py + popd + + tests-py-torch-compile-be: + name: Test torch compile backend [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-torch-compile-be + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . 
+ cd tests/py + cd dynamo + python -m pytest -ra -n 10 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_be_test_results.xml backend/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_complete_be_e2e_test_results.xml --ir torch_compile models/test_models.py + ../../../packaging/vc_env_helper.bat python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/torch_compile_dyn_models_export.xml --ir torch_compile models/test_dyn_models.py + popd + + tests-py-dynamo-core: + name: Test dynamo core [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-core + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . 
+ cd tests/py + cd dynamo + ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_test_results.xml --ignore runtime/test_002_cudagraphs_py.py --ignore runtime/test_002_cudagraphs_cpp.py runtime/ + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_partitioning_test_results.xml partitioning/ + ../../../packaging/vc_env_helper.bat python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_lowering_test_results.xml lowering/ + popd + + tests-py-dynamo-cudagraphs: + name: Test dynamo cudagraphs [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-dynamo-cudagraphs + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . 
+ cd tests/py + cd dynamo + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_cpp_test_results.xml runtime/test_002_cudagraphs_cpp.py + python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_dynamo_core_runtime_cudagraphs_py_test_results.xml runtime/test_002_cudagraphs_py.py + popd + + tests-py-core: + name: Test core [Python] + needs: [substitute-runner, build] + strategy: + fail-fast: false + matrix: + include: + - repository: pytorch/tensorrt + package-name: torch_tensorrt + uses: ./.github/workflows/windows-test.yml + with: + job-name: tests-py-core + repository: ${{ matrix.repository }} + ref: "" + test-infra-repository: pytorch/test-infra + test-infra-ref: main + build-matrix: ${{ needs.substitute-runner.outputs.matrix }} + pre-script: packaging/driver_upgrade.bat + use-rtx: true + script: | + set -euo pipefail + export USE_HOST_DEPS=1 + export CI_BUILD=1 + export FORCE_TENSORRT_RTX=1 + pushd . + cd tests/py/core + python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . 
+ popd + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-tensorrt-rtx-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + cancel-in-progress: true diff --git a/.github/workflows/build_wheels_linux_aarch64.yml b/.github/workflows/build_wheels_linux.yml similarity index 86% rename from .github/workflows/build_wheels_linux_aarch64.yml rename to .github/workflows/build_wheels_linux.yml index e2bfeb1540..ef0805cf73 100644 --- a/.github/workflows/build_wheels_linux_aarch64.yml +++ b/.github/workflows/build_wheels_linux.yml @@ -1,4 +1,6 @@ -name: Build Linux Wheels For aarch64 +# This workflow is used to build the wheels for the Linux platform +# it should be periodically synced with https://github.com/pytorch/test-infra/blob/main/.github/workflows/build_wheels_linux.yml +name: Build Linux Wheels on: workflow_call: @@ -88,11 +90,6 @@ on: required: false default: "python -m build --wheel" type: string - is-jetpack: - description: Set to true if the build is for jetpack - required: false - default: false - type: boolean pip-install-torch-extra-args: # NOTE: Why does this exist? 
# Well setuptools / python packaging doesn't actually allow you to specify dependencies @@ -110,11 +107,22 @@ on: description: 'Timeout for the job (in minutes)' default: 120 type: number + use-rtx: + description: "Set to true to use TensorRT-RTX" + default: false + type: boolean + required: false + is-jetpack: + description: Set to true if the build is for jetpack + required: false + default: false + type: boolean secrets: PYPI_API_TOKEN: description: An optional token to upload to pypi required: false + permissions: id-token: write contents: read @@ -133,7 +141,8 @@ jobs: UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} ARCH: ${{ inputs.architecture }} BUILD_TARGET: ${{ inputs.build-target }} - name: build-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }} + USE_RTX: ${{ inputs.use-rtx }} + name: build-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }}-${{ inputs.architecture }}-${{ inputs.use-rtx }}-${{ inputs.is-jetpack }} runs-on: ${{ matrix.validation_runner }} environment: ${{(inputs.trigger-event == 'schedule' || (inputs.trigger-event == 'push' && (startsWith(github.event.ref, 'refs/heads/nightly') || startsWith(github.event.ref, 'refs/tags/v')))) && 'pytorchbot-env' || ''}} container: @@ -251,7 +260,6 @@ jobs: working-directory: ${{ inputs.repository }} shell: bash -l {0} run: | - #set -euxo pipefail set -x source "${BUILD_ENV_FILE}" export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" @@ -261,28 +269,37 @@ jobs: BUILD_VERSION="${BUILD_VERSION}+${CU_VERSION}" fi echo "BUILD_VERSION=$BUILD_VERSION" - if [[ ${{ inputs.is-jetpack }} == false ]]; then - ${CONDA_RUN} python setup.py bdist_wheel + echo "USE_RTX=$USE_RTX" + echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" + if [[ ${{ inputs.use-rtx }} == true ]]; then + echo "Building tensorrt-rtx wheel" + ${CONDA_RUN} python setup.py bdist_wheel --use-rtx else - ${CONDA_RUN}
python setup.py bdist_wheel --jetpack --plat-name=linux_tegra_aarch64 + if [[ ${{ inputs.is-jetpack }} == true ]]; then + echo "Building tensorrt wheel for jetpack" + ${CONDA_RUN} python setup.py bdist_wheel --jetpack --plat-name=linux_tegra_aarch64 + else + echo "Building standard tensorrt wheel" + ${CONDA_RUN} python setup.py bdist_wheel + fi fi - name: Repair Manylinux_2_28 Wheel shell: bash -l {0} env: PACKAGE_NAME: ${{ inputs.package-name }} SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} + if: ${{ inputs.architecture == 'x86_64' }} run: | set -euxo pipefail source "${BUILD_ENV_FILE}" - # for pkg in ${{ inputs.repository }}/dist/*-linux_*.whl; do - # # if the glob didn't match anything - # if [[ ! -e $pkg ]]; then - # continue - # fi - # abs_pkg=$(realpath $pkg) - # ./test-infra/.github/scripts/repair_manylinux_2_28.sh $abs_pkg - # done - echo "Repair Manylinux_2_28 Wheel is not supported for aarch64" + for pkg in ${{ inputs.repository }}/dist/*-linux_*.whl; do + # if the glob didn't match anything + if [[ ! 
-e $pkg ]]; then + continue + fi + abs_pkg=$(realpath $pkg) + ./test-infra/.github/scripts/repair_manylinux_2_28.sh $abs_pkg + done - name: Run Post-Script if: ${{ inputs.post-script != '' }} uses: ./test-infra/.github/actions/run-script-with-cache @@ -291,7 +308,6 @@ jobs: script: ${{ inputs.post-script }} - name: Smoke Test shell: bash -l {0} - if: ${{ inputs.is-jetpack == false }} env: PACKAGE_NAME: ${{ inputs.package-name }} SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} @@ -336,8 +352,10 @@ jobs: upload: needs: build uses: pytorch/test-infra/.github/workflows/_binary_upload.yml@main - # for jetpack builds, only upload to pytorch index for nightly builds + # if it is not the jetpack build or tensorrt-rtx build, upload to pytorch index, + # if it is the jetpack build only upload to pytorch_index for nightly builds + # for tensorrt-rtx build, do not upload to pytorch index at all + if: ${{ (inputs.is-jetpack == false && inputs.use-rtx == false) || (inputs.is-jetpack == true && github.event_name == 'push' && startsWith(github.event.ref, 'refs/heads/nightly')) }} with: repository: ${{ inputs.repository }} ref: ${{ inputs.ref }} @@ -351,5 +369,5 @@ jobs: PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }} concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{inputs.use-rtx}}-${{inputs.architecture}}-${{inputs.is-jetpack}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }} cancel-in-progress: true diff --git a/.github/workflows/build_wheels_windows.yml b/.github/workflows/build_wheels_windows.yml new file mode 100644 index 0000000000..7af9dead8f --- /dev/null +++ b/.github/workflows/build_wheels_windows.yml @@ -0,0
+1,394 @@ +# This workflow is used to build the wheels for the Windows platform +# it should be periodically synced with https://github.com/pytorch/test-infra/blob/main/.github/workflows/build_wheels_windows.yml +name: Build Windows Wheels + +on: + workflow_call: + inputs: + repository: + description: 'Repository to checkout, defaults to ""' + default: "" + type: string + ref: + description: 'Reference to checkout, defaults to "nightly"' + default: "nightly" + type: string + test-infra-repository: + description: "Test infra repository to use" + default: "pytorch/test-infra" + type: string + test-infra-ref: + description: "Test infra reference to use" + default: "" + type: string + build-matrix: + description: "Build matrix to utilize" + default: "" + type: string + pre-script: + description: "Pre script to run prior to build" + default: "" + type: string + env-script: + description: "Script to setup environment variables for the build" + default: "" + type: string + wheel-build-params: + description: "Additional parameters for bdist_wheel" + default: "" + type: string + post-script: + description: "Post script to run prior to build" + default: "" + type: string + smoke-test-script: + description: "Script for Smoke Test for a specific domain" + default: "" + type: string + package-name: + description: "Name of the actual python package that is imported" + default: "" + type: string + build-platform: + description: Platform to build wheels, choose from 'python-build-package' or 'setup-py' + required: false + type: string + default: 'setup-py' + build-command: + description: The build command to use if build-platform is python-build-package + required: false + default: "python -m build --wheel" + type: string + trigger-event: + description: "Trigger Event in caller that determines whether or not to upload" + default: "" + type: string + cache-path: + description: "The path(s) on the runner to cache or restore. The path is relative to repository." 
+ default: "" + type: string + cache-key: + description: "The key created when saving a cache and the key used to search for a cache." + default: "" + type: string + submodules: + description: "Works as stated in actions/checkout, but the default value is recursive" + required: false + type: string + default: recursive + timeout: + description: 'Timeout for the job (in minutes)' + default: 60 + type: number + use-rtx: + description: "Set to true if use TensorRT-RTX" + default: false + type: boolean + required: false + architecture: + description: 'CPU architecture to build for' + default: "x64" + type: string + +permissions: + id-token: write + contents: read + +jobs: + build: + strategy: + fail-fast: false + matrix: ${{ fromJSON(inputs.build-matrix) }} + env: + PYTHON_VERSION: ${{ matrix.python_version }} + PACKAGE_TYPE: wheel + REPOSITORY: ${{ inputs.repository }} + REF: ${{ inputs.ref }} + CU_VERSION: ${{ matrix.desired_cuda }} + UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} + USE_RTX: ${{ inputs.use-rtx }} + name: build-${{ matrix.build_name }} + runs-on: ${{ matrix.validation_runner }} + defaults: + run: + shell: bash -l {0} + # If a build is taking longer than 60 minutes on these runners we need + # to have a conversation + timeout-minutes: ${{ inputs.timeout }} + steps: + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + # Support the use case where we need to checkout someone's fork + repository: ${{ inputs.test-infra-repository }} + ref: ${{ inputs.test-infra-ref }} + path: test-infra + - uses: ./test-infra/.github/actions/setup-ssh + name: Setup SSH + with: + github-secret: ${{ secrets.GITHUB_TOKEN }} + activate-with-label: false + instructions: "SSH with rdesktop using ssh -L 3389:localhost:3389 %%username%%@%%hostname%%" + - name: Add Conda scripts to GitHub path + if: inputs.architecture == 'x64' + run: | + echo "C:/Jenkins/Miniconda3/Scripts" >> $GITHUB_PATH + - name: Setup Git for Windows' minimal SDK + 
env: + DEPENDENCIES_DIR: c:\temp\dependencies\ + if: inputs.architecture == 'arm64' + uses: git-for-windows/setup-git-for-windows-sdk@v1 + with: + architecture: aarch64 + path: "${{env.DEPENDENCIES_DIR}}\\git" + - uses: ./test-infra/.github/actions/set-channel + - name: Set PYTORCH_VERSION + if: env.CHANNEL == 'test' + run: | + # When building RC, set the version to be the current candidate version, + # otherwise, leave it alone so nightly will pick up the latest + echo "PYTORCH_VERSION=${{ matrix.stable_version }}" >> "${GITHUB_ENV}" + - uses: ./test-infra/.github/actions/setup-binary-builds + if: inputs.architecture == 'x64' + with: + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref }} + submodules: ${{ inputs.submodules }} + setup-miniconda: false + python-version: ${{ env.PYTHON_VERSION }} + cuda-version: ${{ env.CU_VERSION }} + arch: ${{ inputs.architecture }} + - name: Install XPU support package + if: ${{ matrix.gpu_arch_type == 'xpu' }} + env: + XPU_VERSION: '2025.1' + run: | + cmd //c .\\test-infra\\.github\\scripts\\install_xpu.bat + - name: Checkout Target Repository (${{ env.REPOSITORY }}) + if: inputs.architecture == 'arm64' + uses: actions/checkout@v4 + with: + repository: ${{ env.REPOSITORY }} + ref: ${{ env.REF }} + path: ${{ env.REPOSITORY }} + submodules: recursive + - name: Bootstrap python + if: inputs.architecture == 'arm64' + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python_version }} + architecture: arm64 + - name: Install torch dependency + if: inputs.architecture == 'x64' + run: | + source "${BUILD_ENV_FILE}" + # shellcheck disable=SC2086 + ${CONDA_RUN} ${PIP_INSTALL_TORCH} + - name: Run Pre-Script with Caching + if: ${{ inputs.pre-script != '' && inputs.architecture == 'x64' }} + uses: ./test-infra/.github/actions/run-script-with-cache + with: + cache-path: ${{ inputs.cache-path }} + cache-key: ${{ inputs.cache-key }} + repository: ${{ inputs.repository }} + script: ${{ inputs.pre-script }} + is_windows: 
'enabled' + - name: Run Pre-Script Arm64 + if: ${{ inputs.pre-script != '' && inputs.architecture == 'arm64' }} + env: + DOWNLOADS_DIR: c:\temp\downloads\ + DEPENDENCIES_DIR: c:\temp\dependencies\ + SCRIPTS_DIR: test-infra\\.github\\scripts\\winarm64 + SRC_DIR: ${{ inputs.repository }} + PRE_SCRIPT: ${{ inputs.pre-script }} + shell: cmd + run: | + set VS_PATH=C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat + set GIT_BASH=%DEPENDENCIES_DIR%\git\usr\bin\bash.exe + + cd %SRC_DIR% + + call "%VS_PATH%" arm64 + "%GIT_BASH%" -c "bash --noprofile --norc %PRE_SCRIPT%" + - name: Install certificates for Arm64 runner + if: ${{ inputs.architecture == 'arm64' }} + working-directory: ${{ inputs.repository }} + shell: cmd + run: | + call .\.venv\Scripts\activate.bat + + pip install --upgrade certifi==2025.04.26 + for /f "delims=" %%A in ('python -m certifi') do set CERT_PATH=%%A + echo Using cert bundle at: %CERT_PATH% + + set SSL_CERT_FILE=%CERT_PATH% + set REQUESTS_CA_BUNDLE=%CERT_PATH% + + echo SSL_CERT_FILE=%CERT_PATH% >> %GITHUB_ENV% + echo REQUESTS_CA_BUNDLE=%CERT_PATH% >> %GITHUB_ENV% + - name: Build clean + if: inputs.architecture == 'x64' + working-directory: ${{ inputs.repository }} + env: + ENV_SCRIPT: ${{ inputs.env-script }} + run: | + source "${BUILD_ENV_FILE}" + if [[ -z "${ENV_SCRIPT}" ]]; then + ${CONDA_RUN} python setup.py clean + else + if [[ ! 
-f ${ENV_SCRIPT} ]]; then + echo "::error::Specified env-script file (${ENV_SCRIPT}) not found" + exit 1 + else + ${CONDA_RUN} ${ENV_SCRIPT} python setup.py clean + fi + fi + - name: Set PYTORCH_VERSION on x64 + if: inputs.architecture == 'x64' + working-directory: ${{ inputs.repository }} + run: | + source "${BUILD_ENV_FILE}" + if [[ "$CU_VERSION" == "cpu" ]]; then + # CUDA and CPU are ABI compatible on the CPU-only parts, so strip + # in this case + export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//' | sed 's/+.\+//')" + else + export PYTORCH_VERSION="$(${CONDA_RUN} pip show torch | grep ^Version: | sed 's/Version: *//')" + fi + - name: Build the wheel (python-build-package) X64 + if: ${{ inputs.build-platform == 'python-build-package' && inputs.architecture == 'x64' }} + working-directory: ${{ inputs.repository }} + env: + ENV_SCRIPT: ${{ inputs.env-script }} + BUILD_PARAMS: ${{ inputs.wheel-build-params }} + run: | + source "${BUILD_ENV_FILE}" + ${CONDA_RUN} python -m pip install build==1.2.2 + echo "Successfully installed Python build package" + if [[ -z "${ENV_SCRIPT}" ]]; then + ${CONDA_RUN} ${{ inputs.build-command }} + else + ${CONDA_RUN} ${ENV_SCRIPT} ${{ inputs.build-command }} + fi + - name: Build the wheel (setup-py) X64 + if: ${{ inputs.build-platform == 'setup-py' && inputs.architecture == 'x64' }} + working-directory: ${{ inputs.repository }} + env: + ENV_SCRIPT: ${{ inputs.env-script }} + BUILD_PARAMS: ${{ inputs.wheel-build-params }} + run: | + source "${BUILD_ENV_FILE}" + if [[ -z "${ENV_SCRIPT}" ]]; then + ${CONDA_RUN} python setup.py bdist_wheel + else + ${CONDA_RUN} ${ENV_SCRIPT} python setup.py bdist_wheel ${BUILD_PARAMS} + fi + - name: Build the wheel (bdist_wheel) Arm64 + if: inputs.architecture == 'arm64' + env: + SRC_DIR: ${{ inputs.repository }} + DEPENDENCIES_DIR: c:\temp\dependencies\ + shell: cmd + run: | + set CONDA_PREFIX=%DEPENDENCIES_DIR% + set PATH=%PATH%;%DEPENDENCIES_DIR%\Library\bin + 
set DISTUTILS_USE_SDK=1 + set VS_PATH=C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat + + call "%VS_PATH%" arm64 + cd %SRC_DIR% + call .venv\Scripts\activate.bat + + pip install --upgrade setuptools==72.1.0 + python setup.py bdist_wheel + - name: Run post-script + working-directory: ${{ inputs.repository }} + env: + POST_SCRIPT: ${{ inputs.post-script }} + ENV_SCRIPT: ${{ inputs.env-script }} + if: ${{ inputs.post-script != '' && inputs.architecture == 'x64'}} + run: | + set -euxo pipefail + source "${BUILD_ENV_FILE}" + ${CONDA_RUN} ${ENV_SCRIPT} ${POST_SCRIPT} + - name: Smoke Test X64 + if: inputs.architecture == 'x64' + env: + ENV_SCRIPT: ${{ inputs.env-script }} + PACKAGE_NAME: ${{ inputs.package-name }} + USE_RTX: ${{ inputs.use-rtx }} + SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} + run: | + source "${BUILD_ENV_FILE}" + WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/") + echo "$WHEEL_NAME" + ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" + if [[ $USE_RTX == true ]]; then + export FORCE_TENSORRT_RTX=1 + # TODO: lan to remove this once we have a better way to handle the LD_LIBRARY_PATH + # the LD_LIBRARY_PATH set in the pre_build_script_windows.sh will not be available in the smoke test, have to set it here again + export LD_LIBRARY_PATH=${{ inputs.repository }}/TensorRT-RTX-1.0.0.21/lib:$LD_LIBRARY_PATH + fi + if [[ ! 
-f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" + ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + else + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" + ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" + fi + - name: Smoke Test ARM64 + if: inputs.architecture == 'arm64' + env: + PACKAGE_NAME: ${{ inputs.package-name }} + SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} + SRC_DIR: ${{ inputs.repository }} + run: | + cd $SRC_DIR + source .venv/Scripts/activate + whl=$(find dist -name "${{env.PACKAGE_NAME}}-*.whl" | head -n 1) + pip install $whl + + if [[ ! -f ${SMOKE_TEST_SCRIPT} ]]; then + echo "${SMOKE_TEST_SCRIPT} not found" + python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + else + echo "${SMOKE_TEST_SCRIPT} found" + python "$SMOKE_TEST_SCRIPT" + fi + # NB: Only upload to GitHub after passing smoke tests + - name: Get Artifact name + if: inputs.architecture == 'arm64' + env: + REPOSITORY: ${{ inputs.repository }} + REF: ${{ inputs.ref }} + PYTHON_VERSION: ${{ matrix.python_version }} + CU_VERSION: ${{ env.CU_VERSION }} + ARCH: ${{ inputs.architecture }} + run: | + echo "ARTIFACT_NAME=${REPOSITORY//\//_}_${REF//\//_}_${PYTHON_VERSION}_${CU_VERSION}_${ARCH}" >> "${GITHUB_ENV}" + - name: Upload wheel to GitHub + continue-on-error: true + uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2 + with: + name: ${{ env.ARTIFACT_NAME }} + path: ${{ inputs.repository }}/dist/ + - uses: ./test-infra/.github/actions/teardown-windows + if: inputs.architecture == 'x64' + name: Teardown Windows + + upload: + needs: build + uses: pytorch/test-infra/.github/workflows/_binary_upload.yml@main + # for tensorrt-rtx build, do not upload to pytorch index at all + 
if: ${{ inputs.use-rtx == false }} + with: + repository: ${{ inputs.repository }} + ref: ${{ inputs.ref }} + test-infra-repository: ${{ inputs.test-infra-repository }} + test-infra-ref: ${{ inputs.test-infra-ref }} + build-matrix: ${{ inputs.build-matrix }} + trigger-event: ${{ inputs.trigger-event }} + architecture: ${{ inputs.architecture }} + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }} + cancel-in-progress: true \ No newline at end of file diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml index 15016ecd36..9883db653d 100644 --- a/.github/workflows/linux-test.yml +++ b/.github/workflows/linux-test.yml @@ -53,6 +53,11 @@ on: description: 'Name to give artifacts uploaded from ${RUNNER_ARTIFACT_DIR}' default: '' type: string + use-rtx: + description: "Whether to use TensorRT-RTX" + default: false + type: boolean + required: false jobs: test: @@ -68,6 +73,7 @@ jobs: SCRIPT: ${{ inputs.script }} RUNNER_TEST_RESULTS_DIR: /tmp/test_results ARCH: ${{ inputs.architecture }} + USE_RTX: ${{ inputs.use-rtx }} DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_${{ inputs.architecture }} name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} runs-on: ${{ matrix.validation_runner }} @@ -135,6 +141,7 @@ jobs: working-directory: ${{ inputs.repository }} env: ALL_SECRETS: ${{ toJSON(secrets) }} + USE_RTX: ${{ inputs.use-rtx }} run: | set -euxo pipefail # shellcheck disable=SC2086 @@ -203,5 +210,5 @@ jobs: s3-prefix: ${{ env.REPOSITORY }}/${{ github.event.pull_request.number }} concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + group: ${{ 
github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{inputs.use-rtx}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} cancel-in-progress: true diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index 8dc1b107d3..dcd4351fb4 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -43,6 +43,10 @@ on: description: 'CPU architecture to build for' default: "x64" type: string + use-rtx: + description: "Whether to use TensorRT-RTX" + default: false + type: boolean jobs: test: strategy: @@ -56,6 +60,7 @@ jobs: CU_VERSION: ${{ matrix.desired_cuda }} SCRIPT: ${{ inputs.script }} PYTHONUTF8: 1 + USE_RTX: ${{ inputs.use-rtx }} DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_win_amd64 name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} runs-on: ${{ matrix.validation_runner }} @@ -147,5 +152,5 @@ jobs: uses: ./test-infra/.github/actions/teardown-windows concurrency: - group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref_name }}-${{inputs.use-rtx}}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}-${{ inputs.job-name }} cancel-in-progress: true diff --git a/BUILD.bazel b/BUILD.bazel index 950839a40e..11a96d6ae3 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -7,6 +7,16 @@ config_setting( ], ) +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + pkg_tar( name = "include_core", package_dir = "include/torch_tensorrt", @@ -52,6 +62,7 @@ pkg_tar( 
pkg_tar( name = "lib", srcs = select({ + ":rtx_win": ["//cpp/lib:torchtrt.dll"], ":windows": ["//cpp/lib:torchtrt.dll"], "//conditions:default": [ "//cpp/lib:libtorchtrt.so", @@ -66,6 +77,7 @@ pkg_tar( pkg_tar( name = "lib_rt", srcs = select({ + ":rtx_win": ["//cpp/lib:torchtrt_runtime.dll"], ":windows": ["//cpp/lib:torchtrt_runtime.dll"], "//conditions:default": [ "//cpp/lib:libtorchtrt_runtime.so", @@ -98,6 +110,7 @@ pkg_tar( ":include_core", ":lib", ] + select({ + ":rtx_win": [], ":windows": [], "//conditions:default": [":bin"], }), diff --git a/MODULE.bazel b/MODULE.bazel index 1b66e2c900..767800e591 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -109,6 +109,15 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz", + ], +) + http_archive( name = "tensorrt_sbsa", build_file = "@//third_party/tensorrt/archive:BUILD", @@ -136,6 +145,14 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx_win", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip", + ], +) #################################################################################### # Locally installed dependencies (use in cases of custom dependencies or aarch64) #################################################################################### diff --git a/core/BUILD b/core/BUILD index 6f5cfad30f..c6744c66c1 100644 --- a/core/BUILD +++ b/core/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = 
"rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -59,14 +80,17 @@ cc_library( "//core/runtime", "//core/util/logging", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/BUILD b/core/conversion/BUILD index ff87c5a4b8..480481e6bd 100644 --- a/core/conversion/BUILD +++ b/core/conversion/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + 
"@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -54,14 +75,17 @@ cc_library( "//core/ir", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/conversion.cpp b/core/conversion/conversion.cpp index f8a26e8d77..a3b50430cc 100644 --- a/core/conversion/conversion.cpp +++ b/core/conversion/conversion.cpp @@ -204,7 +204,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef input "Optimization profile is invalid, please check the input range provided (conversion.AddInputs)"); ctx->cfg->addOptimizationProfile(profile); -#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1) +#ifndef TRT_MAJOR_RTX && (NV_TENSORRT_MAJOR > 7 || 
(NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)) if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) { ctx->cfg->setCalibrationProfile(profile); } diff --git a/core/conversion/conversionctx/BUILD b/core/conversion/conversionctx/BUILD index b6820fc757..d0ad2e7bd1 100644 --- a/core/conversion/conversionctx/BUILD +++ b/core/conversion/conversionctx/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -49,14 +70,17 @@ cc_library( "//core/ir", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": 
["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp index 2eb363706f..625ef1b669 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -31,8 +31,8 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) { if (s.device.device_type == nvinfer1::DeviceType::kDLA) { os << "\n DLACore: " << s.device.dla_core; } - os << "\n Engine Capability: " << s.capability \ - << "\n Calibrator Created: " << (s.calibrator != nullptr); + os << "\n Engine Capability: " << s.capability; + // << "\n Calibrator Created: " << (s.calibrator != nullptr); return os; } // clang-format on @@ -59,11 +59,16 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) for (auto p = settings.enabled_precisions.begin(); p != settings.enabled_precisions.end(); ++p) { switch (*p) { case nvinfer1::DataType::kHALF: +// tensorrt_rtx is strong typed, cannot set fp16 by builder config, only do this for tensorrt build +#ifndef TRT_MAJOR_RTX TORCHTRT_CHECK( builder->platformHasFastFp16(), "Requested inference in FP16 but platform does not support FP16"); cfg->setFlag(nvinfer1::BuilderFlag::kFP16); break; +#endif case nvinfer1::DataType::kINT8: +// tensorrt_rtx is strong typed, cannot set int8 by builder config, only do this for tensorrt build +#ifndef TRT_MAJOR_RTX TORCHTRT_CHECK( builder->platformHasFastInt8(), "Requested inference in INT8 but platform does not support INT8"); cfg->setFlag(nvinfer1::BuilderFlag::kINT8); @@ -74,6 +79,7 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) cfg->setInt8Calibrator(settings.calibrator); } break; +#endif case nvinfer1::DataType::kFLOAT: break; case nvinfer1::DataType::kINT32: @@ -89,7 +95,7 @@ 
ConversionCtx::ConversionCtx(BuilderSettings build_settings) if (settings.disable_tf32) { cfg->clearFlag(nvinfer1::BuilderFlag::kTF32); } -#if NV_TENSORRT_MAJOR > 7 +#if defined(TRT_MAJOR_RTX) || (NV_TENSORRT_MAJOR > 7) if (settings.sparse_weights) { cfg->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS); } @@ -163,7 +169,7 @@ void ConversionCtx::RecordNewITensor(const torch::jit::Value* value, nvinfer1::I } std::string ConversionCtx::SerializeEngine() { -#if NV_TENSORRT_MAJOR > 7 +#if defined(TRT_MAJOR_RTX) || (NV_TENSORRT_MAJOR > 7) auto serialized_network = make_trt(builder->buildSerializedNetwork(*net, *cfg)); if (!serialized_network) { TORCHTRT_THROW_ERROR("Building serialized network failed in TensorRT"); diff --git a/core/conversion/conversionctx/ConversionCtx.h b/core/conversion/conversionctx/ConversionCtx.h index 8587885eca..df5c2a646d 100644 --- a/core/conversion/conversionctx/ConversionCtx.h +++ b/core/conversion/conversionctx/ConversionCtx.h @@ -26,7 +26,9 @@ struct BuilderSettings { bool allow_shape_tensors = false; ir::Device device; nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD; +#ifndef TRT_MAJOR_RTX nvinfer1::IInt8Calibrator* calibrator = nullptr; +#endif uint64_t num_avg_timing_iters = 1; uint64_t workspace_size = 0; uint64_t dla_sram_size = DLA_SRAM_SIZE; diff --git a/core/conversion/converters/BUILD b/core/conversion/converters/BUILD index 456b8ee7d4..84864cea10 100644 --- a/core/conversion/converters/BUILD +++ b/core/conversion/converters/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + 
"@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -49,14 +70,17 @@ cc_library( "//core/conversion/conversionctx", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, @@ -75,14 +99,17 @@ cc_library( "//core/conversion/conversionctx", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + 
":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, @@ -93,7 +120,6 @@ cc_library( srcs = [ "NodeConverterRegistry.cpp", "impl/activation.cpp", - "impl/batch_norm.cpp", "impl/bitwise.cpp", "impl/cast.cpp", "impl/chunk.cpp", @@ -106,14 +132,11 @@ cc_library( "impl/element_wise.cpp", "impl/expand.cpp", "impl/internal_ops.cpp", - "impl/interpolate.cpp", "impl/layer_norm.cpp", "impl/linear.cpp", "impl/lstm_cell.cpp", "impl/matrix_multiply.cpp", "impl/max.cpp", - "impl/normalize.cpp", - "impl/pooling.cpp", "impl/quantization.cpp", "impl/reduce.cpp", "impl/reflection_pad.cpp", @@ -126,7 +149,17 @@ cc_library( "impl/topk.cpp", "impl/unary.cpp", "impl/unsqueeze.cpp", - ], + ] + select({ + ":rtx_win": [], + # exclude plugins from rtx build + ":rtx_x86_64": [], + "//conditions:default": [ + "impl/batch_norm.cpp", + "impl/interpolate.cpp", + "impl/normalize.cpp", + "impl/pooling.cpp", + ], + }), hdrs = [ "converters.h", ], @@ -138,14 +171,17 @@ cc_library( "//core/plugins:torch_tensorrt_plugins", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/converters/impl/quantization.cpp b/core/conversion/converters/impl/quantization.cpp index addf629e6b..6a8c2e2f73 100644 --- a/core/conversion/converters/impl/quantization.cpp +++ 
b/core/conversion/converters/impl/quantization.cpp @@ -9,15 +9,14 @@ namespace converters { namespace impl { namespace { -#if NV_TENSORRT_MAJOR > 7 +#if defined(TRT_MAJOR_RTX) || (NV_TENSORRT_MAJOR > 7) // clang-format off bool add_qdq(ConversionCtx *ctx, const torch::jit::Node* n, nvinfer1::ITensor* input, nvinfer1::ITensor* scale, std::string& opName) { - nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale); + nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale, nvinfer1::DataType::kINT8); TORCHTRT_CHECK(quantize_layer, "Unable to create QuantizeLayer from node: " << *n); quantize_layer->setAxis(0); - - nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale); + nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale, input->getType()); TORCHTRT_CHECK(dequantize_layer, "Unable to create DequantizeLayer from node: " << *n); dequantize_layer->setAxis(0); @@ -54,12 +53,12 @@ auto quantization_registrations TORCHTRT_UNUSED = RegisterNodeConversionPatterns auto scale = args[1].ITensorOrFreeze(ctx); int64_t axis = args[3].unwrapToScalar().to(); // Add and configure a QuantizeLayer. - nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale); + nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale, nvinfer1::DataType::kINT8); // Set a channel axis which represents output channels quantize_layer->setAxis(axis); // Add and configure a DequantizeLayer. 
- nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale); + nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale, input->getType()); dequantize_layer->setAxis(axis); auto qdq_out = ctx->AssociateValueAndTensor(n->outputs()[0], dequantize_layer->getOutput(0)); diff --git a/core/conversion/evaluators/BUILD b/core/conversion/evaluators/BUILD index d3adad10cd..e9fc358582 100644 --- a/core/conversion/evaluators/BUILD +++ b/core/conversion/evaluators/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -55,14 +76,17 @@ cc_library( "//core/conversion/var", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + 
select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/tensorcontainer/BUILD b/core/conversion/tensorcontainer/BUILD index 951a0b886e..c6f56b70c8 100644 --- a/core/conversion/tensorcontainer/BUILD +++ b/core/conversion/tensorcontainer/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -48,14 +69,17 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": 
["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/conversion/var/BUILD b/core/conversion/var/BUILD index 770d3c2120..ce58ca70f3 100644 --- a/core/conversion/var/BUILD +++ b/core/conversion/var/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -51,14 +72,17 @@ cc_library( "//core/conversion/tensorcontainer", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": 
["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/ir/BUILD b/core/ir/BUILD index fce3fbe51f..5dfdeded90 100644 --- a/core/ir/BUILD +++ b/core/ir/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -51,14 +72,17 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + 
":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/lowering/BUILD b/core/lowering/BUILD index 27af435927..6084198c74 100644 --- a/core/lowering/BUILD +++ b/core/lowering/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -53,14 +74,17 @@ cc_library( "//core/lowering/passes", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": 
["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/lowering/passes/BUILD b/core/lowering/passes/BUILD index 845abdb62a..bd8462eed1 100644 --- a/core/lowering/passes/BUILD +++ b/core/lowering/passes/BUILD @@ -30,6 +30,16 @@ config_setting( }, ) +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + config_setting( name = "windows", constraint_values = [ @@ -76,9 +86,10 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ + ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], ":use_torch_whl": ["@torch_whl//:libtorch"], ":windows": ["@libtorch_win//:libtorch"], - ":jetpack": ["@torch_l4t//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/partitioning/BUILD b/core/partitioning/BUILD index 378752cdfd..bbbb89af37 100644 --- a/core/partitioning/BUILD +++ b/core/partitioning/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + 
"//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -56,14 +77,17 @@ cc_library( "//core/partitioning/segmentedblock", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/partitioning/partitioningctx/BUILD b/core/partitioning/partitioningctx/BUILD index bd21aba7ff..bae63241a6 100644 --- a/core/partitioning/partitioningctx/BUILD +++ b/core/partitioning/partitioningctx/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": 
"jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -52,14 +73,17 @@ cc_library( "//core/partitioning/segmentedblock", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/partitioning/partitioninginfo/BUILD b/core/partitioning/partitioninginfo/BUILD index daebcd615f..04515abb10 100644 --- a/core/partitioning/partitioninginfo/BUILD +++ b/core/partitioning/partitioninginfo/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - 
"//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -51,14 +72,17 @@ cc_library( "//core/lowering", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/partitioning/segmentedblock/BUILD b/core/partitioning/segmentedblock/BUILD index 83e45eaf14..73916bb6bd 100644 --- a/core/partitioning/segmentedblock/BUILD +++ b/core/partitioning/segmentedblock/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - 
"//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -51,14 +72,17 @@ cc_library( "//core/lowering", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/plugins/BUILD b/core/plugins/BUILD index cebce31941..00503552f2 100644 --- a/core/plugins/BUILD +++ b/core/plugins/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + 
"//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -39,16 +60,24 @@ config_setting( cc_library( name = "torch_tensorrt_plugins", - srcs = [ - "impl/interpolate_plugin.cpp", - "impl/normalize_plugin.cpp", - "register_plugins.cpp", - ], - hdrs = [ - "impl/interpolate_plugin.h", - "impl/normalize_plugin.h", - "plugins.h", - ], + srcs = select({ + ":rtx_win": [], + ":rtx_x86_64": [], + "//conditions:default": [ + "impl/interpolate_plugin.cpp", + "impl/normalize_plugin.cpp", + "register_plugins.cpp", + ], + }), + hdrs = select({ + ":rtx_win": [], + ":rtx_x86_64": [], + "//conditions:default": [ + "impl/interpolate_plugin.h", + "impl/normalize_plugin.h", + "plugins.h", + ], + }), copts = [ "-pthread", ], @@ -58,26 +87,29 @@ cc_library( deps = [ "//core/util:prelude", ] + select({ - ":windows": [ - "@tensorrt_win//:nvinfer", - "@tensorrt_win//:nvinferplugin", + ":jetpack": [ + "@tensorrt_l4t//:nvinfer", + "@tensorrt_l4t//:nvinferplugin", ], + ":rtx_win": [], + ":rtx_x86_64": [], ":sbsa": [ "@tensorrt_sbsa//:nvinfer", "@tensorrt_sbsa//:nvinferplugin", ], - ":jetpack": [ - "@tensorrt_l4t//:nvinfer", - "@tensorrt_l4t//:nvinferplugin", + ":windows": [ + "@tensorrt_win//:nvinfer", + "@tensorrt_win//:nvinferplugin", ], "//conditions:default": [ "@tensorrt//:nvinfer", "@tensorrt//:nvinferplugin", ], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/runtime/BUILD b/core/runtime/BUILD index 72c670bff1..a573cfed78 100644 --- a/core/runtime/BUILD +++ b/core/runtime/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": 
"whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -63,14 +84,17 @@ cc_library( "//core/plugins:torch_tensorrt_plugins", "//core/util:prelude", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/util/BUILD b/core/util/BUILD index 4f522704ee..0ed97a5eda 100644 --- a/core/util/BUILD +++ b/core/util/BUILD @@ -6,7 +6,28 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", + }, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + 
"@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", }, ) @@ -16,7 +37,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +47,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -60,9 +81,10 @@ cc_library( deps = [ ":macros", ] + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), ) @@ -95,9 +117,10 @@ cc_library( "build_info.h", ], deps = select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), ) @@ -114,14 +137,17 @@ cc_library( ":macros", "//core/util/logging", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], 
":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/util/logging/BUILD b/core/util/logging/BUILD index f0cc067af9..1ac834b021 100644 --- a/core/util/logging/BUILD +++ b/core/util/logging/BUILD @@ -6,17 +6,34 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", }, ) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = {"//toolchains/dep_collection:compute_libs": "rtx"}, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = {"//toolchains/dep_collection:compute_libs": "rtx"}, +) + config_setting( name = "sbsa", constraint_values = [ "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -26,7 +43,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -35,6 +52,7 @@ config_setting( constraint_values = [ "@platforms//os:windows", ], + flag_values = {"//toolchains/dep_collection:compute_libs": "default"}, ) cc_library( @@ -46,14 +64,17 @@ cc_library( "TorchTRTLogger.h", ], deps = select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - 
":windows": ["@libtorch_win//:libtorch"], - ":use_torch_whl": ["@torch_whl//:libtorch"], ":jetpack": ["@torch_l4t//:libtorch"], + ":rtx_win": ["@libtorch_win//:libtorch"], + ":use_torch_whl": ["@torch_whl//:libtorch"], + ":windows": ["@libtorch_win//:libtorch"], "//conditions:default": ["@libtorch"], }), alwayslink = True, diff --git a/core/util/trt_util.h b/core/util/trt_util.h index f3df533d8b..a68e00e14d 100644 --- a/core/util/trt_util.h +++ b/core/util/trt_util.h @@ -8,7 +8,7 @@ namespace nvinfer1 { -#if NV_TENSORRT_MAJOR < 8 +#if !defined(TRT_MAJOR_RTX) && (NV_TENSORRT_MAJOR < 8) #define TRT_ENGINE_CAPABILITY_STANDARD nvinfer1::EngineCapability::kDEFAULT #define TRT_ENGINE_CAPABILITY_SAFETY nvinfer1::EngineCapability::kSAFE_GPU diff --git a/cpp/BUILD b/cpp/BUILD index e5cb1558e9..2b5877aa4a 100644 --- a/cpp/BUILD +++ b/cpp/BUILD @@ -2,21 +2,52 @@ load("@rules_cc//cc:defs.bzl", "cc_library") package(default_visibility = ["//visibility:public"]) +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + cc_library( name = "torch_tensorrt", srcs = [ "src/compile_spec.cpp", "src/logging.cpp", - "src/ptq.cpp", "src/torch_tensorrt.cpp", "src/types.cpp", - ], + ] + select({ + ":rtx_win": [], + ":rtx_x86_64": [], + "//conditions:default": [ + "src/ptq.cpp", + ], + }), hdrs = [ "include/torch_tensorrt/logging.h", "include/torch_tensorrt/macros.h", - "include/torch_tensorrt/ptq.h", "include/torch_tensorrt/torch_tensorrt.h", - ], + ] + select({ + ":rtx_win": [], + ":rtx_x86_64": [], + "//conditions:default": [ + "include/torch_tensorrt/ptq.h", + ], + }), linkstatic = True, strip_include_prefix = "include/", deps = [ diff --git a/cpp/CMakeLists.txt 
b/cpp/CMakeLists.txt index 0c0e5a43f0..690dca2749 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -4,7 +4,6 @@ add_library(${lib_name} OBJECT) set(CXX_SRCS "${CMAKE_CURRENT_SOURCE_DIR}/src/compile_spec.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/logging.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/src/ptq.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/torch_tensorrt.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/src/types.cpp" ) @@ -12,7 +11,6 @@ set(CXX_SRCS set(HEADER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/logging.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/macros.h" - "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/ptq.h" "${CMAKE_CURRENT_SOURCE_DIR}/include/torch_tensorrt/torch_tensorrt.h" ) diff --git a/cpp/bin/torchtrtc/BUILD b/cpp/bin/torchtrtc/BUILD index d858d4de93..2c87eddae2 100644 --- a/cpp/bin/torchtrtc/BUILD +++ b/cpp/bin/torchtrtc/BUILD @@ -5,7 +5,7 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", }, ) @@ -15,9 +15,10 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) + config_setting( name = "windows", constraint_values = [ @@ -25,6 +26,16 @@ config_setting( ], ) +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = { + "//toolchains/dep_collection:compute_libs": "rtx", + }, +) + cc_binary( name = "torchtrtc", srcs = [ @@ -44,17 +55,21 @@ cc_binary( "//cpp:torch_tensorrt", "//third_party/args", ] + select({ - ":windows": [ + ":jetpack": [ + "@torch_l4t//:caffe2", + "@torch_l4t//:libtorch", + ], + ":rtx_win": [ "@libtorch_win//:caffe2", - "@libtorch_win//:libtorch" + "@libtorch_win//:libtorch", ], ":use_torch_whl": [ "@torch_whl//:caffe2", - "@torch_whl//:libtorch" + "@torch_whl//:libtorch", ], - ":jetpack": [ - 
"@torch_l4t//:caffe2", - "@torch_l4t//:libtorch" + ":windows": [ + "@libtorch_win//:caffe2", + "@libtorch_win//:libtorch", ], "//conditions:default": [ "@libtorch", diff --git a/cpp/bin/torchtrtc/fileio.h b/cpp/bin/torchtrtc/fileio.h index ed52d566a1..a27c0a69e1 100644 --- a/cpp/bin/torchtrtc/fileio.h +++ b/cpp/bin/torchtrtc/fileio.h @@ -23,7 +23,9 @@ #include "torch/torch.h" #include "torch_tensorrt/logging.h" +#ifndef TRT_MAJOR_RTX #include "torch_tensorrt/ptq.h" +#endif #include "torch_tensorrt/torch_tensorrt.h" namespace torchtrtc { diff --git a/cpp/bin/torchtrtc/main.cpp b/cpp/bin/torchtrtc/main.cpp index c36cfdd0fc..b93d977c95 100644 --- a/cpp/bin/torchtrtc/main.cpp +++ b/cpp/bin/torchtrtc/main.cpp @@ -7,7 +7,9 @@ #include "torch/script.h" #include "torch_tensorrt/logging.h" +#ifndef TRT_MAJOR_RTX #include "torch_tensorrt/ptq.h" +#endif #include "torch_tensorrt/torch_tensorrt.h" #include "accuracy.h" @@ -334,8 +336,12 @@ int main(int argc, char** argv) { if (calibration_cache_file) { calibration_cache_file_path = torchtrtc::fileio::resolve_path(args::get(calibration_cache_file)); } - +#ifndef TRT_MAJOR_RTX auto calibrator = torchtrt::ptq::make_int8_cache_calibrator(calibration_cache_file_path); +#else + // rtx build has no calibrator + auto calibrator = nullptr; +#endif compile_settings.require_full_compilation = require_full_compilation; @@ -368,7 +374,9 @@ int main(int argc, char** argv) { } else if (dtype == torchtrt::DataType::kChar) { compile_settings.enabled_precisions.insert(torch::kI8); if (calibration_cache_file) { +#ifndef TRT_MAJOR_RTX compile_settings.ptq_calibrator = calibrator; +#endif } else { torchtrt::logging::log( torchtrt::logging::Level::kINFO, diff --git a/cpp/bin/torchtrtc/parser_util.h b/cpp/bin/torchtrtc/parser_util.h index 9ed5f6d06b..6605ec011a 100644 --- a/cpp/bin/torchtrtc/parser_util.h +++ b/cpp/bin/torchtrtc/parser_util.h @@ -9,7 +9,9 @@ #include "torch/torch.h" #include "torch_tensorrt/logging.h" +#ifndef TRT_MAJOR_RTX #include 
"torch_tensorrt/ptq.h" +#endif #include "torch_tensorrt/torch_tensorrt.h" namespace torchtrtc { diff --git a/cpp/include/torch_tensorrt/torch_tensorrt.h b/cpp/include/torch_tensorrt/torch_tensorrt.h index adac75d984..4068fa6b80 100644 --- a/cpp/include/torch_tensorrt/torch_tensorrt.h +++ b/cpp/include/torch_tensorrt/torch_tensorrt.h @@ -832,12 +832,12 @@ struct CompileSpec { * host RAM used by DLA to store weights and metadata for execution */ uint64_t dla_global_dram_size = 536870912; - +#ifndef TRT_MAJOR_RTX /** * Calibration dataloaders for each input for post training quantizatiom */ nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; - +#endif /** * Require the full module be compiled to TensorRT instead of potentially running unsupported operations in PyTorch */ diff --git a/cpp/src/compile_spec.cpp b/cpp/src/compile_spec.cpp index 68a25b3912..8dba4a76b8 100644 --- a/cpp/src/compile_spec.cpp +++ b/cpp/src/compile_spec.cpp @@ -152,7 +152,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external, bool internal.convert_info.engine_settings.dla_global_dram_size = external.dla_global_dram_size; internal.partitioning_info.cast_int8_inputs = true; - +#ifndef TRT_MAJOR_RTX if (internal.convert_info.engine_settings.enabled_precisions.find(nvinfer1::DataType::kINT8) != internal.convert_info.engine_settings.enabled_precisions.end()) { internal.partitioning_info.cast_int8_inputs = false; @@ -166,6 +166,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external, bool } else { internal.convert_info.engine_settings.calibrator = nullptr; } +#endif return internal; } diff --git a/dev_dep_versions.yml b/dev_dep_versions.yml index c9a738feb6..113fe23de6 100644 --- a/dev_dep_versions.yml +++ b/dev_dep_versions.yml @@ -1,2 +1,3 @@ __cuda_version__: "12.8" __tensorrt_version__: "10.12.0" +__tensorrt_rtx_version__: "1.0.0" diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst new file mode 100644 
index 0000000000..8edf80699c --- /dev/null +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -0,0 +1,65 @@ +.. _Torch-TensorRT_in_RTX: + +Torch-TensorRT in RTX +############################# + +Overview +******** + +TensorRT-RTX +============ +TensorRT for RTX builds on the proven performance of the NVIDIA TensorRT inference library, and simplifies the deployment of AI models on NVIDIA RTX GPUs across desktops, laptops, and workstations. + +TensorRT for RTX is a drop-in replacement for NVIDIA TensorRT in applications targeting NVIDIA RTX GPUs from Turing through Blackwell generations. It introduces a Just-In-Time (JIT) optimizer in the runtime that compiles improved inference engines directly on the end-user’s RTX-accelerated PC in under 30 seconds. This eliminates the need for lengthy pre-compilation steps and enables rapid engine generation, improved application portability, and cutting-edge inference performance. + +For detailed information about TensorRT-RTX, refer to: +* `TensorRT-RTX Documentation `_ + +Currently, Torch-TensorRT supports TensorRT-RTX for experimental purposes only. +Torch-TensorRT by default uses TensorRT during the build and run. + +In order to use TensorRT-RTX, you need to build the wheel with the ``--use-rtx`` flag. +Then set the ``FORCE_TENSORRT_RTX=1`` environment variable at runtime. + + + + +Prerequisites +************* + +System Preparation +================== +1. **Install TensorRT-RTX**: + TensorRT-RTX can be downloaded from https://developer.nvidia.com/tensorrt-rtx. + .. code-block:: sh + # if TensorRT-RTX is downloaded in /usr/local/tensorrt-rtx + export LD_LIBRARY_PATH=/usr/local/tensorrt-rtx/lib:$LD_LIBRARY_PATH + + +Build Torch-TensorRT with TensorRT-RTX +====================================== + +.. 
code-block:: sh + # if you have previously built with standard TensorRT, make sure to clean the build environment + python setup.py clean + # build wheel with TensorRT-RTX + python setup.py bdist_wheel --use-rtx + + # install the wheel + cd dist + python -m pip install torch-tensorrt-*.whl + + # make sure the tensorrt_rtx.so file is linked to the tensorrt_rtx.so file in the TensorRT-RTX installation directory + trt_install_path=$(python -m pip show torch-tensorrt | grep "Location" | awk '{print $2}')/torch_tensorrt + + # check if the libtensorrt_rtx.so.1 is linked + ldd $trt_install_path/lib/libtorchtrt.so + + +Quick Start +=========== + +.. code-block:: py + # you have to set FORCE_TENSORRT_RTX=1 to use TensorRT-RTX + FORCE_TENSORRT_RTX=1 python examples/dynamo/torch_compile_resnet_example.py + diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index 1f1a2120a9..6294632c59 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -75,3 +75,10 @@ fi cat MODULE.bazel export CI_BUILD=1 + +if [[ ${USE_RTX} == true ]]; then + cat pyproject_rtx.toml.temp > pyproject.toml + source .github/scripts/install-tensorrt-rtx.sh + install_wheel_or_not=true + install_tensorrt_rtx ${install_wheel_or_not} +fi \ No newline at end of file diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index b5b62ebf05..c4d2b37322 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -36,3 +36,10 @@ fi cat MODULE.bazel echo "RELEASE=1" >> ${GITHUB_ENV} + +if [[ ${USE_RTX} == true ]]; then + cat pyproject_rtx.toml.temp > pyproject.toml + source .github/scripts/install-tensorrt-rtx.sh + install_wheel_or_not=true + install_tensorrt_rtx ${install_wheel_or_not} +fi \ No newline at end of file diff --git a/packaging/smoke_test_windows.py b/packaging/smoke_test_windows.py index c7880cd862..31598663f9 100644 --- a/packaging/smoke_test_windows.py +++
b/packaging/smoke_test_windows.py @@ -1,9 +1,12 @@ import subprocess -import tensorrt # noqa: F401 import torch +import torch_tensorrt +from torch_tensorrt._utils import is_tensorrt_rtx print(f"Torch CUDA version: {torch.version.cuda}") +print(f"Torch TensorRT version: {torch_tensorrt.__version__}") +print(f"Is TensorRT RTX: {is_tensorrt_rtx()}") result = subprocess.run( ["systeminfo"], diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index 6d79f9b4f3..1b2a498961 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -1,12 +1,13 @@ import ctypes +import logging import os import platform import sys from typing import Dict, List +import torch from torch_tensorrt._version import ( # noqa: F401 __cuda_version__, - __tensorrt_version__, __version__, ) @@ -17,72 +18,28 @@ "Python 2 has reached end-of-life and is not supported by Torch-TensorRT" ) +import logging -def _parse_semver(version: str) -> Dict[str, str]: - split = version.split(".") - if len(split) < 3: - split.append("") - - return {"major": split[0], "minor": split[1], "patch": split[2]} - - -def _find_lib(name: str, paths: List[str]) -> str: - for path in paths: - libpath = os.path.join(path, name) - if os.path.isfile(libpath): - return libpath +_LOGGER = logging.getLogger(__name__) - raise FileNotFoundError(f"Could not find {name}\n Search paths: {paths}") +import torch +tensorrt_package_name = "" try: - import tensorrt # noqa: F401 -except ImportError: - cuda_version = _parse_semver(__cuda_version__) - tensorrt_version = _parse_semver(__tensorrt_version__) - - CUDA_MAJOR = cuda_version["major"] - TENSORRT_MAJOR = tensorrt_version["major"] - - if sys.platform.startswith("win"): - WIN_LIBS = [ - "nvinfer.dll", - "nvinfer_plugin.dll", - ] - - WIN_PATHS = os.environ["PATH"].split(os.path.pathsep) - - for lib in WIN_LIBS: - ctypes.CDLL(_find_lib(lib, WIN_PATHS)) - - elif sys.platform.startswith("linux"): - LINUX_PATHS = ["/usr/local/cuda-12.8/lib64", 
"/usr/lib", "/usr/lib64"] - if "LD_LIBRARY_PATH" in os.environ: - LINUX_PATHS += os.environ["LD_LIBRARY_PATH"].split(os.path.pathsep) - - if platform.uname().processor == "x86_64": - LINUX_PATHS += [ - "/usr/lib/x86_64-linux-gnu", - ] + # note: trt_alias must be imported before any import tensorrt - elif platform.uname().processor == "aarch64": - LINUX_PATHS += ["/usr/lib/aarch64-linux-gnu"] + from . import trt_alias # noqa: F401 - LINUX_LIBS = [ - f"libnvinfer.so.{TENSORRT_MAJOR}", - f"libnvinfer_plugin.so.{TENSORRT_MAJOR}", - ] + tensorrt_package_name = trt_alias.package_name + _LOGGER.info(f"You are using {trt_alias.package_name=} ") - for lib in LINUX_LIBS: - ctypes.CDLL(_find_lib(lib, LINUX_PATHS)) - -import logging - -import torch -from torch_tensorrt._features import ENABLED_FEATURES, _enabled_features_str - -_LOGGER = logging.getLogger(__name__) -_LOGGER.debug(_enabled_features_str()) +except Exception as e: + print(f"import error when try to import trt_alias, got error {e}") + print( + f"make sure tensorrt lib is in the LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH')}" + ) + raise Exception(f"import error when try to import trt_alias, got error {e}") def _register_with_torch() -> None: @@ -111,6 +68,12 @@ def _register_with_torch() -> None: torch.ops.load_library(linked_file_runtime_full_path) +# note: trt_alias must be imported before enabled features, because enabled features will check tensorrt.plugin availability +from torch_tensorrt._features import ENABLED_FEATURES, _enabled_features_str + +_LOGGER.debug(_enabled_features_str()) + + _register_with_torch() from torch_tensorrt._Device import Device # noqa: F401 diff --git a/py/torch_tensorrt/_utils.py b/py/torch_tensorrt/_utils.py index 9c76257dee..3a17e7e267 100644 --- a/py/torch_tensorrt/_utils.py +++ b/py/torch_tensorrt/_utils.py @@ -1,6 +1,7 @@ import sys from typing import Any +import tensorrt as trt import torch @@ -24,3 +25,34 @@ def check_cross_compile_trt_win_lib() -> bool: target_lib = 
".*libnvinfer_builder_resource_win.so.*" return any(re.match(target_lib, lib) for lib in loaded_libs) return False + + +def is_tensorrt_rtx() -> bool: + if trt._package_name == "tensorrt_rtx": + return True + return False + + +def is_tensorrt_version_supported(min_version: str) -> bool: + """ + Check if the installed TensorRT version supports the specified minimum version. + Args: + min_version (str): Minimum required TensorRT version + Returns: + bool: True if TensorRT version is >= min_version, False otherwise + Example: + >>> if is_tensorrt_version_supported("10.8.0"): + ... # Use FP4 features + ... pass + """ + try: + if is_tensorrt_rtx(): + return True + from importlib import metadata + + from packaging.version import Version + + return bool(Version(metadata.version("tensorrt")) >= Version(min_version)) + except (ImportError, ValueError): + # If tensorrt is not installed or version cannot be determined + return False diff --git a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp index bae61881da..68f2d46ad9 100644 --- a/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/register_tensorrt_classes.cpp @@ -67,7 +67,9 @@ void RegisterTRTCompileSpec() { .def("_set_precisions", &torch_tensorrt::pyapi::CompileSpec::setPrecisions) .def("_set_device", &torch_tensorrt::pyapi::CompileSpec::setDeviceIntrusive) .def("_set_torch_fallback", &torch_tensorrt::pyapi::CompileSpec::setTorchFallbackIntrusive) +#ifndef TRT_MAJOR_RTX .def("_set_ptq_calibrator", &torch_tensorrt::pyapi::CompileSpec::setPTQCalibratorViaHandle) +#endif .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify); ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, sparse_weights); diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp index bd3aa6b305..28c6addafe 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp 
+++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp @@ -342,7 +342,7 @@ core::CompileSpec CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi } info.partitioning_info.cast_int8_inputs = true; - +#ifndef TRT_MAJOR_RTX if (ptq_calibrator) { info.convert_info.engine_settings.calibrator = ptq_calibrator; info.partitioning_info.cast_int8_inputs = false; @@ -354,6 +354,7 @@ core::CompileSpec CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi info.lower_info.disable_cse = true; } } +#endif info.convert_info.engine_settings.sparse_weights = sparse_weights; info.convert_info.engine_settings.disable_tf32 = disable_tf32; info.convert_info.engine_settings.refit = refit; diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.h b/py/torch_tensorrt/csrc/tensorrt_classes.h index 89c5c8661e..0b932080f4 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.h +++ b/py/torch_tensorrt/csrc/tensorrt_classes.h @@ -139,11 +139,11 @@ struct CompileSpec : torch::CustomClassHolder { enabled_precisions.insert(static_cast(p)); } } - +#ifndef TRT_MAJOR_RTX int64_t getPTQCalibratorHandle() { return (int64_t)ptq_calibrator; } - +#endif void setDeviceIntrusive(const c10::intrusive_ptr& d) { device = *d; } @@ -152,9 +152,11 @@ struct CompileSpec : torch::CustomClassHolder { torch_fallback = *fb; } +#ifndef TRT_MAJOR_RTX void setPTQCalibratorViaHandle(int64_t handle) { ptq_calibrator = (nvinfer1::IInt8Calibrator*)handle; } +#endif ADD_FIELD_GET_SET(disable_tf32, bool); ADD_FIELD_GET_SET(sparse_weights, bool); @@ -170,11 +172,15 @@ struct CompileSpec : torch::CustomClassHolder { ADD_FIELD_GET_SET(allow_shape_tensors, bool); ADD_FIELD_GET_SET(device, Device); ADD_FIELD_GET_SET(torch_fallback, TorchFallback); +#ifndef TRT_MAJOR_RTX ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*); +#endif std::vector inputs; InputSignature input_signature; +#ifndef TRT_MAJOR_RTX nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr; +#endif std::set enabled_precisions = {}; bool 
sparse_weights = false; bool disable_tf32 = false; diff --git a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp index e32d102f8b..043ddacccb 100644 --- a/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp +++ b/py/torch_tensorrt/csrc/torch_tensorrt_py.cpp @@ -18,6 +18,7 @@ namespace py = pybind11; namespace torch_tensorrt { namespace pyapi { +#ifndef TRT_MAJOR_RTX template class pyCalibratorTrampoline : public Derived { public: @@ -146,6 +147,7 @@ class pyIInt8LegacyCalibrator : public pyCalibratorTrampoline(m, "CalibrationAlgo", py::module_local(), "Type of calibration algorithm") .value("LEGACY_CALIBRATION", nvinfer1::CalibrationAlgoType::kLEGACY_CALIBRATION) .value("ENTROPY_CALIBRATION", nvinfer1::CalibrationAlgoType::kENTROPY_CALIBRATION) @@ -319,6 +322,7 @@ PYBIND11_MODULE(_C, m) { .def(py::init_alias<>()) // Always initialize trampoline class. .def("get_batch_size", &nvinfer1::IInt8MinMaxCalibrator::getBatchSize, "Get batch size") .def("get_algorithm", &nvinfer1::IInt8MinMaxCalibrator::getAlgorithm, "Get algorithm"); +#endif py::class_(m, "Device") .def(py::init<>()) @@ -362,11 +366,13 @@ PYBIND11_MODULE(_C, m) { py::class_(ts_sub_mod, "CompileSpec") .def(py::init<>()) .def("__str__", &torch_tensorrt::pyapi::CompileSpec::stringify) +#ifndef TRT_MAJOR_RTX .def("_get_calibrator_handle", &CompileSpec::getPTQCalibratorHandle, "[Internal] gets a handle from a calibrator") +#endif .def_readwrite("inputs", &CompileSpec::inputs) .def_readwrite("input_signature", &CompileSpec::input_signature) .def_readwrite("enabled_precisions", &CompileSpec::enabled_precisions) - .def_readwrite("ptq_calibrator", &CompileSpec::ptq_calibrator) + // .def_readwrite("ptq_calibrator", &CompileSpec::ptq_calibrator) .def_readwrite("refit", &CompileSpec::refit) .def_readwrite("sparse_weights", &CompileSpec::sparse_weights) .def_readwrite("disable_tf32", &CompileSpec::disable_tf32) diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py 
b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index b8d4994fca..749e6c5dbe 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -28,6 +28,7 @@ from torch_tensorrt._enums import dtype from torch_tensorrt._features import needs_refit from torch_tensorrt._Input import Input +from torch_tensorrt._utils import is_tensorrt_rtx, is_tensorrt_version_supported from torch_tensorrt.dynamo import _defaults from torch_tensorrt.dynamo._engine_cache import BaseEngineCache from torch_tensorrt.dynamo._settings import CompilationSettings, settings_are_compatible @@ -51,8 +52,6 @@ from torch_tensorrt.fx.observer import Observer from torch_tensorrt.logging import TRT_LOGGER -from packaging import version - _LOGGER: logging.Logger = logging.getLogger(__name__) TRT_INTERPRETER_CALL_PRE_OBSERVER: Observer[Callable[[torch.fx.GraphModule], None]] = ( @@ -90,11 +89,19 @@ def __init__( self.builder = trt.Builder(self.logger) self._debugger_config = _debugger_config flag = 0 - if compilation_settings.use_explicit_typing: - STRONGLY_TYPED = 1 << (int)( - trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED - ) - flag |= STRONGLY_TYPED + # rtx build, strongly typed is enabled by default, can not set it by builder config + if is_tensorrt_rtx(): + if not compilation_settings.use_explicit_typing: + warnings.warn( + "Strongly typed is enabled by default in rtx build, setting use_explicit_typing to True" + ) + compilation_settings.use_explicit_typing = True + else: + if compilation_settings.use_explicit_typing: + STRONGLY_TYPED = 1 << (int)( + trt.NetworkDefinitionCreationFlag.STRONGLY_TYPED + ) + flag |= STRONGLY_TYPED self.ctx = ConversionContext( self.builder.create_network(flag), compilation_settings @@ -217,14 +224,14 @@ def _populate_trt_builder_config( trt.MemoryPoolType.WORKSPACE, self.compilation_settings.workspace_size ) - if version.parse(trt.__version__) >= version.parse("8.2"): + if 
is_tensorrt_version_supported("8.2"): builder_config.profiling_verbosity = ( trt.ProfilingVerbosity.DETAILED if self._debugger_config and self._debugger_config.save_engine_profile else trt.ProfilingVerbosity.LAYER_NAMES_ONLY ) - if version.parse(trt.__version__) >= version.parse("8.6"): + if is_tensorrt_version_supported("8.6"): if self.compilation_settings.max_aux_streams is not None: _LOGGER.info( f"Setting max aux streams to {self.compilation_settings.max_aux_streams}" @@ -277,6 +284,7 @@ def _populate_trt_builder_config( trt.MemoryPoolType.DLA_GLOBAL_DRAM, self.compilation_settings.dla_global_dram_size, ) + if not self.compilation_settings.use_explicit_typing: if dtype.float16 in self.compilation_settings.enabled_precisions: builder_config.set_flag(trt.BuilderFlag.FP16) @@ -336,7 +344,7 @@ def _populate_trt_builder_config( if self.compilation_settings.enable_weight_streaming: builder_config.set_flag(trt.BuilderFlag.WEIGHT_STREAMING) - if version.parse(trt.__version__) >= version.parse("10.8"): + if is_tensorrt_version_supported("10.8"): TilingOptimizationLevel = { "none": trt.TilingOptimizationLevel.NONE, "fast": trt.TilingOptimizationLevel.FAST, diff --git a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py index fe9a01b06c..ac7883fb30 100644 --- a/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py +++ b/py/torch_tensorrt/dynamo/conversion/aten_ops_converters.py @@ -7,6 +7,7 @@ import numpy as np import torch from torch.fx.node import Argument, Node, Target +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl @@ -3561,25 +3562,29 @@ def aten_ops_full( ) -@dynamo_tensorrt_converter( - torch.ops.aten.nonzero.default, - supports_dynamic_shapes=True, - requires_output_allocator=True, -) -def aten_ops_nonzero( - ctx: 
ConversionContext, - target: Target, - args: Tuple[Argument, ...], - kwargs: Dict[str, Argument], - name: str, -) -> Union[TRTTensor, Sequence[TRTTensor]]: - return impl.unary.nonzero( - ctx, - target, - SourceIR.ATEN, - name, - args[0], +# currently nonzero is not supported for tensorrt_rtx +# TODO: lan to remove this once rtx team has fixed the bug +if not is_tensorrt_rtx(): + + @dynamo_tensorrt_converter( + torch.ops.aten.nonzero.default, + supports_dynamic_shapes=True, + requires_output_allocator=True, ) + def aten_ops_nonzero( + ctx: ConversionContext, + target: Target, + args: Tuple[Argument, ...], + kwargs: Dict[str, Argument], + name: str, + ) -> Union[TRTTensor, Sequence[TRTTensor]]: + return impl.unary.nonzero( + ctx, + target, + SourceIR.ATEN, + name, + args[0], + ) @dynamo_tensorrt_converter(torch.ops.aten.linear.default, supports_dynamic_shapes=True) diff --git a/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py b/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py index f9b47542a8..2156ffb26f 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/normalization/ops.py @@ -5,6 +5,7 @@ import tensorrt as trt import torch from torch.fx.node import Target +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -50,7 +51,9 @@ def batch_norm( # We perform constant folding for batch norm when the weight, bias, running_mean, and running_var are all tensors. # Batch norm operation can be fused into a single layer, which is more efficient than the original implementation. # In this way, the batch norm layer will be fused with the Convolution layer and get a performance boost. 
- if any( + # TODO: lanl: to remove this once we have solved the batchnorm constant folding issue in RTX + # https://github.com/pytorch/TensorRT/issues/3699 + if is_tensorrt_rtx() or any( [ isinstance(weight, trt.ITensor), isinstance(bias, trt.ITensor), diff --git a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py index 2aeedb144e..f3505c7ff6 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/quantize.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/quantize.py @@ -5,6 +5,7 @@ import torch from torch.fx.experimental.proxy_tensor import unset_fake_temporarily from torch.fx.node import Target +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo._SourceIR import SourceIR from torch_tensorrt.dynamo.conversion import impl from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext @@ -74,6 +75,17 @@ def quantize( dtype = trt.DataType.FP8 max_bound = 448 + if ( + dtype == trt.DataType.INT8 + and ".input_quantizer" in name + and is_tensorrt_rtx() + ): + # RTX does not support int8 activation quantization + # TODO: lan to remove this once rtx team has added the support for int8 activation quantization + raise NotImplementedError( + "TensorRT-RTX does not support int8 activation quantization, only support int8 weight quantization" + ) + axis = None # int8 weight quantization is per-channel quantization(it can have one or multiple amax values) if dtype == trt.DataType.INT8 and amax.numel() > 1: diff --git a/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py b/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py index 35f21198d4..efe4ccc6f4 100644 --- a/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py +++ b/py/torch_tensorrt/dynamo/conversion/impl/unsqueeze.py @@ -2,6 +2,7 @@ from typing import List, Optional, Sequence, cast from torch.fx.node import Target +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo._SourceIR 
import SourceIR from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext from torch_tensorrt.dynamo.conversion.converter_utils import ( @@ -22,19 +23,18 @@ def unsqueeze( input: TRTTensor, dim: int, ) -> TRTTensor: - from importlib import metadata - - from packaging.version import Version - - if Version(metadata.version("tensorrt")) < Version("10.7.0"): + # tensorrt version < 10.7.0, use the old unsqueeze implementation + if is_tensorrt_version_supported("10.7.0"): + # use the new unsqueeze implementation + axes = get_trt_tensor(ctx, dim, f"{name}_axes") + layer = ctx.net.add_unsqueeze(input, axes) + set_layer_name(layer, target, name, source_ir) + return layer.get_output(0) + else: logger.warning( - f"IUnsqueezeLayer is supported starting from TensorRT 10.7.0, using the old unsqueeze implementation in the current TensorRT version: {metadata.version('tensorrt')}" + "IUnsqueezeLayer is supported starting from TensorRT 10.7.0, using the old unsqueeze implementation in the current TensorRT version" ) return unsqueeze_old(ctx, target, source_ir, name, input, dim) - axes = get_trt_tensor(ctx, dim, f"{name}_axes") - layer = ctx.net.add_unsqueeze(input, axes) - set_layer_name(layer, target, name, source_ir) - return layer.get_output(0) # old implementation for jetson due to IUnsqueezeLayer was not supported prior to 10.7.0 diff --git a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py index 8e18a3ae32..d18a5674e0 100644 --- a/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py +++ b/py/torch_tensorrt/dynamo/runtime/_PythonTorchTensorRTModule.py @@ -270,6 +270,7 @@ def setup_engine(self) -> None: if self.settings.enable_weight_streaming: self.set_default_device_memory_budget() self.context = self.engine.create_execution_context() + assert self.context is not None, "Failed to create execution context" assert self.engine.num_io_tensors == ( len(self.input_names) 
+ len(self.output_names) ) @@ -430,7 +431,7 @@ def create_output_allocator(self) -> None: def forward(self, *inputs: torch.Tensor) -> torch.Tensor | Tuple[torch.Tensor, ...]: def run_standard_execution() -> torch.Tensor | Tuple[torch.Tensor, ...]: - shape_changed = self.validate_input_shapes(inputs) + shape_changed = self.validate_input_shapes(contiguous_inputs) ( need_cudagraphs_record, can_use_pre_allocated_outputs, diff --git a/py/torch_tensorrt/dynamo/utils.py b/py/torch_tensorrt/dynamo/utils.py index 0703fd1cb9..7a8e6466cd 100644 --- a/py/torch_tensorrt/dynamo/utils.py +++ b/py/torch_tensorrt/dynamo/utils.py @@ -17,6 +17,7 @@ from torch_tensorrt._enums import dtype from torch_tensorrt._features import ENABLED_FEATURES from torch_tensorrt._Input import Input +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo import _defaults from torch_tensorrt.dynamo._defaults import default_device from torch_tensorrt.dynamo._engine_cache import BaseEngineCache @@ -76,7 +77,7 @@ class Frameworks(Enum): }, } -if trt.__version__ >= "7.0": +if is_tensorrt_version_supported("7.0"): DataTypeEquivalence[trt.bool] = { Frameworks.NUMPY: np.bool_, Frameworks.TORCH: torch.bool, diff --git a/py/torch_tensorrt/fx/converters/acc_ops_converters.py b/py/torch_tensorrt/fx/converters/acc_ops_converters.py index f998ddb27a..bf2680f12a 100644 --- a/py/torch_tensorrt/fx/converters/acc_ops_converters.py +++ b/py/torch_tensorrt/fx/converters/acc_ops_converters.py @@ -12,6 +12,7 @@ import torch from torch.fx.immutable_collections import immutable_list from torch.fx.node import Argument, Target +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.converters.impl import activation, convolution from torch_tensorrt.fx.passes.lower_basic_pass import ( trt_transposed_linear, @@ -207,7 +208,7 @@ def acc_ops_conv_transposend( return layer.get_output(0) -@tensorrt_converter(acc_ops.pad, enabled=trt.__version__ < "8.2") 
+@tensorrt_converter(acc_ops.pad, enabled=(not is_tensorrt_version_supported("8.2"))) def acc_ops_pad_with_padding_layer( network: TRTNetwork, target: Target, @@ -257,7 +258,10 @@ def acc_ops_pad_with_padding_layer( return layer.get_output(0) -@tensorrt_converter(acc_ops.pad, enabled=trt.__version__ >= "8.2") +@tensorrt_converter( + acc_ops.pad, + enabled=is_tensorrt_version_supported("8.2"), +) def acc_ops_pad_with_slice_layer( network: TRTNetwork, target: Target, @@ -880,7 +884,10 @@ def acc_ops_sign( ) -> Union[TRTTensor, Sequence[TRTTensor]]: input_val = kwargs["input"] - if trt.__version__ >= "8.2" and not network.has_implicit_batch_dimension: + if ( + is_tensorrt_version_supported("8.2") + and not network.has_implicit_batch_dimension + ): input_val = kwargs["input"] operation_type = trt.UnaryOperation.SIGN return add_unary_layer(network, input_val, operation_type, target, name) diff --git a/py/torch_tensorrt/fx/fx2trt.py b/py/torch_tensorrt/fx/fx2trt.py index 6a29932b1b..f241a936d6 100644 --- a/py/torch_tensorrt/fx/fx2trt.py +++ b/py/torch_tensorrt/fx/fx2trt.py @@ -13,6 +13,7 @@ from torch._ops import OpOverload from torch.fx.node import _get_qualified_name from torch.fx.passes.shape_prop import TensorMetadata +from torch_tensorrt._utils import is_tensorrt_version_supported from .converter_registry import CONVERTERS from .input_tensor_spec import InputTensorSpec @@ -213,7 +214,10 @@ def run( builder_config.max_workspace_size = max_workspace_size # Speed up TRT build time in the test environment - if trt.__version__ >= "8.6" and os.environ.get("TRT_TEST_ENV", "0") == "1": + if ( + is_tensorrt_version_supported("8.6") + and os.environ.get("TRT_TEST_ENV", "0") == "1" + ): _LOGGER.info("Set TRT optimization level to 0") builder_config.builder_optimization_level = 0 @@ -225,7 +229,7 @@ def run( cache = builder_config.create_timing_cache(b"") builder_config.set_timing_cache(cache, False) - if trt.__version__ >= "8.2": + if is_tensorrt_version_supported("8.2"): 
builder_config.profiling_verbosity = ( profiling_verbosity if profiling_verbosity diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py index 7f32b749c5..217b92f19c 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py @@ -3,9 +3,9 @@ import tensorrt as trt import torch.fx import torch.nn as nn - import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -15,7 +15,7 @@ """ ) @unittest.skipIf( - trt.__version__ < "8.0", + not is_tensorrt_version_supported("8.0"), "Explicit quantization only supported in TensorRT 8.0 and later", ) class TestDequantizeConverter(AccTestCase): diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py index c82eee79ee..f5b6005782 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py @@ -3,10 +3,10 @@ import tensorrt as trt import torch import torch.nn as nn - import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from parameterized import parameterized from torch.testing._internal.common_utils import run_tests +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase # from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -80,7 +80,7 @@ def forward(self, x): ] ) @unittest.skipIf( - trt.__version__ < "8.2", + not is_tensorrt_version_supported("8.2"), "Padding 3d only supported in TensorRT 8.2 and later", ) def test_pad_3d(self, _, pad): diff --git 
a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py index c7b050c4ac..3c2708bf91 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py @@ -3,9 +3,9 @@ import tensorrt as trt import torch.fx import torch.nn as nn - import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -15,7 +15,7 @@ """ ) @unittest.skipIf( - trt.__version__ < "8.0", + not is_tensorrt_version_supported("8.0"), "Explicit quantization only supported in TensorRT 8.0 and later", ) class TestQuantizePerTensorConverter(AccTestCase): diff --git a/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py b/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py index 538e575d6e..2942945523 100644 --- a/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py +++ b/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py @@ -4,6 +4,7 @@ import torch from parameterized import parameterized from torch.testing._internal.common_utils import run_tests +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import DispatchTestCase, InputTensorSpec @@ -15,7 +16,7 @@ class TestReshapeConverter(DispatchTestCase): ] ) @unittest.skipIf( - trt.__version__ < "8.5", + not is_tensorrt_version_supported("8.5"), "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape(self, target_shape): @@ -42,7 +43,7 @@ def forward(self, x): ] ) @unittest.skipIf( - trt.__version__ < "8.5", + not is_tensorrt_version_supported("8.5"), "Shape tensor supported well in TensorRT 8.5 and later", ) def 
test_reshape_with_dynamic_shape(self, target_shape): @@ -68,7 +69,7 @@ def forward(self, x): ) @unittest.skipIf( - trt.__version__ < "8.5", + not is_tensorrt_version_supported("8.5"), "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape_with_dynamic_shape_size(self): diff --git a/py/torch_tensorrt/fx/tools/common_fx2trt.py b/py/torch_tensorrt/fx/tools/common_fx2trt.py index 2ddd832c2a..66f343a55b 100644 --- a/py/torch_tensorrt/fx/tools/common_fx2trt.py +++ b/py/torch_tensorrt/fx/tools/common_fx2trt.py @@ -13,6 +13,7 @@ from torch.fx.passes import shape_prop from torch.fx.passes.infra.pass_base import PassResult from torch.testing._internal.common_utils import TestCase +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx import InputTensorSpec, TRTInterpreter, TRTModule from torch_tensorrt.fx.passes.lower_basic_pass_aten import ( compose_bmm, @@ -258,7 +259,7 @@ def run_test( pass_tracer = chain_passes(*apply_passes) mod = pass_tracer(mod, inputs) - if trt.__version__ >= "8.6": + if is_tensorrt_version_supported("8.6"): test_implicit_batch_dim = False if test_implicit_batch_dim: interp = TRTInterpreter(mod, InputTensorSpec.from_tensors(inputs)) diff --git a/py/torch_tensorrt/fx/utils.py b/py/torch_tensorrt/fx/utils.py index 5bef21b6be..da5cdc0d7f 100644 --- a/py/torch_tensorrt/fx/utils.py +++ b/py/torch_tensorrt/fx/utils.py @@ -8,7 +8,7 @@ import torch from functorch import make_fx from functorch.experimental import functionalize -from torch_tensorrt._utils import sanitized_torch_version +from torch_tensorrt._utils import is_tensorrt_version_supported, sanitized_torch_version from torch_tensorrt.fx.passes.lower_basic_pass import ( replace_op_with_indices, run_const_fold, @@ -60,7 +60,7 @@ class Frameworks(Enum): }, } -if trt.__version__ >= "7.0": +if is_tensorrt_version_supported("7.0"): DataTypeEquivalence[trt.bool] = { Frameworks.NUMPY: np.bool_, Frameworks.TORCH: torch.bool, @@ -105,7 +105,11 @@ def 
unified_dtype_converter( trt_major_version = int(trt.__version__.split(".")[0]) if dtype in (np.int8, torch.int8, trt.int8): return DataTypeEquivalence[trt.int8][to] - elif trt_major_version >= 7 and dtype in (np.bool_, torch.bool, trt.bool): + elif is_tensorrt_version_supported("7.0") and dtype in ( + np.bool_, + torch.bool, + trt.bool, + ): return DataTypeEquivalence[trt.bool][to] elif dtype in (np.int32, torch.int32, trt.int32): return DataTypeEquivalence[trt.int32][to] diff --git a/py/torch_tensorrt/trt_alias.py b/py/torch_tensorrt/trt_alias.py new file mode 100644 index 0000000000..4a80d1c12e --- /dev/null +++ b/py/torch_tensorrt/trt_alias.py @@ -0,0 +1,159 @@ +import ctypes +import importlib +import importlib.util +import os +import platform +import sys +from types import ModuleType +from typing import Any, Dict, List + +package_imported = False +package_name = "" + + +def _parse_semver(version: str) -> Dict[str, str]: + split = version.split(".") + if len(split) < 3: + split.append("") + + return {"major": split[0], "minor": split[1], "patch": split[2]} + + +def _find_lib(name: str, paths: List[str]) -> str: + for path in paths: + libpath = os.path.join(path, name) + if os.path.isfile(libpath): + return libpath + + raise FileNotFoundError(f"Could not find {name}\n Search paths: {paths}") + + +# TensorRTProxyModule is a proxy module that allows us to register the tensorrt or tensorrt-rtx package +# since tensorrt-rtx is the drop-in replacement for tensorrt, we can use the same interface to use tensorrt-rtx +class TensorRTProxyModule(ModuleType): + def __init__(self, target_module: ModuleType) -> None: + spec = importlib.util.spec_from_loader("tensorrt", loader=None) + self.__spec__ = spec + self.__package__ = target_module.__package__ + self.__path__ = target_module.__path__ + self.__file__ = target_module.__file__ + self.__loader__ = target_module.__loader__ + self.__version__ = target_module.__version__ + self._target_module = target_module + 
self._nested_module = None + self._package_name: str = "" + + # For RTX: tensorrt.tensorrt -> tensorrt_rtx.tensorrt_rtx + # For standard: tensorrt.tensorrt -> tensorrt.tensorrt (no change) + if hasattr(target_module, "tensorrt_rtx"): + self._nested_module = target_module.tensorrt_rtx + elif hasattr(target_module, "tensorrt"): + self._nested_module = target_module.tensorrt + + # Set up the nested module structure + if self._nested_module: + self.tensorrt = self._nested_module + + # __getattr__ is used to get the attribute from the target module + def __getattr__(self, name: str) -> Any: + # First try to get from the target module + try: + return getattr(self._target_module, name) + except AttributeError: + print(f"AttributeError: {name}") + # For nested modules like tensorrt.tensorrt + if name == "tensorrt" and self._nested_module: + return self._nested_module + raise + + def __dir__(self) -> list[str]: + return dir(self._target_module) + + +def alias_tensorrt() -> None: + global package_imported + global package_name + # tensorrt package has been imported, no need to alias again + if package_imported: + return + + # in order not to break or change the existing behavior, we only build and run with tensorrt by default, tensorrt-rtx is for experiment only + # if we want to test with tensorrt-rtx, we have to build the wheel with --use-rtx and test with FORCE_TENSORRT_RTX=1 + # eg: FORCE_TENSORRT_RTX=1 python test.py + # in future, we can do dynamic linking either to tensorrt or tensorrt-rtx based on the gpu type + use_rtx = False + if os.environ.get("FORCE_TENSORRT_RTX", "0") == "1": + use_rtx = True + package_name = "tensorrt_rtx" if use_rtx else "tensorrt" + # Import the appropriate package + try: + target_module = importlib.import_module(package_name) + proxy = TensorRTProxyModule(target_module) + proxy._package_name = package_name + sys.modules["tensorrt"] = proxy + package_imported = True + except ImportError as e: + # Fallback to standard tensorrt if RTX version 
not available + print(f"import error when try to import {package_name=} got error {e}") + print( + f"make sure tensorrt lib is in the LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH')}" + ) + if use_rtx: + from torch_tensorrt._version import __tensorrt_rtx_version__ + + tensorrt_version = _parse_semver(__tensorrt_rtx_version__) + tensorrt_major = tensorrt_version["major"] + tensorrt_minor = tensorrt_version["minor"] + tensorrt_lib = { + "win": [ + f"tensorrt_rtx_{tensorrt_major}_{tensorrt_minor}.dll", + ], + "linux": [ + f"libtensorrt_rtx.so.{tensorrt_major}", + ], + } + else: + from torch_tensorrt._version import __tensorrt_version__ + + tensorrt_version = _parse_semver(__tensorrt_version__) + tensorrt_major = tensorrt_version["major"] + tensorrt_minor = tensorrt_version["minor"] + tensorrt_lib = { + "win": [ + f"nvinfer_{tensorrt_major}.dll", + f"nvinfer_plugin_{tensorrt_major}.dll", + ], + "linux": [ + f"libnvinfer.so.{tensorrt_major}", + f"libnvinfer_plugin.so.{tensorrt_major}", + ], + } + + from torch_tensorrt import __cuda_version__ + + if sys.platform.startswith("win"): + WIN_LIBS = tensorrt_lib["win"] + WIN_PATHS = os.environ["PATH"].split(os.path.pathsep) + for lib in WIN_LIBS: + ctypes.CDLL(_find_lib(lib, WIN_PATHS)) + + elif sys.platform.startswith("linux"): + LINUX_PATHS = [ + f"/usr/local/cuda-{__cuda_version__}/lib64", + "/usr/lib", + "/usr/lib64", + ] + if "LD_LIBRARY_PATH" in os.environ: + LINUX_PATHS += os.environ["LD_LIBRARY_PATH"].split(os.path.pathsep) + if platform.uname().processor == "x86_64": + LINUX_PATHS += [ + "/usr/lib/x86_64-linux-gnu", + ] + elif platform.uname().processor == "aarch64": + LINUX_PATHS += ["/usr/lib/aarch64-linux-gnu"] + LINUX_LIBS = tensorrt_lib["linux"] + for lib in LINUX_LIBS: + ctypes.CDLL(_find_lib(lib, LINUX_PATHS)) + + +alias_tensorrt() diff --git a/pyproject_rtx.toml.temp b/pyproject_rtx.toml.temp new file mode 100644 index 0000000000..9feb0ce550 --- /dev/null +++ b/pyproject_rtx.toml.temp @@ -0,0 +1,358 @@ 
+[build-system] +requires = [ + "setuptools>=77.0.0", + "packaging>=23.1", + "wheel>=0.40.0", + "ninja>=1.11.0", + "pyyaml>=6.0", + "cffi>=1.15.1", + "torch>=2.9.0.dev,<2.10.0", + "pybind11==2.6.2", +] +build-backend = "setuptools.build_meta" + +[project] +name = "torch_tensorrt_rtx" +authors = [{ name = "NVIDIA Corporation", email = "narens@nvidia.com" }] +description = "Torch-TensorRT-RTX is a package which allows users to automatically compile PyTorch and TorchScript modules to TensorRT while remaining in PyTorch" +license = { file = "LICENSE" } +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Environment :: GPU :: NVIDIA CUDA", + "License :: OSI Approved :: BSD License", + "Intended Audience :: Developers", + "Intended Audience :: Science/Research", + "Operating System :: POSIX :: Linux", + "Programming Language :: C++", + "Programming Language :: Python", + "Programming Language :: Python :: Implementation :: CPython", + "Topic :: Scientific/Engineering", + "Topic :: Scientific/Engineering :: Artificial Intelligence", + "Topic :: Software Development", + "Topic :: Software Development :: Libraries", +] +readme = { file = "README.md", content-type = "text/markdown" } +requires-python = ">=3.9" +keywords = [ + "pytorch", + "torch", + "tensorrt", + "tensorrt_rtx", + "trt", + "ai", + "artificial intelligence", + "ml", + "machine learning", + "dl", + "deep learning", + "compiler", + "dynamo", + "torchscript", + "inference", +] +dependencies = [ + "torch>=2.9.0.dev,<2.10.0", + # currently tensorrt_rtx wheel is not publicly accessible, it is only included inside the rtx tar ball + # hence the tensorrt_rtx wheel version is fixed since the version the rtx tar ball downloaded is fixed + "tensorrt_rtx==1.0.0.21", + "packaging>=23", + "numpy", + "typing-extensions>=4.7.0", + "dllist", +] + +dynamic = ["version"] + +[dependency-groups] +dev = [ + "pre-commit>=2.20.0", + "black>=22.6.0", + "clang-format==14.0.6", + "typos", + "mypy", + "isort", + "ruff", 
+ "pyyaml", +] + +debug = [ + "pydot >= 4.0.0", + "tabulate >= 0.8.10", + "graphviz >= 0.20.3" +] + +test = [ + "pytest", + "pytest-xdist", + "parameterized>=0.2.0", + "expecttest==0.1.6", + "timm>=1.0.3", + "transformers>=4.49.0", +] + +[project.optional-dependencies] +torchvision = [ + "torchvision>=0.23.0.dev,<0.24.0", +] +quantization = ["nvidia-modelopt[all]>=0.27.1"] + +[project.urls] +Homepage = "https://pytorch.org/tensorrt" +Documentation = "https://pytorch.org/tensorrt" +Repository = "https://github.com/pytorch/tensorrt.git" +Changelog = "https://github.com/pytorch/tensorrt/releases" + +[tool.setuptools] +package-dir = { "" = "py" } +include-package-data = false + +[tool.uv] +package = true +environments = ["sys_platform == 'linux'", "sys_platform == 'windows'"] +prerelease = "if-necessary-or-explicit" +index-strategy = "unsafe-best-match" + +[tool.uv.sources] +torch = [ + { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, +] +torchvision = [ + { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, +] + +[[tool.uv.index]] +name = "pytorch-nightly-cu129" +url = "https://download.pytorch.org/whl/nightly/cu129" +explicit = false + +[[tool.uv.index]] +name = "nvidia" +url = "https://pypi.nvidia.com" +explicit = false + +[tool.ruff] +# NOTE: Synchoronize the ignores with .flake8 +lint.ignore = [ + # these ignores are from flake8-bugbear; please fix! 
+ "B007", + "B008", + "B017", + "B018", # Useless expression + "B019", + "B020", + "B023", + "B024", + "B026", + "B028", # No explicit `stacklevel` keyword argument found + "B904", + "B905", + "E402", + "C408", # C408 ignored because we like the dict keyword argument syntax + "E501", # E501 is not flexible enough, we're using B950 instead + "E721", + "E731", # Assign lambda expression + "E741", + "EXE001", + "F405", + "F821", + "F841", + # these ignores are from flake8-logging-format; please fix! + "G101", + "G201", + "G202", + "G003", + "G004", + # these ignores are from RUFF perf; please fix! + "PERF203", + "PERF4", + "SIM102", + "SIM103", + "SIM112", # flake8-simplify code styles + "SIM105", # these ignores are from flake8-simplify. please fix or ignore with commented reason + "SIM108", + "SIM110", + "SIM114", # Combine `if` branches using logical `or` operator + "SIM115", + "SIM116", # Disable Use a dictionary instead of consecutive `if` statements + "SIM117", + "SIM118", +] +#line-length = 120 +lint.select = [ + "B", + "C4", + "G", + "E", + "F", + "SIM1", + "W", + # Not included in flake8 + "PERF", + "PLE", + "TRY302", +] + +# Allow unused variables when underscore-prefixed. +lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +# Allow autofix for all enabled rules (when `--fix`) is provided. +lint.fixable = [ + "A", + "B", + "C", + "D", + "E", + "F", + "G", + "I", + "N", + "Q", + "S", + "T", + "W", + "ANN", + "ARG", + "BLE", + "COM", + "DJ", + "DTZ", + "EM", + "ERA", + "EXE", + "FBT", + "ICN", + "INP", + "ISC", + "NPY", + "PD", + "PGH", + "PIE", + "PL", + "PT", + "PTH", + "PYI", + "RET", + "RSE", + "RUF", + "SIM", + "SLF", + "TCH", + "TID", + "TRY", + "UP", + "YTT", +] +lint.unfixable = [] +target-version = "py311" + +# Exclude a variety of commonly ignored directories. 
+exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", + "env", + "py/torch_tensorrt/fx", + ".github", + "examples", + "tests", + "tools", + "docs", + "docsrc", + "tests", + "setup.py", + "noxfile.py", + "__init__.py", +] + +[tool.ruff.lint.mccabe] +# Unlike Flake8, default to a complexity level of 10. +max-complexity = 10 + +[tool.isort] +profile = "black" +py_version = 311 +skip = ["py/torch_tensorrt/fx"] + +[tool.black] +#line-length = 120 +target-version = ["py39", "py310", "py311", "py312", "py313"] +force-exclude = """ +elu_converter/setup.py +""" + +[tool.mypy] +strict = true +ignore_missing_imports = true +show_error_codes = true +disable_error_code = "attr-defined" +no_implicit_optional = true +exclude = [ + "^py/torch_tensorrt/fx", + "py/torch_tensorrt/fx", + "torch_tensorrt/fx", + "py/torch_tensorrt/_C.so", + "examples", + "docs", + "docsrc", + "tests", + "setup.py", + "noxfile.py", +] +python_version = "3.11" + +follow_imports = "skip" + +[[tool.mypy.overrides]] +module = "torch_tensorrt.dynamo.conversion.aten_converters" +disable_error_code = "arg-type" + +[[tool.mypy.overrides]] +module = "torch_tensorrt.dynamo.lowering._decompositions" +disallow_untyped_calls = false + +[[tool.mypy.overrides]] +module = "torch_tensorrt.fx.*" +ignore_errors = true +follow_imports = "skip" + +[tool.typos] +files.extend-exclude = [ + "docs/**/*", + "*/fx/*", + "docsrc/_rendered_examples/", + "core/*", + "!core/runtime/", + "third_party/", + "CHANGELOG.md", + "*.ipynb", + "cpp/", + "py/torch_tensorrt/fx/", +] + +[tool.typos.default] +extend-ignore-identifiers-re = [ + "^([A-z]|[a-z])*Nd*", + "^([A-z]|[a-z])*nd*", + "active*([A-z]|[a-z]|[0-9])*,", +] + +[tool.typos.default.extend-words] +arange = "arange" diff --git a/setup.py b/setup.py index 
f829602f1a..cea08fb028 100644 --- a/setup.py +++ b/setup.py @@ -28,6 +28,7 @@ __version__: str = "0.0.0" __cuda_version__: str = "0.0" __tensorrt_version__: str = "0.0" +__tensorrt_rtx_version__: str = "0.0" LEGACY_BASE_VERSION_SUFFIX_PATTERN = re.compile("a0$") @@ -63,6 +64,7 @@ def get_base_version() -> str: def load_dep_info(): global __cuda_version__ global __tensorrt_version__ + global __tensorrt_rtx_version__ with open("dev_dep_versions.yml", "r") as stream: versions = yaml.safe_load(stream) if (gpu_arch_version := os.environ.get("CU_VERSION")) is not None: @@ -72,6 +74,7 @@ def load_dep_info(): else: __cuda_version__ = versions["__cuda_version__"] __tensorrt_version__ = versions["__tensorrt_version__"] + __tensorrt_rtx_version__ = versions["__tensorrt_rtx_version__"] load_dep_info() @@ -86,6 +89,11 @@ def load_dep_info(): LEGACY = False RELEASE = False CI_BUILD = False +USE_RTX = False + +if "--use-rtx" in sys.argv: + USE_RTX = True + sys.argv.remove("--use-rtx") if "--fx-only" in sys.argv: PY_ONLY = True @@ -115,6 +123,10 @@ def load_dep_info(): if py_only_env_var == "1": PY_ONLY = True +if (use_rtx_env_var := os.environ.get("FORCE_TENSORRT_RTX")) is not None: + if use_rtx_env_var == "1": + USE_RTX = True + if (release_env_var := os.environ.get("RELEASE")) is not None: if release_env_var == "1": RELEASE = True @@ -210,6 +222,10 @@ def build_libtorchtrt_cxx11_abi( else: cmd.append("--config=linux") + if USE_RTX: + cmd.append("--config=rtx") + print("TensorRT RTX build") + if IS_JETPACK: cmd.append("--config=jetpack") print("Jetpack build") @@ -240,6 +256,7 @@ def gen_version_file(): f.write('__version__ = "' + __version__ + '"\n') f.write('__cuda_version__ = "' + __cuda_version__ + '"\n') f.write('__tensorrt_version__ = "' + __tensorrt_version__ + '"\n') + f.write('__tensorrt_rtx_version__ = "' + __tensorrt_rtx_version__ + '"\n') def copy_libtorchtrt(multilinux=False, rt_only=False): @@ -487,6 +504,15 @@ def run(self): .split("/BUILD.bazel")[0] ) + 
tensorrt_rtx_external_dir = ( + lambda: subprocess.check_output( + [BAZEL_EXE, "query", "@tensorrt_rtx//:nvinfer", "--output", "location"] + ) + .decode("ascii") + .strip() + .split("/BUILD.bazel")[0] + ) + tensorrt_sbsa_external_dir = ( lambda: subprocess.check_output( [BAZEL_EXE, "query", "@tensorrt_sbsa//:nvinfer", "--output", "location"] @@ -510,16 +536,35 @@ def run(self): elif IS_JETPACK: tensorrt_linux_external_dir = tensorrt_jetpack_external_dir else: - tensorrt_linux_external_dir = tensorrt_x86_64_external_dir + if USE_RTX: + tensorrt_linux_external_dir = tensorrt_rtx_external_dir + else: + tensorrt_linux_external_dir = tensorrt_x86_64_external_dir - tensorrt_windows_external_dir = ( - lambda: subprocess.check_output( - [BAZEL_EXE, "query", "@tensorrt_win//:nvinfer", "--output", "location"] + if USE_RTX: + tensorrt_windows_external_dir = ( + lambda: subprocess.check_output( + [ + BAZEL_EXE, + "query", + "@tensorrt_rtx_win//:nvinfer", + "--output", + "location", + ] + ) + .decode("ascii") + .strip() + .split("/BUILD.bazel")[0] + ) + else: + tensorrt_windows_external_dir = ( + lambda: subprocess.check_output( + [BAZEL_EXE, "query", "@tensorrt_win//:nvinfer", "--output", "location"] + ) + .decode("ascii") + .strip() + .split("/BUILD.bazel")[0] ) - .decode("ascii") - .strip() - .split("/BUILD.bazel")[0] - ) ext_modules += [ CUDAExtension( diff --git a/tests/py/core/test_classes.py b/tests/py/core/test_classes.py index 5f6df00ad8..8d59461d7c 100644 --- a/tests/py/core/test_classes.py +++ b/tests/py/core/test_classes.py @@ -2,9 +2,7 @@ import unittest from typing import Dict -import tensorrt as trt import torch -import torch_tensorrt import torch_tensorrt as torchtrt from torch_tensorrt.dynamo.runtime._TorchTensorRTModule import TorchTensorRTModule @@ -58,7 +56,7 @@ def test_from_torch(self): @unittest.skipIf( - not torch_tensorrt.ENABLED_FEATURES.torch_tensorrt_runtime, + not torchtrt.ENABLED_FEATURES.torch_tensorrt_runtime, "Torch-TensorRT runtime is not 
available", ) class TestPlatform(unittest.TestCase): diff --git a/tests/py/dynamo/conversion/test_nonzero_aten.py b/tests/py/dynamo/conversion/test_nonzero_aten.py index f2c5123575..69a8024077 100644 --- a/tests/py/dynamo/conversion/test_nonzero_aten.py +++ b/tests/py/dynamo/conversion/test_nonzero_aten.py @@ -1,8 +1,11 @@ +import unittest + import torch import torch.nn as nn from parameterized import parameterized from torch.testing._internal.common_utils import run_tests from torch_tensorrt import Input +from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase @@ -17,6 +20,10 @@ class TestNonZeroConverter(DispatchTestCase): ((2, 3, 4, 5), torch.float), ] ) + @unittest.skipIf( + is_tensorrt_rtx(), + "nonzero is not supported for tensorrt_rtx", + ) def test_nonzero_dds(self, input_shape, dtype): class NonZero(nn.Module): # This is a DDS network @@ -39,6 +46,10 @@ def forward(self, input): ((2, 3, 4, 5), torch.float), ] ) + @unittest.skipIf( + is_tensorrt_rtx(), + "nonzero is not supported for tensorrt_rtx", + ) def test_nonzero_non_dds(self, input_shape, dtype): class NonZero(nn.Module): # This is a static network @@ -78,6 +89,10 @@ def forward(self, input): ), ] ) + @unittest.skipIf( + is_tensorrt_rtx(), + "nonzero is not supported for tensorrt_rtx", + ) def test_nonzero_dynamic_shape_dds(self, _, min_shape, opt_shape, max_shape, dtype): class NonZero(nn.Module): def forward(self, input): @@ -119,6 +134,10 @@ def forward(self, input): ), ] ) + @unittest.skipIf( + is_tensorrt_rtx(), + "nonzero is not supported for tensorrt_rtx", + ) def test_nonzero_dynamic_shape_non_dds( self, _, min_shape, opt_shape, max_shape, dtype ): diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py index 66f7a1821b..583ee9a18e 100644 --- a/tests/py/dynamo/models/test_models_export.py +++ b/tests/py/dynamo/models/test_models_export.py @@ -7,6 +7,7 @@ import pytest import torch import torch_tensorrt as 
torchtrt +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.utils import COSINE_THRESHOLD, cosine_similarity from packaging.version import Version @@ -410,6 +411,9 @@ def calibrate_loop(model): input_tensor = torch.randn(1, 10).cuda().to(dtype) model = SimpleNetwork().eval().cuda().to(dtype) quant_cfg = mtq.INT8_DEFAULT_CFG + # RTX does not support INT8 default quantization(weights+activations), only support INT8 weights only quantization + if is_tensorrt_rtx(): + quant_cfg["quant_cfg"]["*input_quantizer"] = {"enable": False} mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop) # model has INT8 qdq nodes at this point output_pyt = model(input_tensor) @@ -463,6 +467,9 @@ def calibrate_loop(model): model = SimpleNetwork().eval().cuda().to(dtype) quant_cfg = mtq.INT8_DEFAULT_CFG + # RTX does not support INT8 default quantization(weights+activations), only support INT8 weights only quantization + if torchtrt.tensorrt_package_name == "tensorrt_rtx": + quant_cfg["quant_cfg"]["*input_quantizer"] = {"enable": False} mtq.quantize(model, quant_cfg, forward_loop=calibrate_loop) # model has INT8 qdq nodes at this point diff --git a/tests/py/dynamo/runtime/test_000_compilation_settings.py b/tests/py/dynamo/runtime/test_000_compilation_settings.py index 1f7a74fefd..500ca9ef71 100644 --- a/tests/py/dynamo/runtime/test_000_compilation_settings.py +++ b/tests/py/dynamo/runtime/test_000_compilation_settings.py @@ -5,6 +5,7 @@ import torch import torch_tensorrt from torch.testing._internal.common_utils import TestCase, run_tests +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo.utils import is_tegra_platform from packaging.version import Version @@ -61,7 +62,7 @@ def forward(self, x): torch._dynamo.reset() @unittest.skipIf( - is_tegra_platform() and Version(metadata.version("tensorrt")) > Version("10.8"), + is_tegra_platform() and is_tensorrt_version_supported("10.8"), "DLA is not supported on Jetson platform 
starting TRT 10.8", ) def test_dla_args(self): diff --git a/tests/util/BUILD b/tests/util/BUILD index 3b89c9073e..fb33140b7e 100644 --- a/tests/util/BUILD +++ b/tests/util/BUILD @@ -5,7 +5,7 @@ package(default_visibility = ["//visibility:public"]) config_setting( name = "use_torch_whl", flag_values = { - "//toolchains/dep_src:torch": "whl" + "//toolchains/dep_src:torch": "whl", }, ) @@ -22,7 +22,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "default" + "//toolchains/dep_collection:compute_libs": "default", }, ) @@ -32,7 +32,7 @@ config_setting( "@platforms//cpu:aarch64", ], flag_values = { - "//toolchains/dep_collection:compute_libs": "jetpack" + "//toolchains/dep_collection:compute_libs": "jetpack", }, ) @@ -58,22 +58,24 @@ cc_library( deps = [ "@googletest//:gtest_main", ] + select({ - ":windows": ["@tensorrt_win//:nvinfer"], - ":sbsa": ["@tensorrt_sbsa//:nvinfer"], ":jetpack": ["@tensorrt_l4t//:nvinfer"], + ":rtx_win": ["@tensorrt_rtx_win//:nvinfer"], + ":rtx_x86_64": ["@tensorrt_rtx//:nvinfer"], + ":sbsa": ["@tensorrt_sbsa//:nvinfer"], + ":windows": ["@tensorrt_win//:nvinfer"], "//conditions:default": ["@tensorrt//:nvinfer"], }) + select({ - ":windows": [ - "@libtorch_win//:caffe2", - "@libtorch_win//:libtorch", + ":jetpack": [ + "@torch_l4t//:caffe2", + "@torch_l4t//:libtorch", ], ":use_torch_whl": [ "@torch_whl//:caffe2", "@torch_whl//:libtorch", ], - ":jetpack": [ - "@torch_l4t//:libtorch", - "@torch_l4t//:caffe2", + ":windows": [ + "@libtorch_win//:caffe2", + "@libtorch_win//:libtorch", ], "//conditions:default": [ "@libtorch", diff --git a/third_party/tensorrt_rtx/archive/BUILD b/third_party/tensorrt_rtx/archive/BUILD new file mode 100644 index 0000000000..ec6ebbe985 --- /dev/null +++ b/third_party/tensorrt_rtx/archive/BUILD @@ -0,0 +1,68 @@ +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +config_setting( + name = 
"rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = {"@//toolchains/dep_collection:compute_libs": "rtx"}, +) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = {"@//toolchains/dep_collection:compute_libs": "rtx"}, +) + +cc_library( + name = "nvinfer_headers", + hdrs = glob( + [ + "include/NvInfer*.h", + ], + allow_empty = True, + # exclude = [ + # "include/NvInferPlugin.h", + # "include/NvInferPluginUtils.h", + # ], + ), + includes = ["include/"], + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_lib", + shared_library = select({ + ":rtx_win": "lib/tensorrt_rtx_1_0.dll", + ":rtx_x86_64": "lib/libtensorrt_rtx.so", + }), + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_static_lib", + static_library = select({ + ":rtx_win": "lib/tensorrt_rtx_1_0.lib", + }), + visibility = ["//visibility:private"], +) + +cc_library( + name = "nvinfer", + visibility = ["//visibility:public"], + deps = [ + "nvinfer_headers", + "nvinfer_lib", + ] + select({ + ":rtx_win": [ + "nvinfer_static_lib", + "@cuda_win//:cudart", + ], + ":rtx_x86_64": ["@cuda//:cudart"], + }), +) diff --git a/third_party/tensorrt_rtx/local/BUILD b/third_party/tensorrt_rtx/local/BUILD new file mode 100644 index 0000000000..fe844170fb --- /dev/null +++ b/third_party/tensorrt_rtx/local/BUILD @@ -0,0 +1,80 @@ +load("@rules_cc//cc:defs.bzl", "cc_import", "cc_library") + +package(default_visibility = ["//visibility:public"]) + +config_setting( + name = "rtx_win", + constraint_values = [ + "@platforms//os:windows", + ], + flag_values = {"@//toolchains/dep_collection:compute_libs": "rtx"}, +) + +config_setting( + name = "rtx_x86_64", + constraint_values = [ + "@platforms//cpu:x86_64", + "@platforms//os:linux", + ], + flag_values = {"@//toolchains/dep_collection:compute_libs": "rtx"}, +) + +cc_library( + name = "nvinfer_headers", + hdrs = select({ + 
":rtx_win": glob( + [ + "include/NvInfer*.h", + ], + allow_empty = True, + # exclude = [ + # "include/NvInferPlugin.h", + # "include/NvInferPluginUtils.h", + # ], + ), + ":rtx_x86_64": glob( + [ + "include/NvInfer*.h", + ], + allow_empty = True, + # exclude = [ + # "include/NvInferPlugin.h", + # "include/NvInferPluginUtils.h", + # ], + ), + }), + includes = ["include/"], + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_static_lib", + static_library = select({ + ":rtx_win": "lib/tensorrt_rtx_1_0.lib", + }), + visibility = ["//visibility:private"], +) + +cc_import( + name = "nvinfer_lib", + shared_library = select({ + ":rtx_win": "lib/tensorrt_rtx_1_0.dll", + ":rtx_x86_64": "lib/libtensorrt_rtx.so", + }), + visibility = ["//visibility:private"], +) + +cc_library( + name = "nvinfer", + visibility = ["//visibility:public"], + deps = [ + "nvinfer_headers", + "nvinfer_lib", + ] + select({ + ":rtx_win": [ + "nvinfer_static_lib", + "@cuda_win//:cudart", + ], + "//conditions:default": ["@cuda//:cudart"], + }), +) diff --git a/toolchains/ci_workspaces/MODULE.bazel.tmpl b/toolchains/ci_workspaces/MODULE.bazel.tmpl index 4f29a469da..11483872ed 100644 --- a/toolchains/ci_workspaces/MODULE.bazel.tmpl +++ b/toolchains/ci_workspaces/MODULE.bazel.tmpl @@ -81,6 +81,15 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + "https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz", + ], +) + http_archive( name = "tensorrt_sbsa", build_file = "@//third_party/tensorrt/archive:BUILD", @@ -108,6 +117,14 @@ http_archive( ], ) +http_archive( + name = "tensorrt_rtx_win", + build_file = "@//third_party/tensorrt_rtx/archive:BUILD", + strip_prefix = "TensorRT-RTX-1.0.0.21", + urls = [ + 
"https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip", + ], +) #################################################################################### # Locally installed dependencies (use in cases of custom dependencies or aarch64) diff --git a/toolchains/dep_collection/defs.bzl b/toolchains/dep_collection/defs.bzl index 6eaa710261..873ef7ec42 100644 --- a/toolchains/dep_collection/defs.bzl +++ b/toolchains/dep_collection/defs.bzl @@ -1,7 +1,7 @@ # buildifier: disable=module-docstring DependencyCollectionInfo = provider(doc = "", fields = ["type"]) -collection_types = ["default", "jetpack"] +collection_types = ["default", "jetpack", "rtx"] def _impl(ctx): _type = ctx.build_setting_value diff --git a/tools/debug/engine_visualization/__init__.py b/tools/debug/engine_visualization/__init__.py new file mode 100644 index 0000000000..e69de29bb2 From 7d34b492baa624f1e768313f434d9a149918c51e Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 6 Aug 2025 12:17:39 -0700 Subject: [PATCH 02/12] LD_LIBRARY_PATH fix for windows smoke test --- .github/workflows/build_wheels_windows.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build_wheels_windows.yml b/.github/workflows/build_wheels_windows.yml index 7af9dead8f..0aa6570f09 100644 --- a/.github/workflows/build_wheels_windows.yml +++ b/.github/workflows/build_wheels_windows.yml @@ -326,7 +326,7 @@ jobs: export FORCE_TENSORRT_RTX=1 # TODO: lan to remove this once we have a better way to handle the LD_LIBRARY_PATH # the LD_LIBRARY_PATH set in the pre_build_script_windows.sh will not be available in the smoke test, have to set it here again - export LD_LIBRARY_PATH=${{ inputs.repository }}/TensorRT-RTX-1.0.0.21/lib:$LD_LIBRARY_PATH + export LD_LIBRARY_PATH=${PWD}/${{ inputs.repository }}/TensorRT-RTX-1.0.0.21/lib:$LD_LIBRARY_PATH fi if [[ ! 
-f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" From 184c84c35c1b13f154e61aa6484350d48e166039 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 6 Aug 2025 14:31:45 -0700 Subject: [PATCH 03/12] resolve comments --- .github/scripts/install-tensorrt-rtx.sh | 2 +- .github/scripts/install-torch-tensorrt.sh | 8 +----- .../workflows/build-test-linux-x86_64_rtx.yml | 18 ++++++------- .github/workflows/build-test-windows_rtx.yml | 16 ++++++------ .github/workflows/build_wheels_linux.yml | 4 +-- .github/workflows/build_wheels_windows.yml | 25 +++++++++---------- .github/workflows/linux-test.yml | 4 +-- .github/workflows/windows-test.yml | 2 +- docsrc/getting_started/tensorrt_rtx.rst | 6 ++--- packaging/pre_build_script.sh | 2 +- packaging/pre_build_script_windows.sh | 2 +- .../{trt_alias.py => _TensorRTProxyModule.py} | 6 ++--- py/torch_tensorrt/__init__.py | 16 ++++++------ setup.py | 14 +++++------ 14 files changed, 60 insertions(+), 65 deletions(-) rename py/torch_tensorrt/{trt_alias.py => _TensorRTProxyModule.py} (97%) diff --git a/.github/scripts/install-tensorrt-rtx.sh b/.github/scripts/install-tensorrt-rtx.sh index bb44681607..ef5c2945f3 100644 --- a/.github/scripts/install-tensorrt-rtx.sh +++ b/.github/scripts/install-tensorrt-rtx.sh @@ -1,6 +1,6 @@ install_tensorrt_rtx() { - if [[ ${USE_RTX} == true ]]; then + if [[ ${USE_TRT_RTX} == true ]]; then install_wheel_or_not=${1:-false} echo "It is the tensorrt-rtx build, install tensorrt-rtx with install_wheel_or_not:${install_wheel_or_not}" PLATFORM=$(python -c "import sys; print(sys.platform)") diff --git a/.github/scripts/install-torch-tensorrt.sh b/.github/scripts/install-torch-tensorrt.sh index 49a367b832..7d0b7a5947 100755 --- a/.github/scripts/install-torch-tensorrt.sh +++ b/.github/scripts/install-torch-tensorrt.sh @@ -21,7 +21,7 @@ pip uninstall -y torch torchvision pip install --force-reinstall --pre ${TORCHVISION} --index-url 
${INDEX_URL} pip install --force-reinstall --pre ${TORCH} --index-url ${INDEX_URL} -if [[ ${USE_RTX} == true ]]; then +if [[ ${USE_TRT_RTX} == true ]]; then source .github/scripts/install-tensorrt-rtx.sh # tensorrt-rtx is not publicly available, so we need to install the wheel from the tar ball install_wheel_or_not=true @@ -35,10 +35,4 @@ else pip install /opt/torch-tensorrt-builds/torch_tensorrt*.whl fi -if [[ ${USE_RTX} == true ]]; then - # currently tensorrt is installed automatically by install torch-tensorrt since it is a dependency of torch-tensorrt in pyproject.toml - # so we need to uninstall it to avoid conflict - pip uninstall -y tensorrt tensorrt_cu12 tensorrt_cu12_bindings tensorrt_cu12_libs -fi - echo -e "Running test script"; diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml index ab7c1ec9f2..44f6cb5580 100644 --- a/.github/workflows/build-test-linux-x86_64_rtx.yml +++ b/.github/workflows/build-test-linux-x86_64_rtx.yml @@ -1,4 +1,4 @@ -name: Build and test Linux x86_64 wheels(RTX) +name: RTX - Build and test Linux x86_64 wheels on: pull_request: @@ -104,7 +104,7 @@ jobs: export USE_HOST_DEPS=1 export CI_BUILD=1 export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/modules python hub.py @@ -144,7 +144,7 @@ jobs: export CI_BUILD=1 pushd . 
cd tests/py/dynamo - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin_with_attrs.py @@ -177,7 +177,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -210,7 +210,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -244,7 +244,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -279,7 +279,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -314,7 +314,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -349,7 +349,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py/core python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . 
diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml index 31078c39b7..b271487395 100644 --- a/.github/workflows/build-test-windows_rtx.yml +++ b/.github/workflows/build-test-windows_rtx.yml @@ -1,4 +1,4 @@ -name: Build and test Windows wheels(RTX) +name: RTX - Build and test Windows wheels on: pull_request: @@ -110,7 +110,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/modules python hub.py @@ -174,7 +174,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -204,7 +204,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -235,7 +235,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -267,7 +267,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -299,7 +299,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -330,7 +330,7 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export FORCE_TENSORRT_RTX=1 + export USE_TRT_RTX=1 pushd . cd tests/py/core python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . 
diff --git a/.github/workflows/build_wheels_linux.yml b/.github/workflows/build_wheels_linux.yml index ef0805cf73..c2d88da347 100644 --- a/.github/workflows/build_wheels_linux.yml +++ b/.github/workflows/build_wheels_linux.yml @@ -141,7 +141,7 @@ jobs: UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} ARCH: ${{ inputs.architecture }} BUILD_TARGET: ${{ inputs.build-target }} - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} name: build-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }}-${{ inputs.architecture }}-${{ inputs.use-rtx }}-${{ inputs.is-jetpack }} runs-on: ${{ matrix.validation_runner }} environment: ${{(inputs.trigger-event == 'schedule' || (inputs.trigger-event == 'push' && (startsWith(github.event.ref, 'refs/heads/nightly') || startsWith(github.event.ref, 'refs/tags/v')))) && 'pytorchbot-env' || ''}} @@ -269,7 +269,7 @@ jobs: BUILD_VERSION="${BUILD_VERSION}+${CU_VERSION}" fi echo "BUILD_VERSION=$BUILD_VERSION" - echo "USE_RTX=$USE_RTX" + echo "USE_TRT_RTX=$USE_TRT_RTX" echo "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" if [[ ${{ inputs.use-rtx }} == true ]]; then echo "Building tensorrt-rtx wheel" diff --git a/.github/workflows/build_wheels_windows.yml b/.github/workflows/build_wheels_windows.yml index 0aa6570f09..5c9e391cd6 100644 --- a/.github/workflows/build_wheels_windows.yml +++ b/.github/workflows/build_wheels_windows.yml @@ -106,7 +106,7 @@ jobs: REF: ${{ inputs.ref }} CU_VERSION: ${{ matrix.desired_cuda }} UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }} - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} name: build-${{ matrix.build_name }} runs-on: ${{ matrix.validation_runner }} defaults: @@ -315,25 +315,24 @@ jobs: env: ENV_SCRIPT: ${{ inputs.env-script }} PACKAGE_NAME: ${{ inputs.package-name }} - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} run: | source "${BUILD_ENV_FILE}" 
WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/") echo "$WHEEL_NAME" ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" - if [[ $USE_RTX == true ]]; then - export FORCE_TENSORRT_RTX=1 - # TODO: lan to remove this once we have a better way to handle the LD_LIBRARY_PATH - # the LD_LIBRARY_PATH set in the pre_build_script_windows.sh will not be available in the smoke test, have to set it here again - export LD_LIBRARY_PATH=${PWD}/${{ inputs.repository }}/TensorRT-RTX-1.0.0.21/lib:$LD_LIBRARY_PATH - fi - if [[ ! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" - ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + if [[ $USE_TRT_RTX == true ]]; then + # TODO: lan to remove this once we have a better way to do a smoke test + echo "Smoke test for TensorRT-RTX is skipped for now" else + if [[ !
-f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT} ]]; then + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" + ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + else + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" + ${CONDA_RUN} "${{ inputs.repository }}/${ENV_SCRIPT}" python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" + fi fi - name: Smoke Test ARM64 if: inputs.architecture == 'arm64' diff --git a/.github/workflows/linux-test.yml b/.github/workflows/linux-test.yml index 9883db653d..ae2b5216d5 100644 --- a/.github/workflows/linux-test.yml +++ b/.github/workflows/linux-test.yml @@ -73,7 +73,7 @@ jobs: SCRIPT: ${{ inputs.script }} RUNNER_TEST_RESULTS_DIR: /tmp/test_results ARCH: ${{ inputs.architecture }} - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_${{ inputs.architecture }} name: ${{ inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} runs-on: ${{ matrix.validation_runner }} @@ -141,7 +141,7 @@ jobs: working-directory: ${{ inputs.repository }} env: ALL_SECRETS: ${{ toJSON(secrets) }} - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} run: | set -euxo pipefail # shellcheck disable=SC2086 diff --git a/.github/workflows/windows-test.yml b/.github/workflows/windows-test.yml index dcd4351fb4..5e3b5c9918 100644 --- a/.github/workflows/windows-test.yml +++ b/.github/workflows/windows-test.yml @@ -60,7 +60,7 @@ jobs: CU_VERSION: ${{ matrix.desired_cuda }} SCRIPT: ${{ inputs.script }} PYTHONUTF8: 1 - USE_RTX: ${{ inputs.use-rtx }} + USE_TRT_RTX: ${{ inputs.use-rtx }} DOWNLOAD_ARTIFACT_NAME: pytorch_tensorrt_${{ matrix.tensorrt.version }}_${{ matrix.python_version }}_${{ matrix.desired_cuda }}_win_amd64 name: ${{ 
inputs.job-name }}-${{ matrix.tensorrt.version }}-${{ matrix.python_version }}-${{ matrix.desired_cuda }} runs-on: ${{ matrix.validation_runner }} diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst index 8edf80699c..42c9e916d8 100644 --- a/docsrc/getting_started/tensorrt_rtx.rst +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -19,7 +19,7 @@ Currenlty, Torch-TensorRT only supports TensorRT-RTX for the experiment purpose. Torch-TensorRT by default uses TensorRT during the build and run. In order to use TensorRT-RTX, you need to build the wheel with ``--use-rtx`` flag. -And then set the ``FORCE_TENSORRT_RTX=1`` environment variable during run. +And then set the ``USE_TRT_RTX=1`` environment variable during run. @@ -60,6 +60,6 @@ Quick Start =========== .. code-block:: py - # you have to set FORCE_TENSORRT_RTX=1 to use TensorRT-RTX - FORCE_TENSORRT_RTX=1 python examples/dynamo/torch_compile_resnet_example.py + # you have to set USE_TRT_RTX=1 to use TensorRT-RTX + USE_TRT_RTX=1 python examples/dynamo/torch_compile_resnet_example.py diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index 6294632c59..d4ab7a6241 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -76,7 +76,7 @@ fi cat MODULE.bazel export CI_BUILD=1 -if [[ ${USE_RTX} == true ]]; then +if [[ ${USE_TRT_RTX} == true ]]; then cat pyproject_rtx.toml.temp > pyproject.toml source .github/scripts/install-tensorrt-rtx.sh install_wheel_or_not=true diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index c4d2b37322..5cc04011b3 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -37,7 +37,7 @@ fi cat MODULE.bazel echo "RELEASE=1" >> ${GITHUB_ENV} -if [[ ${USE_RTX} == true ]]; then +if [[ ${USE_TRT_RTX} == true ]]; then cat pyproject_rtx.toml.temp > pyproject.toml source .github/scripts/install-tensorrt-rtx.sh install_wheel_or_not=true 
diff --git a/py/torch_tensorrt/trt_alias.py b/py/torch_tensorrt/_TensorRTProxyModule.py similarity index 97% rename from py/torch_tensorrt/trt_alias.py rename to py/torch_tensorrt/_TensorRTProxyModule.py index 4a80d1c12e..832e8c8932 100644 --- a/py/torch_tensorrt/trt_alias.py +++ b/py/torch_tensorrt/_TensorRTProxyModule.py @@ -78,11 +78,11 @@ def alias_tensorrt() -> None: return # in order not to break or change the existing behavior, we only build and run with tensorrt by default, tensorrt-rtx is for experiment only - # if we want to test with tensorrt-rtx, we have to build the wheel with --use-rtx and test with FORCE_TENSORRT_RTX=1 - # eg: FORCE_TENSORRT_RTX=1 python test.py + # if we want to test with tensorrt-rtx, we have to build the wheel with --use-rtx and test with USE_TRT_RTX=1 + # eg: USE_TRT_RTX=1 python test.py # in future, we can do dynamic linking either to tensorrt or tensorrt-rtx based on the gpu type use_rtx = False - if os.environ.get("FORCE_TENSORRT_RTX", "0") == "1": + if os.environ.get("USE_TRT_RTX", "0") == "1": use_rtx = True package_name = "tensorrt_rtx" if use_rtx else "tensorrt" # Import the appropriate package diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index 1b2a498961..d127f42690 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -27,19 +27,21 @@ tensorrt_package_name = "" try: - # note: trt_alias must be imported before any import tensorrt + # note: _TensorRTProxyModule must be imported before any import tensorrt - from . import trt_alias # noqa: F401 + from . 
import _TensorRTProxyModule # noqa: F401 - tensorrt_package_name = trt_alias.package_name - _LOGGER.info(f"You are using {trt_alias.package_name=} ") + tensorrt_package_name = _TensorRTProxyModule.package_name + _LOGGER.info(f"You are using {_TensorRTProxyModule.package_name=} ") except Exception as e: - print(f"import error when try to import trt_alias, got error {e}") + print(f"import error when try to import _TensorRTProxyModule, got error {e}") print( f"make sure tensorrt lib is in the LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH')}" ) - raise Exception(f"import error when try to import trt_alias, got error {e}") + raise Exception( + f"import error when try to import _TensorRTProxyModule, got error {e}" + ) def _register_with_torch() -> None: @@ -68,7 +70,7 @@ def _register_with_torch() -> None: torch.ops.load_library(linked_file_runtime_full_path) -# note: trt_alias must be imported before enabled features, because enabled features will check tensorrt.plugin availability +# note: _TensorRTProxyModule must be imported before enabled features, because enabled features will check tensorrt.plugin availability from torch_tensorrt._features import ENABLED_FEATURES, _enabled_features_str _LOGGER.debug(_enabled_features_str()) diff --git a/setup.py b/setup.py index cea08fb028..a4bbfb2834 100644 --- a/setup.py +++ b/setup.py @@ -89,10 +89,10 @@ def load_dep_info(): LEGACY = False RELEASE = False CI_BUILD = False -USE_RTX = False +USE_TRT_RTX = False if "--use-rtx" in sys.argv: - USE_RTX = True + USE_TRT_RTX = True sys.argv.remove("--use-rtx") if "--fx-only" in sys.argv: @@ -123,9 +123,9 @@ def load_dep_info(): if py_only_env_var == "1": PY_ONLY = True -if (use_rtx_env_var := os.environ.get("FORCE_TENSORRT_RTX")) is not None: +if (use_rtx_env_var := os.environ.get("USE_TRT_RTX")) is not None: if use_rtx_env_var == "1": - USE_RTX = True + USE_TRT_RTX = True if (release_env_var := os.environ.get("RELEASE")) is not None: if release_env_var == "1": @@ -222,7 +222,7 @@ 
def build_libtorchtrt_cxx11_abi( else: cmd.append("--config=linux") - if USE_RTX: + if USE_TRT_RTX: cmd.append("--config=rtx") print("TensorRT RTX build") @@ -536,12 +536,12 @@ def run(self): elif IS_JETPACK: tensorrt_linux_external_dir = tensorrt_jetpack_external_dir else: - if USE_RTX: + if USE_TRT_RTX: tensorrt_linux_external_dir = tensorrt_rtx_external_dir else: tensorrt_linux_external_dir = tensorrt_x86_64_external_dir - if USE_RTX: + if USE_TRT_RTX: tensorrt_windows_external_dir = ( lambda: subprocess.check_output( [ From cf809977256a9088453cd23e5844715f4a75ca1b Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 6 Aug 2025 15:50:30 -0700 Subject: [PATCH 04/12] change the pyproject.toml to make dependencies dynamic --- .../workflows/build-test-linux-x86_64_rtx.yml | 8 - .github/workflows/build-test-windows_rtx.yml | 7 - docsrc/getting_started/tensorrt_rtx.rst | 19 +- packaging/pre_build_script.sh | 1 - packaging/pre_build_script_windows.sh | 1 - py/torch_tensorrt/_TensorRTProxyModule.py | 9 +- pyproject.toml | 29 +- pyproject_rtx.toml.temp | 358 --------------- setup.py | 55 ++- uv.lock | 422 +++--------------- 10 files changed, 137 insertions(+), 772 deletions(-) delete mode 100644 pyproject_rtx.toml.temp diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml index 44f6cb5580..24d0169e2c 100644 --- a/.github/workflows/build-test-linux-x86_64_rtx.yml +++ b/.github/workflows/build-test-linux-x86_64_rtx.yml @@ -104,7 +104,6 @@ jobs: export USE_HOST_DEPS=1 export CI_BUILD=1 export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH - export USE_TRT_RTX=1 pushd . cd tests/modules python hub.py @@ -144,7 +143,6 @@ jobs: export CI_BUILD=1 pushd . 
cd tests/py/dynamo - export USE_TRT_RTX=1 python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml -n 4 conversion/ python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin.py python -m pytest -ra --junitxml=${RUNNER_TEST_RESULTS_DIR}/dynamo_converters_test_results.xml automatic_plugin/test_automatic_plugin_with_attrs.py @@ -177,7 +175,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -210,7 +207,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -244,7 +240,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -279,7 +274,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -314,7 +308,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -349,7 +342,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py/core python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml index b271487395..6321cd8a52 100644 --- a/.github/workflows/build-test-windows_rtx.yml +++ b/.github/workflows/build-test-windows_rtx.yml @@ -110,7 +110,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/modules python hub.py @@ -174,7 +173,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -204,7 +202,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . 
cd tests/py cd dynamo @@ -235,7 +232,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -267,7 +263,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -299,7 +294,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py cd dynamo @@ -330,7 +324,6 @@ jobs: set -euo pipefail export USE_HOST_DEPS=1 export CI_BUILD=1 - export USE_TRT_RTX=1 pushd . cd tests/py/core python -m pytest -ra -n 4 --junitxml=${RUNNER_TEST_RESULTS_DIR}/tests_py_core_test_results.xml . diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst index 42c9e916d8..5e617b459a 100644 --- a/docsrc/getting_started/tensorrt_rtx.rst +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -18,8 +18,8 @@ For detailed information about TensorRT-RTX, refer to: Currenlty, Torch-TensorRT only supports TensorRT-RTX for the experiment purpose. Torch-TensorRT by default uses TensorRT during the build and run. -In order to use TensorRT-RTX, you need to build the wheel with ``--use-rtx`` flag. -And then set the ``USE_TRT_RTX=1`` environment variable during run. +In order to use TensorRT-RTX, you need to build the wheel with ``--use-rtx`` flag or ``USE_TRT_RTX=true``. +And during the run, you need set the ``USE_TRT_RTX=true`` environment variable to invoke with TensorRT-RTX. @@ -34,6 +34,11 @@ System Preparation .. code-block:: sh # if TensorRT-RTX is downloaded in /usr/local/tensorrt-rtx export LD_LIBRARY_PATH=/usr/local/tensorrt-rtx/lib:$LD_LIBRARY_PATH + cd /usr/local/tensorrt-rtx/python + # install the tensorrt_rtx wheel + # currently tensorrt_rtx wheel is only available from tarball downloaded, not from pypi yet. 
+ python -m pip install tensorrt_rtx-1.0.0.21-cp39-none-linux_x86_64.whl + Build Torch-TensorRT with TensorRT-RTX @@ -49,17 +54,19 @@ Build Torch-TensorRT with TensorRT-RTX cd dist python -m pip install torch-tensorrt-*.whl + # check that tensorrt related wheel is not installed, only tensorrt_rtx is there + python -m pip list | grep tensorrt + # make sure the tensorrt_rtx.so file is linked to the tensorrt_rtx.so file in the TensorRT-RTX installation directory trt_install_path=$(python -m pip show torch-tensorrt | grep "Location" | awk '{print $2}')/torch_tensorrt - # check if the libtensorrt_rtx.so.1 is linked + # check if the libtensorrt_rtx.so.1 is linked, and make sure there is no libnvinfer.so.10 linked ldd $trt_install_path/lib/libtorchtrt.so - Quick Start =========== .. code-block:: py - # you have to set USE_TRT_RTX=1 to use TensorRT-RTX - USE_TRT_RTX=1 python examples/dynamo/torch_compile_resnet_example.py + # you have to set USE_TRT_RTX=true to use TensorRT-RTX + USE_TRT_RTX=true python examples/dynamo/torch_compile_resnet_example.py diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index d4ab7a6241..914c203997 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -77,7 +77,6 @@ cat MODULE.bazel export CI_BUILD=1 if [[ ${USE_TRT_RTX} == true ]]; then - cat pyproject_rtx.toml.temp > pyproject.toml source .github/scripts/install-tensorrt-rtx.sh install_wheel_or_not=true install_tensorrt_rtx ${install_wheel_or_not} diff --git a/packaging/pre_build_script_windows.sh b/packaging/pre_build_script_windows.sh index 5cc04011b3..4be0018f0d 100644 --- a/packaging/pre_build_script_windows.sh +++ b/packaging/pre_build_script_windows.sh @@ -38,7 +38,6 @@ cat MODULE.bazel echo "RELEASE=1" >> ${GITHUB_ENV} if [[ ${USE_TRT_RTX} == true ]]; then - cat pyproject_rtx.toml.temp > pyproject.toml source .github/scripts/install-tensorrt-rtx.sh install_wheel_or_not=true install_tensorrt_rtx ${install_wheel_or_not} diff --git 
a/py/torch_tensorrt/_TensorRTProxyModule.py b/py/torch_tensorrt/_TensorRTProxyModule.py index 832e8c8932..c5917a3ae0 100644 --- a/py/torch_tensorrt/_TensorRTProxyModule.py +++ b/py/torch_tensorrt/_TensorRTProxyModule.py @@ -78,12 +78,13 @@ def alias_tensorrt() -> None: return # in order not to break or change the existing behavior, we only build and run with tensorrt by default, tensorrt-rtx is for experiment only - # if we want to test with tensorrt-rtx, we have to build the wheel with --use-rtx and test with USE_TRT_RTX=1 - # eg: USE_TRT_RTX=1 python test.py + # if we want to test with tensorrt-rtx, we have to build the wheel with --use-rtx and test with USE_TRT_RTX=true + # eg: USE_TRT_RTX=true python test.py # in future, we can do dynamic linking either to tensorrt or tensorrt-rtx based on the gpu type use_rtx = False - if os.environ.get("USE_TRT_RTX", "0") == "1": - use_rtx = True + if (use_rtx_env_var := os.environ.get("USE_TRT_RTX")) is not None: + if use_rtx_env_var.lower() == "true": + use_rtx = True package_name = "tensorrt_rtx" if use_rtx else "tensorrt" # Import the appropriate package try: diff --git a/pyproject.toml b/pyproject.toml index d390e8b4a9..19cd737bc1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,30 +50,8 @@ keywords = [ "torchscript", "inference", ] -dependencies = [ - "torch>=2.9.0.dev,<2.10.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "torch>=2.7.0,<2.8.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - - "tensorrt>=10.12.0,<10.13.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "tensorrt-cu12>=10.12.0,<10.13.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "tensorrt-cu12-bindings>=10.12.0,<10.13.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - 
"tensorrt-cu12-libs>=10.12.0,<10.13.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - - "tensorrt>=10.3.0,<10.4.0;platform_machine == 'aarch64' and 'tegra' in platform_release", - "tensorrt-cu12>=10.3.0,<10.4.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - "tensorrt-cu12-bindings>=10.3.0,<10.4.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - "tensorrt-cu12-libs>=10.3.0,<10.4.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - - "packaging>=23", - "numpy; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "numpy<2.0.0; platform_machine == 'aarch64' and 'tegra' in platform_release", - - "typing-extensions>=4.7.0", - "dllist", -] - -dynamic = ["version"] +dynamic = ["version", "dependencies", "optional-dependencies"] [dependency-groups] dev = [ @@ -102,11 +80,6 @@ test = [ "transformers>=4.49.0", ] -[project.optional-dependencies] -torchvision = [ - "torchvision>=0.23.0.dev,<0.24.0; platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)", - "torchvision>=0.22.0,<0.23.0; platform_machine == 'aarch64' and 'tegra' in platform_release", -] quantization = ["nvidia-modelopt[all]>=0.27.1"] [project.urls] diff --git a/pyproject_rtx.toml.temp b/pyproject_rtx.toml.temp deleted file mode 100644 index 9feb0ce550..0000000000 --- a/pyproject_rtx.toml.temp +++ /dev/null @@ -1,358 +0,0 @@ -[build-system] -requires = [ - "setuptools>=77.0.0", - "packaging>=23.1", - "wheel>=0.40.0", - "ninja>=1.11.0", - "pyyaml>=6.0", - "cffi>=1.15.1", - "torch>=2.9.0.dev,<2.10.0", - "pybind11==2.6.2", -] -build-backend = "setuptools.build_meta" - -[project] -name = "torch_tensorrt_rtx" -authors = [{ name = "NVIDIA Corporation", email = "narens@nvidia.com" }] -description = "Torch-TensorRT-RTX is a package which allows users to automatically compile PyTorch and TorchScript modules to 
TensorRT while remaining in PyTorch" -license = { file = "LICENSE" } -classifiers = [ - "Development Status :: 5 - Production/Stable", - "Environment :: GPU :: NVIDIA CUDA", - "License :: OSI Approved :: BSD License", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "Operating System :: POSIX :: Linux", - "Programming Language :: C++", - "Programming Language :: Python", - "Programming Language :: Python :: Implementation :: CPython", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Software Development", - "Topic :: Software Development :: Libraries", -] -readme = { file = "README.md", content-type = "text/markdown" } -requires-python = ">=3.9" -keywords = [ - "pytorch", - "torch", - "tensorrt", - "tensorrt_rtx", - "trt", - "ai", - "artificial intelligence", - "ml", - "machine learning", - "dl", - "deep learning", - "compiler", - "dynamo", - "torchscript", - "inference", -] -dependencies = [ - "torch>=2.9.0.dev,<2.10.0", - # currently tensorrt_rtx wheel is not publicly accessible, it is only included inside the rtx tar ball - # hence the tensorrt_rtx wheel version is fixed since the version the rtx tar ball downloaded is fixed - "tensorrt_rtx==1.0.0.21", - "packaging>=23", - "numpy", - "typing-extensions>=4.7.0", - "dllist", -] - -dynamic = ["version"] - -[dependency-groups] -dev = [ - "pre-commit>=2.20.0", - "black>=22.6.0", - "clang-format==14.0.6", - "typos", - "mypy", - "isort", - "ruff", - "pyyaml", -] - -debug = [ - "pydot >= 4.0.0", - "tabulate >= 0.8.10", - "graphviz >= 0.20.3" -] - -test = [ - "pytest", - "pytest-xdist", - "parameterized>=0.2.0", - "expecttest==0.1.6", - "timm>=1.0.3", - "transformers>=4.49.0", -] - -[project.optional-dependencies] -torchvision = [ - "torchvision>=0.23.0.dev,<0.24.0", -] -quantization = ["nvidia-modelopt[all]>=0.27.1"] - -[project.urls] -Homepage = "https://pytorch.org/tensorrt" -Documentation = "https://pytorch.org/tensorrt" 
-Repository = "https://github.com/pytorch/tensorrt.git" -Changelog = "https://github.com/pytorch/tensorrt/releases" - -[tool.setuptools] -package-dir = { "" = "py" } -include-package-data = false - -[tool.uv] -package = true -environments = ["sys_platform == 'linux'", "sys_platform == 'windows'"] -prerelease = "if-necessary-or-explicit" -index-strategy = "unsafe-best-match" - -[tool.uv.sources] -torch = [ - { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, -] -torchvision = [ - { index = "pytorch-nightly-cu129", marker = "platform_machine != 'aarch64' or (platform_machine == 'aarch64' and 'tegra' not in platform_release)" }, -] - -[[tool.uv.index]] -name = "pytorch-nightly-cu129" -url = "https://download.pytorch.org/whl/nightly/cu129" -explicit = false - -[[tool.uv.index]] -name = "nvidia" -url = "https://pypi.nvidia.com" -explicit = false - -[tool.ruff] -# NOTE: Synchoronize the ignores with .flake8 -lint.ignore = [ - # these ignores are from flake8-bugbear; please fix! - "B007", - "B008", - "B017", - "B018", # Useless expression - "B019", - "B020", - "B023", - "B024", - "B026", - "B028", # No explicit `stacklevel` keyword argument found - "B904", - "B905", - "E402", - "C408", # C408 ignored because we like the dict keyword argument syntax - "E501", # E501 is not flexible enough, we're using B950 instead - "E721", - "E731", # Assign lambda expression - "E741", - "EXE001", - "F405", - "F821", - "F841", - # these ignores are from flake8-logging-format; please fix! - "G101", - "G201", - "G202", - "G003", - "G004", - # these ignores are from RUFF perf; please fix! - "PERF203", - "PERF4", - "SIM102", - "SIM103", - "SIM112", # flake8-simplify code styles - "SIM105", # these ignores are from flake8-simplify. 
please fix or ignore with commented reason - "SIM108", - "SIM110", - "SIM114", # Combine `if` branches using logical `or` operator - "SIM115", - "SIM116", # Disable Use a dictionary instead of consecutive `if` statements - "SIM117", - "SIM118", -] -#line-length = 120 -lint.select = [ - "B", - "C4", - "G", - "E", - "F", - "SIM1", - "W", - # Not included in flake8 - "PERF", - "PLE", - "TRY302", -] - -# Allow unused variables when underscore-prefixed. -lint.dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" - -# Allow autofix for all enabled rules (when `--fix`) is provided. -lint.fixable = [ - "A", - "B", - "C", - "D", - "E", - "F", - "G", - "I", - "N", - "Q", - "S", - "T", - "W", - "ANN", - "ARG", - "BLE", - "COM", - "DJ", - "DTZ", - "EM", - "ERA", - "EXE", - "FBT", - "ICN", - "INP", - "ISC", - "NPY", - "PD", - "PGH", - "PIE", - "PL", - "PT", - "PTH", - "PYI", - "RET", - "RSE", - "RUF", - "SIM", - "SLF", - "TCH", - "TID", - "TRY", - "UP", - "YTT", -] -lint.unfixable = [] -target-version = "py311" - -# Exclude a variety of commonly ignored directories. -exclude = [ - ".bzr", - ".direnv", - ".eggs", - ".git", - ".git-rewrite", - ".hg", - ".mypy_cache", - ".nox", - ".pants.d", - ".pytype", - ".ruff_cache", - ".svn", - ".tox", - ".venv", - "__pypackages__", - "_build", - "buck-out", - "build", - "dist", - "node_modules", - "venv", - "env", - "py/torch_tensorrt/fx", - ".github", - "examples", - "tests", - "tools", - "docs", - "docsrc", - "tests", - "setup.py", - "noxfile.py", - "__init__.py", -] - -[tool.ruff.lint.mccabe] -# Unlike Flake8, default to a complexity level of 10. 
-max-complexity = 10 - -[tool.isort] -profile = "black" -py_version = 311 -skip = ["py/torch_tensorrt/fx"] - -[tool.black] -#line-length = 120 -target-version = ["py39", "py310", "py311", "py312", "py313"] -force-exclude = """ -elu_converter/setup.py -""" - -[tool.mypy] -strict = true -ignore_missing_imports = true -show_error_codes = true -disable_error_code = "attr-defined" -no_implicit_optional = true -exclude = [ - "^py/torch_tensorrt/fx", - "py/torch_tensorrt/fx", - "torch_tensorrt/fx", - "py/torch_tensorrt/_C.so", - "examples", - "docs", - "docsrc", - "tests", - "setup.py", - "noxfile.py", -] -python_version = "3.11" - -follow_imports = "skip" - -[[tool.mypy.overrides]] -module = "torch_tensorrt.dynamo.conversion.aten_converters" -disable_error_code = "arg-type" - -[[tool.mypy.overrides]] -module = "torch_tensorrt.dynamo.lowering._decompositions" -disallow_untyped_calls = false - -[[tool.mypy.overrides]] -module = "torch_tensorrt.fx.*" -ignore_errors = true -follow_imports = "skip" - -[tool.typos] -files.extend-exclude = [ - "docs/**/*", - "*/fx/*", - "docsrc/_rendered_examples/", - "core/*", - "!core/runtime/", - "third_party/", - "CHANGELOG.md", - "*.ipynb", - "cpp/", - "py/torch_tensorrt/fx/", -] - -[tool.typos.default] -extend-ignore-identifiers-re = [ - "^([A-z]|[a-z])*Nd*", - "^([A-z]|[a-z])*nd*", - "active*([A-z]|[a-z]|[0-9])*,", -] - -[tool.typos.default.extend-words] -arange = "arange" diff --git a/setup.py b/setup.py index a4bbfb2834..1195dc7954 100644 --- a/setup.py +++ b/setup.py @@ -124,7 +124,7 @@ def load_dep_info(): PY_ONLY = True if (use_rtx_env_var := os.environ.get("USE_TRT_RTX")) is not None: - if use_rtx_env_var == "1": + if use_rtx_env_var == "1" or use_rtx_env_var.lower() == "true": USE_TRT_RTX = True if (release_env_var := os.environ.get("RELEASE")) is not None: @@ -702,6 +702,58 @@ def run(self): with open(os.path.join(get_root_dir(), "README.md"), "r", encoding="utf-8") as fh: long_description = fh.read() + +def get_requirements(): + 
requirements = [ + "packaging>=23", + "typing-extensions>=4.7.0", + "dllist", + ] + + if IS_JETPACK: + requirements.extend( + [ + "torch>=2.8.0,<2.9.0", + "tensorrt>=10.3.0,<10.4.0", + "numpy<2.0.0", + ] + ) + elif IS_SBSA: + requirements.extend( + [ + "torch>=2.9.0.dev,<2.10.0", + "tensorrt>=10.12.0,<10.13.0", + "tensorrt-cu12>=10.12.0,<10.13.0", + "tensorrt-cu12-bindings>=10.12.0,<10.13.0", + "tensorrt-cu12-libs>=10.12.0,<10.13.0", + "numpy", + ] + ) + else: + requirements.extend( + [ + "torch>=2.9.0.dev,<2.10.0", + "numpy", + ] + ) + if USE_TRT_RTX: + requirements.extend( + [ + "tensorrt-rtx>=1.0.0.21", + ] + ) + else: + requirements.extend( + [ + "tensorrt>=10.12.0,<10.13.0", + "tensorrt-cu12>=10.12.0,<10.13.0", + "tensorrt-cu12-bindings>=10.12.0,<10.13.0", + "tensorrt-cu12-libs>=10.12.0,<10.13.0", + ] + ) + return requirements + + setup( name="torch_tensorrt", ext_modules=ext_modules, @@ -715,6 +767,7 @@ def run(self): "editable_wheel": EditableWheelCommand, }, zip_safe=False, + install_requires=get_requirements(), packages=packages, package_dir=package_dir, include_package_data=False, diff --git a/uv.lock b/uv.lock index 18b5f3d7ed..eb2c536573 100644 --- a/uv.lock +++ b/uv.lock @@ -48,8 +48,7 @@ dependencies = [ { name = "psutil", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "pyyaml", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "safetensors", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or 
(platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/97/33/47bbd507e3a851d33d19ce7b2141c5ea3689bfae91ba168044d7db24b0e9/accelerate-1.7.0.tar.gz", hash = "sha256:e8a2a5503d6237b9eee73cc8d36cf543f9c2d8dd2c6713450b322f5e6d53a610", size = 376026, upload-time = "2025-05-15T10:00:52.117Z" } wheels = [ @@ -1343,11 +1342,9 @@ all = [ { name = "pynvml", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, { name = "regex", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, { name = "safetensors", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and 
sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torch", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, { name = "torchprofile", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version < '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.23.0.dev20250701+cu129", source = { registry = 
"https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torchvision", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, { name = "transformers", marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, ] @@ -1406,11 +1403,9 @@ all = [ { name = "pulp", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, { name = "regex", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in 
platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, { name = "safetensors", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version >= '3.10' and python_full_version < '3.12' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and python_full_version < '3.12' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'windows')" }, + { name = "torch", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 
'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, { name = "torchprofile", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.23.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version >= '3.10' and python_full_version < '3.12' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and python_full_version < '3.12' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'windows')" }, + { name = "torchvision", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or 
(python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, { name = "transformers", marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, ] @@ -1791,8 +1786,7 @@ dependencies = [ { name = "psutil", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "pyyaml", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "safetensors", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "tqdm", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "transformers", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] @@ -2507,207 +2501,51 @@ wheels = [ [[package]] name = "tensorrt" -version = "10.3.0" +version = "10.12.0.36" source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 
'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] dependencies = [ - { name = "tensorrt-cu12", version = "10.3.0", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, + { name = "tensorrt-cu12", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] -sdist = { url = "https://pypi.nvidia.com/tensorrt/tensorrt-10.3.0.tar.gz", hash = "sha256:23b0dbeeada4ba1c72021d3ee0a2f172fb7cb60c72ad5e268b62822fab698d1e" } - -[[package]] -name = "tensorrt" -version = "10.11.0.33" -source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == 
'3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == 
'3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] -dependencies = [ - { name = "tensorrt-cu12", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, -] -sdist = { url = "https://pypi.nvidia.com/tensorrt/tensorrt-10.11.0.33.tar.gz", hash = "sha256:a3d6048f86e11ea5202d473646194d3be866c0c8d578ac0b7eeb91d923f65d0b" } - -[[package]] -name = "tensorrt-cu12" -version = "10.3.0" -source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] -sdist = { url = 
"https://pypi.nvidia.com/tensorrt-cu12/tensorrt-cu12-10.3.0.tar.gz", hash = "sha256:14f0e60f40713a658f9634fffb1a5a665c35feb019be48b2f49e25ac12d2d084" } +sdist = { url = "https://pypi.nvidia.com/tensorrt/tensorrt-10.12.0.36.tar.gz", hash = "sha256:b246a830c26713e097b73151917e101cfb81aa0e7274c3c3b4c1f9f8b886be2e" } [[package]] name = "tensorrt-cu12" -version = "10.11.0.33" +version = "10.12.0.36" source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' 
and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] dependencies = [ - { name = "tensorrt-cu12-bindings", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12-libs", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "tensorrt-cu12-bindings", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "tensorrt-cu12-libs", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] -sdist = { 
url = "https://pypi.nvidia.com/tensorrt-cu12/tensorrt_cu12-10.11.0.33.tar.gz", hash = "sha256:7e29c8b16771c025320035ba9609c2a074767d9a8c05696a30c9d5c0fdfb37df" } +sdist = { url = "https://pypi.nvidia.com/tensorrt-cu12/tensorrt_cu12-10.12.0.36.tar.gz", hash = "sha256:aedeee0195c042592ac6b0536b19bc8cdbb1a548f35e09d24fbe78e1c76217c5" } [[package]] name = "tensorrt-cu12-bindings" -version = "10.3.0" +version = "10.12.0.36" source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] - -[[package]] -name = "tensorrt-cu12-bindings" -version = "10.11.0.33" -source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and 
platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 
platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] wheels = [ - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp310-none-manylinux_2_28_x86_64.whl", hash = "sha256:a2d27745575be5d7f06caa9565230025b8e41a8915ee6a5dc735d41c3faf206d" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp310-none-manylinux_2_31_aarch64.whl", hash = "sha256:546c7ee976366dc9cb76ffefbde555dec4feddcfb508b4c99ee626447b8c72de" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp311-none-manylinux_2_28_x86_64.whl", hash = "sha256:e7b7a5b80174f8c4ddd8a63bc9fa97cad3320409eafad79428bc2b1e15884068" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp311-none-manylinux_2_31_aarch64.whl", hash = "sha256:492e3e91d7c1083bff1f7c15fdd8f5fb09a782dcfa6d1d0f8d9034b2e3b38cad" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp312-none-manylinux_2_28_x86_64.whl", hash = "sha256:a8f374f6d752ce4b0d4a8303d29c3ba9904eb29da0dc95b4db6b75c501997e4a" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp312-none-manylinux_2_31_aarch64.whl", hash = "sha256:6a3b768cea69b153ed0c2eb50130d150406d5c1498fdb0bf6c8a1be160137a6a" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp313-none-manylinux_2_28_x86_64.whl", hash = "sha256:1ceda290d1ed79b6107b0eb29eeb178f569d007c1506b72caae8248975d57662" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp313-none-manylinux_2_31_aarch64.whl", hash = "sha256:3c27e0d6e36a3b1f06e1dc8b735e34f04f5b8aac3e7d9b21762b8264496e825f" }, - { url = 
"https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp39-none-manylinux_2_28_x86_64.whl", hash = "sha256:9a801886f389b75f92e69fc6be40308392ec7746dbf4de4a2b76585d591960f0" }, - { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.11.0.33-cp39-none-manylinux_2_31_aarch64.whl", hash = "sha256:42e9b3cc2e3c6bcc0785c9c96b4dd25cd7043ff95e4fd09c8d35331f63ce9634" }, -] - -[[package]] -name = "tensorrt-cu12-libs" -version = "10.3.0" -source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] -dependencies = [ - { name = "nvidia-cuda-runtime-cu12", version = "12.8.90", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, + { url = 
"https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp310-none-manylinux_2_28_x86_64.whl", hash = "sha256:7ecdb6fc2555caed7d4fbbd8158ed7ced64e230c125484f62a5369c40dcc70e5" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp310-none-manylinux_2_31_aarch64.whl", hash = "sha256:d8548ab5976ca5c91279c68ee77f4c892e03460709cfa3fbd2a22aa8123cb731" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp311-none-manylinux_2_28_x86_64.whl", hash = "sha256:58cf45605bb330e86f8ad49bc8997ed68cfdf5b09da229534fb7f84aa3fe5bf4" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp311-none-manylinux_2_31_aarch64.whl", hash = "sha256:ae0866a89caaeada1c16776de85413a523f78f53b1fd83f1b903c39eed264d82" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp312-none-manylinux_2_28_x86_64.whl", hash = "sha256:fb3a2ce96c7472a46bbee2030ce6a54fd6a32deda401c1c67d9de057550e0171" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp312-none-manylinux_2_31_aarch64.whl", hash = "sha256:f5128b8b2a379e65c09745ba97df58abf3a418cbfd6508d37f76121d9bdd3bc8" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp313-none-manylinux_2_28_x86_64.whl", hash = "sha256:0eb8d3e41279b1d0d329b85372d5d720c8d2ff1228f6273142d717b44d75935b" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp313-none-manylinux_2_31_aarch64.whl", hash = "sha256:a850992cad842340e6fed41fe74f529064064ff61881d50ef5a2be1816526f9b" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp39-none-manylinux_2_28_x86_64.whl", hash = "sha256:986cb86202ef9541279b59d4e254743aff43bae1def87d14dd06e02369107c8b" }, + { url = 
"https://pypi.nvidia.com/tensorrt-cu12-bindings/tensorrt_cu12_bindings-10.12.0.36-cp39-none-manylinux_2_31_aarch64.whl", hash = "sha256:c5b86638ae5e3a2101755d469ac2ce831d4bdece1d20fa2bd546c05c554b5952" }, ] [[package]] name = "tensorrt-cu12-libs" -version = "10.11.0.33" +version = "10.12.0.36" source = { registry = "https://pypi.nvidia.com/" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 
'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] dependencies = [ - { name = "nvidia-cuda-runtime-cu12", version = "12.8.90", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "nvidia-cuda-runtime-cu12", version = "12.8.90", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or sys_platform == 'windows'" }, { name = "nvidia-cuda-runtime-cu12", version = "12.9.79", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "platform_machine != 'aarch64' and sys_platform == 'linux'" }, ] wheels = [ - { url = "https://pypi.nvidia.com/tensorrt-cu12-libs/tensorrt_cu12_libs-10.11.0.33-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:81ace8d3284fdbef0804c444a4d7555343ee079370e79c93cb328c7d9b08f968" }, - { url = 
"https://pypi.nvidia.com/tensorrt-cu12-libs/tensorrt_cu12_libs-10.11.0.33-py2.py3-none-manylinux_2_31_aarch64.whl", hash = "sha256:b6846dbc32d717a5031d9757f16293dd9e25de8a1c4aae8c00701d52351ef173" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-libs/tensorrt_cu12_libs-10.12.0.36-py2.py3-none-manylinux_2_28_x86_64.whl", hash = "sha256:3910039e1d49de0edfdc8bf273e40ad4b85a9d57c7c383fe0e22f75417df9610" }, + { url = "https://pypi.nvidia.com/tensorrt-cu12-libs/tensorrt_cu12_libs-10.12.0.36-py2.py3-none-manylinux_2_31_aarch64.whl", hash = "sha256:1c117effa7318b65508457e9a11e67941859c8e5c346b59fd0090f66be28f2f4" }, ] [[package]] @@ -2718,10 +2556,8 @@ dependencies = [ { name = "huggingface-hub", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "pyyaml", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "safetensors", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = 
"0.23.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "torchvision", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/bc/0c/66b0f9b4a4cb9ffdac7b52b17b37c7d3c4f75623b469e388b0c6d89b4e88/timm-1.0.15.tar.gz", hash = "sha256:756a3bc30c96565f056e608a9b559daed904617eaadb6be536f96874879b1055", size = 2230258, upload-time = "2025-02-23T05:05:55.959Z" } wheels = [ @@ -2776,73 +2612,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6e/c2/61d3e0f47e2b74ef40a68b9e6ad5984f6241a942f7cd3bbfbdbd03861ea9/tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc", size = 14257, upload-time = "2024-11-27T22:38:35.385Z" }, ] -[[package]] -name = "torch" -version = "2.7.0" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 
platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] -dependencies = [ - { name = "filelock", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "fsspec", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "jinja2", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.2.1", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.5", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version >= '3.11' and 
platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version >= '3.11' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "setuptools", marker = "(python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "sympy", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "typing-extensions", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/46/c2/3fb87940fa160d956ee94d644d37b99a24b9c05a4222bf34f94c71880e28/torch-2.7.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c9afea41b11e1a1ab1b258a5c31afbd646d6319042bfe4f231b408034b51128b", size = 99158447, upload-time = "2025-04-23T14:35:10.557Z" }, - { url = "https://files.pythonhosted.org/packages/40/da/7378d16cc636697f2a94f791cb496939b60fb8580ddbbef22367db2c2274/torch-2.7.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:2b7813e904757b125faf1a9a3154e1d50381d539ced34da1992f52440567c156", size = 99159397, upload-time = "2025-04-23T14:35:35.304Z" }, - { url = "https://files.pythonhosted.org/packages/aa/5e/ac759f4c0ab7c01feffa777bd68b43d2ac61560a9770eeac074b450f81d4/torch-2.7.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:36a6368c7ace41ad1c0f69f18056020b6a5ca47bedaca9a2f3b578f5a104c26c", size = 99013250, upload-time = "2025-04-23T14:35:15.589Z" }, - { url = 
"https://files.pythonhosted.org/packages/14/24/720ea9a66c29151b315ea6ba6f404650834af57a26b2a04af23ec246b2d5/torch-2.7.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:868ccdc11798535b5727509480cd1d86d74220cfdc42842c4617338c1109a205", size = 99015553, upload-time = "2025-04-23T14:34:41.075Z" }, - { url = "https://files.pythonhosted.org/packages/cb/b4/8df3f9fe6bdf59e56a0e538592c308d18638eb5f5dc4b08d02abb173c9f0/torch-2.7.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:2a885fc25afefb6e6eb18a7d1e8bfa01cc153e92271d980a49243b250d5ab6d9", size = 99091348, upload-time = "2025-04-23T14:33:48.975Z" }, - { url = "https://files.pythonhosted.org/packages/57/6a/36775d1b553a443ba1453e1bfeae903ef20d94c95ab31aa09225bf52fda1/torch-2.7.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:e362efaa5b3078e5f75c33efc05005b9b46de0d2e899519d5b4cad0e050ed0f7", size = 99197389, upload-time = "2025-04-23T14:32:33.083Z" }, -] - [[package]] name = "torch" version = "2.9.0.dev20250701+cu129" source = { registry = "https://download.pytorch.org/whl/nightly/cu129" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in 
platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] dependencies = [ - { name = "filelock", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "fsspec", marker = "(platform_machine != 
'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "jinja2", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.2.1", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.4.2", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version == '3.10.*' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version == '3.10.*' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "networkx", version = "3.5", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version >= '3.11' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.11' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version >= '3.11' and 'tegra' not in platform_release and sys_platform == 'linux') 
or (python_full_version >= '3.11' and 'tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "filelock", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "fsspec", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "jinja2", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "networkx", version = "3.2.1", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, + { name = "networkx", version = "3.4.2", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version == '3.10.*' and sys_platform == 'linux') or (python_full_version == '3.10.*' and sys_platform == 'windows')" }, + { name = "networkx", version = "3.5", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version >= '3.11' and sys_platform == 'linux') or (python_full_version >= '3.11' and sys_platform == 'windows')" }, { name = "nvidia-cublas-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cuda-cupti-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-cuda-nvrtc-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, @@ -2858,9 +2638,9 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "nvidia-nvtx-cu12", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, { name = "pytorch-triton", marker = "platform_machine == 'x86_64' and sys_platform == 'linux'" }, - { name = "setuptools", marker = "(python_full_version >= '3.12' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version >= '3.12' and platform_machine != 'aarch64' and 
sys_platform == 'windows') or (python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "sympy", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "typing-extensions", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "setuptools", marker = "(python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, + { name = "sympy", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "typing-extensions", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] wheels = [ { url = "https://download.pytorch.org/whl/nightly/cu129/torch-2.9.0.dev20250701%2Bcu129-cp310-cp310-manylinux_2_28_aarch64.whl" }, @@ -2885,29 +2665,14 @@ dependencies = [ { name = "numpy", version = "1.26.4", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows') or ('tegra' in platform_release and sys_platform == 'linux') or ('tegra' in platform_release and sys_platform == 'windows')" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release 
and sys_platform == 'windows')" }, { name = "packaging", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, - { name = "tensorrt", version = "10.3.0", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12", version = "10.3.0", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12-bindings", version = "10.3.0", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12-bindings", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' 
and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12-libs", version = "10.3.0", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "tensorrt-cu12-libs", version = "10.11.0.33", source = { registry = "https://pypi.nvidia.com/" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "tensorrt", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "tensorrt-cu12", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "tensorrt-cu12-bindings", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "tensorrt-cu12-libs", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "torch", marker = "sys_platform == 'linux' or 
sys_platform == 'windows'" }, { name = "typing-extensions", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] -[package.optional-dependencies] -quantization = [ - { name = "nvidia-modelopt", version = "0.29.0", source = { registry = "https://pypi.nvidia.com/" }, extra = ["all"], marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, - { name = "nvidia-modelopt", version = "0.31.0", source = { registry = "https://pypi.nvidia.com/" }, extra = ["all"], marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, -] -torchvision = [ - { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.23.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, -] - [package.dev-dependencies] debug = [ { name = "graphviz", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, @@ -2924,6 +2689,10 @@ dev = [ { name = "ruff", marker = 
"sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "typos", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] +quantization = [ + { name = "nvidia-modelopt", version = "0.29.0", source = { registry = "https://pypi.nvidia.com/" }, extra = ["all"], marker = "(python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'linux') or (python_full_version < '3.12' and 'tegra' in platform_release and sys_platform == 'windows') or (python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows')" }, + { name = "nvidia-modelopt", version = "0.31.0", source = { registry = "https://pypi.nvidia.com/" }, extra = ["all"], marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (python_full_version >= '3.12' and sys_platform == 'linux') or (python_full_version >= '3.12' and sys_platform == 'windows')" }, +] test = [ { name = "expecttest", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, { name = "parameterized", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, @@ -2936,25 +2705,16 @@ test = [ [package.metadata] requires-dist = [ { name = "dllist" }, - { name = "numpy", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release" }, - { name = "numpy", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = "<2.0.0" }, - { name = "nvidia-modelopt", extras = ["all"], marker = "extra == 'quantization'", specifier = ">=0.27.1" }, + { name = "numpy" }, { name = "packaging", specifier = ">=23" }, - { name = "tensorrt", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release", specifier = ">=10.11.0,<10.12.0" }, - { name = "tensorrt", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = 
">=10.3.0,<10.4.0" }, - { name = "tensorrt-cu12", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release", specifier = ">=10.11.0,<10.12.0" }, - { name = "tensorrt-cu12", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = ">=10.3.0,<10.4.0" }, - { name = "tensorrt-cu12-bindings", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release", specifier = ">=10.11.0,<10.12.0" }, - { name = "tensorrt-cu12-bindings", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = ">=10.3.0,<10.4.0" }, - { name = "tensorrt-cu12-libs", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release", specifier = ">=10.11.0,<10.12.0" }, - { name = "tensorrt-cu12-libs", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = ">=10.3.0,<10.4.0" }, + { name = "tensorrt", specifier = ">=10.12.0,<10.13.0" }, + { name = "tensorrt-cu12", specifier = ">=10.12.0,<10.13.0" }, + { name = "tensorrt-cu12-bindings", specifier = ">=10.12.0,<10.13.0" }, + { name = "tensorrt-cu12-libs", specifier = ">=10.12.0,<10.13.0" }, { name = "torch", marker = "platform_machine != 'aarch64' or 'tegra' not in platform_release", specifier = ">=2.9.0.dev0,<2.10.0", index = "https://download.pytorch.org/whl/nightly/cu129" }, - { name = "torch", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = ">=2.7.0,<2.8.0" }, - { name = "torchvision", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release and extra == 'torchvision'", specifier = ">=0.22.0,<0.23.0" }, - { name = "torchvision", marker = "(platform_machine != 'aarch64' and extra == 'torchvision') or ('tegra' not in platform_release and extra == 'torchvision')", specifier = ">=0.23.0.dev0,<0.24.0", index = "https://download.pytorch.org/whl/nightly/cu129" }, + { name = "torch", marker = "platform_machine == 'aarch64' and 'tegra' in platform_release", specifier = 
">=2.9.0.dev0,<2.10.0" }, { name = "typing-extensions", specifier = ">=4.7.0" }, ] -provides-extras = ["torchvision", "quantization"] [package.metadata.requires-dev] debug = [ @@ -2972,6 +2732,7 @@ dev = [ { name = "ruff" }, { name = "typos" }, ] +quantization = [{ name = "nvidia-modelopt", extras = ["all"], specifier = ">=0.27.1" }] test = [ { name = "expecttest", specifier = "==0.1.6" }, { name = "parameterized", specifier = ">=0.2.0" }, @@ -2988,10 +2749,8 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "numpy", version = "1.26.4", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows') or ('tegra' in platform_release and sys_platform == 'linux') or ('tegra' in platform_release and sys_platform == 'windows')" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.22.0", source = { registry = "https://pypi.org/simple" }, 
marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torchvision", version = "0.23.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "torchvision", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/6f/36/574c0c46e818533b78b3c09505211162918188325ab4165ef11a3f295755/torchprofile-0.0.4.tar.gz", hash = "sha256:96b6da17d752a06b02977e078aea95614893b31d4117dd5dcd081f30ce65611b", size = 4557, upload-time = "2021-06-22T04:58:03.592Z" } wheels = [ @@ -3000,71 +2759,18 @@ wheels = [ [[package]] name = "torchvision" -version = "0.22.0" +version = "0.11.3" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine == 'aarch64' and 
'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] dependencies = [ - { name = "numpy", version = "1.26.4", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "pillow", marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.7.0", source = { registry = "https://pypi.org/simple" }, marker = "(platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine == 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, -] -wheels = [ - { url = "https://files.pythonhosted.org/packages/a3/e5/ec4b52041cd8c440521b75864376605756bd2d112d6351ea6a1ab25008c1/torchvision-0.22.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:810ea4af3bc63cf39e834f91f4218ff5999271caaffe2456247df905002bd6c0", size = 2512604, upload-time = "2025-04-23T14:41:56.515Z" }, - { url = "https://files.pythonhosted.org/packages/7e/71/ce9a303b94e64fe25d534593522ffc76848c4e64c11e4cbe9f6b8d537210/torchvision-0.22.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:6c5620e10ffe388eb6f4744962106ed7cf1508d26e6fdfa0c10522d3249aea24", size = 2514016, upload-time = "2025-04-23T14:41:48.566Z" }, - { url = 
"https://files.pythonhosted.org/packages/72/ef/21f8b6122e13ae045b8e49658029c695fd774cd21083b3fa5c3f9c5d3e35/torchvision-0.22.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8f116bc82e0c076e70ba7776e611ed392b9666aa443662e687808b08993d26af", size = 2514571, upload-time = "2025-04-23T14:41:53.458Z" }, - { url = "https://files.pythonhosted.org/packages/77/77/88f64879483d66daf84f1d1c4d5c31ebb08e640411139042a258d5f7dbfe/torchvision-0.22.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:471c6dd75bb984c6ebe4f60322894a290bf3d4b195e769d80754f3689cd7f238", size = 2471592, upload-time = "2025-04-23T14:41:54.991Z" }, - { url = "https://files.pythonhosted.org/packages/6a/9a/2b59f5758ba7e3f23bc84e16947493bbce97392ec6d18efba7bdf0a3b10e/torchvision-0.22.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:753d3c84eeadd5979a33b3b73a25ecd0aa4af44d6b45ed2c70d44f5e0ac68312", size = 2476555, upload-time = "2025-04-23T14:41:38.357Z" }, - { url = "https://files.pythonhosted.org/packages/2c/40/ca84add0f8e548a5b083b271e832786cd397047a9c2e7fac76c0c1f3de04/torchvision-0.22.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:4095fac2b2e49a9c30f701e09ec1bdf3d11b1e48b006a76a9015a2ed8b39556e", size = 2512670, upload-time = "2025-04-23T14:41:33.739Z" }, -] - -[[package]] -name = "torchvision" -version = "0.23.0.dev20250701+cu129" -source = { registry = "https://download.pytorch.org/whl/nightly/cu129" } -resolution-markers = [ - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - 
"python_full_version == '3.11.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine == 'aarch64' and 'tegra' not in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux'", - "python_full_version >= '3.12' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows'", - "python_full_version >= '3.12' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.11.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version == '3.10.*' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", - "python_full_version < '3.10' and platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows'", -] 
-dependencies = [ - { name = "numpy", version = "1.26.4", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'linux') or (python_full_version < '3.10' and platform_machine != 'aarch64' and sys_platform == 'windows') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version < '3.10' and 'tegra' not in platform_release and sys_platform == 'windows') or (platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'linux') or (platform_machine != 'aarch64' and 'tegra' in platform_release and sys_platform == 'windows')" }, + { name = "numpy", version = "1.26.4", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(python_full_version < '3.10' and sys_platform == 'linux') or (python_full_version < '3.10' and sys_platform == 'windows') or ('tegra' in platform_release and sys_platform == 'linux') or ('tegra' in platform_release and sys_platform == 'windows')" }, { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "(python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'linux') or (python_full_version >= '3.10' and 'tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "pillow", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, - { name = "torch", version = "2.9.0.dev20250701+cu129", source = { registry = "https://download.pytorch.org/whl/nightly/cu129" }, marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (platform_machine != 'aarch64' and sys_platform == 'windows') or ('tegra' not in platform_release and 
sys_platform == 'linux') or ('tegra' not in platform_release and sys_platform == 'windows')" }, + { name = "pillow", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, + { name = "torch", marker = "sys_platform == 'linux' or sys_platform == 'windows'" }, ] wheels = [ - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp310-cp310-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp311-cp311-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp312-cp312-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp313-cp313-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp313-cp313t-manylinux_2_28_x86_64.whl" }, - { url = "https://download.pytorch.org/whl/nightly/cu129/torchvision-0.23.0.dev20250701%2Bcu129-cp39-cp39-manylinux_2_28_x86_64.whl" }, + { url = "https://files.pythonhosted.org/packages/48/20/380758a94be49d38798a6cfd25824f72ec1f230b00c0014efb15903777c6/torchvision-0.11.3-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:8bc8a7db80c97ca254be362ba883a202192e361ba2f6dff7ff5bb010d4bfc23a", size = 14675721, upload-time = "2022-01-27T20:36:45.882Z" }, + { url = "https://files.pythonhosted.org/packages/ac/b1/9702d02e233bec7ce231cc8be94489ee31084fb6d350703f0ed22086ebed/torchvision-0.11.3-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:eca0b0f7a0e462bdecf7926d89faae6dcd51da418ca0cf70e725981ed775a11b", size = 23199346, upload-time = "2022-01-27T20:36:38.508Z" }, + { url = "https://files.pythonhosted.org/packages/ac/d3/913e25d7775c74f76d174a82eba45bf68e384dc78373598f6c2b3a727fed/torchvision-0.11.3-cp39-cp39-manylinux2014_aarch64.whl", hash = 
"sha256:25e72231be8ce03467a77806d9c3f5fd34b9cd23b9543d3e999bf57622377532", size = 14674764, upload-time = "2022-01-27T20:37:09.227Z" }, ] [[package]] From 7727fc8c2b1e024cb61fced877583d0889d63791 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 7 Aug 2025 14:24:46 -0700 Subject: [PATCH 05/12] add documentation --- docsrc/getting_started/tensorrt_rtx.rst | 137 ++++++++++++++++++------ 1 file changed, 103 insertions(+), 34 deletions(-) diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst index 5e617b459a..07cf476f38 100644 --- a/docsrc/getting_started/tensorrt_rtx.rst +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -1,72 +1,141 @@ .. _Torch-TensorRT_in_RTX: Torch-TensorRT in RTX -############################# +===================== Overview -******** +-------- TensorRT-RTX -=========== +~~~~~~~~~~~~ + TensorRT for RTX builds on the proven performance of the NVIDIA TensorRT inference library, and simplifies the deployment of AI models on NVIDIA RTX GPUs across desktops, laptops, and workstations. TensorRT for RTX is a drop-in replacement for NVIDIA TensorRT in applications targeting NVIDIA RTX GPUs from Turing through Blackwell generations. It introduces a Just-In-Time (JIT) optimizer in the runtime that compiles improved inference engines directly on the end-user’s RTX-accelerated PC in under 30 seconds. This eliminates the need for lengthy pre-compilation steps and enables rapid engine generation, improved application portability, and cutting-edge inference performance. For detailed information about TensorRT-RTX, refer to: + * `TensorRT-RTX Documentation `_ -Currenlty, Torch-TensorRT only supports TensorRT-RTX for the experiment purpose. -Torch-TensorRT by default uses TensorRT during the build and run. +Currently, Torch-TensorRT only supports TensorRT-RTX for experimental purposes. +Torch-TensorRT by default uses standard TensorRT during the build and run. 
+ +To use TensorRT-RTX: -In order to use TensorRT-RTX, you need to build the wheel with ``--use-rtx`` flag or ``USE_TRT_RTX=true``. -And during the run, you need set the ``USE_TRT_RTX=true`` environment variable to invoke with TensorRT-RTX. +- Build the wheel with the ``--use-rtx`` flag or set ``USE_TRT_RTX=true``. +- During runtime, set the ``USE_TRT_RTX=true`` environment variable to invoke TensorRT-RTX. +Prerequisites +------------- +Install Bazel +~~~~~~~~~~~~~ +Bazel is required to build the wheel with TensorRT-RTX. -Prerequisites -************* +**In Linux:** + +.. code-block:: sh + + curl -L https://github.com/bazelbuild/bazelisk/releases/download/v1.26.0/bazelisk-linux-amd64 \ + -o bazelisk \ + && mv bazelisk /usr/bin/bazel \ + && chmod +x /usr/bin/bazel + +**In Windows:** + +.. code-block:: sh -System Preparation -================== -1. **Install TensorRT-RTX**: - TensorRT-RTX can be downloaded from https://developer.nvidia.com/tensorrt-rtx. - .. code-block:: sh - # if TensorRT-RTX is downloaded in /usr/local/tensorrt-rtx - export LD_LIBRARY_PATH=/usr/local/tensorrt-rtx/lib:$LD_LIBRARY_PATH - cd /usr/local/tensorrt-rtx/python - # install the tensorrt_rtx wheel - # currently tensorrt_rtx wheel is only available from tarball downloaded, not from pypi yet. - python -m pip install tensorrt_rtx-1.0.0.21-cp39-none-linux_x86_64.whl + choco install bazelisk -y +Install TensorRT-RTX Tarball +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +TensorRT-RTX tarball can be downloaded from https://developer.nvidia.com/tensorrt-rtx. +Currently, Torch-TensorRT uses TensorRT-RTX version **1.0.0.21**. + +Once downloaded: + +**In Linux:** + +Make sure you add the lib path to the ``LD_LIBRARY_PATH`` environment variable. + +.. 
code-block:: sh + + # If TensorRT-RTX is downloaded in /your_local_download_path/TensorRT-RTX-1.0.0.21 + export LD_LIBRARY_PATH=/your_local_download_path/TensorRT-RTX-1.0.0.21/lib:$LD_LIBRARY_PATH + echo $LD_LIBRARY_PATH | grep TensorRT-RTX + +**In Windows:** + +Make sure you add the lib path to the Windows system variable ``PATH``. + +.. code-block:: sh + # If TensorRT-RTX is downloaded in C:\your_local_download_path\TensorRT-RTX-1.0.0.21 + set PATH="%PATH%;C:\your_local_download_path\TensorRT-RTX-1.0.0.21\lib" + echo %PATH% | findstr TensorRT-RTX + +Install TensorRT-RTX Wheel +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Currently, the `tensorrt_rtx` wheel is not published on PyPI. +You must install it manually from the downloaded tarball. + +.. code-block:: sh + + # If the tarball is downloaded in /your_local_download_path/TensorRT-RTX-1.0.0.21 + python -m pip install /your_local_download_path/TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-cp39-none-linux_x86_64.whl Build Torch-TensorRT with TensorRT-RTX -===================================== +-------------------------------------- + +Build Locally with TensorRT-RTX +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code-block:: sh - # if you have previously build with Standard TensorRT, make sure to clean the build environment + + # If you have previously built with standard TensorRT, make sure to clean the build environment python setup.py clean - # build wheel with TensorRT-RTX + bazel clean --expunge + + # Build wheel with TensorRT-RTX python setup.py bdist_wheel --use-rtx - # install the wheel - cd dist - python -m pip install torch-tensorrt-*.whl + # Install the wheel + python -m pip install dist/torch-tensorrt-*.whl + +Quick Start +----------- + +.. 
code-block:: python - # check that tensorrt related wheel is not installed, only tensorrt_rtx is there + # You must set USE_TRT_RTX=true to use TensorRT-RTX + USE_TRT_RTX=true python examples/dynamo/torch_compile_resnet_example.py + +Troubleshooting +--------------- + +If you encounter load or link errors, check if `tensorrt_rtx` is linked correctly. +If not, clean up the environment and rebuild. + +**In Linux:** + +.. code-block:: sh + + # Ensure only tensorrt_rtx is installed (no standard tensorrt wheels) python -m pip list | grep tensorrt - # make sure the tensorrt_rtx.so file is linked to the tensorrt_rtx.so file in the TensorRT-RTX installation directory + # Check if libtorchtrt.so links to the correct tensorrt_rtx shared object trt_install_path=$(python -m pip show torch-tensorrt | grep "Location" | awk '{print $2}')/torch_tensorrt - # check if the libtensorrt_rtx.so.1 is linked, and make sure there is no libnvinfer.so.10 linked + # Verify libtensorrt_rtx.so.1 is linked, and libnvinfer.so.10 is NOT ldd $trt_install_path/lib/libtorchtrt.so -Quick Start -=========== +**In Windows:** -.. code-block:: py - # you have to set USE_TRT_RTX=true to use TensorRT-RTX - USE_TRT_RTX=true python examples/dynamo/torch_compile_resnet_example.py +.. 
code-block:: sh + # Check if tensorrt_rtx_1_0.dll is linked, and nvinfer_10.dll is NOT + cd py/torch_tensorrt + dumpbin /DEPENDENTS torchtrt.dll From 8eee6af89a09e0175fbff551190b55bcd9a1a59d Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Mon, 11 Aug 2025 10:49:32 -0700 Subject: [PATCH 06/12] resolve merge conflict --- .github/scripts/install-tensorrt-rtx.sh | 4 +- .../build-test-linux-aarch64-jetpack.yml | 2 +- .../workflows/build-test-linux-aarch64.yml | 2 +- .github/workflows/build-test-linux-x86_64.yml | 2 +- .../workflows/build-test-linux-x86_64_rtx.yml | 2 +- .github/workflows/build-test-windows.yml | 2 +- .github/workflows/build-test-windows_rtx.yml | 2 +- .github/workflows/build_wheels_linux.yml | 3 +- .../conversionctx/ConversionCtx.cpp | 1 - cpp/BUILD | 16 +-- py/torch_tensorrt/csrc/tensorrt_classes.cpp | 2 +- .../fx/converters/acc_ops_converters.py | 13 +-- py/torch_tensorrt/fx/fx2trt.py | 8 +- .../test/converters/acc_op/test_dequantize.py | 3 +- .../fx/test/converters/acc_op/test_pad.py | 3 +- .../acc_op/test_quantize_per_tensor.py | 3 +- .../converters/aten_op/test_reshape_aten.py | 7 +- py/torch_tensorrt/fx/tools/common_fx2trt.py | 3 +- py/torch_tensorrt/fx/utils.py | 10 +- setup.py | 104 +++++++++++------- tests/py/ts/api/test_classes.py | 5 + 21 files changed, 99 insertions(+), 98 deletions(-) diff --git a/.github/scripts/install-tensorrt-rtx.sh b/.github/scripts/install-tensorrt-rtx.sh index ef5c2945f3..61a0ce2ae6 100644 --- a/.github/scripts/install-tensorrt-rtx.sh +++ b/.github/scripts/install-tensorrt-rtx.sh @@ -17,8 +17,8 @@ install_tensorrt_rtx() { curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip -o TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip unzip TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib - export LD_LIBRARY_PATH=${rtx_lib_dir}:$LD_LIBRARY_PATH - echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" + export
PATH=${rtx_lib_dir}:$PATH + echo "PATH: $PATH" if [[ ${install_wheel_or_not} == true ]]; then pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-win_amd64.whl fi diff --git a/.github/workflows/build-test-linux-aarch64-jetpack.yml b/.github/workflows/build-test-linux-aarch64-jetpack.yml index a2e8a3a736..9523872acf 100644 --- a/.github/workflows/build-test-linux-aarch64-jetpack.yml +++ b/.github/workflows/build-test-linux-aarch64-jetpack.yml @@ -65,7 +65,7 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package for aarch64-jetpack + name: Build torch-tensorrt whl package for jetpack uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build-test-linux-aarch64.yml b/.github/workflows/build-test-linux-aarch64.yml index 34b3e4fa34..a2b2a78db9 100644 --- a/.github/workflows/build-test-linux-aarch64.yml +++ b/.github/workflows/build-test-linux-aarch64.yml @@ -62,7 +62,7 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package for aarch64 + name: Build torch-tensorrt whl package for SBSA uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build-test-linux-x86_64.yml b/.github/workflows/build-test-linux-x86_64.yml index 4b18ef559d..3998eab0e7 100644 --- a/.github/workflows/build-test-linux-x86_64.yml +++ b/.github/workflows/build-test-linux-x86_64.yml @@ -61,7 +61,7 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt whl package for x86_64 + name: Build torch-tensorrt whl package for Linux x86_64 uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ 
matrix.repository }} diff --git a/.github/workflows/build-test-linux-x86_64_rtx.yml b/.github/workflows/build-test-linux-x86_64_rtx.yml index 24d0169e2c..1f9b605584 100644 --- a/.github/workflows/build-test-linux-x86_64_rtx.yml +++ b/.github/workflows/build-test-linux-x86_64_rtx.yml @@ -60,7 +60,7 @@ jobs: post-script: packaging/post_build_script.sh smoke-test-script: packaging/smoke_test_script.sh package-name: torch_tensorrt - name: Build torch-tensorrt-rtx whl package for x86_64 + name: RTX - Build torch-tensorrt-rtx whl package for Linux x86_64 uses: ./.github/workflows/build_wheels_linux.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build-test-windows.yml b/.github/workflows/build-test-windows.yml index 2d402a8799..ca39f8cb5f 100644 --- a/.github/workflows/build-test-windows.yml +++ b/.github/workflows/build-test-windows.yml @@ -70,7 +70,7 @@ jobs: env-script: packaging/vc_env_helper.bat smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt - name: Build torch-tensorrt whl package + name: Build torch-tensorrt whl package for Windows uses: ./.github/workflows/build_wheels_windows.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build-test-windows_rtx.yml b/.github/workflows/build-test-windows_rtx.yml index 6321cd8a52..137adb252c 100644 --- a/.github/workflows/build-test-windows_rtx.yml +++ b/.github/workflows/build-test-windows_rtx.yml @@ -70,7 +70,7 @@ jobs: env-script: packaging/vc_env_helper.bat smoke-test-script: packaging/smoke_test_windows.py package-name: torch_tensorrt - name: Build torch-tensorrt-rtx whl package + name: RTX - Build torch-tensorrt-rtx whl package for Windows uses: ./.github/workflows/build_wheels_windows.yml with: repository: ${{ matrix.repository }} diff --git a/.github/workflows/build_wheels_linux.yml b/.github/workflows/build_wheels_linux.yml index 7cfddd49c8..4efa65add7 100644 --- a/.github/workflows/build_wheels_linux.yml +++ 
b/.github/workflows/build_wheels_linux.yml @@ -288,7 +288,8 @@ jobs: env: PACKAGE_NAME: ${{ inputs.package-name }} SMOKE_TEST_SCRIPT: ${{ inputs.smoke-test-script }} - if: ${{ inputs.architecture == 'x86_64' }} + # TODO: lan to verify whether manylinux repair is needed for jetpack + #if: ${{ inputs.is-jetpack == true }} run: | set -euxo pipefail source "${BUILD_ENV_FILE}" diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp index 0983060e55..07ade4b17f 100644 --- a/core/conversion/conversionctx/ConversionCtx.cpp +++ b/core/conversion/conversionctx/ConversionCtx.cpp @@ -68,7 +68,6 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings) case nvinfer1::DataType::kINT8: LOG_DEBUG("INT8 precision has been enabled, we assume the network has Q/DQ nodes obtained from modelopt"); break; -#endif case nvinfer1::DataType::kFLOAT: break; case nvinfer1::DataType::kINT32: diff --git a/cpp/BUILD b/cpp/BUILD index 2b5877aa4a..79081e1f81 100644 --- a/cpp/BUILD +++ b/cpp/BUILD @@ -30,24 +30,12 @@ cc_library( "src/logging.cpp", "src/torch_tensorrt.cpp", "src/types.cpp", - ] + select({ - ":rtx_win": [], - ":rtx_x86_64": [], - "//conditions:default": [ - "src/ptq.cpp", - ], - }), + ], hdrs = [ "include/torch_tensorrt/logging.h", "include/torch_tensorrt/macros.h", "include/torch_tensorrt/torch_tensorrt.h", - ] + select({ - ":rtx_win": [], - ":rtx_x86_64": [], - "//conditions:default": [ - "include/torch_tensorrt/ptq.h", - ], - }), + ], linkstatic = True, strip_include_prefix = "include/", deps = [ diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp index 694f6b9930..553c6238a5 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp @@ -349,7 +349,7 @@ core::CompileSpec CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi info.lower_info.unfreeze_module = true; info.lower_info.disable_cse = true; } -#endif + 
info.convert_info.engine_settings.sparse_weights = sparse_weights; info.convert_info.engine_settings.disable_tf32 = disable_tf32; info.convert_info.engine_settings.refit = refit; diff --git a/py/torch_tensorrt/fx/converters/acc_ops_converters.py b/py/torch_tensorrt/fx/converters/acc_ops_converters.py index bf2680f12a..f998ddb27a 100644 --- a/py/torch_tensorrt/fx/converters/acc_ops_converters.py +++ b/py/torch_tensorrt/fx/converters/acc_ops_converters.py @@ -12,7 +12,6 @@ import torch from torch.fx.immutable_collections import immutable_list from torch.fx.node import Argument, Target -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.converters.impl import activation, convolution from torch_tensorrt.fx.passes.lower_basic_pass import ( trt_transposed_linear, @@ -208,7 +207,7 @@ def acc_ops_conv_transposend( return layer.get_output(0) -@tensorrt_converter(acc_ops.pad, enabled=(not is_tensorrt_version_supported("8.2"))) +@tensorrt_converter(acc_ops.pad, enabled=trt.__version__ < "8.2") def acc_ops_pad_with_padding_layer( network: TRTNetwork, target: Target, @@ -258,10 +257,7 @@ def acc_ops_pad_with_padding_layer( return layer.get_output(0) -@tensorrt_converter( - acc_ops.pad, - enabled=is_tensorrt_version_supported("8.2"), -) +@tensorrt_converter(acc_ops.pad, enabled=trt.__version__ >= "8.2") def acc_ops_pad_with_slice_layer( network: TRTNetwork, target: Target, @@ -884,10 +880,7 @@ def acc_ops_sign( ) -> Union[TRTTensor, Sequence[TRTTensor]]: input_val = kwargs["input"] - if ( - is_tensorrt_version_supported("8.2") - and not network.has_implicit_batch_dimension - ): + if trt.__version__ >= "8.2" and not network.has_implicit_batch_dimension: input_val = kwargs["input"] operation_type = trt.UnaryOperation.SIGN return add_unary_layer(network, input_val, operation_type, target, name) diff --git a/py/torch_tensorrt/fx/fx2trt.py b/py/torch_tensorrt/fx/fx2trt.py index f241a936d6..6a29932b1b 100644 --- a/py/torch_tensorrt/fx/fx2trt.py +++ 
b/py/torch_tensorrt/fx/fx2trt.py @@ -13,7 +13,6 @@ from torch._ops import OpOverload from torch.fx.node import _get_qualified_name from torch.fx.passes.shape_prop import TensorMetadata -from torch_tensorrt._utils import is_tensorrt_version_supported from .converter_registry import CONVERTERS from .input_tensor_spec import InputTensorSpec @@ -214,10 +213,7 @@ def run( builder_config.max_workspace_size = max_workspace_size # Speed up TRT build time in the test environment - if ( - is_tensorrt_version_supported("8.6") - and os.environ.get("TRT_TEST_ENV", "0") == "1" - ): + if trt.__version__ >= "8.6" and os.environ.get("TRT_TEST_ENV", "0") == "1": _LOGGER.info("Set TRT optimization level to 0") builder_config.builder_optimization_level = 0 @@ -229,7 +225,7 @@ def run( cache = builder_config.create_timing_cache(b"") builder_config.set_timing_cache(cache, False) - if is_tensorrt_version_supported("8.2"): + if trt.__version__ >= "8.2": builder_config.profiling_verbosity = ( profiling_verbosity if profiling_verbosity diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py index 217b92f19c..7d17056b62 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py @@ -5,7 +5,6 @@ import torch.nn as nn import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -15,7 +14,7 @@ """ ) @unittest.skipIf( - not is_tensorrt_version_supported("8.0"), + trt.__version__ < "8.0", "Explicit quantization only supported in TensorRT 8.0 and later", ) class TestDequantizeConverter(AccTestCase): diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py 
index f5b6005782..b21779b65f 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py @@ -6,7 +6,6 @@ import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from parameterized import parameterized from torch.testing._internal.common_utils import run_tests -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase # from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -80,7 +79,7 @@ def forward(self, x): ] ) @unittest.skipIf( - not is_tensorrt_version_supported("8.2"), + trt.__version__ < "8.2", "Padding 3d only supported in TensorRT 8.2 and later", ) def test_pad_3d(self, _, pad): diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py index 3c2708bf91..22cbdd826d 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py @@ -5,7 +5,6 @@ import torch.nn as nn import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec @@ -15,7 +14,7 @@ """ ) @unittest.skipIf( - not is_tensorrt_version_supported("8.0"), + trt.__version__ < "8.0", "Explicit quantization only supported in TensorRT 8.0 and later", ) class TestQuantizePerTensorConverter(AccTestCase): diff --git a/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py b/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py index 2942945523..538e575d6e 100644 --- a/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py +++ b/py/torch_tensorrt/fx/test/converters/aten_op/test_reshape_aten.py @@ -4,7 +4,6 @@ 
import torch from parameterized import parameterized from torch.testing._internal.common_utils import run_tests -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx.tools.common_fx2trt import DispatchTestCase, InputTensorSpec @@ -16,7 +15,7 @@ class TestReshapeConverter(DispatchTestCase): ] ) @unittest.skipIf( - not is_tensorrt_version_supported("8.5"), + trt.__version__ < "8.5", "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape(self, target_shape): @@ -43,7 +42,7 @@ def forward(self, x): ] ) @unittest.skipIf( - not is_tensorrt_version_supported("8.5"), + trt.__version__ < "8.5", "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape_with_dynamic_shape(self, target_shape): @@ -69,7 +68,7 @@ def forward(self, x): ) @unittest.skipIf( - not is_tensorrt_version_supported("8.5"), + trt.__version__ < "8.5", "Shape tensor supported well in TensorRT 8.5 and later", ) def test_reshape_with_dynamic_shape_size(self): diff --git a/py/torch_tensorrt/fx/tools/common_fx2trt.py b/py/torch_tensorrt/fx/tools/common_fx2trt.py index 66f343a55b..2ddd832c2a 100644 --- a/py/torch_tensorrt/fx/tools/common_fx2trt.py +++ b/py/torch_tensorrt/fx/tools/common_fx2trt.py @@ -13,7 +13,6 @@ from torch.fx.passes import shape_prop from torch.fx.passes.infra.pass_base import PassResult from torch.testing._internal.common_utils import TestCase -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.fx import InputTensorSpec, TRTInterpreter, TRTModule from torch_tensorrt.fx.passes.lower_basic_pass_aten import ( compose_bmm, @@ -259,7 +258,7 @@ def run_test( pass_tracer = chain_passes(*apply_passes) mod = pass_tracer(mod, inputs) - if is_tensorrt_version_supported("8.6"): + if trt.__version__ >= "8.6": test_implicit_batch_dim = False if test_implicit_batch_dim: interp = TRTInterpreter(mod, InputTensorSpec.from_tensors(inputs)) diff --git a/py/torch_tensorrt/fx/utils.py 
b/py/torch_tensorrt/fx/utils.py index da5cdc0d7f..5bef21b6be 100644 --- a/py/torch_tensorrt/fx/utils.py +++ b/py/torch_tensorrt/fx/utils.py @@ -8,7 +8,7 @@ import torch from functorch import make_fx from functorch.experimental import functionalize -from torch_tensorrt._utils import is_tensorrt_version_supported, sanitized_torch_version +from torch_tensorrt._utils import sanitized_torch_version from torch_tensorrt.fx.passes.lower_basic_pass import ( replace_op_with_indices, run_const_fold, @@ -60,7 +60,7 @@ class Frameworks(Enum): }, } -if is_tensorrt_version_supported("7.0"): +if trt.__version__ >= "7.0": DataTypeEquivalence[trt.bool] = { Frameworks.NUMPY: np.bool_, Frameworks.TORCH: torch.bool, @@ -105,11 +105,7 @@ def unified_dtype_converter( trt_major_version = int(trt.__version__.split(".")[0]) if dtype in (np.int8, torch.int8, trt.int8): return DataTypeEquivalence[trt.int8][to] - elif is_tensorrt_version_supported("7.0") and dtype in ( - np.bool_, - torch.bool, - trt.bool, - ): + elif trt_major_version >= 7 and dtype in (np.bool_, torch.bool, trt.bool): return DataTypeEquivalence[trt.bool][to] elif dtype in (np.int32, torch.int32, trt.int32): return DataTypeEquivalence[trt.int32][to] diff --git a/setup.py b/setup.py index 4e2efffdaa..da5455da1e 100644 --- a/setup.py +++ b/setup.py @@ -416,7 +416,18 @@ def run(self): ext_modules = [] -packages = [ +fx_packages = [ + "torch_tensorrt.fx", + "torch_tensorrt.fx.converters", + "torch_tensorrt.fx.converters.impl", + "torch_tensorrt.fx.passes", + "torch_tensorrt.fx.tools", + "torch_tensorrt.fx.tracer", + "torch_tensorrt.fx.tracer.acc_tracer", + "torch_tensorrt.fx.tracer.dispatch_tracer", +] + +dynamo_packages = [ "torch_tensorrt", "torch_tensorrt.dynamo", "torch_tensorrt.dynamo.backend", @@ -435,18 +446,22 @@ def run(self): "torch_tensorrt.dynamo.partitioning", "torch_tensorrt.dynamo.runtime", "torch_tensorrt.dynamo.tools", - "torch_tensorrt.fx", - "torch_tensorrt.fx.converters", - "torch_tensorrt.fx.converters.impl", 
- "torch_tensorrt.fx.passes", - "torch_tensorrt.fx.tools", - "torch_tensorrt.fx.tracer", - "torch_tensorrt.fx.tracer.acc_tracer", - "torch_tensorrt.fx.tracer.dispatch_tracer", "torch_tensorrt.runtime", ] -package_dir = { +fx_package_dir = { + "torch_tensorrt.fx": "py/torch_tensorrt/fx", + "torch_tensorrt.fx.converters": "py/torch_tensorrt/fx/converters", + "torch_tensorrt.fx.converters.impl": "py/torch_tensorrt/fx/converters/impl", + "torch_tensorrt.fx.passes": "py/torch_tensorrt/fx/passes", + "torch_tensorrt.fx.tools": "py/torch_tensorrt/fx/tools", + "torch_tensorrt.fx.tracer": "py/torch_tensorrt/fx/tracer", + "torch_tensorrt.fx.tracer.acc_tracer": "py/torch_tensorrt/fx/tracer/acc_tracer", + "torch_tensorrt.fx.tracer.dispatch_tracer": "py/torch_tensorrt/fx/tracer/dispatch_tracer", +} + + +dynamo_package_dir = { "torch_tensorrt": "py/torch_tensorrt", "torch_tensorrt.dynamo": "py/torch_tensorrt/dynamo", "torch_tensorrt.dynamo.backend": "py/torch_tensorrt/dynamo/backend", @@ -465,17 +480,49 @@ def run(self): "torch_tensorrt.dynamo.partitioning": "py/torch_tensorrt/dynamo/partitioning", "torch_tensorrt.dynamo.runtime": "py/torch_tensorrt/dynamo/runtime", "torch_tensorrt.dynamo.tools": "py/torch_tensorrt/dynamo/tools", - "torch_tensorrt.fx": "py/torch_tensorrt/fx", - "torch_tensorrt.fx.converters": "py/torch_tensorrt/fx/converters", - "torch_tensorrt.fx.converters.impl": "py/torch_tensorrt/fx/converters/impl", - "torch_tensorrt.fx.passes": "py/torch_tensorrt/fx/passes", - "torch_tensorrt.fx.tools": "py/torch_tensorrt/fx/tools", - "torch_tensorrt.fx.tracer": "py/torch_tensorrt/fx/tracer", - "torch_tensorrt.fx.tracer.acc_tracer": "py/torch_tensorrt/fx/tracer/acc_tracer", - "torch_tensorrt.fx.tracer.dispatch_tracer": "py/torch_tensorrt/fx/tracer/dispatch_tracer", "torch_tensorrt.runtime": "py/torch_tensorrt/runtime", } +if USE_TRT_RTX: + package_dir = dynamo_package_dir + packages = dynamo_packages + exclude_package_data = { + "": [ + "py/torch_tensorrt/csrc/*.cpp", + 
"torch_tensorrt/csrc/*.cpp", + "test*", + "*.cpp", + ], + "torch_tensorrt": [ + "py/torch_tensorrt/csrc/*.cpp", + "torch_tensorrt/csrc/*.cpp", + "test*", + "*.cpp", + ], + } +else: + package_dir = dynamo_package_dir | fx_package_dir + packages = dynamo_packages + fx_packages + exclude_package_data = { + "": [ + "py/torch_tensorrt/csrc/*.cpp", + "py/torch_tensorrt/fx/test*", + "torch_tensorrt/csrc/*.cpp", + "torch_tensorrt/fx/test*", + "test*", + "*.cpp", + ], + "torch_tensorrt": [ + "py/torch_tensorrt/csrc/*.cpp", + "py/torch_tensorrt/fx/test*", + "torch_tensorrt/csrc/*.cpp", + "torch_tensorrt/fx/test*", + "test*", + "*.cpp", + ], + "torch_tensorrt.fx": ["test/*.py"], + } + package_data = {} if not (PY_ONLY or NO_TS): @@ -756,24 +803,5 @@ def get_requirements(): package_dir=package_dir, include_package_data=False, package_data=package_data, - exclude_package_data={ - "": [ - "py/torch_tensorrt/csrc/*.cpp", - "py/torch_tensorrt/fx/test*", - "torch_tensorrt/csrc/*.cpp", - "torch_tensorrt/fx/test*", - "test*", - "*.cpp", - ], - "torch_tensorrt": [ - "py/torch_tensorrt/csrc/*.cpp", - "py/torch_tensorrt/fx/test*", - "torch_tensorrt/csrc/*.cpp", - "torch_tensorrt/fx/test*", - "test*", - "*.cpp", - ], - "torch_tensorrt.dynamo": ["test/*.py"], - "torch_tensorrt.fx": ["test/*.py"], - }, + exclude_package_data=exclude_package_data, ) diff --git a/tests/py/ts/api/test_classes.py b/tests/py/ts/api/test_classes.py index 39983d29f8..a32b2b2722 100644 --- a/tests/py/ts/api/test_classes.py +++ b/tests/py/ts/api/test_classes.py @@ -5,6 +5,7 @@ import torch import torch_tensorrt as torchtrt import torchvision.models as models +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo.runtime._TorchTensorRTModule import TorchTensorRTModule @@ -313,6 +314,10 @@ def test_set_get_profile_path_prefix(self): trt_mod.engine.profile_path_prefix = "/tmp/" self.assertTrue(trt_mod.engine.profile_path_prefix == "/tmp/") + @unittest.skipIf( + is_tensorrt_rtx(), + "layer info 
is different for tensorrt_rtx", + ) def test_get_layer_info(self): """ { From bfd9d3f5d558d2cca1033205d4b3c6eb59895e67 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Aug 2025 13:28:23 -0700 Subject: [PATCH 07/12] resolve comments --- packaging/pre_build_script.sh | 4 ++-- .../dynamo/conversion/_ConverterRegistry.py | 21 +++++++++++++------ tests/py/ts/api/test_ts_backend.py | 19 ++++++++++------- 3 files changed, 28 insertions(+), 16 deletions(-) diff --git a/packaging/pre_build_script.sh b/packaging/pre_build_script.sh index 66dd0fc455..be449b73af 100755 --- a/packaging/pre_build_script.sh +++ b/packaging/pre_build_script.sh @@ -12,10 +12,10 @@ if [[ $(uname -m) == "aarch64" ]]; then if [[ ${os_name} == "ubuntu" ]]; then IS_JETPACK=true apt-get update - apt-get install -y ninja-build gettext curl libopenblas-dev + apt-get install -y ninja-build gettext curl libopenblas-dev zip unzip else IS_SBSA=true - yum install -y ninja-build gettext + yum install -y ninja-build gettext zip unzip fi else BAZEL_PLATFORM="amd64" diff --git a/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py b/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py index eb1692e392..ea7b3c3677 100644 --- a/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py +++ b/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py @@ -23,9 +23,9 @@ from torch import SymBool, SymFloat, SymInt from torch._ops import OpOverloadPacket from torch.fx.node import Argument, Node, Target, _get_qualified_name +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo._settings import CompilationSettings from torch_tensorrt.dynamo.conversion._ConversionContext import ConversionContext -from torch_tensorrt.fx.converter_registry import CONVERTERS as FX_CONVERTERS logger = logging.getLogger(__name__) @@ -624,8 +624,17 @@ def display_all_available_converters(self) -> str: # Initialize dynamo converter registry with the FX and Dynamo aten registries # Note the Dynamo registry 
is listed first, for precedence -DYNAMO_CONVERTERS: ConverterRegistry = ConverterRegistry( - [DYNAMO_ATEN_CONVERTERS, FX_CONVERTERS], # type: ignore[list-item] - ["Dynamo ATen Converters Registry", "FX Legacy ATen Converters Registry"], - [CallingConvention.CTX, CallingConvention.LEGACY], -) +if is_tensorrt_rtx(): + DYNAMO_CONVERTERS = ConverterRegistry( + [DYNAMO_ATEN_CONVERTERS], # type: ignore[list-item] + ["Dynamo ATen Converters Registry"], + [CallingConvention.CTX], + ) +else: + from torch_tensorrt.fx.converter_registry import CONVERTERS as FX_CONVERTERS + + DYNAMO_CONVERTERS = ConverterRegistry( + [DYNAMO_ATEN_CONVERTERS, FX_CONVERTERS], # type: ignore[list-item] + ["Dynamo ATen Converters Registry", "FX Legacy ATen Converters Registry"], + [CallingConvention.CTX, CallingConvention.LEGACY], + ) diff --git a/tests/py/ts/api/test_ts_backend.py b/tests/py/ts/api/test_ts_backend.py index e56ab4f902..c4d1ba403a 100644 --- a/tests/py/ts/api/test_ts_backend.py +++ b/tests/py/ts/api/test_ts_backend.py @@ -1,10 +1,12 @@ +import copy import unittest -import torch_tensorrt as torchtrt +from typing import Dict + import torch +import torch_tensorrt as torchtrt import torchvision.models as models -import copy -from typing import Dict -from utils import cosine_similarity, COSINE_THRESHOLD +from torch_tensorrt._utils import is_tensorrt_rtx +from utils import COSINE_THRESHOLD, cosine_similarity class TestCompile(unittest.TestCase): @@ -139,10 +141,11 @@ def test_module_type(self): torchtrt._compile._parse_module_type(ts_module), torchtrt._compile._ModuleType.ts, ) - self.assertEqual( - torchtrt._compile._parse_module_type(fx_module), - torchtrt._compile._ModuleType.fx, - ) + if not is_tensorrt_rtx(): + self.assertEqual( + torchtrt._compile._parse_module_type(fx_module), + torchtrt._compile._ModuleType.fx, + ) if __name__ == "__main__": From 74f746467fa0bc3539758b80afc3e244a229e10a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Aug 2025 16:44:02 -0700 Subject: 
[PATCH 08/12] replace fx.observer --- .github/scripts/install-tensorrt-rtx.sh | 17 +- .github/workflows/build_wheels_linux.yml | 44 ++-- docsrc/getting_started/tensorrt_rtx.rst | 5 +- docsrc/index.rst | 1 + .../dynamo/conversion/_ConverterRegistry.py | 30 ++- .../dynamo/conversion/_TRTInterpreter.py | 2 +- py/torch_tensorrt/dynamo/observer.py | 194 ++++++++++++++++++ 7 files changed, 253 insertions(+), 40 deletions(-) create mode 100644 py/torch_tensorrt/dynamo/observer.py diff --git a/.github/scripts/install-tensorrt-rtx.sh b/.github/scripts/install-tensorrt-rtx.sh index 61a0ce2ae6..4e746d18c6 100644 --- a/.github/scripts/install-tensorrt-rtx.sh +++ b/.github/scripts/install-tensorrt-rtx.sh @@ -1,6 +1,7 @@ install_tensorrt_rtx() { if [[ ${USE_TRT_RTX} == true ]]; then + TRT_RTX_VERSION=1.0.0.21 install_wheel_or_not=${1:-false} echo "It is the tensorrt-rtx build, install tensorrt-rtx with install_wheel_or_not:${install_wheel_or_not}" PLATFORM=$(python -c "import sys; print(sys.platform)") @@ -14,22 +15,22 @@ install_tensorrt_rtx() { # python version is like 3.11, we need to convert it to cp311 CPYTHON_TAG="cp${PYTHON_VERSION//./}" if [[ ${PLATFORM} == win32 ]]; then - curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip -o TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip - unzip TensorRT-RTX-1.0.0.21.Windows.win10.cuda-12.9.zip - rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib + curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-${TRT_RTX_VERSION}.Windows.win10.cuda-12.9.zip -o TensorRT-RTX-${TRT_RTX_VERSION}.Windows.win10.cuda-12.9.zip + unzip TensorRT-RTX-${TRT_RTX_VERSION}.Windows.win10.cuda-12.9.zip + rtx_lib_dir=${PWD}/TensorRT-RTX-${TRT_RTX_VERSION}/lib export PATH=${rtx_lib_dir}:$PATH echo "PATH: $PATH" if [[ ${install_wheel_or_not} == true ]]; then - pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-win_amd64.whl + pip install 
TensorRT-RTX-${TRT_RTX_VERSION}/python/tensorrt_rtx-${TRT_RTX_VERSION}-${CPYTHON_TAG}-none-win_amd64.whl fi else - curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz -o TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz - tar -xzf TensorRT-RTX-1.0.0.21.Linux.x86_64-gnu.cuda-12.9.tar.gz - rtx_lib_dir=${PWD}/TensorRT-RTX-1.0.0.21/lib + curl -L https://developer.nvidia.com/downloads/trt/rtx_sdk/secure/1.0/TensorRT-RTX-${TRT_RTX_VERSION}.Linux.x86_64-gnu.cuda-12.9.tar.gz -o TensorRT-RTX-${TRT_RTX_VERSION}.Linux.x86_64-gnu.cuda-12.9.tar.gz + tar -xzf TensorRT-RTX-${TRT_RTX_VERSION}.Linux.x86_64-gnu.cuda-12.9.tar.gz + rtx_lib_dir=${PWD}/TensorRT-RTX-${TRT_RTX_VERSION}/lib export LD_LIBRARY_PATH=${rtx_lib_dir}:$LD_LIBRARY_PATH echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH" if [[ ${install_wheel_or_not} == true ]]; then - pip install TensorRT-RTX-1.0.0.21/python/tensorrt_rtx-1.0.0.21-${CPYTHON_TAG}-none-linux_x86_64.whl + pip install TensorRT-RTX-${TRT_RTX_VERSION}/python/tensorrt_rtx-${TRT_RTX_VERSION}-${CPYTHON_TAG}-none-linux_x86_64.whl fi fi else diff --git a/.github/workflows/build_wheels_linux.yml b/.github/workflows/build_wheels_linux.yml index 4efa65add7..4b95a90e86 100644 --- a/.github/workflows/build_wheels_linux.yml +++ b/.github/workflows/build_wheels_linux.yml @@ -317,30 +317,34 @@ jobs: source "${BUILD_ENV_FILE}" WHEEL_NAME=$(ls "${{ inputs.repository }}/dist/") echo "$WHEEL_NAME" + if [[ ${{ inputs.is-jetpack }} == true ]]; then + echo "Skipping smoke test for jetpack, since it is not the actual jetpack environment" + else + ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" + # Checking that we have a pinned version of torch in our dependency tree + ( + pushd "${RUNNER_TEMP}" + unzip -o "${GITHUB_WORKSPACE}/${{ inputs.repository }}/dist/$WHEEL_NAME" + # Ensure that pytorch version is pinned, should output file where it was found + grep "Requires-Dist: torch (==.*)" 
-r . + ) - ${CONDA_RUN} pip install "${{ inputs.repository }}/dist/$WHEEL_NAME" - # Checking that we have a pinned version of torch in our dependency tree - ( - pushd "${RUNNER_TEMP}" - unzip -o "${GITHUB_WORKSPACE}/${{ inputs.repository }}/dist/$WHEEL_NAME" - # Ensure that pytorch version is pinned, should output file where it was found - grep "Requires-Dist: torch (==.*)" -r . - ) - - if [[ (! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT}) ]]; then - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" - if [[ "${PACKAGE_NAME}" = "torchrec" ]]; then - # Special case for torchrec temporarily since __version__ does not - # work correctly on main in torchrec. This block will be - # removed once we fix it. - ${CONDA_RUN} python -c "import ${PACKAGE_NAME}" + if [[ (! -f "${{ inputs.repository }}"/${SMOKE_TEST_SCRIPT}) ]]; then + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} not found" + if [[ "${PACKAGE_NAME}" = "torchrec" ]]; then + # Special case for torchrec temporarily since __version__ does not + # work correctly on main in torchrec. This block will be + # removed once we fix it. 
+ ${CONDA_RUN} python -c "import ${PACKAGE_NAME}" + else + ${CONDA_RUN} python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + fi else - ${CONDA_RUN} python -c "import ${PACKAGE_NAME}; print('package version is ', ${PACKAGE_NAME}.__version__)" + echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" + ${CONDA_RUN} python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" fi - else - echo "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT} found" - ${CONDA_RUN} python "${{ inputs.repository }}/${SMOKE_TEST_SCRIPT}" fi + # NB: Only upload to GitHub after passing smoke tests - name: Upload wheel to GitHub diff --git a/docsrc/getting_started/tensorrt_rtx.rst b/docsrc/getting_started/tensorrt_rtx.rst index 07cf476f38..32104ddcef 100644 --- a/docsrc/getting_started/tensorrt_rtx.rst +++ b/docsrc/getting_started/tensorrt_rtx.rst @@ -95,9 +95,12 @@ Build Locally with TensorRT-RTX .. code-block:: sh - # If you have previously built with standard TensorRT, make sure to clean the build environment + # If you have previously built with standard TensorRT, make sure to clean the build environment, + # otherwise it will use the existing .so built with standard TensorRT, which is not compatible with TensorRT-RTX. 
python setup.py clean bazel clean --expunge + #remove everything under build directory, + rm -rf build/* # Build wheel with TensorRT-RTX python setup.py bdist_wheel --use-rtx diff --git a/docsrc/index.rst b/docsrc/index.rst index 4d28d77640..68e1ba5259 100644 --- a/docsrc/index.rst +++ b/docsrc/index.rst @@ -28,6 +28,7 @@ Getting Started getting_started/installation getting_started/jetpack getting_started/quick_start + getting_started/tensorrt_rtx User Guide ------------ diff --git a/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py b/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py index ea7b3c3677..7851d54cd6 100644 --- a/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py +++ b/py/torch_tensorrt/dynamo/conversion/_ConverterRegistry.py @@ -625,16 +625,26 @@ def display_all_available_converters(self) -> str: # Initialize dynamo converter registry with the FX and Dynamo aten registries # Note the Dynamo registry is listed first, for precedence if is_tensorrt_rtx(): - DYNAMO_CONVERTERS = ConverterRegistry( - [DYNAMO_ATEN_CONVERTERS], # type: ignore[list-item] - ["Dynamo ATen Converters Registry"], - [CallingConvention.CTX], - ) + registries = [ + DYNAMO_ATEN_CONVERTERS, + ] + registry_names = ["Dynamo ATen Converters Registry"] + registry_calling_conventions = [ + CallingConvention.CTX, + ] else: from torch_tensorrt.fx.converter_registry import CONVERTERS as FX_CONVERTERS - DYNAMO_CONVERTERS = ConverterRegistry( - [DYNAMO_ATEN_CONVERTERS, FX_CONVERTERS], # type: ignore[list-item] - ["Dynamo ATen Converters Registry", "FX Legacy ATen Converters Registry"], - [CallingConvention.CTX, CallingConvention.LEGACY], - ) + registries = [DYNAMO_ATEN_CONVERTERS, FX_CONVERTERS] + registry_names = [ + "Dynamo ATen Converters Registry", + "FX Legacy ATen Converters Registry", + ] + registry_calling_conventions = [CallingConvention.CTX, CallingConvention.LEGACY] + + +DYNAMO_CONVERTERS: ConverterRegistry = ConverterRegistry( + registries, + registry_names, + 
registry_calling_conventions, +) diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index 749e6c5dbe..9cd0bc99ba 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -48,8 +48,8 @@ ) from torch_tensorrt.dynamo.debug._DebuggerConfig import DebuggerConfig from torch_tensorrt.dynamo.debug._supports_debugger import cls_supports_debugger +from torch_tensorrt.dynamo.observer import Observer from torch_tensorrt.dynamo.utils import DYNAMIC_DIM, deallocate_module, to_torch_device -from torch_tensorrt.fx.observer import Observer from torch_tensorrt.logging import TRT_LOGGER _LOGGER: logging.Logger = logging.getLogger(__name__) diff --git a/py/torch_tensorrt/dynamo/observer.py b/py/torch_tensorrt/dynamo/observer.py new file mode 100644 index 0000000000..3742bd2840 --- /dev/null +++ b/py/torch_tensorrt/dynamo/observer.py @@ -0,0 +1,194 @@ +import contextlib +import functools +import logging +import traceback +import typing as t +from contextvars import ContextVar +from dataclasses import dataclass, field + +_LOGGER = logging.getLogger(__name__) + +# A context variable to hold registered callbacks for all the observers for the +# current execution context. The callbacks list could have been a member +# variable on the observer instance, however, contextvars document advice +# against creating context variables not at module-global level. +# https://docs.python.org/3/library/contextvars.html#contextvars.ContextVar +_CALLBACKS: ContextVar[t.Dict["Observer", t.List[t.Callable]]] = ContextVar( + "_CALLBACKS", default=None +) + +TObserverCallback = t.TypeVar("TObserverCallback", bound=t.Callable[..., t.Any]) + +# Whether to rethrow the exception caught while calling observer callbacks. +# Default to False. True is only used during tests. 
+RETHROW_CALLBACK_EXCEPTION: bool = False + + +@dataclass(frozen=True) +class Observer(t.Generic[TObserverCallback]): + """ + Usage: + + >>> some_observer: Observer = ... + >>> with some_observer.add(callback_func): + >>> # do stuff, and when some_observer.observe() is called, + >>> # it will execute callback_func() + >>> ... + + """ + + name: str = "" + # Ensure each Observer instance is considered a distinct key when stored in + # the `_CALLBACKS` dictionary. + unique_id: object = field(default_factory=lambda: object()) + + def add(self, callback: TObserverCallback) -> t.ContextManager: + self._get_callbacks().append(callback) + + # Cannot decorate the outer `add` directly with `contextmanager`, + # because if it were not used with a `with` statement, its body won't + # be executed. + @contextlib.contextmanager + def _add(): + try: + yield + finally: + try: + self._get_callbacks().remove(callback) + except ValueError: + # Callback should be in the callbacks list. I'm just being + # extra cautious here. I don't want it to throw and affect + # business logic. + pass + + return _add() + + def observe(self, *args, **kwargs) -> None: + for callback in self._get_callbacks(): + with _log_error( + "Error calling observer callback", rethrow=RETHROW_CALLBACK_EXCEPTION + ): + callback(*args, **kwargs) + + def _get_callbacks(self) -> t.List[t.Callable]: + """ + Gets the callbacks registered in current execution context. Any code + that manipulates the returned list (add, remove, iterate) is + concurrency safe. + """ + callbacks_dict = _CALLBACKS.get() + if callbacks_dict is None: + callbacks_dict = {} + _CALLBACKS.set(callbacks_dict) + + if self not in callbacks_dict: + callbacks_dict[self] = [] + + return callbacks_dict[self] + + +@dataclass(frozen=True) +class ObserveContext: + """ + Passed to the registered callables that observes any function decorated by + `observable`. See `observable` for detail. 
+ + Attributes: + callable: the observed callable object + args: the args passed to the callable + kwargs: the kwargs passed to the callable + return_value: the return value returned by the callable, only available + when observing the callable after its invocation (via + `CallableObservers.post`) + """ + + callable: t.Callable + args: t.List[t.Any] + kwargs: t.Mapping[str, t.Any] + return_value: t.Any = None + + +def observable(): + """ + A decorator to turn a function into observable + + Example: + + >>> @observable() + >>> def func_to_observe(x, y) -> int: + >>> ... + >>> + >>> def log(ctx: ObserveContext): + >>> print( + >>> f"called {ctx.callable.__name__} with {ctx.args} {ctx.kwargs}" + >>> ) + >>> + >>> # register: + >>> with func_to_observe.observers.pre.add(log): + >>> func_to_observe(1, 2) + >>> # print out "called func_to_observe with (1,2) + >>> # here it won't print + """ + + def decorator(observed_func: callable) -> ObservedCallable: + wrapped_func = _make_observable(orig_func=observed_func) + return functools.wraps(observed_func)(wrapped_func) + + return decorator + + +@dataclass(frozen=True) +class CallableObservers: + pre: Observer[t.Callable[[ObserveContext], None]] + post: Observer[t.Callable[[ObserveContext], None]] + + +class ObservedCallable: + """ + Interface for an observed callable + """ + + observers: CallableObservers + orig_func: callable + + def __call__(self, *args, **kwargs) -> t.Any: + raise NotImplementedError() + + +def _make_observable(orig_func: t.Callable) -> ObservedCallable: + """ + A wrapper for a callable which is to be observed. 
+ """ + + observers = CallableObservers( + pre=Observer(), + post=Observer(), + ) + + @functools.wraps(orig_func) + def observed_func(*args, **kwargs): + observers.pre.observe(ObserveContext(orig_func, args, kwargs)) + return_value = None + try: + return_value = orig_func(*args, **kwargs) + return return_value + finally: + observers.post.observe( + ObserveContext(orig_func, args, kwargs, return_value) + ) + + observed_func.orig_func = orig_func + observed_func.observers = observers + + return observed_func + + +@contextlib.contextmanager +def _log_error(msg: str, rethrow: bool = False) -> t.ContextManager: + try: + yield + except Exception as e: + _e = e # noqa: F841 + _LOGGER.info(f"{msg} (This error is handled): {traceback.format_exc()}") + if rethrow: + raise From edfd75265c26b9608f52f78ab6d088f27795a325 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 12 Aug 2025 16:57:07 -0700 Subject: [PATCH 09/12] resolve comments --- py/torch_tensorrt/_compile.py | 23 +++++++++++++---------- py/torch_tensorrt/_features.py | 3 ++- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py index acae618f1b..3c2385f6da 100644 --- a/py/torch_tensorrt/_compile.py +++ b/py/torch_tensorrt/_compile.py @@ -7,17 +7,14 @@ from typing import Any, Callable, List, Optional, Sequence, Set, Union import torch -import torch.fx from torch_tensorrt._enums import dtype from torch_tensorrt._features import ENABLED_FEATURES, needs_cross_compile from torch_tensorrt._Input import Input +from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo import _defaults from torch_tensorrt.dynamo.runtime._CudaGraphsTorchTensorRTModule import ( CudaGraphsTorchTensorRTModule, ) -from torch_tensorrt.fx import InputTensorSpec -from torch_tensorrt.fx.lower import compile as fx_compile -from torch_tensorrt.fx.utils import LowerPrecision from typing_extensions import TypeGuard if ENABLED_FEATURES.torchscript_frontend: @@ 
-62,12 +59,6 @@ def _non_fx_input_interface( return all(isinstance(i, (torch.Tensor, Input)) for i in inputs) -def _fx_input_interface( - inputs: Sequence[Input | torch.Tensor | InputTensorSpec], -) -> TypeGuard[List[InputTensorSpec | torch.Tensor]]: - return all(isinstance(i, (torch.Tensor, InputTensorSpec)) for i in inputs) - - class _IRType(Enum): """Enum to determine the type of IR selected for model compilation""" @@ -237,6 +228,13 @@ def compile( ) return compiled_ts_module elif target_ir == _IRType.fx: + if is_tensorrt_rtx(): + raise RuntimeError("FX frontend is not supported on TensorRT-RTX") + import torch.fx + from torch_tensorrt.fx import InputTensorSpec + from torch_tensorrt.fx.lower import compile as fx_compile + from torch_tensorrt.fx.utils import LowerPrecision + if ( torch.float16 in enabled_precisions_set or torch_tensorrt.dtype.half in enabled_precisions_set @@ -250,6 +248,11 @@ def compile( else: raise ValueError(f"Precision {enabled_precisions_set} not supported on FX") + def _fx_input_interface( + inputs: Sequence[Input | torch.Tensor | InputTensorSpec], + ) -> TypeGuard[List[InputTensorSpec | torch.Tensor]]: + return all(isinstance(i, (torch.Tensor, InputTensorSpec)) for i in inputs) + assert _fx_input_interface(input_list) compiled_fx_module: torch.nn.Module = fx_compile( module, diff --git a/py/torch_tensorrt/_features.py b/py/torch_tensorrt/_features.py index f1993486cb..0300f4b296 100644 --- a/py/torch_tensorrt/_features.py +++ b/py/torch_tensorrt/_features.py @@ -6,6 +6,7 @@ from torch_tensorrt._utils import ( check_cross_compile_trt_win_lib, + is_tensorrt_rtx, sanitized_torch_version, ) @@ -42,7 +43,7 @@ _TS_FE_AVAIL = os.path.isfile(linked_file_full_path) _TORCHTRT_RT_AVAIL = _TS_FE_AVAIL or os.path.isfile(linked_file_runtime_full_path) _DYNAMO_FE_AVAIL = version.parse(sanitized_torch_version()) >= version.parse("2.1.dev") -_FX_FE_AVAIL = True +_FX_FE_AVAIL = not is_tensorrt_rtx() _REFIT_AVAIL = True 
_WINDOWS_CROSS_COMPILE = check_cross_compile_trt_win_lib() From 97d6432b06eed2928a36f1b760d7364420c5c19c Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Wed, 13 Aug 2025 09:27:43 -0700 Subject: [PATCH 10/12] test --- py/torch_tensorrt/_compile.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/py/torch_tensorrt/_compile.py b/py/torch_tensorrt/_compile.py index 3c2385f6da..0541bebe88 100644 --- a/py/torch_tensorrt/_compile.py +++ b/py/torch_tensorrt/_compile.py @@ -10,13 +10,22 @@ from torch_tensorrt._enums import dtype from torch_tensorrt._features import ENABLED_FEATURES, needs_cross_compile from torch_tensorrt._Input import Input -from torch_tensorrt._utils import is_tensorrt_rtx from torch_tensorrt.dynamo import _defaults from torch_tensorrt.dynamo.runtime._CudaGraphsTorchTensorRTModule import ( CudaGraphsTorchTensorRTModule, ) from typing_extensions import TypeGuard +if ENABLED_FEATURES.fx_frontend: + import torch.fx + from torch_tensorrt.fx import InputTensorSpec + from torch_tensorrt.fx.lower import compile as fx_compile + from torch_tensorrt.fx.utils import LowerPrecision + + InputType = Union[Input, torch.Tensor, InputTensorSpec] +else: + InputType = Union[Input, torch.Tensor] + if ENABLED_FEATURES.torchscript_frontend: import torch_tensorrt.ts from torch_tensorrt.ts._compiler import compile as torchscript_compile @@ -54,7 +63,7 @@ def _non_fx_input_interface( - inputs: Sequence[Input | torch.Tensor | InputTensorSpec], + inputs: Sequence[Input | torch.Tensor], ) -> TypeGuard[List[Input | torch.Tensor]]: return all(isinstance(i, (torch.Tensor, Input)) for i in inputs) @@ -158,7 +167,7 @@ def _get_target_fe(module_type: _ModuleType, ir: str) -> _IRType: def compile( module: Any, ir: str = "default", - inputs: Optional[Sequence[Input | torch.Tensor | InputTensorSpec]] = None, + inputs: Optional[Sequence[InputType]] = None, arg_inputs: Optional[Sequence[Sequence[Any]]] = None, kwarg_inputs: Optional[dict[Any, 
Any]] = None, enabled_precisions: Optional[Set[Union[torch.dtype, dtype]]] = None, @@ -228,12 +237,10 @@ def compile( ) return compiled_ts_module elif target_ir == _IRType.fx: - if is_tensorrt_rtx(): - raise RuntimeError("FX frontend is not supported on TensorRT-RTX") - import torch.fx - from torch_tensorrt.fx import InputTensorSpec - from torch_tensorrt.fx.lower import compile as fx_compile - from torch_tensorrt.fx.utils import LowerPrecision + if not ENABLED_FEATURES.fx_frontend: + raise RuntimeError( + "FX frontend is not enabled, cannot compile with target_ir=fx" + ) if ( torch.float16 in enabled_precisions_set @@ -423,7 +430,7 @@ def torch_compile(module: torch.nn.Module, **kwargs: Any) -> Any: def convert_method_to_trt_engine( module: Any, method_name: str = "forward", - inputs: Optional[Sequence[Input | torch.Tensor | InputTensorSpec]] = None, + inputs: Optional[Sequence[Input | torch.Tensor]] = None, arg_inputs: Optional[Sequence[Sequence[Any]]] = None, kwarg_inputs: Optional[dict[Any, Any]] = None, ir: str = "default", @@ -670,7 +677,7 @@ def save( inductor_configs = kwargs["inductor_configs"] torch._inductor.aoti_compile_and_package( - exp_program, + module, inductor_configs=inductor_configs, package_path=file_path, ) From f7b6c9a9b21489fb4ada8c0d71f151509b3de101 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 14 Aug 2025 22:43:13 -0700 Subject: [PATCH 11/12] merge main to the branch --- py/torch_tensorrt/csrc/tensorrt_classes.cpp | 1 - py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py | 1 + py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py | 1 + .../fx/test/converters/acc_op/test_quantize_per_tensor.py | 1 + 4 files changed, 3 insertions(+), 1 deletion(-) diff --git a/py/torch_tensorrt/csrc/tensorrt_classes.cpp b/py/torch_tensorrt/csrc/tensorrt_classes.cpp index 553c6238a5..788a45184b 100644 --- a/py/torch_tensorrt/csrc/tensorrt_classes.cpp +++ b/py/torch_tensorrt/csrc/tensorrt_classes.cpp @@ -349,7 +349,6 @@ core::CompileSpec 
CompileSpec::toInternalCompileSpec(bool converting_to_trt_engi info.lower_info.unfreeze_module = true; info.lower_info.disable_cse = true; } - info.convert_info.engine_settings.sparse_weights = sparse_weights; info.convert_info.engine_settings.disable_tf32 = disable_tf32; info.convert_info.engine_settings.refit = refit; diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py index 7d17056b62..7f32b749c5 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_dequantize.py @@ -3,6 +3,7 @@ import tensorrt as trt import torch.fx import torch.nn as nn + import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests from torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py index b21779b65f..c82eee79ee 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_pad.py @@ -3,6 +3,7 @@ import tensorrt as trt import torch import torch.nn as nn + import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from parameterized import parameterized from torch.testing._internal.common_utils import run_tests diff --git a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py index 22cbdd826d..c7b050c4ac 100644 --- a/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py +++ b/py/torch_tensorrt/fx/test/converters/acc_op/test_quantize_per_tensor.py @@ -3,6 +3,7 @@ import tensorrt as trt import torch.fx import torch.nn as nn + import torch_tensorrt.fx.tracer.acc_tracer.acc_ops as acc_ops from torch.testing._internal.common_utils import run_tests from 
torch_tensorrt.fx.tools.common_fx2trt import AccTestCase, InputTensorSpec From 6c72548c42352bc4e03bced6799eccc5a540a003 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Fri, 22 Aug 2025 12:14:45 -0700 Subject: [PATCH 12/12] add skip test for rtx --- .../dynamo/conversion/_TRTInterpreter.py | 7 ++++++- .../dynamo/conversion/test_deconvolution_aten.py | 15 ++++++++++----- tests/py/dynamo/conversion/test_hardtanh_aten.py | 7 +++++++ tests/py/dynamo/models/test_dyn_models.py | 3 +++ tests/py/dynamo/models/test_models.py | 9 +++++++++ tests/py/dynamo/models/test_models_export.py | 8 +++++++- 6 files changed, 42 insertions(+), 7 deletions(-) diff --git a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py index 9cd0bc99ba..9f151954a0 100644 --- a/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py +++ b/py/torch_tensorrt/dynamo/conversion/_TRTInterpreter.py @@ -111,7 +111,7 @@ def __init__( if not CONVERTERS.compilation_settings: # Configure user compilation settings to converters. 
CONVERTERS.set_compilation_settings(compilation_settings) - + self.validate_compile_settings() assert TRTInterpreter._all_precisions_supported( compilation_settings.enabled_precisions ), f"Attempted to enable kernel precisions that are not supported (got: {compilation_settings.enabled_precisions}, support: {_defaults.SUPPORTED_KERNEL_PRECISIONS})" @@ -196,6 +196,11 @@ def _all_precisions_supported(enabled_precisions: Set[dtype]) -> bool: return enabled_precisions.issubset(_defaults.SUPPORTED_KERNEL_PRECISIONS) def validate_compile_settings(self) -> None: + if is_tensorrt_rtx(): + if dtype.bfloat16 in self.compilation_settings.enabled_precisions: + raise RuntimeError("TensorRT-RTX does not support bfloat16!") + return + if ( dtype.i8 in self.compilation_settings.enabled_precisions and not self.builder.platform_has_fast_int8 diff --git a/tests/py/dynamo/conversion/test_deconvolution_aten.py b/tests/py/dynamo/conversion/test_deconvolution_aten.py index 1909cb8fbb..e2898758e6 100644 --- a/tests/py/dynamo/conversion/test_deconvolution_aten.py +++ b/tests/py/dynamo/conversion/test_deconvolution_aten.py @@ -1,8 +1,10 @@ +import unittest + import torch from parameterized import param, parameterized from torch.testing._internal.common_utils import run_tests - from torch_tensorrt import Input +from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase @@ -22,14 +24,15 @@ class TestDeconvolutionConverter(DispatchTestCase): param("output_padding_4", 3, stride=3, padding=2, output_padding=1), param("output_padding_5", 3, stride=3, padding=3, output_padding=1), param("output_padding_6", 3, stride=3, padding=3, output_padding=2), + # tensorrt-rtx: does not support both strided and dilated deconv due to cuDNN limitation param( "combined_params", 3, - stride=3, + stride=1, padding=3, dilation=2, groups=3, - output_padding=2, + output_padding=1, ), ] ) @@ -126,14 +129,15 @@ def forward(self, x): param("output_padding_5", 3, stride=3, padding=2,
output_padding=1), param("output_padding_6", 3, stride=3, padding=3, output_padding=1), param("output_padding_7", 3, stride=3, padding=3, output_padding=2), + # tensorrt-rtx: does not support both strided and dilated deconv due to cuDNN limitation param( "combined_params", 3, - stride=3, + stride=1, padding=3, dilation=2, groups=3, - output_padding=2, + output_padding=1, ), ] ) @@ -200,6 +204,7 @@ def forward(self, x): enable_passes=True, ) + @unittest.skipIf(is_tensorrt_rtx(), "TensorRT-RTX has bug on deconv3d") @parameterized.expand( [ ("default", 1), diff --git a/tests/py/dynamo/conversion/test_hardtanh_aten.py b/tests/py/dynamo/conversion/test_hardtanh_aten.py index 1c8cae2d53..e286c1cf6e 100644 --- a/tests/py/dynamo/conversion/test_hardtanh_aten.py +++ b/tests/py/dynamo/conversion/test_hardtanh_aten.py @@ -1,11 +1,18 @@ +import unittest + import torch import torch.nn as nn from torch.testing._internal.common_utils import run_tests from torch_tensorrt import Input +from torch_tensorrt._utils import is_tensorrt_rtx from .harness import DispatchTestCase +@unittest.skipIf( + is_tensorrt_rtx(), + "hardtanh is implemented in fx, need to move to dynamo, skip for TensorRT-RTX for now", +) class TestHardTanHConverter(DispatchTestCase): def test_hardtanh(self): class TestModule(nn.Module): diff --git a/tests/py/dynamo/models/test_dyn_models.py b/tests/py/dynamo/models/test_dyn_models.py index fb3a3b8688..28171ed41e 100644 --- a/tests/py/dynamo/models/test_dyn_models.py +++ b/tests/py/dynamo/models/test_dyn_models.py @@ -183,6 +183,9 @@ def test_resnet_dynamic(ir, dtype): """ Tests the Resnet18 model (which is fully convertible) with dynamic shapes """ + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + import torchvision.models as models model = models.resnet18(pretrained=True).eval().to("cuda").to(dtype) diff --git a/tests/py/dynamo/models/test_models.py b/tests/py/dynamo/models/test_models.py index 
84f36e48ce..c598d43392 100644 --- a/tests/py/dynamo/models/test_models.py +++ b/tests/py/dynamo/models/test_models.py @@ -138,6 +138,9 @@ def test_resnet18_torch_exec_ops(ir): ) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) def test_mobilenet_v2(ir, dtype): + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + model = models.mobilenet_v2(pretrained=True).eval().to("cuda").to(dtype) input = torch.randn((1, 3, 224, 224)).to("cuda").to(dtype) @@ -177,6 +180,9 @@ def test_mobilenet_v2(ir, dtype): ) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) def test_efficientnet_b0(ir, dtype): + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + model = ( timm.create_model("efficientnet_b0", pretrained=True) .eval() @@ -221,6 +227,9 @@ def test_efficientnet_b0(ir, dtype): ) @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16, torch.float32]) def test_bert_base_uncased(ir, dtype): + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + from transformers import BertModel model = BertModel.from_pretrained("bert-base-uncased").cuda().eval().to(dtype) diff --git a/tests/py/dynamo/models/test_models_export.py b/tests/py/dynamo/models/test_models_export.py index 583ee9a18e..3971786926 100644 --- a/tests/py/dynamo/models/test_models_export.py +++ b/tests/py/dynamo/models/test_models_export.py @@ -284,7 +284,7 @@ def test_base_fp4_static_shapes(ir): import modelopt.torch.quantization as mtq from modelopt.torch.quantization.utils import export_torch_mode - dtype = torch.bfloat16 + dtype = torch.float16 class SimpleNetwork(torch.nn.Module): def __init__(self): @@ -392,6 +392,9 @@ def test_base_int8(ir, dtype): import modelopt.torch.quantization as mtq from modelopt.torch.quantization.utils import export_torch_mode + if is_tensorrt_rtx() and dtype 
== torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + class SimpleNetwork(torch.nn.Module): def __init__(self): super(SimpleNetwork, self).__init__() @@ -448,6 +451,9 @@ def test_base_int8_dynamic_shape(ir, dtype): import modelopt.torch.quantization as mtq from modelopt.torch.quantization.utils import export_torch_mode + if is_tensorrt_rtx() and dtype == torch.bfloat16: + pytest.skip("TensorRT-RTX does not support bfloat16") + class SimpleNetwork(torch.nn.Module): def __init__(self): super(SimpleNetwork, self).__init__()