Skip to content

Commit 3511e54

Browse files
committed
chore: rebase with main
Signed-off-by: Dheeraj Peri <[email protected]>
2 parents 7f9da2d + e6b0a88 commit 3511e54

File tree

268 files changed

+1008
-2711
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

268 files changed

+1008
-2711
lines changed

.github/scripts/filter-matrix.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from typing import Any, Dict, List
88

99
# currently we don't support python 3.13t because tensorrt does not support 3.13t
10-
disabled_python_versions: List[str] = ["3.13t"]
10+
disabled_python_versions: List[str] = ["3.13t", "3.14", "3.14t"]
1111

1212
# jetpack 6.2 only officially supports python 3.10 and cu126
1313
jetpack_python_versions: List[str] = ["3.10"]

.github/scripts/install-cuda-aarch64.sh

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,27 @@ install_cuda_aarch64() {
55
CU_VER=${CU_VERSION:2:2}-${CU_VERSION:4:1}
66
# CU_VERSION: cu129 --> CU_DOT_VER: 12.9
77
CU_DOT_VER=${CU_VERSION:2:2}.${CU_VERSION:4:1}
8+
# CU_VERSION: cu128 --> CUDA_MAJOR_VERSION: 12
9+
CUDA_MAJOR_VERSION=${CU_VERSION:2:2}
810
dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/sbsa/cuda-rhel8.repo
911
# nccl version must match the one libtorch_cuda.so was built with: https://github.com/pytorch/pytorch/blob/main/.ci/docker/ci_commit_pins/nccl-cu12.txt
1012
dnf -y install cuda-compiler-${CU_VER}.aarch64 \
1113
cuda-libraries-${CU_VER}.aarch64 \
1214
cuda-libraries-devel-${CU_VER}.aarch64 \
1315
libnccl-2.27.3-1+cuda${CU_DOT_VER} libnccl-devel-2.27.3-1+cuda${CU_DOT_VER} libnccl-static-2.27.3-1+cuda${CU_DOT_VER}
1416
dnf clean all
15-
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib64:$LD_LIBRARY_PATH
17+
18+
nvshmem_version=3.3.9
19+
nvshmem_path="https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version}/builds/cuda${CUDA_MAJOR_VERSION}/txz/agnostic/aarch64"
20+
nvshmem_filename="libnvshmem_cuda12-linux-sbsa-${nvshmem_version}.tar.gz"
21+
curl -L ${nvshmem_path}/${nvshmem_filename} -o nvshmem.tar.gz
22+
tar -xzf nvshmem.tar.gz
23+
cp -a libnvshmem/lib/* /usr/local/cuda/lib64/
24+
cp -a libnvshmem/include/* /usr/local/cuda/include/
25+
rm -rf nvshmem.tar.gz nvshmem
26+
echo "nvshmem ${nvshmem_version} for cuda ${CUDA_MAJOR_VERSION} installed successfully"
27+
28+
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/include:/usr/lib64:$LD_LIBRARY_PATH
1629
ls -lart /usr/local/
1730
nvcc --version
1831
echo "cuda ${CU_VER} installed successfully"

.github/workflows/build-test-linux-aarch64-jetpack.yml

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,16 @@
11
name: Build and test Linux aarch64 wheels for Jetpack
22

33
on:
4-
# TODO: Uncomment this when we have a stable release
5-
# pull_request:
6-
# push:
7-
# branches:
8-
# - main
9-
# - nightly
10-
# - release/*
11-
# tags:
12-
# # NOTE: Binary build pipelines should only get triggered on release candidate builds
13-
# # Release candidate tags look like: v1.11.0-rc1
14-
# - v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
4+
pull_request:
5+
push:
6+
branches:
7+
- main
8+
- nightly
9+
- release/*
10+
tags:
11+
# NOTE: Binary build pipelines should only get triggered on release candidate builds
12+
# Release candidate tags look like: v1.11.0-rc1
13+
- v[0-9]+.[0-9]+.[0-9]+-rc[0-9]+
1514
workflow_dispatch:
1615

1716
jobs:
@@ -66,7 +65,7 @@ jobs:
6665
post-script: packaging/post_build_script.sh
6766
smoke-test-script: packaging/smoke_test_script.sh
6867
package-name: torch_tensorrt
69-
name: Build torch-tensorrt whl package
68+
name: Build torch-tensorrt whl package for jetpack
7069
uses: ./.github/workflows/build_wheels_linux_aarch64.yml
7170
with:
7271
repository: ${{ matrix.repository }}

.github/workflows/build-test-linux-aarch64.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ jobs:
6262
post-script: packaging/post_build_script.sh
6363
smoke-test-script: packaging/smoke_test_script.sh
6464
package-name: torch_tensorrt
65-
name: Build torch-tensorrt whl package
65+
name: Build torch-tensorrt whl package for SBSA
6666
uses: ./.github/workflows/build_wheels_linux_aarch64.yml
6767
with:
6868
repository: ${{ matrix.repository }}

.github/workflows/build_wheels_linux_aarch64.yml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ jobs:
133133
UPLOAD_TO_BASE_BUCKET: ${{ matrix.upload_to_base_bucket }}
134134
ARCH: ${{ inputs.architecture }}
135135
BUILD_TARGET: ${{ inputs.build-target }}
136-
name: build-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }}
136+
name: build-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }}-${{ inputs.is-jetpack }}
137137
runs-on: ${{ matrix.validation_runner }}
138138
environment: ${{(inputs.trigger-event == 'schedule' || (inputs.trigger-event == 'push' && (startsWith(github.event.ref, 'refs/heads/nightly') || startsWith(github.event.ref, 'refs/tags/v')))) && 'pytorchbot-env' || ''}}
139139
container:
@@ -264,7 +264,7 @@ jobs:
264264
if [[ ${{ inputs.is-jetpack }} == false ]]; then
265265
${CONDA_RUN} python setup.py bdist_wheel
266266
else
267-
${CONDA_RUN} python setup.py bdist_wheel --jetpack --plat-name=linux_tegra_aarch64
267+
${CONDA_RUN} python setup.py bdist_wheel --jetpack
268268
fi
269269
- name: Repair Manylinux_2_28 Wheel
270270
shell: bash -l {0}
@@ -335,9 +335,10 @@ jobs:
335335

336336
upload:
337337
needs: build
338+
name: upload-wheel-${{ matrix.python_version }}-${{ matrix.desired_cuda }}-${{ matrix.gpu_arch_type }}-${{ inputs.is-jetpack }}
338339
uses: pytorch/test-infra/.github/workflows/_binary_upload.yml@main
339-
# for jetpack builds, only upload to pytorch index for nightly builds
340-
if: ${{ inputs.is-jetpack == false || (github.event_name == 'push' && startsWith(github.event.ref, 'refs/heads/nightly')) }}
340+
# for jetpack builds, do not upload to pytorch nightly index, only upload to https://pypi.jetson-ai-lab.io/ manually for each release
341+
if: ${{ inputs.is-jetpack == false }}
341342
with:
342343
repository: ${{ inputs.repository }}
343344
ref: ${{ inputs.ref }}
@@ -351,5 +352,5 @@ jobs:
351352
PYPI_API_TOKEN: ${{ secrets.PYPI_API_TOKEN }}
352353

353354
concurrency:
354-
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ github.event_name == 'workflow_dispatch' }}
355+
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }}-${{ inputs.repository }}-${{ inputs.is-jetpack }}-${{ github.event_name == 'workflow_dispatch' }}
355356
cancel-in-progress: true

BUILD.bazel

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,7 @@ pkg_tar(
8888
name = "libtorchtrt",
8989
srcs = [
9090
"//:LICENSE",
91-
"//bzl_def:BUILD",
92-
"//bzl_def:WORKSPACE",
91+
"//third_party/torch_tensorrt:BUILD",
9392
],
9493
extension = "tar.gz",
9594
package_dir = "torch_tensorrt",
@@ -107,8 +106,7 @@ pkg_tar(
107106
name = "libtorchtrt_runtime",
108107
srcs = [
109108
"//:LICENSE",
110-
"//bzl_def:BUILD",
111-
"//bzl_def:WORKSPACE",
109+
"//third_party/torch_tensorrt:BUILD",
112110
],
113111
extension = "tar.gz",
114112
package_dir = "torch_tensorrt_runtime",

MODULE.bazel

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,16 @@ git_override(
2424

2525
local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.bzl", "local_repository")
2626

27+
28+
new_local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository")
29+
2730
# External dependency for torch_tensorrt if you already have precompiled binaries.
28-
local_repository(
31+
new_local_repository(
2932
name = "torch_tensorrt",
30-
path = "/opt/conda/lib/python3.11/site-packages/torch_tensorrt",
33+
build_file = "@//third_party/torch_tensorrt:BUILD",
34+
path = "/usr/local/lib/python3.12/site-packages/torch_tensorrt/",
3135
)
3236

33-
new_local_repository = use_repo_rule("@bazel_tools//tools/build_defs/repo:local.bzl", "new_local_repository")
34-
3537
# CUDA should be installed on the system locally
3638
# for linux x86_64 and aarch64
3739
new_local_repository(
@@ -90,10 +92,9 @@ http_archive(
9092
http_archive(
9193
name = "torch_l4t",
9294
build_file = "@//third_party/libtorch:BUILD",
93-
sha256 = "6eff643c0a7acda92734cc798338f733ff35c7df1a4434576f5ff7c66fc97319",
9495
strip_prefix = "torch",
9596
type = "zip",
96-
urls = ["https://pypi.jetson-ai-lab.dev/jp6/cu126/+f/6ef/f643c0a7acda9/torch-2.7.0-cp310-cp310-linux_aarch64.whl"],
97+
urls = ["https://pypi.jetson-ai-lab.io/jp6/cu126/+f/62a/1beee9f2f1470/torch-2.8.0-cp310-cp310-linux_aarch64.whl"],
9798
)
9899

99100
# Download these tarballs manually from the NVIDIA website

README.md

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ Torch-TensorRT
77
[![Documentation](https://img.shields.io/badge/docs-master-brightgreen)](https://nvidia.github.io/Torch-TensorRT/)
88
[![pytorch](https://img.shields.io/badge/PyTorch-2.8-green)](https://download.pytorch.org/whl/nightly/cu128)
99
[![cuda](https://img.shields.io/badge/CUDA-12.8-green)](https://developer.nvidia.com/cuda-downloads)
10-
[![trt](https://img.shields.io/badge/TensorRT-10.11.0-green)](https://github.com/nvidia/tensorrt-llm)
10+
[![trt](https://img.shields.io/badge/TensorRT-10.12.0-green)](https://github.com/nvidia/tensorrt-llm)
1111
[![license](https://img.shields.io/badge/license-BSD--3--Clause-blue)](./LICENSE)
1212
[![Linux x86-64 Nightly Wheels](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux-x86_64.yml/badge.svg?branch=nightly)](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux-x86_64.yml)
1313
[![Linux SBSA Nightly Wheels](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux-aarch64.yml/badge.svg?branch=nightly)](https://github.com/pytorch/TensorRT/actions/workflows/build-test-linux-aarch64.yml)
@@ -93,9 +93,11 @@ auto results = trt_mod.forward({input_tensor});
9393
```
9494
9595
## Further resources
96+
- [Double PyTorch Inference Speed for Diffusion Models Using Torch-TensorRT](https://developer.nvidia.com/blog/double-pytorch-inference-speed-for-diffusion-models-using-torch-tensorrt/)
9697
- [Up to 50% faster Stable Diffusion inference with one line of code](https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/torch_compile_stable_diffusion.html#sphx-glr-tutorials-rendered-examples-dynamo-torch-compile-stable-diffusion-py)
97-
- [Optimize LLMs from Hugging Face with Torch-TensorRT]() \[coming soon\]
98+
- [Optimize LLMs from Hugging Face with Torch-TensorRT](https://docs.pytorch.org/TensorRT/tutorials/compile_hf_models.html#compile-hf-models)
9899
- [Run your model in FP8 with Torch-TensorRT](https://pytorch.org/TensorRT/tutorials/_rendered_examples/dynamo/vgg16_fp8_ptq.html)
100+
- [Accelerated Inference in PyTorch 2.X with Torch-TensorRT](https://www.youtube.com/watch?v=eGDMJ3MY4zk&t=1s)
99101
- [Tools to resolve graph breaks and boost performance]() \[coming soon\]
100102
- [Tech Talk (GTC '23)](https://www.nvidia.com/en-us/on-demand/session/gtcspring23-s51714/)
101103
- [Documentation](https://nvidia.github.io/Torch-TensorRT/)
@@ -119,9 +121,9 @@ auto results = trt_mod.forward({input_tensor});
119121
The following dependencies were used to verify the test cases. Torch-TensorRT can work with other versions, but the tests are not guaranteed to pass.
120122
121123
- Bazel 8.1.1
122-
- Libtorch 2.8.0.dev (latest nightly)
124+
- Libtorch 2.9.0.dev (latest nightly)
123125
- CUDA 12.8 (CUDA 12.6 on Jetson)
124-
- TensorRT 10.11 (TensorRT 10.3 on Jetson)
126+
- TensorRT 10.12 (TensorRT 10.3 on Jetson)
125127
126128
## Deprecation Policy
127129

bzl_def/WORKSPACE

Lines changed: 0 additions & 1 deletion
This file was deleted.

core/conversion/conversion.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -202,13 +202,7 @@ void AddInputs(ConversionCtx* ctx, c10::ArrayRef<const torch::jit::Value*> input
202202
TORCHTRT_CHECK(
203203
profile->isValid(),
204204
"Optimization profile is invalid, please check the input range provided (conversion.AddInputs)");
205-
206205
ctx->cfg->addOptimizationProfile(profile);
207-
#if NV_TENSORRT_MAJOR > 7 || (NV_TENSORRT_MAJOR == 7 && NV_TENSORRT_MINOR >= 1)
208-
if (ctx->enabled_precisions.find(nvinfer1::DataType::kINT8) != ctx->enabled_precisions.end()) {
209-
ctx->cfg->setCalibrationProfile(profile);
210-
}
211-
#endif
212206
}
213207

214208
void MarkOutputs(ConversionCtx* ctx, at::ArrayRef<const torch::jit::Value*> outputs) {

0 commit comments

Comments
 (0)