Skip to content
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 1 addition & 4 deletions constraints.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,2 @@
# These vulnerabilities were inherited from the base image (pytorch:25.06-py3) and should be removed when the base image
# These vulnerabilities were inherited from the base image (pytorch:25.10-py3) and should be removed when the base image
# is updated.

# WAR against https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
protobuf>=4.25.8
26 changes: 4 additions & 22 deletions docker/Dockerfile.multi
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
# Multi-stage Dockerfile
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
ARG BASE_TAG=25.08-py3
ARG TRITON_BASE_TAG=25.08-py3
ARG BASE_TAG=25.10-py3
# [TODO] Update to NVIDIA Triton 25.10 when it's available
ARG TRITON_BASE_TAG=25.09-py3
ARG DEVEL_IMAGE=devel

FROM ${BASE_IMAGE}:${BASE_TAG} AS base
Expand Down Expand Up @@ -71,26 +72,7 @@ RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --mpi4py && rm install_mpi4
ARG TORCH_INSTALL_TYPE="skip"
RUN TORCH_INSTALL_TYPE=${TORCH_INSTALL_TYPE} bash ./install.sh --pytorch && rm install_pytorch.sh

RUN bash ./install.sh --opencv && bash ./install.sh --protobuf && rm install.sh

# Temporary workaround: wait for new triton to be published.
# Rename the pytorch_triton package metadata to triton.
#   * If /etc/redhat-release exists, the step only prints a message and does nothing else.
#   * Otherwise it works inside /usr/local/lib/python3.12/dist-packages/:
#       - `ls -la | grep pytorch_triton` lists the matching entries and, because it is part of
#         the `&&` chain, stops the step when no pytorch_triton entry is present;
#       - the pytorch_triton-3.3.1+gitc8757738.dist-info directory is renamed to
#         triton-3.3.1+gitc8757738.dist-info;
#       - the "Name:" field in METADATA and the dist-info paths in RECORD are rewritten to match;
#       - the directory contents and the updated "Name:" line are echoed to the build log.
# NOTE(review): the skip message mentions "symlink and ldconfig steps", but the skipped branch
# performs a dist-info rename — confirm the message text is intended.
# NOTE(review): the version string and the python3.12 path are hard-coded; presumably they must
# match what the base image ships — verify whenever the base image tag changes.
RUN if [ -f /etc/redhat-release ]; then \
echo "Rocky8 detected, skipping symlink and ldconfig steps"; \
else \
cd /usr/local/lib/python3.12/dist-packages/ && \
ls -la | grep pytorch_triton && \
mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
cd triton-3.3.1+gitc8757738.dist-info && \
echo "Current directory: $(pwd)" && \
echo "Files in directory:" && \
ls -la && \
sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
echo "METADATA after update:" && \
grep "^Name:" METADATA; \
fi

RUN bash ./install.sh --opencv && rm install.sh

FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton

Expand Down
6 changes: 3 additions & 3 deletions docker/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -192,16 +192,16 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
jenkins-rockylinux8_%: STAGE = tritondevel
jenkins-rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
jenkins-rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
jenkins-rockylinux8_%: BASE_TAG = 13.0.1-devel-rockylinux8

rockylinux8_%: STAGE = tritondevel
rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
rockylinux8_%: BASE_TAG = 13.0.1-devel-rockylinux8

# For x86_64 and aarch64
ubuntu22_%: STAGE = tritondevel
ubuntu22_%: BASE_IMAGE = nvcr.io/nvidia/cuda
ubuntu22_%: BASE_TAG = 13.0.0-devel-ubuntu22.04
ubuntu22_%: BASE_TAG = 13.0.1-devel-ubuntu22.04

trtllm_%: STAGE = release
trtllm_%: PUSH_TO_STAGING := 0
Expand Down
13 changes: 0 additions & 13 deletions docker/common/install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ polygraphy=0
mpi4py=0
pytorch=0
opencv=0
protobuf=0

while [[ $# -gt 0 ]]; do
case $1 in
Expand Down Expand Up @@ -56,10 +55,6 @@ while [[ $# -gt 0 ]]; do
opencv=1
shift 1
;;
--protobuf)
protobuf=1
shift 1
;;
--all)
base=1
cmake=1
Expand All @@ -70,7 +65,6 @@ while [[ $# -gt 0 ]]; do
mpi4py=1
pytorch=1
opencv=1
protobuf=1
shift 1
;;
*)
Expand Down Expand Up @@ -135,10 +129,3 @@ if [ $opencv -eq 1 ]; then
rm -rf /usr/local/lib/python3*/dist-packages/cv2/
pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
fi

# Workarounds (WARs) for security issues inherited from the pytorch:25.06 base image:
# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
# Runs only when the protobuf flag is 1. Upgrades protobuf to at least 4.25.8;
# --no-cache-dir keeps pip from using its download cache.
if [ $protobuf -eq 1 ]; then
pip3 install --upgrade --no-cache-dir \
"protobuf>=4.25.8"
fi
2 changes: 1 addition & 1 deletion docker/common/install_cuda_toolkit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -ex
# This script is used for reinstalling CUDA on Rocky Linux 8 with the run file.
# CUDA version is usually aligned with the latest NGC CUDA image tag.
# Only use when public CUDA image is not ready.
CUDA_VER="13.0.0_580.65.06"
CUDA_VER="13.0.2_580.95.05"
CUDA_VER_SHORT="${CUDA_VER%_*}"

NVCC_VERSION_OUTPUT=$(nvcc --version)
Expand Down
7 changes: 5 additions & 2 deletions docker/common/install_mpi4py.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@ diff --git a/src/mpi4py/futures/_lib.py b/src/mpi4py/futures/_lib.py
index f14934d1..eebfb8fc 100644
--- a/src/mpi4py/futures/_lib.py
+++ b/src/mpi4py/futures/_lib.py
@@ -278,6 +278,40 @@ def _manager_comm(pool, options, comm, full=True):
@@ -278,6 +278,43 @@ def _manager_comm(pool, options, comm, full=True):
def _manager_split(pool, options, comm, root):
+ if(os.getenv("TRTLLM_USE_MPI_KVCACHE")=="1"):
+ from cuda import cudart
+ try:
+ from cuda.bindings import runtime as cudart
+ except ImportError:
+ from cuda import cudart
+ has_slurm_rank=False
+ has_ompi_rank=False
+ slurm_rank=0
Expand Down
8 changes: 4 additions & 4 deletions docker/common/install_pytorch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ set -ex

# Use latest stable version from https://pypi.org/project/torch/#history
# and closest to the version specified in
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
TORCH_VERSION="2.8.0"
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10
TORCH_VERSION="2.9.0"
SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')

prepare_environment() {
Expand Down Expand Up @@ -69,8 +69,8 @@ install_from_pypi() {
if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
if [ "$ARCH" = "aarch64" ];then ARCH="sbsa";fi

pip3 uninstall -y torch torchvision torchaudio
pip3 install torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
pip3 uninstall -y torch torchvision
pip3 install torch==${TORCH_VERSION} torchvision --index-url https://download.pytorch.org/whl/cu130
}

case "$1" in
Expand Down
19 changes: 8 additions & 11 deletions docker/common/install_tensorrt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,20 @@

set -ex

TRT_VER="10.13.2.6"
TRT_VER="10.13.3.9"
# Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
CUDA_VER="13.0" # 13.0.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10
CUDA_VER="13.0" # 13.0.2
# Keep the installation for cuDNN if users want to install PyTorch with source codes.
# PyTorch 2.x can compile with cuDNN v9.
CUDNN_VER="9.12.0.46-1"
# NCCL version 2.26.x is used in the NGC PyTorch 25.05 image but has a performance regression.
# Use NCCL version 2.27.5, which has the fixes.
CUDNN_VER="9.14.0.64-1"
NCCL_VER="2.27.7-1+cuda13.0"
# Use cuBLAS version 13.0.0.19 instead.
CUBLAS_VER="13.0.0.19-1"
CUBLAS_VER="13.1.0.3-1"
# Align with the pre-installed CUDA / NVCC / NVRTC versions from
# https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
NVRTC_VER="13.0.48-1"
CUDA_RUNTIME="13.0.48-1"
CUDA_DRIVER_VERSION="580.65.06-1.el8"
NVRTC_VER="13.0.88-1"
CUDA_RUNTIME="13.0.96-1"
CUDA_DRIVER_VERSION="580.95.05-1.el8"

for i in "$@"; do
case $i in
Expand Down
5 changes: 0 additions & 5 deletions docs/source/installation/build-from-source-linux.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,11 +147,6 @@ check <https://github.com/NVIDIA/TensorRT-LLM/tree/main/docker>.

## Build TensorRT LLM

```{tip}
:name: build-from-source-tip-cuda-version
TensorRT LLM 1.1 supports both CUDA 12.9 and 13.0, but some dependency changes are required. The `requirements.txt` file contains the dependencies needed for CUDA 13.0. If you are using CUDA 12.9, uncomment the lines ending with `# <For CUDA 12.9>` and comment out the line immediately following each of them.
```

### Option 1: Full Build with C++ Compilation

The following command compiles the C++ code and packages the compiled libraries along with the Python files into a wheel. When developing C++ code, you need this full build command to apply your code changes.
Expand Down
11 changes: 2 additions & 9 deletions docs/source/installation/linux.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,20 +12,13 @@
Install CUDA Toolkit following the [CUDA Installation Guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/) and
make sure `CUDA_HOME` environment variable is properly set.

```{tip}
:name: installation-linux-tip-cuda-version
TensorRT LLM 1.1 supports both CUDA 12.9 and 13.0. The wheel package release supports only CUDA 12.9, while CUDA 13.0 is supported only through the NGC container release.
```

```bash
# Optional step: Only required for NVIDIA Blackwell GPUs and SBSA platform
pip3 install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
# By default, PyTorch CUDA 12.8 package is installed. Install PyTorch CUDA 13.0 package to align with the CUDA version used for building TensorRT LLM wheels.
pip3 install torch==2.9.0 torchvision --index-url https://download.pytorch.org/whl/cu130

sudo apt-get -y install libopenmpi-dev
```

The PyTorch CUDA 12.8 package is required to support NVIDIA Blackwell GPUs and the SBSA platform. On earlier GPUs or on the Linux x86_64 platform, this extra installation is not required.

```{tip}
Instead of manually installing the prerequisites as described
above, it is also possible to use the pre-built [TensorRT LLM Develop container
Expand Down
2 changes: 1 addition & 1 deletion docs/source/legacy/reference/support-matrix.md
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ The following table shows the supported software for TensorRT-LLM.
* -
- Software Compatibility
* - Container
- [25.08](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
- [25.10](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
* - TensorRT
- [10.13](https://docs.nvidia.com/deeplearning/tensorrt/release-notes/index.html)
* - Precision
Expand Down
33 changes: 1 addition & 32 deletions jenkins/Build.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,6 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"

LLM_DOCKER_IMAGE = env.dockerImage

LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"

// Always use x86_64 image for agent
AGENT_IMAGE = env.dockerImage.replace("aarch64", "x86_64")

Expand All @@ -40,9 +37,6 @@ def BUILD_JOBS_FOR_CONFIG = "buildJobsForConfig"
@Field
def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla"

@Field
def CONFIG_LINUX_X86_64_VANILLA_CU12 = "linux_x86_64_Vanilla_CU12"

@Field
def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice"

Expand All @@ -52,9 +46,6 @@ def CONFIG_LINUX_X86_64_LLVM = "linux_x86_64_LLVM"
@Field
def CONFIG_LINUX_AARCH64 = "linux_aarch64"

@Field
def CONFIG_LINUX_AARCH64_CU12 = "linux_aarch64_CU12"

@Field
def CONFIG_LINUX_AARCH64_LLVM = "linux_aarch64_LLVM"

Expand All @@ -73,11 +64,6 @@ def BUILD_CONFIGS = [
(TARNAME) : "TensorRT-LLM.tar.gz",
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
],
(CONFIG_LINUX_X86_64_VANILLA_CU12) : [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
(TARNAME) : "TensorRT-LLM-CU12.tar.gz",
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
],
(CONFIG_LINUX_X86_64_PYBIND) : [
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
(TARNAME) : "pybind-TensorRT-LLM.tar.gz",
Expand All @@ -99,12 +85,6 @@ def BUILD_CONFIGS = [
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
(BUILD_JOBS_FOR_CONFIG): "6",
],
(CONFIG_LINUX_AARCH64_CU12): [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON",
(TARNAME) : "TensorRT-LLM-GH200-CU12.tar.gz",
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
(BUILD_JOBS_FOR_CONFIG): "6",
],
(CONFIG_LINUX_AARCH64_PYBIND): [
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl",
(TARNAME) : "pybind-TensorRT-LLM-GH200.tar.gz",
Expand Down Expand Up @@ -454,9 +434,6 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
pipArgs = ""
}

if (tarName.contains("CU12")) {
trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${LLM_ROOT} && sed -i '/^# .*<For CUDA 12\\.9>\$/ {s/^# //; n; s/^/# /}' requirements.txt && cat requirements.txt")
}
// install python package
trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${LLM_ROOT} && pip3 install -r requirements-dev.txt ${pipArgs}")

Expand All @@ -477,10 +454,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
def llmPath = sh (script: "realpath ${LLM_ROOT}",returnStdout: true).trim()
// TODO: Remove after the cmake version is upgraded to 3.31.8
// Get triton tag from docker/dockerfile.multi
def tritonShortTag = "r25.08"
if (tarName.contains("CU12")) {
tritonShortTag = "r25.06"
}
def tritonShortTag = "r25.09"
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${buildJobs} install"

// Step 3: packaging wheels into tarfile
Expand Down Expand Up @@ -570,14 +544,9 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
wheelDockerImage = env.dockerImage
}

def LLM_DOCKER_IMAGE_CU12 = cpu_arch == AARCH64_TRIPLE ? LLM_SBSA_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE_12_9

buildConfigs = [
"Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
// Disable CUDA12 build for too slow to build (cost > 5 hours on SBSA)
"Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
"Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
"Build TRT-LLM Pybind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
Expand Down
Loading