NVIDIA · chzblych · Nov 3, 2025 · Oct 30, 2025 · Oct 30, 2025 · Oct 30, 2025
@@ -1,5 +1,2 @@
-# These vulnerabilities were inherited from the base image (pytorch:25.06-py3) and should be removed when the base image
+# These vulnerabilities were inherited from the base image (pytorch:25.10-py3) and should be removed when the base image
 # is updated.
-
-# WAR against https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
-protobuf>=4.25.8
@@ -1,8 +1,9 @@
 # Multi-stage Dockerfile
 ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
 ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
-ARG BASE_TAG=25.08-py3
-ARG TRITON_BASE_TAG=25.08-py3
+ARG BASE_TAG=25.10-py3
+# [TODO] Update to NVIDIA Triton 25.10 when it's available
+ARG TRITON_BASE_TAG=25.09-py3
 ARG DEVEL_IMAGE=devel
 
 FROM ${BASE_IMAGE}:${BASE_TAG} AS base
@@ -71,26 +72,7 @@ RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --mpi4py && rm install_mpi4
 ARG TORCH_INSTALL_TYPE="skip"
 RUN TORCH_INSTALL_TYPE=${TORCH_INSTALL_TYPE} bash ./install.sh --pytorch && rm install_pytorch.sh
 
-RUN bash ./install.sh --opencv && bash ./install.sh --protobuf && rm install.sh
-
-# wait for new triton to be published
-# Rename pytorch_triton package to triton
-RUN if [ -f /etc/redhat-release ]; then \
-        echo "Rocky8 detected, skipping symlink and ldconfig steps"; \
-    else \
-        cd /usr/local/lib/python3.12/dist-packages/ && \
-        ls -la | grep pytorch_triton && \
-        mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
-        cd triton-3.3.1+gitc8757738.dist-info && \
-        echo "Current directory: $(pwd)" && \
-        echo "Files in directory:" && \
-        ls -la && \
-        sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
-        sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
-        echo "METADATA after update:" && \
-        grep "^Name:" METADATA; \
-    fi
-
+RUN bash ./install.sh --opencv && rm install.sh
 
 FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
 

@@ -192,16 +192,16 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
 jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
 jenkins-rockylinux8_%: STAGE = tritondevel
 jenkins-rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
-jenkins-rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
+jenkins-rockylinux8_%: BASE_TAG = 13.0.1-devel-rockylinux8
 
 rockylinux8_%: STAGE = tritondevel
 rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
-rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
+rockylinux8_%: BASE_TAG = 13.0.1-devel-rockylinux8
 
 # For x86_64 and aarch64
 ubuntu22_%: STAGE = tritondevel
 ubuntu22_%: BASE_IMAGE = nvcr.io/nvidia/cuda
-ubuntu22_%: BASE_TAG = 13.0.0-devel-ubuntu22.04
+ubuntu22_%: BASE_TAG = 13.0.1-devel-ubuntu22.04
 
 trtllm_%: STAGE = release
 trtllm_%: PUSH_TO_STAGING := 0

@@ -16,7 +16,6 @@ polygraphy=0
 mpi4py=0
 pytorch=0
 opencv=0
-protobuf=0
 
 while [[ $# -gt 0 ]]; do
     case $1 in
@@ -56,10 +55,6 @@ while [[ $# -gt 0 ]]; do
             opencv=1
             shift 1
             ;;
-        --protobuf)
-            protobuf=1
-            shift 1
-            ;;
         --all)
             base=1
             cmake=1
@@ -70,7 +65,6 @@ while [[ $# -gt 0 ]]; do
             mpi4py=1
             pytorch=1
             opencv=1
-            protobuf=1
             shift 1
             ;;
         *)
@@ -135,10 +129,3 @@ if [ $opencv -eq 1 ]; then
     rm -rf /usr/local/lib/python3*/dist-packages/cv2/
     pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
 fi
-
-# WARs against security issues inherited from pytorch:25.06
-# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
-if [ $protobuf -eq 1 ]; then
-    pip3 install --upgrade --no-cache-dir \
-    "protobuf>=4.25.8"
-fi
@@ -5,7 +5,7 @@ set -ex
 # This script is used for reinstalling CUDA on Rocky Linux 8 with the run file.
 # CUDA version is usually aligned with the latest NGC CUDA image tag.
 # Use this only when the public CUDA image is not yet available.
-CUDA_VER="13.0.0_580.65.06"
+CUDA_VER="13.0.2_580.95.05"
 CUDA_VER_SHORT="${CUDA_VER%_*}"
 
 NVCC_VERSION_OUTPUT=$(nvcc --version)

@@ -27,12 +27,15 @@ diff --git a/src/mpi4py/futures/_lib.py b/src/mpi4py/futures/_lib.py
 index f14934d1..eebfb8fc 100644
 --- a/src/mpi4py/futures/_lib.py
 +++ b/src/mpi4py/futures/_lib.py
-@@ -278,6 +278,40 @@ def _manager_comm(pool, options, comm, full=True):
+@@ -278,6 +278,43 @@ def _manager_comm(pool, options, comm, full=True):
 
 
  def _manager_split(pool, options, comm, root):
 +    if(os.getenv("TRTLLM_USE_MPI_KVCACHE")=="1"):
-+        from cuda import cudart
++        try:
++            from cuda.bindings import runtime as cudart
++        except ImportError:
++            from cuda import cudart
 +        has_slurm_rank=False
 +        has_ompi_rank=False
 +        slurm_rank=0

@@ -4,8 +4,8 @@ set -ex
 
 # Use latest stable version from https://pypi.org/project/torch/#history
 # and closest to the version specified in
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
-TORCH_VERSION="2.8.0"
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10
+TORCH_VERSION="2.9.0"
 SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 
 prepare_environment() {
@@ -69,8 +69,8 @@ install_from_pypi() {
     if [ "$ARCH" = "amd64" ];then ARCH="x86_64";fi
     if [ "$ARCH" = "aarch64" ];then ARCH="sbsa";fi
 
-    pip3 uninstall -y torch torchvision torchaudio
-    pip3 install torch==${TORCH_VERSION} torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
+    pip3 uninstall -y torch torchvision
+    pip3 install torch==${TORCH_VERSION} torchvision --index-url https://download.pytorch.org/whl/cu130
 }
 
 case "$1" in

@@ -2,23 +2,20 @@
 
 set -ex
 
-TRT_VER="10.13.2.6"
+TRT_VER="10.13.3.9"
 # Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
-CUDA_VER="13.0" # 13.0.0
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10
+CUDA_VER="13.0" # 13.0.2
 # Keep the cuDNN installation in case users want to build PyTorch from source.
 # PyTorch 2.x can compile with cuDNN v9.
-CUDNN_VER="9.12.0.46-1"
-# NCCL version 2.26.x used in the NGC PyTorch 25.05 image but has a performance regression issue.
-# Use NCCL version 2.27.5 which has the fixes.
+CUDNN_VER="9.14.0.64-1"
 NCCL_VER="2.27.7-1+cuda13.0"
-# Use cuBLAS version 13.0.0.19 instead.
-CUBLAS_VER="13.0.0.19-1"
+CUBLAS_VER="13.1.0.3-1"
 # Align with the pre-installed CUDA / NVCC / NVRTC versions from
 # https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
-NVRTC_VER="13.0.48-1"
-CUDA_RUNTIME="13.0.48-1"
-CUDA_DRIVER_VERSION="580.65.06-1.el8"
+NVRTC_VER="13.0.88-1"
+CUDA_RUNTIME="13.0.96-1"
+CUDA_DRIVER_VERSION="580.95.05-1.el8"
 
 for i in "$@"; do
     case $i in

@@ -147,11 +147,6 @@ check <https://github.com/NVIDIA/TensorRT-LLM/tree/main/docker>.
 
 ## Build TensorRT LLM
 
-```{tip}
-:name: build-from-source-tip-cuda-version
-TensorRT LLM 1.1 supports both CUDA 12.9 and 13.0 while some dependency changes are required. The `requirements.txt` contains dependencies needed by CUDA 13.0. If you are using CUDA 12.9, please uncomment lines end with `# <For CUDA 12.9>` and comment out the next lines.
-```
-
 ### Option 1: Full Build with C++ Compilation
 
 The following command compiles the C++ code and packages the compiled libraries along with the Python files into a wheel. When developing C++ code, you need this full build command to apply your code changes.

@@ -12,20 +12,13 @@
    Install CUDA Toolkit following the [CUDA Installation Guide for Linux](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/) and
    make sure `CUDA_HOME` environment variable is properly set.
 
-   ```{tip}
-   :name: installation-linux-tip-cuda-version
-   TensorRT LLM 1.1 supports both CUDA 12.9 and 13.0. The wheel package release only supports CUDA 12.9, while CUDA 13.0 is only supported through NGC container release.
-   ```
-
    ```bash
-   # Optional step: Only required for NVIDIA Blackwell GPUs and SBSA platform
-   pip3 install torch==2.7.1 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
+   # By default, the PyTorch CUDA 12.8 package is installed. Install the PyTorch CUDA 13.0 package to match the CUDA version used to build the TensorRT LLM wheels.
+   pip3 install torch==2.9.0 torchvision --index-url https://download.pytorch.org/whl/cu130
 
    sudo apt-get -y install libopenmpi-dev
    ```
 
-   PyTorch CUDA 12.8 package is required for supporting NVIDIA Blackwell GPUs and SBSA platform. On prior GPUs or Linux x86_64 platform, this extra installation is not required.
-
    ```{tip}
    Instead of manually installing the prerequisites as described
    above, it is also possible to use the pre-built [TensorRT LLM Develop container

@@ -152,7 +152,7 @@ The following table shows the supported software for TensorRT-LLM.
 * -
   - Software Compatibility
 * - Container
-  - [25.08](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
+  - [25.10](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
 * - TensorRT
   - [10.13](https://docs.nvidia.com/deeplearning/tensorrt/release-notes/index.html)
 * - Precision

@@ -16,9 +16,6 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"
 
 LLM_DOCKER_IMAGE = env.dockerImage
 
-LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
-LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
-
 // Always use x86_64 image for agent
 AGENT_IMAGE = env.dockerImage.replace("aarch64", "x86_64")
 
@@ -40,9 +37,6 @@ def BUILD_JOBS_FOR_CONFIG = "buildJobsForConfig"
 @Field
 def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla"
 
-@Field
-def CONFIG_LINUX_X86_64_VANILLA_CU12 = "linux_x86_64_Vanilla_CU12"
-
 @Field
 def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice"
 
@@ -52,9 +46,6 @@ def CONFIG_LINUX_X86_64_LLVM = "linux_x86_64_LLVM"
 @Field
 def CONFIG_LINUX_AARCH64 = "linux_aarch64"
 
-@Field
-def CONFIG_LINUX_AARCH64_CU12 = "linux_aarch64_CU12"
-
 @Field
 def CONFIG_LINUX_AARCH64_LLVM = "linux_aarch64_LLVM"
 
@@ -73,11 +64,6 @@ def BUILD_CONFIGS = [
     (TARNAME) : "TensorRT-LLM.tar.gz",
     (WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
   ],
-  (CONFIG_LINUX_X86_64_VANILLA_CU12) : [
-    (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
-    (TARNAME) : "TensorRT-LLM-CU12.tar.gz",
-    (WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
-  ],
   (CONFIG_LINUX_X86_64_PYBIND) : [
     (WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
     (TARNAME) : "pybind-TensorRT-LLM.tar.gz",
@@ -99,12 +85,6 @@ def BUILD_CONFIGS = [
     (WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
     (BUILD_JOBS_FOR_CONFIG): "6",
   ],
-  (CONFIG_LINUX_AARCH64_CU12): [
-    (WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON",
-    (TARNAME) : "TensorRT-LLM-GH200-CU12.tar.gz",
-    (WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
-    (BUILD_JOBS_FOR_CONFIG): "6",
-  ],
   (CONFIG_LINUX_AARCH64_PYBIND): [
     (WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl",
     (TARNAME) : "pybind-TensorRT-LLM-GH200.tar.gz",
@@ -454,9 +434,6 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
         pipArgs = ""
     }
 
-    if (tarName.contains("CU12")) {
-        trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${LLM_ROOT} && sed -i '/^# .*<For CUDA 12\\.9>\$/ {s/^# //; n; s/^/# /}' requirements.txt && cat requirements.txt")
-    }
     // install python package
     trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${LLM_ROOT} && pip3 install -r requirements-dev.txt ${pipArgs}")
 
@@ -477,10 +454,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
     def llmPath = sh (script: "realpath ${LLM_ROOT}",returnStdout: true).trim()
     // TODO: Remove after the cmake version is upgraded to 3.31.8
     // Get triton tag from docker/dockerfile.multi
-    def tritonShortTag = "r25.08"
-    if (tarName.contains("CU12")) {
-        tritonShortTag = "r25.06"
-    }
+    def tritonShortTag = "r25.09"
     sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${buildJobs} install"
 
     // Step 3: packaging wheels into tarfile
@@ -570,14 +544,9 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
         wheelDockerImage = env.dockerImage
     }
 
-    def LLM_DOCKER_IMAGE_CU12 = cpu_arch == AARCH64_TRIPLE ? LLM_SBSA_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE_12_9
-
     buildConfigs = [
         "Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
             pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
-        // Disable CUDA12 build for too slow to build (cost > 5 hours on SBSA)
-        "Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
-            pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
         "Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
             pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
         "Build TRT-LLM Pybind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(