Skip to content

Commit 6207953

Browse files
committed
upgrade DLFW pytorch 25.10 + tritonserver 25.09
Signed-off-by: ZhanruiSunCh <184402041+ZhanruiSunCh@users.noreply.github.com>
1 parent 6adccd7 commit 6207953

File tree

13 files changed

+67
-183
lines changed

13 files changed

+67
-183
lines changed

constraints.txt

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,2 @@
11
# These vulnerabilities were inherited from the base image (pytorch:25.06-py3) and should be removed when the base image
22
# is updated.
3-
4-
# WAR against https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
5-
protobuf>=4.25.8

docker/Dockerfile.multi

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Multi-stage Dockerfile
22
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
33
ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
4-
ARG BASE_TAG=25.08-py3
5-
ARG TRITON_BASE_TAG=25.08-py3
4+
ARG BASE_TAG=25.10-py3
5+
ARG TRITON_BASE_TAG=25.09-py3
66
ARG DEVEL_IMAGE=devel
77

88
FROM ${BASE_IMAGE}:${BASE_TAG} AS base
@@ -71,26 +71,7 @@ RUN GITHUB_MIRROR=${GITHUB_MIRROR} bash ./install.sh --mpi4py && rm install_mpi4
7171
ARG TORCH_INSTALL_TYPE="skip"
7272
RUN TORCH_INSTALL_TYPE=${TORCH_INSTALL_TYPE} bash ./install.sh --pytorch && rm install_pytorch.sh
7373

74-
RUN bash ./install.sh --opencv && bash ./install.sh --protobuf && rm install.sh
75-
76-
# wait for new triton to be published
77-
# Rename pytorch_triton package to triton
78-
RUN if [ -f /etc/redhat-release ]; then \
79-
echo "Rocky8 detected, skipping symlink and ldconfig steps"; \
80-
else \
81-
cd /usr/local/lib/python3.12/dist-packages/ && \
82-
ls -la | grep pytorch_triton && \
83-
mv pytorch_triton-3.3.1+gitc8757738.dist-info triton-3.3.1+gitc8757738.dist-info && \
84-
cd triton-3.3.1+gitc8757738.dist-info && \
85-
echo "Current directory: $(pwd)" && \
86-
echo "Files in directory:" && \
87-
ls -la && \
88-
sed -i 's/^Name: pytorch-triton/Name: triton/' METADATA && \
89-
sed -i 's|pytorch_triton-3.3.1+gitc8757738.dist-info/|triton-3.3.1+gitc8757738.dist-info/|g' RECORD && \
90-
echo "METADATA after update:" && \
91-
grep "^Name:" METADATA; \
92-
fi
93-
74+
RUN bash ./install.sh --opencv && rm install.sh
9475

9576
FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton
9677

docker/Makefile

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,16 +192,16 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
192192
jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
193193
jenkins-rockylinux8_%: STAGE = tritondevel
194194
jenkins-rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
195-
jenkins-rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
195+
jenkins-rockylinux8_%: BASE_TAG = 13.0.1-devel-rockylinux8
196196

197197
rockylinux8_%: STAGE = tritondevel
198198
rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
199-
rockylinux8_%: BASE_TAG = 13.0.0-devel-rockylinux8
199+
rockylinux8_%: BASE_TAG = 13.0.1-devel-rockylinux8
200200

201201
# For x86_64 and aarch64
202202
ubuntu22_%: STAGE = tritondevel
203203
ubuntu22_%: BASE_IMAGE = nvcr.io/nvidia/cuda
204-
ubuntu22_%: BASE_TAG = 13.0.0-devel-ubuntu22.04
204+
ubuntu22_%: BASE_TAG = 13.0.1-devel-ubuntu22.04
205205

206206
trtllm_%: STAGE = release
207207
trtllm_%: PUSH_TO_STAGING := 0

docker/common/install.sh

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ polygraphy=0
1616
mpi4py=0
1717
pytorch=0
1818
opencv=0
19-
protobuf=0
2019

2120
while [[ $# -gt 0 ]]; do
2221
case $1 in
@@ -56,10 +55,6 @@ while [[ $# -gt 0 ]]; do
5655
opencv=1
5756
shift 1
5857
;;
59-
--protobuf)
60-
protobuf=1
61-
shift 1
62-
;;
6358
--all)
6459
base=1
6560
cmake=1
@@ -70,7 +65,6 @@ while [[ $# -gt 0 ]]; do
7065
mpi4py=1
7166
pytorch=1
7267
opencv=1
73-
protobuf=1
7468
shift 1
7569
;;
7670
*)
@@ -135,10 +129,3 @@ if [ $opencv -eq 1 ]; then
135129
rm -rf /usr/local/lib/python3*/dist-packages/cv2/
136130
pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir
137131
fi
138-
139-
# WARs against security issues inherited from pytorch:25.06
140-
# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
141-
if [ $protobuf -eq 1 ]; then
142-
pip3 install --upgrade --no-cache-dir \
143-
"protobuf>=4.25.8"
144-
fi

docker/common/install_cuda_toolkit.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ set -ex
55
# This script is used for reinstalling CUDA on Rocky Linux 8 with the run file.
66
# CUDA version is usually aligned with the latest NGC CUDA image tag.
77
# Only use when public CUDA image is not ready.
8-
CUDA_VER="13.0.0_580.65.06"
8+
CUDA_VER="13.0.2_580.95.05"
99
CUDA_VER_SHORT="${CUDA_VER%_*}"
1010

1111
NVCC_VERSION_OUTPUT=$(nvcc --version)

docker/common/install_mpi4py.sh

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,12 +27,15 @@ diff --git a/src/mpi4py/futures/_lib.py b/src/mpi4py/futures/_lib.py
2727
index f14934d1..eebfb8fc 100644
2828
--- a/src/mpi4py/futures/_lib.py
2929
+++ b/src/mpi4py/futures/_lib.py
30-
@@ -278,6 +278,40 @@ def _manager_comm(pool, options, comm, full=True):
30+
@@ -278,6 +278,43 @@ def _manager_comm(pool, options, comm, full=True):
3131
3232
3333
def _manager_split(pool, options, comm, root):
3434
+ if(os.getenv("TRTLLM_USE_MPI_KVCACHE")=="1"):
35-
+ from cuda import cudart
35+
+ try:
36+
+ from cuda.bindings import runtime as cudart
37+
+ except ImportError:
38+
+ from cuda import cudart
3639
+ has_slurm_rank=False
3740
+ has_ompi_rank=False
3841
+ slurm_rank=0

docker/common/install_pytorch.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ set -ex
44

55
# Use latest stable version from https://pypi.org/project/torch/#history
66
# and closest to the version specified in
7-
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
8-
TORCH_VERSION="2.8.0"
7+
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10
8+
TORCH_VERSION="2.9.0"
99
SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
1010

1111
prepare_environment() {

docker/common/install_tensorrt.sh

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,23 @@
22

33
set -ex
44

5-
TRT_VER="10.13.2.6"
5+
TRT_VER="10.13.3.9"
66
# Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
7-
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-08.html#rel-25-08
8-
CUDA_VER="13.0" # 13.0.0
7+
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10
8+
CUDA_VER="13.0" # 13.0.2
99
# Keep the cuDNN installation in case users want to build PyTorch from source.
1010
# PyTorch 2.x can compile with cuDNN v9.
11-
CUDNN_VER="9.12.0.46-1"
11+
CUDNN_VER="9.14.0.64-1"
1212
# NCCL version 2.26.x, used in the NGC PyTorch 25.05 image, has a performance regression issue.
1313
# Use NCCL version 2.27.7, which includes the fixes (available since 2.27.5).
1414
NCCL_VER="2.27.7-1+cuda13.0"
15-
# Use cuBLAS version 13.0.0.19 instead.
16-
CUBLAS_VER="13.0.0.19-1"
15+
# Use cuBLAS version 13.1.0.3 instead.
16+
CUBLAS_VER="13.1.0.3-1"
1717
# Align with the pre-installed CUDA / NVCC / NVRTC versions from
1818
# https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
19-
NVRTC_VER="13.0.48-1"
20-
CUDA_RUNTIME="13.0.48-1"
21-
CUDA_DRIVER_VERSION="580.65.06-1.el8"
19+
NVRTC_VER="13.0.88-1"
20+
CUDA_RUNTIME="13.0.96-1"
21+
CUDA_DRIVER_VERSION="580.95.05-1.el8"
2222

2323
for i in "$@"; do
2424
case $i in

jenkins/Build.groovy

Lines changed: 1 addition & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,6 @@ AARCH64_TRIPLE = "aarch64-linux-gnu"
1616

1717
LLM_DOCKER_IMAGE = env.dockerImage
1818

19-
LLM_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
20-
LLM_SBSA_DOCKER_IMAGE_12_9 = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202509091430-7383"
21-
2219
// Always use x86_64 image for agent
2320
AGENT_IMAGE = env.dockerImage.replace("aarch64", "x86_64")
2421

@@ -40,9 +37,6 @@ def BUILD_JOBS_FOR_CONFIG = "buildJobsForConfig"
4037
@Field
4138
def CONFIG_LINUX_X86_64_VANILLA = "linux_x86_64_Vanilla"
4239

43-
@Field
44-
def CONFIG_LINUX_X86_64_VANILLA_CU12 = "linux_x86_64_Vanilla_CU12"
45-
4640
@Field
4741
def CONFIG_LINUX_X86_64_SINGLE_DEVICE = "linux_x86_64_SingleDevice"
4842

@@ -52,9 +46,6 @@ def CONFIG_LINUX_X86_64_LLVM = "linux_x86_64_LLVM"
5246
@Field
5347
def CONFIG_LINUX_AARCH64 = "linux_aarch64"
5448

55-
@Field
56-
def CONFIG_LINUX_AARCH64_CU12 = "linux_aarch64_CU12"
57-
5849
@Field
5950
def CONFIG_LINUX_AARCH64_LLVM = "linux_aarch64_LLVM"
6051

@@ -73,11 +64,6 @@ def BUILD_CONFIGS = [
7364
(TARNAME) : "TensorRT-LLM.tar.gz",
7465
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
7566
],
76-
(CONFIG_LINUX_X86_64_VANILLA_CU12) : [
77-
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
78-
(TARNAME) : "TensorRT-LLM-CU12.tar.gz",
79-
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;103-real;120-real",
80-
],
8167
(CONFIG_LINUX_X86_64_PYBIND) : [
8268
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
8369
(TARNAME) : "pybind-TensorRT-LLM.tar.gz",
@@ -99,12 +85,6 @@ def BUILD_CONFIGS = [
9985
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
10086
(BUILD_JOBS_FOR_CONFIG): "6",
10187
],
102-
(CONFIG_LINUX_AARCH64_CU12): [
103-
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON",
104-
(TARNAME) : "TensorRT-LLM-GH200-CU12.tar.gz",
105-
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
106-
(BUILD_JOBS_FOR_CONFIG): "6",
107-
],
10888
(CONFIG_LINUX_AARCH64_PYBIND): [
10989
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl",
11090
(TARNAME) : "pybind-TensorRT-LLM-GH200.tar.gz",
@@ -454,9 +434,6 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
454434
pipArgs = ""
455435
}
456436

457-
if (tarName.contains("_CU12")) {
458-
trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${LLM_ROOT} && sed -i '/^# .*<For CUDA 12\\.9>\$/ {s/^# //; n; s/^/# /}' requirements.txt && cat requirements.txt")
459-
}
460437
// install python package
461438
trtllm_utils.llmExecStepWithRetry(pipeline, script: "cd ${LLM_ROOT} && pip3 install -r requirements-dev.txt ${pipArgs}")
462439

@@ -477,10 +454,7 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
477454
def llmPath = sh (script: "realpath ${LLM_ROOT}",returnStdout: true).trim()
478455
// TODO: Remove after the cmake version is upgraded to 3.31.8
479456
// Get triton tag from docker/Dockerfile.multi
480-
def tritonShortTag = "r25.08"
481-
if (tarName.contains("CU12")) {
482-
tritonShortTag = "r25.06"
483-
}
457+
def tritonShortTag = "r25.09"
484458
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${buildJobs} install"
485459

486460
// Step 3: packaging wheels into tarfile
@@ -570,14 +544,9 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
570544
wheelDockerImage = env.dockerImage
571545
}
572546

573-
def LLM_DOCKER_IMAGE_CU12 = cpu_arch == AARCH64_TRIPLE ? LLM_SBSA_DOCKER_IMAGE_12_9 : LLM_DOCKER_IMAGE_12_9
574-
575547
buildConfigs = [
576548
"Build TRT-LLM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
577549
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
578-
// Disable CUDA12 build for too slow to build (cost > 5 hours on SBSA)
579-
"Build TRT-LLM CUDA12": [LLM_DOCKER_IMAGE_CU12] + prepareLLMBuild(
580-
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_CU12 : CONFIG_LINUX_X86_64_VANILLA_CU12),
581550
"Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
582551
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
583552
"Build TRT-LLM Pybind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(

0 commit comments

Comments
 (0)