Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
6d7c72b
Update dependencies
EmmaQiaoCh Dec 9, 2025
c825b6e
Update tensorrt version
EmmaQiaoCh Dec 9, 2025
b1869de
update tensorrt version
EmmaQiaoCh Dec 9, 2025
b85dfb7
update tensorrt version
EmmaQiaoCh Dec 9, 2025
36dc36d
Correct cudnn version
EmmaQiaoCh Dec 9, 2025
f04b584
Update base and cudnn version
EmmaQiaoCh Dec 10, 2025
941ae8a
Update cuda base image since 13.0.2 is available
EmmaQiaoCh Dec 10, 2025
ae91628
Correct torch version in requirements
EmmaQiaoCh Dec 11, 2025
1f51d88
Fix a typo
EmmaQiaoCh Dec 11, 2025
4d4def4
Fix requirements conflict
EmmaQiaoCh Dec 11, 2025
f00d478
Add some debug info
EmmaQiaoCh Dec 12, 2025
eb21621
Fix compile error <sm90.
bobboli Dec 16, 2025
df9ebbe
Merge branch 'main' into emma/update_dependencies_1.2
EmmaQiaoCh Dec 16, 2025
e0da075
Missing semicolons.
bobboli Dec 17, 2025
7c43508
Update image tag to test
EmmaQiaoCh Dec 17, 2025
e11d5ba
Fix parsing split test list
EmmaQiaoCh Dec 18, 2025
57bea6a
Resolve the requirements conflicts
EmmaQiaoCh Dec 18, 2025
9a13d33
Fix devel image to use tritondevel
EmmaQiaoCh Dec 19, 2025
b342ed7
Update to latest candidate
EmmaQiaoCh Dec 22, 2025
ec526fc
Update build job to 8 since build wheel is so slow
EmmaQiaoCh Dec 22, 2025
66f380f
Update to public pytorch and cuda image
EmmaQiaoCh Dec 23, 2025
9c83a3c
Fix typo
EmmaQiaoCh Dec 23, 2025
e696736
Fix triton tag
EmmaQiaoCh Dec 23, 2025
8a2e383
Update to the new ci devel image
EmmaQiaoCh Dec 23, 2025
d936323
Update dlfw image in groovy
EmmaQiaoCh Dec 23, 2025
42d08f2
Update the torch and torchvision versions in sanity
EmmaQiaoCh Dec 24, 2025
6f3e085
Merge branch 'main' into emma/update_dependencies_1.2
EmmaQiaoCh Dec 24, 2025
98d8458
Waive failed cases
EmmaQiaoCh Dec 24, 2025
e127987
Merge branch 'main' into emma/update_dependencies_1.2
EmmaQiaoCh Dec 24, 2025
9fbe4c6
Fix for comment and update sbsa build process temporarily
EmmaQiaoCh Dec 25, 2025
c7537d6
Merge branch 'main' into emma/update_dependencies_1.2
EmmaQiaoCh Dec 25, 2025
3248684
Update to use public triton
EmmaQiaoCh Dec 30, 2025
bfe1e20
Merge branch 'main' into emma/update_dependencies_1.2
EmmaQiaoCh Dec 30, 2025
aabca98
Fix typo
EmmaQiaoCh Dec 30, 2025
cb0057a
Fix typo and update sanity test image
EmmaQiaoCh Dec 30, 2025
fc1e128
Fix dockerfile
EmmaQiaoCh Dec 31, 2025
894b1bd
Merge branch 'main' into emma/update_dependencies_1.2
EmmaQiaoCh Dec 31, 2025
bff85f4
Update image tags
EmmaQiaoCh Jan 1, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,12 @@ __global__ void moeA2ADispatchKernel(int32_t const* token_selected_experts, // [
#if !DISABLE_SYNC_FOR_PROFILING
uint32_t expected_value = *ptrs.flag_val;

#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 900
// .acquire and .release qualifiers for fence instruction require sm_90 or higher.
asm volatile("fence.release.sys;");
#else
asm volatile("fence.acq_rel.sys;");
#endif
#pragma unroll 1 // No unroll as one iter is typically enough
for (int target_rank = lane_id; target_rank < ep_size; target_rank += warpSize)
{
Expand Down Expand Up @@ -525,7 +530,6 @@ __global__ void moeA2ADispatchKernel(int32_t const* token_selected_experts, // [
flag_set = flag_value == expected_value;
} while (!flag_set);
}
// asm volatile("fence.acquire.sys;");
#endif
}
}
Expand Down Expand Up @@ -1018,7 +1022,6 @@ __global__ void moeA2ACombineKernel(

if (blockIdx.x == 0)
{
// asm volatile("fence.release.sys;");
#pragma unroll 1 // No unroll
for (int peer_rank = lane_id; peer_rank < ep_size; peer_rank += warpSize)
{
Expand Down Expand Up @@ -1050,7 +1053,12 @@ __global__ void moeA2ACombineKernel(
flag_set = flag_value == expected_value;
} while (!flag_set);
}
#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 900
// .acquire and .release qualifiers for fence instruction require sm_90 or higher.
asm volatile("fence.acquire.sys;");
#else
asm volatile("fence.acq_rel.sys;");
#endif
}
__syncthreads();
#endif
Expand Down
5 changes: 3 additions & 2 deletions docker/Dockerfile.multi
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Multi-stage Dockerfile
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
ARG BASE_TAG=25.10-py3
ARG TRITON_BASE_TAG=25.10-py3
ARG BASE_TAG=25.12-py3
ARG TRITON_BASE_TAG=25.12-py3
ARG DEVEL_IMAGE=devel

FROM ${BASE_IMAGE}:${BASE_TAG} AS base
Expand Down Expand Up @@ -147,6 +147,7 @@ RUN --mount=type=cache,target=/root/.cache/pip --mount=type=bind,from=wheel,sour
pip install /tmp/wheel/tensorrt_llm*.whl

COPY README.md ./
COPY --from=wheel /src/tensorrt_llm/build/tensorrt_llm*.whl ./
COPY docs docs
COPY cpp/include include

Expand Down
7 changes: 3 additions & 4 deletions docker/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -202,17 +202,16 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
jenkins-rockylinux8_%: STAGE = tritondevel
jenkins-rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
# [TODO] Update to NVIDIA CUDA 13.0.2 when it's available
jenkins-rockylinux8_%: BASE_TAG = 13.0.1-devel-rockylinux8
jenkins-rockylinux8_%: BASE_TAG = 13.1.0-devel-rockylinux8

rockylinux8_%: STAGE = tritondevel
rockylinux8_%: BASE_IMAGE = nvcr.io/nvidia/cuda
rockylinux8_%: BASE_TAG = 13.0.1-devel-rockylinux8
rockylinux8_%: BASE_TAG = 13.1.0-devel-rockylinux8

# For x86_64 and aarch64
ubuntu22_%: STAGE = tritondevel
ubuntu22_%: BASE_IMAGE = nvcr.io/nvidia/cuda
ubuntu22_%: BASE_TAG = 13.0.1-devel-ubuntu22.04
ubuntu22_%: BASE_TAG = 13.1.0-devel-ubuntu22.04

trtllm_%: STAGE = release
trtllm_%: PUSH_TO_STAGING := 0
Expand Down
2 changes: 1 addition & 1 deletion docker/common/install_cuda_toolkit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -ex
# This script is used for reinstalling CUDA on Rocky Linux 8 with the run file.
# CUDA version is usually aligned with the latest NGC CUDA image tag.
# Only use when public CUDA image is not ready.
CUDA_VER="13.0.2_580.95.05"
CUDA_VER="13.1.0_590.44.01"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🌐 Web query:

CUDA 13.1.0 driver 590.44.01 NVIDIA availability compatibility

💡 Result:

Short answer: Yes — NVIDIA driver 590.44.01 (beta, released Dec 2, 2025) is a newer driver branch than the R580 baseline required for CUDA 13.x, so it should be compatible with CUDA 13.1.0. (docs.nvidia.com)

Notes:

  • CUDA 13.x requires a driver >= the R580 series (minimum driver ~580.xx); any later driver branch (including 590) is backward‑compatible for running CUDA 13.x applications. (docs.nvidia.com)
  • 590.44.01 is a Linux beta release (Dec 2, 2025) and its release notes / community reports indicate changes to supported hardware (some older GTX 9/10-series behavior on Linux). If you use older Maxwell/Pascal GPUs, test carefully before upgrading. (nvidia.com)

If you want, I can: (a) check the exact CUDA 13.1 toolkit release notes for any additional driver-minimums, or (b) verify whether a specific GPU model you have is listed as supported with the 590.44.01 driver.

Citations:


CUDA 13.1.0 and driver 590.44.01 are compatible and available.

Both versions exist and are legitimate NVIDIA releases. CUDA 13.x requires driver >= R580 series, and driver 590.44.01 is backward-compatible. However, note that 590.44.01 is a beta driver (released Dec 2, 2025) with hardware compatibility changes for older Maxwell/Pascal GPUs—test thoroughly before upgrading on systems using legacy GPU models.

🤖 Prompt for AI Agents
In docker/common/install_cuda_toolkit.sh around line 8, the script sets
CUDA_VER="13.1.0_590.44.01" but lacks documentation about driver compatibility
and the fact that 590.44.01 is a beta release with potential hardware impacts;
add an inline comment above this variable stating that CUDA 13.1 requires driver
R580+ and that driver 590.44.01 is a beta release (Dec 2, 2025) which may affect
older Maxwell/Pascal GPUs and should be tested on legacy hardware before
deployment, so operators are warned to validate hardware compatibility and
consider pinning a stable driver if needed.

CUDA_VER_SHORT="${CUDA_VER%_*}"

NVCC_VERSION_OUTPUT=$(nvcc --version)
Expand Down
2 changes: 1 addition & 1 deletion docker/common/install_polygraphy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -ex
if [ -n "${GITHUB_MIRROR}" ]; then
export PIP_INDEX_URL="https://urm.nvidia.com/artifactory/api/pypi/pypi-remote/simple"
fi
pip3 install polygraphy==0.49.9
pip3 install polygraphy==0.49.26

# Clean up pip cache and temporary files
pip3 cache purge
Expand Down
2 changes: 1 addition & 1 deletion docker/common/install_pytorch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -ex
# Use latest stable version from https://pypi.org/project/torch/#history
# and closest to the version specified in
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-12.html#rel-25-12
TORCH_VERSION="2.9.0"
TORCH_VERSION="2.9.1"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

🧩 Analysis chain

🌐 Web query:

PyTorch 2.9.1 PyPI availability CUDA 13.1 compatibility

💡 Result:

Short answer: Yes — PyTorch 2.9.1 is published to PyPI (release files uploaded Nov 12, 2025). Official prebuilt CUDA wheels for 2.9.1 on download.pytorch.org are provided for CUDA 13.0 (cu130) — CUDA 13.1 / cu131 is not listed as an official wheel. [1][2][3]

Sources:

  • PyTorch 2.9.1 GA announcement (promotions to PyPI/download done). [1]
  • PyTorch wheel index showing torch-2.9.1+cu130 files. [2]
  • torch (PyPI) release files (torch‑2.9.1 upload entries). [3]

Address CUDA 13.1 compatibility gap for PyTorch 2.9.1.

PyTorch 2.9.1 is available on PyPI but official prebuilt wheels are only provided for CUDA 13.0 (cu130). CUDA 13.1 (cu131) is not officially supported. If this script targets CUDA 13.1, either use the cu130 wheel (with caution), downgrade to a version with CUDA 13.1 support, or add a note documenting this limitation.

🤖 Prompt for AI Agents
docker/common/install_pytorch.sh around line 8: TORCH_VERSION is set to "2.9.1"
but PyTorch 2.9.1 does not have official cu131 wheels; update the script to
explicitly pick a compatible CUDA wheel or document the limitation. Either (A)
set TORCH_VERSION and the corresponding CUDA tag to an officially supported
pairing (e.g., keep 2.9.1 but force the cu130 wheel tag when installing), or (B)
change TORCH_VERSION to a release that officially supports CUDA 13.1, and add a
short comment above the variable explaining which CUDA tag is targeted and that
cu131 has no official 2.9.1 wheel. Ensure the install command uses the matching
CUDA wheel tag and add the comment so future maintainers know the compatibility
choice.

SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')

prepare_environment() {
Expand Down
23 changes: 14 additions & 9 deletions docker/common/install_tensorrt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,20 @@

set -ex

TRT_VER="10.13.3.9"
TRT_VER="10.14.1.48"
# Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10
CUDA_VER="13.0" # 13.0.2
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-12.html#rel-25-12
CUDA_VER="13.1" # 13.1.0
# Keep the installation for cuDNN if users want to install PyTorch with source codes.
# PyTorch 2.x can compile with cuDNN v9.
CUDNN_VER="9.14.0.64-1"
NCCL_VER="2.27.7-1+cuda13.0"
CUBLAS_VER="13.1.0.3-1"
CUDNN_VER="9.17.0.29-1"
NCCL_VER="2.28.9-1+cuda13.0"
CUBLAS_VER="13.2.0.9-1"
# Align with the pre-installed CUDA / NVCC / NVRTC versions from
# https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
NVRTC_VER="13.0.88-1"
CUDA_RUNTIME="13.0.96-1"
CUDA_DRIVER_VERSION="580.95.05-1.el8"
NVRTC_VER="13.1.80-1"
CUDA_RUNTIME="13.1.80-1"
CUDA_DRIVER_VERSION="590.44.01-1.el8"

for i in "$@"; do
case $i in
Expand Down Expand Up @@ -118,7 +118,12 @@ install_rockylinux_requirements() {
install_tensorrt() {
PY_VERSION=$(python3 -c 'import sys; print(".".join(map(str, sys.version_info[0:2])))')
PARSED_PY_VERSION=$(echo "${PY_VERSION//./}")

TRT_CUDA_VERSION=${CUDA_VER}
# No cuda-13.1 version for TensorRT yet, use cuda-13.0 instead
if [ "$CUDA_VER" = "13.1" ]; then
TRT_CUDA_VERSION="13.0"
fi
TRT_VER_SHORT=$(echo $TRT_VER | cut -d. -f1-3)

if [ -z "$RELEASE_URL_TRT" ];then
Expand Down
6 changes: 3 additions & 3 deletions jenkins/Build.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -83,19 +83,19 @@ def BUILD_CONFIGS = [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --extra-cmake-vars MOONCAKE_ROOT=/usr/local/Mooncake",
(TARNAME) : "TensorRT-LLM-GH200.tar.gz",
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
(BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fix the build OOM issue on SBSA
(BUILD_JOBS_FOR_CONFIG): "8", // TODO: Remove after fix the build OOM issue on SBSA
],
(CONFIG_LINUX_AARCH64_PYBIND): [
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --extra-cmake-vars MOONCAKE_ROOT=/usr/local/Mooncake",
(TARNAME) : "pybind-TensorRT-LLM-GH200.tar.gz",
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
(BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fix the build OOM issue on SBSA
(BUILD_JOBS_FOR_CONFIG): "8", // TODO: Remove after fix the build OOM issue on SBSA
],
(CONFIG_LINUX_AARCH64_LLVM) : [
(WHEEL_EXTRA_ARGS) : "--extra-cmake-vars WARNING_IS_ERROR=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_CUDA_HOST_COMPILER=clang -DCMAKE_LINKER_TYPE=LLD",
(TARNAME) : "llvm-TensorRT-LLM-GH200.tar.gz",
(WHEEL_ARCHS): "90-real;100-real;103-real;120-real",
(BUILD_JOBS_FOR_CONFIG): "4", // TODO: Remove after fix the build OOM issue on SBSA
(BUILD_JOBS_FOR_CONFIG): "8", // TODO: Remove after fix the build OOM issue on SBSA
],
]

Expand Down
6 changes: 4 additions & 2 deletions jenkins/L0_MergeRequest.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -1092,7 +1092,8 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
}

if (singleGpuTestFailed) {
if (env.JOB_NAME ==~ /.*PostMerge.*/) {
//if (env.JOB_NAME ==~ /.*PostMerge.*/) {
if (testFilter[(IS_POST_MERGE)] || env.JOB_NAME ==~ /.*PostMerge.*/) {
echo "In the official post-merge pipeline, x86_64 single-GPU test failed, whereas multi-GPU test is still kept running."
} else {
stage("[Test-x86_64-Multi-GPU] Blocked") {
Expand Down Expand Up @@ -1196,7 +1197,8 @@ def launchStages(pipeline, reuseBuild, testFilter, enableFailFast, globalVars)
}

if (singleGpuTestFailed) {
if (env.JOB_NAME ==~ /.*PostMerge.*/) {
// if (env.JOB_NAME ==~ /.*PostMerge.*/) {
if (testFilter[(IS_POST_MERGE)] || env.JOB_NAME ==~ /.*PostMerge.*/) {
echo "In the official post-merge pipeline, SBSA single-GPU test failed, whereas multi-GPU test is still kept running."
} else {
stage("[Test-SBSA-Multi-GPU] Blocked") {
Expand Down
19 changes: 12 additions & 7 deletions jenkins/L0_Test.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312

// DLFW torch image
DLFW_IMAGE = "urm.nvidia.com/docker/nvidia/pytorch:25.10-py3"
DLFW_IMAGE = "urm.nvidia.com/docker/nvidia/pytorch:25.12-py3"

//Ubuntu base image
UBUNTU_22_04_IMAGE = "urm.nvidia.com/docker/ubuntu:22.04"
Expand Down Expand Up @@ -83,7 +83,7 @@ BUILD_CORES_REQUEST = "8"
BUILD_CORES_LIMIT = "8"
BUILD_MEMORY_REQUEST = "48Gi"
BUILD_MEMORY_LIMIT = "96Gi"
BUILD_JOBS = "4"
BUILD_JOBS = "8"

SLURM_CORES_REQUEST = "1"
SLURM_CORES_LIMIT = "1"
Expand Down Expand Up @@ -316,6 +316,11 @@ def processShardTestList(llmSrc, testDBList, splitId, splits, perfMode=false) {
foundRunningLine = true
return false // Don't include the "Running" line itself
}
// Stop collecting when we hit the warnings/errors summary separator
if (foundRunningLine && line.contains('======================')) {
foundRunningLine = false // Stop collecting
return false
}

def hasDoubleColon = line.contains('::')
def shouldInclude = foundRunningLine && hasDoubleColon
Expand Down Expand Up @@ -3329,7 +3334,7 @@ def launchTestJobs(pipeline, testFilter)
// Python version and OS for sanity check
x86SanityCheckConfigs = [
"PY312-DLFW": [
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE,
DLFW_IMAGE,
"B200_PCIe",
X86_64_TRIPLE,
false,
Expand Down Expand Up @@ -3364,7 +3369,7 @@ def launchTestJobs(pipeline, testFilter)
AARCH64_TRIPLE,
false,
"",
UBUNTU_24_04_IMAGE,
DLFW_IMAGE,
true, // Extra PyTorch CUDA 13.0 install
],
"PY312-DLFW": [
Expand Down Expand Up @@ -3464,17 +3469,17 @@ def launchTestJobs(pipeline, testFilter)
def platform = cpu_arch == X86_64_TRIPLE ? "x86_64" : "sbsa"
trtllm_utils.llmExecStepWithRetry(pipeline, script: "wget https://developer.download.nvidia.com/compute/cuda/repos/${ubuntu_version}/${platform}/cuda-keyring_1.1-1_all.deb")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "dpkg -i cuda-keyring_1.1-1_all.deb")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update && apt-get install -y cuda-toolkit-13-0")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "apt-get update && apt-get install -y cuda-toolkit-13-1")
}
// Extra PyTorch CUDA 13.0 install for all bare-metal environments (Default PyTorch is for CUDA 12.8)
if (values[6]) {
echo "###### Extra PyTorch CUDA 13.0 install Start ######"
// Use internal mirror instead of https://download.pytorch.org/whl/cu130 for better network stability.
// PyTorch CUDA 13.0 package and torchvision package can be installed as expected.
if (k8s_arch == "amd64") {
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.9.0+cu130 torchvision==0.24.0+cu130 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/pytorch-cu128-remote/simple")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.9.1+cu130 torchvision==0.24.1+cu130 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/pytorch-cu128-remote/simple")
} else {
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.9.0+cu130 torchvision==0.24.0 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/pytorch-cu128-remote/simple")
trtllm_utils.llmExecStepWithRetry(pipeline, script: "pip3 install torch==2.9.1+cu130 torchvision==0.24.1 --extra-index-url https://urm.nvidia.com/artifactory/api/pypi/pytorch-cu128-remote/simple")
}
}

Expand Down
8 changes: 4 additions & 4 deletions jenkins/current_image_tags.properties
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm

LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202512241744-10055
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202512241744-10055
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202512241744-10055
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202512241744-10055
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.12-py3-x86_64-ubuntu24.04-trt10.14.1.48-skip-tritondevel-202601011103-9818
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.12-py3-aarch64-ubuntu24.04-trt10.14.1.48-skip-tritondevel-202601011103-9818
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-rocky8-x86_64-rocky8-py310-trt10.14.1.48-skip-tritondevel-202601011103-9818
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.1.0-devel-rocky8-x86_64-rocky8-py312-trt10.14.1.48-skip-tritondevel-202601011103-9818
13 changes: 7 additions & 6 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,14 @@ pandas
h5py==3.12.1
StrEnum
sentencepiece>=0.1.99
tensorrt~=10.13.3
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10 uses 2.9.0a0.
torch>=2.9.0a0,<=2.9.0
tensorrt~=10.14.1
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-12.html#rel-25-12 uses 2.10.0a0.
torch>=2.9.1,<=2.10.0a0
torchvision
nvidia-modelopt[torch]~=0.37.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-10.html#rel-25-10 uses 2.27.7
nvidia-nccl-cu13==2.27.7
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-12.html#rel-25-12 uses 2.28.9
# torch 2.9.1+cu130 depends on nvidia-nccl-cu13==2.27.7; platform_system == "Linux"
nvidia-nccl-cu13>=2.27.7,<=2.28.9
nvidia-cuda-nvrtc
transformers==4.57.1
prometheus_client
Expand Down Expand Up @@ -65,7 +66,7 @@ ninja
etcd3 @ git+https://github.com/kragniz/python-etcd3.git@e58a899579ba416449c4e225b61f039457c8072a
blake3
soundfile
triton==3.5.0
triton==3.5.1
tiktoken
blobfile
openai-harmony==0.0.4
Expand Down
Loading
Loading