From 9e4f6c593a94b4418bbddfc7b3b3c99e8ac43f9c Mon Sep 17 00:00:00 2001 From: Bo Deng Date: Tue, 11 Nov 2025 02:37:51 +0000 Subject: [PATCH 1/6] [TRTLLM-9247][infra] Upgrade NIXL to 0.7.1 Signed-off-by: Bo Deng --- docker/common/install_nixl.sh | 2 +- docker/common/install_ucx.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/common/install_nixl.sh b/docker/common/install_nixl.sh index 2bf03ca4b7a..b7ee249c1f8 100644 --- a/docker/common/install_nixl.sh +++ b/docker/common/install_nixl.sh @@ -4,7 +4,7 @@ set -ex GITHUB_URL="https://github.com" UCX_INSTALL_PATH="/usr/local/ucx/" CUDA_PATH="/usr/local/cuda" -NIXL_VERSION="0.5.0" +NIXL_VERSION="0.7.1" NIXL_REPO="https://github.com/ai-dynamo/nixl.git" OLD_LD_LIBRARY_PATH=$LD_LIBRARY_PATH diff --git a/docker/common/install_ucx.sh b/docker/common/install_ucx.sh index 2807182bd8e..95a4d239aad 100644 --- a/docker/common/install_ucx.sh +++ b/docker/common/install_ucx.sh @@ -1,7 +1,7 @@ #!/bin/bash set -ex -UCX_VERSION="v1.19.x" +UCX_VERSION="v1.20.x" UCX_INSTALL_PATH="/usr/local/ucx/" CUDA_PATH="/usr/local/cuda" UCX_REPO="https://github.com/openucx/ucx.git" From 8b3da14b8b305433a3d657cac31afc3c5a2d7088 Mon Sep 17 00:00:00 2001 From: Bo Deng Date: Wed, 12 Nov 2025 02:48:39 +0000 Subject: [PATCH 2/6] Update docker images Signed-off-by: Bo Deng --- jenkins/current_image_tags.properties | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index 8f390ad084d..e355b2019fc 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -13,7 +13,7 @@ # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm -LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511131803-8929 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511131803-8929 -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511131803-8929 -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511131803-8929 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-9cb3a67-github-pr-9055-1138 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-9cb3a67-github-pr-9055-1138 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py310-9cb3a67-github-pr-9055-1138 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py312-9cb3a67-github-pr-9055-1138 From f4966845f3a30653a3ec96427ec4d3f2bd62f19c Mon Sep 17 00:00:00 2001 From: Bo Deng Date: Fri, 14 Nov 2025 02:42:15 +0000 Subject: [PATCH 3/6] back to ucx 1.19 Signed-off-by: Bo Deng --- docker/common/install_ucx.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/common/install_ucx.sh b/docker/common/install_ucx.sh index 95a4d239aad..2807182bd8e 100644 --- a/docker/common/install_ucx.sh +++ b/docker/common/install_ucx.sh @@ -1,7 +1,7 @@ #!/bin/bash set -ex -UCX_VERSION="v1.20.x" +UCX_VERSION="v1.19.x" UCX_INSTALL_PATH="/usr/local/ucx/" CUDA_PATH="/usr/local/cuda" UCX_REPO="https://github.com/openucx/ucx.git" From 86de216fc8cfe2773451982db531ccf27117740e Mon Sep 17 00:00:00 2001 From: Bo Deng Date: Fri, 14 Nov 2025 14:09:36 +0000 Subject: [PATCH 4/6] update docker images Signed-off-by: Bo Deng --- jenkins/current_image_tags.properties | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index e355b2019fc..7c09d6a538a 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -13,7 +13,7 @@ # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm -LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-9cb3a67-github-pr-9055-1138 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-9cb3a67-github-pr-9055-1138 -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py310-9cb3a67-github-pr-9055-1138 -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py312-9cb3a67-github-pr-9055-1138 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-4d09ed1-github-pr-9055-1155 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-4d09ed1-github-pr-9055-1155 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py310-4d09ed1-github-pr-9055-1155 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py312-4d09ed1-github-pr-9055-1155 From 0f6f8a8a9465dad0d3507b87e2c6b8544b53c1eb Mon Sep 17 00:00:00 2001 From: Bo Deng Date: Sat, 15 Nov 2025 17:15:04 +0000 Subject: [PATCH 5/6] update docker images Signed-off-by: Bo Deng --- jenkins/current_image_tags.properties | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index 7c09d6a538a..b62916438e5 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -13,7 +13,7 @@ # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm -LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-4d09ed1-github-pr-9055-1155 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-4d09ed1-github-pr-9055-1155 -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py310-4d09ed1-github-pr-9055-1155 -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py312-4d09ed1-github-pr-9055-1155 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-86de216-github-pr-9055-1161 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-86de216-github-pr-9055-1161 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py310-86de216-github-pr-9055-1161 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py312-86de216-github-pr-9055-1161 From db6e7052144e5b63d9b60c99d82c0e9686af039b Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Mon, 17 Nov 2025 09:48:38 -0800 Subject: [PATCH 6/6] UCX backend Signed-off-by: jthomson04 --- tests/integration/defs/llmapi/test_llm_api_connector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration/defs/llmapi/test_llm_api_connector.py b/tests/integration/defs/llmapi/test_llm_api_connector.py index ba624b299d0..f4f6c122823 100644 --- a/tests/integration/defs/llmapi/test_llm_api_connector.py +++ b/tests/integration/defs/llmapi/test_llm_api_connector.py @@ -365,7 +365,7 @@ def test_connector_disagg_prefill(enforce_single_worker, model_with_connector, model = model_fn( disable_overlap_scheduler=True, - cache_transceiver_config=CacheTransceiverConfig(backend="DEFAULT")) + cache_transceiver_config=CacheTransceiverConfig(backend="UCX")) sampling_params = SamplingParams(ignore_eos=True)