2 changes: 1 addition & 1 deletion docker/common/install_nixl.sh
@@ -4,7 +4,7 @@ set -ex
 GITHUB_URL="https://github.com"
 UCX_INSTALL_PATH="/usr/local/ucx/"
 CUDA_PATH="/usr/local/cuda"
-NIXL_VERSION="0.5.0"
+NIXL_VERSION="0.7.1"
 NIXL_REPO="https://github.com/ai-dynamo/nixl.git"
 OLD_LD_LIBRARY_PATH=$LD_LIBRARY_PATH
8 changes: 4 additions & 4 deletions jenkins/current_image_tags.properties
@@ -13,7 +13,7 @@
 # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
 IMAGE_NAME=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm
 
-LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-x86_64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511131803-8929
-LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.10-py3-aarch64-ubuntu24.04-trt10.13.3.9-skip-tritondevel-202511131803-8929
-LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py310-trt10.13.3.9-skip-tritondevel-202511131803-8929
-LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-13.0.2-devel-rocky8-x86_64-rocky8-py312-trt10.13.3.9-skip-tritondevel-202511131803-8929
+LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-tritondevel-torch_skip-86de216-github-tritondevel-202511191426-9055
+LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:sbsa-tritondevel-torch_skip-86de216-github-tritondevel-202511191426-9055
+LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py310-86de216-github-tritondevel-202511191426-9055
+LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm-staging/tritondevel:x86_64-rockylinux8-torch_skip-py312-86de216-github-tritondevel-202511191426-9055
21 changes: 16 additions & 5 deletions tests/integration/defs/llmapi/test_llm_api_connector.py
@@ -363,11 +363,15 @@ def test_connector_disagg_prefill(enforce_single_worker, model_with_connector,
                                   save_async):
     model_fn, scheduler, worker = model_with_connector
 
-    model = model_fn(
+    prefill_worker = model_fn(
         disable_overlap_scheduler=True,
         cache_transceiver_config=CacheTransceiverConfig(backend="DEFAULT"))
 
-    sampling_params = SamplingParams(ignore_eos=True)
+    decode_worker = model_fn(
+        cache_transceiver_config=CacheTransceiverConfig(backend="DEFAULT"),
+        kv_connector_config=None)
+
+    sampling_params = SamplingParams(ignore_eos=True, max_tokens=16)
 
     disaggregated_params = DisaggregatedParams(request_type="context_only")
 
@@ -382,9 +386,16 @@
     scheduler.request_finished.return_value = False
     worker.get_finished.return_value = [], []
 
-    model.generate([0] * 48,
-                   sampling_params=sampling_params,
-                   disaggregated_params=disaggregated_params)
+    result = prefill_worker.generate([0] * 48,
+                                     sampling_params=sampling_params,
+                                     disaggregated_params=disaggregated_params)
+
+    gen_disagg_params = result.disaggregated_params
+    gen_disagg_params.request_type = "generation_only"
+
+    result = decode_worker.generate([0] * 48,
+                                    sampling_params=sampling_params,
+                                    disaggregated_params=gen_disagg_params)
 
     assert scheduler.build_connector_meta.call_count == 1
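
For orientation, the test change appears to extend the prefill-only scenario into a full disaggregated handoff: a context-only worker prefills the prompt, its result carries DisaggregatedParams describing the produced KV state, and flipping request_type to "generation_only" lets a separate decode worker finish the request. The outline below is a sketch of that flow using only names from the diff above; model_fn is the test fixture's model factory and prompt_token_ids is a stand-in for the [0] * 48 token prompt, so this is an annotated restatement of the test logic, not a standalone script.

# Prefill side: a context-only request computes the KV cache for the prompt.
# `model_fn` is the test fixture's model factory; `prompt_token_ids` is a
# placeholder for the token-ID prompt used in the test ([0] * 48).
prefill_worker = model_fn(
    disable_overlap_scheduler=True,
    cache_transceiver_config=CacheTransceiverConfig(backend="DEFAULT"))
result = prefill_worker.generate(
    prompt_token_ids,
    sampling_params=SamplingParams(ignore_eos=True, max_tokens=16),
    disaggregated_params=DisaggregatedParams(request_type="context_only"))

# The prefill result carries DisaggregatedParams for the produced KV state;
# flipping request_type hands the same request over to the decode side.
gen_disagg_params = result.disaggregated_params
gen_disagg_params.request_type = "generation_only"

# Decode side: a second worker (created without the KV connector in the test)
# continues generation from the transferred state.
decode_worker = model_fn(
    cache_transceiver_config=CacheTransceiverConfig(backend="DEFAULT"),
    kv_connector_config=None)
result = decode_worker.generate(
    prompt_token_ids,
    sampling_params=SamplingParams(ignore_eos=True, max_tokens=16),
    disaggregated_params=gen_disagg_params)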