File tree (Expand file tree / Collapse file tree) — 3 files changed: +4 −5 lines changed
cpp/tensorrt_llm/batch_manager (Expand file tree / Collapse file tree) — 3 files changed: +4 −5 lines changed
Original file line number | Diff line number | Diff line change @@ -493,8 +493,8 @@ void CacheTransceiver::checkContextTransferStatus(std::optional<int> const& atLe
493493 }
494494 else if (status == std::future_status::timeout)
495495 {
496- TLLM_LOG_WARNING (" Timed out waiting for context transfer for request %ld after %d milliseconds." ,
497- request-> mRequestId , senderFutureTimeoutMs.value ());
496+ TLLM_LOG_WARNING (" Timed out waiting for context KV cache transfer after %d milliseconds." ,
497+ senderFutureTimeoutMs.value ());
498498 ++it;
499499 }
500500 else
Original file line number Diff line number Diff line change @@ -390,7 +390,6 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[
390390accuracy/test_llm_api_pytorch.py::TestNemotronNas::test_auto_dtype_tp8 SKIP (https://nvbugs/5673527)
391391disaggregated/test_auto_scaling.py::test_disagg_server_restart[etcd-round_robin] SKIP (https://nvbugs/5633340)
392392disaggregated/test_auto_scaling.py::test_disagg_server_restart[http-round_robin] SKIP (https://nvbugs/5633340)
393- unittest/llmapi/test_llm_pytorch.py::test_llm_context_only_timed_out_kv_cache_exhausted SKIP (https://nvbugs/5680310)
394393accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-4] SKIP (https://nvbugs/5680312, https://nvbugs/5636912)
395394accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-2] SKIP (https://nvbugs/5680312, https://nvbugs/5636912)
396395unittest/_torch/auto_deploy/unit/multigpu/test_ad_build_small_multi.py::test_build_ad[meta-llama/Meta-Llama-3.1-8B-Instruct-llm_extra_args0-2] SKIP (https://nvbugs/5680755)
Original file line number Diff line number Diff line change @@ -992,7 +992,7 @@ def test_llm_context_only_timed_out():
992992 kv_cache_config = global_kvcache_config ,
993993 tensor_parallel_size = tp_size ,
994994 cache_transceiver_config = CacheTransceiverConfig (
995- backend = "DEFAULT " , kv_transfer_timeout_ms = 1000 ),
995+ backend = "UCX " , kv_transfer_timeout_ms = 1000 ),
996996 ** llm_args_extra )
997997
998998 max_tokens = 1
@@ -1070,7 +1070,7 @@ def test_llm_context_only_timed_out_kv_cache_exhausted(
10701070 kv_cache_config = kv_cache_config ,
10711071 tensor_parallel_size = tp_size ,
10721072 cache_transceiver_config = CacheTransceiverConfig (
1073- backend = "DEFAULT " ,
1073+ backend = "UCX " ,
10741074 kv_transfer_timeout_ms = 1000 ,
10751075 kv_transfer_sender_future_timeout_ms = sender_future_timeout_ms ),
10761076 ** llm_args_extra )
You can’t perform that action at this time.
0 commit comments