File tree (Expand file tree / Collapse file tree) — 3 files changed: +4 −5 lines changed
cpp/tensorrt_llm/batch_manager (Expand file tree / Collapse file tree) — 3 files changed: +4 −5 lines changed
Original file line number | Diff line number | Diff line change @@ -493,8 +493,8 @@ void CacheTransceiver::checkContextTransferStatus(std::optional<int> const& atLe
493493 }
494494 else if (status == std::future_status::timeout)
495495 {
496- TLLM_LOG_WARNING (" Timed out waiting for context transfer for request %ld after %d milliseconds." ,
497- request-> mRequestId , senderFutureTimeoutMs.value ());
496+ TLLM_LOG_WARNING (" Timed out waiting for context KV cache transfer after %d milliseconds." ,
497+ senderFutureTimeoutMs.value ());
498498 ++it;
499499 }
500500 else
Original file line number Diff line number Diff line change @@ -390,7 +390,6 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[
390390accuracy/test_llm_api_pytorch.py::TestNemotronNas::test_auto_dtype_tp8 SKIP (https://nvbugs/5673527)
391391disaggregated/test_auto_scaling.py::test_disagg_server_restart[etcd-round_robin] SKIP (https://nvbugs/5633340)
392392disaggregated/test_auto_scaling.py::test_disagg_server_restart[http-round_robin] SKIP (https://nvbugs/5633340)
393- unittest/llmapi/test_llm_pytorch.py::test_llm_context_only_timed_out_kv_cache_exhausted SKIP (https://nvbugs/5680310)
394393accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-4] SKIP (https://nvbugs/5680312, https://nvbugs/5636912)
395394accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype[False-2] SKIP (https://nvbugs/5680312, https://nvbugs/5636912)
396395unittest/_torch/auto_deploy/unit/multigpu/test_ad_build_small_multi.py::test_build_ad[meta-llama/Meta-Llama-3.1-8B-Instruct-llm_extra_args0-2] SKIP (https://nvbugs/5680755)
Original file line number Diff line number Diff line change @@ -992,7 +992,7 @@ def test_llm_context_only_timed_out():
992992 kv_cache_config = global_kvcache_config ,
993993 tensor_parallel_size = tp_size ,
994994 cache_transceiver_config = CacheTransceiverConfig (
995- backend = "DEFAULT " , kv_transfer_timeout_ms = 1000 ),
995+ backend = "UCX " , kv_transfer_timeout_ms = 1000 ),
996996 ** llm_args_extra )
997997
998998 max_tokens = 1
@@ -1070,7 +1070,7 @@ def test_llm_context_only_timed_out_kv_cache_exhausted(
10701070 kv_cache_config = kv_cache_config ,
10711071 tensor_parallel_size = tp_size ,
10721072 cache_transceiver_config = CacheTransceiverConfig (
1073- backend = "DEFAULT " ,
1073+ backend = "UCX " ,
10741074 kv_transfer_timeout_ms = 1000 ,
10751075 kv_transfer_sender_future_timeout_ms = sender_future_timeout_ms ),
10761076 ** llm_args_extra )
You can’t perform that action at this time.
0 commit comments