File tree Expand file tree Collapse file tree 2 files changed +3
-4
lines changed
tensorrt_llm/_torch/pyexecutor
tests/integration/test_lists Expand file tree Collapse file tree 2 files changed +3
-4
lines changed Original file line number Diff line number Diff line change 11import copy
22import enum
3+ import gc
34import importlib
45import os
56from concurrent .futures import ThreadPoolExecutor
@@ -600,6 +601,8 @@ def drafting_loop_wrapper(model):
600601 with mem_monitor .observe_creation_stage (
601602 _ExecutorCreationStage .INIT_EXTRA_RESOURCES
602603 if estimating_kv_cache else _ExecutorCreationStage .EXTRA_RESOURCES ):
604+ # Run gc.collect() to free the memory of the previous py_executor, so that cudaFree does not overlap with CUDA graph capture.
605+ gc .collect ()
603606 py_executor = create_py_executor_instance (
604607 dist = dist ,
605608 resources = resources ,
Original file line number Diff line number Diff line change @@ -355,10 +355,6 @@ triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-T
355355triton_server/test_triton_llm.py::test_mistral_small_3_1_24b_pixtral[TYPE_FP16-TYPE_BF16-False-1---False-True-False-0-1-enableDecoupleMode-inflight_fused_batching-disableTrtOverlap--0.7-max_utilization---1-1-1-False-tensorrt_llm_bls] SKIP (https://nvbugs/5606136)
356356accuracy/test_cli_flow.py::TestMinitron4BBase::test_fp8 SKIP (https://nvbugs/5606233)
357357examples/test_gpt.py::test_llm_minitron_fp8_with_pseudo_loras[4b] SKIP (https://nvbugs/5606233)
358- disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-False-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5626197)
359- disaggregated/test_disaggregated_single_gpu.py::test_disaggregated_simple_deepseek[True-True-DeepSeek-V3-Lite-fp8/fp8] SKIP (https://nvbugs/5628952)
360358cpp/test_e2e.py::test_benchmarks[t5-90] SKIP (https://nvbugs/5630196)
361359accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_trtllm_eagle3] SKIP (https://nvbugs/5630700)
362- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend SKIP (https://nvbugs/5628952)
363- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_guided_decoding[xgrammar-mtp_nextn=2] SKIP (https://nvbugs/5628952)
364360full:RTX_PRO_6000_Blackwell_Server_Edition/accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype SKIP (https://nvbugs/5569696)
You can’t perform that action at this time.
0 commit comments