Skip to content

Commit 31a7368

Browse files
committed
Print memory info
Signed-off-by: Hui Gao <[email protected]>
1 parent 93b5433 commit 31a7368

File tree

4 files changed

+12
-15
lines changed

4 files changed

+12
-15
lines changed

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,10 +28,10 @@
2828
SamplingParams, TorchCompileConfig)
2929
from tensorrt_llm.quantization import QuantAlgo
3030

31-
from ..conftest import (get_device_count, get_device_memory, print_device_memory, llm_models_root,
32-
parametrize_with_ids, skip_no_hopper,
33-
skip_post_blackwell, skip_pre_ada, skip_pre_blackwell,
34-
skip_pre_hopper, skip_ray)
31+
from ..conftest import (get_device_count, get_device_memory, llm_models_root,
32+
parametrize_with_ids, print_device_memory,
33+
skip_no_hopper, skip_post_blackwell, skip_pre_ada,
34+
skip_pre_blackwell, skip_pre_hopper, skip_ray)
3535
from .accuracy_core import (GSM8K, MMLU, CnnDailymail, GPQADiamond,
3636
JsonModeEval, LlmapiAccuracyTestHarness,
3737
LongBenchV2)

tests/integration/defs/conftest.py

Lines changed: 6 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1996,12 +1996,14 @@ def get_device_memory_str():
19961996
with tempfile.TemporaryDirectory() as temp_dirname:
19971997
suffix = ".exe" if is_windows() else ""
19981998
cmd = " ".join([
1999-
"nvidia-smi" + suffix, "--query-gpu=memory.total,memory.reserved,memory.used,memory.free",
2000-
"--format=csv,noheader"
2001-
])
1999+
"nvidia-smi" + suffix,
2000+
"--query-gpu=memory.total,memory.reserved,memory.used,memory.free",
2001+
"--format=csv,noheader"
2002+
])
20022003
output = check_output(cmd, shell=True, cwd=temp_dirname)
20032004
return output.strip()
20042005

2006+
20052007
def get_device_memory():
20062008
"get gpu memory"
20072009
memory = 0
@@ -2032,11 +2034,7 @@ def get_device_memory():
20322034
def print_device_memory():
20332035
memory_str = get_device_memory_str()
20342036
print(f"Device Memory:\ntotal: reserved: used: free: \n{memory_str}")
2035-
torch.cuda.empty_cache()
2036-
import gc
2037-
gc.collect()
2038-
memory_str = get_device_memory_str()
2039-
print(f"Device Memory:\ntotal: reserved: used: free: \n{memory_str}")
2037+
20402038

20412039
def pytest_addoption(parser):
20422040
parser.addoption(

tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -32,10 +32,10 @@ l0_gb200_multi_nodes:
3232
backend: pytorch
3333
tests:
3434
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency] TIMEOUT (180)
35-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] TIMEOUT (180)
35+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] TIMEOUT (180) ISOLATION
3636
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen] TIMEOUT (180)
3737
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_adp_lmtp] TIMEOUT (180)
38-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen_adp_lmtp] TIMEOUT (180)
38+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen_adp_lmtp] TIMEOUT (180) ISOLATION
3939
- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] TIMEOUT (90)
4040
- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] TIMEOUT (90)
4141
- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_attention_dp] TIMEOUT (90)

tests/integration/test_lists/waives.txt

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -341,7 +341,6 @@ accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[True] SKIP (
341341
accuracy/test_disaggregated_serving.py::TestGPTOSS::test_auto_dtype[False] SKIP (https://nvbugs/5644632)
342342
test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-True] SKIP (https://nvbugs/5648560)
343343
test_e2e.py::test_ptp_quickstart_multimodal[mistral-small-3.1-24b-instruct-Mistral-Small-3.1-24B-Instruct-2503-image-False] SKIP (https://nvbugs/5648560)
344-
accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen_adp_lmtp] SKIP (https://nvbugs/5629136)
345344
perf/test_perf.py::test_perf[perf_sanity_upload-l0_dgx_b200] SKIP (https://nvbugs/5643646)
346345
perf/test_perf.py::test_perf[perf_sanity_upload-l0_dgx_b300] SKIP (https://nvbugs/5643646)
347346
unittest/bindings/test_hostfunc.py::test_hostfunc SKIP (https://nvbugs/5643631)

0 commit comments

Comments
 (0)