Skip to content

Commit c280de9

Browse files
committed
Print memory info
Signed-off-by: Hui Gao <[email protected]>
1 parent 93b5433 commit c280de9

File tree

3 files changed: 11 additions (+11) and 8 deletions (-8).

3 files changed: 11 additions (+11) and 8 deletions (-8).

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,10 @@
2828
SamplingParams, TorchCompileConfig)
2929
from tensorrt_llm.quantization import QuantAlgo
3030

31-
from ..conftest import (get_device_count, get_device_memory, print_device_memory, llm_models_root,
32-
parametrize_with_ids, skip_no_hopper,
33-
skip_post_blackwell, skip_pre_ada, skip_pre_blackwell,
34-
skip_pre_hopper, skip_ray)
31+
from ..conftest import (get_device_count, get_device_memory, llm_models_root,
32+
parametrize_with_ids, print_device_memory,
33+
skip_no_hopper, skip_post_blackwell, skip_pre_ada,
34+
skip_pre_blackwell, skip_pre_hopper, skip_ray)
3535
from .accuracy_core import (GSM8K, MMLU, CnnDailymail, GPQADiamond,
3636
JsonModeEval, LlmapiAccuracyTestHarness,
3737
LongBenchV2)

tests/integration/defs/conftest.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1996,12 +1996,14 @@ def get_device_memory_str():
19961996
with tempfile.TemporaryDirectory() as temp_dirname:
19971997
suffix = ".exe" if is_windows() else ""
19981998
cmd = " ".join([
1999-
"nvidia-smi" + suffix, "--query-gpu=memory.total,memory.reserved,memory.used,memory.free",
2000-
"--format=csv,noheader"
2001-
])
1999+
"nvidia-smi" + suffix,
2000+
"--query-gpu=memory.total,memory.reserved,memory.used,memory.free",
2001+
"--format=csv,noheader"
2002+
])
20022003
output = check_output(cmd, shell=True, cwd=temp_dirname)
20032004
return output.strip()
20042005

2006+
20052007
def get_device_memory():
20062008
"get gpu memory"
20072009
memory = 0
@@ -2038,6 +2040,7 @@ def print_device_memory():
20382040
memory_str = get_device_memory_str()
20392041
print(f"Device Memory:\ntotal: reserved: used: free: \n{memory_str}")
20402042

2043+
20412044
def pytest_addoption(parser):
20422045
parser.addoption(
20432046
"--test-list",

tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ l0_gb200_multi_nodes:
3535
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] TIMEOUT (180)
3636
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen] TIMEOUT (180)
3737
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_adp_lmtp] TIMEOUT (180)
38-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen_adp_lmtp] TIMEOUT (180)
38+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen_adp_lmtp] TIMEOUT (180) ISOLATION
3939
- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] TIMEOUT (90)
4040
- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] TIMEOUT (90)
4141
- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_attention_dp] TIMEOUT (90)

0 commit comments

Comments
 (0)