Skip to content

Commit 8686868

Browse files
crazydemo and LarryXFly authored
tests: [TRTQA-2905] improve timeout report for qa test cases (NVIDIA#4753)
Signed-off-by: Ivy Zhang <[email protected]> Co-authored-by: Larry <[email protected]>
1 parent ec796e4 commit 8686868

File tree

4 files changed

+64
-20
lines changed

4 files changed

+64
-20
lines changed

tests/integration/defs/examples/test_llama.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,9 @@
3030
venv_mpi_check_call)
3131
# yapf: disable
3232
from defs.conftest import (get_device_count, get_device_memory,
33-
get_host_total_memory, skip_fp8_pre_ada,
34-
skip_no_nvls, skip_post_blackwell, skip_pre_ada,
35-
skip_pre_blackwell)
33+
get_host_total_memory, get_sm_version,
34+
skip_fp8_pre_ada, skip_no_nvls, skip_post_blackwell,
35+
skip_pre_ada, skip_pre_blackwell)
3636
# yapf: enable
3737
from defs.trt_test_alternative import check_call, exists
3838

@@ -3022,6 +3022,7 @@ def test_llm_llama_v3_8b_1048k_long_context_ppl(llama_example_root,
30223022
'Llama-3-8B-Instruct-Gradient-1048k', 'Llama-3-70B-Instruct-Gradient-1048k'
30233023
],
30243024
indirect=True)
3025+
@pytest.mark.timeout(10800 if get_sm_version() < 89 else 3600)
30253026
def test_llm_llama_v3_1m_long_context_8gpus(llama_example_root,
30263027
llama_model_root, llm_venv,
30273028
engine_dir, cmodel_dir):

tests/integration/defs/examples/test_mistral.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,22 +13,40 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515
"""Module test_mistral test mistral examples."""
16+
import multiprocessing
1617
import platform
1718

19+
import psutil
1820
import pytest
1921
from defs.common import (convert_weights, quantize_data,
2022
test_multi_lora_support, venv_check_call)
2123
from defs.conftest import skip_pre_ada
2224
from defs.trt_test_alternative import check_call
2325

2426

27+
def get_optimal_jobs(memory_per_job_gb=4,
                     busy_load_threshold=0.7,
                     busy_cpu_factor=0.5,
                     idle_cpu_factor=0.75):
    """Pick a parallel job count bounded by both CPU and available memory.

    The result is used as the ``MAX_JOBS`` value of the pip install command
    built by ``mistral_example_root``. With the default arguments the
    function behaves exactly like the original zero-argument version.

    Args:
        memory_per_job_gb: Memory budget reserved for one job, in GiB.
        busy_load_threshold: Per-CPU 1-minute load average above which the
            machine is treated as busy.
        busy_cpu_factor: Fraction of the CPUs to use on a busy machine.
        idle_cpu_factor: Fraction of the CPUs to use otherwise.

    Returns:
        An ``int`` >= 1: the smaller of the CPU-based and the memory-based
        job count, never less than one.

    Raises:
        ValueError: If ``memory_per_job_gb`` is not positive.
    """
    # Fail early with a clear message instead of a ZeroDivisionError (or a
    # negative job count) further down.
    if memory_per_job_gb <= 0:
        raise ValueError(
            f"memory_per_job_gb must be positive, got {memory_per_job_gb}")

    cpu_count = multiprocessing.cpu_count()

    # Memory bound: how many jobs fit into the currently available memory.
    bytes_per_gib = 1024 * 1024 * 1024
    available_memory = psutil.virtual_memory().available / bytes_per_gib
    memory_based_jobs = int(available_memory / memory_per_job_gb)

    # CPU bound: leave more headroom when the 1-minute load average,
    # normalized by the CPU count, shows the machine is already busy.
    system_load = psutil.getloadavg()[0] / cpu_count
    if system_load > busy_load_threshold:
        cpu_factor = busy_cpu_factor
    else:
        cpu_factor = idle_cpu_factor
    cpu_based_jobs = max(1, int(cpu_count * cpu_factor))

    # Always run at least one job, even when memory is very tight.
    return max(1, min(cpu_based_jobs, memory_based_jobs))
40+
41+
2542
@pytest.fixture(autouse=True, scope="module")
2643
def mistral_example_root(llm_venv):
2744
if platform.system() != "Windows":
2845
# https://github.com/Dao-AILab/flash-attention/issues/345
2946
# No wheel for flash-attn on windows and compilation fails locally.
47+
max_jobs = get_optimal_jobs()
3048
install_cmd = [
31-
"MAX_JOBS=4",
49+
f"MAX_JOBS={max_jobs}",
3250
"python3",
3351
"-m",
3452
"pip",

tests/integration/defs/trt_test_alternative.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -203,11 +203,12 @@ def call(*popenargs,
203203
**kwargs):
204204
if not suppress_output_info:
205205
print(f"Start subprocess with call({popenargs}, {kwargs})")
206+
actual_timeout = get_pytest_timeout(timeout)
206207
with popen(*popenargs,
207208
start_new_session=start_new_session,
208209
suppress_output_info=True,
209210
**kwargs) as p:
210-
return p.wait(timeout=timeout)
211+
return p.wait(timeout=actual_timeout)
211212

212213

213214
def check_call(*popenargs, **kwargs):
@@ -223,12 +224,13 @@ def check_call(*popenargs, **kwargs):
223224

224225
def check_output(*popenargs, timeout=None, start_new_session=True, **kwargs):
225226
print(f"Start subprocess with check_output({popenargs}, {kwargs})")
227+
actual_timeout = get_pytest_timeout(timeout)
226228
with Popen(*popenargs,
227229
stdout=subprocess.PIPE,
228230
start_new_session=start_new_session,
229231
**kwargs) as process:
230232
try:
231-
stdout, stderr = process.communicate(None, timeout=timeout)
233+
stdout, stderr = process.communicate(None, timeout=actual_timeout)
232234
except subprocess.TimeoutExpired as exc:
233235
cleanup_process_tree(process, start_new_session)
234236
if is_windows():
@@ -303,3 +305,26 @@ def check_call_negative_test(*popenargs, **kwargs):
303305
f"Subprocess expected to fail with check_call_negative_test({popenargs}, {kwargs}), but passed."
304306
)
305307
raise subprocess.CalledProcessError(1, cmd)
308+
309+
310+
def get_pytest_timeout(timeout=None):
    """Return the timeout, in seconds, to apply to a subprocess wait.

    ``call`` and ``check_output`` pass the result to ``wait`` /
    ``communicate``. When the running test carries a ``timeout`` marker, the
    subprocess budget is capped at 90% of the marker value (but not below 30
    seconds). An explicit ``timeout`` that is already shorter than that cap
    is kept as-is.

    Args:
        timeout: Timeout requested by the caller, or ``None`` for no limit.

    Returns:
        ``timeout`` unchanged when no usable marker is found; otherwise the
        marker-derived cap, or the smaller of the cap and ``timeout`` when
        both are set.
    """
    try:
        import pytest
    except ImportError:
        return timeout

    # NOTE(review): pytest itself does not define ``current_test``; this only
    # takes effect if something else (presumably a conftest hook) attaches
    # the running item to the pytest module - TODO confirm. Without it the
    # caller's timeout is returned unchanged.
    current_item = getattr(pytest, "current_test", None)
    iter_markers = getattr(current_item, "iter_markers", None)
    if iter_markers is None:
        return timeout

    # Best effort: a malformed marker must never break the subprocess call.
    try:
        timeout_mark = next(iter(iter_markers("timeout")), None)
        if timeout_mark is None:
            return timeout
        mark_args = getattr(timeout_mark, "args", ())
        mark_kwargs = getattr(timeout_mark, "kwargs", None) or {}
        # Accept both @pytest.mark.timeout(N) and
        # @pytest.mark.timeout(timeout=N).
        if mark_args:
            timeout_pytest = mark_args[0]
        else:
            timeout_pytest = mark_kwargs.get("timeout")
    except Exception as e:
        print(f"Error getting pytest timeout: {e}")
        return timeout

    # bool is a subclass of int; it is not a meaningful timeout value.
    if isinstance(timeout_pytest, bool) or not isinstance(
            timeout_pytest, (int, float)):
        return timeout
    if timeout_pytest <= 0:
        return timeout

    # Keep 10% headroom so the subprocess times out before the test does.
    # NOTE: the 30 second floor can exceed marker values below ~33 seconds.
    derived_timeout = max(30, int(timeout_pytest * 0.9))
    if timeout is None:
        return derived_timeout
    # Never lengthen a timeout the caller explicitly asked for.
    return min(timeout, derived_timeout)

tests/integration/test_lists/qa/examples_test_list.txt

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@ examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-chat-ena
1414
examples/test_chatglm.py::test_llm_glm_4_9b_single_gpu_summary[glm-4-9b-enable_weight_only]
1515
examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[disable_weight_only]
1616
examples/test_commandr.py::test_llm_commandr_v01_single_gpu_summary[enable_weight_only]
17-
examples/test_commandr.py::test_llm_commandr_plus_4gpus_summary[disable_weight_only]
18-
examples/test_commandr.py::test_llm_commandr_plus_4gpus_summary[enable_weight_only]
17+
examples/test_commandr.py::test_llm_commandr_plus_4gpus_summary[disable_weight_only] TIMEOUT (40)
18+
examples/test_commandr.py::test_llm_commandr_plus_4gpus_summary[enable_weight_only] TIMEOUT (40)
1919
examples/test_eagle.py::test_llm_eagle_1gpu_modelopt_ckpt[llama3.1-eagle-8b-hf_v0.5-float16-bs8]
2020
examples/test_eagle.py::test_llm_eagle_1gpu[EAGLE-Vicuna-7B-v1.3-float16-bs1-eagle1]
2121
examples/test_eagle.py::test_llm_eagle_1gpu[EAGLE-Vicuna-7B-v1.3-float16-bs1-eagle2]
22-
examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]
22+
examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-bart-large-cnn-float16-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8] TIMEOUT (60)
2323
examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-byt5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-enable_fp8]
2424
examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:1-pp:1-nb:1-disable_fp8]
2525
examples/test_enc_dec.py::test_llm_enc_dec_general[compare_hf-flan-t5-small-float32-enable_gemm_plugin-enable_attention_plugin-enable_paged_kv_cache-tp:2-pp:2-nb:1-enable_fp8]
@@ -70,7 +70,7 @@ examples/test_gpt.py::test_starcoder_fp8_quantization_2gpu[starcoder]
7070
examples/test_gpt.py::test_starcoder_fp8_quantization_2gpu[starcoderplus]
7171
examples/test_gpt.py::test_starcoder_fp8_quantization_2gpu[starcoder2]
7272
examples/test_llama.py::test_mistral_nemo_fp8_with_bf16_lora[Mistral-Nemo-12b-Base]
73-
examples/test_mistral.py::test_mistral_nemo_minitron_fp8_with_bf16_lora[Mistral-NeMo-Minitron-8B-Instruct]
73+
examples/test_mistral.py::test_mistral_nemo_minitron_fp8_with_bf16_lora[Mistral-NeMo-Minitron-8B-Instruct] TIMEOUT (40)
7474
examples/test_phi.py::test_phi_fp8_with_bf16_lora[phi-2]
7575
examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3-mini-128k-instruct]
7676
examples/test_phi.py::test_phi_fp8_with_bf16_lora[Phi-3-small-128k-instruct]
@@ -110,9 +110,9 @@ examples/test_llama.py::test_llm_llama_code_llama_1gpu_summary[CodeLlama-7b-Inst
110110
examples/test_llama.py::test_llm_llama_code_llama_1gpu_summary[CodeLlama-7b-Instruct-enable_with_fp32_acc-enable_gemm_plugin-enable_attention_plugin-nb:1]
111111
examples/test_llama.py::test_llm_llama_code_llama_multi_gpus_summary[CodeLlama-34b-Instruct-tp4pp1-nb:4]
112112
examples/test_llama.py::test_llm_llama_code_llama_multi_gpus_summary[CodeLlama-70b-hf-tp2pp2-nb:1]
113-
examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-34b-Instruct-tp2pp2-int4_awq-nb:4]
113+
examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-34b-Instruct-tp2pp2-int4_awq-nb:4] TIMEOUT (40)
114114
examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-34b-Instruct-tp4pp1-fp8-nb:1]
115-
examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-70b-hf-tp2pp2-int4_awq-nb:1]
115+
examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-70b-hf-tp2pp2-int4_awq-nb:1] TIMEOUT (40)
116116
examples/test_llama.py::test_llm_llama_code_llama_quantization_4gpus_summary[CodeLlama-70b-hf-tp4pp1-fp8-nb:4]
117117
examples/test_llama.py::test_codellama_fp8_with_bf16_lora[CodeLlama-7b-Instruct]
118118
examples/test_llama.py::test_llama_3_x_fp8_with_bf16_lora[llama-v2-7b-hf]
@@ -135,11 +135,11 @@ examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-lla
135135
examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_int8_wo]
136136
examples/test_llama.py::test_llm_llama_v2_lora_1gpu[chinese-llama-2-lora-13b-llama-v2-13b-hf-lora_fp16-base_sq_ootb]
137137
examples/test_llama.py::test_llm_llama_v2_lora_benchmark_2gpu[chinese_lora-llama-v2-13b-hf]
138-
examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-405b-enable_fp8]
139-
examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-405b-fp8-disable_fp8]
140-
examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-70b-disable_fp8]
138+
examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-405b-enable_fp8] TIMEOUT (120)
139+
examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-405b-fp8-disable_fp8] TIMEOUT (90)
140+
examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[enable_gemm_allreduce_plugin-llama-3.1-70b-disable_fp8] TIMEOUT (40)
141141
examples/test_llama.py::test_llm_llama_v3_1_1node_multi_gpus[disable_gemm_allreduce_plugin-llama-3.1-70b-enable_fp8]
142-
examples/test_llama.py::test_llm_llama_v3_1m_long_context_8gpus[Llama-3-8B-Instruct-Gradient-1048k]
142+
examples/test_llama.py::test_llm_llama_v3_1m_long_context_8gpus[Llama-3-8B-Instruct-Gradient-1048k] TIMEOUT (180)
143143
examples/test_llama.py::test_llm_llama_v3_dora_1gpu[commonsense-llama-v3-8b-dora-r32-llama-v3-8b-hf-base_fp16]
144144
examples/test_llama.py::test_llm_llama_1gpu_fp4[llama-3.1-70b-instruct-enable_norm_quant_fusion-enable_fused_quant-fp4_plugin-bfloat16]
145145
examples/test_llama.py::test_llm_llama_2gpu_fp4[llama-3.1-70b-instruct-fp4_plugin]
@@ -216,7 +216,7 @@ examples/test_phi.py::test_llm_phi_quantization_1gpu[phi-2-fp8-bfloat16]
216216
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3-mini-128k-instruct-fp8-float16]
217217
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3-small-128k-instruct-fp8-bfloat16]
218218
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3.5-mini-instruct-fp8-float16]
219-
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3.5-MoE-instruct-fp8-bfloat16]
219+
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-3.5-MoE-instruct-fp8-bfloat16] TIMEOUT (60)
220220
examples/test_phi.py::test_llm_phi_quantization_1gpu[Phi-4-mini-instruct-fp8-bfloat16]
221221
examples/test_qwen.py::test_llm_qwen1_5_7b_single_gpu_lora[qwen1.5_7b_chat-Qwen1.5-7B-Chat-750Mb-lora]
222222
examples/test_qwen.py::test_llm_qwen1_5_moe_plugin_single_gpu_lora[qwen1.5_moe_a2.7b_chat-Upcycled-Qwen1.5-MoE2.7B-LoRA]
@@ -394,13 +394,13 @@ accuracy/test_cli_flow.py::TestMixtral8x7B::test_weight_only_int8_tp2
394394
accuracy/test_cli_flow.py::TestMixtral8x7B::test_pp_reduce_scatter_tp2pp2
395395
accuracy/test_cli_flow.py::TestMixtral8x7B::test_ootb_except_mha_tp8[expert_parallel]
396396
accuracy/test_cli_flow.py::TestMixtral8x7B::test_ootb_except_mha_tp8[mixed_parallel]
397-
accuracy/test_cli_flow.py::TestMixtral8x7B::test_ootb_except_mha_tp8[tensor_parallel]
397+
accuracy/test_cli_flow.py::TestMixtral8x7B::test_ootb_except_mha_tp8[tensor_parallel] TIMEOUT (40)
398398
accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[no_renormalize-tensor_parallel]
399399
accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[renormalize-expert_parallel]
400400
accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[renormalize-mixed_parallel]
401401
accuracy/test_cli_flow.py::TestMixtral8x7B::test_plugin_tp8[renormalize-tensor_parallel]
402402
accuracy/test_cli_flow.py::TestMixtral8x7B::test_nvfp4_prequantized
403-
accuracy/test_cli_flow.py::TestMixtral8x22B::test_fp8_tp2pp2
403+
accuracy/test_cli_flow.py::TestMixtral8x22B::test_fp8_tp2pp2 TIMEOUT (50)
404404
accuracy/test_cli_flow.py::TestMixtral8x22B::test_int8_plugin_tp8[renormalize-tensor_parallel]
405405
accuracy/test_cli_flow.py::TestGemma2_9BIt::test_auto_dtype
406406
accuracy/test_cli_flow.py::TestGemma2_9BIt::test_weight_only[int8]
@@ -509,7 +509,7 @@ test_e2e.py::test_ptp_quickstart_advanced_8gpus[Nemotron-Ultra-253B-nemotron-nas
509509
test_e2e.py::test_ptp_quickstart_advanced_deepseek_r1_8gpus[DeepSeek-R1-DeepSeek-R1/DeepSeek-R1]
510510
test_e2e.py::test_relaxed_acceptance_quickstart_advanced_deepseek_r1_8gpus[DeepSeek-R1-DeepSeek-R1/DeepSeek-R1]
511511
test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-image]
512-
test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video]
512+
test_e2e.py::test_ptp_quickstart_multimodal[NVILA-8B-FP16-vila/NVILA-8B-video] TIMEOUT (60)
513513
test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image]
514514
test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-image]
515515
test_e2e.py::test_ptp_quickstart_multimodal[qwen2-vl-7b-instruct-Qwen2-VL-7B-Instruct-video]

0 commit comments

Comments
 (0)