Skip to content

Commit b0f3a44

Browse files
xinhe-nv authored and dominicshanshan committed
[TRTLLM-8638][fix] fix test issues (NVIDIA#8557)
Signed-off-by: Xin He (SW-GPU) <200704525+xinhe-nv@users.noreply.github.com>
1 parent 289f71c commit b0f3a44

File tree

4 files changed

+17
-5
lines changed

4 files changed

+17
-5
lines changed

tests/integration/defs/.test_durations

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -570,7 +570,7 @@
570570
"examples/test_exaone.py::test_llm_exaone_2gpu[exaone_3.0_7.8b_instruct-float16-nb:1]": 7155.35844087804435,
571571
"examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-27b-it]": 317.7816583644599,
572572
"examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-9b-it]": 317.7816583644599,
573-
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2-27b-it-other-bfloat16-8]": 1620.1766637390829,
573+
"examples/test_gemma.py::test_llm_gemma_1gpu_summary[gemma-2-27b-it-other-bfloat16-8]": 7020.1766637390829,
574574
"examples/test_gemma.py::test_llm_gemma_1gpu_summary_vswa[gemma-3-1b-it-other-bfloat16-8]": 195.3050664511975,
575575
"examples/test_gemma.py::test_llm_hf_gemma_quantization_1gpu[gemma-2-27b-it-fp8-bfloat16-8]": 317.7816583644599,
576576
"examples/test_gpt.py::test_llm_gpt2_medium_1gpu[non_streaming-use_cpp_session-enable_gemm_plugin]": 114.20040711760521,

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2219,6 +2219,8 @@ def test_nvfp4_multi_gpus_corner_case(self):
22192219

22202220
@pytest.mark.skip_less_mpi_world_size(8)
22212221
@skip_pre_hopper
2222+
@pytest.mark.skipif(get_sm_version() >= 100,
2223+
reason="https://nvbugs/5547584 WNF")
22222224
@pytest.mark.skip_less_device_memory(140000)
22232225
@pytest.mark.parametrize(
22242226
"tp_size,pp_size,ep_size,mtp_nextn,fp8kv,attention_dp,cuda_graph,overlap_scheduler,max_batch_size,moe_backend",
@@ -2272,6 +2274,7 @@ def test_fp8_blockscale(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
22722274

22732275
@pytest.mark.skip_less_mpi_world_size(8)
22742276
@skip_pre_hopper
2277+
@pytest.mark.skip_less_device_memory(140000)
22752278
@pytest.mark.parametrize(
22762279
"tp_size,pp_size,ep_size,mtp_nextn,fp8kv,attention_dp,cuda_graph,overlap_scheduler,max_batch_size",
22772280
[(8, 1, 4, 3, False, False, True, True, 1),
@@ -3573,6 +3576,7 @@ def test_w4_chunked_prefill(self, kv_cache_dtype, moe_backend, mocker):
35733576
extra_evaluator_kwargs=self.extra_evaluator_kwargs)
35743577

35753578

3579+
@skip_pre_hopper
35763580
class TestEXAONE4(LlmapiAccuracyTestHarness):
35773581
MODEL_NAME = "LGAI-EXAONE/EXAONE-4.0-32B"
35783582
kv_cache_config = KvCacheConfig(enable_block_reuse=False,

tests/integration/defs/examples/test_gemma.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ def test_llm_gemma_1gpu_summary_vswa(batch_size, data_type, gemma_model_root,
210210
max_attention_window)
211211

212212

213-
@pytest.mark.timeout(5400)
213+
@pytest.mark.timeout(7200)
214214
@pytest.mark.parametrize("batch_size", [8])
215215
@pytest.mark.parametrize("data_type", ['float16', 'bfloat16'])
216216
@pytest.mark.parametrize("test_case", [

tests/integration/defs/test_e2e.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3534,23 +3534,31 @@ def test_ptp_quickstart_advanced_llama_multi_nodes(llm_root, llm_venv,
35343534
marks=skip_pre_hopper),
35353535
])
35363536
def test_multi_nodes_eval(llm_venv, model_path, tp_size, pp_size, ep_size,
3537-
eval_task):
3537+
eval_task, mmlu_dataset_root):
35383538
if "Llama-4" in model_path and tp_size == 16:
35393539
pytest.skip("Llama-4 with tp16 is not supported")
35403540

35413541
mmlu_threshold = 81.5
3542+
model_dir = f"{llm_models_root()}/{model_path}"
35423543
run_cmd = [
35433544
"trtllm-llmapi-launch",
35443545
"trtllm-eval",
3545-
f"--model={llm_models_root()}/{model_path}",
3546+
f"--model={model_dir}",
35463547
f"--ep_size={ep_size}",
35473548
f"--tp_size={tp_size}",
35483549
f"--pp_size={pp_size}",
35493550
f"--kv_cache_free_gpu_memory_fraction={_MEM_FRACTION_80}",
35503551
"--max_batch_size=32",
3551-
eval_task,
3552+
"--backend=pytorch",
35523553
]
35533554

3555+
if "Kimi" in model_path:
3556+
run_cmd.append("--trust_remote_code")
3557+
else:
3558+
run_cmd.append(f"--tokenizer={model_dir}")
3559+
3560+
run_cmd.extend([eval_task, f"--dataset_path={mmlu_dataset_root}"])
3561+
35543562
llm_venv._new_env["TRT_LLM_DISABLE_LOAD_WEIGHTS_IN_PARALLEL"] = "1"
35553563
output = check_output(" ".join(run_cmd), shell=True, env=llm_venv._new_env)
35563564

0 commit comments

Comments (0)