Skip to content

Commit a1ed03f

Browse files
authored
[None][fix] AD test_trtllm_bench to use small model config and skip loading weights (#8149)
Signed-off-by: Eran Geva <19514940+MrGeva@users.noreply.github.com>
1 parent fdbeea5 commit a1ed03f

File tree

2 files changed

+12
-14
lines changed

2 files changed

+12
-14
lines changed

tests/unittest/_torch/auto_deploy/_utils_test/_model_test_utils.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -480,6 +480,12 @@ def apply_rotary_pos_emb_ds(q, k, cos, sin, position_ids, unsqueeze_dim=1):
480480
"ssm_state_size": 32,
481481
},
482482
},
483+
"TinyLlama/TinyLlama-1.1B-Chat-v1.0": {
484+
"llm_models_subdir": "llama-models-v2/TinyLlama-1.1B-Chat-v1.0",
485+
"model_kwargs": {
486+
"num_hidden_layers": 2,
487+
},
488+
},
483489
}
484490

485491

tests/unittest/_torch/auto_deploy/unit/singlegpu/test_ad_trtllm_bench.py

Lines changed: 6 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -4,20 +4,13 @@
44

55
import pytest
66
import yaml
7-
from _model_test_utils import _hf_model_dir_or_hub_id
7+
from _model_test_utils import get_small_model_config
88
from click.testing import CliRunner
99
from utils.cpp_paths import llm_root # noqa: F401
1010

1111
from tensorrt_llm.commands.bench import main
1212

1313

14-
def tiny_llama_details():
15-
model_path = "llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
16-
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
17-
model_path_or_name = _hf_model_dir_or_hub_id(model_path, model_name)
18-
return model_path_or_name, model_name, model_path
19-
20-
2114
def run_benchmark(model_name: str, dataset_path: str, extra_llm_api_options_path: str):
2215
runner = CliRunner()
2316

@@ -74,20 +67,19 @@ def prepare_dataset(root_dir: str, temp_dir: str, model_path_or_name: str):
7467

7568

7669
@pytest.mark.parametrize("compile_backend", ["torch-compile", "torch-opt", "torch-cudagraph"])
77-
def test_trtllm_bench(llm_root, compile_backend): # noqa: F811
78-
model_path_or_name, model_name, model_path = tiny_llama_details()
70+
@pytest.mark.parametrize("model_name", ["TinyLlama/TinyLlama-1.1B-Chat-v1.0"])
71+
def test_trtllm_bench(llm_root, compile_backend, model_name): # noqa: F811
72+
config = get_small_model_config(model_name)
7973
with tempfile.TemporaryDirectory() as temp_dir:
8074
extra_llm_api_options_path = f"{temp_dir}/extra_llm_api_options.yaml"
8175
with open(extra_llm_api_options_path, "w") as f:
8276
yaml.dump(
8377
{
84-
"model_kwargs": {"num_hidden_layers": 2},
85-
"cuda_graph_batch_sizes": [1, 2, 4, 8, 16, 32, 64, 128],
86-
"max_batch_size": 128,
8778
"compile_backend": compile_backend,
79+
**config["args"],
8880
},
8981
f,
9082
)
9183

92-
dataset_path = prepare_dataset(llm_root, temp_dir, model_path_or_name)
84+
dataset_path = prepare_dataset(llm_root, temp_dir, config["args"]["model"])
9385
run_benchmark(model_name, dataset_path, extra_llm_api_options_path)

0 commit comments

Comments (0)