
Commit d555fe2

ruodil and LarryXFly authored
test: fix for perf test script issue (NVIDIA#4230)
fix for perf test script issue

Signed-off-by: Ruodi <[email protected]>
Co-authored-by: Larry <[email protected]>
1 parent 0cebc16 commit d555fe2

File tree

4 files changed: +17, -17 lines


tests/integration/defs/perf/model_yaml_config.py

Lines changed: 0 additions & 1 deletion
@@ -33,7 +33,6 @@ def get_model_yaml_config(model_label: str) -> dict:
             'print_iter_log': True,
             'use_cuda_graph': True,
             'cuda_graph_padding_enabled': True,
-            'cuda_graph_batch_sizes': [1, 512],
             'cuda_graph_max_batch_size': 4096,
         }
     }
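For reference, the config fragment now relies on cuda_graph_max_batch_size alone. A minimal sketch of the dict after this change (nesting and indentation are assumed from the diff context; the keys themselves come from the hunk above):

# Sketch of the config fragment after this deletion: CUDA graph capture
# sizes are now derived from 'cuda_graph_max_batch_size' rather than an
# explicit 'cuda_graph_batch_sizes' list.
config = {
    'print_iter_log': True,
    'use_cuda_graph': True,
    'cuda_graph_padding_enabled': True,
    'cuda_graph_max_batch_size': 4096,
}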

tests/integration/defs/perf/test_perf.py

Lines changed: 9 additions & 7 deletions
@@ -608,8 +608,8 @@ def validate(self):
         # Validate quantization mode.
         if self.model_name in MODEL_PATH_DICT.keys():
             VALID_QUANTS = [
-                "", "nvfp4", "fp8", "int8_sq", "int4_awq", "w4a8_awq",
-                "w4a16_awq", "int8_wo", "int4_wo", "full_prec"
+                "", "nvfp4", "fp8", "int8", "int4_awq", "w4a8_awq", "w4a16_awq",
+                "int4_wo", "full_prec"
             ]
         else:
             VALID_QUANTS = [
@@ -625,6 +625,8 @@ def validate(self):
                 "int4_weight_only_gptq",
             ]
         assert self.quantization in VALID_QUANTS, f"Invalid quantization {self.quantization}!"
+        if self.backend == "pytorch":
+            assert self.quantization == "", f"Not support passing quantization {self.quantization} for pytorch backend!"
         assert self.num_beams >= 1, f"Invalid num_beams: {self.num_beams}!"
         assert self.num_loras >= 0, f"Invalid num_loras: {self.num_loras}!"
         assert self.num_reqs >= 1, f"Invalid num_reqs: {self.num_reqs}!"
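Taken out of the class, the tightened check reduces to the sketch below. VALID_QUANTS is the MODEL_PATH_DICT branch from the first hunk; the standalone function wrapper is only for illustration, as the real code lives in the validate() method shown above and reads self.quantization and self.backend:

# Standalone sketch of the validation logic added in this hunk.
VALID_QUANTS = [
    "", "nvfp4", "fp8", "int8", "int4_awq", "w4a8_awq", "w4a16_awq",
    "int4_wo", "full_prec"
]

def validate_quantization(quantization: str, backend: str) -> None:
    assert quantization in VALID_QUANTS, f"Invalid quantization {quantization}!"
    # New rule from this commit: the pytorch backend accepts no explicit
    # quantization mode, so anything other than "" is rejected.
    if backend == "pytorch":
        assert quantization == "", (
            f"Not support passing quantization {quantization} for pytorch backend!")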
@@ -803,8 +805,8 @@ def get_convert_weights_command(self, model_dir, engine_dir) -> str:
         if self._config.quantization != "":
             command, checkpoint_dir = quantize_data(
                 llm_venv=None,
-                example_root=os.path.join(get_llm_root(), "examples",
-                                          example_name),
+                example_root=os.path.join(get_llm_root(), "examples", "models",
+                                          "core", example_name),
                 model_dir=model_dir,
                 calib_dataset=os.path.join(llm_models_root(), "datasets",
                                            "cnn_dailymail"),
@@ -816,8 +818,8 @@ def get_convert_weights_command(self, model_dir, engine_dir) -> str:
         else:
             command, checkpoint_dir = convert_weights(
                 llm_venv=None,
-                example_root=os.path.join(get_llm_root(), "examples",
-                                          example_name),
+                example_root=os.path.join(get_llm_root(), "examples", "models",
+                                          "core", example_name),
                 cmodel_dir=engine_dir,
                 model=self._config.model_name,
                 model_path=model_dir,
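Both call sites above make the same change: example roots move from examples/<example_name> to examples/models/core/<example_name>. A tiny illustration of the path construction (the helper is hypothetical; get_llm_root() and example_name are the names used in the diff):

import os

# Hypothetical helper mirroring the new path layout; the diff inlines this
# expression at both the quantize_data and convert_weights call sites.
def example_root(llm_root: str, example_name: str) -> str:
    # old layout: <llm_root>/examples/<example_name>
    return os.path.join(llm_root, "examples", "models", "core", example_name)

# Path components here are illustrative only:
print(example_root("/code/tensorrt_llm", "llama"))
# -> /code/tensorrt_llm/examples/models/core/llama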
@@ -1393,7 +1395,7 @@ def run_metrics(self, llm_venv, gpu_clock_lock, session_data_writer,
         Run through the commands and parse multiple perf metrics from the logs.
         """
         #print info to separate cases
-        print_info(f"Running perf test for case: {self._short_test_name_body}")
+        print_info(f"Running perf test for case: {self._short_test_name}")
         self._current_cmd_idx = 0
         metrics = self._get_metrics()
         outputs = {}

tests/integration/test_lists/qa/trt_llm_release_perf_sanity_test.yml

Lines changed: 3 additions & 4 deletions
@@ -64,8 +64,7 @@ trt_llm_release_perf_sanity_test:
    tests:
    - perf/test_perf.py::test_perf[t5-bench-float16-maxbs:1-input_output_len:128,20-gpus:2]
    - perf/test_perf.py::test_perf[flan_t5_large-bench-float16-maxbs:1-input_output_len:128,20-gpus:2]
-   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8_sq-gpus:2]
-   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8_wo-gpus:2]
+   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8-gpus:2]
    - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-bfloat16-maxbs:256-input_output_len:128,128-gpu:2]
    - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-streaming-bfloat16-input_output_len:128,128-gpus:2]
    - perf/test_perf.py::test_perf[llama_v3.2_1b-bench-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:2]
@@ -101,8 +100,8 @@ trt_llm_release_perf_sanity_test:
        gte: 4
    tests:
    - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-maxbs:1-input_output_len:128,128-reqs:10-gpus:4]
-   - perf/test_perf.py::test_perf[qwen_14b_chat-bench-float16-input_output_len:128,128-gpus:4]
-   - perf/test_perf.py::test_perf[starcoder_15b-bench-float16-maxbs:1-input_output_len:512,200-reqs:10-gpus:4]
+   - perf/test_perf.py::test_perf[qwen_14b_chat-cppmanager-ootb_except_mha-float16-input_output_len:128,128-gpus:4]
+   - perf/test_perf.py::test_perf[starcoder_15.5b-cppmanager-exe-plugin_ifb-float16-maxbs:1-input_output_len:512,200-reqs:10-gpus:4]

    # Tests for systems with 8+ GPUs
    - condition:
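The bracketed IDs in these lists encode one perf case as dash-joined fields: model, runtime/backend, dtype, then key:value options such as quant: and gpus:. A hypothetical helper (not in the repo, inferred from the names above) shows the composition:

# Hypothetical composer for the test IDs used in these YAML lists.
def perf_test_id(*fields: str) -> str:
    return f"perf/test_perf.py::test_perf[{'-'.join(fields)}]"

print(perf_test_id("llama_v3.1_8b_instruct", "bench", "bfloat16",
                   "input_output_len:128,128", "quant:int8", "gpus:2"))
# -> perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8-gpus:2]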

tests/integration/test_lists/qa/trt_llm_release_perf_test.yml

Lines changed: 5 additions & 5 deletions
@@ -64,7 +64,7 @@ trt_llm_release_perf_test:
    - perf/test_perf.py::test_perf[llama_v3.1_8b-cppmanager-exe-plugin_ifb-bfloat16-mp-maxbs:256-input_output_len:128,128-pp:2]
    - perf/test_perf.py::test_perf[t5-bench-float16-input_output_len:128,20-gpus:2]
    - perf/test_perf.py::test_perf[flan_t5_large-bench-float16-input_output_len:128,20-gpus:2]
-   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8_sq-gpus:2]
+   - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8-gpus:2]
    - perf/test_perf.py::test_perf[llama_v3.1_8b_instruct-bench-bfloat16-input_output_len:128,128-quant:int8_wo-gpus:2]
    - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-bfloat16-input_output_len:128,128-gpus:2]
    - perf/test_perf.py::test_perf[llama_v3.1_8b-bench-streaming-bfloat16-input_output_len:128,128-gpus:2]
@@ -96,9 +96,10 @@ trt_llm_release_perf_test:
    - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-streaming-bfloat16-input_output_len:128,128-gpus:4]
    - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-input_output_len:128,128-gpus:4]
    - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-input_output_len:512,32-gpus:4]
-   - perf/test_perf.py::test_perf[qwen_14b_chat-bench-float16-input_output_len:128,128-gpus:4]
-   - perf/test_perf.py::test_perf[qwen_14b_chat-bench-float16-input_output_len:512,32-gpus:4]
-   - perf/test_perf.py::test_perf[starcoder_15b-bench-pytorch-float16-input_output_len:512,200-gpus:4]
+   - perf/test_perf.py::test_perf[qwen_14b_chat-cppmanager-exe-plugin_ifb-float16-input_output_len:128,128-gpus:4]
+   - perf/test_perf.py::test_perf[qwen_14b_chat-cppmanager-ootb_except_mha-float16-input_output_len:128,128+512,32-gpus:4]
+   - perf/test_perf.py::test_perf[starcoder_15.5b-cppmanager-exe-plugin_ifb-float16-maxbs:1-input_output_len:512,200-reqs:10-gpus:4]
+   - perf/test_perf.py::test_perf[starcoder_15.5b-cppmanager-ootb_except_mha-float16-maxbs:1-input_output_len:512,200-reqs:10-gpus:4]

    - condition:
      ranges:
@@ -197,7 +198,6 @@ trt_llm_release_perf_test:
    - perf/test_perf.py::test_perf[llama_v3.1_70b-bench-bfloat16-input_output_len:512,200-quant:fp8-tp:4]
    - perf/test_perf.py::test_perf[llama_v3.3_70b_instruct_fp8-bench-pytorch-float8-input_output_len:128,128-tp:4]
    - perf/test_perf.py::test_perf[mixtral_8x22b_v0.1-bench-float16-input_output_len:512,512-quant:fp8-tp:4]
-   - perf/test_perf.py::test_perf[starcoder_15b-bench-pytorch-float16-input_output_len:512,512-quant:fp8-tp:4]

    - condition:
      terms:
