Skip to content

Commit 5d71f66

Browse files
authored
[https://nvbugs/5698434][test] Add Qwen3-4B-Eagle3 One-model perf test (#10041)
Signed-off-by: yufeiwu-nv <[email protected]>
1 parent 4740419 commit 5d71f66

File tree

3 files changed

+19
-0
lines changed

3 files changed

+19
-0
lines changed

tests/integration/defs/perf/pytorch_model_config.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,23 @@ def get_model_yaml_config(model_label: str,
215215
}
216216
}
217217
},
218+
{
219+
'patterns': [
220+
'qwen3_4b-bench-pytorch-streaming-bfloat16-maxbs:4-kv_frac:0.6-input_output_len:500,100-reqs:200-con:4',
221+
],
222+
'config': {
223+
'speculative_config': {
224+
'decoding_type': 'Eagle',
225+
'eagle3_one_model': True,
226+
'speculative_model_dir': 'Qwen3-4B_eagle3',
227+
'max_draft_len': 3,
228+
},
229+
'kv_cache_config': {
230+
'enable_block_reuse': False,
231+
},
232+
'enable_chunked_prefill': False,
233+
}
234+
},
218235
# Llama-v3.3 models with fp8 quantization
219236
{
220237
'patterns': [

tests/integration/defs/perf/test_perf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@
110110
"deepseek_v3_lite_nvfp4": "DeepSeek-V3-Lite/nvfp4_moe_only",
111111
"qwen2_7b_instruct": "Qwen2-7B-Instruct",
112112
"qwen_14b_chat": "Qwen-14B-Chat",
113+
"qwen3_4b_eagle3": "Qwen3/Qwen3-4B",
113114
"qwen3_235b_a22b_fp8": "Qwen3/saved_models_Qwen3-235B-A22B_fp8_hf",
114115
"qwen3_235b_a22b_fp4": "Qwen3/saved_models_Qwen3-235B-A22B_nvfp4_hf",
115116
"starcoder2_3b": "starcoder2-3b",

tests/integration/test_lists/qa/llm_perf_sanity.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ llm_perf_sanity:
2727
- perf/test_perf.py::test_perf[ministral_8b-bench-pytorch-bfloat16-input_output_len:500,2000-reqs:500-con:250]
2828
- perf/test_perf.py::test_perf[phi_4_mini_instruct-bench-pytorch-bfloat16-input_output_len:500,2000]
2929
- perf/test_perf.py::test_perf[nemotron_nano_9b_v2-bench-pytorch-bfloat16-input_output_len:512,512]
30+
- perf/test_perf.py::test_perf[qwen3_4b_eagle3-bench-pytorch-streaming-bfloat16-maxbs:4-kv_frac:0.6-input_output_len:500,100-reqs:200-con:4]
3031

3132

3233
# FP8 specific tests

0 commit comments

Comments
 (0)