Skip to content

Commit 4e31451

Browse files
committed
add qwen3-4b accuracy test case
Signed-off-by: Ivy Zhang <[email protected]>
1 parent f631b25 commit 4e31451

File tree

3 files changed

+35
-0
lines changed

3 files changed

+35
-0
lines changed

tests/integration/defs/accuracy/references/gsm8k.yaml

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,9 @@ deepseek-ai/DeepSeek-V3.2-Exp:
106106
- quant_algo: NVFP4
107107
spec_dec_algo: MTP
108108
accuracy: 95.6
109+
Qwen3/Qwen3-4B:
110+
- spec_dec_algo: Eagle
111+
accuracy: 85.823
109112
Qwen3/Qwen3-8B:
110113
- accuracy: 87.1114
111114
- spec_dec_algo: Eagle

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3294,6 +3294,37 @@ def test_auto_dtype(self):
32943294
extra_evaluator_kwargs=self.EXTRA_EVALUATOR_KWARGS)
32953295

32963296

3297+
class TestQwen3_4B(LlmapiAccuracyTestHarness):
    """GSM8K accuracy test for the Qwen3-4B checkpoint."""

    MODEL_NAME = "Qwen3/Qwen3-4B"

    def test_eagle3(self):
        """Run GSM8K on Qwen3-4B with an Eagle speculative-decoding config.

        The draft checkpoint is read from ``Qwen3/Qwen3-4B_eagle3/`` and the
        target checkpoint from ``Qwen3/Qwen3-4B``, both resolved under
        ``llm_models_root()``.

        RCCA: https://nvbugspro.nvidia.com/bug/5698434
        """
        # Checkpoint locations: draft (Eagle) model and the target model.
        draft_checkpoint = f"{llm_models_root()}/Qwen3/Qwen3-4B_eagle3/"
        target_checkpoint = f"{llm_models_root()}/Qwen3/Qwen3-4B"

        # Speculative decoding proposes up to three draft tokens.
        eagle_config = EagleDecodingConfig(
            max_draft_len=3,
            speculative_model_dir=draft_checkpoint,
        )

        # KV cache: block reuse off, 60% of free GPU memory.
        cache_config = KvCacheConfig(
            enable_block_reuse=False,
            free_gpu_memory_fraction=0.6,
        )

        llm = LLM(
            model=target_checkpoint,
            disable_overlap_scheduler=True,
            cuda_graph_config=CudaGraphConfig(),
            kv_cache_config=cache_config,
            speculative_config=eagle_config,
        )

        # The context manager scopes the LLM instance to the evaluation.
        with llm:
            GSM8K(self.MODEL_NAME).evaluate(llm)
3326+
3327+
32973328
class TestQwen3_8B(LlmapiAccuracyTestHarness):
32983329
MODEL_NAME = "Qwen3/Qwen3-8B"
32993330

tests/integration/test_lists/qa/llm_function_core.txt

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -523,6 +523,7 @@ accuracy/test_llm_api_pytorch.py::TestGLM4_6::test_nvfp4_multi_gpus[throughput]
523523
accuracy/test_llm_api_pytorch.py::TestGLM4_6::test_nvfp4_multi_gpus[throughput_trtllm]
524524
accuracy/test_llm_api_pytorch.py::TestGLM4_6::test_nvfp4_2_model_mtp[2model]
525525
accuracy/test_llm_api_pytorch.py::TestGLM4_6::test_nvfp4_2_model_mtp[2model_trtllm]
526+
accuracy/test_llm_api_pytorch.py::TestQwen3_4B::test_eagle3
526527
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency]
527528
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_bf16[multi_gpus_no_cache]
528529
accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=False]

0 commit comments

Comments
 (0)