Skip to content

Commit 5e6f1bc

Browse files
authored
[TRTLLM-8979][test] Improve qwen3 spec dec test coverage (#8767)
Signed-off-by: Mike Iovine <[email protected]>
1 parent 0f67636 commit 5e6f1bc

File tree

2 files changed: 5 additions and 5 deletions.

2 files changed: 5 additions and 5 deletions.

tests/integration/defs/accuracy/references/gsm8k.yaml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -108,6 +108,8 @@ deepseek-ai/DeepSeek-V3.2-Exp:
108108
accuracy: 95.6
109109
Qwen3/Qwen3-8B:
110110
- accuracy: 87.1114
111+
- spec_dec_algo: Eagle
112+
accuracy: 87.1114
111113
- quant_algo: FP8
112114
kv_cache_quant_algo: FP8
113115
accuracy: 87.1114

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -258,8 +258,6 @@ def test_eagle3(self, overlap_scheduler, eagle3_one_model):
258258
build_config=None) as llm:
259259
task = CnnDailymail(self.MODEL_NAME)
260260
task.evaluate(llm)
261-
task = MMLU(self.MODEL_NAME)
262-
task.evaluate(llm)
263261
task = GSM8K(self.MODEL_NAME)
264262
task.evaluate(llm)
265263

@@ -2852,7 +2850,7 @@ def test_bf16(self, tp_size, pp_size, ep_size, attention_dp, cuda_graph,
28522850
@parametrize_with_ids("enable_chunked_prefill", [False, True])
28532851
def test_eagle3(self, enable_chunked_prefill, eagle3_one_model):
28542852
pytorch_config = dict(
2855-
disable_overlap_scheduler=True,
2853+
disable_overlap_scheduler=not eagle3_one_model,
28562854
cuda_graph_config=CudaGraphConfig(),
28572855
)
28582856
kv_cache_config = KvCacheConfig(
@@ -2877,7 +2875,7 @@ def test_eagle3(self, enable_chunked_prefill, eagle3_one_model):
28772875
build_config=None)
28782876

28792877
with llm:
2880-
task = MMLU(self.MODEL_NAME)
2878+
task = GSM8K(self.MODEL_NAME)
28812879
task.evaluate(llm)
28822880

28832881
@skip_pre_blackwell
@@ -3040,7 +3038,7 @@ def test_nvfp4(
30403038

30413039
def test_eagle3(self):
30423040
pytorch_config = dict(
3043-
disable_overlap_scheduler=True,
3041+
disable_overlap_scheduler=False,
30443042
cuda_graph_config=CudaGraphConfig(batch_sizes=[1, 2, 3, 4, 8]),
30453043
)
30463044
kv_cache_config = KvCacheConfig(enable_block_reuse=False)

0 commit comments

Comments
 (0)