Skip to content

Commit 5bdd155

Browse files
authored
[CI] Fix async scheduling + spec decoding test flake (#28902)
Signed-off-by: Nick Hill <[email protected]>
1 parent 0168f69 commit 5bdd155

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

tests/v1/e2e/test_async_scheduling.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -84,6 +84,7 @@ def test_with_spec_decoding(monkeypatch: pytest.MonkeyPatch):
8484
"num_speculative_tokens": 2,
8585
"model": "nm-testing/Llama3_2_1B_speculator.eagle3",
8686
}
87+
# Set small draft model len to force doesn't-fit-in-drafter case.
8788
spec_config_short = spec_config | {"max_model_len": 50}
8889

8990
# test_preemption, executor, async_scheduling,
@@ -174,13 +175,14 @@ def run_tests(
174175
):
175176
if "spec_mml=None" in test_config:
176177
assert (
177-
pytest.approx(test_acceptance_rate, rel=5e-2)
178-
== base_acceptance_rate
178+
test_acceptance_rate > base_acceptance_rate
179+
or test_acceptance_rate
180+
== pytest.approx(base_acceptance_rate, rel=5e-2)
179181
)
180182
else:
181183
# Currently the reported acceptance rate is expected to be
182184
# lower when we sometimes skip drafting altogether.
183-
assert test_acceptance_rate > 0.05
185+
assert test_acceptance_rate > 0.1
184186
print(
185187
f"PASSED: config=[{test_config}], params={params}"
186188
f" accept_rate={test_acceptance_rate}"

0 commit comments

Comments
 (0)