
Commit 0853bbe

VALLIS-NERIA authored and mikeiovine committed
[https://nvbugs/5601203] [fix] Restrict fp8 blockscale moe case (#8583)
Signed-off-by: Xiwen Yu <[email protected]>
1 parent 839c3c4 commit 0853bbe

File tree

1 file changed: +4 -2 lines changed


tests/integration/defs/examples/serve/test_serve.py

Lines changed: 4 additions & 2 deletions
@@ -2,7 +2,7 @@
 import time
 
 import requests
-from defs.conftest import llm_models_root, skip_pre_hopper
+from defs.conftest import llm_models_root, skip_no_hopper
 from defs.trt_test_alternative import popen, print_error, print_info
 from openai import OpenAI
 from requests.exceptions import RequestException
@@ -92,9 +92,11 @@ def check_openai_chat_completion(http_port="8000",
         raise
 
 
-@skip_pre_hopper
+@skip_no_hopper
 def test_extra_llm_api_options(serve_test_root):
     test_configs_root = f"{serve_test_root}/test_configs"
+
+    # moe backend = CUTLASS which only supports fp8 blockscale on Hopper
     config_file = f"{test_configs_root}/Qwen3-30B-A3B-FP8.yml"
     model_path = f"{llm_models_root()}/Qwen3/Qwen3-30B-A3B-FP8"
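
The point of swapping the marker is the skip condition: skip_pre_hopper skips only GPUs older than Hopper (so Hopper and newer all run), while skip_no_hopper skips everything that is not Hopper, which matches the restriction that the CUTLASS moe backend supports fp8 blockscale only on SM90. The real marker definitions live in defs/conftest.py and are not part of this diff; the sketch below is only an illustration of that distinction, assuming pytest markers keyed on the CUDA compute capability reported by torch.

# Illustrative only: the actual markers are defined in defs/conftest.py,
# which this commit does not touch. Assumes pytest and torch are available.
import pytest
import torch

def _sm_version() -> int:
    # Compute capability of GPU 0, e.g. (9, 0) -> 90 for Hopper (H100).
    major, minor = torch.cuda.get_device_capability(0)
    return major * 10 + minor

# Runs on Hopper *and newer* (skips only pre-Hopper GPUs).
skip_pre_hopper = pytest.mark.skipif(
    _sm_version() < 90, reason="Requires Hopper (SM90) or newer")

# Runs on Hopper *only*: fp8 blockscale in the CUTLASS moe backend is
# SM90-specific, so newer architectures must be skipped as well.
skip_no_hopper = pytest.mark.skipif(
    _sm_version() // 10 != 9, reason="Requires Hopper (SM90) GPUs")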
