
Commit 7920e9b

[Bugfix] Fix failing GPT-OSS initialization test (#22557)
Signed-off-by: Isotr0py <[email protected]>
1 parent b7c0942

Showing 2 changed files with 6 additions and 1 deletion.


tests/models/registry.py

Lines changed: 1 addition & 1 deletion
@@ -200,7 +200,7 @@ def check_available_online(
                                           {"6b": "EleutherAI/gpt-j-6b"}),
     "GPTNeoXForCausalLM": _HfExamplesInfo("EleutherAI/pythia-70m",
                                           {"1b": "EleutherAI/pythia-1.4b"}),
-    "GptOssForCausalLM": _HfExamplesInfo("openai/gpt-oss-20b"),
+    "GptOssForCausalLM": _HfExamplesInfo("lmsys/gpt-oss-20b-bf16"),
     "GraniteForCausalLM": _HfExamplesInfo("ibm/PowerLM-3b"),
     "GraniteMoeForCausalLM": _HfExamplesInfo("ibm/PowerMoE-3b"),
     "GraniteMoeHybridForCausalLM": _HfExamplesInfo("ibm-granite/granite-4.0-tiny-preview"),  # noqa: E501

tests/models/test_initialization.py

Lines changed: 5 additions & 0 deletions
@@ -68,6 +68,11 @@ def _initialize_kv_caches_v1(self, vllm_config):
         if model_arch == "Phi4FlashForCausalLM":
             # Phi4FlashForCausalLM only supports DIFFERENTIAL_FLASH_ATTN backend
             m.setenv("VLLM_ATTENTION_BACKEND", "DIFFERENTIAL_FLASH_ATTN")
+        if model_arch == "GptOssForCausalLM":
+            # FIXME: A hack to bypass the FA3 assertion, because our CI's L4 GPU
+            # has compute capability 8.9, which does not support FA3 yet. Remove
+            # this hack once L4 supports FA3.
+            m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
         LLM(
             model_info.default,
             tokenizer=model_info.tokenizer,
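
The `m` here is a pytest MonkeyPatch object, so the environment override is scoped to the test and undone automatically afterwards. A minimal sketch of the same pattern, with a hypothetical `select_backend` standing in for vLLM's `LLM(...)` construction:

```python
import os

import pytest


def select_backend() -> str:
    # Hypothetical stand-in for the engine's backend selection: the real
    # vLLM code consults VLLM_ATTENTION_BACKEND when picking a backend.
    return os.environ.get("VLLM_ATTENTION_BACKEND", "FLASH_ATTN")


def test_gpt_oss_backend_override(monkeypatch: pytest.MonkeyPatch):
    # Force the Triton backend, as the diff does for GptOssForCausalLM on
    # CI's L4 GPUs (compute capability 8.9, no FA3 support).
    monkeypatch.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1")
    assert select_backend() == "TRITON_ATTN_VLLM_V1"
    # monkeypatch restores the original environment when the test finishes.
```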
