Skip to content

Commit c3bb8e7

Browse files
Add gpt-5.4 to resolve_model_config.py (#2374)
Co-authored-by: openhands <openhands@all-hands.dev>
1 parent f0f323e commit c3bb8e7

File tree

6 files changed

+40
-11
lines changed

6 files changed

+40
-11
lines changed

.github/run-eval/resolve_model_config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,14 @@
140140
"reasoning_effort": "high",
141141
},
142142
},
143+
"gpt-5.4": {
144+
"id": "gpt-5.4",
145+
"display_name": "GPT-5.4",
146+
"llm_config": {
147+
"model": "litellm_proxy/openai/gpt-5.4",
148+
"reasoning_effort": "high",
149+
},
150+
},
143151
"minimax-m2": {
144152
"id": "minimax-m2",
145153
"display_name": "MiniMax M2",

openhands-sdk/openhands/sdk/llm/utils/model_features.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ class ModelFeatures:
6969
"gemini-3.1-pro-preview",
7070
# OpenAI GPT-5 family (includes mini variants)
7171
"gpt-5",
72+
"gpt-5.4",
7273
# Anthropic Opus 4.5 and 4.6
7374
"claude-opus-4-5",
7475
"claude-opus-4-6",

openhands-sdk/openhands/sdk/llm/utils/model_prompt_spec.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class ModelPromptSpec(BaseModel):
4040
"gpt-5-codex",
4141
("gpt-5-codex", "gpt-5.1-codex", "gpt-5.2-codex", "gpt-5.3-codex"),
4242
),
43-
("gpt-5", ("gpt-5", "gpt-5.1", "gpt-5.2")),
43+
("gpt-5", ("gpt-5", "gpt-5.1", "gpt-5.2", "gpt-5.4")),
4444
),
4545
}
4646

tests/github_workflows/test_resolve_model_config.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,3 +491,13 @@ def test_models_importable_without_litellm():
491491
f"stderr: {result.stderr}"
492492
)
493493
assert "SUCCESS" in result.stdout
494+
495+
496+
def test_gpt_5_4_config():
497+
"""Test that gpt-5.4 has correct configuration."""
498+
model = MODELS["gpt-5.4"]
499+
500+
assert model["id"] == "gpt-5.4"
501+
assert model["display_name"] == "GPT-5.4"
502+
assert model["llm_config"]["model"] == "litellm_proxy/openai/gpt-5.4"
503+
assert model["llm_config"]["reasoning_effort"] == "high"

tests/integration/tests/c01_thinking_block_condenser.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,21 @@
11
"""
22
Integration test for thinking block handling during condensation.
33
4-
This test validates that Claude Opus's thinking blocks are properly handled
4+
This test validates that Anthropic Claude's thinking blocks are properly handled
55
during conversation condensation, preventing malformed signature errors that
66
can occur when thinking blocks are included in conversation history.
7+
8+
Note: This test only applies to models that support extended_thinking (Anthropic
9+
Claude models). Models with reasoning_effort (like OpenAI o-series and GPT-5.x)
10+
produce reasoning items instead of thinking blocks, and are skipped.
711
"""
812

913
from openhands.sdk import LLM, Message, TextContent, Tool
1014
from openhands.sdk.context.condenser.base import CondenserBase
1115
from openhands.sdk.context.view import View
1216
from openhands.sdk.conversation.impl.local_conversation import LocalConversation
1317
from openhands.sdk.event import ActionEvent, Condensation
18+
from openhands.sdk.llm.utils.model_features import get_features
1419
from openhands.sdk.tool import register_tool
1520
from openhands.tools.terminal import TerminalTool
1621
from tests.integration.base import BaseIntegrationTest, SkipTest, TestResult
@@ -135,16 +140,16 @@ def setup(self) -> None:
135140
"""
136141
Validate that the model supports extended thinking.
137142
138-
Thinking blocks are primarily supported by:
139-
- Anthropic Claude models (extended_thinking)
140-
- Some Gemini models (extended_thinking)
141-
- Some other models (reasoning_effort)
143+
Thinking blocks are specifically supported by Anthropic Claude models
144+
with extended_thinking enabled. Models that only support reasoning_effort
145+
(like OpenAI o-series and GPT-5.x) produce reasoning items instead of
146+
thinking blocks, so they should be skipped.
142147
"""
143148
model = self.llm_config.get("model", "")
149+
features = get_features(model)
144150

145-
# Check if model has extended thinking or reasoning effort configured
151+
# Check if model has extended thinking configured
146152
has_extended_thinking = self.llm_config.get("extended_thinking", False)
147-
has_reasoning_effort = "reasoning_effort" in self.llm_config
148153

149154
# For Claude Opus, automatically enable extended thinking if not set
150155
if "opus" in model.lower() and not has_extended_thinking:
@@ -154,11 +159,15 @@ def setup(self) -> None:
154159
**{**self.llm.model_dump(), **self.llm_config}
155160
)
156161
self.agent.llm = self.llm
162+
has_extended_thinking = True
157163

158-
# Skip test if model doesn't support thinking blocks
159-
if not has_extended_thinking and not has_reasoning_effort:
164+
# Skip test if model doesn't support extended thinking (which produces
165+
# thinking_blocks). Models that only support reasoning_effort produce
166+
# responses_reasoning_item instead, which is a different mechanism.
167+
if not has_extended_thinking and not features.supports_extended_thinking:
160168
raise SkipTest(
161-
f"Model {model} does not support extended thinking or reasoning effort"
169+
f"Model {model} does not support extended thinking "
170+
"(produces reasoning items instead of thinking blocks)"
162171
)
163172

164173
def conversation_callback(self, event):

tests/sdk/llm/test_model_features.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ def test_model_matches(name, pattern, expected):
3939
# GPT-5 family
4040
("gpt-5.2", True),
4141
("gpt-5.2-codex", True),
42+
("gpt-5.4", True),
4243
("gpt-4o", False),
4344
("claude-3-5-sonnet", False),
4445
("gemini-1.5-pro", False),

0 commit comments

Comments (0)