Skip to content

Commit 71e0f5c

Browse files
Fix structured LLM response handling, update eval model name
Extract text content from thinking+text block responses (gpt-oss on Groq returns structured blocks instead of plain strings). Add extract_text_content utility used by all agents. Update frontend model display to show GPT-OSS-120B for evaluation.
1 parent a740b44 commit 71e0f5c

File tree

7 files changed

+38
-13
lines changed

7 files changed

+38
-13
lines changed

frontend/index.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1235,7 +1235,7 @@ <h3>Status</h3>
12351235
<a href="https://docs.annotation.garden/projects/hedit/telemetry" target="_blank" rel="noopener noreferrer" title="Learn more about telemetry">Learn more</a>
12361236
</div>
12371237
<div class="model-info">
1238-
<span>Models: <a href="https://openrouter.ai/anthropic/claude-haiku-4.5" target="_blank" rel="noopener noreferrer">Claude Haiku 4.5</a> (annotation) · <a href="https://openrouter.ai/qwen/qwen3-vl-30b-a3b-instruct" target="_blank" rel="noopener noreferrer">Qwen3-VL-30B</a> (vision) · <a href="https://openrouter.ai/qwen/qwen3-235b-a22b-2507" target="_blank" rel="noopener noreferrer">Qwen3-235B</a> (evaluation) · To change models or use programmatically, install <a href="https://pypi.org/project/hedit/" target="_blank" rel="noopener noreferrer">HEDit CLI</a></span>
1238+
<span>Models: <a href="https://openrouter.ai/anthropic/claude-haiku-4.5" target="_blank" rel="noopener noreferrer">Claude Haiku 4.5</a> (annotation) · <a href="https://openrouter.ai/qwen/qwen3-vl-30b-a3b-instruct" target="_blank" rel="noopener noreferrer">Qwen3-VL-30B</a> (vision) · <a href="https://openrouter.ai/openai/gpt-oss-120b" target="_blank" rel="noopener noreferrer">GPT-OSS-120B</a> (evaluation) · To change models or use programmatically, install <a href="https://pypi.org/project/hedit/" target="_blank" rel="noopener noreferrer">HEDit CLI</a></span>
12391239
</div>
12401240

12411241
<script src="config.js"></script>

src/agents/annotation_agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from langchain_core.messages import HumanMessage, SystemMessage
1212

1313
from src.agents.state import HedAnnotationState
14+
from src.utils import extract_text_content
1415
from src.utils.hed_comprehensive_guide import get_comprehensive_hed_guide
1516
from src.utils.json_schema_loader import HedJsonSchemaLoader, load_latest_schema
1617

@@ -212,8 +213,7 @@ async def annotate(self, state: HedAnnotationState) -> dict:
212213
except Exception as e:
213214
logger.error("LLM invocation failed: %s", e, exc_info=True)
214215
raise
215-
content = response.content
216-
raw_annotation = content.strip() if isinstance(content, str) else str(content)
216+
raw_annotation = extract_text_content(response.content)
217217

218218
# Clean up LLM output - extract just the HED annotation
219219
annotation = self._extract_hed_annotation(raw_annotation)

src/agents/assessment_agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from langchain_core.messages import HumanMessage, SystemMessage
1212

1313
from src.agents.state import HedAnnotationState
14+
from src.utils import extract_text_content
1415

1516
logger = logging.getLogger(__name__)
1617

@@ -112,8 +113,7 @@ async def assess(self, state: HedAnnotationState) -> dict:
112113
except Exception as e:
113114
logger.error("Assessment LLM invocation failed: %s", e, exc_info=True)
114115
raise
115-
content = response.content
116-
feedback = content.strip() if isinstance(content, str) else str(content)
116+
feedback = extract_text_content(response.content)
117117

118118
# Parse completion status from assessment feedback
119119
# Format is "COMPLETENESS: complete" and "STATUS: COMPLETE"

src/agents/evaluation_agent.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
from langchain_core.messages import HumanMessage, SystemMessage
1313

1414
from src.agents.state import HedAnnotationState
15+
from src.utils import extract_text_content
1516
from src.utils.json_schema_loader import HedJsonSchemaLoader, load_latest_schema
1617

1718
logger = logging.getLogger(__name__)
@@ -172,8 +173,7 @@ async def evaluate(self, state: HedAnnotationState) -> dict:
172173
except Exception as e:
173174
logger.error("Evaluation LLM invocation failed: %s", e, exc_info=True)
174175
raise
175-
content = response.content
176-
feedback = content.strip() if isinstance(content, str) else str(content)
176+
feedback = extract_text_content(response.content)
177177

178178
# Parse decision with multiple fallbacks
179179
is_faithful = self._parse_decision(feedback)

src/agents/feedback_summarizer.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from langchain_core.messages import HumanMessage, SystemMessage
1111

1212
from src.agents.state import HedAnnotationState
13+
from src.utils import extract_text_content
1314

1415
logger = logging.getLogger(__name__)
1516

@@ -121,8 +122,7 @@ async def summarize(self, state: HedAnnotationState) -> dict:
121122
except Exception as e:
122123
logger.error("Feedback summarization LLM invocation failed: %s", e, exc_info=True)
123124
raise
124-
content = response.content
125-
summarized_feedback = content.strip() if isinstance(content, str) else str(content)
125+
summarized_feedback = extract_text_content(response.content)
126126

127127
# Replace verbose feedback with summary (only augmented fields for LLM, not raw for users)
128128
return {

src/agents/vision_agent.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from langchain_core.language_models import BaseChatModel
88
from langchain_core.messages import HumanMessage
99

10+
from src.utils import extract_text_content
1011
from src.utils.image_processing import prepare_image_for_vision_model
1112

1213
DEFAULT_VISION_PROMPT = """Describe what you see in this image. Include the setting, main elements, colors, lighting, and overall composition. Be specific and detailed. Form the response as a continuous paragraph. Maximum 200 words."""
@@ -70,8 +71,7 @@ async def describe_image(
7071

7172
# Generate description
7273
response = await self.llm.ainvoke([message])
73-
content = response.content
74-
description = content.strip() if isinstance(content, str) else str(content)
74+
description = extract_text_content(response.content)
7575

7676
return {
7777
"description": description,
@@ -115,8 +115,7 @@ def describe_image_sync(
115115

116116
# Generate description
117117
response = self.llm.invoke([message])
118-
content = response.content
119-
description = content.strip() if isinstance(content, str) else str(content)
118+
description = extract_text_content(response.content)
120119

121120
return {
122121
"description": description,

src/utils/__init__.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,27 @@
11
"""Utility functions."""
2+
3+
4+
def extract_text_content(content: object) -> str:
    """Extract text from an LLM response content field.

    Some models (e.g. gpt-oss on Groq) return structured blocks including
    thinking and text parts instead of a plain string. This helper extracts
    only the text parts and joins them.

    Args:
        content: The ``response.content`` value — either a plain string, or
            a list whose items are plain strings or dicts with ``type`` and
            ``text``/``thinking`` keys (LangChain allows both item forms).

    Returns:
        The concatenated text content, stripped of whitespace.
    """
    if isinstance(content, str):
        return content.strip()
    if isinstance(content, list):
        parts: list[str] = []
        for block in content:
            if isinstance(block, str):
                # LangChain content lists may contain bare strings, not
                # just typed dict blocks — include them as text.
                parts.append(block)
            elif isinstance(block, dict) and block.get("type") == "text":
                parts.append(block.get("text", ""))
        if parts:
            return "\n".join(parts).strip()
    # Fallback for unrecognized structures (e.g. a thinking-only list):
    # stringify so callers always receive a str.
    return str(content).strip()

0 commit comments

Comments
 (0)