Skip to content

Commit 3c56886

Browse files
mattgodbolt and claude
committed
Fix code quality issues in haiku implementation
- Remove duplication between get_audience_metadata methods - implement instance method in terms of class method
- Remove overly complex get_all_audience_locations method, add comment about future needs
- Remove hardcoded haiku-specific evaluation criteria - use same criteria for all explanation types
- Remove magic string detection for haiku targeting - explanation types should be explicitly specified
- Simplify prompt_advisor.py to directly check both audience locations without complex abstractions
- Fix test to check audience information in user prompt where it actually appears
- Use .values() instead of .items() when we don't need the keys

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 51271ee commit 3c56886

File tree

4 files changed

+62
-128
lines changed

4 files changed

+62
-128
lines changed

app/prompt.py

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,7 @@ def __init__(self, config: dict[str, Any] | Path):
4848

4949
def get_audience_metadata(self, audience: str, for_explanation: str | None = None) -> dict[str, str]:
5050
"""Get metadata for an audience level (and optionally an explanation type)."""
51-
audience_metadata = self.audience_levels[audience]
52-
if for_explanation and (
53-
explanation_audience := self.explanation_types[for_explanation].get("audience_levels", {}).get(audience)
54-
):
55-
audience_metadata = {**audience_metadata, **explanation_audience}
56-
return audience_metadata
51+
return self.get_audience_metadata_from_dict(self.config, audience, for_explanation)
5752

5853
def get_explanation_metadata(self, explanation: str) -> dict[str, str]:
5954
"""Get metadata for an explanation type."""
@@ -93,30 +88,9 @@ def has_audience_override(cls, prompt_dict: dict[str, Any], explanation: str, au
9388
and audience in prompt_dict["explanation_types"][explanation]["audience_levels"]
9489
)
9590

96-
@classmethod
97-
def get_all_audience_locations(cls, prompt_dict: dict[str, Any], audience: str) -> list[tuple[str, ...]]:
98-
"""Get all locations in the prompt dict where audience guidance might be found.
99-
100-
Returns a list of key paths as tuples, e.g.:
101-
[("audience_levels", "beginner"), ("explanation_types", "haiku", "audience_levels", "beginner")]
102-
"""
103-
locations = []
104-
105-
# Base audience location
106-
if "audience_levels" in prompt_dict and audience in prompt_dict["audience_levels"]:
107-
locations.append(("audience_levels", audience))
108-
109-
# Explanation-specific audience overrides
110-
if "explanation_types" in prompt_dict:
111-
for exp_type, exp_config in prompt_dict["explanation_types"].items():
112-
if (
113-
isinstance(exp_config, dict)
114-
and "audience_levels" in exp_config
115-
and audience in exp_config["audience_levels"]
116-
):
117-
locations.append(("explanation_types", exp_type, "audience_levels", audience))
118-
119-
return locations
91+
# Note: In the future, prompt_advisor may need the ability to create new
92+
# explanation-specific audience overrides (like we did manually for haiku).
93+
# This would involve adding new audience_levels sections within explanation_types.
12094

12195
def select_important_assembly(
12296
self, asm_array: list[dict], label_definitions: dict, max_lines: int = MAX_ASSEMBLY_LINES

app/test_explain.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,15 @@ async def test_process_request_success(self, sample_request, mock_anthropic_clie
116116

117117
# Verify the system prompt contains appropriate instructions
118118
system_prompt = kwargs["system"]
119-
assert "beginner" in system_prompt.lower()
120119
assert "assembly" in system_prompt.lower()
121120
assert "c++" in system_prompt.lower()
122121
assert "amd64" in system_prompt.lower()
123122

123+
# Check that audience information is in the user prompt (messages)
124+
messages = kwargs["messages"]
125+
user_message = messages[0]["content"][0]["text"]
126+
assert "beginner" in user_message.lower()
127+
124128
# Check that the messages array contains user and assistant messages
125129
messages = kwargs["messages"]
126130
assert len(messages) == 2

prompt_testing/evaluation/claude_reviewer.py

Lines changed: 8 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -84,60 +84,6 @@ class ReviewCriteria:
8484
- Content matches the requested explanation type
8585
"""
8686

87-
def get_haiku_criteria(self) -> dict[str, str]:
88-
"""Get haiku-specific evaluation criteria."""
89-
return {
90-
"accuracy": """
91-
Evaluate technical accuracy (0-100):
92-
- Does the haiku capture the actual behavior of the code?
93-
- No false claims about what the code does
94-
- Correctly identifies key actions (recursion, loops, data manipulation)
95-
- Avoids technical inaccuracies even in poetic form
96-
""",
97-
"relevance": """
98-
Evaluate relevance to the actual code (0-100):
99-
- Captures the essence of THIS specific code's behavior
100-
- Reflects key patterns (recursion, optimization, simplicity)
101-
- Not generic poetry that could apply to any code
102-
- Connects to the actual assembly operations
103-
""",
104-
"conciseness": """
105-
Evaluate haiku format adherence (0-100):
106-
- Exactly three lines
107-
- Appropriate syllable structure (approximately 5-7-5)
108-
- No extra text beyond the haiku
109-
- Each line contributes meaningfully
110-
""",
111-
"insight": """
112-
Evaluate poetic insight and imagery (0-100):
113-
- Uses vivid, concrete imagery
114-
- Captures deeper meaning of the code's purpose
115-
- Creative metaphors that illuminate the code's nature
116-
- Poetic language that enhances understanding
117-
""",
118-
"appropriateness": """
119-
Evaluate haiku quality and appropriateness (0-100):
120-
- Maintains the spirit and form of traditional haiku
121-
- Balances technical accuracy with poetic expression
122-
- Language is evocative and memorable
123-
- Successfully bridges code analysis and poetry
124-
""",
125-
}
126-
127-
def get_criteria_for_type(self, explanation_type: ExplanationType) -> dict[str, str]:
128-
"""Get criteria appropriate for the explanation type."""
129-
if explanation_type == ExplanationType.HAIKU:
130-
return self.get_haiku_criteria()
131-
132-
# Default assembly criteria
133-
return {
134-
"accuracy": self.accuracy,
135-
"relevance": self.relevance,
136-
"conciseness": self.conciseness,
137-
"insight": self.insight,
138-
"appropriateness": self.appropriateness,
139-
}
140-
14187

14288
_AUDIENCE_LEVEL = {
14389
AudienceLevel.BEGINNER: """The explanation should be aimed at beginners.
@@ -182,8 +128,14 @@ def _build_evaluation_prompt(
182128
) -> str:
183129
"""Build the evaluation prompt for Claude."""
184130

185-
# Get type-specific criteria
186-
criteria = self.criteria.get_criteria_for_type(explanation_type)
131+
# Use the same criteria for all explanation types
132+
criteria = {
133+
"accuracy": self.criteria.accuracy,
134+
"relevance": self.criteria.relevance,
135+
"conciseness": self.criteria.conciseness,
136+
"insight": self.criteria.insight,
137+
"appropriateness": self.criteria.appropriateness,
138+
}
187139

188140
prompt = f"""You are an expert in compiler technology and technical education.
189141
Your task is to evaluate an AI-generated explanation of Compiler Explorer's output using our metrics.

prompt_testing/evaluation/prompt_advisor.py

Lines changed: 45 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from anthropic import Anthropic
1010

11-
from app.prompt import Prompt
1211
from prompt_testing.evaluation.reviewer import HumanReview, ReviewManager
1312
from prompt_testing.yaml_utils import create_yaml_dumper, load_yaml_file
1413

@@ -603,10 +602,6 @@ def _classify_suggestion_target(self, suggestion_text: str) -> dict[str, list[st
603602
):
604603
targets["audiences"].append("expert")
605604

606-
# Haiku-specific suggestions
607-
if any(term in suggestion_lower for term in ["haiku", "poetry", "imagery", "vivid", "concise", "three-line"]):
608-
targets["explanation_types"].append("haiku")
609-
610605
# Remove duplicates
611606
targets["audiences"] = list(set(targets["audiences"]))
612607
targets["explanation_types"] = list(set(targets["explanation_types"]))
@@ -627,26 +622,31 @@ def _apply_targeted_improvement(self, new_prompt: dict[str, Any], improvement: d
627622
) and "system_prompt" in new_prompt:
628623
new_prompt["system_prompt"] = new_prompt["system_prompt"].replace(current_text, suggested_text)
629624

630-
# Apply to specific audience levels (including nested ones in explanation types)
625+
# Apply to specific audience levels (check both base and explanation-specific locations)
626+
# TODO: In the future, we may need to create new explanation-specific audience overrides
631627
if targets["audiences"]:
632628
for audience in targets["audiences"]:
633-
# Get all locations where this audience guidance might be stored
634-
audience_locations = Prompt.get_all_audience_locations(new_prompt, audience)
635-
636-
for location_path in audience_locations:
637-
# Navigate to the guidance using the path
638-
current_section = new_prompt
639-
for key in location_path:
640-
if key in current_section:
641-
current_section = current_section[key]
642-
else:
643-
current_section = None
644-
break
645-
646-
if current_section and isinstance(current_section, dict):
647-
guidance = current_section.get("guidance", "")
648-
if current_text in guidance:
649-
current_section["guidance"] = guidance.replace(current_text, suggested_text)
629+
# Check base audience level
630+
if "audience_levels" in new_prompt and audience in new_prompt["audience_levels"]:
631+
guidance = new_prompt["audience_levels"][audience].get("guidance", "")
632+
if current_text in guidance:
633+
new_prompt["audience_levels"][audience]["guidance"] = guidance.replace(
634+
current_text, suggested_text
635+
)
636+
637+
# Check explanation-specific audience overrides
638+
if "explanation_types" in new_prompt:
639+
for exp_config in new_prompt["explanation_types"].values():
640+
if (
641+
isinstance(exp_config, dict)
642+
and "audience_levels" in exp_config
643+
and audience in exp_config["audience_levels"]
644+
):
645+
guidance = exp_config["audience_levels"][audience].get("guidance", "")
646+
if current_text in guidance:
647+
exp_config["audience_levels"][audience]["guidance"] = guidance.replace(
648+
current_text, suggested_text
649+
)
650650

651651
# Apply to specific explanation types
652652
if targets["explanation_types"] and "explanation_types" in new_prompt:
@@ -664,26 +664,30 @@ def _apply_targeted_additions(self, new_prompt: dict[str, Any], additions: list[
664664
targets = self._classify_suggestion_target(addition)
665665
applied = False
666666

667-
# Apply to specific audience levels (including nested ones in explanation types)
667+
# Apply to specific audience levels (check both base and explanation-specific locations)
668668
if targets["audiences"]:
669669
for audience in targets["audiences"]:
670-
# Get all locations where this audience guidance might be stored
671-
audience_locations = Prompt.get_all_audience_locations(new_prompt, audience)
672-
673-
for location_path in audience_locations:
674-
# Navigate to the guidance using the path
675-
current_section = new_prompt
676-
for key in location_path:
677-
if key in current_section:
678-
current_section = current_section[key]
679-
else:
680-
current_section = None
681-
break
682-
683-
if current_section and isinstance(current_section, dict):
684-
current_guidance = current_section.get("guidance", "")
685-
current_section["guidance"] = current_guidance.rstrip() + f"\n{addition}\n"
686-
applied = True
670+
# Check base audience level
671+
if "audience_levels" in new_prompt and audience in new_prompt["audience_levels"]:
672+
current_guidance = new_prompt["audience_levels"][audience].get("guidance", "")
673+
new_prompt["audience_levels"][audience]["guidance"] = (
674+
current_guidance.rstrip() + f"\n{addition}\n"
675+
)
676+
applied = True
677+
678+
# Check explanation-specific audience overrides
679+
if "explanation_types" in new_prompt:
680+
for exp_config in new_prompt["explanation_types"].values():
681+
if (
682+
isinstance(exp_config, dict)
683+
and "audience_levels" in exp_config
684+
and audience in exp_config["audience_levels"]
685+
):
686+
current_guidance = exp_config["audience_levels"][audience].get("guidance", "")
687+
exp_config["audience_levels"][audience]["guidance"] = (
688+
current_guidance.rstrip() + f"\n{addition}\n"
689+
)
690+
applied = True
687691

688692
# Apply to specific explanation types
689693
if targets["explanation_types"] and "explanation_types" in new_prompt:

0 commit comments

Comments (0)