Skip to content

Commit 3c56886

Browse files
mattgodbolt and claude
committed
Fix code quality issues in haiku implementation
- Remove duplication between get_audience_metadata methods - implement instance method in terms of class method
- Remove overly complex get_all_audience_locations method, add comment about future needs
- Remove hardcoded haiku-specific evaluation criteria - use same criteria for all explanation types
- Remove magic string detection for haiku targeting - explanation types should be explicitly specified
- Simplify prompt_advisor.py to directly check both audience locations without complex abstractions
- Fix test to check audience information in user prompt where it actually appears
- Use .values() instead of .items() when we don't need the keys

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 51271ee commit 3c56886

File tree

4 files changed

+62
-128
lines changed

4 files changed

+62
-128
lines changed

app/prompt.py

Lines changed: 4 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,7 @@ def __init__(self, config: dict[str, Any] | Path):
4848

4949
def get_audience_metadata(self, audience: str, for_explanation: str | None = None) -> dict[str, str]:
5050
"""Get metadata for an audience level (and optionally an explanation type)."""
51-
audience_metadata = self.audience_levels[audience]
52-
if for_explanation and (
53-
explanation_audience := self.explanation_types[for_explanation].get("audience_levels", {}).get(audience)
54-
):
55-
audience_metadata = {**audience_metadata, **explanation_audience}
56-
return audience_metadata
51+
return self.get_audience_metadata_from_dict(self.config, audience, for_explanation)
5752

5853
def get_explanation_metadata(self, explanation: str) -> dict[str, str]:
5954
"""Get metadata for an explanation type."""
@@ -93,30 +88,9 @@ def has_audience_override(cls, prompt_dict: dict[str, Any], explanation: str, au
9388
and audience in prompt_dict["explanation_types"][explanation]["audience_levels"]
9489
)
9590

96-
@classmethod
97-
def get_all_audience_locations(cls, prompt_dict: dict[str, Any], audience: str) -> list[tuple[str, ...]]:
98-
"""Get all locations in the prompt dict where audience guidance might be found.
99-
100-
Returns a list of key paths as tuples, e.g.:
101-
[("audience_levels", "beginner"), ("explanation_types", "haiku", "audience_levels", "beginner")]
102-
"""
103-
locations = []
104-
105-
# Base audience location
106-
if "audience_levels" in prompt_dict and audience in prompt_dict["audience_levels"]:
107-
locations.append(("audience_levels", audience))
108-
109-
# Explanation-specific audience overrides
110-
if "explanation_types" in prompt_dict:
111-
for exp_type, exp_config in prompt_dict["explanation_types"].items():
112-
if (
113-
isinstance(exp_config, dict)
114-
and "audience_levels" in exp_config
115-
and audience in exp_config["audience_levels"]
116-
):
117-
locations.append(("explanation_types", exp_type, "audience_levels", audience))
118-
119-
return locations
91+
# Note: In the future, prompt_advisor may need the ability to create new
92+
# explanation-specific audience overrides (like we did manually for haiku).
93+
# This would involve adding new audience_levels sections within explanation_types.
12094

12195
def select_important_assembly(
12296
self, asm_array: list[dict], label_definitions: dict, max_lines: int = MAX_ASSEMBLY_LINES

app/test_explain.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,15 @@ async def test_process_request_success(self, sample_request, mock_anthropic_clie
116116

117117
# Verify the system prompt contains appropriate instructions
118118
system_prompt = kwargs["system"]
119-
assert "beginner" in system_prompt.lower()
120119
assert "assembly" in system_prompt.lower()
121120
assert "c++" in system_prompt.lower()
122121
assert "amd64" in system_prompt.lower()
123122

123+
# Check that audience information is in the user prompt (messages)
124+
messages = kwargs["messages"]
125+
user_message = messages[0]["content"][0]["text"]
126+
assert "beginner" in user_message.lower()
127+
124128
# Check that the messages array contains user and assistant messages
125129
messages = kwargs["messages"]
126130
assert len(messages) == 2

prompt_testing/evaluation/claude_reviewer.py

Lines changed: 8 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -84,60 +84,6 @@ class ReviewCriteria:
8484
- Content matches the requested explanation type
8585
"""
8686

87-
def get_haiku_criteria(self) -> dict[str, str]:
88-
"""Get haiku-specific evaluation criteria."""
89-
return {
90-
"accuracy": """
91-
Evaluate technical accuracy (0-100):
92-
- Does the haiku capture the actual behavior of the code?
93-
- No false claims about what the code does
94-
- Correctly identifies key actions (recursion, loops, data manipulation)
95-
- Avoids technical inaccuracies even in poetic form
96-
""",
97-
"relevance": """
98-
Evaluate relevance to the actual code (0-100):
99-
- Captures the essence of THIS specific code's behavior
100-
- Reflects key patterns (recursion, optimization, simplicity)
101-
- Not generic poetry that could apply to any code
102-
- Connects to the actual assembly operations
103-
""",
104-
"conciseness": """
105-
Evaluate haiku format adherence (0-100):
106-
- Exactly three lines
107-
- Appropriate syllable structure (approximately 5-7-5)
108-
- No extra text beyond the haiku
109-
- Each line contributes meaningfully
110-
""",
111-
"insight": """
112-
Evaluate poetic insight and imagery (0-100):
113-
- Uses vivid, concrete imagery
114-
- Captures deeper meaning of the code's purpose
115-
- Creative metaphors that illuminate the code's nature
116-
- Poetic language that enhances understanding
117-
""",
118-
"appropriateness": """
119-
Evaluate haiku quality and appropriateness (0-100):
120-
- Maintains the spirit and form of traditional haiku
121-
- Balances technical accuracy with poetic expression
122-
- Language is evocative and memorable
123-
- Successfully bridges code analysis and poetry
124-
""",
125-
}
126-
127-
def get_criteria_for_type(self, explanation_type: ExplanationType) -> dict[str, str]:
128-
"""Get criteria appropriate for the explanation type."""
129-
if explanation_type == ExplanationType.HAIKU:
130-
return self.get_haiku_criteria()
131-
132-
# Default assembly criteria
133-
return {
134-
"accuracy": self.accuracy,
135-
"relevance": self.relevance,
136-
"conciseness": self.conciseness,
137-
"insight": self.insight,
138-
"appropriateness": self.appropriateness,
139-
}
140-
14187

14288
_AUDIENCE_LEVEL = {
14389
AudienceLevel.BEGINNER: """The explanation should be aimed at beginners.
@@ -182,8 +128,14 @@ def _build_evaluation_prompt(
182128
) -> str:
183129
"""Build the evaluation prompt for Claude."""
184130

185-
# Get type-specific criteria
186-
criteria = self.criteria.get_criteria_for_type(explanation_type)
131+
# Use the same criteria for all explanation types
132+
criteria = {
133+
"accuracy": self.criteria.accuracy,
134+
"relevance": self.criteria.relevance,
135+
"conciseness": self.criteria.conciseness,
136+
"insight": self.criteria.insight,
137+
"appropriateness": self.criteria.appropriateness,
138+
}
187139

188140
prompt = f"""You are an expert in compiler technology and technical education.
189141
Your task is to evaluate an AI-generated explanation of Compiler Explorer's output using our metrics.

prompt_testing/evaluation/prompt_advisor.py

Lines changed: 45 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
from anthropic import Anthropic
1010

11-
from app.prompt import Prompt
1211
from prompt_testing.evaluation.reviewer import HumanReview, ReviewManager
1312
from prompt_testing.yaml_utils import create_yaml_dumper, load_yaml_file
1413

@@ -603,10 +602,6 @@ def _classify_suggestion_target(self, suggestion_text: str) -> dict[str, list[st
603602
):
604603
targets["audiences"].append("expert")
605604

606-
# Haiku-specific suggestions
607-
if any(term in suggestion_lower for term in ["haiku", "poetry", "imagery", "vivid", "concise", "three-line"]):
608-
targets["explanation_types"].append("haiku")
609-
610605
# Remove duplicates
611606
targets["audiences"] = list(set(targets["audiences"]))
612607
targets["explanation_types"] = list(set(targets["explanation_types"]))
@@ -627,26 +622,31 @@ def _apply_targeted_improvement(self, new_prompt: dict[str, Any], improvement: d
627622
) and "system_prompt" in new_prompt:
628623
new_prompt["system_prompt"] = new_prompt["system_prompt"].replace(current_text, suggested_text)
629624

630-
# Apply to specific audience levels (including nested ones in explanation types)
625+
# Apply to specific audience levels (check both base and explanation-specific locations)
626+
# TODO: In the future, we may need to create new explanation-specific audience overrides
631627
if targets["audiences"]:
632628
for audience in targets["audiences"]:
633-
# Get all locations where this audience guidance might be stored
634-
audience_locations = Prompt.get_all_audience_locations(new_prompt, audience)
635-
636-
for location_path in audience_locations:
637-
# Navigate to the guidance using the path
638-
current_section = new_prompt
639-
for key in location_path:
640-
if key in current_section:
641-
current_section = current_section[key]
642-
else:
643-
current_section = None
644-
break
645-
646-
if current_section and isinstance(current_section, dict):
647-
guidance = current_section.get("guidance", "")
648-
if current_text in guidance:
649-
current_section["guidance"] = guidance.replace(current_text, suggested_text)
629+
# Check base audience level
630+
if "audience_levels" in new_prompt and audience in new_prompt["audience_levels"]:
631+
guidance = new_prompt["audience_levels"][audience].get("guidance", "")
632+
if current_text in guidance:
633+
new_prompt["audience_levels"][audience]["guidance"] = guidance.replace(
634+
current_text, suggested_text
635+
)
636+
637+
# Check explanation-specific audience overrides
638+
if "explanation_types" in new_prompt:
639+
for exp_config in new_prompt["explanation_types"].values():
640+
if (
641+
isinstance(exp_config, dict)
642+
and "audience_levels" in exp_config
643+
and audience in exp_config["audience_levels"]
644+
):
645+
guidance = exp_config["audience_levels"][audience].get("guidance", "")
646+
if current_text in guidance:
647+
exp_config["audience_levels"][audience]["guidance"] = guidance.replace(
648+
current_text, suggested_text
649+
)
650650

651651
# Apply to specific explanation types
652652
if targets["explanation_types"] and "explanation_types" in new_prompt:
@@ -664,26 +664,30 @@ def _apply_targeted_additions(self, new_prompt: dict[str, Any], additions: list[
664664
targets = self._classify_suggestion_target(addition)
665665
applied = False
666666

667-
# Apply to specific audience levels (including nested ones in explanation types)
667+
# Apply to specific audience levels (check both base and explanation-specific locations)
668668
if targets["audiences"]:
669669
for audience in targets["audiences"]:
670-
# Get all locations where this audience guidance might be stored
671-
audience_locations = Prompt.get_all_audience_locations(new_prompt, audience)
672-
673-
for location_path in audience_locations:
674-
# Navigate to the guidance using the path
675-
current_section = new_prompt
676-
for key in location_path:
677-
if key in current_section:
678-
current_section = current_section[key]
679-
else:
680-
current_section = None
681-
break
682-
683-
if current_section and isinstance(current_section, dict):
684-
current_guidance = current_section.get("guidance", "")
685-
current_section["guidance"] = current_guidance.rstrip() + f"\n{addition}\n"
686-
applied = True
670+
# Check base audience level
671+
if "audience_levels" in new_prompt and audience in new_prompt["audience_levels"]:
672+
current_guidance = new_prompt["audience_levels"][audience].get("guidance", "")
673+
new_prompt["audience_levels"][audience]["guidance"] = (
674+
current_guidance.rstrip() + f"\n{addition}\n"
675+
)
676+
applied = True
677+
678+
# Check explanation-specific audience overrides
679+
if "explanation_types" in new_prompt:
680+
for exp_config in new_prompt["explanation_types"].values():
681+
if (
682+
isinstance(exp_config, dict)
683+
and "audience_levels" in exp_config
684+
and audience in exp_config["audience_levels"]
685+
):
686+
current_guidance = exp_config["audience_levels"][audience].get("guidance", "")
687+
exp_config["audience_levels"][audience]["guidance"] = (
688+
current_guidance.rstrip() + f"\n{addition}\n"
689+
)
690+
applied = True
687691

688692
# Apply to specific explanation types
689693
if targets["explanation_types"] and "explanation_types" in new_prompt:

0 commit comments

Comments (0)