Improve prompt testing framework YAML handling and validation

mattgodbolt · claude · mattgodbolt · commit d273f1d0c9f1 · 2025-05-24T14:34:34.000-05:00
- Add validation requiring --output when using --create-improved flag - Create centralized yaml_utils module for consistent YAML formatting: - Literal block style (|) for multiline strings - 120 character line width - Preserves quotes, comments and formatting - Single dumper that works for all use cases - Update all YAML usage to use the centralized utilities - Add comprehensive tests for YAML round-tripping with comments and multiline strings - Simplify API by removing unnecessary roundtrip parameter All tests pass and YAML formatting is now consistent across the codebase. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/.idea/misc.xml b/.idea/misc.xml
diff --git a/prompt_testing/cli.py b/prompt_testing/cli.py
@@ -200,6 +200,11 @@ def cmd_analyze(args):
 def cmd_improve(args):
     """Analyze results and suggest prompt improvements."""
 
+    # Validate arguments
+    if args.create_improved and not args.output:
+        print("Error: --output is required when using --create-improved")
+        return 1
+
     optimizer = PromptOptimizer(args.project_root)
 
     # If specific results file provided
diff --git a/prompt_testing/enricher.py b/prompt_testing/enricher.py
@@ -4,9 +4,8 @@
 from pathlib import Path
 from typing import Any
 
-from ruamel.yaml import YAML
-
 from prompt_testing.ce_api import CompilationError, CompileRequest, CompilerExplorerClient
+from prompt_testing.yaml_utils import create_yaml_dumper
 
 
 class TestCaseEnricher:
@@ -102,11 +101,8 @@ def enrich_file(
         Returns:
             Path to enriched output file
         """
-        # Initialize YAML handler with round-trip mode to preserve formatting
-        yaml = YAML()
-        yaml.preserve_quotes = True
-        yaml.width = 120
-        yaml.default_flow_style = False
+        # Initialize YAML handler to preserve formatting
+        yaml = create_yaml_dumper()
 
         # Load input file
         with input_file.open(encoding="utf-8") as f:
diff --git a/prompt_testing/evaluation/prompt_advisor.py b/prompt_testing/evaluation/prompt_advisor.py
@@ -8,6 +8,8 @@
 
 from anthropic import Anthropic
 
+from prompt_testing.yaml_utils import create_yaml_dumper, load_yaml_file
+
 
 class PromptAdvisor:
     """Uses Claude to analyze test results and suggest prompt improvements."""
@@ -297,12 +299,7 @@ def analyze_and_improve(
 
         # Load current prompt
         prompt_path = self.prompts_dir / f"{prompt_version}.yaml"
-        from ruamel.yaml import YAML
-
-        yaml = YAML(typ="safe")
-
-        with prompt_path.open() as f:
-            current_prompt = yaml.load(f)
+        current_prompt = load_yaml_file(prompt_path)
 
         # Get improvement suggestions
         suggestions = self.advisor.analyze_results_and_suggest_improvements(
@@ -323,8 +320,7 @@ def analyze_and_improve(
             )
 
             new_prompt_path = self.prompts_dir / f"{output_name}.yaml"
-            yaml_out = YAML()
-            yaml_out.default_flow_style = False
+            yaml_out = create_yaml_dumper()
             with new_prompt_path.open("w") as f:
                 yaml_out.dump(new_prompt, f)
 
diff --git a/prompt_testing/evaluation/scorer.py b/prompt_testing/evaluation/scorer.py
@@ -5,13 +5,13 @@
 from pathlib import Path
 from typing import Any
 
-from ruamel.yaml import YAML
+from prompt_testing.yaml_utils import create_yaml_loader
 
 
 def load_test_case(file_path: str, case_id: str) -> dict[str, Any]:
     """Load a specific test case from a YAML file."""
     path = Path(file_path)
-    yaml = YAML(typ="safe")
+    yaml = create_yaml_loader()
     with path.open(encoding="utf-8") as f:
         data = yaml.load(f)
 
@@ -26,7 +26,7 @@ def load_all_test_cases(test_cases_dir: str) -> list[dict[str, Any]]:
     """Load all test cases from the test_cases directory."""
     all_cases = []
     test_dir = Path(test_cases_dir)
-    yaml = YAML(typ="safe")
+    yaml = create_yaml_loader()
 
     for file_path in test_dir.glob("*.yaml"):
         with file_path.open(encoding="utf-8") as f:
diff --git a/prompt_testing/runner.py b/prompt_testing/runner.py
@@ -10,14 +10,14 @@
 
 from anthropic import Anthropic
 from dotenv import load_dotenv
-from ruamel.yaml import YAML
 
 from app.explain import MAX_TOKENS, MODEL, prepare_structured_data
 from app.explain_api import AssemblyItem, ExplainRequest
 from app.explanation_types import AudienceLevel, ExplanationType
 from app.metrics import NoopMetricsProvider
 from prompt_testing.evaluation.claude_reviewer import ClaudeReviewer
 from prompt_testing.evaluation.scorer import load_all_test_cases
+from prompt_testing.yaml_utils import load_yaml_file
 
 # Load environment variables from .env file
 load_dotenv()
@@ -54,9 +54,7 @@ def load_prompt(self, prompt_version: str) -> dict[str, Any]:
         if not prompt_file.exists():
             raise FileNotFoundError(f"Prompt file not found: {prompt_file}")
 
-        yaml = YAML(typ="safe")
-        with prompt_file.open(encoding="utf-8") as f:
-            return yaml.load(f)
+        return load_yaml_file(prompt_file)
 
     def convert_test_case_to_request(self, test_case: dict[str, Any]) -> ExplainRequest:
         """Convert a test case to an ExplainRequest object."""
diff --git a/prompt_testing/test_yaml_utils.py b/prompt_testing/test_yaml_utils.py
@@ -0,0 +1,203 @@
+"""Tests for YAML utilities."""
+
+import io
+import tempfile
+from pathlib import Path
+
+import pytest
+from ruamel.yaml import YAMLError
+
+from prompt_testing.yaml_utils import create_yaml_dumper, create_yaml_loader, load_yaml_file, save_yaml_file
+
+
+class TestYAMLUtils:
+    """Test YAML utility functions."""
+
+    def test_multiline_string_formatting(self):
+        """Test that multiline strings are formatted with literal block style."""
+        yaml = create_yaml_dumper()
+
+        data = {
+            "single_line": "This is a single line",
+            "multiline": "This is line one\nThis is line two\nThis is line three",
+            "nested": {"another_multiline": "First line\nSecond line"},
+        }
+
+        # Dump to string
+        stream = io.StringIO()
+        yaml.dump(data, stream)
+        result = stream.getvalue()
+
+        # Check that multiline strings use literal block style
+        assert "multiline: |" in result
+        assert "another_multiline: |" in result
+        # Single line should not use block style
+        assert "single_line: |" not in result
+
+    def test_preserves_comments(self):
+        """Test that comments are preserved when loading and saving."""
+        # Create a YAML file with comments
+        yaml_content = """# This is a file comment
+name: test  # This is an inline comment
+# This is a comment before multiline
+description: |
+  This is a multiline
+  description with multiple lines
+
+# Section comment
+section:
+  key: value  # Another inline comment
+"""
+
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+            f.write(yaml_content)
+            temp_path = Path(f.name)
+
+        try:
+            # Load with dumper
+            yaml = create_yaml_dumper()
+            with temp_path.open() as f:
+                data = yaml.load(f)
+
+            # Save it back
+            output = io.StringIO()
+            yaml.dump(data, output)
+            result = output.getvalue()
+
+            # Check that comments are preserved
+            assert "# This is a file comment" in result
+            assert "# This is an inline comment" in result
+            assert "# This is a comment before multiline" in result
+            assert "# Section comment" in result
+            assert "# Another inline comment" in result
+
+        finally:
+            temp_path.unlink()
+
+    def test_preserves_formatting(self):
+        """Test that original formatting is preserved when loading and saving."""
+        yaml_content = """name: "quoted string"
+unquoted: string
+number: 42
+multiline: |
+  Line 1
+  Line 2
+  Line 3
+list:
+  - item1
+  - item2
+  - item3
+"""
+
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+            f.write(yaml_content)
+            temp_path = Path(f.name)
+
+        try:
+            # Load with dumper
+            yaml = create_yaml_dumper()
+            with temp_path.open() as f:
+                data = yaml.load(f)
+
+            # Save it back
+            output = io.StringIO()
+            yaml.dump(data, output)
+            result = output.getvalue()
+
+            # Check that formatting is preserved
+            assert '"quoted string"' in result  # Quotes preserved
+            assert "unquoted: string" in result  # No quotes added
+            assert "multiline: |" in result  # Block style preserved
+
+        finally:
+            temp_path.unlink()
+
+    def test_load_yaml_file(self):
+        """Test load_yaml_file function."""
+        yaml_content = """
+name: test
+items:
+  - one
+  - two
+  - three
+metadata:
+  version: 1.0
+  description: |
+    A test file
+    with multiple lines
+"""
+
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+            f.write(yaml_content)
+            temp_path = Path(f.name)
+
+        try:
+            # Load the file
+            data = load_yaml_file(temp_path)
+
+            # Verify content
+            assert data["name"] == "test"
+            assert data["items"] == ["one", "two", "three"]
+            assert data["metadata"]["version"] == 1.0
+            assert "A test file\nwith multiple lines" in data["metadata"]["description"]
+
+        finally:
+            temp_path.unlink()
+
+    def test_save_yaml_file_with_multiline(self):
+        """Test save_yaml_file properly formats multiline strings."""
+        data = {
+            "title": "Test Document",
+            "content": "Line 1\nLine 2\nLine 3",
+            "sections": {
+                "intro": "Single line intro",
+                "body": "This is the body\nwith multiple paragraphs\nand line breaks",
+            },
+        }
+
+        with tempfile.TemporaryDirectory() as tmpdir:
+            output_path = Path(tmpdir) / "test.yaml"
+
+            # Save the file
+            save_yaml_file(output_path, data)
+
+            # Read it back as text to check formatting
+            content = output_path.read_text()
+
+            # Check multiline strings use block style
+            assert "content: |" in content
+            assert "body: |" in content
+            # Single line should not use block style
+            assert "title: |" not in content
+            assert "intro: |" not in content
+
+    def test_safe_loader_does_not_execute_code(self):
+        """Test that safe loader doesn't execute arbitrary code."""
+        # YAML with Python code that should not be executed
+        dangerous_yaml = """
+test: !!python/object/apply:os.system ['echo "danger"']
+"""
+
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
+            f.write(dangerous_yaml)
+            temp_path = Path(f.name)
+
+        try:
+            # This should raise an error, not execute the code
+            with pytest.raises(YAMLError):
+                load_yaml_file(temp_path)
+
+        finally:
+            temp_path.unlink()
+
+    def test_create_yaml_loader_is_safe(self):
+        """Test that create_yaml_loader returns a safe YAML instance."""
+        yaml = create_yaml_loader()
+
+        # Should not be able to load Python objects
+        dangerous_yaml = "test: !!python/object/apply:os.system ['echo danger']"
+
+        stream = io.StringIO(dangerous_yaml)
+
+        with pytest.raises(YAMLError):
+            yaml.load(stream)
diff --git a/prompt_testing/yaml_utils.py b/prompt_testing/yaml_utils.py