Skip to content

Commit d273f1d

Browse files
mattgodbolt and claude committed
Improve prompt testing framework YAML handling and validation
- Add validation requiring --output when using --create-improved flag
- Create centralized yaml_utils module for consistent YAML formatting:
  - Literal block style (|) for multiline strings
  - 120 character line width
  - Preserves quotes, comments and formatting
  - Single dumper that works for all use cases
- Update all YAML usage to use the centralized utilities
- Add comprehensive tests for YAML round-tripping with comments and multiline strings
- Simplify API by removing unnecessary roundtrip parameter

All tests pass and YAML formatting is now consistent across the codebase.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 32d4633 commit d273f1d

File tree

8 files changed

+296
-22
lines changed

8 files changed

+296
-22
lines changed

.idea/misc.xml

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

prompt_testing/cli.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -200,6 +200,11 @@ def cmd_analyze(args):
200200
def cmd_improve(args):
201201
"""Analyze results and suggest prompt improvements."""
202202

203+
# Validate arguments
204+
if args.create_improved and not args.output:
205+
print("Error: --output is required when using --create-improved")
206+
return 1
207+
203208
optimizer = PromptOptimizer(args.project_root)
204209

205210
# If specific results file provided

prompt_testing/enricher.py

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -4,9 +4,8 @@
44
from pathlib import Path
55
from typing import Any
66

7-
from ruamel.yaml import YAML
8-
97
from prompt_testing.ce_api import CompilationError, CompileRequest, CompilerExplorerClient
8+
from prompt_testing.yaml_utils import create_yaml_dumper
109

1110

1211
class TestCaseEnricher:
@@ -102,11 +101,8 @@ def enrich_file(
102101
Returns:
103102
Path to enriched output file
104103
"""
105-
# Initialize YAML handler with round-trip mode to preserve formatting
106-
yaml = YAML()
107-
yaml.preserve_quotes = True
108-
yaml.width = 120
109-
yaml.default_flow_style = False
104+
# Initialize YAML handler to preserve formatting
105+
yaml = create_yaml_dumper()
110106

111107
# Load input file
112108
with input_file.open(encoding="utf-8") as f:

prompt_testing/evaluation/prompt_advisor.py

Lines changed: 4 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,8 @@
88

99
from anthropic import Anthropic
1010

11+
from prompt_testing.yaml_utils import create_yaml_dumper, load_yaml_file
12+
1113

1214
class PromptAdvisor:
1315
"""Uses Claude to analyze test results and suggest prompt improvements."""
@@ -297,12 +299,7 @@ def analyze_and_improve(
297299

298300
# Load current prompt
299301
prompt_path = self.prompts_dir / f"{prompt_version}.yaml"
300-
from ruamel.yaml import YAML
301-
302-
yaml = YAML(typ="safe")
303-
304-
with prompt_path.open() as f:
305-
current_prompt = yaml.load(f)
302+
current_prompt = load_yaml_file(prompt_path)
306303

307304
# Get improvement suggestions
308305
suggestions = self.advisor.analyze_results_and_suggest_improvements(
@@ -323,8 +320,7 @@ def analyze_and_improve(
323320
)
324321

325322
new_prompt_path = self.prompts_dir / f"{output_name}.yaml"
326-
yaml_out = YAML()
327-
yaml_out.default_flow_style = False
323+
yaml_out = create_yaml_dumper()
328324
with new_prompt_path.open("w") as f:
329325
yaml_out.dump(new_prompt, f)
330326

prompt_testing/evaluation/scorer.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -5,13 +5,13 @@
55
from pathlib import Path
66
from typing import Any
77

8-
from ruamel.yaml import YAML
8+
from prompt_testing.yaml_utils import create_yaml_loader
99

1010

1111
def load_test_case(file_path: str, case_id: str) -> dict[str, Any]:
1212
"""Load a specific test case from a YAML file."""
1313
path = Path(file_path)
14-
yaml = YAML(typ="safe")
14+
yaml = create_yaml_loader()
1515
with path.open(encoding="utf-8") as f:
1616
data = yaml.load(f)
1717

@@ -26,7 +26,7 @@ def load_all_test_cases(test_cases_dir: str) -> list[dict[str, Any]]:
2626
"""Load all test cases from the test_cases directory."""
2727
all_cases = []
2828
test_dir = Path(test_cases_dir)
29-
yaml = YAML(typ="safe")
29+
yaml = create_yaml_loader()
3030

3131
for file_path in test_dir.glob("*.yaml"):
3232
with file_path.open(encoding="utf-8") as f:

prompt_testing/runner.py

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,14 @@
1010

1111
from anthropic import Anthropic
1212
from dotenv import load_dotenv
13-
from ruamel.yaml import YAML
1413

1514
from app.explain import MAX_TOKENS, MODEL, prepare_structured_data
1615
from app.explain_api import AssemblyItem, ExplainRequest
1716
from app.explanation_types import AudienceLevel, ExplanationType
1817
from app.metrics import NoopMetricsProvider
1918
from prompt_testing.evaluation.claude_reviewer import ClaudeReviewer
2019
from prompt_testing.evaluation.scorer import load_all_test_cases
20+
from prompt_testing.yaml_utils import load_yaml_file
2121

2222
# Load environment variables from .env file
2323
load_dotenv()
@@ -54,9 +54,7 @@ def load_prompt(self, prompt_version: str) -> dict[str, Any]:
5454
if not prompt_file.exists():
5555
raise FileNotFoundError(f"Prompt file not found: {prompt_file}")
5656

57-
yaml = YAML(typ="safe")
58-
with prompt_file.open(encoding="utf-8") as f:
59-
return yaml.load(f)
57+
return load_yaml_file(prompt_file)
6058

6159
def convert_test_case_to_request(self, test_case: dict[str, Any]) -> ExplainRequest:
6260
"""Convert a test case to an ExplainRequest object."""

prompt_testing/test_yaml_utils.py

Lines changed: 203 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,203 @@
1+
"""Tests for YAML utilities."""
2+
3+
import io
4+
import tempfile
5+
from pathlib import Path
6+
7+
import pytest
8+
from ruamel.yaml import YAMLError
9+
10+
from prompt_testing.yaml_utils import create_yaml_dumper, create_yaml_loader, load_yaml_file, save_yaml_file
11+
12+
13+
class TestYAMLUtils:
14+
"""Test YAML utility functions."""
15+
16+
def test_multiline_string_formatting(self):
17+
"""Test that multiline strings are formatted with literal block style."""
18+
yaml = create_yaml_dumper()
19+
20+
data = {
21+
"single_line": "This is a single line",
22+
"multiline": "This is line one\nThis is line two\nThis is line three",
23+
"nested": {"another_multiline": "First line\nSecond line"},
24+
}
25+
26+
# Dump to string
27+
stream = io.StringIO()
28+
yaml.dump(data, stream)
29+
result = stream.getvalue()
30+
31+
# Check that multiline strings use literal block style
32+
assert "multiline: |" in result
33+
assert "another_multiline: |" in result
34+
# Single line should not use block style
35+
assert "single_line: |" not in result
36+
37+
def test_preserves_comments(self):
38+
"""Test that comments are preserved when loading and saving."""
39+
# Create a YAML file with comments
40+
yaml_content = """# This is a file comment
41+
name: test # This is an inline comment
42+
# This is a comment before multiline
43+
description: |
44+
This is a multiline
45+
description with multiple lines
46+
47+
# Section comment
48+
section:
49+
key: value # Another inline comment
50+
"""
51+
52+
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
53+
f.write(yaml_content)
54+
temp_path = Path(f.name)
55+
56+
try:
57+
# Load with dumper
58+
yaml = create_yaml_dumper()
59+
with temp_path.open() as f:
60+
data = yaml.load(f)
61+
62+
# Save it back
63+
output = io.StringIO()
64+
yaml.dump(data, output)
65+
result = output.getvalue()
66+
67+
# Check that comments are preserved
68+
assert "# This is a file comment" in result
69+
assert "# This is an inline comment" in result
70+
assert "# This is a comment before multiline" in result
71+
assert "# Section comment" in result
72+
assert "# Another inline comment" in result
73+
74+
finally:
75+
temp_path.unlink()
76+
77+
def test_preserves_formatting(self):
78+
"""Test that original formatting is preserved when loading and saving."""
79+
yaml_content = """name: "quoted string"
80+
unquoted: string
81+
number: 42
82+
multiline: |
83+
Line 1
84+
Line 2
85+
Line 3
86+
list:
87+
- item1
88+
- item2
89+
- item3
90+
"""
91+
92+
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
93+
f.write(yaml_content)
94+
temp_path = Path(f.name)
95+
96+
try:
97+
# Load with dumper
98+
yaml = create_yaml_dumper()
99+
with temp_path.open() as f:
100+
data = yaml.load(f)
101+
102+
# Save it back
103+
output = io.StringIO()
104+
yaml.dump(data, output)
105+
result = output.getvalue()
106+
107+
# Check that formatting is preserved
108+
assert '"quoted string"' in result # Quotes preserved
109+
assert "unquoted: string" in result # No quotes added
110+
assert "multiline: |" in result # Block style preserved
111+
112+
finally:
113+
temp_path.unlink()
114+
115+
def test_load_yaml_file(self):
116+
"""Test load_yaml_file function."""
117+
yaml_content = """
118+
name: test
119+
items:
120+
- one
121+
- two
122+
- three
123+
metadata:
124+
version: 1.0
125+
description: |
126+
A test file
127+
with multiple lines
128+
"""
129+
130+
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
131+
f.write(yaml_content)
132+
temp_path = Path(f.name)
133+
134+
try:
135+
# Load the file
136+
data = load_yaml_file(temp_path)
137+
138+
# Verify content
139+
assert data["name"] == "test"
140+
assert data["items"] == ["one", "two", "three"]
141+
assert data["metadata"]["version"] == 1.0
142+
assert "A test file\nwith multiple lines" in data["metadata"]["description"]
143+
144+
finally:
145+
temp_path.unlink()
146+
147+
def test_save_yaml_file_with_multiline(self):
148+
"""Test save_yaml_file properly formats multiline strings."""
149+
data = {
150+
"title": "Test Document",
151+
"content": "Line 1\nLine 2\nLine 3",
152+
"sections": {
153+
"intro": "Single line intro",
154+
"body": "This is the body\nwith multiple paragraphs\nand line breaks",
155+
},
156+
}
157+
158+
with tempfile.TemporaryDirectory() as tmpdir:
159+
output_path = Path(tmpdir) / "test.yaml"
160+
161+
# Save the file
162+
save_yaml_file(output_path, data)
163+
164+
# Read it back as text to check formatting
165+
content = output_path.read_text()
166+
167+
# Check multiline strings use block style
168+
assert "content: |" in content
169+
assert "body: |" in content
170+
# Single line should not use block style
171+
assert "title: |" not in content
172+
assert "intro: |" not in content
173+
174+
def test_safe_loader_does_not_execute_code(self):
175+
"""Test that safe loader doesn't execute arbitrary code."""
176+
# YAML with Python code that should not be executed
177+
dangerous_yaml = """
178+
test: !!python/object/apply:os.system ['echo "danger"']
179+
"""
180+
181+
with tempfile.NamedTemporaryFile(mode="w", suffix=".yaml", delete=False) as f:
182+
f.write(dangerous_yaml)
183+
temp_path = Path(f.name)
184+
185+
try:
186+
# This should raise an error, not execute the code
187+
with pytest.raises(YAMLError):
188+
load_yaml_file(temp_path)
189+
190+
finally:
191+
temp_path.unlink()
192+
193+
def test_create_yaml_loader_is_safe(self):
194+
"""Test that create_yaml_loader returns a safe YAML instance."""
195+
yaml = create_yaml_loader()
196+
197+
# Should not be able to load Python objects
198+
dangerous_yaml = "test: !!python/object/apply:os.system ['echo danger']"
199+
200+
stream = io.StringIO(dangerous_yaml)
201+
202+
with pytest.raises(YAMLError):
203+
yaml.load(stream)

0 commit comments

Comments
 (0)