Skip to content
This repository was archived by the owner on Sep 23, 2025. It is now read-only.

Commit ea8b742

Browse files
nikomatsakisclaude
andcommitted
Improve code documentation and modernize Python style
Code review improvements: - Add comprehensive Attributes sections to all dataclass docstrings - Replace generic Dict[str, List[str]] with explicit response_should_contain/response_should_not_contain fields - Use modern Python class field declarations with type annotations - Remove inline comments in favor of proper docstrings - Document parameter validation options with examples The code is now more self-documenting and follows Python best practices for type hints and documentation. 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]>
1 parent ac7f005 commit ea8b742

File tree

1 file changed

+77
-24
lines changed

1 file changed

+77
-24
lines changed

dialectic/dialectic.py

Lines changed: 77 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,33 @@
1919

2020
@dataclass
2121
class ToolExpectation:
22-
"""Expected tool usage with parameter validation rules."""
22+
"""
23+
Expected tool usage with parameter validation rules.
24+
25+
Attributes:
26+
tool: Name of the tool that should be called (e.g., 'Bash', 'mcp__socratic-shell__consolidate')
27+
parameters: Optional validation rules for tool parameters. Can be:
28+
- Dict with 'should_contain' key for substring matching
29+
- Dict with 'should_be' key for exact matching
30+
- Direct value for simple equality check
31+
Example: {'content': {'should_contain': ['JWT', 'tokens']},
32+
'category': 'technical-insight'}
33+
"""
2334
tool: str
24-
parameters: Optional[Dict[str, Any]] = None # Parameter validation rules
35+
parameters: Optional[Dict[str, Any]] = None
2536

2637

2738
@dataclass
2839
class TestCase:
29-
"""A complete test case loaded from YAML."""
40+
"""
41+
A complete test case loaded from YAML.
42+
43+
Attributes:
44+
name: Short descriptive name for the test case
45+
description: Longer explanation of what the test validates
46+
tags: Optional list of tags for categorizing tests (e.g., ['memory', 'consolidation'])
47+
conversation: List of conversation steps to execute in order
48+
"""
3049
name: str
3150
description: str
3251
tags: List[str]
@@ -35,10 +54,25 @@ class TestCase:
3554

3655
@dataclass
3756
class ConversationStep:
38-
"""A single step in a test conversation loaded from YAML."""
57+
"""
58+
A single step in a test conversation loaded from YAML.
59+
60+
Each step represents one user message and the expected LLM behavior.
61+
62+
Attributes:
63+
user_message: The message sent to the LLM in this conversation step
64+
response_should_contain: Phrases that must appear in the LLM's response
65+
Example: ['listening', 'tell me more']
66+
response_should_not_contain: Phrases that must NOT appear in the LLM's response
67+
Example: ['I'll help you set up', 'Let me check']
68+
expected_tools: Tools we expect the LLM to call (or not call) in this step.
69+
Empty list means no tools should be called.
70+
Each ToolExpectation can specify parameter validation rules.
71+
"""
3972
user_message: str
40-
expected_response: Dict[str, List[str]]
41-
expected_tools: List[ToolExpectation]
73+
response_should_contain: List[str] = None
74+
response_should_not_contain: List[str] = None
75+
expected_tools: List[ToolExpectation] = None
4276

4377

4478
@dataclass
@@ -49,25 +83,46 @@ class TestResult:
4983
This captures what happened when we sent a message to the LLM and
5084
validated the response against our YAML test expectations. It's the
5185
"report card" for one step in a conversation test.
86+
87+
Attributes:
88+
success: Did the LLM response pass all validations?
89+
found_phrases: Required phrases that were found in response
90+
missing_phrases: Required phrases that were missing from response
91+
unexpected_phrases: Forbidden phrases that appeared in response
92+
found_tools: Expected tools that were called with correct parameters
93+
invalid_tools: Expected tools that were called with wrong parameters
94+
missing_tools: Expected tools that were not called
95+
unexpected_tools: Tools that were called but not expected
96+
(includes full tool info: {'tool': name, 'parameters': {...}})
97+
response_text: Full LLM response text for debugging
98+
response_length: Length of response in characters
5299
"""
53-
success: bool # Did the LLM response pass all validations?
54-
found_phrases: List[str] # Required phrases that were found in response
55-
missing_phrases: List[str] # Required phrases that were missing from response
56-
unexpected_phrases: List[str] # Forbidden phrases that appeared in response
57-
found_tools: List[ToolExpectation] # Expected tools that were called with correct parameters
58-
invalid_tools: List[ToolExpectation] # Expected tools that were called with wrong parameters
59-
missing_tools: List[ToolExpectation] # Expected tools that were not called
60-
unexpected_tools: List[Dict[str, Any]] # Tools that were called but not expected
61-
response_text: str # Full LLM response text for debugging
62-
response_length: int # Length of response in characters
100+
success: bool
101+
found_phrases: List[str]
102+
missing_phrases: List[str]
103+
unexpected_phrases: List[str]
104+
found_tools: List[ToolExpectation]
105+
invalid_tools: List[ToolExpectation]
106+
missing_tools: List[ToolExpectation]
107+
unexpected_tools: List[Dict[str, Any]]
108+
response_text: str
109+
response_length: int
63110

64111

65112
class DialecticRunner:
66-
"""YAML-based test runner for prompt engineering validation."""
113+
"""
114+
YAML-based test runner for prompt engineering validation.
115+
116+
Attributes:
117+
results: Accumulates TestResult objects from all conversation steps executed.
118+
Used for final summary statistics and debugging.
119+
"""
120+
121+
results: List[TestResult]
67122

68123
def __init__(self):
69124
"""Initialize the test runner."""
70-
self.results: List[TestResult] = []
125+
self.results = []
71126

72127
def load_test_case(self, yaml_path: Path) -> TestCase:
73128
"""Load a test case from a YAML file."""
@@ -93,7 +148,8 @@ def load_test_case(self, yaml_path: Path) -> TestCase:
93148

94149
step = ConversationStep(
95150
user_message=step_data['user'],
96-
expected_response=expected_response,
151+
response_should_contain=expected_response.get('should_contain', []),
152+
response_should_not_contain=expected_response.get('should_not_contain', []),
97153
expected_tools=expected_tools
98154
)
99155
conversation.append(step)
@@ -169,15 +225,12 @@ async def run_conversation_step(self, step: ConversationStep) -> TestResult:
169225
print(f"🤖 Assistant: {response_text[:200]}{'...' if len(response_text) > 200 else ''}")
170226

171227
# Validate response content
172-
should_contain = step.expected_response.get('should_contain', [])
173-
should_not_contain = step.expected_response.get('should_not_contain', [])
174-
175228
found_phrases = []
176229
missing_phrases = []
177230
unexpected_phrases = []
178231

179232
# Check required phrases
180-
for phrase in should_contain:
233+
for phrase in (step.response_should_contain or []):
181234
if phrase.lower() in response_text.lower():
182235
found_phrases.append(phrase)
183236
print(f"✅ Found required: '{phrase}'")
@@ -186,7 +239,7 @@ async def run_conversation_step(self, step: ConversationStep) -> TestResult:
186239
print(f"❌ Missing required: '{phrase}'")
187240

188241
# Check forbidden phrases
189-
for phrase in should_not_contain:
242+
for phrase in (step.response_should_not_contain or []):
190243
if phrase.lower() in response_text.lower():
191244
unexpected_phrases.append(phrase)
192245
print(f"❌ Found forbidden: '{phrase}'")

0 commit comments

Comments
 (0)