Skip to content
This repository was archived by the owner on Sep 23, 2025. It is now read-only.

Commit 78e47cb

Browse files
nikomatsakis and claude committed
Improve test execution flow and output visibility
Test runner improvements:
- Add fail-fast behavior: stop executing remaining conversation steps when one fails
- Implement real-time streaming response display instead of truncated preview
- Show tool calls on separate lines with full parameter details
- Add clear indication of skipped steps when tests fail early

These changes make test execution more efficient and provide better visibility into what Claude is actually doing during tests. The fail-fast approach prevents testing invalid conversation states, and streaming output helps debug failures.

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>
1 parent ea8b742 commit 78e47cb

File tree

1 file changed

+13
-1
lines changed

1 file changed

+13
-1
lines changed

dialectic/dialectic.py

Lines changed: 13 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -211,18 +211,24 @@ async def run_conversation_step(self, step: ConversationStep) -> TestResult:
211211
response_text = ""
212212
tools_used = []
213213

214+
print(f"🤖 Assistant: ", end="", flush=True)
214215
async for message in query(prompt=step.user_message):
215216
if isinstance(message, AssistantMessage):
216217
for block in message.content:
217218
if isinstance(block, TextBlock):
218219
response_text += block.text
220+
print(block.text, end="", flush=True)
219221
elif isinstance(block, ToolUseBlock):
220222
tools_used.append({
221223
'tool': block.name,
222224
'parameters': block.input
223225
})
226+
print(f"\n🔧 Tool: {block.name}")
227+
if block.input:
228+
print(f" Parameters: {block.input}")
229+
print(f"🤖 Assistant: ", end="", flush=True)
224230

225-
print(f"🤖 Assistant: {response_text[:200]}{'...' if len(response_text) > 200 else ''}")
231+
print() # New line after streaming response
226232

227233
# Validate response content
228234
found_phrases = []
@@ -322,6 +328,12 @@ async def run_test_case(self, test_case: TestCase) -> bool:
322328
else:
323329
print(f"❌ Step {i} FAILED")
324330
all_steps_passed = False
331+
332+
# Stop executing remaining steps - conversation state is now wrong
333+
remaining_steps = len(test_case.conversation) - i
334+
if remaining_steps > 0:
335+
print(f"⏭️ Skipping {remaining_steps} remaining step(s) due to failure")
336+
break
325337

326338
print(f"\n🎯 Test Case Result: {'PASSED' if all_steps_passed else 'FAILED'}")
327339
return all_steps_passed

0 commit comments

Comments
 (0)