Skip to content

Commit 09d9d7c

Browse files
feat: add prompt guidelines for dom diff
1 parent ff11211 commit 09d9d7c

File tree

2 files changed

+323
-32
lines changed

2 files changed

+323
-32
lines changed

webqa_agent/testers/case_gen/agents/execute_agent.py

Lines changed: 73 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,7 @@ async def generate_dynamic_steps_with_llm(
175175
Executed Steps (for context):
176176
{json.dumps(executed_steps_detail, ensure_ascii=False, indent=2) if executed_steps_detail else "None"}
177177
178-
Remaining Steps (may need adjustment after insertion):
178+
Remaining Steps (may need adjustment after replan):
179179
{json.dumps(remaining_steps, ensure_ascii=False, indent=2) if remaining_steps else "None"}
180180
"""
181181

@@ -201,27 +201,51 @@ async def generate_dynamic_steps_with_llm(
201201
202202
{test_case_context}
203203
204-
## Generation Requirements
204+
## Structured Analysis Requirements
205205
Max steps to generate: {max_steps}
206-
207-
Please analyze these new elements and decide on the best strategy:
208-
1. **STRATEGY DECISION**: Choose "insert" to add steps alongside existing ones, or "replace" to override remaining steps
209-
2. **STEP GENERATION**: Create test steps that enhance coverage without duplicating completed work
210-
3. **FLOW INTEGRATION**: Ensure steps fit naturally into the test narrative
211-
212-
Return your response in this exact format:
206+
Test Objective: "{test_objective}"
207+
208+
### Step 1: Calculate Objective Completion Score
209+
Assess what percentage of the remaining test objective can be achieved using ONLY these new elements:
210+
- **100%**: New elements fully complete ALL remaining objectives independently
211+
- **75-99%**: Elements achieve most objectives with minor gaps
212+
- **25-74%**: Significant contribution but requires original steps
213+
- **0-24%**: Minimal or supplementary value only
214+
215+
### Step 2: Apply Quantitative Decision Framework
216+
**Primary Decision Rules:**
217+
- Score ≥ 75% AND remaining steps don't test different aspects → "replace"
218+
- Score < 75% OR remaining steps test different aspects → "insert"
219+
220+
### Step 3: Binary Validation Checklist
221+
Answer these YES/NO questions:
222+
□ Can new elements complete the test objective independently?
223+
□ Do remaining steps become unnecessary after using new elements?
224+
□ Do new elements test the SAME aspects as remaining steps?
225+
□ Is there a more efficient path through new elements?
226+
227+
**Scoring**: 3+ YES → "replace", ≤2 YES → "insert"
228+
229+
### Step 4: Generate Structured Response
230+
Return your analysis in this EXACT format:
213231
```json
214232
{{
233+
"analysis": {{
234+
"objective_completion_score": [0-100],
235+
"can_complete_objective_alone": [true/false],
236+
"remaining_steps_redundant": [true/false],
237+
"confidence_level": ["HIGH"|"MEDIUM"|"LOW"]
238+
}},
215239
"strategy": "insert" or "replace",
216-
"reason": "Clear explanation for why you chose this strategy",
240+
"reason": "Based on [X]% completion score: [detailed explanation of decision logic]",
217241
"steps": [
218242
{{"action": "specific action description"}},
219243
{{"verify": "specific verification description"}}
220244
]
221245
}}
222246
```
223247
224-
If elements are not important or irrelevant, return: {{"strategy": "insert", "reason": "explanation", "steps": []}}
248+
**For irrelevant elements**: {{"analysis": {{"objective_completion_score": 0, "can_complete_objective_alone": false, "remaining_steps_redundant": false, "confidence_level": "HIGH"}}, "strategy": "insert", "reason": "Elements provide no functional value", "steps": []}}
225249
"""
226250

227251
logging.debug(f"Requesting LLM to generate dynamic steps for {len(new_elements)} new elements")
@@ -265,26 +289,62 @@ async def generate_dynamic_steps_with_llm(
265289
reason = result.get("reason", "No reason provided")
266290
steps = result.get("steps", [])
267291

292+
# Extract and validate analysis fields (new format)
293+
analysis = result.get("analysis", {})
294+
completion_score = analysis.get("objective_completion_score", 0) if isinstance(analysis, dict) else 0
295+
can_complete_alone = analysis.get("can_complete_objective_alone", False) if isinstance(analysis, dict) else False
296+
steps_redundant = analysis.get("remaining_steps_redundant", False) if isinstance(analysis, dict) else False
297+
confidence = analysis.get("confidence_level", "MEDIUM") if isinstance(analysis, dict) else "MEDIUM"
298+
268299
# Validate strategy value
269300
if strategy not in ["insert", "replace"]:
270301
logging.warning(f"Invalid strategy '{strategy}', defaulting to 'insert'")
271302
strategy = "insert"
272303

304+
# Validate completion score if provided
305+
if not isinstance(completion_score, (int, float)) or not (0 <= completion_score <= 100):
306+
logging.debug(f"Invalid completion score {completion_score}, defaulting to 0")
307+
completion_score = 0
308+
309+
# Validate confidence level
310+
if confidence not in ["HIGH", "MEDIUM", "LOW"]:
311+
logging.debug(f"Invalid confidence level {confidence}, defaulting to MEDIUM")
312+
confidence = "MEDIUM"
313+
273314
# Validate and limit step count
274315
valid_steps = []
275316
if isinstance(steps, list):
276317
for step in steps[:max_steps]:
277318
if isinstance(step, dict) and ("action" in step or "verify" in step):
278319
valid_steps.append(step)
279320

280-
logging.info(f"Generated {len(valid_steps)} dynamic steps with strategy '{strategy}' from {len(new_elements)} new elements")
321+
# Enhanced logging with analysis data
322+
if completion_score > 0:
323+
logging.info(f"Generated {len(valid_steps)} dynamic steps with strategy '{strategy}' (score: {completion_score}%, confidence: {confidence}) from {len(new_elements)} new elements")
324+
else:
325+
logging.info(f"Generated {len(valid_steps)} dynamic steps with strategy '{strategy}' from {len(new_elements)} new elements")
326+
281327
logging.debug(f"Strategy reason: {reason}")
328+
if analysis:
329+
logging.debug(f"Analysis: completion_score={completion_score}%, can_complete_alone={can_complete_alone}, steps_redundant={steps_redundant}, confidence={confidence}")
282330

283-
return {
331+
# Return enhanced result with analysis
332+
result_data = {
284333
"strategy": strategy,
285334
"reason": reason,
286335
"steps": valid_steps
287336
}
337+
338+
# Include analysis if provided (backward compatibility)
339+
if analysis:
340+
result_data["analysis"] = {
341+
"objective_completion_score": completion_score,
342+
"can_complete_objective_alone": can_complete_alone,
343+
"remaining_steps_redundant": steps_redundant,
344+
"confidence_level": confidence
345+
}
346+
347+
return result_data
288348
else:
289349
logging.warning("LLM response missing required fields (strategy, steps)")
290350
return {"strategy": "insert", "reason": "Invalid response format", "steps": []}

0 commit comments

Comments
 (0)