@@ -175,7 +175,7 @@ async def generate_dynamic_steps_with_llm(
175175Executed Steps (for context):
176176{ json .dumps (executed_steps_detail , ensure_ascii = False , indent = 2 ) if executed_steps_detail else "None" }
177177
178- Remaining Steps (may need adjustment after insertion ):
178+ Remaining Steps (may need adjustment after replan ):
179179{ json .dumps (remaining_steps , ensure_ascii = False , indent = 2 ) if remaining_steps else "None" }
180180"""
181181
@@ -201,27 +201,51 @@ async def generate_dynamic_steps_with_llm(
201201
202202{ test_case_context }
203203
204- ## Generation Requirements
204+ ## Structured Analysis Requirements
205205Max steps to generate: { max_steps }
206-
207- Please analyze these new elements and decide on the best strategy:
208- 1. **STRATEGY DECISION**: Choose "insert" to add steps alongside existing ones, or "replace" to override remaining steps
209- 2. **STEP GENERATION**: Create test steps that enhance coverage without duplicating completed work
210- 3. **FLOW INTEGRATION**: Ensure steps fit naturally into the test narrative
211-
212- Return your response in this exact format:
206+ Test Objective: "{ test_objective } "
207+
208+ ### Step 1: Calculate Objective Completion Score
209+ Assess what percentage of the remaining test objective can be achieved using ONLY these new elements:
210+ - **100%**: New elements fully complete ALL remaining objectives independently
211+ - **75-99%**: Elements achieve most objectives with minor gaps
212+ - **25-74%**: Significant contribution but requires original steps
213+ - **0-24%**: Minimal or supplementary value only
214+
215+ ### Step 2: Apply Quantitative Decision Framework
216+ **Primary Decision Rules:**
217+ - Score ≥ 75% AND remaining steps don't test different aspects → "replace"
218+ - Score < 75% OR remaining steps test different aspects → "insert"
219+
220+ ### Step 3: Binary Validation Checklist
221+ Answer these YES/NO questions:
222+ □ Can new elements complete the test objective independently?
223+ □ Do remaining steps become unnecessary after using new elements?
224+ □ Do new elements test the SAME aspects as remaining steps?
225+ □ Is there a more efficient path through new elements?
226+
227+ **Scoring**: 3+ YES → "replace", ≤2 YES → "insert"
228+
229+ ### Step 4: Generate Structured Response
230+ Return your analysis in this EXACT format:
213231```json
214232{{
233+ "analysis": {{
234+ "objective_completion_score": [0-100],
235+ "can_complete_objective_alone": [true/false],
236+ "remaining_steps_redundant": [true/false],
237+ "confidence_level": ["HIGH"|"MEDIUM"|"LOW"]
238+ }},
215239 "strategy": "insert" or "replace",
216- "reason": "Clear explanation for why you chose this strategy ",
240+ "reason": "Based on [X]% completion score: [detailed explanation of decision logic] ",
217241 "steps": [
218242 {{"action": "specific action description"}},
219243 {{"verify": "specific verification description"}}
220244 ]
221245}}
222246```
223247
224- If elements are not important or irrelevant, return: {{" strategy": "insert", "reason": "explanation ", "steps": []}}
248+ **For irrelevant elements**: {{"analysis": {{"objective_completion_score": 0, "can_complete_objective_alone": false, "remaining_steps_redundant": false, "confidence_level": "HIGH"}}, " strategy": "insert", "reason": "Elements provide no functional value ", "steps": []}}
225249 """
226250
227251 logging .debug (f"Requesting LLM to generate dynamic steps for { len (new_elements )} new elements" )
@@ -265,26 +289,62 @@ async def generate_dynamic_steps_with_llm(
265289 reason = result .get ("reason" , "No reason provided" )
266290 steps = result .get ("steps" , [])
267291
292+ # Extract and validate analysis fields (new format)
293+ analysis = result .get ("analysis" , {})
294+ completion_score = analysis .get ("objective_completion_score" , 0 ) if isinstance (analysis , dict ) else 0
295+ can_complete_alone = analysis .get ("can_complete_objective_alone" , False ) if isinstance (analysis , dict ) else False
296+ steps_redundant = analysis .get ("remaining_steps_redundant" , False ) if isinstance (analysis , dict ) else False
297+ confidence = analysis .get ("confidence_level" , "MEDIUM" ) if isinstance (analysis , dict ) else "MEDIUM"
298+
268299 # Validate strategy value
269300 if strategy not in ["insert" , "replace" ]:
270301 logging .warning (f"Invalid strategy '{ strategy } ', defaulting to 'insert'" )
271302 strategy = "insert"
272303
304+ # Validate completion score if provided
305+ if not isinstance (completion_score , (int , float )) or not (0 <= completion_score <= 100 ):
306+ logging .debug (f"Invalid completion score { completion_score } , defaulting to 0" )
307+ completion_score = 0
308+
309+ # Validate confidence level
310+ if confidence not in ["HIGH" , "MEDIUM" , "LOW" ]:
311+ logging .debug (f"Invalid confidence level { confidence } , defaulting to MEDIUM" )
312+ confidence = "MEDIUM"
313+
273314 # Validate and limit step count
274315 valid_steps = []
275316 if isinstance (steps , list ):
276317 for step in steps [:max_steps ]:
277318 if isinstance (step , dict ) and ("action" in step or "verify" in step ):
278319 valid_steps .append (step )
279320
280- logging .info (f"Generated { len (valid_steps )} dynamic steps with strategy '{ strategy } ' from { len (new_elements )} new elements" )
321+ # Enhanced logging with analysis data
322+ if completion_score > 0 :
323+ logging .info (f"Generated { len (valid_steps )} dynamic steps with strategy '{ strategy } ' (score: { completion_score } %, confidence: { confidence } ) from { len (new_elements )} new elements" )
324+ else :
325+ logging .info (f"Generated { len (valid_steps )} dynamic steps with strategy '{ strategy } ' from { len (new_elements )} new elements" )
326+
281327 logging .debug (f"Strategy reason: { reason } " )
328+ if analysis :
329+ logging .debug (f"Analysis: completion_score={ completion_score } %, can_complete_alone={ can_complete_alone } , steps_redundant={ steps_redundant } , confidence={ confidence } " )
282330
283- return {
331+ # Return enhanced result with analysis
332+ result_data = {
284333 "strategy" : strategy ,
285334 "reason" : reason ,
286335 "steps" : valid_steps
287336 }
337+
338+ # Include analysis if provided (backward compatibility)
339+ if analysis :
340+ result_data ["analysis" ] = {
341+ "objective_completion_score" : completion_score ,
342+ "can_complete_objective_alone" : can_complete_alone ,
343+ "remaining_steps_redundant" : steps_redundant ,
344+ "confidence_level" : confidence
345+ }
346+
347+ return result_data
288348 else :
289349 logging .warning ("LLM response missing required fields (strategy, steps)" )
290350 return {"strategy" : "insert" , "reason" : "Invalid response format" , "steps" : []}
0 commit comments