@@ -170,6 +170,18 @@ def extract_caption_and_description(trajectory: Dict[str, Any]) -> Dict[str, Any
170170 file_path = trajectory .get ("__file_path__" , "" )
171171 traj_name = Path (file_path ).stem
172172
173+ # Only process successful trajectories, i.e. those whose file path contains "success" (case-insensitive substring match)
174+ if "success" not in file_path .lower ():
175+ return {
176+ "trajectory_name" : traj_name ,
177+ "ground_truth_description" : "" ,
178+ "vlm_caption" : "" ,
179+ "has_ground_truth" : False ,
180+ "has_caption" : False ,
181+ "is_match" : False ,
182+ "comparison_explanation" : "Skipped - not a successful trajectory"
183+ }
184+
173185 # Parse metadata to get language description
174186 ground_truth_description = ""
175187 try :
@@ -264,16 +276,19 @@ def extract_caption_and_description(trajectory: Dict[str, Any]) -> Dict[str, Any
264276 from robodm .agent .vlm_service import get_vlm_service
265277 vlm_service = get_vlm_service ()
266278
267- comparison_prompt = f"""Compare these two robot task descriptions and determine if they describe the same task:
279+ comparison_prompt = f"""Compare these two robot task descriptions and determine if they describe the same or similar task:
268280
269281Description 1 (Ground Truth): { ground_truth_description }
270282
271283Description 2 (VLM Caption): { vlm_caption }
272284
285+ Be generous in your matching. Only say NO if they describe COMPLETELY different tasks with different goals.
286+ It is fine that the VLM Caption is more specific compared to the Ground Truth.
287+
273288Respond with only YES or NO followed by a brief explanation.
274289
275290Format:
276- YES/NO: Your explanation here """
291+ YES/NO: Your one sentence explanation """
277292
278293 comparison_response = vlm_service .generate_code (comparison_prompt )
279294
@@ -331,11 +346,16 @@ def extract_caption_and_description(trajectory: Dict[str, Any]) -> Dict[str, Any
331346 true_negatives = 0 # VLM correctly identifies non-match (not applicable here)
332347
333348 valid_comparisons = 0
349+ skipped_trajectories = 0
334350
335351 print ("\n Detailed Caption Comparison Results:" )
336352 print ("-" * 80 )
337353
338354 for result in results :
355+ if not result ["has_ground_truth" ] and not result ["has_caption" ] and "Skipped" in result .get ("comparison_explanation" , "" ):
356+ skipped_trajectories += 1
357+ continue
358+
339359 if result ["has_ground_truth" ] and result ["has_caption" ]:
340360 valid_comparisons += 1
341361
@@ -371,7 +391,9 @@ def extract_caption_and_description(trajectory: Dict[str, Any]) -> Dict[str, Any
371391 print ("⚠️ No valid comparisons found (missing ground truth or captions)" )
372392
373393 print (f"\n Overall Captioning Metrics:" )
374- print (f"Valid comparisons: { valid_comparisons } /{ len (results )} " )
394+ print (f"Total trajectories: { len (results )} " )
395+ print (f"Successful trajectories processed: { valid_comparisons } " )
396+ print (f"Failed trajectories skipped: { skipped_trajectories } " )
375397 print (f"Matches (True Positives): { true_positives } " )
376398 print (f"No Matches (False Negatives): { false_negatives } " )
377399 print (f"Precision: { precision :.3f} " )
@@ -384,7 +406,8 @@ def extract_caption_and_description(trajectory: Dict[str, Any]) -> Dict[str, Any
384406 f .write (f"Trajectory Captioning F1 Summary\n " )
385407 f .write (f"================================\n " )
386408 f .write (f"Total trajectories: { len (results )} \n " )
387- f .write (f"Valid comparisons: { valid_comparisons } \n " )
409+ f .write (f"Successful trajectories processed: { valid_comparisons } \n " )
410+ f .write (f"Failed trajectories skipped: { skipped_trajectories } \n " )
388411 f .write (f"Matches (True Positives): { true_positives } \n " )
389412 f .write (f"No Matches (False Negatives): { false_negatives } \n " )
390413 f .write (f"Precision: { precision :.3f} \n " )
0 commit comments