Skip to content

Commit 69502fa

Browse files
author
Your Name
committed
additional fixes on the captioning results
1 parent: 28deddd · commit: 69502fa

File tree

1 file changed

+27
-4
lines changed

1 file changed

+27
-4
lines changed

examples/droid/droid_vlm_demo.py

Lines changed: 27 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,18 @@ def extract_caption_and_description(trajectory: Dict[str, Any]) -> Dict[str, Any
170170
file_path = trajectory.get("__file_path__", "")
171171
traj_name = Path(file_path).stem
172172

173+
# Only process successful trajectories
174+
if "success" not in file_path.lower():
175+
return {
176+
"trajectory_name": traj_name,
177+
"ground_truth_description": "",
178+
"vlm_caption": "",
179+
"has_ground_truth": False,
180+
"has_caption": False,
181+
"is_match": False,
182+
"comparison_explanation": "Skipped - not a successful trajectory"
183+
}
184+
173185
# Parse metadata to get language description
174186
ground_truth_description = ""
175187
try:
@@ -264,16 +276,19 @@ def extract_caption_and_description(trajectory: Dict[str, Any]) -> Dict[str, Any
264276
from robodm.agent.vlm_service import get_vlm_service
265277
vlm_service = get_vlm_service()
266278

267-
comparison_prompt = f"""Compare these two robot task descriptions and determine if they describe the same task:
279+
comparison_prompt = f"""Compare these two robot task descriptions and determine if they describe the same or similar task:
268280
269281
Description 1 (Ground Truth): {ground_truth_description}
270282
271283
Description 2 (VLM Caption): {vlm_caption}
272284
285+
Be generous in your matching. Only say NO if they describe COMPLETELY different tasks with different goals.
286+
It is fine that the VLM Caption is more specific compared to the Ground Truth.
287+
273288
Respond with only YES or NO followed by a brief explanation.
274289
275290
Format:
276-
YES/NO: Your explanation here"""
291+
YES/NO: Your one sentence explanation"""
277292

278293
comparison_response = vlm_service.generate_code(comparison_prompt)
279294

@@ -331,11 +346,16 @@ def extract_caption_and_description(trajectory: Dict[str, Any]) -> Dict[str, Any
331346
true_negatives = 0 # VLM correctly identifies non-match (not applicable here)
332347

333348
valid_comparisons = 0
349+
skipped_trajectories = 0
334350

335351
print("\nDetailed Caption Comparison Results:")
336352
print("-" * 80)
337353

338354
for result in results:
355+
if not result["has_ground_truth"] and not result["has_caption"] and "Skipped" in result.get("comparison_explanation", ""):
356+
skipped_trajectories += 1
357+
continue
358+
339359
if result["has_ground_truth"] and result["has_caption"]:
340360
valid_comparisons += 1
341361

@@ -371,7 +391,9 @@ def extract_caption_and_description(trajectory: Dict[str, Any]) -> Dict[str, Any
371391
print("⚠️ No valid comparisons found (missing ground truth or captions)")
372392

373393
print(f"\nOverall Captioning Metrics:")
374-
print(f"Valid comparisons: {valid_comparisons}/{len(results)}")
394+
print(f"Total trajectories: {len(results)}")
395+
print(f"Successful trajectories processed: {valid_comparisons}")
396+
print(f"Failed trajectories skipped: {skipped_trajectories}")
375397
print(f"Matches (True Positives): {true_positives}")
376398
print(f"No Matches (False Negatives): {false_negatives}")
377399
print(f"Precision: {precision:.3f}")
@@ -384,7 +406,8 @@ def extract_caption_and_description(trajectory: Dict[str, Any]) -> Dict[str, Any
384406
f.write(f"Trajectory Captioning F1 Summary\n")
385407
f.write(f"================================\n")
386408
f.write(f"Total trajectories: {len(results)}\n")
387-
f.write(f"Valid comparisons: {valid_comparisons}\n")
409+
f.write(f"Successful trajectories processed: {valid_comparisons}\n")
410+
f.write(f"Failed trajectories skipped: {skipped_trajectories}\n")
388411
f.write(f"Matches (True Positives): {true_positives}\n")
389412
f.write(f"No Matches (False Negatives): {false_negatives}\n")
390413
f.write(f"Precision: {precision:.3f}\n")

0 commit comments

Comments
 (0)