Skip to content

Commit 2117ba5

Browse files
committed
fix(dataset): validate expected file paths instead of glob counts
1 parent 360268a commit 2117ba5

File tree

1 file changed

+15
-5
lines changed

1 file changed

+15
-5
lines changed

src/opentau/datasets/lerobot_dataset.py

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1694,11 +1694,21 @@ def save_episode(self, episode_data: dict | None = None) -> None:
1694 1694
self.tolerance_s,
1695 1695
)
1696 1696

1697-
video_files = list(self.root.rglob("*.mp4"))
1698-
assert len(video_files) == self.num_episodes * len(self.meta.video_keys)
1699-
1700-
parquet_files = list(self.root.rglob("*.parquet"))
1701-
assert len(parquet_files) == self.num_episodes
1697+
expected_episodes = self.meta.total_episodes
1698+
missing_videos: list[str] = []
1699+
for ep_idx in range(expected_episodes):
1700+
for vid_key in self.meta.video_keys:
1701+
video_path = self.root / self.meta.get_video_file_path(ep_idx, vid_key)
1702+
if not video_path.is_file():
1703+
missing_videos.append(str(video_path))
1704+
assert not missing_videos, "Missing expected encoded videos:\n" + "\n".join(missing_videos)
1705+
1706+
missing_parquet: list[str] = []
1707+
for ep_idx in range(expected_episodes):
1708+
parquet_path = self.root / self.meta.get_data_file_path(ep_idx)
1709+
if not parquet_path.is_file():
1710+
missing_parquet.append(str(parquet_path))
1711+
assert not missing_parquet, "Missing expected parquet episode files:\n" + "\n".join(missing_parquet)
1702 1712

1703 1713
# delete images
1704 1714
img_dir = self.root / "images"

0 commit comments

Comments
 (0)