Skip to content

Commit 455c029

Browse files
authored
Add Ability to Visualize Partial LeRobot Dataset (#11228)
### Related This is a subset of #11226 that leaves out the MP4 changes, which need more discussion. ### What Previously, when I tried to open a partial LeRobot dataset (i.e. when I only had a subset of the contents listed in the metadata), we got all kinds of weird errors. I made the episode collection a map instead of a vector, which allows us to display an arbitrary subset of episodes from a dataset.
1 parent b04361a commit 455c029

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

crates/store/re_data_loader/src/lerobot.rs

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
//! See [`LeRobotDataset`] for more information on the dataset format.
1111
1212
use std::borrow::Cow;
13+
use std::collections::BTreeMap;
1314
use std::fmt;
1415
use std::fs::File;
1516
use std::io::BufReader;
@@ -169,7 +170,7 @@ impl LeRobotDataset {
169170

170171
/// Read the Parquet data file for the provided episode.
171172
pub fn read_episode_data(&self, episode: EpisodeIndex) -> Result<RecordBatch, LeRobotError> {
172-
if self.metadata.episodes.get(episode.0).is_none() {
173+
if !self.metadata.episodes.contains_key(&episode) {
173174
return Err(LeRobotError::InvalidEpisodeIndex(episode));
174175
}
175176

@@ -219,11 +220,21 @@ impl LeRobotDataset {
219220
#[allow(dead_code)] // TODO(gijsd): The list of tasks is not used yet!
220221
pub struct LeRobotDatasetMetadata {
221222
pub info: LeRobotDatasetInfo,
222-
pub episodes: Vec<LeRobotDatasetEpisode>,
223+
pub episodes: BTreeMap<EpisodeIndex, LeRobotDatasetEpisode>,
223224
pub tasks: Vec<LeRobotDatasetTask>,
224225
}
225226

226227
impl LeRobotDatasetMetadata {
228+
/// Get the number of episodes listed in the dataset metadata.
229+
pub fn episode_count(&self) -> usize {
230+
self.episodes.len()
231+
}
232+
233+
/// Get episode metadata by index.
234+
pub fn get_episode(&self, episode: EpisodeIndex) -> Option<&LeRobotDatasetEpisode> {
235+
self.episodes.get(&episode)
236+
}
237+
227238
/// Loads all metadata files from the provided directory.
228239
///
229240
/// This method reads dataset metadata from JSON and JSONL files stored in the `meta/` directory.
@@ -232,10 +243,18 @@ impl LeRobotDatasetMetadata {
232243
let metadir = metadir.as_ref();
233244

234245
let info = LeRobotDatasetInfo::load_from_json_file(metadir.join("info.json"))?;
235-
let mut episodes = load_jsonl_file(metadir.join("episodes.jsonl"))?;
246+
let mut episodes_vec: Vec<LeRobotDatasetEpisode> =
247+
load_jsonl_file(metadir.join("episodes.jsonl"))?;
236248
let mut tasks = load_jsonl_file(metadir.join("tasks.jsonl"))?;
237249

238-
episodes.sort_by_key(|e: &LeRobotDatasetEpisode| e.index);
250+
// Sort episodes by index to ensure consistent ordering when loading
251+
episodes_vec.sort_by_key(|e: &LeRobotDatasetEpisode| e.index);
252+
// Convert episodes vec to a BTreeMap for ordered lookup by episode index
253+
let episodes = episodes_vec
254+
.into_iter()
255+
.map(|episode| (episode.index, episode))
256+
.collect::<BTreeMap<EpisodeIndex, LeRobotDatasetEpisode>>();
257+
239258
tasks.sort_by_key(|e: &LeRobotDatasetTask| e.index);
240259

241260
Ok(Self {

crates/store/re_data_loader/src/loader_lerobot.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ impl DataLoader for LeRobotDatasetLoader {
9797
re_log::info!(
9898
"Loading LeRobot dataset from {:?}, with {} episode(s)",
9999
dataset.path,
100-
dataset.metadata.episodes.len(),
100+
dataset.metadata.episode_count(),
101101
);
102102
load_and_stream(&dataset, &application_id, &tx);
103103
}
@@ -177,8 +177,8 @@ fn prepare_episode_chunks(
177177
) -> Vec<(EpisodeIndex, StoreId)> {
178178
let mut store_ids = vec![];
179179

180-
for episode in &dataset.metadata.episodes {
181-
let episode = episode.index;
180+
for episode_index in dataset.metadata.episodes.keys() {
181+
let episode = *episode_index;
182182

183183
let store_id = StoreId::recording(application_id.clone(), format!("episode_{}", episode.0));
184184
let set_store_info = LoadedData::LogMsg(

0 commit comments

Comments
 (0)