Skip to content

Commit aa66e11

Browse files
committed
feat: support keeping asset video in rerun viz
1 parent 594e1af commit aa66e11

File tree

2 files changed

+114
-6
lines changed

2 files changed

+114
-6
lines changed

docs/source/tutorials/visualization.rst

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,32 @@ The following example shows how to visualize the first episode (index 0) of the
1111
1212
This command will open a `rerun <https://rerun.io>`_ window displaying the selected episode, allowing you to explore the episode interactively.
1313

14+
Camera Logging Mode
15+
-------------------
16+
17+
For datasets that include camera streams stored as MP4 files, ``opentau-dataset-viz`` supports different camera logging modes:
18+
19+
- ``frames``: logs decoded frames with ``rr.Image``.
20+
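- ``asset_video``: logs source MP4 files with ``rr.AssetVideo`` and synchronizes playback using ``rr.VideoFrameReference``. Any camera whose MP4 cannot be used (missing file, non-MP4 file, or Rerun video APIs unavailable) is logged as ``frames`` instead, with a warning.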
21+
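- ``auto`` (command-line default): currently behaves the same as ``asset_video``, using MP4 assets where possible and gracefully falling back to ``frames`` when MP4 assets or required Rerun APIs are unavailable. (When calling ``visualize_dataset`` directly from Python, the default is ``frames``.)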
22+
23+
If your episodes contain one or more videos and the generated ``.rrd`` files are large, prefer ``asset_video`` (or ``auto``) to take advantage of video compression.
24+
25+
.. code-block:: bash
26+
27+
# Prefer MP4 assets (smaller .rrd when videos are available)
28+
opentau-dataset-viz --repo-id lerobot/droid_100 --episode-index 0 --camera-log-mode asset_video
29+
30+
# Always log decoded frames (existing behavior)
31+
opentau-dataset-viz --repo-id lerobot/droid_100 --episode-index 0 --camera-log-mode frames
32+
33+
To compare storage size, save one ``.rrd`` file per logging mode into separate output directories and compare the resulting file sizes:
34+
35+
.. code-block:: bash
36+
37+
opentau-dataset-viz --repo-id lerobot/droid_100 --episode-index 0 --save 1 --output-dir ./rrd_frames --camera-log-mode frames
38+
opentau-dataset-viz --repo-id lerobot/droid_100 --episode-index 0 --save 1 --output-dir ./rrd_asset --camera-log-mode asset_video
39+
1440
OpenTau also supports visualizing a dataset with URDF models. To do this, you need to first install ``opentau`` with optional URDF support:
1541

1642
.. code-block:: bash

src/opentau/scripts/visualize_dataset.py

Lines changed: 88 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,16 @@
3333
local$ opentau-dataset-viz --repo-id lerobot/pusht --episode-index 0
3434
```
3535
36+
- Visualize using source MP4 assets when available (smaller .rrd files):
37+
```
38+
local$ opentau-dataset-viz --repo-id lerobot/pusht --episode-index 0 --camera-log-mode asset_video
39+
```
40+
41+
- Keep frame-by-frame image logging explicitly (previous default behavior):
42+
```
43+
local$ opentau-dataset-viz --repo-id lerobot/pusht --episode-index 0 --camera-log-mode frames
44+
```
45+
3646
- Visualize data stored on a distant machine with a local viewer:
3747
```
3848
distant$ opentau-dataset-viz --repo-id lerobot/pusht --episode-index 0 --save 1 --output-dir path/to/directory
@@ -69,12 +79,19 @@
6979
from opentau.datasets.lerobot_dataset import LeRobotDataset
7080

7181
PERMIT_URDF = hasattr(rr, "urdf")
82+
PERMIT_ASSET_VIDEO = hasattr(rr, "AssetVideo") and hasattr(rr, "VideoFrameReference")
7283
if not PERMIT_URDF:
7384
warnings.warn(
7485
"`rerun.urdf` module not found. Make sure you have rerun >= 0.28.2 installed. "
7586
" One way to ensure this is to install OpenTau with the '[urdf]' extra: `pip install opentau[urdf]`.",
7687
stacklevel=2,
7788
)
89+
if not PERMIT_ASSET_VIDEO:
90+
warnings.warn(
91+
"Rerun video asset APIs are unavailable (need AssetVideo + VideoFrameReference). "
92+
"Falling back to per-frame image logging for camera streams.",
93+
stacklevel=2,
94+
)
7895

7996

8097
# Older and newer versions of rerun have different APIs for setting time / sequence
@@ -142,8 +159,8 @@ def create_mock_train_config() -> TrainPipelineConfig:
142159

143160
class EpisodeSampler(torch.utils.data.Sampler):
144161
def __init__(self, dataset: LeRobotDataset, episode_index: int):
145-
from_idx = dataset.episode_data_index["from"][episode_index].item()
146-
to_idx = dataset.episode_data_index["to"][episode_index].item()
162+
from_idx = int(dataset.episode_data_index["from"][episode_index].item())
163+
to_idx = int(dataset.episode_data_index["to"][episode_index].item())
147164
self.frame_ids = range(from_idx, to_idx)
148165

149166
def __iter__(self) -> Iterator:
@@ -173,6 +190,7 @@ def visualize_dataset(
173190
output_dir: Path | None = None,
174191
urdf: Path | None = None,
175192
joint_names: list[str] | None = None,
193+
camera_log_mode: str = "frames",
176194
) -> Path | None:
177195
r"""
178196
Visualize data of a given episode of a LeRobotDataset with rerun.
@@ -188,6 +206,10 @@ def visualize_dataset(
188206
output_dir: Directory to save the .rrd file if `save` is True. Required if `save` is True. Defaults to None.
189207
urdf: Path to a URDF file to load and visualize alongside the dataset. Defaults to None.
190208
joint_names: List of joint names for each state dimension, in order. Used for associating state dimensions with URDF joints. If not provided, state names from dataset metadata will be used. Defaults to None.
209+
camera_log_mode: Camera logging strategy.
210+
- "frames": always log decoded image frames (existing behavior)
211+
- "asset_video": prefer logging source mp4 videos through rerun AssetVideo
212+
- "auto": same as asset_video with graceful fallback to frame logging
191213
"""
192214
if save:
193215
assert output_dir is not None, (
@@ -209,6 +231,8 @@ def visualize_dataset(
209231

210232
if mode not in ["local", "distant"]:
211233
raise ValueError(mode)
234+
if camera_log_mode not in ["frames", "asset_video", "auto"]:
235+
raise ValueError(camera_log_mode)
212236

213237
spawn_local_viewer = mode == "local" and not save
214238
rr.init(f"{repo_id}/episode_{episode_index}", spawn=spawn_local_viewer)
@@ -239,18 +263,63 @@ def visualize_dataset(
239263
if mode == "distant":
240264
rr.serve_web_viewer(open_browser=False, web_port=web_port)
241265

266+
camera_features = dataset.meta.features
267+
video_asset_keys: set[str] = set()
268+
use_asset_video = camera_log_mode in ["asset_video", "auto"] and PERMIT_ASSET_VIDEO
269+
if camera_log_mode in ["asset_video", "auto"] and not PERMIT_ASSET_VIDEO:
270+
logging.warning(
271+
"camera_log_mode=%s requested but rerun video asset APIs are unavailable. "
272+
"Falling back to frame logging.",
273+
camera_log_mode,
274+
)
275+
276+
if use_asset_video:
277+
for key in dataset.meta.camera_keys:
278+
dtype = camera_features.get(key, {}).get("dtype")
279+
if dtype != "video":
280+
continue
281+
282+
video_path = dataset.root / dataset.meta.get_video_file_path(ep_index=episode_index, vid_key=key)
283+
if not video_path.exists():
284+
logging.warning(
285+
"Video file missing for %s: %s. Falling back to frame logging.", key, video_path
286+
)
287+
continue
288+
if video_path.suffix.lower() != ".mp4":
289+
logging.warning(
290+
"Video file for %s is not mp4 (%s). Falling back to frame logging.", key, video_path
291+
)
292+
continue
293+
294+
try:
295+
rr.log(key, rr.AssetVideo(path=video_path), static=True)
296+
video_asset_keys.add(key)
297+
except Exception:
298+
logging.exception(
299+
"Failed to log AssetVideo for %s (%s). Falling back to frame logging.", key, video_path
300+
)
301+
242302
logging.info("Logging to Rerun")
303+
episode_start_ts: float | None = None
243304

244305
for batch in tqdm.tqdm(dataloader, total=len(dataloader)):
245306
# iterate over the batch
246307
for i in range(len(batch["index"])):
247-
_rr_set_sequence("frame_index", batch["frame_index"][i].item())
248-
_rr_set_seconds("timestamp", batch["timestamp"][i].item())
308+
frame_index = batch["frame_index"][i].item()
309+
timestamp_s = batch["timestamp"][i].item()
310+
_rr_set_sequence("frame_index", frame_index)
311+
_rr_set_seconds("timestamp", timestamp_s)
312+
if episode_start_ts is None:
313+
episode_start_ts = timestamp_s
314+
episode_video_t = max(0.0, timestamp_s - episode_start_ts)
249315

250316
# display each camera image
251317
for key in dataset.meta.camera_keys:
252-
# TODO(rcadene): add `.compress()`? is it lossless?
253-
rr.log(key, rr.Image(to_hwc_uint8_numpy(batch[key][i])))
318+
if key in video_asset_keys:
319+
rr.log(key, rr.VideoFrameReference(seconds=episode_video_t, video_reference=key))
320+
else:
321+
# TODO(rcadene): add `.compress()`? is it lossless?
322+
rr.log(key, rr.Image(to_hwc_uint8_numpy(batch[key][i])))
254323

255324
# display each dimension of action space (e.g. actuators command)
256325
if "action" in batch:
@@ -279,6 +348,7 @@ def visualize_dataset(
279348

280349
if mode == "local" and save:
281350
# save .rrd locally
351+
assert output_dir is not None
282352
output_dir = Path(output_dir)
283353
output_dir.mkdir(parents=True, exist_ok=True)
284354
repo_id_str = repo_id.replace("/", "_")
@@ -361,6 +431,18 @@ def parse_args() -> dict:
361431
"Visualize the data by running `rerun path/to/file.rrd` on your local machine."
362432
),
363433
)
434+
parser.add_argument(
435+
"--camera-log-mode",
436+
type=str,
437+
default="auto",
438+
choices=["frames", "asset_video", "auto"],
439+
help=(
440+
"Camera logging strategy. "
441+
"'frames' logs decoded frames with rr.Image (larger .rrd). "
442+
"'asset_video' logs source MP4 for video features via rr.AssetVideo + rr.VideoFrameReference. "
443+
"'auto' behaves like 'asset_video' with graceful fallback to frames if unavailable."
444+
),
445+
)
364446
parser.add_argument(
365447
"--tolerance-s",
366448
type=float,

0 commit comments

Comments
 (0)