From b15143addf509e52f4e30bdbc7cee3b1cc24bc21 Mon Sep 17 00:00:00 2001
From: Andrea Delli <andrea.delli2001@gmail.com>
Date: Sat, 25 Oct 2025 22:20:46 +0200
Subject: [PATCH 1/8] feat: add RLBench simulator support

---
 docs/source/_toctree.yml             |   2 +
 docs/source/rlbench.mdx              | 117 ++++++++
 pyproject.toml                       |   2 +
 src/lerobot/envs/configs.py          |  52 ++++
 src/lerobot/envs/factory.py          |  13 +
 src/lerobot/envs/rlbench.py          | 405 +++++++++++++++++++++++++++
 src/lerobot/envs/rlbench_config.json | 219 +++++++++++++++
 7 files changed, 810 insertions(+)
 create mode 100644 docs/source/rlbench.mdx
 create mode 100644 src/lerobot/envs/rlbench.py
 create mode 100644 src/lerobot/envs/rlbench_config.json

diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml
index 5e100013a4..de2e8d3cb9 100644
--- a/docs/source/_toctree.yml
+++ b/docs/source/_toctree.yml
@@ -45,6 +45,8 @@
     title: Using Libero
   - local: metaworld
     title: Using MetaWorld
+  - local: rlbench
+    title: Using RLBench
   title: "Simulation"
 - sections:
   - local: introduction_processors
diff --git a/docs/source/rlbench.mdx b/docs/source/rlbench.mdx
new file mode 100644
index 0000000000..a477176f3e
--- /dev/null
+++ b/docs/source/rlbench.mdx
@@ -0,0 +1,117 @@
+# RLBench
+
+**RLBench** is a large-scale benchmark designed to accelerate research in **robot learning**, with a strong focus on **vision-guided manipulation**. It provides a challenging and standardized environment for developing and testing algorithms that can learn complex robotic tasks.
+
+- 📄 [RLBench paper](https://arxiv.org/abs/1909.12271)
+- 💻 [Original RLBench repo](https://github.com/stepjam/RLBench)
+
+![RLBench Tasks](https://github.com/stepjam/RLBench/blob/master/readme_files/task_grid.png)
+
+## Why RLBench?
+
+- **Diverse and Challenging Tasks:** RLBench includes over 100 unique, hand-designed tasks, ranging from simple reaching and pushing to complex, multi-stage activities like opening an oven and placing a tray inside. This diversity tests an algorithm's ability to generalize across different objectives and dynamics.
+- **Rich, Multi-Modal Observations:** The benchmark provides both proprioceptive (joint states) and visual observations. Visual data comes from multiple camera angles, including over-the-shoulder cameras and wrist camera, with options for RGB, depth, and segmentation masks.
+- **Infinite Demonstrations:** A key feature of RLBench is its ability to generate an infinite supply of demonstrations for each task. These demonstrations are created using motion planners, making RLBench an ideal platform for research in imitation learning and offline reinforcement learning.
+- **Scalability and Customization:** RLBench is designed to be extensible. Researchers can easily create and contribute new tasks, helping the benchmark evolve and stay relevant.
+
+RLBench includes **eight task sets**, which consist of a collection of multiple tasks (FS=Few-Shot, MT=Multi-Task).
+
+- **`FS10_v1`** – 10 training tasks, 5 test tasks
+- **`FS25_v1`** – 25 training tasks, 5 test tasks
+- **`FS50_v1`** – 50 training tasks, 5 test tasks
+- **`FS95_v1`** – 95 training tasks, 5 test tasks
+- **`MT15_v1`** – 15 training tasks (all tasks of `FS10_v1`, training+test)
+- **`MT30_v1`** – 30 training tasks (all tasks of `FS25_v1`, training+test)
+- **`MT55_v1`** – 55 training tasks (all tasks of `FS50_v1`, training+test)
+- **`MT100_v1`** – 100 training tasks (all tasks of `FS95_v1`, training+test)
+
+For details about the tasks and task sets, please refer to the [original definition](https://github.com/stepjam/RLBench/blob/master/rlbench/tasks/__init__.py).
+
+## RLBench in LeRobot
+
+LeRobot's integration with RLBench allows you to train and evaluate policies on its rich set of tasks. The integration is designed to be seamless, leveraging LeRobot's training and evaluation pipelines.
+
+### Get started
+
+RLBench is built around CoppeliaSim v4.1.0 and [PyRep](https://github.com/stepjam/PyRep).
+
+First, install CoppeliaSim:
+```bash
+# set env variables
+export COPPELIASIM_ROOT=${HOME}/CoppeliaSim
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$COPPELIASIM_ROOT
+export QT_QPA_PLATFORM_PLUGIN_PATH=$COPPELIASIM_ROOT
+
+wget https://downloads.coppeliarobotics.com/V4_1_0/CoppeliaSim_Edu_V4_1_0_Ubuntu20_04.tar.xz
+mkdir -p $COPPELIASIM_ROOT && tar -xf CoppeliaSim_Edu_V4_1_0_Ubuntu20_04.tar.xz -C $COPPELIASIM_ROOT --strip-components 1
+rm -rf CoppeliaSim_Edu_V4_1_0_Ubuntu20_04.tar.xz
+```
+
+Next, install the necessary dependencies:
+`pip install -e ".[rlbench]"`
+
+That's it! You can now use RLBench environments within LeRobot. To run headless, check the documentation on the original [RLBench repo](https://github.com/stepjam/RLBench).
+
+### Evaluating a Policy
+
+You can evaluate a trained policy on a specific RLBench task or a suite of tasks.
+
+```bash
+export COPPELIASIM_ROOT=${HOME}/CoppeliaSim
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$COPPELIASIM_ROOT
+export QT_QPA_PLATFORM_PLUGIN_PATH=$COPPELIASIM_ROOT
+
+lerobot-eval \
+  --policy.path="your-policy-id" \
+  --env.type=rlbench \
+  --env.task=put_rubbish_in_bin \
+  --eval.batch_size=1 \
+  --eval.n_episodes=10
+```
+
+- `--env.task` specifies the RLBench task to evaluate on. You can also use task suites like `FS10_V1` or `MT30_V1`.
+- The evaluation script will report the success rate for the given task(s).
+
+### Training a Policy
+
+You can train a policy on RLBench tasks using the `lerobot-train` command. You'll need a dataset in the correct format.
+
+```bash
+export COPPELIASIM_ROOT=${HOME}/CoppeliaSim
+export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$COPPELIASIM_ROOT
+export QT_QPA_PLATFORM_PLUGIN_PATH=$COPPELIASIM_ROOT
+
+lerobot-train \
+  --policy.type=smolvla \
+  --policy.repo_id=${HF_USER}/rlbench-test \
+  --dataset.repo_id=lerobot/rlbench_put_rubbish_in_bin \
+  --env.type=rlbench \
+  --env.task=put_rubbish_in_bin \
+  --output_dir=./outputs/ \
+  --steps=100000 \
+  --batch_size=4 \
+  --eval.batch_size=1 \
+  --eval.n_episodes=10 \
+  --eval_freq=1000
+```
+
+> If running on a headless server, ensure that the CoppeliaSim environment is set up to run without a GUI.
+> Refer to the [RLBench documentation](https://github.com/stepjam/RLBench).
+
+### RLBench Datasets
+
+LeRobot expects datasets to be in a specific format. While there isn't an official `lerobot`-prepared RLBench dataset on the Hugging Face Hub yet, you can create your own by converting demonstrations from the original RLBench format.
+
+The environment expects the following observation and action keys:
+
+- **Observations:**
+  - `observation.state`: Proprioceptive features (usually joint positions + gripper).
+  - `observation.images.front_rgb`: Front RGB camera view.
+  - `observation.images.wrist_rgb`: Wrist RGB camera view.
+  - `observation.images.overhead_rgb`: Overhead RGB camera view.
+  - `observation.images.left_shoulder_rgb`: Left shoulder RGB camera view.
+  - `observation.images.right_shoulder_rgb`: Right shoulder RGB camera view.
+- **Actions:**
+  - A continuous control vector for the robot's joints and gripper (e.g. for franka, 8 dimensions: 7 joint positions + 1 gripper state).
+
+Make sure your dataset's metadata and parquet files use these keys to ensure compatibility with LeRobot's RLBench environment.
diff --git a/pyproject.toml b/pyproject.toml
index 1c71acec07..0ba8c9629f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -133,6 +133,7 @@ aloha = ["gym-aloha>=0.1.2,<0.2.0"]
 pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk version in gym-pusht instead
 libero = ["lerobot[transformers-dep]", "libero @ git+https://github.com/huggingface/lerobot-libero.git@main#egg=libero"]
 metaworld = ["metaworld==3.0.0"]
+rlbench = ["rlbench @ git+https://github.com/stepjam/RLBench.git"]
 
 # All
 all = [
@@ -155,6 +156,7 @@ all = [
     "lerobot[phone]",
     "lerobot[libero]",
     "lerobot[metaworld]",
+    "lerobot[rlbench]",
 ]
 
 [project.scripts]
diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py
index dc526114dc..4d0f010eda 100644
--- a/src/lerobot/envs/configs.py
+++ b/src/lerobot/envs/configs.py
@@ -319,3 +319,55 @@ def gym_kwargs(self) -> dict:
             "obs_type": self.obs_type,
             "render_mode": self.render_mode,
         }
+
+@EnvConfig.register_subclass("rlbench")
+@dataclass
+class RLBenchEnv(EnvConfig):
+    task: str = "FS10_V1"  # can also choose other task suites or single tasks
+    fps: int = 30
+    episode_length: int = 400
+    obs_type: str = "pixels_agent_pos"
+    render_mode: str = "rgb_array"
+    camera_name: str = "front_rgb,wrist_rgb"
+    camera_name_mapping: dict[str, str] | None = None
+    observation_height: int = 256
+    observation_width: int = 256
+    features: dict[str, PolicyFeature] = field(
+        default_factory=lambda: {
+            ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(8,)),
+        }
+    )
+    features_map: dict[str, str] = field(
+        default_factory=lambda: {
+            ACTION: ACTION,
+            "agent_pos": OBS_STATE,
+            "pixels/front_rgb": f"{OBS_IMAGES}.image",
+            "pixels/wrist_rgb": f"{OBS_IMAGES}.image2",
+        }
+    )
+
+    def __post_init__(self):
+        if self.obs_type == "pixels":
+            self.features["pixels/front_rgb"] = PolicyFeature(
+                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
+            )
+            self.features["pixels/wrist_rgb"] = PolicyFeature(
+                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
+            )
+        elif self.obs_type == "pixels_agent_pos":
+            self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(8,))
+            self.features["pixels/front_rgb"] = PolicyFeature(
+                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
+            )
+            self.features["pixels/wrist_rgb"] = PolicyFeature(
+                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
+            )
+        else:
+            raise ValueError(f"Unsupported obs_type: {self.obs_type}")
+
+    @property
+    def gym_kwargs(self) -> dict:
+        return {
+            "obs_type": self.obs_type,
+            "render_mode": self.render_mode,
+        }
\ No newline at end of file
diff --git a/src/lerobot/envs/factory.py b/src/lerobot/envs/factory.py
index 52c7cbb966..2c9093bb2c 100644
--- a/src/lerobot/envs/factory.py
+++ b/src/lerobot/envs/factory.py
@@ -85,6 +85,19 @@ def make_env(
             gym_kwargs=cfg.gym_kwargs,
             env_cls=env_cls,
         )
+    elif "rlbench" in cfg.type:
+        from lerobot.envs.rlbench import create_rlbench_envs
+
+        if cfg.task is None:
+            raise ValueError("RLBench requires a task to be specified")
+
+        return create_rlbench_envs(
+            task=cfg.task,
+            n_envs=n_envs,
+            camera_name=cfg.camera_name,
+            gym_kwargs=cfg.gym_kwargs,
+            env_cls=env_cls,
+        )
 
     if cfg.gym_id not in gym_registry:
         print(f"gym id '{cfg.gym_id}' not found, attempting to import '{cfg.package_name}'...")
diff --git a/src/lerobot/envs/rlbench.py b/src/lerobot/envs/rlbench.py
new file mode 100644
index 0000000000..0a37f3f0f6
--- /dev/null
+++ b/src/lerobot/envs/rlbench.py
@@ -0,0 +1,405 @@
+#!/usr/bin/env python
+
+# Copyright 2025 The HuggingFace Inc. team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from functools import partial
+import json, os
+from collections import defaultdict
+from collections.abc import Callable, Iterable, Sequence
+from pathlib import Path
+from typing import Any, Mapping
+import numpy as np
+
+import gymnasium as gym
+from gymnasium import spaces
+
+from rlbench.environment import Environment
+from rlbench.backend.task import Task
+from rlbench.gym import RLBenchEnv as RLBenchGymEnv
+from rlbench.observation_config import ObservationConfig
+from rlbench.action_modes.action_mode import MoveArmThenGripper
+from rlbench.action_modes.arm_action_modes import JointPosition
+from rlbench.action_modes.gripper_action_modes import Discrete
+from rlbench.utils import name_to_task_class
+from rlbench.tasks import (FS10_V1, FS25_V1, FS50_V1, FS95_V1, MT15_V1, MT30_V1, MT55_V1, MT100_V1)
+
+os.environ["QT_QPA_PLATFORM"] = "offscreen"
+
+# ---- Load configuration data from the external JSON file ----
+CONFIG_PATH = Path(__file__).parent / "rlbench_config.json"
+try:
+    with open(CONFIG_PATH) as f:
+        data = json.load(f)
+except FileNotFoundError as err:
+    raise FileNotFoundError(
+        "Could not find 'rlbench_config.json'. "
+        "Please ensure the configuration file is in the same directory as the script."
+    ) from err
+except json.JSONDecodeError as err:
+    raise ValueError(
+        "Failed to decode 'rlbench_config.json'. Please ensure it is a valid JSON file."
+    ) from err
+
+# ---- Process the loaded data ----
+
+# extract and type-check top-level dicts
+TASK_DESCRIPTIONS: dict[str, str] = data.get("TASK_DESCRIPTIONS", {})
+TASK_NAME_TO_ID: dict[str, int] = data.get("TASK_NAME_TO_ID", {})
+
+ACTION_DIM = 8  # 7 joints + 1 gripper  # NOTE: RLBench supports also EEF pose+gripper (dim=8, [x,y,z,rx,ry,rz,gripper])
+OBS_DIM = 8  # 7 joints + 1 gripper
+
+def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
+    """Normalize camera_name into a non-empty list of strings."""
+    if isinstance(camera_name, str):
+        cams = [c.strip() for c in camera_name.split(",") if c.strip()]
+    elif isinstance(camera_name, (list | tuple)):
+        cams = [str(c).strip() for c in camera_name if str(c).strip()]
+    else:
+        raise TypeError(f"camera_name must be str or sequence[str], got {type(camera_name).__name__}")
+    if not cams:
+        raise ValueError("camera_name resolved to an empty list.")
+    return cams
+
+def _get_suite(name: str) -> dict[str, list[Task]]:
+    """Instantiate a RLBench suite by name with clear validation."""
+    
+    suites = {
+        "FS10_V1": FS10_V1,
+        "FS25_V1": FS25_V1,
+        "FS50_V1": FS50_V1,
+        "FS95_V1": FS95_V1,
+        "MT15_V1": MT15_V1,
+        "MT30_V1": MT30_V1,
+        "MT55_V1": MT55_V1,
+        "MT100_V1": MT100_V1,
+    }
+
+    if name not in suites:
+        raise ValueError(f"Unknown RLBench suite '{name}'. Available: {', '.join(sorted(suites.keys()))}")
+    suite = suites[name]
+
+    if not suite.get("train", None) and not suite.get("test", None):
+        raise ValueError(f"Suite '{name}' has no tasks.")
+    return suite
+
+def _select_task_ids(total_tasks: int, task_ids: Iterable[int] | None) -> list[int]:
+    """Validate/normalize task ids. If None → all tasks."""
+    if task_ids is None:
+        return list(range(total_tasks))
+    ids = sorted({int(t) for t in task_ids})
+    for t in ids:
+        if t < 0 or t >= total_tasks:
+            raise ValueError(f"task_id {t} out of range [0, {total_tasks - 1}].")
+    return ids
+
+
+class RLBenchEnv(gym.Env):
+    metadata = {"render_modes": ["rgb_array"], "render_fps": 30}
+
+    def __init__(
+        self,
+        task: Task | None = None,
+        task_suite: dict[str, list[Task]] | None = None,
+        camera_name: str | Sequence[str] = "front_rgb,wrist_rgb",
+        obs_type: str = "pixels",
+        render_mode: str = "rgb_array",
+        observation_width: int = 256,
+        observation_height: int = 256,
+        visualization_width: int = 640,
+        visualization_height: int = 480,
+        camera_name_mapping: dict[str, str] | None = None,
+    ):
+        super().__init__()
+        self.task = task
+        self.obs_type = obs_type
+        self.render_mode = render_mode
+        self.observation_width = observation_width
+        self.observation_height = observation_height
+        self.visualization_width = visualization_width
+        self.visualization_height = visualization_height
+        self.camera_name = _parse_camera_names(camera_name)
+        
+        # Map raw camera names to "image1" and "image2".
+        # The preprocessing step `preprocess_observation` will then prefix these with `.images.*`,
+        # following the LeRobot convention (e.g., `observation.images.image`, `observation.images.image2`).
+        # This ensures the policy consistently receives observations in the
+        # expected format regardless of the original camera naming.
+        if camera_name_mapping is None:
+            camera_name_mapping = {
+                "front_rgb": "image",
+                "wrist_rgb": "image2",
+            }
+        self.camera_name_mapping = camera_name_mapping
+
+        self._env = self._make_envs_task(self.task)
+        self._max_episode_steps = 500   # TODO: make configurable depending on task suite
+        self.task_description = TASK_DESCRIPTIONS.get(self.task, "")
+
+        images = {}
+        for cam in self.camera_name:
+            images[self.camera_name_mapping[cam]] = spaces.Box(
+                low=0,
+                high=255,
+                shape=(self.observation_height, self.observation_width, 3),
+                dtype=np.uint8,
+            )
+
+        if self.obs_type == "state":    # TODO: This can be implemented in RLBench!
+            raise (
+                "The 'state' observation type is not supported in RLBench. "
+                "Please switch to an image-based obs_type (e.g. 'pixels', 'pixels_agent_pos')."
+            )
+            
+        elif self.obs_type == "pixels":
+            self.observation_space = spaces.Dict(
+                {
+                    "pixels": spaces.Box(
+                        low=0,
+                        high=255,
+                        shape=(self.observation_height, self.observation_width, 3),
+                        dtype=np.uint8,
+                    )
+                }
+            )
+        elif self.obs_type == "pixels_agent_pos":
+            self.observation_space = spaces.Dict(
+                {
+                    "pixels": spaces.Box(
+                        low=0,
+                        high=255,
+                        shape=(self.observation_height, self.observation_width, 3),
+                        dtype=np.uint8,
+                    ),
+                    "agent_pos": spaces.Box(
+                        low=-1000.0,
+                        high=1000.0,
+                        shape=(OBS_DIM,),
+                        dtype=np.float64,
+                    ),
+                }
+            )
+
+        self.action_space = spaces.Box(low=-1, high=1, shape=(ACTION_DIM,), dtype=np.float32)
+
+    def render(self) -> np.ndarray:
+        """
+        Render the current environment frame.
+
+        Returns:
+            np.ndarray: The rendered RGB image from the environment.
+        """
+        return self._env.render()
+
+    def _make_envs_task(self, task: Task) -> RLBenchGymEnv:
+        return RLBenchGymEnv(
+            task,
+            observation_mode="vision",
+            render_mode=self.render_mode,
+            action_mode=MoveArmThenGripper(
+                JointPosition(absolute_mode=True),   # Arm in absolute joint positions
+                Discrete()  # Gripper in discrete open/close (<0.5 → close, >=0.5 → open)
+            )
+        )
+
+    def _format_raw_obs(self, raw_obs: dict) -> dict[str, Any]:
+        
+        images = {}
+        for camera_name in self.camera_name:
+            image = raw_obs[camera_name]
+            image = image[::-1, ::-1]  # rotate 180 degrees
+            images[self.camera_name_mapping[camera_name]] = image
+        
+        agent_pos = np.concatenate(
+            raw_obs["joint_positions"],
+            [raw_obs["gripper_open"]],
+        )
+
+        if self.obs_type == "state":    # TODO: REMOVE?
+            raise NotImplementedError(
+                "'state' obs_type not implemented for RLBench. Use pixel modes instead."
+            )
+
+        if self.obs_type == "pixels":
+            obs = {"pixels": image.copy()}
+        elif self.obs_type == "pixels_agent_pos":
+            obs = {
+                "pixels": image.copy(),
+                "agent_pos": agent_pos,
+            }
+        else:
+            raise NotImplementedError(
+                f"The observation type '{self.obs_type}' is not supported in RLBench. "
+                "Please switch to an image-based obs_type (e.g. 'pixels', 'pixels_agent_pos')."
+            )
+        return obs
+
+    def reset(
+        self,
+        seed: int | None = None,
+        **kwargs,
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """
+        Reset the environment to its initial state.
+
+        Args:
+            seed (Optional[int]): Random seed for environment initialization.
+
+        Returns:
+            observation (Dict[str, Any]): The initial formatted observation.
+            info (Dict[str, Any]): Additional info about the reset state.
+        """
+        super().reset(seed=seed)
+
+        raw_obs, info = self._env.reset(seed=seed)
+
+        observation = self._format_raw_obs(raw_obs)
+
+        info = {"is_success": False}
+        return observation, info
+
+    def step(self, action: np.ndarray) -> tuple[dict[str, Any], float, bool, bool, dict[str, Any]]:
+        """
+        Perform one environment step.
+
+        Args:
+            action (np.ndarray): The action to execute, must be 1-D with shape (action_dim,).
+
+        Returns:
+            observation (Dict[str, Any]): The formatted observation after the step.
+            reward (float): The scalar reward for this step.
+            terminated (bool): Whether the episode terminated successfully.
+            truncated (bool): Whether the episode was truncated due to a time limit.
+            info (Dict[str, Any]): Additional environment info.
+        """
+        if action.ndim != 1:
+            raise ValueError(
+                f"Expected action to be 1-D (shape (action_dim,)), "
+                f"but got shape {action.shape} with ndim={action.ndim}"
+            )
+        raw_obs, reward, done, truncated, info = self._env.step(action)
+
+        # Determine whether the task was successful
+        is_success = bool(info.get("success", 0))
+        terminated = done or is_success
+        info.update(
+            {
+                "task": self.task,
+                "done": done,
+                "is_success": is_success,
+            }
+        )
+
+        # Format the raw observation into the expected structure
+        observation = self._format_raw_obs(raw_obs)
+        if terminated:
+            info["final_info"] = {
+                "task": self.task,
+                "done": bool(done),
+                "is_success": bool(is_success),
+            }
+            self.reset()
+
+        return observation, reward, terminated, truncated, info
+
+    def close(self):
+        self._env.close()
+
+
+def _make_env_fns(
+    *,
+    suite: dict[str, list[Task]],
+    task: Task,
+    n_envs: int,
+    camera_names: list[str],
+    gym_kwargs: Mapping[str, Any],
+) -> list[Callable[[], RLBenchEnv]]:
+    """Build n_envs factory callables for a single (suite, task)."""
+
+    def _make_env(**kwargs) -> RLBenchEnv:
+        local_kwargs = dict(kwargs)
+        return RLBenchEnv(
+            task=task,
+            task_suite=suite,
+            camera_name=camera_names,
+            **local_kwargs,
+        )
+
+    fns: list[Callable[[], RLBenchEnv]] = []
+    for _ in range(n_envs):
+        fns.append(partial(_make_env, **gym_kwargs))
+    return fns
+    
+
+# ---- Main API ----------------------------------------------------------------
+
+
+def create_rlbench_envs(
+    task: str,
+    n_envs: int,
+    gym_kwargs: dict[str, Any] | None = None,
+    camera_name: str | Sequence[str] = "front_rgb,wrist_rgb",
+    env_cls: Callable[[Sequence[Callable[[], Any]]], Any] | None = None,
+) -> dict[str, dict[int, Any]]:
+    """
+    Create vectorized RLBench environments with a consistent return shape.
+
+    Returns:
+        dict[task_group][task_id] -> vec_env (env_cls([...]) with exactly n_envs factories)
+    Notes:
+        - n_envs is the number of rollouts *per task* (episode_index = 0..n_envs-1).
+        - `task` can be a single suite or a comma-separated list of suites.
+        - You may pass `task_names` (list[str]) inside `gym_kwargs` to restrict tasks per suite.
+    """
+    
+    if env_cls is None or not callable(env_cls):
+        raise ValueError("env_cls must be a callable that wraps a list of environment factory callables.")
+    if not isinstance(n_envs, int) or n_envs <= 0:
+        raise ValueError(f"n_envs must be a positive int; got {n_envs}.")
+
+    gym_kwargs = dict(gym_kwargs or {})
+    task_ids_filter = gym_kwargs.pop("task_ids", None)  # optional: limit to specific tasks
+    
+    camera_names = _parse_camera_names(camera_name)
+    suite_names = [s.strip() for s in str(task).split(",") if s.strip()]
+    if not suite_names:
+        raise ValueError("`task` must contain at least one RLBench suite name.")
+
+    print(f"Creating RLBench envs | task_groups={suite_names} | n_envs(per task)={n_envs}")
+    if task_ids_filter is not None:
+        print(f"Restricting to task_ids={task_ids_filter}")
+
+    out: dict[str, dict[int, Any]] = defaultdict(dict)
+
+    for suite_name in suite_names:
+        suite = _get_suite(suite_name)
+        total = len(suite['train'])
+        selected = _select_task_ids(total, task_ids_filter)
+
+        if not selected:
+            raise ValueError(f"No tasks selected for suite '{suite_name}' (available: {total}).")
+
+        for tid in selected:
+            fns = _make_env_fns(
+                suite=suite,
+                task=suite['train'][tid],
+                n_envs=n_envs,
+                camera_names=camera_names,
+                gym_kwargs=gym_kwargs,
+            )
+            out[suite_name][tid] = env_cls(fns)
+            print(f"Built vec env | suite={suite_name} | task_id={tid} | n_envs={n_envs}")
+
+    # return plain dicts for predictability
+    return {group: dict(task_map) for group, task_map in out.items()}
diff --git a/src/lerobot/envs/rlbench_config.json b/src/lerobot/envs/rlbench_config.json
new file mode 100644
index 0000000000..01af0e999b
--- /dev/null
+++ b/src/lerobot/envs/rlbench_config.json
@@ -0,0 +1,219 @@
+{
+  "TASK_DESCRIPTIONS": {
+    "basketball_in_hoop": "Put the ball in the hoop",
+    "beat_the_buzz": "Beat the buzz",
+    "block_pyramid": "Stack blocks in a pyramid",
+    "change_channel": "Turn the channel",
+    "change_clock": "Change the clock to show time 12.15",
+    "close_box": "Close box",
+    "close_door": "Close the door",
+    "close_drawer": "Close drawer",
+    "close_fridge": "Close fridge",
+    "close_grill": "Close the grill",
+    "close_jar": "Close the jar",
+    "close_laptop_lid": "Close laptop lid",
+    "close_microwave": "Close microwave",
+    "empty_container": "Empty the container in the to container",
+    "empty_dishwasher": "Empty the dishwasher",
+    "get_ice_from_fridge": "Get ice from fridge",
+    "hang_frame_on_hanger": "Hang frame on hanger",
+    "hit_ball_with_queue": "Hit ball with queue in to the goal",
+    "hockey": "Hit the ball into the net",
+    "insert_onto_square_peg": "Put the ring on the spoke",
+    "insert_usb_in_computer": "Insert usb in computer",
+    "lamp_off": "Turn off the light",
+    "lamp_on": "Turn on the light",
+    "lift_numbered_block": "Pick up the block with the number",
+    "light_bulb_in": "Screw in the light bulb",
+    "light_bulb_out": "Put the bulb in the holder",
+    "meat_off_grill": "Take the off the grill",
+    "meat_on_grill": "Put the on the grill",
+    "move_hanger": "Move hanger onto the other rack",
+    "open_box": "Open box",
+    "open_door": "Open the door",
+    "open_drawer": "Open drawer",
+    "open_fridge": "Open fridge",
+    "open_grill": "Open the grill",
+    "open_jar": "Open the jar",
+    "open_microwave": "Open microwave",
+    "open_oven": "Open the oven",
+    "open_washing_machine": "Open washing machine",
+    "open_window": "Open left window",
+    "open_wine_bottle": "Open wine bottle",
+    "phone_on_base": "Put the phone on the base",
+    "pick_and_lift": "Pick up the block and lift it up to the target",
+    "pick_and_lift_small": "Pick up the and lift it up to the target",
+    "pick_up_cup": "Pick up the cup",
+    "place_cups": "Place 1 cup on the cup holder",
+    "place_hanger_on_rack": "Pick up the hanger and place in on the rack",
+    "place_shape_in_shape_sorter": "Put the in the shape sorter",
+    "play_jenga": "Play jenga",
+    "plug_charger_in_power_supply": "Plug charger in power supply",
+    "pour_from_cup_to_cup": "Pour liquid from the cup to the cup",
+    "press_switch": "Press switch",
+    "push_button": "Push the button",
+    "push_buttons": "Push the buttons",
+    "put_all_groceries_in_cupboard": "Put all of the groceries in the cupboard",
+    "put_books_on_bookshelf": "Put books on bookshelf",
+    "put_bottle_in_fridge": "Put bottle in fridge",
+    "put_groceries_in_cupboard": "Put the in the cupboard",
+    "put_item_in_drawer": "Put item in drawer",
+    "put_knife_in_knife_block": "Put the knife in the knife block",
+    "put_knife_on_chopping_board": "Put the knife on the chopping board",
+    "put_money_in_safe": "Put the money away in the safe on the shelf",
+    "put_plate_in_colored_dish_rack": "Put the plate between the pillars of the dish rack",
+    "put_rubbish_in_bin": "Put rubbish in bin",
+    "put_shoes_in_box": "Put the shoes in the box",
+    "put_toilet_roll_on_stand": "Put toilet roll on stand",
+    "put_tray_in_oven": "Put tray in oven",
+    "put_umbrella_in_umbrella_stand": "Put umbrella in umbrella stand",
+    "reach_and_drag": "Use the stick to drag the cube onto the target",
+    "reach_target": "Reach the target",
+    "remove_cups": "Remove 1 cup from the cup holder and place it on the",
+    "scoop_with_spatula": "Scoop up the cube and lift it with the spatula",
+    "screw_nail": "Screw the nail in to the block",
+    "set_the_table": "Set the table",
+    "setup_checkers": "Setup checkers",
+    "setup_chess": "Setup chess",
+    "slide_block_to_target": "Slide the block to target",
+    "slide_cabinet_open": "Slide cabinet open",
+    "slide_cabinet_open_and_place_cups": "Put cup in cabinet",
+    "solve_puzzle": "Solve the puzzle",
+    "stack_blocks": "Stack blocks",
+    "stack_chairs": "Stack the other chairs on top of the chair",
+    "stack_cups": "Stack the other cups on top of the cup",
+    "stack_wine": "Stack wine bottle",
+    "straighten_rope": "Straighten rope",
+    "sweep_to_dustpan": "Sweep dirt to dustpan",
+    "take_cup_out_from_cabinet": "Take out a cup from the half of the cabinet",
+    "take_frame_off_hanger": "Take frame off hanger",
+    "take_item_out_of_drawer": "Take item out of the drawer",
+    "take_lid_off_saucepan": "Take lid off the saucepan",
+    "take_money_out_safe": "Take the money out of the bottom shelf and place it on",
+    "take_off_weighing_scales": "Remove the pepper from the weighing scales and place it",
+    "take_plate_off_colored_dish_rack": "Take plate off the colored rack",
+    "take_shoes_out_of_box": "Take shoes out of box",
+    "take_toilet_roll_off_stand": "Take toilet roll off stand",
+    "take_tray_out_of_oven": "Take tray out of oven",
+    "take_umbrella_out_of_umbrella_stand": "Take umbrella out of umbrella stand",
+    "take_usb_out_of_computer": "Take usb out of computer",
+    "toilet_seat_down": "Toilet seat down",
+    "toilet_seat_up": "Lift toilet seat up",
+    "turn_oven_on": "Turn on the oven",
+    "turn_tap": "Turn tap",
+    "tv_on": "Turn on the TV",
+    "unplug_charger": "Unplug charger",
+    "water_plants": "Water plant",
+    "weighing_scales": "Weigh the pepper",
+    "wipe_desk": "Wipe dirt off the desk"
+  },
+
+  "TASK_NAME_TO_ID": {
+    "basketball_in_hoop": 0,
+    "beat_the_buzz": 1,
+    "block_pyramid": 2,
+    "change_channel": 3,
+    "change_clock": 4,
+    "close_box": 5,
+    "close_door": 6,
+    "close_drawer": 7,
+    "close_fridge": 8,
+    "close_grill": 9,
+    "close_jar": 10,
+    "close_laptop_lid": 11,
+    "close_microwave": 12,
+    "empty_container": 13,
+    "empty_dishwasher": 14,
+    "get_ice_from_fridge": 15,
+    "hang_frame_on_hanger": 16,
+    "hit_ball_with_queue": 17,
+    "hockey": 18,
+    "insert_onto_square_peg": 19,
+    "insert_usb_in_computer": 20,
+    "lamp_off": 21,
+    "lamp_on": 22,
+    "lift_numbered_block": 23,
+    "light_bulb_in": 24,
+    "light_bulb_out": 25,
+    "meat_off_grill": 26,
+    "meat_on_grill": 27,
+    "move_hanger": 28,
+    "open_box": 29,
+    "open_door": 30,
+    "open_drawer": 31,
+    "open_fridge": 32,
+    "open_grill": 33,
+    "open_jar": 34,
+    "open_microwave": 35,
+    "open_oven": 36,
+    "open_washing_machine": 37,
+    "open_window": 38,
+    "open_wine_bottle": 39,
+    "phone_on_base": 40,
+    "pick_and_lift": 41,
+    "pick_and_lift_small": 42,
+    "pick_up_cup": 43,
+    "place_cups": 44,
+    "place_hanger_on_rack": 45,
+    "place_shape_in_shape_sorter": 46,
+    "play_jenga": 47,
+    "plug_charger_in_power_supply": 48,
+    "pour_from_cup_to_cup": 49,
+    "press_switch": 50,
+    "push_button": 51,
+    "push_buttons": 52,
+    "put_all_groceries_in_cupboard": 53,
+    "put_books_on_bookshelf": 54,
+    "put_bottle_in_fridge": 55,
+    "put_groceries_in_cupboard": 56,
+    "put_item_in_drawer": 57,
+    "put_knife_in_knife_block": 58,
+    "put_knife_on_chopping_board": 59,
+    "put_money_in_safe": 60,
+    "put_plate_in_colored_dish_rack": 61,
+    "put_rubbish_in_bin": 62,
+    "put_shoes_in_box": 63,
+    "put_toilet_roll_on_stand": 64,
+    "put_tray_in_oven": 65,
+    "put_umbrella_in_umbrella_stand": 66,
+    "reach_and_drag": 67,
+    "reach_target": 68,
+    "remove_cups": 69,
+    "scoop_with_spatula": 70,
+    "screw_nail": 71,
+    "set_the_table": 72,
+    "setup_checkers": 73,
+    "setup_chess": 74,
+    "slide_block_to_target": 75,
+    "slide_cabinet_open": 76,
+    "slide_cabinet_open_and_place_cups": 77,
+    "solve_puzzle": 78,
+    "stack_blocks": 79,
+    "stack_chairs": 80,
+    "stack_cups": 81,
+    "stack_wine": 82,
+    "straighten_rope": 83,
+    "sweep_to_dustpan": 84,
+    "take_cup_out_from_cabinet": 85,
+    "take_frame_off_hanger": 86,
+    "take_item_out_of_drawer": 87,
+    "take_lid_off_saucepan": 88,
+    "take_money_out_safe": 89,
+    "take_off_weighing_scales": 90,
+    "take_plate_off_colored_dish_rack": 91,
+    "take_shoes_out_of_box": 92,
+    "take_toilet_roll_off_stand": 93,
+    "take_tray_out_of_oven": 94,
+    "take_umbrella_out_of_umbrella_stand": 95,
+    "take_usb_out_of_computer": 96,
+    "toilet_seat_down": 97,
+    "toilet_seat_up": 98,
+    "turn_oven_on": 99,
+    "turn_tap": 100,
+    "tv_on": 101,
+    "unplug_charger": 102,
+    "water_plants": 103,
+    "weighing_scales": 104,
+    "wipe_desk": 105
+  }
+}
\ No newline at end of file

From 4583031f90661d3447e6536011422d976bdc7bfa Mon Sep 17 00:00:00 2001
From: Andrea Delli <andrea.delli2001@gmail.com>
Date: Wed, 29 Oct 2025 12:59:41 +0100
Subject: [PATCH 2/8] feat: specify action mode for rlbench

---
 src/lerobot/envs/rlbench.py | 38 +++++++++++++++++++++++++------------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/src/lerobot/envs/rlbench.py b/src/lerobot/envs/rlbench.py
index 0a37f3f0f6..03ab3cba84 100644
--- a/src/lerobot/envs/rlbench.py
+++ b/src/lerobot/envs/rlbench.py
@@ -24,17 +24,34 @@
 import gymnasium as gym
 from gymnasium import spaces
 
-from rlbench.environment import Environment
 from rlbench.backend.task import Task
 from rlbench.gym import RLBenchEnv as RLBenchGymEnv
-from rlbench.observation_config import ObservationConfig
 from rlbench.action_modes.action_mode import MoveArmThenGripper
 from rlbench.action_modes.arm_action_modes import JointPosition
 from rlbench.action_modes.gripper_action_modes import Discrete
 from rlbench.utils import name_to_task_class
 from rlbench.tasks import (FS10_V1, FS25_V1, FS50_V1, FS95_V1, MT15_V1, MT30_V1, MT55_V1, MT100_V1)
 
-os.environ["QT_QPA_PLATFORM"] = "offscreen"
+
+class AbsoluteJointPositionActionMode(MoveArmThenGripper):
+    """Absolute joint position action mode for arm and gripper.
+    
+    The arm action is first applied, followed by the gripper action.
+    """
+
+    def __init__(self):
+        # Call super
+        super(AbsoluteJointPositionActionMode, self).__init__(
+            JointPosition(absolute_mode=True),   # Arm in absolute joint positions
+            Discrete()  # Gripper in discrete open/close (<0.5 → close, >=0.5 → open)
+        )
+
+    def action_bounds(self):
+        """Returns the min and max of the action mode.
+        
+        Range is [- 2*pi, 2*pi] for each joint and [0.0, 1.0] for the gripper.
+        """
+        return np.array([-2 * np.pi] * 7 + [0.0]), np.array([2 * np.pi] * 7 + [1.0])
 
 # ---- Load configuration data from the external JSON file ----
 CONFIG_PATH = Path(__file__).parent / "rlbench_config.json"
@@ -144,7 +161,7 @@ def __init__(
         self.camera_name_mapping = camera_name_mapping
 
         self._env = self._make_envs_task(self.task)
-        self._max_episode_steps = 500   # TODO: make configurable depending on task suite
+        self._max_episode_steps = 500   # TODO: make configurable depending on task suite?
         self.task_description = TASK_DESCRIPTIONS.get(self.task, "")
 
         images = {}
@@ -156,7 +173,7 @@ def __init__(
                 dtype=np.uint8,
             )
 
-        if self.obs_type == "state":    # TODO: This can be implemented in RLBench!
+        if self.obs_type == "state":    # TODO: This can be implemented in RLBench, because the observation contains joint positions and gripper pose
             raise (
                 "The 'state' observation type is not supported in RLBench. "
                 "Please switch to an image-based obs_type (e.g. 'pixels', 'pixels_agent_pos')."
@@ -202,15 +219,12 @@ def render(self) -> np.ndarray:
         """
         return self._env.render()
 
-    def _make_envs_task(self, task: Task) -> RLBenchGymEnv:
+    def _make_envs_task(self, task: Task):
         return RLBenchGymEnv(
             task,
             observation_mode="vision",
             render_mode=self.render_mode,
-            action_mode=MoveArmThenGripper(
-                JointPosition(absolute_mode=True),   # Arm in absolute joint positions
-                Discrete()  # Gripper in discrete open/close (<0.5 → close, >=0.5 → open)
-            )
+            action_mode=AbsoluteJointPositionActionMode()
         )
 
     def _format_raw_obs(self, raw_obs: dict) -> dict[str, Any]:
@@ -226,7 +240,7 @@ def _format_raw_obs(self, raw_obs: dict) -> dict[str, Any]:
             [raw_obs["gripper_open"]],
         )
 
-        if self.obs_type == "state":    # TODO: REMOVE?
+        if self.obs_type == "state":    # TODO: this can be implemented in RLBench, because the observation contains joint positions and gripper pose
             raise NotImplementedError(
                 "'state' obs_type not implemented for RLBench. Use pixel modes instead."
             )
@@ -390,7 +404,7 @@ def create_rlbench_envs(
         if not selected:
             raise ValueError(f"No tasks selected for suite '{suite_name}' (available: {total}).")
 
-        for tid in selected:
+        for tid in selected:    # FIXME: this breaks!
             fns = _make_env_fns(
                 suite=suite,
                 task=suite['train'][tid],

From 274d19ceeff198f88902b53689af05a2de636af5 Mon Sep 17 00:00:00 2001
From: Andrea Delli <andrea.delli2001@gmail.com>
Date: Wed, 29 Oct 2025 13:00:18 +0100
Subject: [PATCH 3/8] feat: example script to collect a lerobot dataset from
 RLBench

---
 examples/dataset/rlbench_collect_dataset.py | 328 ++++++++++++++++++++
 1 file changed, 328 insertions(+)
 create mode 100644 examples/dataset/rlbench_collect_dataset.py

diff --git a/examples/dataset/rlbench_collect_dataset.py b/examples/dataset/rlbench_collect_dataset.py
new file mode 100644
index 0000000000..0c5bd4485e
--- /dev/null
+++ b/examples/dataset/rlbench_collect_dataset.py
@@ -0,0 +1,328 @@
+from enum import Enum
+import argparse
+import os
+import shutil
+
+import numpy as np
+from scipy.spatial.transform import Rotation as R
+from tqdm import tqdm
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
+
+# RLBench
+from rlbench import CameraConfig, Environment
+from rlbench.tasks import *
+from rlbench.action_modes.action_mode import MoveArmThenGripper
+from rlbench.action_modes.arm_action_modes import EndEffectorPoseViaPlanning
+from rlbench.action_modes.gripper_action_modes import Discrete
+from rlbench.observation_config import ObservationConfig
+from rlbench.demo import Demo
+
+# You can define how end-effector actions and rotations are represented.
+# The action represents the joint positions (joint1...joint7, gripper_open)
+# or end-effector pose (position, rotation, gripper_open), absolute or relative (delta from current pose).
+# The rotation can be represented as either Euler angles (3 values) or quaternions (4 values).
+EULER_EEF = "euler"  # Actions have 7 values: [x, y, z, roll, pitch, yaw, gripper_state]
+QUAT_EEF = "quat"  # Actions have 8 values: [x, y, z, qx, qy, qz, qw, gripper_state]
+JOINTS = "joints"  # Actions have 8 values: [joint_1, joint_2, joint_3, joint_4, joint_5, joint_6, joint_7, gripper_state]
+
+def get_target_pose(demo: Demo, index: int):
+    """ Get the target pose (gripper position and open state) for a specific observation in the demo. """
+    return np.array([*demo._observations[max(0, index)].gripper_pose, demo._observations[max(0, index)].gripper_open])
+
+def get_target_joints(demo: Demo, index: int):
+    """ Get the target joint positions for a specific observation in the demo. """
+    return np.array([*demo._observations[max(0, index)].joint_positions, demo._observations[max(0, index)].gripper_open])
+
+
+def action_conversion(action: np.ndarray, to_representation: str = 'euler', is_relative: bool = False, previous_action: np.ndarray = None):
+    """Convert an action between Euler and quaternion representations.
+
+    Args:
+        action: np.ndarray of shape (7,) (Euler) or (8,) (quaternion).
+                Euler format: [x, y, z, roll, pitch, yaw, gripper]
+                Quaternion format: [x, y, z, qx, qy, qz, qw, gripper]
+        to_representation: 'euler' or 'quat' target representation.
+        is_relative: if True, compute the delta between `action` and
+                     `previous_action` and return the delta (position and
+                     rotation). `previous_action` must be provided in this case.
+        previous_action: previous action (same format as `action`) used when
+                         is_relative is True.
+
+    Returns:
+        np.ndarray converted action in the target representation. For relative
+        mode the returned rotation represents the delta rotation (as Euler
+        angles or as a unit quaternion depending on `to_representation`).
+
+    Notes:
+        - Quaternion ordering is (qx, qy, qz, qw) to match the rest of the
+          codebase. Rotation objects from scipy are created/consumed with
+          this ordering via as_quat(scalar_first=False).
+        - When producing quaternions we always normalize to guard against
+          numerical drift.
+    """
+
+    if to_representation not in ('euler', 'quat'):
+        raise ValueError("to_representation must be 'euler' or 'quat'")
+
+    a = np.asarray(action, dtype=float)
+    if a.size not in (7, 8):
+        raise ValueError('action must be length 7 (Euler) or 8 (quaternion)')
+
+    if is_relative and previous_action is None:
+        raise ValueError('previous_action must be provided when is_relative is True')
+
+    def _ensure_unit_quat(q):
+        q = np.asarray(q, dtype=float)
+        n = np.linalg.norm(q)
+        if n == 0:
+            raise ValueError('Zero quaternion encountered')
+        return q / n
+
+    # Helper: construct Rotation from either euler or quat stored in action array
+    def _rot_from_action(arr):
+        arr = np.asarray(arr, dtype=float)
+        if arr.size == 7:
+            return R.from_euler('xyz', arr[3:6], degrees=False)
+        else:
+            return R.from_quat(arr[3:7])  # (qx, qy, qz, qw)
+
+    # Gripper state (keep as-is, demo code expects absolute gripper state even for deltas)
+    gripper = a[-1]
+
+    # Relative case: compute deltas
+    if is_relative:
+        prev = np.asarray(previous_action, dtype=float)
+        if prev.size not in (7, 8):
+            raise ValueError('previous_action must be length 7 or 8')
+
+        delta_pos = a[:3] - prev[:3]
+
+        # If both are Euler, simple subtraction of angles is fine
+        if a.size == 7 and prev.size == 7:
+            delta_ang = a[3:6] - prev[3:6]
+            if to_representation == 'euler':
+                return np.array([*delta_pos, *delta_ang, gripper], dtype=float)
+            else:
+                # convert delta Euler to quaternion (and normalize)
+                q = R.from_euler('xyz', delta_ang, degrees=False).as_quat(scalar_first=False)
+                q = _ensure_unit_quat(q)
+                return np.array([*delta_pos, *q, gripper], dtype=float)
+
+        # Otherwise use rotation algebra to compute the delta rotation
+        r_cur = _rot_from_action(a)
+        r_prev = _rot_from_action(prev)
+        r_delta = r_cur * r_prev.inv()
+
+        if to_representation == 'euler':
+            delta_ang = r_delta.as_euler('xyz', degrees=False)
+            return np.array([*delta_pos, *delta_ang, gripper], dtype=float)
+        else:
+            q = r_delta.as_quat(scalar_first=False)
+            q = _ensure_unit_quat(q)
+            return np.array([*delta_pos, *q, gripper], dtype=float)
+
+    # Absolute case: just convert representations
+    if to_representation == 'euler':
+        if a.size == 7:
+            return a.astype(float)
+        else:
+            euler = R.from_quat(a[3:7]).as_euler('xyz', degrees=False)
+            return np.array([*a[:3], *euler, gripper], dtype=float)
+    else:  # to_representation == 'quat'
+        if a.size == 8:
+            q = _ensure_unit_quat(a[3:7])
+            return np.array([*a[:3], *q, gripper], dtype=float)
+        else:
+            q = R.from_euler('xyz', a[3:6], degrees=False).as_quat(scalar_first=False)
+            q = _ensure_unit_quat(q)
+            return np.array([*a[:3], *q, gripper], dtype=float)
+
+# ------------------------
+# Main
+# ------------------------
+
+def main(args):
+    # Dynamically get the task class
+    try:
+        task_class = globals()[args.task]
+    except KeyError:
+        raise ValueError(f"Task {args.task} not found.")
+
+    # RLBench setup
+    camera_config = CameraConfig(image_size=(args.image_height, args.image_width))
+    obs_config = ObservationConfig(
+        left_shoulder_camera=camera_config,
+        right_shoulder_camera=camera_config,
+        overhead_camera=camera_config,
+        wrist_camera=camera_config,
+        front_camera=camera_config,
+    )
+    obs_config.set_all(True)
+
+    action_mode = MoveArmThenGripper(
+        arm_action_mode=EndEffectorPoseViaPlanning(absolute_mode=args.absolute_actions),
+        gripper_action_mode=Discrete(),
+    )
+    env = Environment(action_mode, obs_config=obs_config, headless=True)
+    env.launch()
+    task = env.get_task(task_class)
+
+    # Remove the dataset root if already exists
+    if os.path.exists(args.save_path):
+        print(f"Dataset root {args.save_path} already exists. Removing it.")
+        shutil.rmtree(args.save_path)
+
+    camera_names = ["left_shoulder_rgb", "right_shoulder_rgb", "front_rgb", "wrist_rgb", "overhead_rgb"]
+
+    action_feature = {}
+    if args.action_repr == "euler":
+        action_feature = {
+            "shape": (7,),  # pos(3) + euler(3) + gripper(1)
+            "dtype": "float32",
+            "names": ["x", "y", "z", "roll", "pitch", "yaw", "gripper"],
+            "description": "End-effector position (x,y,z), orientation (roll,pitch,yaw) and gripper state (0.0 closed, 1.0 open).",
+        }
+    elif args.action_repr == "quat":
+        action_feature = {
+            "shape": (8,),  # pos(3) + quat(4) + gripper(1)
+            "dtype": "float32",
+            "names": ["x", "y", "z", "qx", "qy", "qz", "qw", "gripper"],
+            "description": "End-effector position (x,y,z), orientation (qx,qy,qz,qw) and gripper state (0.0 closed, 1.0 open).",
+        }
+    elif args.action_repr == "joints":
+        action_feature = {
+            "shape": (8,),  # joint_1 to joint_7 + gripper(1)
+            "dtype": "float32",
+            "names": ["joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6", "joint_7", "gripper"],
+            "description": "Robot joint positions (absolute rotations) and gripper state (0.0 closed, 1.0 open).",
+        }
+    
+    dataset = LeRobotDataset.create(
+        repo_id=args.repo_id,
+        fps=args.fps,
+        root=args.save_path,
+        robot_type="franka",
+        features={
+            "observation.state": {
+                "dtype": "float32",
+                "shape": (7,),  # pos(3) + euler(3) + gripper(1)
+                "names": ["x", "y", "z", "roll", "pitch", "yaw", "gripper"],
+                "description": "End-effector position (x,y,z), orientation (roll,pitch,yaw) and gripper state (0.0 closed, 1.0 open).",
+            },
+            "observation.state.joints": {
+                "dtype": "float32",
+                "shape": (7,),
+                "names": ["joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6", "joint_7"],
+                "description": "Robot joint positions (absolute rotations).",
+            },
+            "action": action_feature,
+            
+            # All camera images
+            **{f"observation.images.{cam}": {
+                    "dtype": "video",
+                    "shape": (args.image_height, args.image_width, 3),
+                    "names": ["height", "width", "channels"],
+                    "info": { "video.fps": args.fps, "video.height": args.image_height, "video.width": args.image_width, "video.channels": 3, "video.is_depth_map": False, "has_audio": False }
+                } for cam in camera_names
+            }
+        },
+    )
+
+    # Collect demonstrations and add them to the LeRobot dataset
+    print(f"Generating {args.num_episodes} demos for task: {args.task}")
+    for i in tqdm(range(args.num_episodes), desc="Collecting demos"):
+        # generate a new demo
+        demo = task.get_demos(1, live_demos=True)[0]
+        
+        for frame_index, observation in enumerate(demo):
+            
+            action = None
+            if args.action_repr in ["euler", "quat"]:
+                action = action_conversion(
+                    get_target_pose(demo, frame_index + 1 if frame_index < len(demo) - 1 else frame_index),
+                    args.action_repr,
+                    not args.absolute_actions,
+                    get_target_pose(demo, frame_index)
+                )
+            elif args.action_repr == "joints":
+                action = get_target_joints(demo, frame_index + 1 if frame_index < len(demo) - 1 else frame_index)
+            
+            # Create the frame data, following the same structure as the features defined above
+            frame_data = {
+                "observation.state": action_conversion(get_target_pose(demo, frame_index)).astype(np.float32),
+                "observation.state.joints": observation.joint_positions.astype(np.float32),
+                "action": action.astype(np.float32),
+                "task": task.get_name(),
+            }
+            for cam in camera_names:
+                frame_data[f"observation.images.{cam}"] = getattr(observation, cam)
+            
+            # Save the frame
+            dataset.add_frame(frame_data)
+        dataset.save_episode()
+    env.shutdown()
+        
+    dataset.push_to_hub()
+    print(f"\033[92mDataset saved to {args.save_path} and pushed to HuggingFace Hub: {args.repo_id}\033[0m")
+
+    
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Collect RLBench demonstrations and save to LeRobot dataset format.")
+    parser.add_argument(
+        "--save_path",
+        type=str,
+        default=os.path.join(os.getcwd(), "datasets"),
+        help="Path to save the LeRobot dataset."
+    )
+    parser.add_argument(
+        "--repo_id",
+        type=str,
+        required=True,
+        help="HuggingFace Hub repository ID (e.g., 'username/dataset-name')."
+    )
+    parser.add_argument(
+        "--num_episodes",
+        type=int,
+        default=100,
+        help="Number of demonstrations to record."
+    )
+    parser.add_argument(
+        "--task",
+        type=str,
+        default="PutRubbishInBin",
+        help="Name of the RLBench task."
+    )
+    parser.add_argument(
+        "--action_repr",
+        type=str,
+        choices=["euler", "quat", "joints"],
+        default="euler",
+        help="Action representation: 'euler' for Euler angles, 'quat' for quaternions, or 'joints' for joint positions."
+    )
+    parser.add_argument(
+        "--absolute_actions",
+        action="store_true",
+        default=False,
+        help="Whether to use absolute actions (default: False). Valid only for 'euler' and 'quat' action representations."
+    )
+    parser.add_argument(
+        "--fps",
+        type=int,
+        default=30,
+        help="Video frames per second for the dataset (default: 30)."
+    )
+    parser.add_argument(
+        "--image_width",
+        type=int,
+        default=256,
+        help="Image width in pixels (default: 256)."
+    )
+    parser.add_argument(
+        "--image_height",
+        type=int,
+        default=256,
+        help="Image height in pixels (default: 256)."
+    )
+    
+    args = parser.parse_args()
+    main(args)

From c09299152ebd88a64ddd7f29510022cb631873a8 Mon Sep 17 00:00:00 2001
From: Andrea Delli <andrea.delli2001@gmail.com>
Date: Wed, 29 Oct 2025 13:35:00 +0100
Subject: [PATCH 4/8] refactor: code style

---
 docs/source/rlbench.mdx                     |   1 +
 examples/dataset/rlbench_collect_dataset.py | 155 ++++++++++----------
 src/lerobot/envs/configs.py                 |   3 +-
 src/lerobot/envs/rlbench.py                 |  70 +++++----
 src/lerobot/envs/rlbench_config.json        |   2 +-
 5 files changed, 119 insertions(+), 112 deletions(-)

diff --git a/docs/source/rlbench.mdx b/docs/source/rlbench.mdx
index a477176f3e..7cafa89aae 100644
--- a/docs/source/rlbench.mdx
+++ b/docs/source/rlbench.mdx
@@ -36,6 +36,7 @@ LeRobot's integration with RLBench allows you to train and evaluate policies on
 RLBench is built around CoppeliaSim v4.1.0 and [PyRep](https://github.com/stepjam/PyRep).
 
 First, install CoppeliaSim:
+
 ```bash
 # set env variables
 export COPPELIASIM_ROOT=${HOME}/CoppeliaSim
diff --git a/examples/dataset/rlbench_collect_dataset.py b/examples/dataset/rlbench_collect_dataset.py
index 0c5bd4485e..872e73dae9 100644
--- a/examples/dataset/rlbench_collect_dataset.py
+++ b/examples/dataset/rlbench_collect_dataset.py
@@ -1,21 +1,21 @@
-from enum import Enum
 import argparse
 import os
 import shutil
 
 import numpy as np
-from scipy.spatial.transform import Rotation as R
-from tqdm import tqdm
-from lerobot.datasets.lerobot_dataset import LeRobotDataset
 
 # RLBench
 from rlbench import CameraConfig, Environment
-from rlbench.tasks import *
 from rlbench.action_modes.action_mode import MoveArmThenGripper
 from rlbench.action_modes.arm_action_modes import EndEffectorPoseViaPlanning
 from rlbench.action_modes.gripper_action_modes import Discrete
-from rlbench.observation_config import ObservationConfig
 from rlbench.demo import Demo
+from rlbench.observation_config import ObservationConfig
+from rlbench.utils import name_to_task_class
+from scipy.spatial.transform import Rotation
+from tqdm import tqdm
+
+from lerobot.datasets.lerobot_dataset import LeRobotDataset
 
 # You can define how end-effector actions and rotations are represented.
 # The action represents the joint positions (joint1...joint7, gripper_open)
@@ -25,16 +25,27 @@
 QUAT_EEF = "quat"  # Actions have 8 values: [x, y, z, qx, qy, qz, qw, gripper_state]
 JOINTS = "joints"  # Actions have 8 values: [joint_1, joint_2, joint_3, joint_4, joint_5, joint_6, joint_7, gripper_state]
 
+
 def get_target_pose(demo: Demo, index: int):
-    """ Get the target pose (gripper position and open state) for a specific observation in the demo. """
-    return np.array([*demo._observations[max(0, index)].gripper_pose, demo._observations[max(0, index)].gripper_open])
+    """Get the target pose (gripper position and open state) for a specific observation in the demo."""
+    return np.array(
+        [*demo._observations[max(0, index)].gripper_pose, demo._observations[max(0, index)].gripper_open]
+    )
+
 
 def get_target_joints(demo: Demo, index: int):
-    """ Get the target joint positions for a specific observation in the demo. """
-    return np.array([*demo._observations[max(0, index)].joint_positions, demo._observations[max(0, index)].gripper_open])
+    """Get the target joint positions for a specific observation in the demo."""
+    return np.array(
+        [*demo._observations[max(0, index)].joint_positions, demo._observations[max(0, index)].gripper_open]
+    )
 
 
-def action_conversion(action: np.ndarray, to_representation: str = 'euler', is_relative: bool = False, previous_action: np.ndarray = None):
+def action_conversion(
+    action: np.ndarray,
+    to_representation: str = "euler",
+    is_relative: bool = False,
+    previous_action: np.ndarray = None,
+):
     """Convert an action between Euler and quaternion representations.
 
     Args:
@@ -61,30 +72,30 @@ def action_conversion(action: np.ndarray, to_representation: str = 'euler', is_r
           numerical drift.
     """
 
-    if to_representation not in ('euler', 'quat'):
+    if to_representation not in ("euler", "quat"):
         raise ValueError("to_representation must be 'euler' or 'quat'")
 
     a = np.asarray(action, dtype=float)
     if a.size not in (7, 8):
-        raise ValueError('action must be length 7 (Euler) or 8 (quaternion)')
+        raise ValueError("action must be length 7 (Euler) or 8 (quaternion)")
 
     if is_relative and previous_action is None:
-        raise ValueError('previous_action must be provided when is_relative is True')
+        raise ValueError("previous_action must be provided when is_relative is True")
 
     def _ensure_unit_quat(q):
         q = np.asarray(q, dtype=float)
         n = np.linalg.norm(q)
         if n == 0:
-            raise ValueError('Zero quaternion encountered')
+            raise ValueError("Zero quaternion encountered")
         return q / n
 
     # Helper: construct Rotation from either euler or quat stored in action array
     def _rot_from_action(arr):
         arr = np.asarray(arr, dtype=float)
         if arr.size == 7:
-            return R.from_euler('xyz', arr[3:6], degrees=False)
+            return Rotation.from_euler("xyz", arr[3:6], degrees=False)
         else:
-            return R.from_quat(arr[3:7])  # (qx, qy, qz, qw)
+            return Rotation.from_quat(arr[3:7])  # (qx, qy, qz, qw)
 
     # Gripper state (keep as-is, demo code expects absolute gripper state even for deltas)
     gripper = a[-1]
@@ -93,18 +104,18 @@ def _rot_from_action(arr):
     if is_relative:
         prev = np.asarray(previous_action, dtype=float)
         if prev.size not in (7, 8):
-            raise ValueError('previous_action must be length 7 or 8')
+            raise ValueError("previous_action must be length 7 or 8")
 
         delta_pos = a[:3] - prev[:3]
 
         # If both are Euler, simple subtraction of angles is fine
         if a.size == 7 and prev.size == 7:
             delta_ang = a[3:6] - prev[3:6]
-            if to_representation == 'euler':
+            if to_representation == "euler":
                 return np.array([*delta_pos, *delta_ang, gripper], dtype=float)
             else:
                 # convert delta Euler to quaternion (and normalize)
-                q = R.from_euler('xyz', delta_ang, degrees=False).as_quat(scalar_first=False)
+                q = Rotation.from_euler("xyz", delta_ang, degrees=False).as_quat(scalar_first=False)
                 q = _ensure_unit_quat(q)
                 return np.array([*delta_pos, *q, gripper], dtype=float)
 
@@ -113,8 +124,8 @@ def _rot_from_action(arr):
         r_prev = _rot_from_action(prev)
         r_delta = r_cur * r_prev.inv()
 
-        if to_representation == 'euler':
-            delta_ang = r_delta.as_euler('xyz', degrees=False)
+        if to_representation == "euler":
+            delta_ang = r_delta.as_euler("xyz", degrees=False)
             return np.array([*delta_pos, *delta_ang, gripper], dtype=float)
         else:
             q = r_delta.as_quat(scalar_first=False)
@@ -122,31 +133,29 @@ def _rot_from_action(arr):
             return np.array([*delta_pos, *q, gripper], dtype=float)
 
     # Absolute case: just convert representations
-    if to_representation == 'euler':
+    if to_representation == "euler":
         if a.size == 7:
             return a.astype(float)
         else:
-            euler = R.from_quat(a[3:7]).as_euler('xyz', degrees=False)
+            euler = Rotation.from_quat(a[3:7]).as_euler("xyz", degrees=False)
             return np.array([*a[:3], *euler, gripper], dtype=float)
     else:  # to_representation == 'quat'
         if a.size == 8:
             q = _ensure_unit_quat(a[3:7])
             return np.array([*a[:3], *q, gripper], dtype=float)
         else:
-            q = R.from_euler('xyz', a[3:6], degrees=False).as_quat(scalar_first=False)
+            q = Rotation.from_euler("xyz", a[3:6], degrees=False).as_quat(scalar_first=False)
             q = _ensure_unit_quat(q)
             return np.array([*a[:3], *q, gripper], dtype=float)
 
+
 # ------------------------
 # Main
 # ------------------------
 
+
 def main(args):
-    # Dynamically get the task class
-    try:
-        task_class = globals()[args.task]
-    except KeyError:
-        raise ValueError(f"Task {args.task} not found.")
+    task_class = name_to_task_class(args.task)
 
     # RLBench setup
     camera_config = CameraConfig(image_size=(args.image_height, args.image_width))
@@ -196,7 +205,7 @@ def main(args):
             "names": ["joint_1", "joint_2", "joint_3", "joint_4", "joint_5", "joint_6", "joint_7", "gripper"],
             "description": "Robot joint positions (absolute rotations) and gripper state (0.0 closed, 1.0 open).",
         }
-    
+
     dataset = LeRobotDataset.create(
         repo_id=args.repo_id,
         fps=args.fps,
@@ -216,37 +225,46 @@ def main(args):
                 "description": "Robot joint positions (absolute rotations).",
             },
             "action": action_feature,
-            
             # All camera images
-            **{f"observation.images.{cam}": {
+            **{
+                f"observation.images.{cam}": {
                     "dtype": "video",
                     "shape": (args.image_height, args.image_width, 3),
                     "names": ["height", "width", "channels"],
-                    "info": { "video.fps": args.fps, "video.height": args.image_height, "video.width": args.image_width, "video.channels": 3, "video.is_depth_map": False, "has_audio": False }
-                } for cam in camera_names
-            }
+                    "info": {
+                        "video.fps": args.fps,
+                        "video.height": args.image_height,
+                        "video.width": args.image_width,
+                        "video.channels": 3,
+                        "video.is_depth_map": False,
+                        "has_audio": False,
+                    },
+                }
+                for cam in camera_names
+            },
         },
     )
 
     # Collect demonstrations and add them to the LeRobot dataset
     print(f"Generating {args.num_episodes} demos for task: {args.task}")
-    for i in tqdm(range(args.num_episodes), desc="Collecting demos"):
+    for _ in tqdm(range(args.num_episodes), desc="Collecting demos"):
         # generate a new demo
         demo = task.get_demos(1, live_demos=True)[0]
-        
+
         for frame_index, observation in enumerate(demo):
-            
             action = None
             if args.action_repr in ["euler", "quat"]:
                 action = action_conversion(
                     get_target_pose(demo, frame_index + 1 if frame_index < len(demo) - 1 else frame_index),
                     args.action_repr,
                     not args.absolute_actions,
-                    get_target_pose(demo, frame_index)
+                    get_target_pose(demo, frame_index),
                 )
             elif args.action_repr == "joints":
-                action = get_target_joints(demo, frame_index + 1 if frame_index < len(demo) - 1 else frame_index)
-            
+                action = get_target_joints(
+                    demo, frame_index + 1 if frame_index < len(demo) - 1 else frame_index
+                )
+
             # Create the frame data, following the same structure as the features defined above
             frame_data = {
                 "observation.state": action_conversion(get_target_pose(demo, frame_index)).astype(np.float32),
@@ -256,73 +274,54 @@ def main(args):
             }
             for cam in camera_names:
                 frame_data[f"observation.images.{cam}"] = getattr(observation, cam)
-            
+
             # Save the frame
             dataset.add_frame(frame_data)
         dataset.save_episode()
     env.shutdown()
-        
-    dataset.push_to_hub()
+
+    # dataset.push_to_hub()
     print(f"\033[92mDataset saved to {args.save_path} and pushed to HuggingFace Hub: {args.repo_id}\033[0m")
 
-    
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Collect RLBench demonstrations and save to LeRobot dataset format.")
+    parser = argparse.ArgumentParser(
+        description="Collect RLBench demonstrations and save to LeRobot dataset format."
+    )
     parser.add_argument(
         "--save_path",
         type=str,
         default=os.path.join(os.getcwd(), "datasets"),
-        help="Path to save the LeRobot dataset."
+        help="Path to save the LeRobot dataset.",
     )
     parser.add_argument(
         "--repo_id",
         type=str,
         required=True,
-        help="HuggingFace Hub repository ID (e.g., 'username/dataset-name')."
-    )
-    parser.add_argument(
-        "--num_episodes",
-        type=int,
-        default=100,
-        help="Number of demonstrations to record."
-    )
-    parser.add_argument(
-        "--task",
-        type=str,
-        default="PutRubbishInBin",
-        help="Name of the RLBench task."
+        help="HuggingFace Hub repository ID (e.g., 'username/dataset-name').",
     )
+    parser.add_argument("--num_episodes", type=int, default=100, help="Number of demonstrations to record.")
+    parser.add_argument("--task", type=str, default="put_rubbish_in_bin", help="Name of the RLBench task.")
     parser.add_argument(
         "--action_repr",
         type=str,
         choices=["euler", "quat", "joints"],
         default="euler",
-        help="Action representation: 'euler' for Euler angles, 'quat' for quaternions, or 'joints' for joint positions."
+        help="Action representation: 'euler' for Euler angles, 'quat' for quaternions, or 'joints' for joint positions.",
     )
     parser.add_argument(
         "--absolute_actions",
         action="store_true",
         default=False,
-        help="Whether to use absolute actions (default: False). Valid only for 'euler' and 'quat' action representations."
+        help="Whether to use absolute actions (default: False). Valid only for 'euler' and 'quat' action representations.",
     )
     parser.add_argument(
-        "--fps",
-        type=int,
-        default=30,
-        help="Video frames per second for the dataset (default: 30)."
+        "--fps", type=int, default=30, help="Video frames per second for the dataset (default: 30)."
     )
+    parser.add_argument("--image_width", type=int, default=256, help="Image width in pixels (default: 256).")
     parser.add_argument(
-        "--image_width",
-        type=int,
-        default=256,
-        help="Image width in pixels (default: 256)."
+        "--image_height", type=int, default=256, help="Image height in pixels (default: 256)."
     )
-    parser.add_argument(
-        "--image_height",
-        type=int,
-        default=256,
-        help="Image height in pixels (default: 256)."
-    )
-    
+
     args = parser.parse_args()
     main(args)
diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py
index 4d0f010eda..93f186f061 100644
--- a/src/lerobot/envs/configs.py
+++ b/src/lerobot/envs/configs.py
@@ -320,6 +320,7 @@ def gym_kwargs(self) -> dict:
             "render_mode": self.render_mode,
         }
 
+
 @EnvConfig.register_subclass("rlbench")
 @dataclass
 class RLBenchEnv(EnvConfig):
@@ -370,4 +371,4 @@ def gym_kwargs(self) -> dict:
         return {
             "obs_type": self.obs_type,
             "render_mode": self.render_mode,
-        }
\ No newline at end of file
+        }
diff --git a/src/lerobot/envs/rlbench.py b/src/lerobot/envs/rlbench.py
index 03ab3cba84..fcb4dcb679 100644
--- a/src/lerobot/envs/rlbench.py
+++ b/src/lerobot/envs/rlbench.py
@@ -13,46 +13,45 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from functools import partial
-import json, os
+import json
 from collections import defaultdict
-from collections.abc import Callable, Iterable, Sequence
+from collections.abc import Callable, Iterable, Mapping, Sequence
+from functools import partial
 from pathlib import Path
-from typing import Any, Mapping
-import numpy as np
+from typing import Any
 
 import gymnasium as gym
+import numpy as np
 from gymnasium import spaces
-
-from rlbench.backend.task import Task
-from rlbench.gym import RLBenchEnv as RLBenchGymEnv
 from rlbench.action_modes.action_mode import MoveArmThenGripper
 from rlbench.action_modes.arm_action_modes import JointPosition
 from rlbench.action_modes.gripper_action_modes import Discrete
-from rlbench.utils import name_to_task_class
-from rlbench.tasks import (FS10_V1, FS25_V1, FS50_V1, FS95_V1, MT15_V1, MT30_V1, MT55_V1, MT100_V1)
+from rlbench.backend.task import Task
+from rlbench.gym import RLBenchEnv as RLBenchGymEnv
+from rlbench.tasks import FS10_V1, FS25_V1, FS50_V1, FS95_V1, MT15_V1, MT30_V1, MT55_V1, MT100_V1
 
 
 class AbsoluteJointPositionActionMode(MoveArmThenGripper):
     """Absolute joint position action mode for arm and gripper.
-    
+
     The arm action is first applied, followed by the gripper action.
     """
 
     def __init__(self):
         # Call super
-        super(AbsoluteJointPositionActionMode, self).__init__(
-            JointPosition(absolute_mode=True),   # Arm in absolute joint positions
-            Discrete()  # Gripper in discrete open/close (<0.5 → close, >=0.5 → open)
+        super().__init__(
+            JointPosition(absolute_mode=True),  # Arm in absolute joint positions
+            Discrete(),  # Gripper in discrete open/close (<0.5 → close, >=0.5 → open)
         )
 
     def action_bounds(self):
         """Returns the min and max of the action mode.
-        
+
         Range is [- 2*pi, 2*pi] for each joint and [0.0, 1.0] for the gripper.
         """
         return np.array([-2 * np.pi] * 7 + [0.0]), np.array([2 * np.pi] * 7 + [1.0])
 
+
 # ---- Load configuration data from the external JSON file ----
 CONFIG_PATH = Path(__file__).parent / "rlbench_config.json"
 try:
@@ -77,6 +76,7 @@ def action_bounds(self):
 ACTION_DIM = 8  # 7 joints + 1 gripper  # NOTE: RLBench supports also EEF pose+gripper (dim=8, [x,y,z,rx,ry,rz,gripper])
 OBS_DIM = 8  # 7 joints + 1 gripper
 
+
 def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
     """Normalize camera_name into a non-empty list of strings."""
     if isinstance(camera_name, str):
@@ -89,9 +89,10 @@ def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
         raise ValueError("camera_name resolved to an empty list.")
     return cams
 
+
 def _get_suite(name: str) -> dict[str, list[Task]]:
     """Instantiate a RLBench suite by name with clear validation."""
-    
+
     suites = {
         "FS10_V1": FS10_V1,
         "FS25_V1": FS25_V1,
@@ -111,6 +112,7 @@ def _get_suite(name: str) -> dict[str, list[Task]]:
         raise ValueError(f"Suite '{name}' has no tasks.")
     return suite
 
+
 def _select_task_ids(total_tasks: int, task_ids: Iterable[int] | None) -> list[int]:
     """Validate/normalize task ids. If None → all tasks."""
     if task_ids is None:
@@ -147,7 +149,7 @@ def __init__(
         self.visualization_width = visualization_width
         self.visualization_height = visualization_height
         self.camera_name = _parse_camera_names(camera_name)
-        
+
         # Map raw camera names to "image1" and "image2".
         # The preprocessing step `preprocess_observation` will then prefix these with `.images.*`,
         # following the LeRobot convention (e.g., `observation.images.image`, `observation.images.image2`).
@@ -161,8 +163,9 @@ def __init__(
         self.camera_name_mapping = camera_name_mapping
 
         self._env = self._make_envs_task(self.task)
-        self._max_episode_steps = 500   # TODO: make configurable depending on task suite?
-        self.task_description = TASK_DESCRIPTIONS.get(self.task, "")
+        self._max_episode_steps = 500  # TODO: make configurable depending on task suite?
+        task_name = self.task.get_name() if self.task is not None else ""
+        self.task_description = TASK_DESCRIPTIONS.get(task_name, "")
 
         images = {}
         for cam in self.camera_name:
@@ -173,12 +176,14 @@ def __init__(
                 dtype=np.uint8,
             )
 
-        if self.obs_type == "state":    # TODO: This can be implemented in RLBench, because the observation contains joint positions and gripper pose
-            raise (
+        if (
+            self.obs_type == "state"
+        ):  # TODO: This can be implemented in RLBench, because the observation contains joint positions and gripper pose
+            raise ValueError(
                 "The 'state' observation type is not supported in RLBench. "
                 "Please switch to an image-based obs_type (e.g. 'pixels', 'pixels_agent_pos')."
             )
-            
+
         elif self.obs_type == "pixels":
             self.observation_space = spaces.Dict(
                 {
@@ -224,23 +229,24 @@ def _make_envs_task(self, task: Task):
             task,
             observation_mode="vision",
             render_mode=self.render_mode,
-            action_mode=AbsoluteJointPositionActionMode()
+            action_mode=AbsoluteJointPositionActionMode(),
         )
 
     def _format_raw_obs(self, raw_obs: dict) -> dict[str, Any]:
-        
         images = {}
         for camera_name in self.camera_name:
             image = raw_obs[camera_name]
             image = image[::-1, ::-1]  # rotate 180 degrees
             images[self.camera_name_mapping[camera_name]] = image
-        
+
         agent_pos = np.concatenate(
             raw_obs["joint_positions"],
             [raw_obs["gripper_open"]],
         )
 
-        if self.obs_type == "state":    # TODO: this can be implemented in RLBench, because the observation contains joint positions and gripper pose
+        if (
+            self.obs_type == "state"
+        ):  # TODO: this can be implemented in RLBench, because the observation contains joint positions and gripper pose
             raise NotImplementedError(
                 "'state' obs_type not implemented for RLBench. Use pixel modes instead."
             )
@@ -354,7 +360,7 @@ def _make_env(**kwargs) -> RLBenchEnv:
     for _ in range(n_envs):
         fns.append(partial(_make_env, **gym_kwargs))
     return fns
-    
+
 
 # ---- Main API ----------------------------------------------------------------
 
@@ -376,7 +382,7 @@ def create_rlbench_envs(
         - `task` can be a single suite or a comma-separated list of suites.
         - You may pass `task_names` (list[str]) inside `gym_kwargs` to restrict tasks per suite.
     """
-    
+
     if env_cls is None or not callable(env_cls):
         raise ValueError("env_cls must be a callable that wraps a list of environment factory callables.")
     if not isinstance(n_envs, int) or n_envs <= 0:
@@ -384,7 +390,7 @@ def create_rlbench_envs(
 
     gym_kwargs = dict(gym_kwargs or {})
     task_ids_filter = gym_kwargs.pop("task_ids", None)  # optional: limit to specific tasks
-    
+
     camera_names = _parse_camera_names(camera_name)
     suite_names = [s.strip() for s in str(task).split(",") if s.strip()]
     if not suite_names:
@@ -398,16 +404,16 @@ def create_rlbench_envs(
 
     for suite_name in suite_names:
         suite = _get_suite(suite_name)
-        total = len(suite['train'])
+        total = len(suite["train"])
         selected = _select_task_ids(total, task_ids_filter)
 
         if not selected:
             raise ValueError(f"No tasks selected for suite '{suite_name}' (available: {total}).")
 
-        for tid in selected:    # FIXME: this breaks!
+        for tid in selected:  # FIXME: this breaks!
             fns = _make_env_fns(
                 suite=suite,
-                task=suite['train'][tid],
+                task=suite["train"][tid],
                 n_envs=n_envs,
                 camera_names=camera_names,
                 gym_kwargs=gym_kwargs,
diff --git a/src/lerobot/envs/rlbench_config.json b/src/lerobot/envs/rlbench_config.json
index 01af0e999b..44ae174b5a 100644
--- a/src/lerobot/envs/rlbench_config.json
+++ b/src/lerobot/envs/rlbench_config.json
@@ -216,4 +216,4 @@
     "weighing_scales": 104,
     "wipe_desk": 105
   }
-}
\ No newline at end of file
+}

From ee6df7c1ab5f2fd6372961b604483a56bd808183 Mon Sep 17 00:00:00 2001
From: Andrea Delli <andrea.delli2001@gmail.com>
Date: Thu, 30 Oct 2025 00:12:16 +0100
Subject: [PATCH 5/8] fix: rlbench - get task description by class name

---
 docs/source/rlbench.mdx              |   8 +-
 src/lerobot/envs/rlbench.py          |   4 +-
 src/lerobot/envs/rlbench_config.json | 424 +++++++++++++--------------
 3 files changed, 221 insertions(+), 215 deletions(-)

diff --git a/docs/source/rlbench.mdx b/docs/source/rlbench.mdx
index 7cafa89aae..f0bca9f74d 100644
--- a/docs/source/rlbench.mdx
+++ b/docs/source/rlbench.mdx
@@ -38,7 +38,7 @@ RLBench is built around CoppeliaSim v4.1.0 and [PyRep](https://github.com/stepja
 First, install CoppeliaSim:
 
 ```bash
-# set env variables
+# set environment variables
 export COPPELIASIM_ROOT=${HOME}/CoppeliaSim
 export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$COPPELIASIM_ROOT
 export QT_QPA_PLATFORM_PLUGIN_PATH=$COPPELIASIM_ROOT
@@ -49,7 +49,11 @@ rm -rf CoppeliaSim_Edu_V4_1_0_Ubuntu20_04.tar.xz
 ```
 
 Next, install the necessary dependencies:
-`pip install -e ".[rlbench]"`
+
+```bash
+pip install pycparser    # needed while cloning rlbench
+pip install -e ".[rlbench]"
+```
 
 That's it! You can now use RLBench environments within LeRobot. To run headless, check the documentation on the original [RLBench repo](https://github.com/stepjam/RLBench).
 
diff --git a/src/lerobot/envs/rlbench.py b/src/lerobot/envs/rlbench.py
index fcb4dcb679..1718e4a118 100644
--- a/src/lerobot/envs/rlbench.py
+++ b/src/lerobot/envs/rlbench.py
@@ -164,7 +164,9 @@ def __init__(
 
         self._env = self._make_envs_task(self.task)
         self._max_episode_steps = 500  # TODO: make configurable depending on task suite?
-        task_name = self.task.get_name() if self.task is not None else ""
+
+        # Get task description
+        task_name = self.task.__name__ if self.task is not None else ""
         self.task_description = TASK_DESCRIPTIONS.get(task_name, "")
 
         images = {}
diff --git a/src/lerobot/envs/rlbench_config.json b/src/lerobot/envs/rlbench_config.json
index 44ae174b5a..64ae01ca76 100644
--- a/src/lerobot/envs/rlbench_config.json
+++ b/src/lerobot/envs/rlbench_config.json
@@ -1,219 +1,219 @@
 {
   "TASK_DESCRIPTIONS": {
-    "basketball_in_hoop": "Put the ball in the hoop",
-    "beat_the_buzz": "Beat the buzz",
-    "block_pyramid": "Stack blocks in a pyramid",
-    "change_channel": "Turn the channel",
-    "change_clock": "Change the clock to show time 12.15",
-    "close_box": "Close box",
-    "close_door": "Close the door",
-    "close_drawer": "Close drawer",
-    "close_fridge": "Close fridge",
-    "close_grill": "Close the grill",
-    "close_jar": "Close the jar",
-    "close_laptop_lid": "Close laptop lid",
-    "close_microwave": "Close microwave",
-    "empty_container": "Empty the container in the to container",
-    "empty_dishwasher": "Empty the dishwasher",
-    "get_ice_from_fridge": "Get ice from fridge",
-    "hang_frame_on_hanger": "Hang frame on hanger",
-    "hit_ball_with_queue": "Hit ball with queue in to the goal",
-    "hockey": "Hit the ball into the net",
-    "insert_onto_square_peg": "Put the ring on the spoke",
-    "insert_usb_in_computer": "Insert usb in computer",
-    "lamp_off": "Turn off the light",
-    "lamp_on": "Turn on the light",
-    "lift_numbered_block": "Pick up the block with the number",
-    "light_bulb_in": "Screw in the light bulb",
-    "light_bulb_out": "Put the bulb in the holder",
-    "meat_off_grill": "Take the off the grill",
-    "meat_on_grill": "Put the on the grill",
-    "move_hanger": "Move hanger onto the other rack",
-    "open_box": "Open box",
-    "open_door": "Open the door",
-    "open_drawer": "Open drawer",
-    "open_fridge": "Open fridge",
-    "open_grill": "Open the grill",
-    "open_jar": "Open the jar",
-    "open_microwave": "Open microwave",
-    "open_oven": "Open the oven",
-    "open_washing_machine": "Open washing machine",
-    "open_window": "Open left window",
-    "open_wine_bottle": "Open wine bottle",
-    "phone_on_base": "Put the phone on the base",
-    "pick_and_lift": "Pick up the block and lift it up to the target",
-    "pick_and_lift_small": "Pick up the and lift it up to the target",
-    "pick_up_cup": "Pick up the cup",
-    "place_cups": "Place 1 cup on the cup holder",
-    "place_hanger_on_rack": "Pick up the hanger and place in on the rack",
-    "place_shape_in_shape_sorter": "Put the in the shape sorter",
-    "play_jenga": "Play jenga",
-    "plug_charger_in_power_supply": "Plug charger in power supply",
-    "pour_from_cup_to_cup": "Pour liquid from the cup to the cup",
-    "press_switch": "Press switch",
-    "push_button": "Push the button",
-    "push_buttons": "Push the buttons",
-    "put_all_groceries_in_cupboard": "Put all of the groceries in the cupboard",
-    "put_books_on_bookshelf": "Put books on bookshelf",
-    "put_bottle_in_fridge": "Put bottle in fridge",
-    "put_groceries_in_cupboard": "Put the in the cupboard",
-    "put_item_in_drawer": "Put item in drawer",
-    "put_knife_in_knife_block": "Put the knife in the knife block",
-    "put_knife_on_chopping_board": "Put the knife on the chopping board",
-    "put_money_in_safe": "Put the money away in the safe on the shelf",
-    "put_plate_in_colored_dish_rack": "Put the plate between the pillars of the dish rack",
-    "put_rubbish_in_bin": "Put rubbish in bin",
-    "put_shoes_in_box": "Put the shoes in the box",
-    "put_toilet_roll_on_stand": "Put toilet roll on stand",
-    "put_tray_in_oven": "Put tray in oven",
-    "put_umbrella_in_umbrella_stand": "Put umbrella in umbrella stand",
-    "reach_and_drag": "Use the stick to drag the cube onto the target",
-    "reach_target": "Reach the target",
-    "remove_cups": "Remove 1 cup from the cup holder and place it on the",
-    "scoop_with_spatula": "Scoop up the cube and lift it with the spatula",
-    "screw_nail": "Screw the nail in to the block",
-    "set_the_table": "Set the table",
-    "setup_checkers": "Setup checkers",
-    "setup_chess": "Setup chess",
-    "slide_block_to_target": "Slide the block to target",
-    "slide_cabinet_open": "Slide cabinet open",
-    "slide_cabinet_open_and_place_cups": "Put cup in cabinet",
-    "solve_puzzle": "Solve the puzzle",
-    "stack_blocks": "Stack blocks",
-    "stack_chairs": "Stack the other chairs on top of the chair",
-    "stack_cups": "Stack the other cups on top of the cup",
-    "stack_wine": "Stack wine bottle",
-    "straighten_rope": "Straighten rope",
-    "sweep_to_dustpan": "Sweep dirt to dustpan",
-    "take_cup_out_from_cabinet": "Take out a cup from the half of the cabinet",
-    "take_frame_off_hanger": "Take frame off hanger",
-    "take_item_out_of_drawer": "Take item out of the drawer",
-    "take_lid_off_saucepan": "Take lid off the saucepan",
-    "take_money_out_safe": "Take the money out of the bottom shelf and place it on",
-    "take_off_weighing_scales": "Remove the pepper from the weighing scales and place it",
-    "take_plate_off_colored_dish_rack": "Take plate off the colored rack",
-    "take_shoes_out_of_box": "Take shoes out of box",
-    "take_toilet_roll_off_stand": "Take toilet roll off stand",
-    "take_tray_out_of_oven": "Take tray out of oven",
-    "take_umbrella_out_of_umbrella_stand": "Take umbrella out of umbrella stand",
-    "take_usb_out_of_computer": "Take usb out of computer",
-    "toilet_seat_down": "Toilet seat down",
-    "toilet_seat_up": "Lift toilet seat up",
-    "turn_oven_on": "Turn on the oven",
-    "turn_tap": "Turn tap",
-    "tv_on": "Turn on the TV",
-    "unplug_charger": "Unplug charger",
-    "water_plants": "Water plant",
-    "weighing_scales": "Weigh the pepper",
-    "wipe_desk": "Wipe dirt off the desk"
+    "BasketballInHoop": "Put the ball in the hoop",
+    "BeatTheBuzz": "Beat the buzz",
+    "BlockPyramid": "Stack blocks in a pyramid",
+    "ChangeChannel": "Turn the channel",
+    "ChangeClock": "Change the clock to show time 12.15",
+    "CloseBox": "Close box",
+    "CloseDoor": "Close the door",
+    "CloseDrawer": "Close drawer",
+    "CloseFridge": "Close fridge",
+    "CloseGrill": "Close the grill",
+    "CloseJar": "Close the jar",
+    "CloseLaptopLid": "Close laptop lid",
+    "CloseMicrowave": "Close microwave",
+    "EmptyContainer": "Empty the container in the to container",
+    "EmptyDishwasher": "Empty the dishwasher",
+    "GetIceFromFridge": "Get ice from fridge",
+    "HangFrameOnHanger": "Hang frame on hanger",
+    "HitBallWithQueue": "Hit ball with queue in to the goal",
+    "Hockey": "Hit the ball into the net",
+    "InsertOntoSquarePeg": "Put the ring on the spoke",
+    "InsertUsbInComputer": "Insert usb in computer",
+    "LampOff": "Turn off the light",
+    "LampOn": "Turn on the light",
+    "LiftNumberedBlock": "Pick up the block with the number",
+    "LightBulbIn": "Screw in the light bulb",
+    "LightBulbOut": "Put the bulb in the holder",
+    "MeatOffGrill": "Take the off the grill",
+    "MeatOnGrill": "Put the on the grill",
+    "MoveHanger": "Move hanger onto the other rack",
+    "OpenBox": "Open box",
+    "OpenDoor": "Open the door",
+    "OpenDrawer": "Open drawer",
+    "OpenFridge": "Open fridge",
+    "OpenGrill": "Open the grill",
+    "OpenJar": "Open the jar",
+    "OpenMicrowave": "Open microwave",
+    "OpenOven": "Open the oven",
+    "OpenWashingMachine": "Open washing machine",
+    "OpenWindow": "Open left window",
+    "OpenWineBottle": "Open wine bottle",
+    "PhoneOnBase": "Put the phone on the base",
+    "PickAndLift": "Pick up the block and lift it up to the target",
+    "PickAndLiftSmall": "Pick up the and lift it up to the target",
+    "PickUpCup": "Pick up the cup",
+    "PlaceCups": "Place 1 cup on the cup holder",
+    "PlaceHangerOnRack": "Pick up the hanger and place in on the rack",
+    "PlaceShapeInShapeSorter": "Put the in the shape sorter",
+    "PlayJenga": "Play jenga",
+    "PlugChargerInPowerSupply": "Plug charger in power supply",
+    "PourFromCupToCup": "Pour liquid from the cup to the cup",
+    "PressSwitch": "Press switch",
+    "PushButton": "Push the button",
+    "PushButtons": "Push the buttons",
+    "PutAllGroceriesInCupboard": "Put all of the groceries in the cupboard",
+    "PutBooksOnBookshelf": "Put books on bookshelf",
+    "PutBottleInFridge": "Put bottle in fridge",
+    "PutGroceriesInCupboard": "Put the in the cupboard",
+    "PutItemInDrawer": "Put item in drawer",
+    "PutKnifeInKnifeBlock": "Put the knife in the knife block",
+    "PutKnifeOnChoppingBoard": "Put the knife on the chopping board",
+    "PutMoneyInSafe": "Put the money away in the safe on the shelf",
+    "PutPlateInColoredDishRack": "Put the plate between the pillars of the dish rack",
+    "PutRubbishInBin": "Put rubbish in bin",
+    "PutShoesInBox": "Put the shoes in the box",
+    "PutToiletRollOnStand": "Put toilet roll on stand",
+    "PutTrayInOven": "Put tray in oven",
+    "PutUmbrellaInUmbrellaStand": "Put umbrella in umbrella stand",
+    "ReachAndDrag": "Use the stick to drag the cube onto the target",
+    "ReachTarget": "Reach the target",
+    "RemoveCups": "Remove 1 cup from the cup holder and place it on the",
+    "ScoopWithSpatula": "Scoop up the cube and lift it with the spatula",
+    "ScrewNail": "Screw the nail in to the block",
+    "SetTheTable": "Set the table",
+    "SetupCheckers": "Setup checkers",
+    "SetupChess": "Setup chess",
+    "SlideBlockToTarget": "Slide the block to target",
+    "SlideCabinetOpen": "Slide cabinet open",
+    "SlideCabinetOpenAndPlaceCups": "Put cup in cabinet",
+    "SolvePuzzle": "Solve the puzzle",
+    "StackBlocks": "Stack blocks",
+    "StackChairs": "Stack the other chairs on top of the chair",
+    "StackCups": "Stack the other cups on top of the cup",
+    "StackWine": "Stack wine bottle",
+    "StraightenRope": "Straighten rope",
+    "SweepToDustpan": "Sweep dirt to dustpan",
+    "TakeCupOutFromCabinet": "Take out a cup from the half of the cabinet",
+    "TakeFrameOffHanger": "Take frame off hanger",
+    "TakeItemOutOfDrawer": "Take item out of the drawer",
+    "TakeLidOffSaucepan": "Take lid off the saucepan",
+    "TakeMoneyOutSafe": "Take the money out of the bottom shelf and place it on",
+    "TakeOffWeighingScales": "Remove the pepper from the weighing scales and place it",
+    "TakePlateOffColoredDishRack": "Take plate off the colored rack",
+    "TakeShoesOutOfBox": "Take shoes out of box",
+    "TakeToiletRollOffStand": "Take toilet roll off stand",
+    "TakeTrayOutOfOven": "Take tray out of oven",
+    "TakeUmbrellaOutOfUmbrellaStand": "Take umbrella out of umbrella stand",
+    "TakeUsbOutOfComputer": "Take usb out of computer",
+    "ToiletSeatDown": "Toilet seat down",
+    "ToiletSeatUp": "Lift toilet seat up",
+    "TurnOvenOn": "Turn on the oven",
+    "TurnTap": "Turn tap",
+    "TvOn": "Turn on the TV",
+    "UnplugCharger": "Unplug charger",
+    "WaterPlants": "Water plant",
+    "WeighingScales": "Weigh the pepper",
+    "WipeDesk": "Wipe dirt off the desk"
   },
 
   "TASK_NAME_TO_ID": {
-    "basketball_in_hoop": 0,
-    "beat_the_buzz": 1,
-    "block_pyramid": 2,
-    "change_channel": 3,
-    "change_clock": 4,
-    "close_box": 5,
-    "close_door": 6,
-    "close_drawer": 7,
-    "close_fridge": 8,
-    "close_grill": 9,
-    "close_jar": 10,
-    "close_laptop_lid": 11,
-    "close_microwave": 12,
-    "empty_container": 13,
-    "empty_dishwasher": 14,
-    "get_ice_from_fridge": 15,
-    "hang_frame_on_hanger": 16,
-    "hit_ball_with_queue": 17,
-    "hockey": 18,
-    "insert_onto_square_peg": 19,
-    "insert_usb_in_computer": 20,
-    "lamp_off": 21,
-    "lamp_on": 22,
-    "lift_numbered_block": 23,
-    "light_bulb_in": 24,
-    "light_bulb_out": 25,
-    "meat_off_grill": 26,
-    "meat_on_grill": 27,
-    "move_hanger": 28,
-    "open_box": 29,
-    "open_door": 30,
-    "open_drawer": 31,
-    "open_fridge": 32,
-    "open_grill": 33,
-    "open_jar": 34,
-    "open_microwave": 35,
-    "open_oven": 36,
-    "open_washing_machine": 37,
-    "open_window": 38,
-    "open_wine_bottle": 39,
-    "phone_on_base": 40,
-    "pick_and_lift": 41,
-    "pick_and_lift_small": 42,
-    "pick_up_cup": 43,
-    "place_cups": 44,
-    "place_hanger_on_rack": 45,
-    "place_shape_in_shape_sorter": 46,
-    "play_jenga": 47,
-    "plug_charger_in_power_supply": 48,
-    "pour_from_cup_to_cup": 49,
-    "press_switch": 50,
-    "push_button": 51,
-    "push_buttons": 52,
-    "put_all_groceries_in_cupboard": 53,
-    "put_books_on_bookshelf": 54,
-    "put_bottle_in_fridge": 55,
-    "put_groceries_in_cupboard": 56,
-    "put_item_in_drawer": 57,
-    "put_knife_in_knife_block": 58,
-    "put_knife_on_chopping_board": 59,
-    "put_money_in_safe": 60,
-    "put_plate_in_colored_dish_rack": 61,
-    "put_rubbish_in_bin": 62,
-    "put_shoes_in_box": 63,
-    "put_toilet_roll_on_stand": 64,
-    "put_tray_in_oven": 65,
-    "put_umbrella_in_umbrella_stand": 66,
-    "reach_and_drag": 67,
-    "reach_target": 68,
-    "remove_cups": 69,
-    "scoop_with_spatula": 70,
-    "screw_nail": 71,
-    "set_the_table": 72,
-    "setup_checkers": 73,
-    "setup_chess": 74,
-    "slide_block_to_target": 75,
-    "slide_cabinet_open": 76,
-    "slide_cabinet_open_and_place_cups": 77,
-    "solve_puzzle": 78,
-    "stack_blocks": 79,
-    "stack_chairs": 80,
-    "stack_cups": 81,
-    "stack_wine": 82,
-    "straighten_rope": 83,
-    "sweep_to_dustpan": 84,
-    "take_cup_out_from_cabinet": 85,
-    "take_frame_off_hanger": 86,
-    "take_item_out_of_drawer": 87,
-    "take_lid_off_saucepan": 88,
-    "take_money_out_safe": 89,
-    "take_off_weighing_scales": 90,
-    "take_plate_off_colored_dish_rack": 91,
-    "take_shoes_out_of_box": 92,
-    "take_toilet_roll_off_stand": 93,
-    "take_tray_out_of_oven": 94,
-    "take_umbrella_out_of_umbrella_stand": 95,
-    "take_usb_out_of_computer": 96,
-    "toilet_seat_down": 97,
-    "toilet_seat_up": 98,
-    "turn_oven_on": 99,
-    "turn_tap": 100,
-    "tv_on": 101,
-    "unplug_charger": 102,
-    "water_plants": 103,
-    "weighing_scales": 104,
-    "wipe_desk": 105
+    "BasketballInHoop": 0,
+    "BeatTheBuzz": 1,
+    "BlockPyramid": 2,
+    "ChangeChannel": 3,
+    "ChangeClock": 4,
+    "CloseBox": 5,
+    "CloseDoor": 6,
+    "CloseDrawer": 7,
+    "CloseFridge": 8,
+    "CloseGrill": 9,
+    "CloseJar": 10,
+    "CloseLaptopLid": 11,
+    "CloseMicrowave": 12,
+    "EmptyContainer": 13,
+    "EmptyDishwasher": 14,
+    "GetIceFromFridge": 15,
+    "HangFrameOnHanger": 16,
+    "HitBallWithQueue": 17,
+    "Hockey": 18,
+    "InsertOntoSquarePeg": 19,
+    "InsertUsbInComputer": 20,
+    "LampOff": 21,
+    "LampOn": 22,
+    "LiftNumberedBlock": 23,
+    "LightBulbIn": 24,
+    "LightBulbOut": 25,
+    "MeatOffGrill": 26,
+    "MeatOnGrill": 27,
+    "MoveHanger": 28,
+    "OpenBox": 29,
+    "OpenDoor": 30,
+    "OpenDrawer": 31,
+    "OpenFridge": 32,
+    "OpenGrill": 33,
+    "OpenJar": 34,
+    "OpenMicrowave": 35,
+    "OpenOven": 36,
+    "OpenWashingMachine": 37,
+    "OpenWindow": 38,
+    "OpenWineBottle": 39,
+    "PhoneOnBase": 40,
+    "PickAndLift": 41,
+    "PickAndLiftSmall": 42,
+    "PickUpCup": 43,
+    "PlaceCups": 44,
+    "PlaceHangerOnRack": 45,
+    "PlaceShapeInShapeSorter": 46,
+    "PlayJenga": 47,
+    "PlugChargerInPowerSupply": 48,
+    "PourFromCupToCup": 49,
+    "PressSwitch": 50,
+    "PushButton": 51,
+    "PushButtons": 52,
+    "PutAllGroceriesInCupboard": 53,
+    "PutBooksOnBookshelf": 54,
+    "PutBottleInFridge": 55,
+    "PutGroceriesInCupboard": 56,
+    "PutItemInDrawer": 57,
+    "PutKnifeInKnifeBlock": 58,
+    "PutKnifeOnChoppingBoard": 59,
+    "PutMoneyInSafe": 60,
+    "PutPlateInColoredDishRack": 61,
+    "PutRubbishInBin": 62,
+    "PutShoesInBox": 63,
+    "PutToiletRollOnStand": 64,
+    "PutTrayInOven": 65,
+    "PutUmbrellaInUmbrellaStand": 66,
+    "ReachAndDrag": 67,
+    "ReachTarget": 68,
+    "RemoveCups": 69,
+    "ScoopWithSpatula": 70,
+    "ScrewNail": 71,
+    "SetTheTable": 72,
+    "SetupCheckers": 73,
+    "SetupChess": 74,
+    "SlideBlockToTarget": 75,
+    "SlideCabinetOpen": 76,
+    "SlideCabinetOpenAndPlaceCups": 77,
+    "SolvePuzzle": 78,
+    "StackBlocks": 79,
+    "StackChairs": 80,
+    "StackCups": 81,
+    "StackWine": 82,
+    "StraightenRope": 83,
+    "SweepToDustpan": 84,
+    "TakeCupOutFromCabinet": 85,
+    "TakeFrameOffHanger": 86,
+    "TakeItemOutOfDrawer": 87,
+    "TakeLidOffSaucepan": 88,
+    "TakeMoneyOutSafe": 89,
+    "TakeOffWeighingScales": 90,
+    "TakePlateOffColoredDishRack": 91,
+    "TakeShoesOutOfBox": 92,
+    "TakeToiletRollOffStand": 93,
+    "TakeTrayOutOfOven": 94,
+    "TakeUmbrellaOutOfUmbrellaStand": 95,
+    "TakeUsbOutOfComputer": 96,
+    "ToiletSeatDown": 97,
+    "ToiletSeatUp": 98,
+    "TurnOvenOn": 99,
+    "TurnTap": 100,
+    "TvOn": 101,
+    "UnplugCharger": 102,
+    "WaterPlants": 103,
+    "WeighingScales": 104,
+    "WipeDesk": 105
   }
 }

From 990eded19f17288d49ed14678011044cc6c608dd Mon Sep 17 00:00:00 2001
From: Andrea Delli <andrea.delli2001@gmail.com>
Date: Sun, 2 Nov 2025 12:22:54 +0100
Subject: [PATCH 6/8] feat: rlbench - ensure states and actions consistency

---
 examples/dataset/rlbench_collect_dataset.py |  23 +-
 src/lerobot/envs/configs.py                 |  50 ++--
 src/lerobot/envs/factory.py                 |   4 +-
 src/lerobot/envs/rlbench.py                 | 286 ++++++++++++++++----
 src/lerobot/envs/utils.py                   |   9 +-
 src/lerobot/utils/constants.py              |   1 +
 6 files changed, 292 insertions(+), 81 deletions(-)

diff --git a/examples/dataset/rlbench_collect_dataset.py b/examples/dataset/rlbench_collect_dataset.py
index 872e73dae9..a517ef8ba4 100644
--- a/examples/dataset/rlbench_collect_dataset.py
+++ b/examples/dataset/rlbench_collect_dataset.py
@@ -7,7 +7,7 @@
 # RLBench
 from rlbench import CameraConfig, Environment
 from rlbench.action_modes.action_mode import MoveArmThenGripper
-from rlbench.action_modes.arm_action_modes import EndEffectorPoseViaPlanning
+from rlbench.action_modes.arm_action_modes import EndEffectorPoseViaIK
 from rlbench.action_modes.gripper_action_modes import Discrete
 from rlbench.demo import Demo
 from rlbench.observation_config import ObservationConfig
@@ -169,7 +169,7 @@ def main(args):
     obs_config.set_all(True)
 
     action_mode = MoveArmThenGripper(
-        arm_action_mode=EndEffectorPoseViaPlanning(absolute_mode=args.absolute_actions),
+        arm_action_mode=EndEffectorPoseViaIK(absolute_mode=args.absolute_actions),
         gripper_action_mode=Discrete(),
     )
     env = Environment(action_mode, obs_config=obs_config, headless=True)
@@ -228,17 +228,18 @@ def main(args):
             # All camera images
             **{
                 f"observation.images.{cam}": {
-                    "dtype": "video",
+                    "dtype": "image",
                     "shape": (args.image_height, args.image_width, 3),
                     "names": ["height", "width", "channels"],
-                    "info": {
-                        "video.fps": args.fps,
-                        "video.height": args.image_height,
-                        "video.width": args.image_width,
-                        "video.channels": 3,
-                        "video.is_depth_map": False,
-                        "has_audio": False,
-                    },
+                    # "dtype": "video",
+                    # "info": {
+                    #     "video.fps": args.fps,
+                    #     "video.height": args.image_height,
+                    #     "video.width": args.image_width,
+                    #     "video.channels": 3,
+                    #     "video.is_depth_map": False,
+                    #     "has_audio": False,
+                    # },
                 }
                 for cam in camera_names
             },
diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py
index 93f186f061..40d9bb29fa 100644
--- a/src/lerobot/envs/configs.py
+++ b/src/lerobot/envs/configs.py
@@ -21,7 +21,7 @@
 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.robots import RobotConfig
 from lerobot.teleoperators.config import TeleoperatorConfig
-from lerobot.utils.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
+from lerobot.utils.constants import ACTION, OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE, OBS_STATE_JOINTS
 
 
 @dataclass
@@ -329,10 +329,11 @@ class RLBenchEnv(EnvConfig):
     episode_length: int = 400
     obs_type: str = "pixels_agent_pos"
     render_mode: str = "rgb_array"
-    camera_name: str = "front_rgb,wrist_rgb"
+    camera_name: str = "left_shoulder_rgb,right_shoulder_rgb,front_rgb,wrist_rgb,overhead_rgb"
     camera_name_mapping: dict[str, str] | None = None
     observation_height: int = 256
     observation_width: int = 256
+    task_ids: str | None = None
     features: dict[str, PolicyFeature] = field(
         default_factory=lambda: {
             ACTION: PolicyFeature(type=FeatureType.ACTION, shape=(8,)),
@@ -342,33 +343,46 @@ class RLBenchEnv(EnvConfig):
         default_factory=lambda: {
             ACTION: ACTION,
             "agent_pos": OBS_STATE,
-            "pixels/front_rgb": f"{OBS_IMAGES}.image",
-            "pixels/wrist_rgb": f"{OBS_IMAGES}.image2",
+            "agent_joints": OBS_STATE_JOINTS,
+            "pixels/front_rgb": f"{OBS_IMAGES}.front_rgb",
+            "pixels/wrist_rgb": f"{OBS_IMAGES}.wrist_rgb",
+            "pixels/left_shoulder_rgb": f"{OBS_IMAGES}.left_shoulder_rgb",
+            "pixels/right_shoulder_rgb": f"{OBS_IMAGES}.right_shoulder_rgb",
+            "pixels/overhead_rgb": f"{OBS_IMAGES}.overhead_rgb",
         }
     )
 
     def __post_init__(self):
+        all_cameras = ["front_rgb", "wrist_rgb", "left_shoulder_rgb", "right_shoulder_rgb", "overhead_rgb"]
+
         if self.obs_type == "pixels":
-            self.features["pixels/front_rgb"] = PolicyFeature(
-                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
-            )
-            self.features["pixels/wrist_rgb"] = PolicyFeature(
-                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
-            )
+            for cam in all_cameras:
+                self.features[f"pixels/{cam}"] = PolicyFeature(
+                    type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
+                )
+
         elif self.obs_type == "pixels_agent_pos":
-            self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(8,))
-            self.features["pixels/front_rgb"] = PolicyFeature(
-                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
-            )
-            self.features["pixels/wrist_rgb"] = PolicyFeature(
-                type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
-            )
+            self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(7,))
+            self.features["agent_joints"] = PolicyFeature(type=FeatureType.STATE, shape=(7,))
+            for cam in all_cameras:
+                self.features[f"pixels/{cam}"] = PolicyFeature(
+                    type=FeatureType.VISUAL, shape=(self.observation_height, self.observation_width, 3)
+                )
+
+        elif self.obs_type == "state":
+            self.features["agent_pos"] = PolicyFeature(type=FeatureType.STATE, shape=(7,))
+            self.features["agent_joints"] = PolicyFeature(type=FeatureType.STATE, shape=(7,))
+
         else:
             raise ValueError(f"Unsupported obs_type: {self.obs_type}")
 
     @property
     def gym_kwargs(self) -> dict:
-        return {
+        kwargs = {
             "obs_type": self.obs_type,
             "render_mode": self.render_mode,
         }
+
+        if self.task_ids is not None:
+            kwargs["task_ids"] = self.task_ids
+        return kwargs
diff --git a/src/lerobot/envs/factory.py b/src/lerobot/envs/factory.py
index 2c9093bb2c..eb45c12835 100644
--- a/src/lerobot/envs/factory.py
+++ b/src/lerobot/envs/factory.py
@@ -18,7 +18,7 @@
 import gymnasium as gym
 from gymnasium.envs.registration import registry as gym_registry
 
-from lerobot.envs.configs import AlohaEnv, EnvConfig, LiberoEnv, PushtEnv
+from lerobot.envs.configs import AlohaEnv, EnvConfig, LiberoEnv, PushtEnv, RLBenchEnv
 
 
 def make_env_config(env_type: str, **kwargs) -> EnvConfig:
@@ -28,6 +28,8 @@ def make_env_config(env_type: str, **kwargs) -> EnvConfig:
         return PushtEnv(**kwargs)
     elif env_type == "libero":
         return LiberoEnv(**kwargs)
+    elif env_type == "rlbench":
+        return RLBenchEnv(**kwargs)
     else:
         raise ValueError(f"Policy type '{env_type}' is not available.")
 
diff --git a/src/lerobot/envs/rlbench.py b/src/lerobot/envs/rlbench.py
index 1718e4a118..61535eef05 100644
--- a/src/lerobot/envs/rlbench.py
+++ b/src/lerobot/envs/rlbench.py
@@ -23,16 +23,23 @@
 import gymnasium as gym
 import numpy as np
 from gymnasium import spaces
+from pyrep.const import RenderMode
+from pyrep.objects.dummy import Dummy
+from pyrep.objects.vision_sensor import VisionSensor
 from rlbench.action_modes.action_mode import MoveArmThenGripper
-from rlbench.action_modes.arm_action_modes import JointPosition
+from rlbench.action_modes.arm_action_modes import EndEffectorPoseViaIK
 from rlbench.action_modes.gripper_action_modes import Discrete
+from rlbench.backend.exceptions import BoundaryError, InvalidActionError, TaskEnvironmentError, WaypointError
 from rlbench.backend.task import Task
-from rlbench.gym import RLBenchEnv as RLBenchGymEnv
+from rlbench.environment import Environment
+from rlbench.observation_config import CameraConfig, ObservationConfig
 from rlbench.tasks import FS10_V1, FS25_V1, FS50_V1, FS95_V1, MT15_V1, MT30_V1, MT55_V1, MT100_V1
+from scipy.spatial.transform import Rotation
 
 
-class AbsoluteJointPositionActionMode(MoveArmThenGripper):
-    """Absolute joint position action mode for arm and gripper.
+class DeltaEEFPositionActionMode(MoveArmThenGripper):
+    """Delta end-effector position action mode for arm and gripper.
+    8 values: [Δposition Δquaternion gripper].
 
     The arm action is first applied, followed by the gripper action.
     """
@@ -40,16 +47,15 @@ class AbsoluteJointPositionActionMode(MoveArmThenGripper):
     def __init__(self):
         # Call super
         super().__init__(
-            JointPosition(absolute_mode=True),  # Arm in absolute joint positions
+            EndEffectorPoseViaIK(absolute_mode=False),  # Arm in delta end-effector position
             Discrete(),  # Gripper in discrete open/close (<0.5 → close, >=0.5 → open)
         )
 
     def action_bounds(self):
         """Returns the min and max of the action mode.
-
-        Range is [- 2*pi, 2*pi] for each joint and [0.0, 1.0] for the gripper.
+        Range is [-0.3, 0.3] for pose, [-1, 1] for rotation and [0.0, 1.0] for the gripper.
         """
-        return np.array([-2 * np.pi] * 7 + [0.0]), np.array([2 * np.pi] * 7 + [1.0])
+        return np.array([-0.3, -0.3, -0.3, -1, -1, -1, -1, 0.0]), np.array([0.3, 0.3, 0.3, 1, 1, 1, 1, 1.0])
 
 
 # ---- Load configuration data from the external JSON file ----
@@ -73,8 +79,12 @@ def action_bounds(self):
 TASK_DESCRIPTIONS: dict[str, str] = data.get("TASK_DESCRIPTIONS", {})
 TASK_NAME_TO_ID: dict[str, int] = data.get("TASK_NAME_TO_ID", {})
 
-ACTION_DIM = 8  # 7 joints + 1 gripper  # NOTE: RLBench supports also EEF pose+gripper (dim=8, [x,y,z,rx,ry,rz,gripper])
-OBS_DIM = 8  # 7 joints + 1 gripper
+"""
+RLBench can support many action and observation types.
+Here, we define standard dimensions for end-effector position control with gripper.
+"""
+ACTION_DIM = 8  # EEF pose+gripper (dim=8, [Δx Δy Δz Δqx Δqy Δqz Δqw gripper])
+OBS_DIM = 7  # EEF pose+gripper (dim=7, [x y z rx ry rz gripper])
 
 
 def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
@@ -124,6 +134,144 @@ def _select_task_ids(total_tasks: int, task_ids: Iterable[int] | None) -> list[i
     return ids
 
 
+class RLBenchGymEnv(gym.Env):
+    """An gym wrapper for RLBench."""
+
+    metadata: dict[str, Any] = {"render_modes": ["human", "rgb_array"], "render_fps": 4}
+
+    def __init__(
+        self,
+        task_class,
+        observation_mode="state",
+        render_mode: None | str = None,
+        action_mode=None,
+        obs_width: int = 256,
+        obs_height: int = 256,
+    ):
+        self.task_class = task_class
+        self.observation_mode = observation_mode
+
+        if render_mode is not None and render_mode not in self.metadata["render_modes"]:
+            raise ValueError(f"render_mode must be one of {self.metadata['render_modes']}, got {render_mode}")
+        self.render_mode = render_mode
+        cam_config = CameraConfig(image_size=(obs_width, obs_height))
+        obs_config = ObservationConfig(
+            left_shoulder_camera=cam_config,
+            right_shoulder_camera=cam_config,
+            wrist_camera=cam_config,
+            front_camera=cam_config,
+            overhead_camera=cam_config,
+        )
+        if observation_mode == "state":
+            obs_config.set_all_high_dim(False)
+            obs_config.set_all_low_dim(True)
+        elif observation_mode == "vision":
+            obs_config.set_all(True)
+        else:
+            raise ValueError(f"Unrecognised observation_mode: {observation_mode}.")
+        self.obs_config = obs_config
+        if action_mode is None:
+            action_mode = EndEffectorPoseViaIK()
+        self.action_mode = action_mode
+
+        self.rlbench_env = Environment(
+            action_mode=self.action_mode,
+            obs_config=self.obs_config,
+            headless=True,
+        )
+        self.rlbench_env.launch()
+        self.rlbench_task_env = self.rlbench_env.get_task(self.task_class)
+        if render_mode is not None:
+            cam_placeholder = Dummy("cam_cinematic_placeholder")
+            self.gym_cam = VisionSensor.create([640, 360])
+            self.gym_cam.set_pose(cam_placeholder.get_pose())
+            if render_mode == "human":
+                self.gym_cam.set_render_mode(RenderMode.OPENGL3_WINDOWED)
+            else:
+                self.gym_cam.set_render_mode(RenderMode.OPENGL3)
+        _, obs = self.rlbench_task_env.reset()
+
+        gym_obs = self._extract_obs(obs)
+        self.observation_space = {}
+        for key, value in gym_obs.items():
+            if "rgb" in key:
+                self.observation_space[key] = spaces.Box(
+                    low=0, high=255, shape=value.shape, dtype=value.dtype
+                )
+            else:
+                self.observation_space[key] = spaces.Box(
+                    low=-np.inf, high=np.inf, shape=value.shape, dtype=value.dtype
+                )
+        self.observation_space = spaces.Dict(self.observation_space)
+
+        action_low, action_high = action_mode.action_bounds()
+        self.action_space = spaces.Box(
+            low=np.float32(action_low),
+            high=np.float32(action_high),
+            shape=self.rlbench_env.action_shape,
+            dtype=np.float32,
+        )
+
+    def _extract_obs(self, rlbench_obs):
+        gym_obs = {}
+        for state_name in [
+            "joint_velocities",
+            "joint_positions",
+            "joint_forces",
+            "gripper_open",
+            "gripper_pose",
+            "gripper_joint_positions",
+            "gripper_touch_forces",
+            "task_low_dim_state",
+        ]:
+            state_data = getattr(rlbench_obs, state_name)
+            if state_data is not None:
+                state_data = np.float32(state_data)
+                if np.isscalar(state_data):
+                    state_data = np.asarray([state_data])
+                gym_obs[state_name] = state_data
+
+        if self.observation_mode == "vision":
+            gym_obs.update(
+                {
+                    "left_shoulder_rgb": rlbench_obs.left_shoulder_rgb,
+                    "right_shoulder_rgb": rlbench_obs.right_shoulder_rgb,
+                    "wrist_rgb": rlbench_obs.wrist_rgb,
+                    "front_rgb": rlbench_obs.front_rgb,
+                    "overhead_rgb": rlbench_obs.overhead_rgb,
+                }
+            )
+        return gym_obs
+
+    def render(self):
+        if self.render_mode == "rgb_array":
+            frame = self.gym_cam.capture_rgb()
+            frame = np.clip((frame * 255.0).astype(np.uint8), 0, 255)
+            return frame
+
+    def reset(self, seed=None, options=None):
+        super().reset(seed=seed)
+        # TODO: Remove this and use seed from super()
+        np.random.seed(seed=seed)
+        reset_to_demo = None
+        if options is not None:
+            # TODO: Write test for this
+            reset_to_demo = options.get("reset_to_demo", None)
+
+        if reset_to_demo is None:
+            descriptions, obs = self.rlbench_task_env.reset()
+        else:
+            descriptions, obs = self.rlbench_task_env.reset(reset_to_demo=reset_to_demo)
+        return self._extract_obs(obs), {"text_descriptions": descriptions}
+
+    def step(self, action):
+        obs, reward, terminated = self.rlbench_task_env.step(action)
+        return self._extract_obs(obs), reward, terminated, False, {"is_success": reward > 0.5}
+
+    def close(self) -> None:
+        self.rlbench_env.shutdown()
+
+
 class RLBenchEnv(gym.Env):
     metadata = {"render_modes": ["rgb_array"], "render_fps": 30}
 
@@ -131,7 +279,8 @@ def __init__(
         self,
         task: Task | None = None,
         task_suite: dict[str, list[Task]] | None = None,
-        camera_name: str | Sequence[str] = "front_rgb,wrist_rgb",
+        camera_name: str
+        | Sequence[str] = "left_shoulder_rgb,right_shoulder_rgb,front_rgb,wrist_rgb,overhead_rgb",
         obs_type: str = "pixels",
         render_mode: str = "rgb_array",
         observation_width: int = 256,
@@ -150,15 +299,18 @@ def __init__(
         self.visualization_height = visualization_height
         self.camera_name = _parse_camera_names(camera_name)
 
-        # Map raw camera names to "image1" and "image2".
+        # Map raw camera names to "front_rgb", "wrist_rgb", etc.
         # The preprocessing step `preprocess_observation` will then prefix these with `.images.*`,
-        # following the LeRobot convention (e.g., `observation.images.image`, `observation.images.image2`).
+        # following the LeRobot convention (e.g., `observation.images.front_rgb`, ...).
         # This ensures the policy consistently receives observations in the
         # expected format regardless of the original camera naming.
         if camera_name_mapping is None:
             camera_name_mapping = {
-                "front_rgb": "image",
-                "wrist_rgb": "image2",
+                "front_rgb": "front_rgb",
+                "wrist_rgb": "wrist_rgb",
+                "left_shoulder_rgb": "left_shoulder_rgb",
+                "right_shoulder_rgb": "right_shoulder_rgb",
+                "overhead_rgb": "overhead_rgb",
             }
         self.camera_name_mapping = camera_name_mapping
 
@@ -178,44 +330,50 @@ def __init__(
                 dtype=np.uint8,
             )
 
-        if (
-            self.obs_type == "state"
-        ):  # TODO: This can be implemented in RLBench, because the observation contains joint positions and gripper pose
-            raise ValueError(
-                "The 'state' observation type is not supported in RLBench. "
-                "Please switch to an image-based obs_type (e.g. 'pixels', 'pixels_agent_pos')."
+        if self.obs_type == "state":
+            self.observation_space = spaces.Dict(
+                {
+                    "agent_pos": spaces.Box(
+                        low=-1000.0,
+                        high=1000.0,
+                        shape=(OBS_DIM,),
+                        dtype=np.float64,
+                    ),
+                    "agent_joints": spaces.Box(
+                        low=-1000.0,
+                        high=1000.0,
+                        shape=(7,),
+                        dtype=np.float64,
+                    ),
+                }
             )
 
         elif self.obs_type == "pixels":
             self.observation_space = spaces.Dict(
                 {
-                    "pixels": spaces.Box(
-                        low=0,
-                        high=255,
-                        shape=(self.observation_height, self.observation_width, 3),
-                        dtype=np.uint8,
-                    )
+                    "pixels": spaces.Dict(images),
                 }
             )
         elif self.obs_type == "pixels_agent_pos":
             self.observation_space = spaces.Dict(
                 {
-                    "pixels": spaces.Box(
-                        low=0,
-                        high=255,
-                        shape=(self.observation_height, self.observation_width, 3),
-                        dtype=np.uint8,
-                    ),
+                    "pixels": spaces.Dict(images),
                     "agent_pos": spaces.Box(
                         low=-1000.0,
                         high=1000.0,
                         shape=(OBS_DIM,),
                         dtype=np.float64,
                     ),
+                    "agent_joints": spaces.Box(
+                        low=-1000.0,
+                        high=1000.0,
+                        shape=(7,),
+                        dtype=np.float64,
+                    ),
                 }
             )
 
-        self.action_space = spaces.Box(low=-1, high=1, shape=(ACTION_DIM,), dtype=np.float32)
+        self.action_space = spaces.Box(low=-5, high=5, shape=(ACTION_DIM,), dtype=np.float32)
 
     def render(self) -> np.ndarray:
         """
@@ -231,7 +389,9 @@ def _make_envs_task(self, task: Task):
             task,
             observation_mode="vision",
             render_mode=self.render_mode,
-            action_mode=AbsoluteJointPositionActionMode(),
+            action_mode=DeltaEEFPositionActionMode(),
+            obs_width=self.observation_width,
+            obs_height=self.observation_height,
         )
 
     def _format_raw_obs(self, raw_obs: dict) -> dict[str, Any]:
@@ -241,24 +401,29 @@ def _format_raw_obs(self, raw_obs: dict) -> dict[str, Any]:
             image = image[::-1, ::-1]  # rotate 180 degrees
             images[self.camera_name_mapping[camera_name]] = image
 
-        agent_pos = np.concatenate(
-            raw_obs["joint_positions"],
-            [raw_obs["gripper_open"]],
+        # Gripper pose is 7D (position + quaternion), we convert to 6D (position + euler angles)
+        eef_position = np.concatenate(
+            [
+                raw_obs["gripper_pose"][:3],
+                Rotation.from_quat(raw_obs["gripper_pose"][3:]).as_euler("xyz"),
+                raw_obs["gripper_open"],
+            ]
         )
+        robot_joints = np.array(raw_obs["joint_positions"], dtype=np.float32)
 
-        if (
-            self.obs_type == "state"
-        ):  # TODO: this can be implemented in RLBench, because the observation contains joint positions and gripper pose
-            raise NotImplementedError(
-                "'state' obs_type not implemented for RLBench. Use pixel modes instead."
-            )
+        if self.obs_type == "state":
+            obs = {
+                "agent_pos": eef_position,
+                "agent_joints": robot_joints,
+            }
 
         if self.obs_type == "pixels":
-            obs = {"pixels": image.copy()}
+            obs = {"pixels": images.copy()}
         elif self.obs_type == "pixels_agent_pos":
             obs = {
-                "pixels": image.copy(),
-                "agent_pos": agent_pos,
+                "pixels": images.copy(),
+                "agent_pos": eef_position,
+                "agent_joints": robot_joints,
             }
         else:
             raise NotImplementedError(
@@ -310,11 +475,32 @@ def step(self, action: np.ndarray) -> tuple[dict[str, Any], float, bool, bool, d
                 f"Expected action to be 1-D (shape (action_dim,)), "
                 f"but got shape {action.shape} with ndim={action.ndim}"
             )
-        raw_obs, reward, done, truncated, info = self._env.step(action)
+
+        # If the action has 7 elements (pose + euler angles + gripper), convert to 8 elements (pose + quaternion + gripper)
+        if action.shape[0] == 7:
+            action = np.concatenate(
+                [
+                    action[:3],  # position
+                    Rotation.from_euler("xyz", action[3:6]).as_quat(),  # rotation as quaternion
+                    action[-1:],  # Only last position is gripper
+                ]
+            )
+
+        # Force unit quaternion
+        quat = action[3:7]
+        quat = quat / np.linalg.norm(quat)
+        action[3:7] = quat
+
+        # Perform step
+        try:
+            raw_obs, reward, done, truncated, info = self._env.step(action)
+        except (InvalidActionError, BoundaryError, WaypointError, TaskEnvironmentError) as e:
+            print(f"Error occurred while stepping the environment: {e}")
+            raw_obs, reward, done, truncated, info = None, 0.0, True, True, {}
 
         # Determine whether the task was successful
-        is_success = bool(info.get("success", 0))
-        terminated = done or is_success
+        is_success = bool(info.get("is_success", False))
+        terminated = done or is_success or truncated
         info.update(
             {
                 "task": self.task,
diff --git a/src/lerobot/envs/utils.py b/src/lerobot/envs/utils.py
index 5584e0bff0..1c3c54871c 100644
--- a/src/lerobot/envs/utils.py
+++ b/src/lerobot/envs/utils.py
@@ -26,7 +26,7 @@
 
 from lerobot.configs.types import FeatureType, PolicyFeature
 from lerobot.envs.configs import EnvConfig
-from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE
+from lerobot.utils.constants import OBS_ENV_STATE, OBS_IMAGE, OBS_IMAGES, OBS_STATE, OBS_STATE_JOINTS
 from lerobot.utils.utils import get_channel_first_image_shape
 
 
@@ -75,6 +75,13 @@ def preprocess_observation(observations: dict[str, np.ndarray]) -> dict[str, Ten
 
         return_observations[OBS_ENV_STATE] = env_state
 
+    if "agent_joints" in observations:
+        joints_state = torch.from_numpy(observations["agent_joints"]).float()
+        if joints_state.dim() == 1:
+            joints_state = joints_state.unsqueeze(0)
+
+        return_observations[OBS_STATE_JOINTS] = joints_state
+
     # TODO(rcadene): enable pixels only baseline with `obs_type="pixels"` in environment by removing
     agent_pos = torch.from_numpy(observations["agent_pos"]).float()
     if agent_pos.dim() == 1:
diff --git a/src/lerobot/utils/constants.py b/src/lerobot/utils/constants.py
index 6847666eb9..33e08aa0f6 100644
--- a/src/lerobot/utils/constants.py
+++ b/src/lerobot/utils/constants.py
@@ -21,6 +21,7 @@
 OBS_PREFIX = OBS_STR + "."
 OBS_ENV_STATE = OBS_STR + ".environment_state"
 OBS_STATE = OBS_STR + ".state"
+OBS_STATE_JOINTS = OBS_STATE + ".joints"
 OBS_IMAGE = OBS_STR + ".image"
 OBS_IMAGES = OBS_IMAGE + "s"
 OBS_LANGUAGE = OBS_STR + ".language"

From 9d9fcd525cbb8833e45680a72591dbab91ca1e78 Mon Sep 17 00:00:00 2001
From: Andrea Delli <andrea.delli2001@gmail.com>
Date: Sun, 2 Nov 2025 17:54:44 +0100
Subject: [PATCH 7/8] fix: rlbench - task_ids selection

---
 src/lerobot/envs/rlbench.py          |  16 +-
 src/lerobot/envs/rlbench_config.json | 214 +++++++++++++--------------
 2 files changed, 119 insertions(+), 111 deletions(-)

diff --git a/src/lerobot/envs/rlbench.py b/src/lerobot/envs/rlbench.py
index 61535eef05..2b33871a17 100644
--- a/src/lerobot/envs/rlbench.py
+++ b/src/lerobot/envs/rlbench.py
@@ -77,7 +77,7 @@ def action_bounds(self):
 
 # extract and type-check top-level dicts
 TASK_DESCRIPTIONS: dict[str, str] = data.get("TASK_DESCRIPTIONS", {})
-TASK_NAME_TO_ID: dict[str, int] = data.get("TASK_NAME_TO_ID", {})
+TASK_ID_TO_NAME: dict[str, str] = data.get("TASK_ID_TO_NAME", {})
 
 """
 RLBench can support many action and observation types.
@@ -585,20 +585,28 @@ def create_rlbench_envs(
         raise ValueError("`task` must contain at least one RLBench suite name.")
 
     print(f"Creating RLBench envs | task_groups={suite_names} | n_envs(per task)={n_envs}")
-    if task_ids_filter is not None:
-        print(f"Restricting to task_ids={task_ids_filter}")
 
     out: dict[str, dict[int, Any]] = defaultdict(dict)
 
     for suite_name in suite_names:
         suite = _get_suite(suite_name)
         total = len(suite["train"])
+
+        # Select task ids to build
+        if task_ids_filter is not None:
+            # If string, parse to list of ints (task ids)
+            if isinstance(task_ids_filter, str):
+                task_ids_filter = json.loads(task_ids_filter)
+            print(f"Restricting to task_ids={task_ids_filter}")
         selected = _select_task_ids(total, task_ids_filter)
+        print(
+            f"Selected {len(selected)} tasks from suite '{suite_name}': {[TASK_ID_TO_NAME[str(id)] for id in selected]}."
+        )
 
         if not selected:
             raise ValueError(f"No tasks selected for suite '{suite_name}' (available: {total}).")
 
-        for tid in selected:  # FIXME: this breaks!
+        for tid in selected:  # FIXME: this breaks for multi-task!
             fns = _make_env_fns(
                 suite=suite,
                 task=suite["train"][tid],
diff --git a/src/lerobot/envs/rlbench_config.json b/src/lerobot/envs/rlbench_config.json
index 64ae01ca76..ee0353bb9f 100644
--- a/src/lerobot/envs/rlbench_config.json
+++ b/src/lerobot/envs/rlbench_config.json
@@ -108,112 +108,112 @@
     "WipeDesk": "Wipe dirt off the desk"
   },
 
-  "TASK_NAME_TO_ID": {
-    "BasketballInHoop": 0,
-    "BeatTheBuzz": 1,
-    "BlockPyramid": 2,
-    "ChangeChannel": 3,
-    "ChangeClock": 4,
-    "CloseBox": 5,
-    "CloseDoor": 6,
-    "CloseDrawer": 7,
-    "CloseFridge": 8,
-    "CloseGrill": 9,
-    "CloseJar": 10,
-    "CloseLaptopLid": 11,
-    "CloseMicrowave": 12,
-    "EmptyContainer": 13,
-    "EmptyDishwasher": 14,
-    "GetIceFromFridge": 15,
-    "HangFrameOnHanger": 16,
-    "HitBallWithQueue": 17,
-    "Hockey": 18,
-    "InsertOntoSquarePeg": 19,
-    "InsertUsbInComputer": 20,
-    "LampOff": 21,
-    "LampOn": 22,
-    "LiftNumberedBlock": 23,
-    "LightBulbIn": 24,
-    "LightBulbOut": 25,
-    "MeatOffGrill": 26,
-    "MeatOnGrill": 27,
-    "MoveHanger": 28,
-    "OpenBox": 29,
-    "OpenDoor": 30,
-    "OpenDrawer": 31,
-    "OpenFridge": 32,
-    "OpenGrill": 33,
-    "OpenJar": 34,
-    "OpenMicrowave": 35,
-    "OpenOven": 36,
-    "OpenWashingMachine": 37,
-    "OpenWindow": 38,
-    "OpenWineBottle": 39,
-    "PhoneOnBase": 40,
-    "PickAndLift": 41,
-    "PickAndLiftSmall": 42,
-    "PickUpCup": 43,
-    "PlaceCups": 44,
-    "PlaceHangerOnRack": 45,
-    "PlaceShapeInShapeSorter": 46,
-    "PlayJenga": 47,
-    "PlugChargerInPowerSupply": 48,
-    "PourFromCupToCup": 49,
-    "PressSwitch": 50,
-    "PushButton": 51,
-    "PushButtons": 52,
-    "PutAllGroceriesInCupboard": 53,
-    "PutBooksOnBookshelf": 54,
-    "PutBottleInFridge": 55,
-    "PutGroceriesInCupboard": 56,
-    "PutItemInDrawer": 57,
-    "PutKnifeInKnifeBlock": 58,
-    "PutKnifeOnChoppingBoard": 59,
-    "PutMoneyInSafe": 60,
-    "PutPlateInColoredDishRack": 61,
-    "PutRubbishInBin": 62,
-    "PutShoesInBox": 63,
-    "PutToiletRollOnStand": 64,
-    "PutTrayInOven": 65,
-    "PutUmbrellaInUmbrellaStand": 66,
-    "ReachAndDrag": 67,
-    "ReachTarget": 68,
-    "RemoveCups": 69,
-    "ScoopWithSpatula": 70,
-    "ScrewNail": 71,
-    "SetTheTable": 72,
-    "SetupCheckers": 73,
-    "SetupChess": 74,
-    "SlideBlockToTarget": 75,
-    "SlideCabinetOpen": 76,
-    "SlideCabinetOpenAndPlaceCups": 77,
-    "SolvePuzzle": 78,
-    "StackBlocks": 79,
-    "StackChairs": 80,
-    "StackCups": 81,
-    "StackWine": 82,
-    "StraightenRope": 83,
-    "SweepToDustpan": 84,
-    "TakeCupOutFromCabinet": 85,
-    "TakeFrameOffHanger": 86,
-    "TakeItemOutOfDrawer": 87,
-    "TakeLidOffSaucepan": 88,
-    "TakeMoneyOutSafe": 89,
-    "TakeOffWeighingScales": 90,
-    "TakePlateOffColoredDishRack": 91,
-    "TakeShoesOutOfBox": 92,
-    "TakeToiletRollOffStand": 93,
-    "TakeTrayOutOfOven": 94,
-    "TakeUmbrellaOutOfUmbrellaStand": 95,
-    "TakeUsbOutOfComputer": 96,
-    "ToiletSeatDown": 97,
-    "ToiletSeatUp": 98,
-    "TurnOvenOn": 99,
-    "TurnTap": 100,
-    "TvOn": 101,
-    "UnplugCharger": 102,
-    "WaterPlants": 103,
-    "WeighingScales": 104,
-    "WipeDesk": 105
+  "TASK_ID_TO_NAME": {
+    "0": "ReachTarget",
+    "1": "CloseBox",
+    "2": "CloseMicrowave",
+    "3": "PlugChargerInPowerSupply",
+    "4": "ToiletSeatDown",
+    "5": "TakeUmbrellaOutOfUmbrellaStand",
+    "6": "PutUmbrellaInUmbrellaStand",
+    "7": "SlideCabinetOpen",
+    "8": "CloseFridge",
+    "9": "PickAndLift",
+    "10": "OpenBox",
+    "11": "OpenMicrowave",
+    "12": "UnplugCharger",
+    "13": "ToiletSeatUp",
+    "14": "OpenFridge",
+    "15": "TurnTap",
+    "16": "LightBulbIn",
+    "17": "BasketballInHoop",
+    "18": "OpenWindow",
+    "19": "CloseDoor",
+    "20": "PushButton",
+    "21": "PutItemInDrawer",
+    "22": "OpenDrawer",
+    "23": "CloseDrawer",
+    "24": "TurnOvenOn",
+    "25": "LightBulbOut",
+    "26": "TvOn",
+    "27": "OpenOven",
+    "28": "OpenDoor",
+    "29": "TakeItemOutOfDrawer",
+    "30": "BeatTheBuzz",
+    "31": "BlockPyramid",
+    "32": "ChangeClock",
+    "33": "CloseJar",
+    "34": "CloseLaptopLid",
+    "35": "EmptyContainer",
+    "36": "EmptyDishwasher",
+    "37": "GetIceFromFridge",
+    "38": "HangFrameOnHanger",
+    "39": "InsertOntoSquarePeg",
+    "40": "PutRubbishInBin",
+    "41": "PutShoesInBox",
+    "42": "PutToiletRollOnStand",
+    "43": "PutTrayInOven",
+    "44": "ReachAndDrag",
+    "45": "RemoveCups",
+    "46": "ScoopWithSpatula",
+    "47": "SetTheTable",
+    "48": "SetupCheckers",
+    "49": "SlideBlockToTarget",
+    "50": "Hockey",
+    "51": "InsertUsbInComputer",
+    "52": "PressSwitch",
+    "53": "PlayJenga",
+    "54": "MeatOffGrill",
+    "55": "HitBallWithQueue",
+    "56": "ScrewNail",
+    "57": "LampOff",
+    "58": "LampOn",
+    "59": "MeatOnGrill",
+    "60": "MoveHanger",
+    "61": "OpenJar",
+    "62": "OpenWineBottle",
+    "63": "PlaceCups",
+    "64": "PlaceHangerOnRack",
+    "65": "PlaceShapeInShapeSorter",
+    "66": "PutBottleInFridge",
+    "67": "PutKnifeInKnifeBlock",
+    "68": "PutMoneyInSafe",
+    "69": "PutPlateInColoredDishRack",
+    "70": "SlideCabinetOpenAndPlaceCups",
+    "71": "StackBlocks",
+    "72": "StackCups",
+    "73": "StackWine",
+    "74": "StraightenRope",
+    "75": "SweepToDustpan",
+    "76": "TakeCupOutFromCabinet",
+    "77": "TakeFrameOffHanger",
+    "78": "TakeLidOffSaucepan",
+    "79": "TakeMoneyOutSafe",
+    "80": "TakeOffWeighingScales",
+    "81": "TakePlateOffColoredDishRack",
+    "82": "TakeShoesOutOfBox",
+    "83": "TakeToiletRollOffStand",
+    "84": "TakeUsbOutOfComputer",
+    "85": "WaterPlants",
+    "86": "WeighingScales",
+    "87": "WipeDesk",
+    "88": "ChangeChannel",
+    "89": "OpenGrill",
+    "90": "CloseGrill",
+    "91": "SolvePuzzle",
+    "92": "PickUpCup",
+    "93": "PhoneOnBase",
+    "94": "PourFromCupToCup",
+    "95": "PutKnifeOnChoppingBoard",
+    "96": "PutBooksOnBookshelf",
+    "97": "PushButtons",
+    "98": "PutGroceriesInCupboard",
+    "99": "TakeTrayOutOfOven",
+    "100": "LiftNumberedBlock",
+    "101": "OpenWashingMachine",
+    "102": "PickAndLiftSmall",
+    "103": "PutAllGroceriesInCupboard",
+    "104": "SetupChess",
+    "105": "StackChairs"
   }
 }

From 59631092b80f7c1ea1d9be90dc23ece9cdbafb0a Mon Sep 17 00:00:00 2001
From: Andrea Delli <andrea.delli2001@gmail.com>
Date: Sun, 2 Nov 2025 20:15:20 +0100
Subject: [PATCH 8/8] fix: rlbench - handle empty observation

---
 src/lerobot/envs/rlbench.py | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/lerobot/envs/rlbench.py b/src/lerobot/envs/rlbench.py
index 2b33871a17..c7a4a58d50 100644
--- a/src/lerobot/envs/rlbench.py
+++ b/src/lerobot/envs/rlbench.py
@@ -85,6 +85,7 @@ def action_bounds(self):
 """
 ACTION_DIM = 8  # EEF pose+gripper (dim=8, [Δx Δy Δz Δqx Δqy Δqz Δqw gripper])
 OBS_DIM = 7  # EEF pose+gripper (dim=7, [x y z rx ry rz gripper])
+OBS_JOINTS_DIM = 7  # Robot joint positions (7 joints for Panda robot)
 
 
 def _parse_camera_names(camera_name: str | Sequence[str]) -> list[str]:
@@ -315,7 +316,7 @@ def __init__(
         self.camera_name_mapping = camera_name_mapping
 
         self._env = self._make_envs_task(self.task)
-        self._max_episode_steps = 500  # TODO: make configurable depending on task suite?
+        self._max_episode_steps = 300  # TODO: make configurable depending on task suite?
 
         # Get task description
         task_name = self.task.__name__ if self.task is not None else ""
@@ -395,6 +396,17 @@ def _make_envs_task(self, task: Task):
         )
 
     def _format_raw_obs(self, raw_obs: dict) -> dict[str, Any]:
+        if raw_obs is None:
+            # Return empty observation if raw_obs is None (e.g., due to an error in step)
+            return {
+                "pixels": {
+                    cam: np.zeros((self.observation_height, self.observation_width, 3), dtype=np.uint8)
+                    for cam in self.camera_name
+                },
+                "agent_pos": np.zeros(OBS_DIM, dtype=np.float64),
+                "agent_joints": np.zeros(OBS_JOINTS_DIM, dtype=np.float64),
+            }
+
         images = {}
         for camera_name in self.camera_name:
             image = raw_obs[camera_name]