From 768f80821e6f33d5dd963829c46b84a6c6e85d2a Mon Sep 17 00:00:00 2001
From: YN35 <95198883+YN35@users.noreply.github.com>
Date: Thu, 7 Aug 2025 09:11:50 +0000
Subject: [PATCH 01/57] from simpler-openvla
---
.devcontainer/devcontainer.json | 32 +
.gitmodules | 4 +-
README.md | 65 +-
docker/10_nvidia.json | 6 +
docker/Dockerfile | 34 +
docker/nvidia_icd.json | 7 +
docker/nvidia_layers.json | 21 +
policies/lerobotpi/README.md | 18 +
policies/tes.py | 3 +
pyproject.toml | 22 +
scripts/bridge.sh | 52 ++
...r_variant_agg.sh => drawer_variant_agg.sh} | 23 +-
scripts/drawer_visual_matching.sh | 135 ++++
.../misc/octo_drawer_variant_agg_alt_urdf.sh | 87 ---
.../octo_move_near_variant_agg_alt_urdf.sh | 144 ----
...octo_pick_coke_can_variant_agg_alt_urdf.sh | 183 -----
...ariant_agg.sh => move_near_variant_agg.sh} | 37 +-
scripts/move_near_visual_matching.sh | 36 +
scripts/octo_drawer_visual_matching.sh | 135 ----
scripts/octo_move_near_visual_matching.sh | 48 --
...nt_agg.sh => pick_coke_can_variant_agg.sh} | 39 +-
...ng.sh => pick_coke_can_visual_matching.sh} | 26 +-
...nt_agg.sh => put_in_drawer_variant_agg.sh} | 23 +-
...ng.sh => put_in_drawer_visual_matching.sh} | 28 +-
scripts/run_cogact.sh | 39 ++
scripts/run_gr00t.sh | 45 ++
scripts/run_lerobotpifast.sh | 40 ++
scripts/run_octo.sh | 39 ++
scripts/run_openpifast.sh | 48 ++
scripts/run_openvla.sh | 39 ++
scripts/run_rt1.sh | 39 ++
scripts/run_spatialvla.sh | 42 ++
setup.py | 9 -
simpler_env/__init__.py | 17 +-
simpler_env/evaluation/argparse.py | 3 +-
.../evaluation/maniskill2_evaluator.py | 41 +-
simpler_env/main_inference.py | 57 +-
simpler_env/policies/__init__.py | 1 -
simpler_env/policies/gr00t/geometry.py | 463 +++++++++++++
simpler_env/policies/gr00t/gr00t_model.py | 336 ++++++++++
simpler_env/policies/lerobotpi/geometry.py | 463 +++++++++++++
simpler_env/policies/lerobotpi/pi0_or_fast.py | 281 ++++++++
simpler_env/policies/octo/octo_model.py | 12 +-
simpler_env/policies/openpi/geometry.py | 463 +++++++++++++
simpler_env/policies/openpi/pi0_or_fast.py | 287 ++++++++
simpler_env/policies/openvla/openvla_model.py | 181 +++++
simpler_env/policies/rt1/rt1_model.py | 14 +-
simpler_env/policies/sim_cogact/__init__.py | 2 +
.../policies/sim_cogact/adaptive_ensemble.py | 44 ++
.../policies/sim_cogact/cogact_policy.py | 232 +++++++
.../sim_cogact/scripts/cogact_bridge.sh | 20 +-
.../scripts/cogact_drawer_variant_agg.sh | 12 +-
.../scripts/cogact_drawer_visual_matching.sh | 26 +-
.../scripts/cogact_move_near_variant_agg.sh | 22 +-
.../cogact_move_near_visual_matching.sh | 7 +-
.../cogact_pick_coke_can_variant_agg.sh | 22 +-
.../cogact_pick_coke_can_visual_matching.sh | 9 +-
.../cogact_put_in_drawer_variant_agg.sh | 9 +-
.../cogact_put_in_drawer_visual_matching.sh | 13 +-
.../sim_cogact/scripts}/octo_bridge.sh | 12 +-
.../policies/spatialvla/spatialvla_model.py | 240 +++++++
simpler_env/utils/action/action_ensemble.py | 2 +-
simpler_env/utils/metrics.py | 188 +++++-
simpler_env/utils/visualization.py | 14 +
tools/calc_metrics_evaluation_videos.py | 627 +++++++++++++-----
65 files changed, 4662 insertions(+), 1006 deletions(-)
create mode 100644 .devcontainer/devcontainer.json
create mode 100644 docker/10_nvidia.json
create mode 100644 docker/Dockerfile
create mode 100644 docker/nvidia_icd.json
create mode 100644 docker/nvidia_layers.json
create mode 100644 policies/lerobotpi/README.md
create mode 100644 policies/tes.py
create mode 100644 scripts/bridge.sh
rename scripts/{octo_drawer_variant_agg.sh => drawer_variant_agg.sh} (77%)
create mode 100644 scripts/drawer_visual_matching.sh
delete mode 100644 scripts/misc/octo_drawer_variant_agg_alt_urdf.sh
delete mode 100644 scripts/misc/octo_move_near_variant_agg_alt_urdf.sh
delete mode 100644 scripts/misc/octo_pick_coke_can_variant_agg_alt_urdf.sh
rename scripts/{octo_move_near_variant_agg.sh => move_near_variant_agg.sh} (74%)
create mode 100644 scripts/move_near_visual_matching.sh
delete mode 100644 scripts/octo_drawer_visual_matching.sh
delete mode 100644 scripts/octo_move_near_visual_matching.sh
rename scripts/{octo_pick_coke_can_variant_agg.sh => pick_coke_can_variant_agg.sh} (78%)
rename scripts/{octo_pick_coke_can_visual_matching.sh => pick_coke_can_visual_matching.sh} (72%)
rename scripts/{octo_put_in_drawer_variant_agg.sh => put_in_drawer_variant_agg.sh} (77%)
rename scripts/{octo_put_in_drawer_visual_matching.sh => put_in_drawer_visual_matching.sh} (73%)
create mode 100644 scripts/run_cogact.sh
create mode 100644 scripts/run_gr00t.sh
create mode 100644 scripts/run_lerobotpifast.sh
create mode 100644 scripts/run_octo.sh
create mode 100644 scripts/run_openpifast.sh
create mode 100644 scripts/run_openvla.sh
create mode 100644 scripts/run_rt1.sh
create mode 100644 scripts/run_spatialvla.sh
delete mode 100644 setup.py
delete mode 100644 simpler_env/policies/__init__.py
create mode 100644 simpler_env/policies/gr00t/geometry.py
create mode 100644 simpler_env/policies/gr00t/gr00t_model.py
create mode 100644 simpler_env/policies/lerobotpi/geometry.py
create mode 100644 simpler_env/policies/lerobotpi/pi0_or_fast.py
create mode 100644 simpler_env/policies/openpi/geometry.py
create mode 100644 simpler_env/policies/openpi/pi0_or_fast.py
create mode 100644 simpler_env/policies/openvla/openvla_model.py
create mode 100644 simpler_env/policies/sim_cogact/__init__.py
create mode 100644 simpler_env/policies/sim_cogact/adaptive_ensemble.py
create mode 100644 simpler_env/policies/sim_cogact/cogact_policy.py
rename scripts/rt1x_bridge.sh => simpler_env/policies/sim_cogact/scripts/cogact_bridge.sh (89%)
rename scripts/rt1_drawer_variant_agg.sh => simpler_env/policies/sim_cogact/scripts/cogact_drawer_variant_agg.sh (87%)
rename scripts/rt1_drawer_visual_matching.sh => simpler_env/policies/sim_cogact/scripts/cogact_drawer_visual_matching.sh (82%)
rename scripts/rt1_move_near_variant_agg.sh => simpler_env/policies/sim_cogact/scripts/cogact_move_near_variant_agg.sh (87%)
rename scripts/rt1_move_near_visual_matching.sh => simpler_env/policies/sim_cogact/scripts/cogact_move_near_visual_matching.sh (77%)
rename scripts/rt1_pick_coke_can_variant_agg.sh => simpler_env/policies/sim_cogact/scripts/cogact_pick_coke_can_variant_agg.sh (89%)
rename scripts/rt1_pick_coke_can_visual_matching.sh => simpler_env/policies/sim_cogact/scripts/cogact_pick_coke_can_visual_matching.sh (80%)
rename scripts/rt1_put_in_drawer_variant_agg.sh => simpler_env/policies/sim_cogact/scripts/cogact_put_in_drawer_variant_agg.sh (87%)
rename scripts/rt1_put_in_drawer_visual_matching.sh => simpler_env/policies/sim_cogact/scripts/cogact_put_in_drawer_visual_matching.sh (83%)
rename {scripts => simpler_env/policies/sim_cogact/scripts}/octo_bridge.sh (92%)
create mode 100644 simpler_env/policies/spatialvla/spatialvla_model.py
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 00000000..9e64b618
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,32 @@
+{
+ // docker build . -t simpler_env
+ "image": "simpler_env",
+ "runArgs": [
+ "--name=simpler_env-ynn",
+ "--net=host",
+ "--ipc=host",
+ "--gpus",
+ "all",
+ "-e",
+ "NVIDIA_DRIVER_CAPABILITIES=all",
+ "--privileged"
+ ],
+ "mounts": [
+ "source=/home/takanori.yoshimoto/code/SimplerEnv,target=/app/SimplerEnv,type=bind",
+ "source=/home/takanori.yoshimoto/code/hsr_openpi/checkpoints,target=/data/checkpoints,type=bind"
+ ],
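+  // NOTE: the bind-mount sources above are machine-specific paths; point them at your own SimplerEnv checkout and checkpoints directory.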
+ "workspaceFolder": "/app/SimplerEnv",
+ "customizations": {
+ "vscode": {
+ "extensions": [
+ "ms-python.python",
+ "ms-python.vscode-pylance",
+ "ms-python.black-formatter",
+ "ms-python.isort",
+ "ms-azuretools.vscode-docker",
+ "github.copilot",
+ "github.vscode-pull-request-github"
+ ]
+ }
+ }
+}
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
index 046394c1..9b815d12 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -1,3 +1,5 @@
[submodule "ManiSkill2_real2sim"]
path = ManiSkill2_real2sim
- url = https://github.com/simpler-env/ManiSkill2_real2sim
+ url = https://github.com/airoa-org/ManiSkill2_real2sim.git
+ # url = https://github.com/allenzren/ManiSkill2_real2sim.git
+ # url = https://github.com/simpler-env/ManiSkill2_real2sim
diff --git a/README.md b/README.md
index d46213f7..9228f400 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# SimplerEnv: Simulated Manipulation Policy Evaluation Environments for Real Robot Setups
+# SimplerEnv: Simulated Manipulation Policy Evaluation Environments for Real Robot Setups (Multi-model Support 🔥)
[](https://colab.research.google.com/github/simpler-env/SimplerEnv/blob/main/example.ipynb)
@@ -6,7 +6,7 @@
Significant progress has been made in building generalist robot manipulation policies, yet their scalable and reproducible evaluation remains challenging, as real-world evaluation is operationally expensive and inefficient. We propose employing physical simulators as efficient, scalable, and informative complements to real-world evaluations. These simulation evaluations offer valuable quantitative metrics for checkpoint selection, insights into potential real-world policy behaviors or failure modes, and standardized setups to enhance reproducibility.
-This repository's code is based in the [SAPIEN](https://sapien.ucsd.edu/) simulator and the CPU based [ManiSkill2](https://maniskill2.github.io/) benchmark. We have also integrated the Bridge dataset environments into ManiSkill3, which offers GPU parallelization and can run 10-15x faster than the ManiSkill2 version. For instructions on how to use the GPU parallelized environments and evaluate policies on them, see: https://github.com/simpler-env/SimplerEnv/tree/maniskill3
+This repository is based on the [SAPIEN](https://sapien.ucsd.edu/) simulator and the [ManiSkill2](https://maniskill2.github.io/) benchmark (we will also integrate the evaluation envs into ManiSkill3 once it is complete).
This repository encompasses 2 real-to-sim evaluation setups:
- `Visual Matching` evaluation: Matching real & sim visual appearances for policy evaluation by overlaying real-world images onto simulation backgrounds and adjusting foreground object and robot textures in simulation.
@@ -23,12 +23,45 @@ We hope that our work guides and inspires future real-to-sim evaluation efforts.
- [Code Structure](#code-structure)
- [Adding New Policies](#adding-new-policies)
- [Adding New Real-to-Sim Evaluation Environments and Robots](#adding-new-real-to-sim-evaluation-environments-and-robots)
- - [Full Installation (RT-1 and Octo Inference, Env Building)](#full-installation-rt-1-and-octo-inference-env-building)
+ - [Full Installation (RT-1, Octo, OpenVLA Inference, Env Building)](#full-installation-rt-1-octo-openvla-inference-env-building)
- [RT-1 Inference Setup](#rt-1-inference-setup)
- [Octo Inference Setup](#octo-inference-setup)
+ - [OpenVLA Inference Setup](#openvla-inference-setup)
- [Troubleshooting](#troubleshooting)
- [Citation](#citation)
+## Benchmark @ Google Sheets
+> [!TIP]
+> We maintain a public Google Sheet documenting the latest SOTA models' performance and fine-tuned weights on SimplerEnv, making community benchmarking more accessible. Contributions and updates are welcome!
+>
+> [SimplerEnv benchmark @ Google Sheets 📊](https://docs.google.com/spreadsheets/d/1cLhEW9QnVkP4rqxsFVzdBVRyBWVdSm0d5zp1L_-QJx4/edit?usp=sharing)
+
+
+
+
+## Models
+> [!NOTE]
+> Hello everyone!
+> Issues and discussions are now fully open on this repository. We warmly welcome you to: 🤗
+> - discuss any problems you encounter 🙋
+> - submit fixes ✅
+> - support new models! 🚀
+>
+> Given the significant environmental differences across models and the specific dependencies required for simulator rendering, I will soon provide a Docker solution and a benchmark performance table. I’ll also do my best to address any issues you run into.
+> Thank you for your support and contributions! 🎉
+>
+> To support state input, we use the submodule `ManiSkill2_real2sim` from https://github.com/allenzren/ManiSkill2_real2sim
+
+| Model Name | Support | Notes |
+| ----------- | ----- | ----- |
+| Octo | ✅ | |
+| RT1 | ✅ | |
+| OpenVLA | ✅ | |
+| CogACT | ✅ | OpenVLA-based |
+| SpatialVLA | ✅ | [transformers == 4.47.0](https://github.com/SpatialVLA/SpatialVLA) |
+| Pi0/Pi0-Fast (openpi version) | ✅ | [openpi](https://github.com/Physical-Intelligence/openpi) |
+| Pi0/Pi0-Fast (lerobot version) | ✅ | [lerobot](https://github.com/huggingface/lerobot) |
+| GR00T | ✅ | [Isaac-GR00T](https://github.com/NVIDIA/Isaac-GR00T) |
+
## Getting Started
@@ -71,13 +104,13 @@ Prerequisites:
Create an anaconda environment:
```
-conda create -n simpler_env python=3.10 (3.10 or 3.11)
+conda create -n simpler_env python=3.10 (3.10 or 3.11; pyproject.toml requires >=3.10,<3.12)
conda activate simpler_env
```
Clone this repo:
```
-git clone https://github.com/simpler-env/SimplerEnv --recurse-submodules
+git clone https://github.com/simpler-env/SimplerEnv --recurse-submodules --depth 1
```
Install numpy<2.0 (otherwise errors in IK might occur in pinocchio):
@@ -97,15 +130,15 @@ cd {this_repo}
pip install -e .
```
-**If you'd like to perform evaluations on our provided agents (e.g., RT-1, Octo), or add new robots and environments, please additionally follow the full installation instructions [here](#full-installation-rt-1-and-octo-inference-env-building).**
+**If you'd like to perform evaluations on our provided agents (e.g., RT-1, Octo, OpenVLA), or add new robots and environments, please additionally follow the full installation instructions [here](#full-installation-rt-1-octo-openvla-inference-env-building).**
## Examples
-- Simple RT-1 and Octo evaluation script on prepackaged environments with visual matching evaluation setup: see [`simpler_env/simple_inference_visual_matching_prepackaged_envs.py`](https://github.com/simpler-env/SimplerEnv/blob/main/simpler_env/simple_inference_visual_matching_prepackaged_envs.py).
+- Simple RT-1, Octo, and OpenVLA evaluation script on prepackaged environments with visual matching evaluation setup: see [`simpler_env/simple_inference_visual_matching_prepackaged_envs.py`](https://github.com/simpler-env/SimplerEnv/blob/main/simpler_env/simple_inference_visual_matching_prepackaged_envs.py).
- Colab notebook for RT-1 and Octo inference: see [this link](https://colab.research.google.com/github/simpler-env/SimplerEnv/blob/main/example.ipynb).
- Environment interactive visualization and manual control: see [`ManiSkill2_real2sim/mani_skill2_real2sim/examples/demo_manual_control_custom_envs.py`](https://github.com/simpler-env/ManiSkill2_real2sim/blob/main/mani_skill2_real2sim/examples/demo_manual_control_custom_envs.py)
-- Policy inference scripts to reproduce our Google Robot and WidowX real-to-sim evaluation results with sweeps over object / robot poses and advanced loggings. These contain both visual matching and variant aggregation evaluation setups along with RT-1, RT-1-X, and Octo policies. See [`scripts/`](https://github.com/simpler-env/SimplerEnv/tree/main/scripts).
+- Policy inference scripts to reproduce our Google Robot and WidowX real-to-sim evaluation results with sweeps over object / robot poses and advanced loggings. These contain both visual matching and variant aggregation evaluation setups along with RT-1, RT-1-X, Octo, and OpenVLA policies. See [`scripts/`](https://github.com/simpler-env/SimplerEnv/tree/main/scripts).
- Real-to-sim evaluation videos from running `scripts/*.sh`: see [this link](https://huggingface.co/datasets/xuanlinli17/simpler-env-eval-example-videos/tree/main).
## Current Environments
@@ -183,6 +216,7 @@ simpler_env/
policies/: policy implementations
rt1/: RT-1 policy implementation
octo/: Octo policy implementation
+ openvla/: OpenVLA policy implementation
utils/:
env/: environment building and observation utilities
debug/: debugging tools for policies and robots
@@ -205,7 +239,7 @@ scripts/: example bash scripts for policy inference under our variant aggregatio
If you want to use existing environments for evaluating new policies, you can keep `./ManiSkill2_real2sim` as is.
-1. Implement new policy inference scripts in `simpler_env/policies/{your_new_policy}`, following the examples for RT-1 (`simpler_env/policies/rt1`) and Octo (`simpler_env/policies/octo`) policies.
+1. Implement new policy inference scripts in `simpler_env/policies/{your_new_policy}`, following the examples for RT-1 (`simpler_env/policies/rt1`), Octo (`simpler_env/policies/octo`), and OpenVLA (`simpler_env/policies/openvla`) policies (a minimal sketch of the shared interface is given below).
2. You can now use `simpler_env/simple_inference_visual_matching_prepackaged_envs.py` to perform policy evaluations in simulation.
- If the policy behaviors deviate a lot from those in the real-world, you can write similar scripts as in `simpler_env/utils/debug/{policy_name}_inference_real_video.py` to debug the policy behaviors. The debugging script performs policy inference by feeding real eval video frames into the policy. If the policy behavior still deviates significantly from real, this may suggest that policy actions are processed incorrectly into the simulation environments. Please double check action orderings and action spaces.
3. If you'd like to perform customized evaluations,
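+
+For reference, here is a minimal, hypothetical sketch of the interface the example policies share (method names and action keys are inferred from the RT-1 and Octo implementations; check `simpler_env/policies/rt1/rt1_model.py` for the exact signatures):
+
+```
+class MyPolicyInference:
+    def __init__(self, saved_model_path: str, policy_setup: str = "google_robot"):
+        # load your checkpoint / processor here
+        ...
+
+    def reset(self, task_description: str) -> None:
+        # clear per-episode state (e.g. action-ensemble buffers)
+        ...
+
+    def step(self, image, task_description: str):
+        # return (raw_action, action), where `action` carries the processed
+        # world_vector / rot_axangle / gripper / terminate_episode terms
+        # consumed by the evaluator
+        ...
+```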
@@ -219,7 +253,7 @@ If you want to use existing environments for evaluating new policies, you can ke
We provide a step-by-step guide to add new real-to-sim evaluation environments and robots in [this README](ADDING_NEW_ENVS_ROBOTS.md)
-## Full Installation (RT-1 and Octo Inference, Env Building)
+## Full Installation (RT-1, Octo, OpenVLA Inference, Env Building)
If you'd like to perform evaluations on our provided agents (e.g., RT-1, Octo), or add new robots and environments, please follow the full installation instructions below.
@@ -289,6 +323,13 @@ If you are using CUDA 12, then to use GPU for Octo inference, you need CUDA vers
`PATH=/usr/local/cuda-12.3/bin:$PATH LD_LIBRARY_PATH=/usr/local/cuda-12.3/lib64:$LD_LIBRARY_PATH bash scripts/octo_xxx_script.sh`
+### OpenVLA Inference Setup
+
+```
+pip install torch==2.3.1 torchvision==0.18.1 timm==0.9.10 tokenizers==0.15.2 accelerate==0.32.1
+pip install flash-attn==2.6.1 --no-build-isolation
+```
+
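+As a quick sanity check after installing (a minimal snippet in the spirit of `policies/tes.py`):
+
+```
+import torch
+print(torch.__version__, torch.cuda.is_available())
+```
+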
## Troubleshooting
1. If you encounter issues such as
@@ -307,6 +348,10 @@ Follow [this link](https://maniskill.readthedocs.io/en/latest/user_guide/getting
TypeError: 'NoneType' object is not subscriptable
```
+3. Please also refer to the corresponding model's original repo or [vulkan_setup](https://github.com/SpatialVLA/SpatialVLA/issues/3#issuecomment-2641739404) if you encounter any problems.
+
+4. `tensorflow-2.15.0` conflicts with `tensorflow-2.15.1`?
+The dlimp library has not been maintained for a long time, so its pinned TensorFlow version may be out of date. A reliable workaround is to comment out `tensorflow==2.15.0` in the requirements file, install all the other dependencies, and then install `tensorflow==2.15.0` last. So far, using `tensorflow==2.15.0` has not caused any problems.
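+
+A minimal sketch of that workaround (`requirements.txt` is a placeholder here; edit whichever requirements file pins `tensorflow==2.15.0`):
+
+```
+# comment out the pinned TensorFlow line, install everything else, then install it last
+sed -i 's/^tensorflow==2.15.0/# tensorflow==2.15.0/' requirements.txt
+pip install -r requirements.txt
+pip install tensorflow==2.15.0
+```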
## Citation
diff --git a/docker/10_nvidia.json b/docker/10_nvidia.json
new file mode 100644
index 00000000..bc224f29
--- /dev/null
+++ b/docker/10_nvidia.json
@@ -0,0 +1,6 @@
+{
+ "file_format_version": "1.0.0",
+ "ICD": {
+ "library_path": "libEGL_nvidia.so.0"
+ }
+}
\ No newline at end of file
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 00000000..49c49916
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,34 @@
+FROM nvidia/cuda:12.1.0-devel-ubuntu20.04
+ENV NVIDIA_DRIVER_CAPABILITIES=all
+ENV DEBIAN_FRONTEND=noninteractive
+
+# Install packages for simpler
+# RUN apt-get update && apt-get install -y --no-install-recommends \
+# bash-completion build-essential ca-certificates cmake curl git \
+# htop unzip vim wget \
+# libvulkan1 libvulkan-dev vulkan-tools xvfb \
+# libglvnd-dev libgl1-mesa-dev libegl1-mesa-dev libgles2-mesa-dev libglib2.0-0
+# RUN rm -rf /var/lib/apt/lists/*
+
+
+RUN apt-get update && apt-get install -y --no-install-recommends \
+ bash-completion build-essential ca-certificates cmake curl git \
+ htop libegl1 libxext6 libjpeg-dev libpng-dev libvulkan1 rsync \
+ tmux unzip vim vulkan-utils wget xvfb pkg-config ffmpeg \
+ libglvnd-dev libgl1-mesa-dev libegl1-mesa-dev libgles2-mesa-dev libglib2.0-0
+RUN rm -rf /var/lib/apt/lists/*
+
+# libvulkan1 libvulkan-dev vulkan-tools xvfb ffmpeg might be sufficient on their own?
+
+RUN wget -qO- https://astral.sh/uv/install.sh | sh
+
+# https://github.com/haosulab/ManiSkill/issues/9
+COPY docker/10_nvidia.json /usr/share/glvnd/egl_vendor.d/10_nvidia.json
+COPY docker/nvidia_icd.json /usr/share/vulkan/icd.d/nvidia_icd.json
+COPY docker/nvidia_layers.json /etc/vulkan/implicit_layer.d/nvidia_layers.json
+
+# install dependencies
+# RUN conda install -c conda-forge libgl glib libvulkan-loader vulkan-tools vulkan-headers
+# RUN pip install uv
+
+# docker build -t simpler_env .
\ No newline at end of file
diff --git a/docker/nvidia_icd.json b/docker/nvidia_icd.json
new file mode 100644
index 00000000..dbd5a51a
--- /dev/null
+++ b/docker/nvidia_icd.json
@@ -0,0 +1,7 @@
+{
+ "file_format_version": "1.0.0",
+ "ICD": {
+ "library_path": "libGLX_nvidia.so.0",
+ "api_version": "1.2.155"
+ }
+}
\ No newline at end of file
diff --git a/docker/nvidia_layers.json b/docker/nvidia_layers.json
new file mode 100644
index 00000000..baaded95
--- /dev/null
+++ b/docker/nvidia_layers.json
@@ -0,0 +1,21 @@
+{
+ "file_format_version": "1.0.0",
+ "layer": {
+ "name": "VK_LAYER_NV_optimus",
+ "type": "INSTANCE",
+ "library_path": "libGLX_nvidia.so.0",
+ "api_version": "1.2.155",
+ "implementation_version": "1",
+ "description": "NVIDIA Optimus layer",
+ "functions": {
+ "vkGetInstanceProcAddr": "vk_optimusGetInstanceProcAddr",
+ "vkGetDeviceProcAddr": "vk_optimusGetDeviceProcAddr"
+ },
+ "enable_environment": {
+ "__NV_PRIME_RENDER_OFFLOAD": "1"
+ },
+ "disable_environment": {
+ "DISABLE_LAYER_NV_OPTIMUS_1": ""
+ }
+ }
+}
\ No newline at end of file
diff --git a/policies/lerobotpi/README.md b/policies/lerobotpi/README.md
new file mode 100644
index 00000000..153db02a
--- /dev/null
+++ b/policies/lerobotpi/README.md
@@ -0,0 +1,18 @@
+
+```bash
+uv venv -p 3.10 policies/lerobotpi/.venv
+
+
+# git clone https://github.com/huggingface/lerobot.git
+
+# uv pip install -e .
+source $(pwd)/policies/lerobotpi/.venv/bin/activate
+uv pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124
+uv pip install "lerobot[pi0]==0.3.2"
+uv pip install -e .
+uv pip install pytest
+
+huggingface-cli login
+bash scripts/run_lerobotpifast.sh
+
+```
\ No newline at end of file
diff --git a/policies/tes.py b/policies/tes.py
new file mode 100644
index 00000000..8fcea7b6
--- /dev/null
+++ b/policies/tes.py
@@ -0,0 +1,3 @@
+import torch
+
+print(torch.cuda.is_available())
diff --git a/pyproject.toml b/pyproject.toml
index 1e2b786d..d2b64bfd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,18 @@
+[project]
+name = "simpler_env"
+version = "0.0.1"
+description = "SimplerEnv"
+readme = "README.md"
+requires-python = ">=3.10,<3.12"
+dependencies = [
+ "mani_skill2_real2sim",
+ "numpy<2.0",
+ "tensorflow-cpu",
+ "mediapy",
+ "transforms3d",
+ "matplotlib",
+]
+
[tool.black]
# https://github.com/psf/black
line-length = 120
@@ -9,3 +24,10 @@ profile = "black"
line_length = 120
force_sort_within_sections = "True"
order_by_type = "False"
+
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+
+[tool.uv.sources]
+mani_skill2_real2sim = { path = "ManiSkill2_real2sim", editable = true }
\ No newline at end of file
diff --git a/scripts/bridge.sh b/scripts/bridge.sh
new file mode 100644
index 00000000..e346f543
--- /dev/null
+++ b/scripts/bridge.sh
@@ -0,0 +1,52 @@
+ckpt_path=$1
+policy_model=$2
+action_ensemble_temp=$3
+logging_dir=$4
+gpu_id=$5
+
+
+scene_name=bridge_table_1_v1
+robot=widowx
+rgb_overlay_path=ManiSkill2_real2sim/data/real_inpainting/bridge_real_eval_1.png
+robot_init_x=0.147
+robot_init_y=0.028
+
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot ${robot} --policy-setup widowx_bridge \
+ --control-freq 5 --sim-freq 500 --max-episode-steps 60 \
+ --env-name PutCarrotOnPlateInScene-v0 --scene-name ${scene_name} \
+ --rgb-overlay-path ${rgb_overlay_path} \
+ --robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1;
+
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot ${robot} --policy-setup widowx_bridge \
+ --control-freq 5 --sim-freq 500 --max-episode-steps 60 \
+ --env-name StackGreenCubeOnYellowCubeBakedTexInScene-v0 --scene-name ${scene_name} \
+ --rgb-overlay-path ${rgb_overlay_path} \
+ --robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1;
+
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot ${robot} --policy-setup widowx_bridge \
+ --control-freq 5 --sim-freq 500 --max-episode-steps 60 \
+ --env-name PutSpoonOnTableClothInScene-v0 --scene-name ${scene_name} \
+ --rgb-overlay-path ${rgb_overlay_path} \
+ --robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1;
+
+
+scene_name=bridge_table_1_v2
+robot=widowx_sink_camera_setup
+rgb_overlay_path=ManiSkill2_real2sim/data/real_inpainting/bridge_sink.png
+robot_init_x=0.127
+robot_init_y=0.06
+
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot ${robot} --policy-setup widowx_bridge \
+ --control-freq 5 --sim-freq 500 --max-episode-steps 120 \
+ --env-name PutEggplantInBasketScene-v0 --scene-name ${scene_name} \
+ --rgb-overlay-path ${rgb_overlay_path} \
+ --robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1;
+
diff --git a/scripts/octo_drawer_variant_agg.sh b/scripts/drawer_variant_agg.sh
similarity index 77%
rename from scripts/octo_drawer_variant_agg.sh
rename to scripts/drawer_variant_agg.sh
index a566b369..17113f21 100644
--- a/scripts/octo_drawer_variant_agg.sh
+++ b/scripts/drawer_variant_agg.sh
@@ -1,10 +1,11 @@
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
+ckpt_path=$1
+policy_model=$2
+action_ensemble_temp=$3
+logging_dir=$4
+gpu_id=$5
-
-
-declare -a policy_models=(
-"octo-base"
-)
+declare -a ckpt_paths=(${ckpt_path})
declare -a env_names=(
OpenTopDrawerCustomInScene-v0
@@ -22,9 +23,9 @@ EXTRA_ARGS="--enable-raytracing"
scene_name=frl_apartment_stage_simple
EvalSim() {
- echo ${policy_model} ${env_name}
+ echo ${ckpt_path} ${env_name}
- python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -35,7 +36,7 @@ EvalSim() {
}
-for policy_model in "${policy_models[@]}"; do
+for ckpt_path in "${ckpt_paths[@]}"; do
for env_name in "${env_names[@]}"; do
EvalSim
done
@@ -50,7 +51,7 @@ declare -a scene_names=(
)
for scene_name in "${scene_names[@]}"; do
- for policy_model in "${policy_models[@]}"; do
+ for ckpt_path in "${ckpt_paths[@]}"; do
for env_name in "${env_names[@]}"; do
EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt"
EvalSim
@@ -62,7 +63,7 @@ done
# lightings
scene_name=frl_apartment_stage_simple
-for policy_model in "${policy_models[@]}"; do
+for ckpt_path in "${ckpt_paths[@]}"; do
for env_name in "${env_names[@]}"; do
EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt light_mode=brighter"
EvalSim
@@ -75,7 +76,7 @@ done
# new cabinets
scene_name=frl_apartment_stage_simple
-for policy_model in "${policy_models[@]}"; do
+for ckpt_path in "${ckpt_paths[@]}"; do
for env_name in "${env_names[@]}"; do
EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt station_name=mk_station2"
EvalSim
diff --git a/scripts/drawer_visual_matching.sh b/scripts/drawer_visual_matching.sh
new file mode 100644
index 00000000..93869004
--- /dev/null
+++ b/scripts/drawer_visual_matching.sh
@@ -0,0 +1,135 @@
+# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
+ckpt_path=$1
+policy_model=$2
+action_ensemble_temp=$3
+logging_dir=$4
+gpu_id=$5
+
+declare -a ckpt_paths=($ckpt_path)
+
+declare -a env_names=(
+ OpenTopDrawerCustomInScene-v0
+ OpenMiddleDrawerCustomInScene-v0
+ OpenBottomDrawerCustomInScene-v0
+ CloseTopDrawerCustomInScene-v0
+ CloseMiddleDrawerCustomInScene-v0
+ CloseBottomDrawerCustomInScene-v0
+)
+
+# URDF variations
+declare -a urdf_version_arr=("recolor_cabinet_visual_matching_1" "recolor_tabletop_visual_matching_1" "recolor_tabletop_visual_matching_2" None)
+
+for urdf_version in "${urdf_version_arr[@]}"; do
+
+ EXTRA_ARGS="--enable-raytracing --additional-env-build-kwargs station_name=mk_station_recolor light_mode=simple disable_bad_material=True urdf_version=${urdf_version}"
+
+ EvalOverlay() {
+ # A0
+ echo ${ckpt_path} ${env_name} ${urdf_version}
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
+ --env-name ${env_name} --scene-name dummy_drawer \
+ --robot-init-x 0.644 0.644 1 --robot-init-y -0.179 -0.179 1 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.03 -0.03 1 \
+ --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
+ --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a0.png \
+ ${EXTRA_ARGS}
+
+ # A1
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
+ --env-name ${env_name} --scene-name dummy_drawer \
+ --robot-init-x 0.765 0.765 1 --robot-init-y -0.182 -0.182 1 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.02 -0.02 1 \
+ --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
+ --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a1.png \
+ ${EXTRA_ARGS}
+
+ # A2
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
+ --env-name ${env_name} --scene-name dummy_drawer \
+ --robot-init-x 0.889 0.889 1 --robot-init-y -0.203 -0.203 1 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.06 -0.06 1 \
+ --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
+ --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a2.png \
+ ${EXTRA_ARGS}
+
+ # B0
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
+ --env-name ${env_name} --scene-name dummy_drawer \
+ --robot-init-x 0.652 0.652 1 --robot-init-y 0.009 0.009 1 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
+ --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
+ --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b0.png \
+ ${EXTRA_ARGS}
+
+ # B1
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
+ --env-name ${env_name} --scene-name dummy_drawer \
+ --robot-init-x 0.752 0.752 1 --robot-init-y 0.009 0.009 1 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
+ --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
+ --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b1.png \
+ ${EXTRA_ARGS}
+
+ # B2
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
+ --env-name ${env_name} --scene-name dummy_drawer \
+ --robot-init-x 0.851 0.851 1 --robot-init-y 0.035 0.035 1 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
+ --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
+ --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b2.png \
+ ${EXTRA_ARGS}
+
+ # C0
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
+ --env-name ${env_name} --scene-name dummy_drawer \
+ --robot-init-x 0.665 0.665 1 --robot-init-y 0.224 0.224 1 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
+ --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
+ --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c0.png \
+ ${EXTRA_ARGS}
+
+ # C1
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
+ --env-name ${env_name} --scene-name dummy_drawer \
+ --robot-init-x 0.765 0.765 1 --robot-init-y 0.222 0.222 1 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.025 -0.025 1 \
+ --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
+ --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c1.png \
+ ${EXTRA_ARGS}
+
+ # C2
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
+ --env-name ${env_name} --scene-name dummy_drawer \
+ --robot-init-x 0.865 0.865 1 --robot-init-y 0.222 0.222 1 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.025 -0.025 1 \
+ --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
+ --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c2.png \
+ ${EXTRA_ARGS}
+ }
+
+
+ for ckpt_path in "${ckpt_paths[@]}"; do
+ for env_name in "${env_names[@]}"; do
+ EvalOverlay
+ done
+ done
+
+done
diff --git a/scripts/misc/octo_drawer_variant_agg_alt_urdf.sh b/scripts/misc/octo_drawer_variant_agg_alt_urdf.sh
deleted file mode 100644
index 9440be5d..00000000
--- a/scripts/misc/octo_drawer_variant_agg_alt_urdf.sh
+++ /dev/null
@@ -1,87 +0,0 @@
-# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
-
-
-
-declare -a policy_models=(
-"octo-base"
-)
-
-declare -a env_names=(
-OpenTopDrawerCustomInScene-v0
-OpenMiddleDrawerCustomInScene-v0
-OpenBottomDrawerCustomInScene-v0
-CloseTopDrawerCustomInScene-v0
-CloseMiddleDrawerCustomInScene-v0
-CloseBottomDrawerCustomInScene-v0
-)
-
-urdf_version=recolor_sim
-
-EXTRA_ARGS="--enable-raytracing --additional-env-build-kwargs urdf_version=${urdf_version}"
-
-
-# base setup
-scene_name=frl_apartment_stage_simple
-
-EvalSim() {
- echo ${policy_model} ${env_name}
-
- python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.65 0.85 3 --robot-init-y -0.2 0.2 3 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0.0 0.0 1 \
- --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
- ${EXTRA_ARGS}
-}
-
-
-for policy_model in "${policy_models[@]}"; do
- for env_name in "${env_names[@]}"; do
- EvalSim
- done
-done
-
-
-# backgrounds
-
-declare -a scene_names=(
-"modern_bedroom_no_roof"
-"modern_office_no_roof"
-)
-
-for scene_name in "${scene_names[@]}"; do
- for policy_model in "${policy_models[@]}"; do
- for env_name in "${env_names[@]}"; do
- EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt urdf_version=${urdf_version}"
- EvalSim
- done
- done
-done
-
-
-# lightings
-scene_name=frl_apartment_stage_simple
-
-for policy_model in "${policy_models[@]}"; do
- for env_name in "${env_names[@]}"; do
- EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt urdf_version=${urdf_version} light_mode=brighter"
- EvalSim
- EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt urdf_version=${urdf_version} light_mode=darker"
- EvalSim
- done
-done
-
-
-# new cabinets
-scene_name=frl_apartment_stage_simple
-
-for policy_model in "${policy_models[@]}"; do
- for env_name in "${env_names[@]}"; do
- EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt urdf_version=${urdf_version} station_name=mk_station2"
- EvalSim
- EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt urdf_version=${urdf_version} station_name=mk_station3"
- EvalSim
- done
-done
diff --git a/scripts/misc/octo_move_near_variant_agg_alt_urdf.sh b/scripts/misc/octo_move_near_variant_agg_alt_urdf.sh
deleted file mode 100644
index 11c1aa09..00000000
--- a/scripts/misc/octo_move_near_variant_agg_alt_urdf.sh
+++ /dev/null
@@ -1,144 +0,0 @@
-
-gpu_id=0
-
-declare -a arr=("octo-base")
-urdf_version=recolor_sim
-
-for policy_model in "${arr[@]}"; do echo "$policy_model"; done
-
-
-# base setup
-
-env_name=MoveNearGoogleInScene-v0
-scene_name=google_pick_coke_can_1_v4
-
-for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.21 0.21 1 --obj-variation-mode episode --obj-episode-range 0 60 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
- --additional-env-build-kwargs urdf_version=${urdf_version};
-
-done
-
-
-
-# distractor
-
-for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.21 0.21 1 --obj-variation-mode episode --obj-episode-range 0 60 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
- --additional-env-build-kwargs no_distractor=True urdf_version=${urdf_version};
-
-done
-
-
-# backgrounds
-
-env_name=MoveNearGoogleInScene-v0
-declare -a scene_arr=("google_pick_coke_can_1_v4_alt_background" \
- "google_pick_coke_can_1_v4_alt_background_2")
-
-for scene_name in "${scene_arr[@]}";
-
-do for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.21 0.21 1 --obj-variation-mode episode --obj-episode-range 0 60 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
- --additional-env-build-kwargs urdf_version=${urdf_version};
-
-done
-
-done
-
-
-
-
-
-# lighting
-
-env_name=MoveNearGoogleInScene-v0
-scene_name=google_pick_coke_can_1_v4
-
-for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.21 0.21 1 --obj-variation-mode episode --obj-episode-range 0 60 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
- --additional-env-build-kwargs slightly_darker_lighting=True urdf_version=${urdf_version};
-
-CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.21 0.21 1 --obj-variation-mode episode --obj-episode-range 0 60 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
- --additional-env-build-kwargs slightly_brighter_lighting=True urdf_version=${urdf_version};
-
-done
-
-
-
-
-
-# table textures
-
-env_name=MoveNearGoogleInScene-v0
-declare -a scene_arr=("Baked_sc1_staging_objaverse_cabinet1_h870" \
- "Baked_sc1_staging_objaverse_cabinet2_h870")
-
-for scene_name in "${scene_arr[@]}";
-
-do for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.21 0.21 1 --obj-variation-mode episode --obj-episode-range 0 60 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
- --additional-env-build-kwargs slightly_brighter_lighting=True urdf_version=${urdf_version};
-
-done
-
-done
-
-
-
-
-# camera orientations
-
-declare -a env_arr=("MoveNearAltGoogleCameraInScene-v0" \
- "MoveNearAltGoogleCamera2InScene-v0")
-scene_name=google_pick_coke_can_1_v4
-
-for env_name in "${env_arr[@]}";
-
-do for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.21 0.21 1 --obj-variation-mode episode --obj-episode-range 0 60 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
- --additional-env-build-kwargs slightly_brighter_lighting=True urdf_version=${urdf_version};
-
-done
-
-done
diff --git a/scripts/misc/octo_pick_coke_can_variant_agg_alt_urdf.sh b/scripts/misc/octo_pick_coke_can_variant_agg_alt_urdf.sh
deleted file mode 100644
index 5d7b3dc5..00000000
--- a/scripts/misc/octo_pick_coke_can_variant_agg_alt_urdf.sh
+++ /dev/null
@@ -1,183 +0,0 @@
-
-gpu_id=0
-
-declare -a arr=("octo-base")
-
-# lr_switch=laying horizontally but flipped left-right to match real eval; upright=standing; laid_vertically=laying vertically
-declare -a coke_can_options_arr=("lr_switch=True" "upright=True" "laid_vertically=True")
-urdf_version=recolor_sim
-# recolor_sim
-
-for policy_model in "${arr[@]}"; do echo "$policy_model"; done
-
-
-# base setup
-
-env_name=GraspSingleOpenedCokeCanInScene-v0
-scene_name=google_pick_coke_can_1_v4
-
-for coke_can_option in "${coke_can_options_arr[@]}";
-
-do for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.20 0.20 1 --obj-init-x -0.35 -0.12 5 --obj-init-y -0.02 0.42 5 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --additional-env-build-kwargs ${coke_can_option} urdf_version=${urdf_version};
-
-done
-
-done
-
-
-
-# table textures
-
-env_name=GraspSingleOpenedCokeCanInScene-v0
-
-declare -a scene_arr=("Baked_sc1_staging_objaverse_cabinet1_h870" \
- "Baked_sc1_staging_objaverse_cabinet2_h870")
-
-for coke_can_option in "${coke_can_options_arr[@]}";
-
-do for scene_name in "${scene_arr[@]}";
-
-do for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.20 0.20 1 --obj-init-x -0.35 -0.12 5 --obj-init-y -0.02 0.42 5 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --additional-env-build-kwargs ${coke_can_option} urdf_version=${urdf_version};
-
-done
-
-done
-
-done
-
-
-
-
-# distractors
-
-env_name=GraspSingleOpenedCokeCanDistractorInScene-v0
-scene_name=google_pick_coke_can_1_v4
-
-for coke_can_option in "${coke_can_options_arr[@]}";
-
-do for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.20 0.20 1 --obj-init-x -0.35 -0.12 5 --obj-init-y -0.02 0.42 5 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --additional-env-build-kwargs ${coke_can_option} urdf_version=${urdf_version};
-
-CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.20 0.20 1 --obj-init-x -0.35 -0.12 5 --obj-init-y -0.02 0.42 5 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --additional-env-build-kwargs ${coke_can_option} urdf_version=${urdf_version} distractor_config=more;
-
-done
-
-done
-
-
-
-
-# backgrounds
-
-env_name=GraspSingleOpenedCokeCanInScene-v0
-declare -a scene_arr=("google_pick_coke_can_1_v4_alt_background" \
- "google_pick_coke_can_1_v4_alt_background_2")
-
-for coke_can_option in "${coke_can_options_arr[@]}";
-
-do for scene_name in "${scene_arr[@]}";
-
-do for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.20 0.20 1 --obj-init-x -0.35 -0.12 5 --obj-init-y -0.02 0.42 5 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --additional-env-build-kwargs ${coke_can_option} urdf_version=${urdf_version};
-
-done
-
-done
-
-done
-
-
-
-# lightings
-
-env_name=GraspSingleOpenedCokeCanInScene-v0
-scene_name=google_pick_coke_can_1_v4
-
-for coke_can_option in "${coke_can_options_arr[@]}";
-
-do for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.20 0.20 1 --obj-init-x -0.35 -0.12 5 --obj-init-y -0.02 0.42 5 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --additional-env-build-kwargs ${coke_can_option} urdf_version=${urdf_version} slightly_darker_lighting=True;
-
-CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.20 0.20 1 --obj-init-x -0.35 -0.12 5 --obj-init-y -0.02 0.42 5 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --additional-env-build-kwargs ${coke_can_option} urdf_version=${urdf_version} slightly_brighter_lighting=True;
-
-done
-
-done
-
-
-
-
-# camera orientations
-
-declare -a env_arr=("GraspSingleOpenedCokeCanAltGoogleCameraInScene-v0" \
- "GraspSingleOpenedCokeCanAltGoogleCamera2InScene-v0")
-scene_name=google_pick_coke_can_1_v4
-
-for coke_can_option in "${coke_can_options_arr[@]}";
-
-do for env_name in "${env_arr[@]}";
-
-do for policy_model in "${arr[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.20 0.20 1 --obj-init-x -0.35 -0.12 5 --obj-init-y -0.02 0.42 5 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --additional-env-build-kwargs ${coke_can_option} urdf_version=${urdf_version};
-
-done
-
-done
-
-done
diff --git a/scripts/octo_move_near_variant_agg.sh b/scripts/move_near_variant_agg.sh
similarity index 74%
rename from scripts/octo_move_near_variant_agg.sh
rename to scripts/move_near_variant_agg.sh
index 43733968..d4430537 100644
--- a/scripts/octo_move_near_variant_agg.sh
+++ b/scripts/move_near_variant_agg.sh
@@ -1,9 +1,12 @@
+ckpt_path=$1
+policy_model=$2
+action_ensemble_temp=$3
+logging_dir=$4
+gpu_id=$5
-gpu_id=0
+declare -a arr=("${ckpt_path}")
-declare -a arr=("octo-base")
-
-for policy_model in "${arr[@]}"; do echo "$policy_model"; done
+for ckpt_path in "${arr[@]}"; do echo "$ckpt_path"; done
# base setup
@@ -11,9 +14,9 @@ for policy_model in "${arr[@]}"; do echo "$policy_model"; done
env_name=MoveNearGoogleInScene-v0
scene_name=google_pick_coke_can_1_v4
-for policy_model in "${arr[@]}";
+for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -26,9 +29,9 @@ done
# distractor
-for policy_model in "${arr[@]}";
+for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -47,9 +50,9 @@ declare -a scene_arr=("google_pick_coke_can_1_v4_alt_background" \
for scene_name in "${scene_arr[@]}";
-do for policy_model in "${arr[@]}";
+do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -69,9 +72,9 @@ done
env_name=MoveNearGoogleInScene-v0
scene_name=google_pick_coke_can_1_v4
-for policy_model in "${arr[@]}";
+for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -79,7 +82,7 @@ do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-
--robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
--additional-env-build-kwargs slightly_darker_lighting=True;
-CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -101,9 +104,9 @@ declare -a scene_arr=("Baked_sc1_staging_objaverse_cabinet1_h870" \
for scene_name in "${scene_arr[@]}";
-do for policy_model in "${arr[@]}";
+do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -125,9 +128,9 @@ scene_name=google_pick_coke_can_1_v4
for env_name in "${env_arr[@]}";
-do for policy_model in "${arr[@]}";
+do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
diff --git a/scripts/move_near_visual_matching.sh b/scripts/move_near_visual_matching.sh
new file mode 100644
index 00000000..d14a2e0e
--- /dev/null
+++ b/scripts/move_near_visual_matching.sh
@@ -0,0 +1,36 @@
+ckpt_path=$1
+policy_model=$2
+action_ensemble_temp=$3
+logging_dir=$4
+gpu_id=$5
+
+declare -a arr=($ckpt_path)
+
+env_name=MoveNearGoogleBakedTexInScene-v0
+# env_name=MoveNearGoogleBakedTexInScene-v1
+scene_name=google_pick_coke_can_1_v4
+rgb_overlay_path=./ManiSkill2_real2sim/data/real_inpainting/google_move_near_real_eval_1.png
+
+# URDF variations
+declare -a urdf_version_arr=(None "recolor_tabletop_visual_matching_1" "recolor_tabletop_visual_matching_2" "recolor_cabinet_visual_matching_1")
+
+for ckpt_path in "${arr[@]}"; do echo "$ckpt_path"; done
+
+
+for urdf_version in "${urdf_version_arr[@]}";
+
+do for ckpt_path in "${arr[@]}";
+
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
+ --env-name ${env_name} --scene-name ${scene_name} \
+ --rgb-overlay-path ${rgb_overlay_path} \
+ --robot-init-x 0.35 0.35 1 --robot-init-y 0.21 0.21 1 --obj-variation-mode episode --obj-episode-range 0 60 \
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
+ --additional-env-build-kwargs urdf_version=${urdf_version} \
+ --additional-env-save-tags baked_except_bpb_orange;
+
+done
+
+done
diff --git a/scripts/octo_drawer_visual_matching.sh b/scripts/octo_drawer_visual_matching.sh
deleted file mode 100644
index 41e8c893..00000000
--- a/scripts/octo_drawer_visual_matching.sh
+++ /dev/null
@@ -1,135 +0,0 @@
-# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
-
-
-
-declare -a policy_models=(
-"octo-base"
-# "octo-server"
-)
-
-declare -a env_names=(
-OpenTopDrawerCustomInScene-v0
-OpenMiddleDrawerCustomInScene-v0
-OpenBottomDrawerCustomInScene-v0
-CloseTopDrawerCustomInScene-v0
-CloseMiddleDrawerCustomInScene-v0
-CloseBottomDrawerCustomInScene-v0
-)
-
-# URDF variations
-declare -a urdf_version_arr=("recolor_cabinet_visual_matching_1" "recolor_tabletop_visual_matching_1" "recolor_tabletop_visual_matching_2" None)
-
-for urdf_version in "${urdf_version_arr[@]}"; do
-
-EXTRA_ARGS="--enable-raytracing --additional-env-build-kwargs station_name=mk_station_recolor light_mode=simple disable_bad_material=True urdf_version=${urdf_version}"
-
-EvalOverlay() {
-# A0
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
- --env-name ${env_name} --scene-name dummy_drawer \
- --robot-init-x 0.644 0.644 1 --robot-init-y -0.179 -0.179 1 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.03 -0.03 1 \
- --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
- --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a0.png \
- ${EXTRA_ARGS}
-
-# A1
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
- --env-name ${env_name} --scene-name dummy_drawer \
- --robot-init-x 0.765 0.765 1 --robot-init-y -0.182 -0.182 1 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.02 -0.02 1 \
- --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
- --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a1.png \
- ${EXTRA_ARGS}
-
-# A2
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
- --env-name ${env_name} --scene-name dummy_drawer \
- --robot-init-x 0.889 0.889 1 --robot-init-y -0.203 -0.203 1 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.06 -0.06 1 \
- --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
- --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a2.png \
- ${EXTRA_ARGS}
-
-# B0
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
- --env-name ${env_name} --scene-name dummy_drawer \
- --robot-init-x 0.652 0.652 1 --robot-init-y 0.009 0.009 1 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
- --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b0.png \
- ${EXTRA_ARGS}
-
-# B1
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
- --env-name ${env_name} --scene-name dummy_drawer \
- --robot-init-x 0.752 0.752 1 --robot-init-y 0.009 0.009 1 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
- --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b1.png \
- ${EXTRA_ARGS}
-
-# B2
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
- --env-name ${env_name} --scene-name dummy_drawer \
- --robot-init-x 0.851 0.851 1 --robot-init-y 0.035 0.035 1 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
- --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b2.png \
- ${EXTRA_ARGS}
-
-# C0
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
- --env-name ${env_name} --scene-name dummy_drawer \
- --robot-init-x 0.665 0.665 1 --robot-init-y 0.224 0.224 1 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
- --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
- --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c0.png \
- ${EXTRA_ARGS}
-
-# C1
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
- --env-name ${env_name} --scene-name dummy_drawer \
- --robot-init-x 0.765 0.765 1 --robot-init-y 0.222 0.222 1 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.025 -0.025 1 \
- --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
- --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c1.png \
- ${EXTRA_ARGS}
-
-# C2
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 113 \
- --env-name ${env_name} --scene-name dummy_drawer \
- --robot-init-x 0.865 0.865 1 --robot-init-y 0.222 0.222 1 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.025 -0.025 1 \
- --obj-init-x-range 0 0 1 --obj-init-y-range 0 0 1 \
- --rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c2.png \
- ${EXTRA_ARGS}
-}
-
-
-for policy_model in "${policy_models[@]}"; do
- for env_name in "${env_names[@]}"; do
- EvalOverlay
- done
-done
-
-
-done
diff --git a/scripts/octo_move_near_visual_matching.sh b/scripts/octo_move_near_visual_matching.sh
deleted file mode 100644
index dd89caee..00000000
--- a/scripts/octo_move_near_visual_matching.sh
+++ /dev/null
@@ -1,48 +0,0 @@
-
-
-gpu_id=0
-
-declare -a policy_models=(
- "octo-base"
- # "octo-server"
-)
-
-env_name=MoveNearGoogleBakedTexInScene-v0
-# env_name=MoveNearGoogleBakedTexInScene-v1
-scene_name=google_pick_coke_can_1_v4
-rgb_overlay_path=./ManiSkill2_real2sim/data/real_inpainting/google_move_near_real_eval_1.png
-# rgb_overlay_path=./ManiSkill2_real2sim/data/real_inpainting/google_move_near_real_eval_2.png
-
-# URDF variations
-declare -a urdf_version_arr=(None "recolor_tabletop_visual_matching_1" "recolor_tabletop_visual_matching_2" "recolor_cabinet_visual_matching_1")
-
-for policy_model in "${policy_models[@]}"; do echo "$policy_model"; done
-
-
-for urdf_version in "${urdf_version_arr[@]}";
-
-do for policy_model in "${policy_models[@]}";
-
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static \
- --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
- --env-name ${env_name} --scene-name ${scene_name} \
- --rgb-overlay-path ${rgb_overlay_path} \
- --robot-init-x 0.35 0.35 1 --robot-init-y 0.21 0.21 1 --obj-variation-mode episode --obj-episode-range 0 60 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
- --additional-env-build-kwargs urdf_version=${urdf_version} \
- --additional-env-save-tags baked_except_bpb_orange; # google_move_near_real_eval_1.png
-
-# do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
-# --robot google_robot_static \
-# --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
-# --env-name ${env_name} --scene-name ${scene_name} \
-# --rgb-overlay-path ${rgb_overlay_path} \
-# --robot-init-x 0.36 0.36 1 --robot-init-y 0.22 0.22 1 --obj-variation-mode episode --obj-episode-range 0 60 \
-# --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.028 -0.028 1 \
-# --additional-env-build-kwargs urdf_version=${urdf_version} \
-# --additional-env-save-tags baked_except_bpb_orange; # google_move_near_real_eval_2.png
-
-done
-
-done
diff --git a/scripts/octo_pick_coke_can_variant_agg.sh b/scripts/pick_coke_can_variant_agg.sh
similarity index 78%
rename from scripts/octo_pick_coke_can_variant_agg.sh
rename to scripts/pick_coke_can_variant_agg.sh
index c446b5d1..f5e1427a 100644
--- a/scripts/octo_pick_coke_can_variant_agg.sh
+++ b/scripts/pick_coke_can_variant_agg.sh
@@ -1,12 +1,15 @@
+ckpt_path=$1
+policy_model=$2
+action_ensemble_temp=$3
+logging_dir=$4
+gpu_id=$5
-gpu_id=0
-
-declare -a arr=("octo-base")
+declare -a arr=($ckpt_path)
# lr_switch=laying horizontally but flipped left-right to match real eval; upright=standing; laid_vertically=laying vertically
declare -a coke_can_options_arr=("lr_switch=True" "upright=True" "laid_vertically=True")
-for policy_model in "${arr[@]}"; do echo "$policy_model"; done
+for ckpt_path in "${arr[@]}"; do echo "$ckpt_path"; done
# base setup
@@ -16,9 +19,9 @@ scene_name=google_pick_coke_can_1_v4
for coke_can_option in "${coke_can_options_arr[@]}";
-do for policy_model in "${arr[@]}";
+do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -44,9 +47,9 @@ for coke_can_option in "${coke_can_options_arr[@]}";
do for scene_name in "${scene_arr[@]}";
-do for policy_model in "${arr[@]}";
+do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -70,9 +73,9 @@ scene_name=google_pick_coke_can_1_v4
for coke_can_option in "${coke_can_options_arr[@]}";
-do for policy_model in "${arr[@]}";
+do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -80,7 +83,7 @@ do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-
--robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
--additional-env-build-kwargs ${coke_can_option};
-CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -105,9 +108,9 @@ for coke_can_option in "${coke_can_options_arr[@]}";
do for scene_name in "${scene_arr[@]}";
-do for policy_model in "${arr[@]}";
+do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -130,9 +133,9 @@ scene_name=google_pick_coke_can_1_v4
for coke_can_option in "${coke_can_options_arr[@]}";
-do for policy_model in "${arr[@]}";
+do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -140,7 +143,7 @@ do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-
--robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
--additional-env-build-kwargs ${coke_can_option} slightly_darker_lighting=True;
-CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -165,9 +168,9 @@ for coke_can_option in "${coke_can_options_arr[@]}";
do for env_name in "${env_arr[@]}";
-do for policy_model in "${arr[@]}";
+do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
diff --git a/scripts/octo_pick_coke_can_visual_matching.sh b/scripts/pick_coke_can_visual_matching.sh
similarity index 72%
rename from scripts/octo_pick_coke_can_visual_matching.sh
rename to scripts/pick_coke_can_visual_matching.sh
index c633a339..5fa0451a 100644
--- a/scripts/octo_pick_coke_can_visual_matching.sh
+++ b/scripts/pick_coke_can_visual_matching.sh
@@ -1,12 +1,11 @@
+ckpt_path=$1
+policy_model=$2
+action_ensemble_temp=$3
+logging_dir=$4
+gpu_id=$5
+declare -a arr=($ckpt_path)
-gpu_id=0
-
-declare -a policy_models=(
- "octo-base"
- # "octo-small"
- # "octo-server"
-)
# lr_switch=laying horizontally but flipped left-right to match real eval; upright=standing; laid_vertically=laying vertically
declare -a coke_can_options_arr=("lr_switch=True" "upright=True" "laid_vertically=True")
@@ -18,18 +17,19 @@ env_name=GraspSingleOpenedCokeCanInScene-v0
scene_name=google_pick_coke_can_1_v4
rgb_overlay_path=./ManiSkill2_real2sim/data/real_inpainting/google_coke_can_real_eval_1.png
-for policy_model in "${policy_models[@]}"; do echo "$policy_model"; done
+for ckpt_path in "${arr[@]}"; do echo "$ckpt_path"; done
-for urdf_version in "${urdf_version_arr[@]}";
-do for policy_model in "${policy_models[@]}";
+for urdf_version in "${urdf_version_arr[@]}";
do for coke_can_option in "${coke_can_options_arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
- --robot google_robot_static --policy-setup google_robot \
- --control-freq 3 --sim-freq 501 --max-episode-steps 80 \
+do for ckpt_path in "${arr[@]}";
+
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
+ --robot google_robot_static \
+ --control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
--rgb-overlay-path ${rgb_overlay_path} \
--robot-init-x 0.35 0.35 1 --robot-init-y 0.20 0.20 1 --obj-init-x -0.35 -0.12 5 --obj-init-y -0.02 0.42 5 \
diff --git a/scripts/octo_put_in_drawer_variant_agg.sh b/scripts/put_in_drawer_variant_agg.sh
similarity index 77%
rename from scripts/octo_put_in_drawer_variant_agg.sh
rename to scripts/put_in_drawer_variant_agg.sh
index b833a896..e92d5454 100644
--- a/scripts/octo_put_in_drawer_variant_agg.sh
+++ b/scripts/put_in_drawer_variant_agg.sh
@@ -1,10 +1,11 @@
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
+ckpt_paths=($1)
+policy_model=$2
+action_ensemble_temp=$3
+logging_dir=$4
+gpu_id=$5
-
-
-declare -a policy_models=(
-"octo-base"
-)
+declare -a arr=("${ckpt_paths[@]}")
declare -a env_names=(
PlaceIntoClosedTopDrawerCustomInScene-v0
@@ -17,9 +18,9 @@ EXTRA_ARGS="--enable-raytracing --additional-env-build-kwargs model_ids=apple"
scene_name=frl_apartment_stage_simple
EvalSim() {
- echo ${policy_model} ${env_name}
+ echo ${ckpt_path} ${env_name}
- python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 200 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -30,7 +31,7 @@ EvalSim() {
}
-for policy_model in "${policy_models[@]}"; do
+for ckpt_path in "${ckpt_paths[@]}"; do
for env_name in "${env_names[@]}"; do
EvalSim
done
@@ -45,7 +46,7 @@ declare -a scene_names=(
)
for scene_name in "${scene_names[@]}"; do
- for policy_model in "${policy_models[@]}"; do
+ for ckpt_path in "${ckpt_paths[@]}"; do
for env_name in "${env_names[@]}"; do
EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt model_ids=apple"
EvalSim
@@ -57,7 +58,7 @@ done
# lightings
scene_name=frl_apartment_stage_simple
-for policy_model in "${policy_models[@]}"; do
+for ckpt_path in "${ckpt_paths[@]}"; do
for env_name in "${env_names[@]}"; do
EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt light_mode=brighter model_ids=apple"
EvalSim
@@ -70,7 +71,7 @@ done
# new cabinets
scene_name=frl_apartment_stage_simple
-for policy_model in "${policy_models[@]}"; do
+for ckpt_path in "${ckpt_paths[@]}"; do
for env_name in "${env_names[@]}"; do
EXTRA_ARGS="--additional-env-build-kwargs shader_dir=rt station_name=mk_station2 model_ids=apple"
EvalSim
diff --git a/scripts/octo_put_in_drawer_visual_matching.sh b/scripts/put_in_drawer_visual_matching.sh
similarity index 73%
rename from scripts/octo_put_in_drawer_visual_matching.sh
rename to scripts/put_in_drawer_visual_matching.sh
index f945815c..a42dd4bd 100644
--- a/scripts/octo_put_in_drawer_visual_matching.sh
+++ b/scripts/put_in_drawer_visual_matching.sh
@@ -1,11 +1,11 @@
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
+ckpt_paths=($1)
+policy_model=$2
+action_ensemble_temp=$3
+logging_dir=$4
+gpu_id=$5
-
-
-declare -a policy_models=(
-"octo-base"
-# "octo-server"
-)
+declare -a arr=("${ckpt_paths[@]}")
declare -a env_names=(
PlaceIntoClosedTopDrawerCustomInScene-v0
@@ -13,6 +13,7 @@ PlaceIntoClosedTopDrawerCustomInScene-v0
# PlaceIntoClosedBottomDrawerCustomInScene-v0
)
+
# URDF variations
declare -a urdf_version_arr=("recolor_cabinet_visual_matching_1" "recolor_tabletop_visual_matching_1" "recolor_tabletop_visual_matching_2" None)
@@ -20,9 +21,10 @@ for urdf_version in "${urdf_version_arr[@]}"; do
EXTRA_ARGS="--enable-raytracing --additional-env-build-kwargs station_name=mk_station_recolor light_mode=simple disable_bad_material=True urdf_version=${urdf_version} model_ids=baked_apple_v2"
+
EvalOverlay() {
# A0
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 200 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -31,9 +33,9 @@ python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path
--obj-init-x-range -0.08 -0.02 3 --obj-init-y-range -0.02 0.08 3 \
--rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a0.png \
${EXTRA_ARGS}
-
+echo "Done A0"
# B0
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 200 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -42,9 +44,9 @@ python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path
--obj-init-x-range -0.08 -0.02 3 --obj-init-y-range -0.02 0.08 3 \
--rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b0.png \
${EXTRA_ARGS}
-
+echo "Done B0"
# C0
-python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path None \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} --action-ensemble-temp ${action_ensemble_temp} --logging-dir ${logging_dir} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 200 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -53,14 +55,16 @@ python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path
--obj-init-x-range -0.08 -0.02 3 --obj-init-y-range -0.02 0.08 3 \
--rgb-overlay-path ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c0.png \
${EXTRA_ARGS}
+echo "Done C0"
}
-for policy_model in "${policy_models[@]}"; do
+for ckpt_path in "${ckpt_paths[@]}"; do
for env_name in "${env_names[@]}"; do
EvalOverlay
done
done
+
done
diff --git a/scripts/run_cogact.sh b/scripts/run_cogact.sh
new file mode 100644
index 00000000..be4df642
--- /dev/null
+++ b/scripts/run_cogact.sh
@@ -0,0 +1,39 @@
+model_name=cogact
+tasks=(
+ bridge.sh
+ drawer_variant_agg.sh
+ drawer_visual_matching.sh
+ pick_coke_can_variant_agg.sh
+ pick_coke_can_visual_matching.sh
+ move_near_variant_agg.sh
+ move_near_visual_matching.sh
+ put_in_drawer_variant_agg.sh
+ put_in_drawer_visual_matching.sh
+)
+
+ckpts=(
+ CogACT/CogACT-Large
+)
+
+action_ensemble_temp=-0.8
+for ckpt_path in ${ckpts[@]}; do
+ base_dir=$(dirname $ckpt_path)
+
+ # evaluation in simulator
+ # logging_dir=$base_dir/simpler_env/$(basename $ckpt_path)${action_ensemble_temp}
+ logging_dir=results/$(basename $ckpt_path)${action_ensemble_temp}
+ mkdir -p $logging_dir
+ for i in ${!tasks[@]}; do
+ task=${tasks[$i]}
+ echo "🚀 running $task ..."
+ device=0
+ session_name=CUDA${device}-$(basename $logging_dir)-${task}
+ bash scripts/$task $ckpt_path $model_name $action_ensemble_temp $logging_dir $device
+ done
+
+    # aggregate evaluation results
+ echo "🚀 all tasks DONE! Calculating metrics..."
+ python tools/calc_metrics_evaluation_videos.py \
+ --log-dir-root $logging_dir \
+ >>$logging_dir/total.metrics
+done
diff --git a/scripts/run_gr00t.sh b/scripts/run_gr00t.sh
new file mode 100644
index 00000000..10a9fb78
--- /dev/null
+++ b/scripts/run_gr00t.sh
@@ -0,0 +1,45 @@
+# source scripts/env.sh
+# XDG_RUNTIME_DIR=/usr/lib
+# LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
+
+model_name=gr00t
+tasks=(
+ bridge.sh
+
+ # drawer_variant_agg.sh
+ # drawer_visual_matching.sh
+
+ # move_near_variant_agg.sh
+ # move_near_visual_matching.sh
+
+ # pick_coke_can_variant_agg.sh
+ # pick_coke_can_visual_matching.sh
+
+ # put_in_drawer_variant_agg.sh
+ # put_in_drawer_visual_matching.sh
+)
+
+ckpts=(
+    # path to your local model
+)
+
+action_ensemble_temp=-0.8
+
+for ckpt_path in ${ckpts[@]}; do
+
+ logging_dir=results/$(basename $ckpt_path)${action_ensemble_temp}
+
+ mkdir -p $logging_dir
+ for i in ${!tasks[@]}; do
+ task=${tasks[$i]}
+ echo "🚀 running $task ..."
+ device=0
+ bash scripts/$task $ckpt_path $model_name $action_ensemble_temp $logging_dir $device
+ done
+
+    # aggregate evaluation results
+ echo "🚀 all tasks DONE! Calculating metrics..."
+ python tools/calc_metrics_evaluation_videos.py \
+ --log-dir-root $logging_dir \
+ >>$logging_dir/total.metrics
+done
diff --git a/scripts/run_lerobotpifast.sh b/scripts/run_lerobotpifast.sh
new file mode 100644
index 00000000..d2afc1ed
--- /dev/null
+++ b/scripts/run_lerobotpifast.sh
@@ -0,0 +1,40 @@
+# XDG_RUNTIME_DIR=/usr/lib
+# LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
+MUJOCO_GL="egl"
+
+model_name=lerobotpifast
+
+tasks=(
+ bridge.sh
+ drawer_variant_agg.sh
+ drawer_visual_matching.sh
+ move_near_variant_agg.sh
+ move_near_visual_matching.sh
+ pick_coke_can_variant_agg.sh
+ pick_coke_can_visual_matching.sh
+ put_in_drawer_variant_agg.sh
+ put_in_drawer_visual_matching.sh
+)
+
+ckpts=(
+lerobot/pi0
+)
+
+action_ensemble_temp=-0.8
+
+for ckpt_path in ${ckpts[@]}; do
+
+ logging_dir=results/$(basename $ckpt_path)${action_ensemble_temp}
+ mkdir -p $logging_dir
+ for i in ${!tasks[@]}; do
+ task=${tasks[$i]}
+ cuda=$((i % 8))
+ GPU_IDX=${cuda} bash scripts/$task $ckpt_path $model_name $action_ensemble_temp $logging_dir
+ done
+
+    # aggregate evaluation results
+ echo "🚀 all tasks DONE! Calculating metrics..."
+ python tools/calc_metrics_evaluation_videos.py \
+ --log-dir-root $logging_dir \
+ >>$logging_dir/total.metrics
+done
diff --git a/scripts/run_octo.sh b/scripts/run_octo.sh
new file mode 100644
index 00000000..bb6116d3
--- /dev/null
+++ b/scripts/run_octo.sh
@@ -0,0 +1,39 @@
+model_name=octo-base
+tasks=(
+ bridge.sh
+ drawer_variant_agg.sh
+ drawer_visual_matching.sh
+ move_near_variant_agg.sh
+ move_near_visual_matching.sh
+ pick_coke_can_variant_agg.sh
+ pick_coke_can_visual_matching.sh
+ put_in_drawer_variant_agg.sh
+ put_in_drawer_visual_matching.sh
+)
+
+ckpts=(
+ None
+)
+
+action_ensemble_temp=-0.8
+for ckpt_path in ${ckpts[@]}; do
+ base_dir=$(dirname $ckpt_path)
+
+ # evaluation in simulator
+ # logging_dir=$base_dir/simpler_env/$(basename $ckpt_path)${action_ensemble_temp}
+ logging_dir=results/$(basename $ckpt_path)${action_ensemble_temp}
+ mkdir -p $logging_dir
+ for i in ${!tasks[@]}; do
+ task=${tasks[$i]}
+ echo "🚀 running $task ..."
+ device=0
+ session_name=CUDA${device}-$(basename $logging_dir)-${task}
+ bash scripts/$task $ckpt_path $model_name $action_ensemble_temp $logging_dir $device
+ done
+
+    # aggregate evaluation results
+ echo "🚀 all tasks DONE! Calculating metrics..."
+ python tools/calc_metrics_evaluation_videos.py \
+ --log-dir-root $logging_dir \
+ >>$logging_dir/total.metrics
+done
diff --git a/scripts/run_openpifast.sh b/scripts/run_openpifast.sh
new file mode 100644
index 00000000..416ba62a
--- /dev/null
+++ b/scripts/run_openpifast.sh
@@ -0,0 +1,48 @@
+XDG_RUNTIME_DIR=/usr/lib
+LD_LIBRARY_PATH="/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}"
+
+model_name=openpifast
+
+tasks=(
+ bridge.sh
+ drawer_variant_agg.sh
+ drawer_visual_matching.sh
+ move_near_variant_agg.sh
+ move_near_visual_matching.sh
+ pick_coke_can_variant_agg.sh
+ pick_coke_can_visual_matching.sh
+ put_in_drawer_variant_agg.sh
+ put_in_drawer_visual_matching.sh
+)
+
+ckpts=(
+
+)
+
+
+# to start the server, please refer to https://github.com/Physical-Intelligence/openpi
+# e.g.,
+# cd /cpfs01/openpi
+# uv run scripts/serve_policy.py policy:checkpoint \
+# --policy.config=pi0_fast_bridge_lora_pt_tokenizer \
+# --policy.dir=/cpfs01/openpi/checkpoints/pi0_fast_bridge_lora_pt_tokenizer/pi0_fast_bridge_lora_pt_tokenizer/29999
+
+action_ensemble_temp=-0.8
+
+# client inference
+for ckpt_path in ${ckpts[@]}; do
+
+ logging_dir=results/$(basename $ckpt_path)${action_ensemble_temp}
+ mkdir -p $logging_dir
+ for i in ${!tasks[@]}; do
+ task=${tasks[$i]}
+ cuda=$((i % 8))
+ GPU_IDX=${cuda} bash scripts/$task $ckpt_path $model_name $action_ensemble_temp $logging_dir
+ done
+
+    # aggregate evaluation results
+ echo "🚀 all tasks DONE! Calculating metrics..."
+ python tools/calc_metrics_evaluation_videos.py \
+ --log-dir-root $logging_dir \
+ >>$logging_dir/total.metrics
+done
diff --git a/scripts/run_openvla.sh b/scripts/run_openvla.sh
new file mode 100644
index 00000000..fb5f3c43
--- /dev/null
+++ b/scripts/run_openvla.sh
@@ -0,0 +1,39 @@
+model_name=openvla
+tasks=(
+ bridge.sh
+ drawer_variant_agg.sh
+ drawer_visual_matching.sh
+ move_near_variant_agg.sh
+ move_near_visual_matching.sh
+ pick_coke_can_variant_agg.sh
+ pick_coke_can_visual_matching.sh
+ put_in_drawer_variant_agg.sh
+ put_in_drawer_visual_matching.sh
+)
+
+ckpts=(
+ ~/projects/vla_ptm/pretrained/openvla-7b
+)
+
+action_ensemble_temp=-0.8
+for ckpt_path in ${ckpts[@]}; do
+ base_dir=$(dirname $ckpt_path)
+
+ # evaluation in simulator
+ # logging_dir=$base_dir/simpler_env/$(basename $ckpt_path)${action_ensemble_temp}
+ logging_dir=results/$(basename $ckpt_path)${action_ensemble_temp}
+ mkdir -p $logging_dir
+ for i in ${!tasks[@]}; do
+ task=${tasks[$i]}
+ echo "🚀 running $task ..."
+ device=0
+ session_name=CUDA${device}-$(basename $logging_dir)-${task}
+ bash scripts/$task $ckpt_path $model_name $action_ensemble_temp $logging_dir $device
+ done
+
+    # aggregate evaluation results
+ echo "🚀 all tasks DONE! Calculating metrics..."
+ python tools/calc_metrics_evaluation_videos.py \
+ --log-dir-root $logging_dir \
+ >>$logging_dir/total.metrics
+done
diff --git a/scripts/run_rt1.sh b/scripts/run_rt1.sh
new file mode 100644
index 00000000..eb803c34
--- /dev/null
+++ b/scripts/run_rt1.sh
@@ -0,0 +1,39 @@
+model_name=rt1
+tasks=(
+ bridge.sh
+ drawer_variant_agg.sh
+ drawer_visual_matching.sh
+ move_near_variant_agg.sh
+ move_near_visual_matching.sh
+ pick_coke_can_variant_agg.sh
+ pick_coke_can_visual_matching.sh
+ put_in_drawer_variant_agg.sh
+ put_in_drawer_visual_matching.sh
+)
+
+ckpts=(
+ ../pretrained/rt_1_tf_trained_for_000400120
+)
+
+action_ensemble_temp=-0.8
+for ckpt_path in ${ckpts[@]}; do
+ base_dir=$(dirname $ckpt_path)
+
+ # evaluation in simulator
+ # logging_dir=$base_dir/simpler_env/$(basename $ckpt_path)${action_ensemble_temp}
+ logging_dir=results/$(basename $ckpt_path)${action_ensemble_temp}
+ mkdir -p $logging_dir
+ for i in ${!tasks[@]}; do
+ task=${tasks[$i]}
+ echo "🚀 running $task ..."
+ device=0
+ session_name=CUDA${device}-$(basename $logging_dir)-${task}
+ bash scripts/$task $ckpt_path $model_name $action_ensemble_temp $logging_dir $device
+ done
+
+    # aggregate evaluation results
+ echo "🚀 all tasks DONE! Calculating metrics..."
+ python tools/calc_metrics_evaluation_videos.py \
+ --log-dir-root $logging_dir \
+ >>$logging_dir/total.metrics
+done
diff --git a/scripts/run_spatialvla.sh b/scripts/run_spatialvla.sh
new file mode 100644
index 00000000..370370e8
--- /dev/null
+++ b/scripts/run_spatialvla.sh
@@ -0,0 +1,42 @@
+model_name=spatialvla
+tasks=(
+ bridge.sh
+ drawer_variant_agg.sh
+ drawer_visual_matching.sh
+ move_near_variant_agg.sh
+ move_near_visual_matching.sh
+ pick_coke_can_variant_agg.sh
+ pick_coke_can_visual_matching.sh
+
+ # put_in_drawer_variant_agg.sh
+ # put_in_drawer_visual_matching.sh
+)
+
+ckpts=(
+ IPEC-COMMUNITY/spatialvla-4b-224-pt # or a local path
+)
+
+action_ensemble_temp=-0.8
+for ckpt_path in ${ckpts[@]}; do
+    # 🤗 NOTE: set the HF cache to avoid conflicts
+ # base_dir=$(dirname $ckpt_path)
+ # export HF_MODULES_CACHE=$base_dir/hf_cache/modules
+ # mkdir -p $HF_MODULES_CACHE
+ # logging_dir=$base_dir/simpler_env/$(basename $ckpt_path)${action_ensemble_temp}
+
+ logging_dir=results/$(basename $ckpt_path)${action_ensemble_temp}
+
+ mkdir -p $logging_dir
+ for i in ${!tasks[@]}; do
+ task=${tasks[$i]}
+ echo "🚀 running $task ..."
+ device=0
+ bash scripts/$task $ckpt_path $model_name $action_ensemble_temp $logging_dir $device
+ done
+
+    # aggregate evaluation results
+ echo "🚀 all tasks DONE! Calculating metrics..."
+ python tools/calc_metrics_evaluation_videos.py \
+ --log-dir-root $logging_dir \
+ >>$logging_dir/total.metrics
+done
diff --git a/setup.py b/setup.py
deleted file mode 100644
index 2ff8c4ba..00000000
--- a/setup.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from setuptools import find_packages, setup
-
-setup(
- name="simpler_env",
- version="0.0.1",
- author="Xuanlin Li",
- packages=find_packages(include=["simpler_env*"]),
- python_requires=">=3.10",
-)
diff --git a/simpler_env/__init__.py b/simpler_env/__init__.py
index e8b8aa53..cbecacd6 100644
--- a/simpler_env/__init__.py
+++ b/simpler_env/__init__.py
@@ -1,6 +1,5 @@
import gymnasium as gym
import mani_skill2_real2sim.envs
-import warnings
ENVIRONMENTS = [
"google_robot_pick_coke_can",
@@ -71,18 +70,10 @@
}
-def make(task_name, **kwargs):
+def make(task_name):
"""Creates simulated eval environment from task name."""
assert task_name in ENVIRONMENTS, f"Task {task_name} is not supported. Environments: \n {ENVIRONMENTS}"
- env_name, env_kwargs = ENVIRONMENT_MAP[task_name]
-
- env_kwargs["obs_mode"] = "rgbd",
- env_kwargs["prepackaged_config"] = True
-
- for key, value in kwargs.items():
- if key in env_kwargs:
- warnings.warn(f"default value [{env_kwargs[key]}] for Key {key} changes to value [{value}]")
- env_kwargs[key] = value
-
- env = gym.make(env_name, **env_kwargs)
+ env_name, kwargs = ENVIRONMENT_MAP[task_name]
+ kwargs["prepackaged_config"] = True
+ env = gym.make(env_name, obs_mode="rgbd", **kwargs)
return env
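
The simplified make() above always builds the prepackaged environment with rgbd observations and no longer forwards extra kwargs. A minimal usage sketch under that assumption (the task name must be one of the entries in ENVIRONMENTS):

import simpler_env
from simpler_env.utils.env.observation_utils import get_image_from_maniskill2_obs_dict

env = simpler_env.make("google_robot_pick_coke_can")
obs, reset_info = env.reset()
instruction = env.get_language_instruction()
image = get_image_from_maniskill2_obs_dict(env, obs)  # RGB frame fed to the policy
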
diff --git a/simpler_env/evaluation/argparse.py b/simpler_env/evaluation/argparse.py
index d17e4d88..9da3e992 100644
--- a/simpler_env/evaluation/argparse.py
+++ b/simpler_env/evaluation/argparse.py
@@ -44,7 +44,7 @@ def get_args():
help="Obtain image observation from this camera for policy input. None = default",
)
parser.add_argument("--action-scale", type=float, default=1.0)
-
+ parser.add_argument("--action-ensemble-temp", type=float, default=-0.8)
parser.add_argument("--control-freq", type=int, default=3)
parser.add_argument("--sim-freq", type=int, default=513)
parser.add_argument("--max-episode-steps", type=int, default=80)
@@ -99,7 +99,6 @@ def get_args():
default=[-0.02, 0.42, 5],
help="[ymin, ymax, num]",
)
-
parser.add_argument(
"--additional-env-build-kwargs",
nargs="+",
diff --git a/simpler_env/evaluation/maniskill2_evaluator.py b/simpler_env/evaluation/maniskill2_evaluator.py
index 99a788e0..a8244792 100644
--- a/simpler_env/evaluation/maniskill2_evaluator.py
+++ b/simpler_env/evaluation/maniskill2_evaluator.py
@@ -7,9 +7,12 @@
import numpy as np
from transforms3d.euler import quat2euler
-from simpler_env.utils.env.env_builder import build_maniskill2_env, get_robot_control_mode
+from simpler_env.utils.env.env_builder import (
+ build_maniskill2_env,
+ get_robot_control_mode,
+)
from simpler_env.utils.env.observation_utils import get_image_from_maniskill2_obs_dict
-from simpler_env.utils.visualization import write_video
+from simpler_env.utils.visualization import write_interval_video, write_video
def run_maniskill2_eval_single_episode(
@@ -36,7 +39,6 @@ def run_maniskill2_eval_single_episode(
additional_env_save_tags=None,
logging_dir="./results",
):
-
if additional_env_build_kwargs is None:
additional_env_build_kwargs = {}
@@ -62,7 +64,7 @@ def run_maniskill2_eval_single_episode(
**additional_env_build_kwargs,
**kwargs,
)
-
+ # __import__('ipdb').set_trace()
# initialize environment
env_reset_options = {
"robot_init_options": {
@@ -84,7 +86,7 @@ def run_maniskill2_eval_single_episode(
}
obs, _ = env.reset(options=env_reset_options)
# for long-horizon environments, we check if the current subtask is the final subtask
- is_final_subtask = env.is_final_subtask()
+ is_final_subtask = env.is_final_subtask()
# Obtain language instruction
if instruction is not None:
@@ -105,11 +107,13 @@ def run_maniskill2_eval_single_episode(
timestep = 0
success = "failure"
+ # action_ensemble = model.action_ensemble_temp if hasattr(model, "action_ensemble") else "none"
# Step the environment
+ task_descriptions = []
while not (predicted_terminated or truncated):
# step the model; "raw_action" is raw model action output; "action" is the processed action to be sent into maniskill env
- raw_action, action = model.step(image, task_description)
+ raw_action, action = model.step(image, task_description, eef_pos=obs["agent"]["eef_pos"])
predicted_actions.append(raw_action)
predicted_terminated = bool(action["terminate_episode"][0] > 0)
if predicted_terminated:
@@ -120,9 +124,11 @@ def run_maniskill2_eval_single_episode(
# step the environment
obs, reward, done, truncated, info = env.step(
- np.concatenate([action["world_vector"], action["rot_axangle"], action["gripper"]]),
+ np.concatenate(
+ [action["world_vector"], action["rot_axangle"], action["gripper"]]
+ ),
)
-
+
success = "success" if done else "failure"
new_task_description = env.get_language_instruction()
if new_task_description != task_description:
@@ -132,14 +138,18 @@ def run_maniskill2_eval_single_episode(
print(timestep, info)
- image = get_image_from_maniskill2_obs_dict(env, obs, camera_name=obs_camera_name)
+ image = get_image_from_maniskill2_obs_dict(
+ env, obs, camera_name=obs_camera_name
+ )
images.append(image)
+ task_descriptions.append(task_description)
timestep += 1
episode_stats = info.get("episode_stats", {})
# save video
env_save_name = env_name
+
for k, v in additional_env_build_kwargs.items():
env_save_name = env_save_name + f"_{k}_{v}"
if additional_env_save_tags is not None:
@@ -158,7 +168,7 @@ def run_maniskill2_eval_single_episode(
else:
rgb_overlay_path_str = "None"
r, p, y = quat2euler(robot_init_quat)
- video_path = f"{ckpt_path_basename}/{scene_name}/{control_mode}/{env_save_name}/rob_{robot_init_x}_{robot_init_y}_rot_{r:.3f}_{p:.3f}_{y:.3f}_rgb_overlay_{rgb_overlay_path_str}/{video_name}"
+ video_path = f"{scene_name}/{control_mode}/{env_save_name}/rob_{robot_init_x}_{robot_init_y}_rot_{r:.3f}_{p:.3f}_{y:.3f}_rgb_overlay_{rgb_overlay_path_str}/{video_name}"
video_path = os.path.join(logging_dir, video_path)
write_video(video_path, images, fps=5)
@@ -168,7 +178,6 @@ def run_maniskill2_eval_single_episode(
os.makedirs(action_root, exist_ok=True)
action_path = action_root + os.path.basename(action_path)
model.visualize_epoch(predicted_actions, images, save_path=action_path)
-
return success == "success"
@@ -211,8 +220,14 @@ def maniskill2_evaluator(model, args):
)
)
elif args.obj_variation_mode == "episode":
- for obj_episode_id in range(args.obj_episode_range[0], args.obj_episode_range[1]):
- success_arr.append(run_maniskill2_eval_single_episode(obj_episode_id=obj_episode_id, **kwargs))
+ for obj_episode_id in range(
+ args.obj_episode_range[0], args.obj_episode_range[1]
+ ):
+ success_arr.append(
+ run_maniskill2_eval_single_episode(
+ obj_episode_id=obj_episode_id, **kwargs
+ )
+ )
else:
raise NotImplementedError()
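
Note that the evaluator now forwards the end-effector pose to the policy on every step (model.step(image, task_description, eef_pos=obs["agent"]["eef_pos"])), so any policy wrapper plugged into maniskill2_evaluator has to accept that keyword. A minimal sketch of the expected interface; the class and its internals are illustrative placeholders, not part of this patch:

import numpy as np

class DummyPolicy:
    # Stand-in showing the methods maniskill2_evaluator calls on a policy wrapper.
    def reset(self, task_description: str) -> None:
        self.task_description = task_description

    def step(self, image: np.ndarray, task_description: str, eef_pos: np.ndarray = None):
        raw_action = {}  # unprocessed model output, only used for visualization
        action = {       # processed action consumed by the ManiSkill2 env
            "world_vector": np.zeros(3),
            "rot_axangle": np.zeros(3),
            "gripper": np.zeros(1),
            "terminate_episode": np.zeros(1),
        }
        return raw_action, action

    def visualize_epoch(self, predicted_actions, images, save_path: str) -> None:
        pass
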
diff --git a/simpler_env/main_inference.py b/simpler_env/main_inference.py
index 320f2685..60336140 100644
--- a/simpler_env/main_inference.py
+++ b/simpler_env/main_inference.py
@@ -5,8 +5,6 @@
from simpler_env.evaluation.argparse import get_args
from simpler_env.evaluation.maniskill2_evaluator import maniskill2_evaluator
-from simpler_env.policies.octo.octo_server_model import OctoServerInference
-from simpler_env.policies.rt1.rt1_model import RT1Inference
try:
from simpler_env.policies.octo.octo_model import OctoInference
@@ -17,7 +15,6 @@
if __name__ == "__main__":
args = get_args()
-
os.environ["DISPLAY"] = ""
# prevent a single jax process from taking up all the GPU memory
os.environ["XLA_PYTHON_CLIENT_PREALLOCATE"] = "false"
@@ -28,9 +25,10 @@
gpus[0],
[tf.config.LogicalDeviceConfiguration(memory_limit=args.tf_memory_limit)],
)
-
+ print(f"**** {args.policy_model} ****")
# policy model creation; update this if you are using a new policy model
if args.policy_model == "rt1":
+ from simpler_env.policies.rt1.rt1_model import RT1Inference
assert args.ckpt_path is not None
model = RT1Inference(
saved_model_path=args.ckpt_path,
@@ -38,6 +36,7 @@
action_scale=args.action_scale,
)
elif "octo" in args.policy_model:
+ from simpler_env.policies.octo.octo_server_model import OctoServerInference
if args.ckpt_path is None or args.ckpt_path == "None":
args.ckpt_path = args.policy_model
if "server" in args.policy_model:
@@ -53,6 +52,56 @@
init_rng=args.octo_init_rng,
action_scale=args.action_scale,
)
+ elif args.policy_model == "openvla":
+ assert args.ckpt_path is not None
+ from simpler_env.policies.openvla.openvla_model import OpenVLAInference
+ model = OpenVLAInference(
+ saved_model_path=args.ckpt_path,
+ policy_setup=args.policy_setup,
+ action_scale=args.action_scale,
+ )
+ elif args.policy_model == "cogact":
+ from simpler_env.policies.sim_cogact import CogACTInference
+ assert args.ckpt_path is not None
+ model = CogACTInference(
+ saved_model_path=args.ckpt_path, # e.g., CogACT/CogACT-Base
+ policy_setup=args.policy_setup,
+ action_scale=args.action_scale,
+ action_model_type='DiT-L',
+            cfg_scale=1.5  # CFG scale values from 1.5 to 7 also perform well
+ )
+ elif args.policy_model == "spatialvla":
+ assert args.ckpt_path is not None
+ from simpler_env.policies.spatialvla.spatialvla_model import SpatialVLAInference
+ model = SpatialVLAInference(
+ saved_model_path=args.ckpt_path,
+ policy_setup=args.policy_setup,
+ action_scale=args.action_scale,
+ )
+ elif args.policy_model == "openpifast":
+ assert args.ckpt_path is not None
+ from simpler_env.policies.openpi.pi0_or_fast import OpenPiFastInference
+ model = OpenPiFastInference(
+ saved_model_path=args.ckpt_path,
+ policy_setup=args.policy_setup,
+ action_scale=args.action_scale,
+ )
+ elif args.policy_model == "lerobotpifast":
+ assert args.ckpt_path is not None
+ from simpler_env.policies.lerobotpi.pi0_or_fast import LerobotPiFastInference
+ model = LerobotPiFastInference(
+ saved_model_path=args.ckpt_path,
+ policy_setup=args.policy_setup,
+ action_scale=args.action_scale,
+ )
+ elif args.policy_model == "gr00t":
+ assert args.ckpt_path is not None
+ from simpler_env.policies.gr00t.gr00t_model import Gr00tInference
+ model = Gr00tInference(
+ saved_model_path=args.ckpt_path,
+ policy_setup=args.policy_setup,
+ action_scale=args.action_scale,
+ )
else:
raise NotImplementedError()
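
Every backend added here follows the same pattern: the heavy import happens lazily inside its branch so unused dependencies are never loaded, and the constructor takes saved_model_path, policy_setup and action_scale. A hedged sketch of how one more backend would slot in; MyPolicyInference and its module path are hypothetical, not part of this patch:

def build_extra_policy(args):
    # Illustrative helper mirroring the elif chain above for a hypothetical backend.
    if args.policy_model == "mypolicy":
        assert args.ckpt_path is not None
        from simpler_env.policies.mypolicy.mypolicy_model import MyPolicyInference  # hypothetical module
        return MyPolicyInference(
            saved_model_path=args.ckpt_path,
            policy_setup=args.policy_setup,
            action_scale=args.action_scale,
        )
    raise NotImplementedError(args.policy_model)
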
diff --git a/simpler_env/policies/__init__.py b/simpler_env/policies/__init__.py
deleted file mode 100644
index 8b137891..00000000
--- a/simpler_env/policies/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-
diff --git a/simpler_env/policies/gr00t/geometry.py b/simpler_env/policies/gr00t/geometry.py
new file mode 100644
index 00000000..f1e7164a
--- /dev/null
+++ b/simpler_env/policies/gr00t/geometry.py
@@ -0,0 +1,463 @@
+"""
+Mostly copied from transforms3d library
+
+"""
+
+import math
+
+import numpy as np
+
+_FLOAT_EPS = np.finfo(np.float64).eps
+
+# axis sequences for Euler angles
+_NEXT_AXIS = [1, 2, 0, 1]
+
+# map axes strings to/from tuples of inner axis, parity, repetition, frame
+_AXES2TUPLE = {
+ "sxyz": (0, 0, 0, 0),
+ "sxyx": (0, 0, 1, 0),
+ "sxzy": (0, 1, 0, 0),
+ "sxzx": (0, 1, 1, 0),
+ "syzx": (1, 0, 0, 0),
+ "syzy": (1, 0, 1, 0),
+ "syxz": (1, 1, 0, 0),
+ "syxy": (1, 1, 1, 0),
+ "szxy": (2, 0, 0, 0),
+ "szxz": (2, 0, 1, 0),
+ "szyx": (2, 1, 0, 0),
+ "szyz": (2, 1, 1, 0),
+ "rzyx": (0, 0, 0, 1),
+ "rxyx": (0, 0, 1, 1),
+ "ryzx": (0, 1, 0, 1),
+ "rxzx": (0, 1, 1, 1),
+ "rxzy": (1, 0, 0, 1),
+ "ryzy": (1, 0, 1, 1),
+ "rzxy": (1, 1, 0, 1),
+ "ryxy": (1, 1, 1, 1),
+ "ryxz": (2, 0, 0, 1),
+ "rzxz": (2, 0, 1, 1),
+ "rxyz": (2, 1, 0, 1),
+ "rzyz": (2, 1, 1, 1),
+}
+
+_TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items())
+
+# For testing whether a number is close to zero
+_EPS4 = np.finfo(float).eps * 4.0
+
+
+def mat2euler(mat, axes="sxyz"):
+ """Return Euler angles from rotation matrix for specified axis sequence.
+
+ Note that many Euler angle triplets can describe one matrix.
+
+ Parameters
+ ----------
+ mat : array-like shape (3, 3) or (4, 4)
+ Rotation matrix or affine.
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+
+ Examples
+ --------
+ >>> R0 = euler2mat(1, 2, 3, 'syxz')
+ >>> al, be, ga = mat2euler(R0, 'syxz')
+ >>> R1 = euler2mat(al, be, ga, 'syxz')
+ >>> np.allclose(R0, R1)
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis
+ j = _NEXT_AXIS[i + parity]
+ k = _NEXT_AXIS[i - parity + 1]
+
+ M = np.array(mat, dtype=np.float64, copy=False)[:3, :3]
+ if repetition:
+ sy = math.sqrt(M[i, j] * M[i, j] + M[i, k] * M[i, k])
+ if sy > _EPS4:
+ ax = math.atan2(M[i, j], M[i, k])
+ ay = math.atan2(sy, M[i, i])
+ az = math.atan2(M[j, i], -M[k, i])
+ else:
+ ax = math.atan2(-M[j, k], M[j, j])
+ ay = math.atan2(sy, M[i, i])
+ az = 0.0
+ else:
+ cy = math.sqrt(M[i, i] * M[i, i] + M[j, i] * M[j, i])
+ if cy > _EPS4:
+ ax = math.atan2(M[k, j], M[k, k])
+ ay = math.atan2(-M[k, i], cy)
+ az = math.atan2(M[j, i], M[i, i])
+ else:
+ ax = math.atan2(-M[j, k], M[j, j])
+ ay = math.atan2(-M[k, i], cy)
+ az = 0.0
+
+ if parity:
+ ax, ay, az = -ax, -ay, -az
+ if frame:
+ ax, az = az, ax
+ return ax, ay, az
+
+
+def quat2mat(q):
+ """Calculate rotation matrix corresponding to quaternion
+
+ Parameters
+ ----------
+ q : 4 element array-like
+
+ Returns
+ -------
+ M : (3,3) array
+ Rotation matrix corresponding to input quaternion *q*
+
+ Notes
+ -----
+ Rotation matrix applies to column vectors, and is applied to the
+ left of coordinate vectors. The algorithm here allows quaternions that
+ have not been normalized.
+
+ References
+ ----------
+ Algorithm from http://en.wikipedia.org/wiki/Rotation_matrix#Quaternion
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> M = quat2mat([1, 0, 0, 0]) # Identity quaternion
+ >>> np.allclose(M, np.eye(3))
+ True
+ >>> M = quat2mat([0, 1, 0, 0]) # 180 degree rotn around axis 0
+ >>> np.allclose(M, np.diag([1, -1, -1]))
+ True
+ """
+ w, x, y, z = q
+ Nq = w * w + x * x + y * y + z * z
+ if Nq < _FLOAT_EPS:
+ return np.eye(3)
+ s = 2.0 / Nq
+ X = x * s
+ Y = y * s
+ Z = z * s
+ wX = w * X
+ wY = w * Y
+ wZ = w * Z
+ xX = x * X
+ xY = x * Y
+ xZ = x * Z
+ yY = y * Y
+ yZ = y * Z
+ zZ = z * Z
+ return np.array(
+ [
+ [1.0 - (yY + zZ), xY - wZ, xZ + wY],
+ [xY + wZ, 1.0 - (xX + zZ), yZ - wX],
+ [xZ - wY, yZ + wX, 1.0 - (xX + yY)],
+ ]
+ )
+
+
+# Checks if a matrix is a valid rotation matrix.
+def isrotation(
+ R: np.ndarray,
+ thresh=1e-6,
+) -> bool:
+ Rt = np.transpose(R)
+ shouldBeIdentity = np.dot(Rt, R)
+ iden = np.identity(3, dtype=R.dtype)
+ n = np.linalg.norm(iden - shouldBeIdentity)
+ return n < thresh
+
+
+def euler2mat(ai, aj, ak, axes="sxyz"):
+ """Return rotation matrix from Euler angles and axis sequence.
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ mat : array (3, 3)
+ Rotation matrix or affine.
+
+ Examples
+ --------
+ >>> R = euler2mat(1, 2, 3, 'syxz')
+ >>> np.allclose(np.sum(R[0]), -1.34786452)
+ True
+ >>> R = euler2mat(1, 2, 3, (0, 1, 0, 1))
+ >>> np.allclose(np.sum(R[0]), -0.383436184)
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis
+ j = _NEXT_AXIS[i + parity]
+ k = _NEXT_AXIS[i - parity + 1]
+
+ if frame:
+ ai, ak = ak, ai
+ if parity:
+ ai, aj, ak = -ai, -aj, -ak
+
+ si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak)
+ ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak)
+ cc, cs = ci * ck, ci * sk
+ sc, ss = si * ck, si * sk
+
+ M = np.eye(3)
+ if repetition:
+ M[i, i] = cj
+ M[i, j] = sj * si
+ M[i, k] = sj * ci
+ M[j, i] = sj * sk
+ M[j, j] = -cj * ss + cc
+ M[j, k] = -cj * cs - sc
+ M[k, i] = -sj * ck
+ M[k, j] = cj * sc + cs
+ M[k, k] = cj * cc - ss
+ else:
+ M[i, i] = cj * ck
+ M[i, j] = sj * sc - cs
+ M[i, k] = sj * cc + ss
+ M[j, i] = cj * sk
+ M[j, j] = sj * ss + cc
+ M[j, k] = sj * cs - sc
+ M[k, i] = -sj
+ M[k, j] = cj * si
+ M[k, k] = cj * ci
+ return M
+
+
+def euler2axangle(ai, aj, ak, axes="sxyz"):
+ """Return angle, axis corresponding to Euler angles, axis specification
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ vector : array shape (3,)
+ axis around which rotation occurs
+ theta : scalar
+ angle of rotation
+
+ Examples
+ --------
+ >>> vec, theta = euler2axangle(0, 1.5, 0, 'szyx')
+ >>> np.allclose(vec, [0, 1, 0])
+ True
+ >>> theta
+ 1.5
+ """
+ return quat2axangle(euler2quat(ai, aj, ak, axes))
+
+
+def euler2quat(ai, aj, ak, axes="sxyz"):
+ """Return `quaternion` from Euler angles and axis sequence `axes`
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ quat : array shape (4,)
+ Quaternion in w, x, y z (real, then vector) format
+
+ Examples
+ --------
+ >>> q = euler2quat(1, 2, 3, 'ryxz')
+ >>> np.allclose(q, [0.435953, 0.310622, -0.718287, 0.444435])
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis + 1
+ j = _NEXT_AXIS[i + parity - 1] + 1
+ k = _NEXT_AXIS[i - parity] + 1
+
+ if frame:
+ ai, ak = ak, ai
+ if parity:
+ aj = -aj
+
+ ai = ai / 2.0
+ aj = aj / 2.0
+ ak = ak / 2.0
+ ci = math.cos(ai)
+ si = math.sin(ai)
+ cj = math.cos(aj)
+ sj = math.sin(aj)
+ ck = math.cos(ak)
+ sk = math.sin(ak)
+ cc = ci * ck
+ cs = ci * sk
+ sc = si * ck
+ ss = si * sk
+
+ q = np.empty((4,))
+ if repetition:
+ q[0] = cj * (cc - ss)
+ q[i] = cj * (cs + sc)
+ q[j] = sj * (cc + ss)
+ q[k] = sj * (cs - sc)
+ else:
+ q[0] = cj * cc + sj * ss
+ q[i] = cj * sc - sj * cs
+ q[j] = cj * ss + sj * cc
+ q[k] = cj * cs - sj * sc
+ if parity:
+ q[j] *= -1.0
+
+ return q
+
+
+def quat2axangle(quat, identity_thresh=None):
+ """Convert quaternion to rotation of angle around axis
+
+ Parameters
+ ----------
+ quat : 4 element sequence
+ w, x, y, z forming quaternion.
+ identity_thresh : None or scalar, optional
+ Threshold below which the norm of the vector part of the quaternion (x,
+ y, z) is deemed to be 0, leading to the identity rotation. None (the
+ default) leads to a threshold estimated based on the precision of the
+ input.
+
+ Returns
+ -------
+ theta : scalar
+ angle of rotation.
+ vector : array shape (3,)
+ axis around which rotation occurs.
+
+ Examples
+ --------
+ >>> vec, theta = quat2axangle([0, 1, 0, 0])
+ >>> vec
+ array([1., 0., 0.])
+ >>> np.allclose(theta, np.pi)
+ True
+
+ If this is an identity rotation, we return a zero angle and an arbitrary
+ vector:
+
+ >>> quat2axangle([1, 0, 0, 0])
+ (array([1., 0., 0.]), 0.0)
+
+ If any of the quaternion values are not finite, we return a NaN in the
+ angle, and an arbitrary vector:
+
+ >>> quat2axangle([1, np.inf, 0, 0])
+ (array([1., 0., 0.]), nan)
+
+ Notes
+ -----
+ A quaternion for which x, y, z are all equal to 0, is an identity rotation.
+ In this case we return a 0 angle and an arbitrary vector, here [1, 0, 0].
+
+ The algorithm allows for quaternions that have not been normalized.
+ """
+ quat = np.asarray(quat)
+ Nq = np.sum(quat**2)
+ if not np.isfinite(Nq):
+ return np.array([1.0, 0, 0]), float("nan")
+ if identity_thresh is None:
+ try:
+ identity_thresh = np.finfo(Nq.type).eps * 3
+ except (AttributeError, ValueError): # Not a numpy type or not float
+ identity_thresh = _FLOAT_EPS * 3
+ if Nq < _FLOAT_EPS**2: # Results unreliable after normalization
+ return np.array([1.0, 0, 0]), 0.0
+ if Nq != 1: # Normalize if not normalized
+ s = math.sqrt(Nq)
+ quat = quat / s
+ xyz = quat[1:]
+ len2 = np.sum(xyz**2)
+ if len2 < identity_thresh**2:
+ # if vec is nearly 0,0,0, this is an identity rotation
+ return np.array([1.0, 0, 0]), 0.0
+ # Make sure w is not slightly above 1 or below -1
+ theta = 2 * math.acos(max(min(quat[0], 1), -1))
+ return xyz / math.sqrt(len2), theta
+
+
+def quat2euler(quaternion, axes="sxyz"):
+ """Euler angles from `quaternion` for specified axis sequence `axes`
+
+ Parameters
+ ----------
+ q : 4 element sequence
+ w, x, y, z of quaternion
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+
+ Examples
+ --------
+ >>> angles = quat2euler([0.99810947, 0.06146124, 0, 0])
+ >>> np.allclose(angles, [0.123, 0, 0])
+ True
+ """
+ return mat2euler(quat2mat(quaternion), axes)
\ No newline at end of file
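
These helpers mirror the transforms3d implementations, so an Euler triple survives a round trip through quaternion and rotation-matrix form as long as the same axes convention is used throughout. A small self-check sketch using only the functions defined above (assuming the package is importable):

import numpy as np
from simpler_env.policies.gr00t.geometry import euler2quat, quat2mat, mat2euler, euler2axangle

rpy = (0.1, -0.4, 0.7)
q = euler2quat(*rpy, axes="sxyz")   # quaternion in w, x, y, z order
R = quat2mat(q)                     # 3x3 rotation matrix
assert np.allclose(mat2euler(R, axes="sxyz"), rpy)

vec, theta = euler2axangle(*rpy, axes="sxyz")  # axis-angle form, as used for rot_axangle actions
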
diff --git a/simpler_env/policies/gr00t/gr00t_model.py b/simpler_env/policies/gr00t/gr00t_model.py
new file mode 100644
index 00000000..1abdef61
--- /dev/null
+++ b/simpler_env/policies/gr00t/gr00t_model.py
@@ -0,0 +1,336 @@
+from typing import Optional, Sequence, List
+import os
+import matplotlib.pyplot as plt
+import numpy as np
+from transforms3d.euler import euler2axangle
+from collections import deque
+from PIL import Image
+import torch
+import cv2 as cv
+from simpler_env.utils.action.action_ensemble import ActionEnsembler
+from .geometry import quat2mat, mat2euler
+import numpy as np
+import torch
+
+from gr00t.eval.robot import RobotInferenceClient
+from gr00t.experiment.data_config import DATA_CONFIG_MAP
+from gr00t.model.policy import Gr00tPolicy
+
+
+class Gr00tInference:
+ def __init__(
+ self,
+ saved_model_path: str = "pretrained/pi0",
+ unnorm_key: Optional[str] = None,
+ policy_setup: str = "widowx_bridge",
+ exec_horizon: int = 1,
+ image_size: list[int] = [224, 224],
+ action_scale: float = 1.0,
+ action_ensemble_temp: float = -0.8,
+ ) -> None:
+ gpu_idx = os.environ.get("GPU_IDX", 0)
+ self.device = f"cuda:{gpu_idx}"
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ if policy_setup == "widowx_bridge":
+ action_ensemble = False
+ data_config = "bridge"
+ image_size = [256, 256]
+ self.sticky_gripper_num_repeat = 1
+ # EE pose in Bridge data was relative to a top-down pose, instead of robot base
+ self.default_rot = np.array([[0, 0, 1.0], [0, 1.0, 0], [-1.0, 0, 0]]) # https://github.com/rail-berkeley/bridge_data_robot/blob/b841131ecd512bafb303075bd8f8b677e0bf9f1f/widowx_envs/widowx_controller/src/widowx_controller/widowx_controller.py#L203
+ elif policy_setup == "google_robot":
+ data_config = "fractal"
+ action_ensemble = False
+ image_size = [320, 256]
+ self.sticky_gripper_num_repeat = 10
+ else:
+ raise NotImplementedError(
+ f"Policy setup {policy_setup} not supported for octo models. The other datasets can be found in the huggingface config.json file."
+ )
+ self.policy_setup = policy_setup
+
+ # load model
+ data_config = DATA_CONFIG_MAP[data_config]
+ modality_config = data_config.modality_config()
+ transforms = data_config.transform()
+
+ self.policy_client = Gr00tPolicy(
+ model_path=saved_model_path,
+ modality_config=modality_config,
+ modality_transform=transforms,
+ embodiment_tag="new_embodiment",
+ device="cuda",
+ denoising_steps=16
+ )
+
+ self.image_size = image_size
+ self.action_scale = action_scale
+ self.obs_horizon = 1
+ self.obs_interval = 1
+ self.pred_action_horizon = 5
+ self.image_history = deque(maxlen=self.obs_horizon)
+ self.exec_horizon = exec_horizon
+
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+
+ self.action_ensemble = action_ensemble
+ self.action_ensemble_temp = action_ensemble_temp
+
+ if self.action_ensemble:
+ self.action_ensembler = ActionEnsembler(
+ self.pred_action_horizon, self.action_ensemble_temp
+ )
+ else:
+ self.action_ensembler = None
+
+ self.task = None
+ self.task_description = None
+
+ def reset(self, task_description: str) -> None:
+ self.image_history.clear()
+ if self.action_ensemble:
+ self.action_ensembler.reset()
+ self.task_description = task_description
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+ self.action_plan = deque()
+
+    def preprocess_widowx_proprio(self, eef_pos) -> np.ndarray:
+ """convert ee rotation to the frame of top-down
+ https://github.com/allenzren/open-pi-zero/blob/c3df7fb062175c16f69d7ca4ce042958ea238fb7/src/agent/env_adapter/simpler.py#L167
+ """
+ # StateEncoding.POS_EULER: xyz + rpy + pad + gripper(openness)
+ proprio = eef_pos
+ rm_bridge = quat2mat(proprio[3:7])
+ rpy_bridge_converted = mat2euler(rm_bridge @ self.default_rot.T)
+ gripper_openness = proprio[7] # from simpler, 0 for close, 1 for open
+ raw_proprio = np.concatenate(
+ [
+ proprio[:3],
+ rpy_bridge_converted,
+ np.zeros(1),
+ [gripper_openness],
+ ]
+ )
+ return raw_proprio
+
+    def preprocess_google_robot_proprio(self, eef_pos) -> np.ndarray:
+ """convert wxyz quat from simpler to xyzw used in fractal
+ https://github.com/allenzren/open-pi-zero/blob/c3df7fb062175c16f69d7ca4ce042958ea238fb7/src/agent/env_adapter/simpler.py#L204
+ """
+ # StateEncoding.POS_QUAT: xyz + q_xyzw + gripper(closeness)
+ quat_xyzw = np.roll(eef_pos[3:7], -1)
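+        # np.roll(..., -1) maps a (w, x, y, z) quaternion to (x, y, z, w)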
+        gripper_width = eef_pos[7]  # from simpler, 0 for close, 1 for open
+ # need invert as the training data comes from closeness
+ gripper_closedness = (
+ 1 - gripper_width
+ ) # TODO(allenzren): change fractal data processing in training so also use gripper openness in proprio (as in bridge) instead of closedness
+ raw_proprio = np.concatenate(
+ (
+ eef_pos[:3],
+ quat_xyzw,
+ [gripper_closedness],
+ )
+ )
+ return raw_proprio
+
+ def step(
+ self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs
+ ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
+ """
+ Input:
+ image: np.ndarray of shape (H, W, 3), uint8
+ task_description: Optional[str], task description; if different from previous task description, policy state is reset
+ Output:
+ raw_action: dict; raw policy action output
+ action: dict; processed action to be sent to the maniskill2 environment, with the following keys:
+ - 'world_vector': np.ndarray of shape (3,), xyz translation of robot end-effector
+ - 'rot_axangle': np.ndarray of shape (3,), axis-angle representation of end-effector rotation
+ - 'gripper': np.ndarray of shape (1,), gripper action
+ - 'terminate_episode': np.ndarray of shape (1,), 1 if episode should be terminated, 0 otherwise
+ """
+ if task_description is not None:
+ if task_description != self.task_description:
+ self.reset(task_description)
+
+ assert image.dtype == np.uint8
+ image = self._resize_image(image)
+ self._add_image_to_history(image)
+ images: List[Image.Image] = self._obtain_image_history()
+
+ eef_pos = kwargs.get("eef_pos", None)
+ if self.policy_setup == "widowx_bridge":
+ state = self.preprocess_widowx_proprio(eef_pos)
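+            # build the GR00T observation: a batched (1, H, W, C) uint8 frame,
+            # per-dimension state entries, and the task string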
+ batch = {
+ "video.image_0": np.array(images[0][None]), # numpy (b h w c)
+ "state.x": state[0:1][None],
+ "state.y": state[1:2][None],
+ "state.z": state[2:3][None],
+ "state.roll": state[3:4][None],
+ "state.pitch": state[4:5][None],
+ "state.yaw": state[5:6][None],
+ "state.pad": state[6:7][None],
+ "state.gripper": state[7:8][None],
+ "annotation.human.action.task_description": [task_description],
+ }
+ if not self.action_plan:
+ actions = self.policy_client.get_action(batch)
+ action_chunk = np.stack([
+ actions["action.x"],
+ actions["action.y"],
+ actions["action.z"],
+ actions["action.roll"],
+ actions["action.pitch"],
+ actions["action.yaw"],
+ actions["action.gripper"],
+ ], axis=-1)[:self.pred_action_horizon]
+ self.action_plan.extend(action_chunk)
+
+ elif self.policy_setup == "google_robot":
+ state = self.preprocess_google_robot_proprio(eef_pos)
+ batch = {
+ "video.image": np.array(images[0][None]),
+ "state.x": state[0:1][None],
+ "state.y": state[1:2][None],
+ "state.z": state[2:3][None],
+ "state.rx": state[3:4][None],
+ "state.ry": state[4:5][None],
+ "state.rz": state[5:6][None],
+ "state.rw": state[6:7][None],
+ "state.gripper": state[7:8][None],
+ "annotation.human.action.task_description": [task_description],
+ }
+
+ if not self.action_plan:
+ actions = self.policy_client.get_action(batch)
+ raw_actions = np.stack([
+ actions["action.x"],
+ actions["action.y"],
+ actions["action.z"],
+ actions["action.roll"],
+ actions["action.pitch"],
+ actions["action.yaw"],
+ actions["action.gripper"],
+ ], axis=-1)[:self.pred_action_horizon]
+ self.action_plan.extend(raw_actions)
+
+ raw_actions = self.action_plan.popleft()
+
+ # if self.action_ensemble:
+ # raw_actions = self.action_ensembler.ensemble_action(raw_actions)[None]
+
+ raw_action = {
+ "world_vector": np.array(raw_actions[:3]),
+ "rotation_delta": np.array(raw_actions[3:6]),
+ "open_gripper": np.array(
+ raw_actions[6:7]
+ ), # range [0, 1]; 1 = open; 0 = close
+ }
+
+ # process raw_action to obtain the action to be sent to the maniskill2 environment
+ action = {}
+ action["world_vector"] = raw_action["world_vector"] * self.action_scale
+ action_rotation_delta = np.asarray(
+ raw_action["rotation_delta"], dtype=np.float64
+ )
+ roll, pitch, yaw = action_rotation_delta
+ action_rotation_ax, action_rotation_angle = euler2axangle(roll, pitch, yaw)
+ action_rotation_axangle = action_rotation_ax * action_rotation_angle
+ action["rot_axangle"] = action_rotation_axangle * self.action_scale
+
+ if self.policy_setup == "google_robot":
+ action["gripper"] = 0
+ current_gripper_action = raw_action["open_gripper"]
+ if self.previous_gripper_action is None:
+ relative_gripper_action = np.array([0])
+ self.previous_gripper_action = current_gripper_action
+ else:
+ relative_gripper_action = self.previous_gripper_action - current_gripper_action
+
+ # fix a bug in the SIMPLER code here
+ # self.previous_gripper_action = current_gripper_action
+
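+            # Sticky gripper: the first large change in the gripper command is latched
+            # and repeated for `sticky_gripper_num_repeat` steps (10 for google_robot),
+            # presumably so the simulated gripper receives a consistent command long
+            # enough to fully actuate before a new command can override it.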
+ if np.abs(relative_gripper_action) > 0.5 and (not self.sticky_action_is_on):
+ self.sticky_action_is_on = True
+ self.sticky_gripper_action = relative_gripper_action
+ self.previous_gripper_action = current_gripper_action
+
+ if self.sticky_action_is_on:
+ self.gripper_action_repeat += 1
+ relative_gripper_action = self.sticky_gripper_action
+
+ if self.gripper_action_repeat == self.sticky_gripper_num_repeat:
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+
+ action["gripper"] = relative_gripper_action
+
+ elif self.policy_setup == "widowx_bridge":
+ action["gripper"] = 2.0 * (raw_action["open_gripper"] > 0.5) - 1.0
+
+ action["terminate_episode"] = np.array([0.0])
+ return raw_action, action
+
+ def _resize_image(self, image: np.ndarray) -> np.ndarray:
+ image = cv.resize(image, tuple(self.image_size), interpolation=cv.INTER_AREA)
+ return image
+
+ def _add_image_to_history(self, image: np.ndarray) -> None:
+ if len(self.image_history) == 0:
+ self.image_history.extend([image] * self.obs_horizon)
+ else:
+ self.image_history.append(image)
+
+ def _obtain_image_history(self) -> List[Image.Image]:
+ image_history = list(self.image_history)
+ images = image_history[:: self.obs_interval]
+ # images = [Image.fromarray(image).convert("RGB") for image in images]
+ return images
+
+ def visualize_epoch(
+ self,
+ predicted_raw_actions: Sequence[np.ndarray],
+ images: Sequence[np.ndarray],
+ save_path: str,
+ ) -> None:
+ images = [self._resize_image(image) for image in images]
+ ACTION_DIM_LABELS = ["x", "y", "z", "roll", "pitch", "yaw", "grasp"]
+
+        img_strip = np.concatenate(np.array(images[::3]), axis=1)
+
+        # set up plt figure
+        figure_layout = [["image"] * len(ACTION_DIM_LABELS), ACTION_DIM_LABELS]
+        plt.rcParams.update({"font.size": 12})
+        fig, axs = plt.subplot_mosaic(figure_layout)
+        fig.set_size_inches([45, 10])
+
+        # plot actions
+        pred_actions = np.array(
+            [
+                np.concatenate(
+                    [a["world_vector"], a["rotation_delta"], a["open_gripper"]], axis=-1
+                )
+                for a in predicted_raw_actions
+            ]
+        )
+        for action_dim, action_label in enumerate(ACTION_DIM_LABELS):
+            # actions have batch, horizon, dim, in this example we just take the first action for simplicity
+            axs[action_label].plot(
+                pred_actions[:, action_dim], label="predicted action"
+            )
+            axs[action_label].set_title(action_label)
+            axs[action_label].set_xlabel("Time in one episode")
+
+        axs["image"].imshow(img_strip)
+        axs["image"].set_xlabel("Time in one episode (subsampled)")
+        plt.legend()
+        plt.savefig(save_path)
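+
+
+# Illustrative usage sketch (not invoked by the evaluation scripts; the checkpoint
+# path, task string and observation values below are hypothetical):
+#   policy = Gr00tInference(saved_model_path="pretrained/gr00t", policy_setup="widowx_bridge")
+#   policy.reset("put the carrot on the plate")
+#   raw_action, action = policy.step(image, "put the carrot on the plate", eef_pos=eef_pos)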
diff --git a/simpler_env/policies/lerobotpi/geometry.py b/simpler_env/policies/lerobotpi/geometry.py
new file mode 100644
index 00000000..f1e7164a
--- /dev/null
+++ b/simpler_env/policies/lerobotpi/geometry.py
@@ -0,0 +1,463 @@
+"""
+Mostly copied from transforms3d library
+
+"""
+
+import math
+
+import numpy as np
+
+_FLOAT_EPS = np.finfo(np.float64).eps
+
+# axis sequences for Euler angles
+_NEXT_AXIS = [1, 2, 0, 1]
+
+# map axes strings to/from tuples of inner axis, parity, repetition, frame
+_AXES2TUPLE = {
+ "sxyz": (0, 0, 0, 0),
+ "sxyx": (0, 0, 1, 0),
+ "sxzy": (0, 1, 0, 0),
+ "sxzx": (0, 1, 1, 0),
+ "syzx": (1, 0, 0, 0),
+ "syzy": (1, 0, 1, 0),
+ "syxz": (1, 1, 0, 0),
+ "syxy": (1, 1, 1, 0),
+ "szxy": (2, 0, 0, 0),
+ "szxz": (2, 0, 1, 0),
+ "szyx": (2, 1, 0, 0),
+ "szyz": (2, 1, 1, 0),
+ "rzyx": (0, 0, 0, 1),
+ "rxyx": (0, 0, 1, 1),
+ "ryzx": (0, 1, 0, 1),
+ "rxzx": (0, 1, 1, 1),
+ "rxzy": (1, 0, 0, 1),
+ "ryzy": (1, 0, 1, 1),
+ "rzxy": (1, 1, 0, 1),
+ "ryxy": (1, 1, 1, 1),
+ "ryxz": (2, 0, 0, 1),
+ "rzxz": (2, 0, 1, 1),
+ "rxyz": (2, 1, 0, 1),
+ "rzyz": (2, 1, 1, 1),
+}
+
+_TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items())
+
+# For testing whether a number is close to zero
+_EPS4 = np.finfo(float).eps * 4.0
+
+
+def mat2euler(mat, axes="sxyz"):
+ """Return Euler angles from rotation matrix for specified axis sequence.
+
+ Note that many Euler angle triplets can describe one matrix.
+
+ Parameters
+ ----------
+ mat : array-like shape (3, 3) or (4, 4)
+ Rotation matrix or affine.
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+
+ Examples
+ --------
+ >>> R0 = euler2mat(1, 2, 3, 'syxz')
+ >>> al, be, ga = mat2euler(R0, 'syxz')
+ >>> R1 = euler2mat(al, be, ga, 'syxz')
+ >>> np.allclose(R0, R1)
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis
+ j = _NEXT_AXIS[i + parity]
+ k = _NEXT_AXIS[i - parity + 1]
+
+ M = np.array(mat, dtype=np.float64, copy=False)[:3, :3]
+ if repetition:
+ sy = math.sqrt(M[i, j] * M[i, j] + M[i, k] * M[i, k])
+ if sy > _EPS4:
+ ax = math.atan2(M[i, j], M[i, k])
+ ay = math.atan2(sy, M[i, i])
+ az = math.atan2(M[j, i], -M[k, i])
+ else:
+ ax = math.atan2(-M[j, k], M[j, j])
+ ay = math.atan2(sy, M[i, i])
+ az = 0.0
+ else:
+ cy = math.sqrt(M[i, i] * M[i, i] + M[j, i] * M[j, i])
+ if cy > _EPS4:
+ ax = math.atan2(M[k, j], M[k, k])
+ ay = math.atan2(-M[k, i], cy)
+ az = math.atan2(M[j, i], M[i, i])
+ else:
+ ax = math.atan2(-M[j, k], M[j, j])
+ ay = math.atan2(-M[k, i], cy)
+ az = 0.0
+
+ if parity:
+ ax, ay, az = -ax, -ay, -az
+ if frame:
+ ax, az = az, ax
+ return ax, ay, az
+
+
+def quat2mat(q):
+ """Calculate rotation matrix corresponding to quaternion
+
+ Parameters
+ ----------
+ q : 4 element array-like
+
+ Returns
+ -------
+ M : (3,3) array
+ Rotation matrix corresponding to input quaternion *q*
+
+ Notes
+ -----
+ Rotation matrix applies to column vectors, and is applied to the
+ left of coordinate vectors. The algorithm here allows quaternions that
+ have not been normalized.
+
+ References
+ ----------
+ Algorithm from http://en.wikipedia.org/wiki/Rotation_matrix#Quaternion
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> M = quat2mat([1, 0, 0, 0]) # Identity quaternion
+ >>> np.allclose(M, np.eye(3))
+ True
+ >>> M = quat2mat([0, 1, 0, 0]) # 180 degree rotn around axis 0
+ >>> np.allclose(M, np.diag([1, -1, -1]))
+ True
+ """
+ w, x, y, z = q
+ Nq = w * w + x * x + y * y + z * z
+ if Nq < _FLOAT_EPS:
+ return np.eye(3)
+ s = 2.0 / Nq
+ X = x * s
+ Y = y * s
+ Z = z * s
+ wX = w * X
+ wY = w * Y
+ wZ = w * Z
+ xX = x * X
+ xY = x * Y
+ xZ = x * Z
+ yY = y * Y
+ yZ = y * Z
+ zZ = z * Z
+ return np.array(
+ [
+ [1.0 - (yY + zZ), xY - wZ, xZ + wY],
+ [xY + wZ, 1.0 - (xX + zZ), yZ - wX],
+ [xZ - wY, yZ + wX, 1.0 - (xX + yY)],
+ ]
+ )
+
+
+# Checks if a matrix is a valid rotation matrix.
+def isrotation(
+ R: np.ndarray,
+ thresh=1e-6,
+) -> bool:
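+    """Check that ``R.T @ R`` is within `thresh` of the identity, the
+    orthogonality test used here for a rotation matrix.
+
+    Illustrative example (added here; not part of the copied transforms3d code):
+
+    >>> isrotation(np.eye(3))
+    True
+    """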
+ Rt = np.transpose(R)
+ shouldBeIdentity = np.dot(Rt, R)
+ iden = np.identity(3, dtype=R.dtype)
+ n = np.linalg.norm(iden - shouldBeIdentity)
+ return n < thresh
+
+
+def euler2mat(ai, aj, ak, axes="sxyz"):
+ """Return rotation matrix from Euler angles and axis sequence.
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ mat : array (3, 3)
+ Rotation matrix or affine.
+
+ Examples
+ --------
+ >>> R = euler2mat(1, 2, 3, 'syxz')
+ >>> np.allclose(np.sum(R[0]), -1.34786452)
+ True
+ >>> R = euler2mat(1, 2, 3, (0, 1, 0, 1))
+ >>> np.allclose(np.sum(R[0]), -0.383436184)
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis
+ j = _NEXT_AXIS[i + parity]
+ k = _NEXT_AXIS[i - parity + 1]
+
+ if frame:
+ ai, ak = ak, ai
+ if parity:
+ ai, aj, ak = -ai, -aj, -ak
+
+ si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak)
+ ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak)
+ cc, cs = ci * ck, ci * sk
+ sc, ss = si * ck, si * sk
+
+ M = np.eye(3)
+ if repetition:
+ M[i, i] = cj
+ M[i, j] = sj * si
+ M[i, k] = sj * ci
+ M[j, i] = sj * sk
+ M[j, j] = -cj * ss + cc
+ M[j, k] = -cj * cs - sc
+ M[k, i] = -sj * ck
+ M[k, j] = cj * sc + cs
+ M[k, k] = cj * cc - ss
+ else:
+ M[i, i] = cj * ck
+ M[i, j] = sj * sc - cs
+ M[i, k] = sj * cc + ss
+ M[j, i] = cj * sk
+ M[j, j] = sj * ss + cc
+ M[j, k] = sj * cs - sc
+ M[k, i] = -sj
+ M[k, j] = cj * si
+ M[k, k] = cj * ci
+ return M
+
+
+def euler2axangle(ai, aj, ak, axes="sxyz"):
+ """Return angle, axis corresponding to Euler angles, axis specification
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ vector : array shape (3,)
+ axis around which rotation occurs
+ theta : scalar
+ angle of rotation
+
+ Examples
+ --------
+ >>> vec, theta = euler2axangle(0, 1.5, 0, 'szyx')
+ >>> np.allclose(vec, [0, 1, 0])
+ True
+ >>> theta
+ 1.5
+ """
+ return quat2axangle(euler2quat(ai, aj, ak, axes))
+
+
+def euler2quat(ai, aj, ak, axes="sxyz"):
+ """Return `quaternion` from Euler angles and axis sequence `axes`
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ quat : array shape (4,)
+        Quaternion in w, x, y, z (real, then vector) format
+
+ Examples
+ --------
+ >>> q = euler2quat(1, 2, 3, 'ryxz')
+ >>> np.allclose(q, [0.435953, 0.310622, -0.718287, 0.444435])
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis + 1
+ j = _NEXT_AXIS[i + parity - 1] + 1
+ k = _NEXT_AXIS[i - parity] + 1
+
+ if frame:
+ ai, ak = ak, ai
+ if parity:
+ aj = -aj
+
+ ai = ai / 2.0
+ aj = aj / 2.0
+ ak = ak / 2.0
+ ci = math.cos(ai)
+ si = math.sin(ai)
+ cj = math.cos(aj)
+ sj = math.sin(aj)
+ ck = math.cos(ak)
+ sk = math.sin(ak)
+ cc = ci * ck
+ cs = ci * sk
+ sc = si * ck
+ ss = si * sk
+
+ q = np.empty((4,))
+ if repetition:
+ q[0] = cj * (cc - ss)
+ q[i] = cj * (cs + sc)
+ q[j] = sj * (cc + ss)
+ q[k] = sj * (cs - sc)
+ else:
+ q[0] = cj * cc + sj * ss
+ q[i] = cj * sc - sj * cs
+ q[j] = cj * ss + sj * cc
+ q[k] = cj * cs - sj * sc
+ if parity:
+ q[j] *= -1.0
+
+ return q
+
+
+def quat2axangle(quat, identity_thresh=None):
+ """Convert quaternion to rotation of angle around axis
+
+ Parameters
+ ----------
+ quat : 4 element sequence
+ w, x, y, z forming quaternion.
+ identity_thresh : None or scalar, optional
+ Threshold below which the norm of the vector part of the quaternion (x,
+ y, z) is deemed to be 0, leading to the identity rotation. None (the
+ default) leads to a threshold estimated based on the precision of the
+ input.
+
+ Returns
+ -------
+ theta : scalar
+ angle of rotation.
+ vector : array shape (3,)
+ axis around which rotation occurs.
+
+ Examples
+ --------
+ >>> vec, theta = quat2axangle([0, 1, 0, 0])
+ >>> vec
+ array([1., 0., 0.])
+ >>> np.allclose(theta, np.pi)
+ True
+
+ If this is an identity rotation, we return a zero angle and an arbitrary
+ vector:
+
+ >>> quat2axangle([1, 0, 0, 0])
+ (array([1., 0., 0.]), 0.0)
+
+ If any of the quaternion values are not finite, we return a NaN in the
+ angle, and an arbitrary vector:
+
+ >>> quat2axangle([1, np.inf, 0, 0])
+ (array([1., 0., 0.]), nan)
+
+ Notes
+ -----
+ A quaternion for which x, y, z are all equal to 0, is an identity rotation.
+ In this case we return a 0 angle and an arbitrary vector, here [1, 0, 0].
+
+ The algorithm allows for quaternions that have not been normalized.
+ """
+ quat = np.asarray(quat)
+ Nq = np.sum(quat**2)
+ if not np.isfinite(Nq):
+ return np.array([1.0, 0, 0]), float("nan")
+ if identity_thresh is None:
+ try:
+ identity_thresh = np.finfo(Nq.type).eps * 3
+ except (AttributeError, ValueError): # Not a numpy type or not float
+ identity_thresh = _FLOAT_EPS * 3
+ if Nq < _FLOAT_EPS**2: # Results unreliable after normalization
+ return np.array([1.0, 0, 0]), 0.0
+ if Nq != 1: # Normalize if not normalized
+ s = math.sqrt(Nq)
+ quat = quat / s
+ xyz = quat[1:]
+ len2 = np.sum(xyz**2)
+ if len2 < identity_thresh**2:
+ # if vec is nearly 0,0,0, this is an identity rotation
+ return np.array([1.0, 0, 0]), 0.0
+ # Make sure w is not slightly above 1 or below -1
+ theta = 2 * math.acos(max(min(quat[0], 1), -1))
+ return xyz / math.sqrt(len2), theta
+
+
+def quat2euler(quaternion, axes="sxyz"):
+ """Euler angles from `quaternion` for specified axis sequence `axes`
+
+ Parameters
+ ----------
+    quaternion : 4 element sequence
+ w, x, y, z of quaternion
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+
+ Examples
+ --------
+ >>> angles = quat2euler([0.99810947, 0.06146124, 0, 0])
+ >>> np.allclose(angles, [0.123, 0, 0])
+ True
+ """
+ return mat2euler(quat2mat(quaternion), axes)
\ No newline at end of file
diff --git a/simpler_env/policies/lerobotpi/pi0_or_fast.py b/simpler_env/policies/lerobotpi/pi0_or_fast.py
new file mode 100644
index 00000000..97e29227
--- /dev/null
+++ b/simpler_env/policies/lerobotpi/pi0_or_fast.py
@@ -0,0 +1,281 @@
+from collections import deque
+import os
+from typing import List, Optional, Sequence
+
+import cv2 as cv
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+import torch
+from transforms3d.euler import euler2axangle
+
+from simpler_env.utils.action.action_ensemble import ActionEnsembler
+
+from .geometry import mat2euler, quat2mat
+
+
+def auto_model_fn(path):
+ import sys
+
+ sys.path.append(path) # noqa
+ from lerobot.policies.pi0.modeling_pi0 import PI0Policy
+
+ return PI0Policy
+
+
+class LerobotPiFastInference:
+ def __init__(
+ self,
+ saved_model_path: str = "pretrained/pi0",
+ unnorm_key: Optional[str] = None,
+ policy_setup: str = "widowx_bridge",
+ exec_horizon: int = 4,
+ image_size: list[int] = [224, 224],
+ action_scale: float = 1.0,
+ action_ensemble_temp: float = -0.8,
+ ) -> None:
+ gpu_idx = os.environ.get("GPU_IDX", 0)
+ self.device = f"cuda:{gpu_idx}"
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ if policy_setup == "widowx_bridge":
+ unnorm_key = "bridge_orig/1.0.0" if unnorm_key is None else unnorm_key
+ action_ensemble = True
+ self.sticky_gripper_num_repeat = 1
+ # EE pose in Bridge data was relative to a top-down pose, instead of robot base
+ self.default_rot = np.array(
+ [[0, 0, 1.0], [0, 1.0, 0], [-1.0, 0, 0]]
+ ) # https://github.com/rail-berkeley/bridge_data_robot/blob/b841131ecd512bafb303075bd8f8b677e0bf9f1f/widowx_envs/widowx_controller/src/widowx_controller/widowx_controller.py#L203
+ elif policy_setup == "google_robot":
+ unnorm_key = "fractal20220817_data/0.1.0" if unnorm_key is None else unnorm_key
+ action_ensemble = True
+ self.sticky_gripper_num_repeat = 10
+ else:
+ raise NotImplementedError(
+ f"Policy setup {policy_setup} not supported for octo models. The other datasets can be found in the huggingface config.json file."
+ )
+ self.policy_setup = policy_setup
+ self.unnorm_key = unnorm_key
+
+ print(f"*** policy_setup: {policy_setup}, unnorm_key: {unnorm_key} ***")
+
+        # load the LeRobot PI0 policy class (auto_model_fn adds saved_model_path to sys.path before importing lerobot)
+ PI0Policy = auto_model_fn(saved_model_path)
+ print(f"DADSADADAS: {torch.cuda.is_available()}")
+ self.vla = PI0Policy.from_pretrained(saved_model_path)
+ self.vla.to(self.device)
+ self.vla.reset()
+
+ self.image_size = image_size
+ self.action_scale = action_scale
+ self.obs_horizon = 1
+ self.obs_interval = 1
+ self.pred_action_horizon = 5
+ self.image_history = deque(maxlen=self.obs_horizon)
+ self.exec_horizon = exec_horizon
+
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+
+ self.action_ensemble = action_ensemble
+ self.action_ensemble_temp = action_ensemble_temp
+
+ if self.action_ensemble:
+ self.action_ensembler = ActionEnsembler(self.pred_action_horizon, self.action_ensemble_temp)
+ else:
+ self.action_ensembler = None
+
+ self.task = None
+ self.task_description = None
+
+ def reset(self, task_description: str) -> None:
+ self.image_history.clear()
+ if self.action_ensemble:
+ self.action_ensembler.reset()
+ self.task_description = task_description
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+ self.action_plan = deque()
+
+    def preprocess_widowx_proprio(self, eef_pos) -> np.ndarray:
+ """convert ee rotation to the frame of top-down
+ https://github.com/allenzren/open-pi-zero/blob/c3df7fb062175c16f69d7ca4ce042958ea238fb7/src/agent/env_adapter/simpler.py#L167
+ """
+ # StateEncoding.POS_EULER: xyz + rpy + pad + gripper(openness)
+ proprio = eef_pos
+ rm_bridge = quat2mat(proprio[3:7])
+ rpy_bridge_converted = mat2euler(rm_bridge @ self.default_rot.T)
+ gripper_openness = proprio[7] # from simpler, 0 for close, 1 for open
+ raw_proprio = np.concatenate(
+ [
+ proprio[:3],
+ rpy_bridge_converted,
+ np.zeros(1),
+ [gripper_openness],
+ ]
+ )
+ return raw_proprio
+
+    def preprocess_google_robot_proprio(self, eef_pos) -> np.ndarray:
+ """convert wxyz quat from simpler to xyzw used in fractal
+ https://github.com/allenzren/open-pi-zero/blob/c3df7fb062175c16f69d7ca4ce042958ea238fb7/src/agent/env_adapter/simpler.py#L204
+ """
+ # StateEncoding.POS_QUAT: xyz + q_xyzw + gripper(closeness)
+ quat_xyzw = np.roll(eef_pos[3:7], -1)
+ gripper_width = eef_pos[7] # from simpler, 0 for close, 1 for open
+ # need invert as the training data comes from closeness
+ gripper_closedness = (
+ 1 - gripper_width
+ ) # TODO(allenzren): change fractal data processing in training so also use gripper openness in proprio (as in bridge) instead of closedness
+ raw_proprio = np.concatenate(
+ (
+ eef_pos[:3],
+ quat_xyzw,
+ [gripper_closedness],
+ )
+ )
+ return raw_proprio
+
+ def step(self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
+ """
+ Input:
+ image: np.ndarray of shape (H, W, 3), uint8
+ task_description: Optional[str], task description; if different from previous task description, policy state is reset
+ Output:
+ raw_action: dict; raw policy action output
+ action: dict; processed action to be sent to the maniskill2 environment, with the following keys:
+ - 'world_vector': np.ndarray of shape (3,), xyz translation of robot end-effector
+ - 'rot_axangle': np.ndarray of shape (3,), axis-angle representation of end-effector rotation
+ - 'gripper': np.ndarray of shape (1,), gripper action
+ - 'terminate_episode': np.ndarray of shape (1,), 1 if episode should be terminated, 0 otherwise
+ """
+ if task_description is not None:
+ if task_description != self.task_description:
+ self.reset(task_description)
+
+ assert image.dtype == np.uint8
+ image = self._resize_image(image)
+ self._add_image_to_history(image)
+ images: List[Image.Image] = self._obtain_image_history()
+
+ eef_pos = kwargs.get("eef_pos", None)
+ if self.policy_setup == "widowx_bridge":
+ state = self.preprocess_widowx_proprio(eef_pos)
+ image_key = "observation.images.image_0"
+ elif self.policy_setup == "google_robot":
+ state = self.preprocess_google_robot_proprio(eef_pos)
+ image_key = "observation.images.image"
+
+ # if self.action_ensemble:
+ # raw_actions = self.action_ensembler.ensemble_action(raw_actions)[None]
+
+ if not self.action_plan:
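+            # build a LeRobot-style observation: proprio state plus the latest frame,
+            # converted from HWC uint8 to a normalized (1, C, H, W) float tensor on the policy device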
+ observation = {
+ "observation.state": torch.from_numpy(state).unsqueeze(0).to(self.device).float(),
+ image_key: torch.from_numpy(images[0] / 255).permute(2, 0, 1).unsqueeze(0).to(self.device).float(),
+ "task": [task_description],
+ }
+
+ # model output gripper action, +1 = open, 0 = close
+ action_chunk = self.vla.select_action(observation)[0][: self.pred_action_horizon].cpu().numpy()
+ self.action_plan.extend(action_chunk[: self.exec_horizon])
+
+ raw_actions = self.action_plan.popleft()
+
+ raw_action = {
+ "world_vector": np.array(raw_actions[:3]),
+ "rotation_delta": np.array(raw_actions[3:6]),
+ "open_gripper": np.array(raw_actions[6:7]), # range [0, 1]; 1 = open; 0 = close
+ }
+
+ # process raw_action to obtain the action to be sent to the maniskill2 environment
+ action = {}
+ action["world_vector"] = raw_action["world_vector"] * self.action_scale
+ action_rotation_delta = np.asarray(raw_action["rotation_delta"], dtype=np.float64)
+ roll, pitch, yaw = action_rotation_delta
+ action_rotation_ax, action_rotation_angle = euler2axangle(roll, pitch, yaw)
+ action_rotation_axangle = action_rotation_ax * action_rotation_angle
+ action["rot_axangle"] = action_rotation_axangle * self.action_scale
+
+ if self.policy_setup == "google_robot":
+ action["gripper"] = 0
+ current_gripper_action = raw_action["open_gripper"]
+ if self.previous_gripper_action is None:
+ relative_gripper_action = np.array([0])
+ self.previous_gripper_action = current_gripper_action
+ else:
+ relative_gripper_action = self.previous_gripper_action - current_gripper_action
+
+ # fix a bug in the SIMPLER code here
+ # self.previous_gripper_action = current_gripper_action
+
+ if np.abs(relative_gripper_action) > 0.5 and (not self.sticky_action_is_on):
+ self.sticky_action_is_on = True
+ self.sticky_gripper_action = relative_gripper_action
+ self.previous_gripper_action = current_gripper_action
+
+ if self.sticky_action_is_on:
+ self.gripper_action_repeat += 1
+ relative_gripper_action = self.sticky_gripper_action
+
+ if self.gripper_action_repeat == self.sticky_gripper_num_repeat:
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+
+ action["gripper"] = relative_gripper_action
+
+ elif self.policy_setup == "widowx_bridge":
+ action["gripper"] = 2.0 * (raw_action["open_gripper"] > 0.5) - 1.0
+
+ action["terminate_episode"] = np.array([0.0])
+ return raw_action, action
+
+ def _resize_image(self, image: np.ndarray) -> np.ndarray:
+ image = cv.resize(image, tuple(self.image_size), interpolation=cv.INTER_AREA)
+ return image
+
+ def _add_image_to_history(self, image: np.ndarray) -> None:
+ if len(self.image_history) == 0:
+ self.image_history.extend([image] * self.obs_horizon)
+ else:
+ self.image_history.append(image)
+
+ def _obtain_image_history(self) -> List[Image.Image]:
+ image_history = list(self.image_history)
+ images = image_history[:: self.obs_interval]
+ # images = [Image.fromarray(image).convert("RGB") for image in images]
+ return images
+
+ def visualize_epoch(
+ self,
+ predicted_raw_actions: Sequence[np.ndarray],
+ images: Sequence[np.ndarray],
+ save_path: str,
+ ) -> None:
+ images = [self._resize_image(image) for image in images]
+ ACTION_DIM_LABELS = ["x", "y", "z", "roll", "pitch", "yaw", "grasp"]
+
+ img_strip = np.concatenate(np.array(images[::3]), axis=1)
+
+ # set up plt figure
+ figure_layout = [["image"] * len(ACTION_DIM_LABELS), ACTION_DIM_LABELS]
+ plt.rcParams.update({"font.size": 12})
+ fig, axs = plt.subplot_mosaic(figure_layout)
+ fig.set_size_inches([45, 10])
+
+ # plot actions
+ pred_actions = np.array([np.concatenate([a["world_vector"], a["rotation_delta"], a["open_gripper"]], axis=-1) for a in predicted_raw_actions])
+        for action_dim, action_label in enumerate(ACTION_DIM_LABELS):
+            # actions have batch, horizon, dim, in this example we just take the first action for simplicity
+            axs[action_label].plot(pred_actions[:, action_dim], label="predicted action")
+            axs[action_label].set_title(action_label)
+            axs[action_label].set_xlabel("Time in one episode")
+
+        axs["image"].imshow(img_strip)
+        axs["image"].set_xlabel("Time in one episode (subsampled)")
+        plt.legend()
+        plt.savefig(save_path)
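+
+
+# Illustrative usage sketch (the checkpoint path and task string below are
+# hypothetical; GPU_IDX selects the CUDA device used by the policy):
+#   policy = LerobotPiFastInference(saved_model_path="pretrained/pi0", policy_setup="widowx_bridge")
+#   policy.reset("stack the green block on the yellow block")
+#   raw_action, action = policy.step(image, "stack the green block on the yellow block", eef_pos=eef_pos)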
diff --git a/simpler_env/policies/octo/octo_model.py b/simpler_env/policies/octo/octo_model.py
index 92110985..e05a51e0 100644
--- a/simpler_env/policies/octo/octo_model.py
+++ b/simpler_env/policies/octo/octo_model.py
@@ -57,7 +57,9 @@ def __init__(
self.action_std = self.model.dataset_statistics[dataset_id]["action"]["std"]
else:
raise NotImplementedError()
-
+ print(
+ f"*** policy setup: {policy_setup}, dataset_id: {dataset_id}, model_type: {model_type}, mean {self.action_mean}, std {self.action_std} ***"
+ )
self.image_size = image_size
self.action_scale = action_scale
self.horizon = horizon
@@ -127,7 +129,9 @@ def reset(self, task_description: str) -> None:
# self.gripper_is_closed = False
self.previous_gripper_action = None
- def step(self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
+ def step(
+ self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs
+ ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
"""
Input:
image: np.ndarray of shape (H, W, 3), uint8
@@ -241,7 +245,9 @@ def step(self, image: np.ndarray, task_description: Optional[str] = None, *args,
return raw_action, action
- def visualize_epoch(self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str) -> None:
+ def visualize_epoch(
+ self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str
+ ) -> None:
images = [self._resize_image(image) for image in images]
ACTION_DIM_LABELS = ["x", "y", "z", "roll", "pitch", "yaw", "grasp"]
diff --git a/simpler_env/policies/openpi/geometry.py b/simpler_env/policies/openpi/geometry.py
new file mode 100644
index 00000000..f1e7164a
--- /dev/null
+++ b/simpler_env/policies/openpi/geometry.py
@@ -0,0 +1,463 @@
+"""
+Mostly copied from transforms3d library
+
+"""
+
+import math
+
+import numpy as np
+
+_FLOAT_EPS = np.finfo(np.float64).eps
+
+# axis sequences for Euler angles
+_NEXT_AXIS = [1, 2, 0, 1]
+
+# map axes strings to/from tuples of inner axis, parity, repetition, frame
+_AXES2TUPLE = {
+ "sxyz": (0, 0, 0, 0),
+ "sxyx": (0, 0, 1, 0),
+ "sxzy": (0, 1, 0, 0),
+ "sxzx": (0, 1, 1, 0),
+ "syzx": (1, 0, 0, 0),
+ "syzy": (1, 0, 1, 0),
+ "syxz": (1, 1, 0, 0),
+ "syxy": (1, 1, 1, 0),
+ "szxy": (2, 0, 0, 0),
+ "szxz": (2, 0, 1, 0),
+ "szyx": (2, 1, 0, 0),
+ "szyz": (2, 1, 1, 0),
+ "rzyx": (0, 0, 0, 1),
+ "rxyx": (0, 0, 1, 1),
+ "ryzx": (0, 1, 0, 1),
+ "rxzx": (0, 1, 1, 1),
+ "rxzy": (1, 0, 0, 1),
+ "ryzy": (1, 0, 1, 1),
+ "rzxy": (1, 1, 0, 1),
+ "ryxy": (1, 1, 1, 1),
+ "ryxz": (2, 0, 0, 1),
+ "rzxz": (2, 0, 1, 1),
+ "rxyz": (2, 1, 0, 1),
+ "rzyz": (2, 1, 1, 1),
+}
+
+_TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items())
+
+# For testing whether a number is close to zero
+_EPS4 = np.finfo(float).eps * 4.0
+
+
+def mat2euler(mat, axes="sxyz"):
+ """Return Euler angles from rotation matrix for specified axis sequence.
+
+ Note that many Euler angle triplets can describe one matrix.
+
+ Parameters
+ ----------
+ mat : array-like shape (3, 3) or (4, 4)
+ Rotation matrix or affine.
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+
+ Examples
+ --------
+ >>> R0 = euler2mat(1, 2, 3, 'syxz')
+ >>> al, be, ga = mat2euler(R0, 'syxz')
+ >>> R1 = euler2mat(al, be, ga, 'syxz')
+ >>> np.allclose(R0, R1)
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis
+ j = _NEXT_AXIS[i + parity]
+ k = _NEXT_AXIS[i - parity + 1]
+
+ M = np.array(mat, dtype=np.float64, copy=False)[:3, :3]
+ if repetition:
+ sy = math.sqrt(M[i, j] * M[i, j] + M[i, k] * M[i, k])
+ if sy > _EPS4:
+ ax = math.atan2(M[i, j], M[i, k])
+ ay = math.atan2(sy, M[i, i])
+ az = math.atan2(M[j, i], -M[k, i])
+ else:
+ ax = math.atan2(-M[j, k], M[j, j])
+ ay = math.atan2(sy, M[i, i])
+ az = 0.0
+ else:
+ cy = math.sqrt(M[i, i] * M[i, i] + M[j, i] * M[j, i])
+ if cy > _EPS4:
+ ax = math.atan2(M[k, j], M[k, k])
+ ay = math.atan2(-M[k, i], cy)
+ az = math.atan2(M[j, i], M[i, i])
+ else:
+ ax = math.atan2(-M[j, k], M[j, j])
+ ay = math.atan2(-M[k, i], cy)
+ az = 0.0
+
+ if parity:
+ ax, ay, az = -ax, -ay, -az
+ if frame:
+ ax, az = az, ax
+ return ax, ay, az
+
+
+def quat2mat(q):
+ """Calculate rotation matrix corresponding to quaternion
+
+ Parameters
+ ----------
+ q : 4 element array-like
+
+ Returns
+ -------
+ M : (3,3) array
+ Rotation matrix corresponding to input quaternion *q*
+
+ Notes
+ -----
+ Rotation matrix applies to column vectors, and is applied to the
+ left of coordinate vectors. The algorithm here allows quaternions that
+ have not been normalized.
+
+ References
+ ----------
+ Algorithm from http://en.wikipedia.org/wiki/Rotation_matrix#Quaternion
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> M = quat2mat([1, 0, 0, 0]) # Identity quaternion
+ >>> np.allclose(M, np.eye(3))
+ True
+ >>> M = quat2mat([0, 1, 0, 0]) # 180 degree rotn around axis 0
+ >>> np.allclose(M, np.diag([1, -1, -1]))
+ True
+ """
+ w, x, y, z = q
+ Nq = w * w + x * x + y * y + z * z
+ if Nq < _FLOAT_EPS:
+ return np.eye(3)
+ s = 2.0 / Nq
+ X = x * s
+ Y = y * s
+ Z = z * s
+ wX = w * X
+ wY = w * Y
+ wZ = w * Z
+ xX = x * X
+ xY = x * Y
+ xZ = x * Z
+ yY = y * Y
+ yZ = y * Z
+ zZ = z * Z
+ return np.array(
+ [
+ [1.0 - (yY + zZ), xY - wZ, xZ + wY],
+ [xY + wZ, 1.0 - (xX + zZ), yZ - wX],
+ [xZ - wY, yZ + wX, 1.0 - (xX + yY)],
+ ]
+ )
+
+
+# Checks if a matrix is a valid rotation matrix.
+def isrotation(
+ R: np.ndarray,
+ thresh=1e-6,
+) -> bool:
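+    """Check that ``R.T @ R`` is within `thresh` of the identity, the
+    orthogonality test used here for a rotation matrix.
+
+    Illustrative example (added here; not part of the copied transforms3d code):
+
+    >>> isrotation(np.eye(3))
+    True
+    """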
+ Rt = np.transpose(R)
+ shouldBeIdentity = np.dot(Rt, R)
+ iden = np.identity(3, dtype=R.dtype)
+ n = np.linalg.norm(iden - shouldBeIdentity)
+ return n < thresh
+
+
+def euler2mat(ai, aj, ak, axes="sxyz"):
+ """Return rotation matrix from Euler angles and axis sequence.
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ mat : array (3, 3)
+ Rotation matrix or affine.
+
+ Examples
+ --------
+ >>> R = euler2mat(1, 2, 3, 'syxz')
+ >>> np.allclose(np.sum(R[0]), -1.34786452)
+ True
+ >>> R = euler2mat(1, 2, 3, (0, 1, 0, 1))
+ >>> np.allclose(np.sum(R[0]), -0.383436184)
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis
+ j = _NEXT_AXIS[i + parity]
+ k = _NEXT_AXIS[i - parity + 1]
+
+ if frame:
+ ai, ak = ak, ai
+ if parity:
+ ai, aj, ak = -ai, -aj, -ak
+
+ si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak)
+ ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak)
+ cc, cs = ci * ck, ci * sk
+ sc, ss = si * ck, si * sk
+
+ M = np.eye(3)
+ if repetition:
+ M[i, i] = cj
+ M[i, j] = sj * si
+ M[i, k] = sj * ci
+ M[j, i] = sj * sk
+ M[j, j] = -cj * ss + cc
+ M[j, k] = -cj * cs - sc
+ M[k, i] = -sj * ck
+ M[k, j] = cj * sc + cs
+ M[k, k] = cj * cc - ss
+ else:
+ M[i, i] = cj * ck
+ M[i, j] = sj * sc - cs
+ M[i, k] = sj * cc + ss
+ M[j, i] = cj * sk
+ M[j, j] = sj * ss + cc
+ M[j, k] = sj * cs - sc
+ M[k, i] = -sj
+ M[k, j] = cj * si
+ M[k, k] = cj * ci
+ return M
+
+
+def euler2axangle(ai, aj, ak, axes="sxyz"):
+ """Return angle, axis corresponding to Euler angles, axis specification
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ vector : array shape (3,)
+ axis around which rotation occurs
+ theta : scalar
+ angle of rotation
+
+ Examples
+ --------
+ >>> vec, theta = euler2axangle(0, 1.5, 0, 'szyx')
+ >>> np.allclose(vec, [0, 1, 0])
+ True
+ >>> theta
+ 1.5
+ """
+ return quat2axangle(euler2quat(ai, aj, ak, axes))
+
+
+def euler2quat(ai, aj, ak, axes="sxyz"):
+ """Return `quaternion` from Euler angles and axis sequence `axes`
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ quat : array shape (4,)
+        Quaternion in w, x, y, z (real, then vector) format
+
+ Examples
+ --------
+ >>> q = euler2quat(1, 2, 3, 'ryxz')
+ >>> np.allclose(q, [0.435953, 0.310622, -0.718287, 0.444435])
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis + 1
+ j = _NEXT_AXIS[i + parity - 1] + 1
+ k = _NEXT_AXIS[i - parity] + 1
+
+ if frame:
+ ai, ak = ak, ai
+ if parity:
+ aj = -aj
+
+ ai = ai / 2.0
+ aj = aj / 2.0
+ ak = ak / 2.0
+ ci = math.cos(ai)
+ si = math.sin(ai)
+ cj = math.cos(aj)
+ sj = math.sin(aj)
+ ck = math.cos(ak)
+ sk = math.sin(ak)
+ cc = ci * ck
+ cs = ci * sk
+ sc = si * ck
+ ss = si * sk
+
+ q = np.empty((4,))
+ if repetition:
+ q[0] = cj * (cc - ss)
+ q[i] = cj * (cs + sc)
+ q[j] = sj * (cc + ss)
+ q[k] = sj * (cs - sc)
+ else:
+ q[0] = cj * cc + sj * ss
+ q[i] = cj * sc - sj * cs
+ q[j] = cj * ss + sj * cc
+ q[k] = cj * cs - sj * sc
+ if parity:
+ q[j] *= -1.0
+
+ return q
+
+
+def quat2axangle(quat, identity_thresh=None):
+ """Convert quaternion to rotation of angle around axis
+
+ Parameters
+ ----------
+ quat : 4 element sequence
+ w, x, y, z forming quaternion.
+ identity_thresh : None or scalar, optional
+ Threshold below which the norm of the vector part of the quaternion (x,
+ y, z) is deemed to be 0, leading to the identity rotation. None (the
+ default) leads to a threshold estimated based on the precision of the
+ input.
+
+ Returns
+ -------
+ theta : scalar
+ angle of rotation.
+ vector : array shape (3,)
+ axis around which rotation occurs.
+
+ Examples
+ --------
+ >>> vec, theta = quat2axangle([0, 1, 0, 0])
+ >>> vec
+ array([1., 0., 0.])
+ >>> np.allclose(theta, np.pi)
+ True
+
+ If this is an identity rotation, we return a zero angle and an arbitrary
+ vector:
+
+ >>> quat2axangle([1, 0, 0, 0])
+ (array([1., 0., 0.]), 0.0)
+
+ If any of the quaternion values are not finite, we return a NaN in the
+ angle, and an arbitrary vector:
+
+ >>> quat2axangle([1, np.inf, 0, 0])
+ (array([1., 0., 0.]), nan)
+
+ Notes
+ -----
+ A quaternion for which x, y, z are all equal to 0, is an identity rotation.
+ In this case we return a 0 angle and an arbitrary vector, here [1, 0, 0].
+
+ The algorithm allows for quaternions that have not been normalized.
+ """
+ quat = np.asarray(quat)
+ Nq = np.sum(quat**2)
+ if not np.isfinite(Nq):
+ return np.array([1.0, 0, 0]), float("nan")
+ if identity_thresh is None:
+ try:
+ identity_thresh = np.finfo(Nq.type).eps * 3
+ except (AttributeError, ValueError): # Not a numpy type or not float
+ identity_thresh = _FLOAT_EPS * 3
+ if Nq < _FLOAT_EPS**2: # Results unreliable after normalization
+ return np.array([1.0, 0, 0]), 0.0
+ if Nq != 1: # Normalize if not normalized
+ s = math.sqrt(Nq)
+ quat = quat / s
+ xyz = quat[1:]
+ len2 = np.sum(xyz**2)
+ if len2 < identity_thresh**2:
+ # if vec is nearly 0,0,0, this is an identity rotation
+ return np.array([1.0, 0, 0]), 0.0
+ # Make sure w is not slightly above 1 or below -1
+ theta = 2 * math.acos(max(min(quat[0], 1), -1))
+ return xyz / math.sqrt(len2), theta
+
+
+def quat2euler(quaternion, axes="sxyz"):
+ """Euler angles from `quaternion` for specified axis sequence `axes`
+
+ Parameters
+ ----------
+    quaternion : 4 element sequence
+ w, x, y, z of quaternion
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+
+ Examples
+ --------
+ >>> angles = quat2euler([0.99810947, 0.06146124, 0, 0])
+ >>> np.allclose(angles, [0.123, 0, 0])
+ True
+ """
+ return mat2euler(quat2mat(quaternion), axes)
\ No newline at end of file
diff --git a/simpler_env/policies/openpi/pi0_or_fast.py b/simpler_env/policies/openpi/pi0_or_fast.py
new file mode 100644
index 00000000..98397f66
--- /dev/null
+++ b/simpler_env/policies/openpi/pi0_or_fast.py
@@ -0,0 +1,287 @@
+from typing import Optional, Sequence, List
+import os
+import matplotlib.pyplot as plt
+import numpy as np
+from transforms3d.euler import euler2axangle
+from collections import deque
+from PIL import Image
+import torch
+import cv2 as cv
+from simpler_env.utils.action.action_ensemble import ActionEnsembler
+from .geometry import quat2mat, mat2euler
+from openpi_client import websocket_client_policy as _websocket_client_policy
+
+
+class OpenPiFastInference:
+ def __init__(
+ self,
+ saved_model_path: str = "pretrained/pi0",
+ unnorm_key: Optional[str] = None,
+ policy_setup: str = "widowx_bridge",
+ exec_horizon: int = 4,
+ image_size: list[int] = [224, 224],
+ action_scale: float = 1.0,
+ action_ensemble_temp: float = -0.8,
+ ) -> None:
+ gpu_idx = os.environ.get("GPU_IDX", 0)
+ self.device = f"cuda:{gpu_idx}"
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ if policy_setup == "widowx_bridge":
+ unnorm_key = "bridge_orig/1.0.0" if unnorm_key is None else unnorm_key
+ action_ensemble = True
+ self.sticky_gripper_num_repeat = 1
+ # EE pose in Bridge data was relative to a top-down pose, instead of robot base
+ self.default_rot = np.array([[0, 0, 1.0], [0, 1.0, 0], [-1.0, 0, 0]]) # https://github.com/rail-berkeley/bridge_data_robot/blob/b841131ecd512bafb303075bd8f8b677e0bf9f1f/widowx_envs/widowx_controller/src/widowx_controller/widowx_controller.py#L203
+ elif policy_setup == "google_robot":
+ unnorm_key = (
+ "fractal20220817_data/0.1.0" if unnorm_key is None else unnorm_key
+ )
+ action_ensemble = True
+ self.sticky_gripper_num_repeat = 10
+ else:
+ raise NotImplementedError(
+ f"Policy setup {policy_setup} not supported for octo models. The other datasets can be found in the huggingface config.json file."
+ )
+ self.policy_setup = policy_setup
+ self.unnorm_key = unnorm_key
+
+ print(f"*** policy_setup: {policy_setup}, unnorm_key: {unnorm_key} ***")
+
+        # the pi0/pi0-FAST policy runs in a separate openpi server process; connect to it over websocket
+ host: str = "0.0.0.0"
+ port: int = 8000
+ self.policy_client = _websocket_client_policy.WebsocketClientPolicy(host, port)
+
+ self.image_size = image_size
+ self.action_scale = action_scale
+ self.obs_horizon = 1
+ self.obs_interval = 1
+ self.pred_action_horizon = 5
+ self.image_history = deque(maxlen=self.obs_horizon)
+ self.exec_horizon = exec_horizon
+
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+
+ self.action_ensemble = action_ensemble
+ self.action_ensemble_temp = action_ensemble_temp
+
+ if self.action_ensemble:
+ self.action_ensembler = ActionEnsembler(
+ self.pred_action_horizon, self.action_ensemble_temp
+ )
+ else:
+ self.action_ensembler = None
+
+ self.task = None
+ self.task_description = None
+
+ def reset(self, task_description: str) -> None:
+ self.image_history.clear()
+ if self.action_ensemble:
+ self.action_ensembler.reset()
+ self.task_description = task_description
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+ self.action_plan = deque()
+
+    def preprocess_widowx_proprio(self, eef_pos) -> np.ndarray:
+ """convert ee rotation to the frame of top-down
+ https://github.com/allenzren/open-pi-zero/blob/c3df7fb062175c16f69d7ca4ce042958ea238fb7/src/agent/env_adapter/simpler.py#L167
+ """
+ # StateEncoding.POS_EULER: xyz + rpy + pad + gripper(openness)
+ proprio = eef_pos
+ rm_bridge = quat2mat(proprio[3:7])
+ rpy_bridge_converted = mat2euler(rm_bridge @ self.default_rot.T)
+ gripper_openness = proprio[7] # from simpler, 0 for close, 1 for open
+ raw_proprio = np.concatenate(
+ [
+ proprio[:3],
+ rpy_bridge_converted,
+ np.zeros(1),
+ [gripper_openness],
+ ]
+ )
+ return raw_proprio
+
+    def preprocess_google_robot_proprio(self, eef_pos) -> np.ndarray:
+ """convert wxyz quat from simpler to xyzw used in fractal
+ https://github.com/allenzren/open-pi-zero/blob/c3df7fb062175c16f69d7ca4ce042958ea238fb7/src/agent/env_adapter/simpler.py#L204
+ """
+ # StateEncoding.POS_QUAT: xyz + q_xyzw + gripper(closeness)
+ quat_xyzw = np.roll(eef_pos[3:7], -1)
+        gripper_width = eef_pos[7]  # from simpler, 0 for close, 1 for open
+ # need invert as the training data comes from closeness
+ gripper_closedness = (
+ 1 - gripper_width
+ ) # TODO(allenzren): change fractal data processing in training so also use gripper openness in proprio (as in bridge) instead of closedness
+ raw_proprio = np.concatenate(
+ (
+ eef_pos[:3],
+ quat_xyzw,
+ [gripper_closedness],
+ )
+ )
+ return raw_proprio
+
+ def step(
+ self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs
+ ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
+ """
+ Input:
+ image: np.ndarray of shape (H, W, 3), uint8
+ task_description: Optional[str], task description; if different from previous task description, policy state is reset
+ Output:
+ raw_action: dict; raw policy action output
+ action: dict; processed action to be sent to the maniskill2 environment, with the following keys:
+ - 'world_vector': np.ndarray of shape (3,), xyz translation of robot end-effector
+ - 'rot_axangle': np.ndarray of shape (3,), axis-angle representation of end-effector rotation
+ - 'gripper': np.ndarray of shape (1,), gripper action
+ - 'terminate_episode': np.ndarray of shape (1,), 1 if episode should be terminated, 0 otherwise
+ """
+ if task_description is not None:
+ if task_description != self.task_description:
+ self.reset(task_description)
+
+ assert image.dtype == np.uint8
+ image = self._resize_image(image)
+ self._add_image_to_history(image)
+ images: List[Image.Image] = self._obtain_image_history()
+
+ eef_pos = kwargs.get("eef_pos", None)
+ if self.policy_setup == "widowx_bridge":
+ state = self.preprocess_widowx_proprio(eef_pos)
+ image_key = "observation.images.image_0"
+ elif self.policy_setup == "google_robot":
+ state = self.preprocess_google_robot_proprio(eef_pos)
+ image_key = "observation.images.image"
+
+ # if self.action_ensemble:
+ # raw_actions = self.action_ensembler.ensemble_action(raw_actions)[None]
+
+ if not self.action_plan:
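+            # query the remote openpi policy server with proprio, the latest frame,
+            # and the task prompt; the server returns a chunk of future actions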
+ observation = {
+ "observation/state": state,
+ "observation/primary_image": np.array(images[0]),
+ "prompt": task_description,
+ }
+ action_chunk = self.policy_client.infer(observation)["actions"][:self.pred_action_horizon]
+ self.action_plan.extend(action_chunk[: self.exec_horizon])
+
+ raw_actions = self.action_plan.popleft()
+
+ raw_action = {
+ "world_vector": np.array(raw_actions[:3]),
+ "rotation_delta": np.array(raw_actions[3:6]),
+ "open_gripper": np.array(
+ raw_actions[6:7]
+ ), # range [0, 1]; 1 = open; 0 = close
+ }
+
+ # process raw_action to obtain the action to be sent to the maniskill2 environment
+ action = {}
+ action["world_vector"] = raw_action["world_vector"] * self.action_scale
+ action_rotation_delta = np.asarray(
+ raw_action["rotation_delta"], dtype=np.float64
+ )
+ roll, pitch, yaw = action_rotation_delta
+ action_rotation_ax, action_rotation_angle = euler2axangle(roll, pitch, yaw)
+ action_rotation_axangle = action_rotation_ax * action_rotation_angle
+ action["rot_axangle"] = action_rotation_axangle * self.action_scale
+
+ if self.policy_setup == "google_robot":
+ action["gripper"] = 0
+ current_gripper_action = raw_action["open_gripper"]
+ if self.previous_gripper_action is None:
+ relative_gripper_action = np.array([0])
+ self.previous_gripper_action = current_gripper_action
+ else:
+ relative_gripper_action = self.previous_gripper_action - current_gripper_action
+
+ # fix a bug in the SIMPLER code here
+ # self.previous_gripper_action = current_gripper_action
+
+ if np.abs(relative_gripper_action) > 0.5 and (not self.sticky_action_is_on):
+ self.sticky_action_is_on = True
+ self.sticky_gripper_action = relative_gripper_action
+ self.previous_gripper_action = current_gripper_action
+
+ if self.sticky_action_is_on:
+ self.gripper_action_repeat += 1
+ relative_gripper_action = self.sticky_gripper_action
+
+ if self.gripper_action_repeat == self.sticky_gripper_num_repeat:
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+
+ action["gripper"] = relative_gripper_action
+
+ elif self.policy_setup == "widowx_bridge":
+ action["gripper"] = 2.0 * (raw_action["open_gripper"] > 0.5) - 1.0
+
+ action["terminate_episode"] = np.array([0.0])
+ return raw_action, action
+
+ def _resize_image(self, image: np.ndarray) -> np.ndarray:
+ image = cv.resize(image, tuple(self.image_size), interpolation=cv.INTER_AREA)
+ return image
+
+ def _add_image_to_history(self, image: np.ndarray) -> None:
+ if len(self.image_history) == 0:
+ self.image_history.extend([image] * self.obs_horizon)
+ else:
+ self.image_history.append(image)
+
+ def _obtain_image_history(self) -> List[Image.Image]:
+ image_history = list(self.image_history)
+ images = image_history[:: self.obs_interval]
+ # images = [Image.fromarray(image).convert("RGB") for image in images]
+ return images
+
+ def visualize_epoch(
+ self,
+ predicted_raw_actions: Sequence[np.ndarray],
+ images: Sequence[np.ndarray],
+ save_path: str,
+ ) -> None:
+ images = [self._resize_image(image) for image in images]
+ ACTION_DIM_LABELS = ["x", "y", "z", "roll", "pitch", "yaw", "grasp"]
+
+        img_strip = np.concatenate(np.array(images[::3]), axis=1)
+
+        # set up plt figure
+        figure_layout = [["image"] * len(ACTION_DIM_LABELS), ACTION_DIM_LABELS]
+        plt.rcParams.update({"font.size": 12})
+        fig, axs = plt.subplot_mosaic(figure_layout)
+        fig.set_size_inches([45, 10])
+
+        # plot actions
+        pred_actions = np.array(
+            [
+                np.concatenate(
+                    [a["world_vector"], a["rotation_delta"], a["open_gripper"]], axis=-1
+                )
+                for a in predicted_raw_actions
+            ]
+        )
+        for action_dim, action_label in enumerate(ACTION_DIM_LABELS):
+            # actions have batch, horizon, dim, in this example we just take the first action for simplicity
+            axs[action_label].plot(
+                pred_actions[:, action_dim], label="predicted action"
+            )
+            axs[action_label].set_title(action_label)
+            axs[action_label].set_xlabel("Time in one episode")
+
+        axs["image"].imshow(img_strip)
+        axs["image"].set_xlabel("Time in one episode (subsampled)")
+        plt.legend()
+        plt.savefig(save_path)
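+
+
+# Illustrative usage sketch (assumes an openpi policy server is already listening
+# on 0.0.0.0:8000, which this client connects to at construction; the task string
+# below is hypothetical):
+#   policy = OpenPiFastInference(policy_setup="widowx_bridge")
+#   policy.reset("put the spoon on the towel")
+#   raw_action, action = policy.step(image, "put the spoon on the towel", eef_pos=eef_pos)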
diff --git a/simpler_env/policies/openvla/openvla_model.py b/simpler_env/policies/openvla/openvla_model.py
new file mode 100644
index 00000000..715c7796
--- /dev/null
+++ b/simpler_env/policies/openvla/openvla_model.py
@@ -0,0 +1,181 @@
+from typing import Optional, Sequence
+import os
+import matplotlib.pyplot as plt
+import numpy as np
+from transforms3d.euler import euler2axangle
+from transformers import AutoModelForVision2Seq, AutoProcessor
+from PIL import Image
+import torch
+import cv2 as cv
+
+
+class OpenVLAInference:
+ def __init__(
+ self,
+ saved_model_path: str = "openvla/openvla-7b",
+ unnorm_key: Optional[str] = None,
+ policy_setup: str = "widowx_bridge",
+ horizon: int = 1,
+ pred_action_horizon: int = 1,
+ exec_horizon: int = 1,
+ image_size: list[int] = [224, 224],
+ action_scale: float = 1.0,
+ ) -> None:
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ if policy_setup == "widowx_bridge":
+ unnorm_key = "bridge_orig" if unnorm_key is None else unnorm_key
+ self.sticky_gripper_num_repeat = 1
+ elif policy_setup == "google_robot":
+ unnorm_key = "fractal20220817_data" if unnorm_key is None else unnorm_key
+ self.sticky_gripper_num_repeat = 15
+ else:
+            raise NotImplementedError(
+                f"Policy setup {policy_setup} is not supported for OpenVLA models. Other dataset keys can be found in the model's config.json on Hugging Face."
+            )
+ self.policy_setup = policy_setup
+ self.unnorm_key = unnorm_key
+
+ print(f"*** policy_setup: {policy_setup}, unnorm_key: {unnorm_key} ***")
+ self.processor = AutoProcessor.from_pretrained(saved_model_path, trust_remote_code=True)
+ self.vla = AutoModelForVision2Seq.from_pretrained(
+ saved_model_path,
+ attn_implementation="flash_attention_2", # [Optional] Requires `flash_attn`
+ torch_dtype=torch.bfloat16,
+ low_cpu_mem_usage=True,
+ trust_remote_code=True,
+ ).cuda()
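+        # Note: attn_implementation="flash_attention_2" assumes the flash-attn package is installed;
+        # transformers also accepts attn_implementation="eager" as a slower fallback.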
+
+ self.image_size = image_size
+ self.action_scale = action_scale
+ self.horizon = horizon
+ self.pred_action_horizon = pred_action_horizon
+ self.exec_horizon = exec_horizon
+
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+
+ self.task = None
+ self.task_description = None
+ self.num_image_history = 0
+
+ def reset(self, task_description: str) -> None:
+ self.task_description = task_description
+ self.num_image_history = 0
+
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+
+ def step(
+ self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs
+ ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
+ """
+ Input:
+ image: np.ndarray of shape (H, W, 3), uint8
+ task_description: Optional[str], task description; if different from previous task description, policy state is reset
+ Output:
+ raw_action: dict; raw policy action output
+ action: dict; processed action to be sent to the maniskill2 environment, with the following keys:
+ - 'world_vector': np.ndarray of shape (3,), xyz translation of robot end-effector
+ - 'rot_axangle': np.ndarray of shape (3,), axis-angle representation of end-effector rotation
+ - 'gripper': np.ndarray of shape (1,), gripper action
+ - 'terminate_episode': np.ndarray of shape (1,), 1 if episode should be terminated, 0 otherwise
+ """
+ if task_description is not None:
+ if task_description != self.task_description:
+ self.reset(task_description)
+
+ assert image.dtype == np.uint8
+ image = self._resize_image(image)
+
+ image: Image.Image = Image.fromarray(image)
+ prompt = task_description
+
+ # predict action (7-dof; un-normalize for bridgev2)
+ inputs = self.processor(prompt, image).to("cuda:0", dtype=torch.bfloat16)
+ raw_actions = self.vla.predict_action(**inputs, unnorm_key=self.unnorm_key, do_sample=False)[None]
+ # print(f"*** raw actions {raw_actions} ***")
+
+ raw_action = {
+ "world_vector": np.array(raw_actions[0, :3]),
+ "rotation_delta": np.array(raw_actions[0, 3:6]),
+ "open_gripper": np.array(raw_actions[0, 6:7]), # range [0, 1]; 1 = open; 0 = close
+ }
+
+ # process raw_action to obtain the action to be sent to the maniskill2 environment
+ action = {}
+ action["world_vector"] = raw_action["world_vector"] * self.action_scale
+ action_rotation_delta = np.asarray(raw_action["rotation_delta"], dtype=np.float64)
+ roll, pitch, yaw = action_rotation_delta
+ action_rotation_ax, action_rotation_angle = euler2axangle(roll, pitch, yaw)
+ action_rotation_axangle = action_rotation_ax * action_rotation_angle
+ action["rot_axangle"] = action_rotation_axangle * self.action_scale
+
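+        # Sticky gripper handling for the google_robot setup: the policy outputs an absolute
+        # openness in [0, 1], which is converted to a relative command; once the change exceeds
+        # 0.5 in magnitude, that command is latched and repeated for sticky_gripper_num_repeat
+        # control steps (mirroring the gripper handling of the original SimplerEnv policies).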
+ if self.policy_setup == "google_robot":
+ current_gripper_action = raw_action["open_gripper"]
+ if self.previous_gripper_action is None:
+ relative_gripper_action = np.array([0])
+ else:
+ relative_gripper_action = self.previous_gripper_action - current_gripper_action
+ self.previous_gripper_action = current_gripper_action
+
+ if np.abs(relative_gripper_action) > 0.5 and (not self.sticky_action_is_on):
+ self.sticky_action_is_on = True
+ self.sticky_gripper_action = relative_gripper_action
+
+ if self.sticky_action_is_on:
+ self.gripper_action_repeat += 1
+ relative_gripper_action = self.sticky_gripper_action
+
+ if self.gripper_action_repeat == self.sticky_gripper_num_repeat:
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+
+ action["gripper"] = relative_gripper_action
+
+ elif self.policy_setup == "widowx_bridge":
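+            # map the predicted openness in [0, 1] to a binary {-1, +1} gripper command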
+ action["gripper"] = 2.0 * (raw_action["open_gripper"] > 0.5) - 1.0
+
+ action["terminate_episode"] = np.array([0.0])
+
+ return raw_action, action
+
+ def _resize_image(self, image: np.ndarray) -> np.ndarray:
+ image = cv.resize(image, tuple(self.image_size), interpolation=cv.INTER_AREA)
+ return image
+
+ def visualize_epoch(
+ self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str
+ ) -> None:
+ images = [self._resize_image(image) for image in images]
+ ACTION_DIM_LABELS = ["x", "y", "z", "roll", "pitch", "yaw", "grasp"]
+
+ img_strip = np.concatenate(np.array(images[::3]), axis=1)
+
+ # set up plt figure
+ figure_layout = [["image"] * len(ACTION_DIM_LABELS), ACTION_DIM_LABELS]
+ plt.rcParams.update({"font.size": 12})
+ fig, axs = plt.subplot_mosaic(figure_layout)
+ fig.set_size_inches([45, 10])
+
+ # plot actions
+ pred_actions = np.array(
+ [
+ np.concatenate([a["world_vector"], a["rotation_delta"], a["open_gripper"]], axis=-1)
+ for a in predicted_raw_actions
+ ]
+ )
+ for action_dim, action_label in enumerate(ACTION_DIM_LABELS):
+ # actions have batch, horizon, dim, in this example we just take the first action for simplicity
+ axs[action_label].plot(pred_actions[:, action_dim], label="predicted action")
+ axs[action_label].set_title(action_label)
+ axs[action_label].set_xlabel("Time in one episode")
+
+ axs["image"].imshow(img_strip)
+ axs["image"].set_xlabel("Time in one episode (subsampled)")
+ plt.legend()
+ plt.savefig(save_path)
diff --git a/simpler_env/policies/rt1/rt1_model.py b/simpler_env/policies/rt1/rt1_model.py
index c838217b..cf52726d 100644
--- a/simpler_env/policies/rt1/rt1_model.py
+++ b/simpler_env/policies/rt1/rt1_model.py
@@ -116,7 +116,9 @@ def reset(self, task_description: str) -> None:
self._initialize_task_description(task_description)
@staticmethod
- def _small_action_filter_google_robot(raw_action: dict[str, np.ndarray | tf.Tensor], arm_movement: bool = False, gripper: bool = True) -> dict[str, np.ndarray | tf.Tensor]:
+ def _small_action_filter_google_robot(
+ raw_action: dict[str, np.ndarray | tf.Tensor], arm_movement: bool = False, gripper: bool = True
+ ) -> dict[str, np.ndarray | tf.Tensor]:
# small action filtering for google robot
if arm_movement:
raw_action["world_vector"] = tf.where(
@@ -147,7 +149,9 @@ def _small_action_filter_google_robot(raw_action: dict[str, np.ndarray | tf.Tens
)
return raw_action
- def step(self, image: np.ndarray, task_description: Optional[str] = None) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
+ def step(
+ self, image: np.ndarray, task_description: Optional[str] = None
+ ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
"""
Input:
image: np.ndarray of shape (H, W, 3), uint8
@@ -165,7 +169,7 @@ def step(self, image: np.ndarray, task_description: Optional[str] = None) -> tup
# task description has changed; update language embedding
# self._initialize_task_description(task_description)
self.reset(task_description)
-
+
assert image.dtype == np.uint8
image = self._resize_image(image)
self.observation["image"] = image
@@ -229,7 +233,9 @@ def step(self, image: np.ndarray, task_description: Optional[str] = None) -> tup
return raw_action, action
- def visualize_epoch(self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str) -> None:
+ def visualize_epoch(
+ self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str
+ ) -> None:
images = [self._resize_image(image) for image in images]
predicted_action_name_to_values_over_time = defaultdict(list)
figure_layout = [
diff --git a/simpler_env/policies/sim_cogact/__init__.py b/simpler_env/policies/sim_cogact/__init__.py
new file mode 100644
index 00000000..43a37cfc
--- /dev/null
+++ b/simpler_env/policies/sim_cogact/__init__.py
@@ -0,0 +1,2 @@
+from .cogact_policy import CogACTInference
+from .adaptive_ensemble import AdaptiveEnsembler
\ No newline at end of file
diff --git a/simpler_env/policies/sim_cogact/adaptive_ensemble.py b/simpler_env/policies/sim_cogact/adaptive_ensemble.py
new file mode 100644
index 00000000..520f85cc
--- /dev/null
+++ b/simpler_env/policies/sim_cogact/adaptive_ensemble.py
@@ -0,0 +1,44 @@
+"""
+adaptive_ensemble.py
+
+Adaptive ensembling of action predictions: each incoming prediction is averaged
+with the prediction history, weighted by cosine similarity to the most recent
+prediction (used by the CogACT policy wrapper).
+"""
+
+from collections import deque
+import numpy as np
+
+
+class AdaptiveEnsembler:
+ def __init__(self, pred_action_horizon, adaptive_ensemble_alpha=0.0):
+ self.pred_action_horizon = pred_action_horizon
+ self.action_history = deque(maxlen=self.pred_action_horizon)
+ self.adaptive_ensemble_alpha = adaptive_ensemble_alpha
+
+ def reset(self):
+ self.action_history.clear()
+
+ def ensemble_action(self, cur_action):
+ self.action_history.append(cur_action)
+ num_actions = len(self.action_history)
+ if cur_action.ndim == 1:
+ curr_act_preds = np.stack(self.action_history)
+ else:
+ curr_act_preds = np.stack(
+ [pred_actions[i] for (i, pred_actions) in zip(range(num_actions - 1, -1, -1), self.action_history)]
+ )
+
+ # calculate cosine similarity between the current prediction and all previous predictions
+ ref = curr_act_preds[num_actions-1, :]
+ previous_pred = curr_act_preds
+ dot_product = np.sum(previous_pred * ref, axis=1)
+ norm_previous_pred = np.linalg.norm(previous_pred, axis=1)
+ norm_ref = np.linalg.norm(ref)
+ cos_similarity = dot_product / (norm_previous_pred * norm_ref + 1e-7)
+
+ # compute the weights for each prediction
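+        # (adaptive_ensemble_alpha = 0 gives a uniform average over the history; larger values
+        # upweight predictions that agree, i.e. have higher cosine similarity, with the newest one)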
+ weights = np.exp(self.adaptive_ensemble_alpha * cos_similarity)
+ weights = weights / weights.sum()
+
+ # compute the weighted average across all predictions for this timestep
+ cur_action = np.sum(weights[:, None] * curr_act_preds, axis=0)
+
+ return cur_action
\ No newline at end of file
diff --git a/simpler_env/policies/sim_cogact/cogact_policy.py b/simpler_env/policies/sim_cogact/cogact_policy.py
new file mode 100644
index 00000000..f3609018
--- /dev/null
+++ b/simpler_env/policies/sim_cogact/cogact_policy.py
@@ -0,0 +1,232 @@
+"""
+cogact_policy.py
+
+CogACT policy wrapper for SimplerEnv / ManiSkill2 evaluation: loads a CogACT VLA
+checkpoint and converts its predicted 7-DoF end-effector actions into the
+environment's action format.
+"""
+from collections import deque
+from typing import Optional, Sequence
+import os
+from PIL import Image
+import torch
+import cv2 as cv
+import matplotlib.pyplot as plt
+import numpy as np
+
+from transforms3d.euler import euler2axangle
+from transformers import AutoModelForVision2Seq, AutoProcessor
+
+
+from vla import load_vla
+from .adaptive_ensemble import AdaptiveEnsembler
+
+class CogACTInference:
+ def __init__(
+ self,
+ saved_model_path: str = 'CogACT/CogACT-Base',
+ unnorm_key: Optional[str] = None,
+ policy_setup: str = "widowx_bridge",
+ horizon: int = 0,
+ action_ensemble_horizon: Optional[int] = None,
+ image_size: list[int] = [224, 224],
+ future_action_window_size: int = 15,
+ action_dim: int = 7,
+ action_model_type: str = "DiT-L",
+ action_scale: float = 1.0,
+ cfg_scale: float = 1.5,
+ use_ddim: bool = True,
+ num_ddim_steps: int = 10,
+ use_bf16: bool = True,
+        action_ensemble: bool = True,
+        adaptive_ensemble_alpha: float = 0.1,
+ ) -> None:
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ if policy_setup == "widowx_bridge":
+ unnorm_key = "bridge_orig" if unnorm_key is None else unnorm_key
+ action_ensemble = action_ensemble
+ adaptive_ensemble_alpha = adaptive_ensemble_alpha
+ if action_ensemble_horizon is None:
+                # Set the ensemble horizon to 7 for widowx_bridge to fix the window size of the motion scale between frames; see the appendix of the CogACT paper for details.
+ action_ensemble_horizon = 7
+ self.sticky_gripper_num_repeat = 1
+ elif policy_setup == "google_robot":
+ unnorm_key = "fractal20220817_data" if unnorm_key is None else unnorm_key
+ action_ensemble = action_ensemble
+ adaptive_ensemble_alpha = adaptive_ensemble_alpha
+ if action_ensemble_horizon is None:
+                # Set the ensemble horizon to 2 for google_robot to fix the window size of the motion scale between frames; see the appendix of the CogACT paper for details.
+ action_ensemble_horizon = 2
+ self.sticky_gripper_num_repeat = 10
+ else:
+            raise NotImplementedError(
+                f"Policy setup {policy_setup} is not supported for CogACT models. Other dataset keys can be found in the model's config.json on Hugging Face."
+            )
+ self.policy_setup = policy_setup
+ self.unnorm_key = unnorm_key
+
+ print(f"*** policy_setup: {policy_setup}, unnorm_key: {unnorm_key} ***")
+ self.use_ddim = use_ddim
+ self.num_ddim_steps = num_ddim_steps
+ self.vla = load_vla(
+ saved_model_path, # choose from ['CogACT/CogACT-Small', 'CogACT/CogACT-Base', 'CogACT/CogACT-Large'] or the local path
+ load_for_training=False,
+ action_model_type=action_model_type, # choose from ['DiT-Small', 'DiT-Base', 'DiT-Large'] to match the model weight
+ future_action_window_size=future_action_window_size,
+ action_dim=action_dim,
+ )
+
+ if use_bf16:
+ self.vla.vlm = self.vla.vlm.to(torch.bfloat16)
+ self.vla = self.vla.to("cuda").eval()
+ self.cfg_scale = cfg_scale
+
+ self.image_size = image_size
+ self.action_scale = action_scale
+ self.horizon = horizon
+ self.action_ensemble = action_ensemble
+ self.adaptive_ensemble_alpha = adaptive_ensemble_alpha
+ self.action_ensemble_horizon = action_ensemble_horizon
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+
+ self.task_description = None
+ self.image_history = deque(maxlen=self.horizon)
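+        # note: with the default horizon=0 the deque keeps no frames; CogACT's step() only
+        # consumes the current image, so the image history is effectively unused here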
+ if self.action_ensemble:
+ self.action_ensembler = AdaptiveEnsembler(self.action_ensemble_horizon, self.adaptive_ensemble_alpha)
+ else:
+ self.action_ensembler = None
+ self.num_image_history = 0
+
+ def _add_image_to_history(self, image: np.ndarray) -> None:
+ self.image_history.append(image)
+ self.num_image_history = min(self.num_image_history + 1, self.horizon)
+
+ def reset(self, task_description: str) -> None:
+ self.task_description = task_description
+ self.image_history.clear()
+ if self.action_ensemble:
+ self.action_ensembler.reset()
+ self.num_image_history = 0
+
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+
+ def step(
+ self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs
+ ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
+ """
+ Input:
+ image: np.ndarray of shape (H, W, 3), uint8
+ task_description: Optional[str], task description; if different from previous task description, policy state is reset
+ Output:
+ raw_action: dict; raw policy action output
+ action: dict; processed action to be sent to the maniskill2 environment, with the following keys:
+ - 'world_vector': np.ndarray of shape (3,), xyz translation of robot end-effector
+ - 'rot_axangle': np.ndarray of shape (3,), axis-angle representation of end-effector rotation
+ - 'gripper': np.ndarray of shape (1,), gripper action
+ - 'terminate_episode': np.ndarray of shape (1,), 1 if episode should be terminated, 0 otherwise
+ """
+ if task_description is not None:
+ if task_description != self.task_description:
+ self.reset(task_description)
+
+ assert image.dtype == np.uint8
+ self._add_image_to_history(self._resize_image(image))
+ image: Image.Image = Image.fromarray(image)
+        raw_actions, normalized_actions = self.vla.predict_action(
+            image=image,
+            instruction=self.task_description,
+            unnorm_key=self.unnorm_key,
+            do_sample=False,
+            cfg_scale=self.cfg_scale,
+            use_ddim=self.use_ddim,
+            num_ddim_steps=self.num_ddim_steps,
+        )
+
+ if self.action_ensemble:
+ raw_actions = self.action_ensembler.ensemble_action(raw_actions)[None]
+ raw_action = {
+ "world_vector": np.array(raw_actions[0, :3]),
+ "rotation_delta": np.array(raw_actions[0, 3:6]),
+ "open_gripper": np.array(raw_actions[0, 6:7]), # range [0, 1]; 1 = open; 0 = close
+ }
+
+ # process raw_action to obtain the action to be sent to the maniskill2 environment
+ action = {}
+ action["world_vector"] = raw_action["world_vector"] * self.action_scale
+ action_rotation_delta = np.asarray(raw_action["rotation_delta"], dtype=np.float64)
+
+ roll, pitch, yaw = action_rotation_delta
+ axes, angles = euler2axangle(roll, pitch, yaw)
+ action_rotation_axangle = axes * angles
+ action["rot_axangle"] = action_rotation_axangle * self.action_scale
+
+ if self.policy_setup == "google_robot":
+ action["gripper"] = 0
+ current_gripper_action = raw_action["open_gripper"]
+ if self.previous_gripper_action is None:
+ relative_gripper_action = np.array([0])
+ self.previous_gripper_action = current_gripper_action
+ else:
+ relative_gripper_action = self.previous_gripper_action - current_gripper_action
+ # fix a bug in the SIMPLER code here
+ # self.previous_gripper_action = current_gripper_action
+
+ if np.abs(relative_gripper_action) > 0.5 and (not self.sticky_action_is_on):
+ self.sticky_action_is_on = True
+ self.sticky_gripper_action = relative_gripper_action
+ self.previous_gripper_action = current_gripper_action
+
+ if self.sticky_action_is_on:
+ self.gripper_action_repeat += 1
+ relative_gripper_action = self.sticky_gripper_action
+
+ if self.gripper_action_repeat == self.sticky_gripper_num_repeat:
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+
+ action["gripper"] = relative_gripper_action
+
+ elif self.policy_setup == "widowx_bridge":
+ action["gripper"] = 2.0 * (raw_action["open_gripper"] > 0.5) - 1.0
+
+ action["terminate_episode"] = np.array([0.0])
+ return raw_action, action
+
+ def _resize_image(self, image: np.ndarray) -> np.ndarray:
+ image = cv.resize(image, tuple(self.image_size), interpolation=cv.INTER_AREA)
+ return image
+
+ def visualize_epoch(
+ self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str
+ ) -> None:
+ images = [self._resize_image(image) for image in images]
+ ACTION_DIM_LABELS = ["x", "y", "z", "roll", "pitch", "yaw", "grasp"]
+
+ img_strip = np.concatenate(np.array(images[::3]), axis=1)
+
+ # set up plt figure
+ figure_layout = [["image"] * len(ACTION_DIM_LABELS), ACTION_DIM_LABELS]
+ plt.rcParams.update({"font.size": 12})
+ fig, axs = plt.subplot_mosaic(figure_layout)
+ fig.set_size_inches([45, 10])
+
+ # plot actions
+ pred_actions = np.array(
+ [
+ np.concatenate([a["world_vector"], a["rotation_delta"], a["open_gripper"]], axis=-1)
+ for a in predicted_raw_actions
+ ]
+ )
+ for action_dim, action_label in enumerate(ACTION_DIM_LABELS):
+ # actions have batch, horizon, dim, in this example we just take the first action for simplicity
+ axs[action_label].plot(pred_actions[:, action_dim], label="predicted action")
+ axs[action_label].set_title(action_label)
+ axs[action_label].set_xlabel("Time in one episode")
+
+ axs["image"].imshow(img_strip)
+ axs["image"].set_xlabel("Time in one episode (subsampled)")
+ plt.legend()
+ plt.savefig(save_path)
diff --git a/scripts/rt1x_bridge.sh b/simpler_env/policies/sim_cogact/scripts/cogact_bridge.sh
similarity index 89%
rename from scripts/rt1x_bridge.sh
rename to simpler_env/policies/sim_cogact/scripts/cogact_bridge.sh
index c4608ce0..2a3f8827 100644
--- a/scripts/rt1x_bridge.sh
+++ b/simpler_env/policies/sim_cogact/scripts/cogact_bridge.sh
@@ -1,8 +1,6 @@
-
-
gpu_id=0
-policy_model=rt1
-ckpt_path=checkpoints/rt_1_x_tf_trained_for_002272480_step/
+policy_model=cogact
+ckpt_path=CogACT/CogACT-Base # CogACT/CogACT-Large CogACT/CogACT-Small
scene_name=bridge_table_1_v1
robot=widowx
@@ -13,23 +11,23 @@ robot_init_y=0.028
CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} \
--robot ${robot} --policy-setup widowx_bridge \
- --control-freq 5 --sim-freq 500 --max-episode-steps 60 \
- --env-name PutCarrotOnPlateInScene-v0 --scene-name ${scene_name} \
+ --control-freq 5 --sim-freq 500 --max-episode-steps 120 \
+ --env-name StackGreenCubeOnYellowCubeBakedTexInScene-v0 --scene-name ${scene_name} \
--rgb-overlay-path ${rgb_overlay_path} \
--robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
--robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1;
CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} \
--robot ${robot} --policy-setup widowx_bridge \
- --control-freq 5 --sim-freq 500 --max-episode-steps 60 \
- --env-name StackGreenCubeOnYellowCubeBakedTexInScene-v0 --scene-name ${scene_name} \
+ --control-freq 5 --sim-freq 500 --max-episode-steps 120 \
+ --env-name PutCarrotOnPlateInScene-v0 --scene-name ${scene_name} \
--rgb-overlay-path ${rgb_overlay_path} \
--robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
--robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1;
CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} \
--robot ${robot} --policy-setup widowx_bridge \
- --control-freq 5 --sim-freq 500 --max-episode-steps 60 \
+ --control-freq 5 --sim-freq 500 --max-episode-steps 120 \
--env-name PutSpoonOnTableClothInScene-v0 --scene-name ${scene_name} \
--rgb-overlay-path ${rgb_overlay_path} \
--robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
@@ -48,6 +46,4 @@ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-mod
--env-name PutEggplantInBasketScene-v0 --scene-name ${scene_name} \
--rgb-overlay-path ${rgb_overlay_path} \
--robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
- --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1;
-
-
+ --robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1;
\ No newline at end of file
diff --git a/scripts/rt1_drawer_variant_agg.sh b/simpler_env/policies/sim_cogact/scripts/cogact_drawer_variant_agg.sh
similarity index 87%
rename from scripts/rt1_drawer_variant_agg.sh
rename to simpler_env/policies/sim_cogact/scripts/cogact_drawer_variant_agg.sh
index 983229b6..0bac2a90 100644
--- a/scripts/rt1_drawer_variant_agg.sh
+++ b/simpler_env/policies/sim_cogact/scripts/cogact_drawer_variant_agg.sh
@@ -1,14 +1,10 @@
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
-
-
+gpu_id=0
declare -a ckpt_paths=(
-"./checkpoints/rt_1_tf_trained_for_000400120/"
-"./checkpoints/rt_1_tf_trained_for_000058240/"
-"./checkpoints/rt_1_x_tf_trained_for_002272480_step/"
-"./checkpoints/rt_1_tf_trained_for_000001120/"
+"CogACT/CogACT-Base"
)
-
+# CogACT/CogACT-Large CogACT/CogACT-Small
declare -a env_names=(
OpenTopDrawerCustomInScene-v0
OpenMiddleDrawerCustomInScene-v0
@@ -27,7 +23,7 @@ scene_name=frl_apartment_stage_simple
EvalSim() {
echo ${ckpt_path} ${env_name}
- python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+    CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name ${scene_name} \
diff --git a/scripts/rt1_drawer_visual_matching.sh b/simpler_env/policies/sim_cogact/scripts/cogact_drawer_visual_matching.sh
similarity index 82%
rename from scripts/rt1_drawer_visual_matching.sh
rename to simpler_env/policies/sim_cogact/scripts/cogact_drawer_visual_matching.sh
index 77b78d2d..c302e827 100644
--- a/scripts/rt1_drawer_visual_matching.sh
+++ b/simpler_env/policies/sim_cogact/scripts/cogact_drawer_visual_matching.sh
@@ -1,12 +1,8 @@
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
-
-
+gpu_id=0
declare -a ckpt_paths=(
-"./checkpoints/rt_1_tf_trained_for_000400120/"
-"./checkpoints/rt_1_tf_trained_for_000058240/"
-"./checkpoints/rt_1_x_tf_trained_for_002272480_step/"
-"./checkpoints/rt_1_tf_trained_for_000001120/"
+"CogACT/CogACT-Base"
)
declare -a env_names=(
@@ -27,7 +23,7 @@ EXTRA_ARGS="--enable-raytracing --additional-env-build-kwargs station_name=mk_st
EvalOverlay() {
# A0
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -38,7 +34,7 @@ python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path}
${EXTRA_ARGS}
# A1
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -49,7 +45,7 @@ python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path}
${EXTRA_ARGS}
# A2
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -60,7 +56,7 @@ python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path}
${EXTRA_ARGS}
# B0
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -71,7 +67,7 @@ python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path}
${EXTRA_ARGS}
# B1
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -82,7 +78,7 @@ python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path}
${EXTRA_ARGS}
# B2
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -93,7 +89,7 @@ python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path}
${EXTRA_ARGS}
# C0
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -104,7 +100,7 @@ python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path}
${EXTRA_ARGS}
# C1
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -115,7 +111,7 @@ python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path}
${EXTRA_ARGS}
# C2
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 113 \
--env-name ${env_name} --scene-name dummy_drawer \
diff --git a/scripts/rt1_move_near_variant_agg.sh b/simpler_env/policies/sim_cogact/scripts/cogact_move_near_variant_agg.sh
similarity index 87%
rename from scripts/rt1_move_near_variant_agg.sh
rename to simpler_env/policies/sim_cogact/scripts/cogact_move_near_variant_agg.sh
index 7c95760c..b5b11b1c 100644
--- a/scripts/rt1_move_near_variant_agg.sh
+++ b/simpler_env/policies/sim_cogact/scripts/cogact_move_near_variant_agg.sh
@@ -1,11 +1,7 @@
-
gpu_id=0
-declare -a arr=("./checkpoints/rt_1_x_tf_trained_for_002272480_step/" \
- "./checkpoints/rt_1_tf_trained_for_000400120/" \
- "./checkpoints/rt_1_tf_trained_for_000058240/" \
- "./checkpoints/rt_1_tf_trained_for_000001120/")
-
+declare -a arr=("CogACT/CogACT-Base")
+# CogACT/CogACT-Large CogACT/CogACT-Small
for ckpt_path in "${arr[@]}"; do echo "$ckpt_path"; done
@@ -16,7 +12,7 @@ scene_name=google_pick_coke_can_1_v4
for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -31,7 +27,7 @@ done
for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -52,7 +48,7 @@ for scene_name in "${scene_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -74,7 +70,7 @@ scene_name=google_pick_coke_can_1_v4
for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -82,7 +78,7 @@ do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-
--robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 -0.09 -0.09 1 \
--additional-env-build-kwargs slightly_darker_lighting=True;
-CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -106,7 +102,7 @@ for scene_name in "${scene_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -130,7 +126,7 @@ for env_name in "${env_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
diff --git a/scripts/rt1_move_near_visual_matching.sh b/simpler_env/policies/sim_cogact/scripts/cogact_move_near_visual_matching.sh
similarity index 77%
rename from scripts/rt1_move_near_visual_matching.sh
rename to simpler_env/policies/sim_cogact/scripts/cogact_move_near_visual_matching.sh
index 1949333d..833865c0 100644
--- a/scripts/rt1_move_near_visual_matching.sh
+++ b/simpler_env/policies/sim_cogact/scripts/cogact_move_near_visual_matching.sh
@@ -1,10 +1,7 @@
gpu_id=0
-declare -a arr=("./checkpoints/rt_1_tf_trained_for_000400120/" \
- "./checkpoints/rt_1_x_tf_trained_for_002272480_step/" \
- "./checkpoints/rt_1_tf_trained_for_000058240/" \
- "./checkpoints/rt_1_tf_trained_for_000001120/")
+declare -a arr=("CogACT/CogACT-Base")
env_name=MoveNearGoogleBakedTexInScene-v0
# env_name=MoveNearGoogleBakedTexInScene-v1
@@ -21,7 +18,7 @@ for urdf_version in "${urdf_version_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
diff --git a/scripts/rt1_pick_coke_can_variant_agg.sh b/simpler_env/policies/sim_cogact/scripts/cogact_pick_coke_can_variant_agg.sh
similarity index 89%
rename from scripts/rt1_pick_coke_can_variant_agg.sh
rename to simpler_env/policies/sim_cogact/scripts/cogact_pick_coke_can_variant_agg.sh
index 63b36e1e..79d1a387 100644
--- a/scripts/rt1_pick_coke_can_variant_agg.sh
+++ b/simpler_env/policies/sim_cogact/scripts/cogact_pick_coke_can_variant_agg.sh
@@ -1,10 +1,6 @@
-
gpu_id=0
-declare -a arr=("./checkpoints/rt_1_x_tf_trained_for_002272480_step/" \
- "./checkpoints/rt_1_tf_trained_for_000400120/" \
- "./checkpoints/rt_1_tf_trained_for_000058240/" \
- "./checkpoints/rt_1_tf_trained_for_000001120/")
+declare -a arr=("CogACT/CogACT-Base")
# lr_switch=laying horizontally but flipped left-right to match real eval; upright=standing; laid_vertically=laying vertically
declare -a coke_can_options_arr=("lr_switch=True" "upright=True" "laid_vertically=True")
@@ -21,7 +17,7 @@ for coke_can_option in "${coke_can_options_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -49,7 +45,7 @@ do for scene_name in "${scene_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -75,7 +71,7 @@ for coke_can_option in "${coke_can_options_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -83,7 +79,7 @@ do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-
--robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
--additional-env-build-kwargs ${coke_can_option};
-CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -110,7 +106,7 @@ do for scene_name in "${scene_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -135,7 +131,7 @@ for coke_can_option in "${coke_can_options_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -143,7 +139,7 @@ do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-
--robot-init-rot-quat-center 0 0 0 1 --robot-init-rot-rpy-range 0 0 1 0 0 1 0 0 1 \
--additional-env-build-kwargs ${coke_can_option} slightly_darker_lighting=True;
-CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
@@ -170,7 +166,7 @@ do for env_name in "${env_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
diff --git a/scripts/rt1_pick_coke_can_visual_matching.sh b/simpler_env/policies/sim_cogact/scripts/cogact_pick_coke_can_visual_matching.sh
similarity index 80%
rename from scripts/rt1_pick_coke_can_visual_matching.sh
rename to simpler_env/policies/sim_cogact/scripts/cogact_pick_coke_can_visual_matching.sh
index b0086684..3e5fbc25 100644
--- a/scripts/rt1_pick_coke_can_visual_matching.sh
+++ b/simpler_env/policies/sim_cogact/scripts/cogact_pick_coke_can_visual_matching.sh
@@ -1,11 +1,6 @@
-
-
gpu_id=0
-declare -a arr=("./checkpoints/rt_1_tf_trained_for_000400120/" \
- "./checkpoints/rt_1_tf_trained_for_000058240/" \
- "./checkpoints/rt_1_x_tf_trained_for_002272480_step/" \
- "./checkpoints/rt_1_tf_trained_for_000001120/" )
+declare -a arr=("CogACT/CogACT-Base")
# lr_switch=laying horizontally but flipped left-right to match real eval; upright=standing; laid_vertically=laying vertically
declare -a coke_can_options_arr=("lr_switch=True" "upright=True" "laid_vertically=True")
@@ -27,7 +22,7 @@ do for coke_can_option in "${coke_can_options_arr[@]}";
do for ckpt_path in "${arr[@]}";
-do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 80 \
--env-name ${env_name} --scene-name ${scene_name} \
diff --git a/scripts/rt1_put_in_drawer_variant_agg.sh b/simpler_env/policies/sim_cogact/scripts/cogact_put_in_drawer_variant_agg.sh
similarity index 87%
rename from scripts/rt1_put_in_drawer_variant_agg.sh
rename to simpler_env/policies/sim_cogact/scripts/cogact_put_in_drawer_variant_agg.sh
index 8aacf728..04d11634 100644
--- a/scripts/rt1_put_in_drawer_variant_agg.sh
+++ b/simpler_env/policies/sim_cogact/scripts/cogact_put_in_drawer_variant_agg.sh
@@ -1,12 +1,9 @@
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
-
+gpu_id=0
declare -a ckpt_paths=(
-"./checkpoints/rt_1_tf_trained_for_000400120/"
-"./checkpoints/rt_1_tf_trained_for_000058240/"
-"./checkpoints/rt_1_x_tf_trained_for_002272480_step/"
-"./checkpoints/rt_1_tf_trained_for_000001120/"
+"CogACT/CogACT-Base"
)
declare -a env_names=(
@@ -22,7 +19,7 @@ scene_name=frl_apartment_stage_simple
EvalSim() {
echo ${ckpt_path} ${env_name}
- python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 200 \
--env-name ${env_name} --scene-name ${scene_name} \
diff --git a/scripts/rt1_put_in_drawer_visual_matching.sh b/simpler_env/policies/sim_cogact/scripts/cogact_put_in_drawer_visual_matching.sh
similarity index 83%
rename from scripts/rt1_put_in_drawer_visual_matching.sh
rename to simpler_env/policies/sim_cogact/scripts/cogact_put_in_drawer_visual_matching.sh
index 3d250f6d..c1b3d23e 100644
--- a/scripts/rt1_put_in_drawer_visual_matching.sh
+++ b/simpler_env/policies/sim_cogact/scripts/cogact_put_in_drawer_visual_matching.sh
@@ -1,10 +1,7 @@
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
-
+gpu_id=0
declare -a ckpt_paths=(
-"./checkpoints/rt_1_tf_trained_for_000400120/"
-"./checkpoints/rt_1_tf_trained_for_000058240/"
-"./checkpoints/rt_1_x_tf_trained_for_002272480_step/"
-"./checkpoints/rt_1_tf_trained_for_000001120/"
+"CogACT/CogACT-Base"
)
@@ -25,7 +22,7 @@ EXTRA_ARGS="--enable-raytracing --additional-env-build-kwargs station_name=mk_st
EvalOverlay() {
# A0
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 200 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -36,7 +33,7 @@ python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path}
${EXTRA_ARGS}
# B0
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 200 \
--env-name ${env_name} --scene-name dummy_drawer \
@@ -47,7 +44,7 @@ python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path}
${EXTRA_ARGS}
# C0
-python simpler_env/main_inference.py --policy-model rt1 --ckpt-path ${ckpt_path} \
+CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model cogact --ckpt-path ${ckpt_path} \
--robot google_robot_static \
--control-freq 3 --sim-freq 513 --max-episode-steps 200 \
--env-name ${env_name} --scene-name dummy_drawer \
diff --git a/scripts/octo_bridge.sh b/simpler_env/policies/sim_cogact/scripts/octo_bridge.sh
similarity index 92%
rename from scripts/octo_bridge.sh
rename to simpler_env/policies/sim_cogact/scripts/octo_bridge.sh
index 607498b0..e4f4d686 100644
--- a/scripts/octo_bridge.sh
+++ b/simpler_env/policies/sim_cogact/scripts/octo_bridge.sh
@@ -1,5 +1,3 @@
-
-
gpu_id=0
declare -a policy_models=(
"octo-small"
@@ -15,13 +13,13 @@ rgb_overlay_path=ManiSkill2_real2sim/data/real_inpainting/bridge_real_eval_1.png
robot_init_x=0.147
robot_init_y=0.028
-for init_rng in 0 2 4;
+for init_rng in 0 2 4 6 8;
do for policy_model in "${policy_models[@]}";
do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} \
--robot ${robot} --policy-setup widowx_bridge --octo-init-rng ${init_rng} \
- --control-freq 5 --sim-freq 500 --max-episode-steps 60 \
+ --control-freq 5 --sim-freq 500 --max-episode-steps 120 \
--env-name StackGreenCubeOnYellowCubeBakedTexInScene-v0 --scene-name ${scene_name} \
--rgb-overlay-path ${rgb_overlay_path} \
--robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
@@ -30,7 +28,7 @@ do CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-
CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} \
--robot ${robot} --policy-setup widowx_bridge --octo-init-rng ${init_rng} \
- --control-freq 5 --sim-freq 500 --max-episode-steps 60 \
+ --control-freq 5 --sim-freq 500 --max-episode-steps 120 \
--env-name PutCarrotOnPlateInScene-v0 --scene-name ${scene_name} \
--rgb-overlay-path ${rgb_overlay_path} \
--robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
@@ -39,7 +37,7 @@ CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-mod
CUDA_VISIBLE_DEVICES=${gpu_id} python simpler_env/main_inference.py --policy-model ${policy_model} --ckpt-path ${ckpt_path} \
--robot ${robot} --policy-setup widowx_bridge --octo-init-rng ${init_rng} \
- --control-freq 5 --sim-freq 500 --max-episode-steps 60 \
+ --control-freq 5 --sim-freq 500 --max-episode-steps 120 \
--env-name PutSpoonOnTableClothInScene-v0 --scene-name ${scene_name} \
--rgb-overlay-path ${rgb_overlay_path} \
--robot-init-x ${robot_init_x} ${robot_init_x} 1 --robot-init-y ${robot_init_y} ${robot_init_y} 1 --obj-variation-mode episode --obj-episode-range 0 24 \
@@ -60,7 +58,7 @@ rgb_overlay_path=ManiSkill2_real2sim/data/real_inpainting/bridge_sink.png
robot_init_x=0.127
robot_init_y=0.06
-for init_rng in 0 2 4;
+for init_rng in 0 2 4 6 8;
do for policy_model in "${policy_models[@]}";
diff --git a/simpler_env/policies/spatialvla/spatialvla_model.py b/simpler_env/policies/spatialvla/spatialvla_model.py
new file mode 100644
index 00000000..d1c2ba8f
--- /dev/null
+++ b/simpler_env/policies/spatialvla/spatialvla_model.py
@@ -0,0 +1,240 @@
+from typing import Optional, Sequence, List
+import os
+import matplotlib.pyplot as plt
+import numpy as np
+from transforms3d.euler import euler2axangle
+from transformers import AutoModel, AutoProcessor
+from collections import deque
+from PIL import Image
+import torch
+import cv2 as cv
+
+from simpler_env.utils.action.action_ensemble import ActionEnsembler
+
+
+class SpatialVLAInference:
+ def __init__(
+ self,
+ saved_model_path: str = "IPEC-COMMUNITY/spatialvla-4b-224-pt",
+ unnorm_key: Optional[str] = None,
+ policy_setup: str = "widowx_bridge",
+ exec_horizon: int = 1,
+ image_size: list[int] = [224, 224],
+ action_scale: float = 1.0,
+ action_ensemble_temp: float = -0.8,
+ ) -> None:
+ os.environ["TOKENIZERS_PARALLELISM"] = "false"
+ if policy_setup == "widowx_bridge":
+ unnorm_key = "bridge_orig/1.0.0" if unnorm_key is None else unnorm_key
+ action_ensemble = True
+ self.sticky_gripper_num_repeat = 1
+ elif policy_setup == "google_robot":
+ unnorm_key = (
+ "fractal20220817_data/0.1.0" if unnorm_key is None else unnorm_key
+ )
+ action_ensemble = True
+ self.sticky_gripper_num_repeat = 10
+ else:
+            raise NotImplementedError(
+                f"Policy setup {policy_setup} is not supported for SpatialVLA models. Other dataset keys can be found in the model's config.json on Hugging Face."
+            )
+ self.policy_setup = policy_setup
+ self.unnorm_key = unnorm_key
+
+ print(f"*** policy_setup: {policy_setup}, unnorm_key: {unnorm_key} ***")
+ self.processor = AutoProcessor.from_pretrained(
+ saved_model_path, trust_remote_code=True
+ )
+ self.vla = (
+ AutoModel.from_pretrained(
+ saved_model_path,
+ torch_dtype=torch.bfloat16,
+ trust_remote_code=True,
+ )
+ .eval()
+ .cuda()
+ )
+
+ self.image_size = image_size
+ self.action_scale = action_scale
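+        # observation window: keep obs_horizon frames and subsample every obs_delta frames,
+        # so each call feeds the processor num_obs_steps images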
+ self.obs_horizon = (self.processor.num_obs_steps - 1) * self.processor.obs_delta + 1
+ self.obs_interval = self.processor.obs_delta
+ self.pred_action_horizon = self.processor.action_chunk_size
+ self.image_history = deque(maxlen=self.obs_horizon)
+ self.exec_horizon = exec_horizon
+
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+
+ self.action_ensemble = action_ensemble
+ self.action_ensemble_temp = action_ensemble_temp
+
+ if self.action_ensemble:
+ self.action_ensembler = ActionEnsembler(
+ self.pred_action_horizon, self.action_ensemble_temp
+ )
+ else:
+ self.action_ensembler = None
+
+ self.task = None
+ self.task_description = None
+
+ def reset(self, task_description: str) -> None:
+ self.image_history.clear()
+ if self.action_ensemble:
+ self.action_ensembler.reset()
+ self.task_description = task_description
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+ self.previous_gripper_action = None
+
+ def step(
+ self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs
+ ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
+ """
+ Input:
+ image: np.ndarray of shape (H, W, 3), uint8
+ task_description: Optional[str], task description; if different from previous task description, policy state is reset
+ Output:
+ raw_action: dict; raw policy action output
+ action: dict; processed action to be sent to the maniskill2 environment, with the following keys:
+ - 'world_vector': np.ndarray of shape (3,), xyz translation of robot end-effector
+ - 'rot_axangle': np.ndarray of shape (3,), axis-angle representation of end-effector rotation
+ - 'gripper': np.ndarray of shape (1,), gripper action
+ - 'terminate_episode': np.ndarray of shape (1,), 1 if episode should be terminated, 0 otherwise
+ """
+ if task_description is not None:
+ if task_description != self.task_description:
+ self.reset(task_description)
+
+ assert image.dtype == np.uint8
+ image = self._resize_image(image)
+ self._add_image_to_history(image)
+ images: List[Image.Image] = self._obtain_image_history()
+ prompt = task_description
+
+ # predict action (7-dof; un-normalize for bridgev2)
+ inputs = self.processor(images=images, text=prompt, unnorm_key=self.unnorm_key, return_tensors="pt", do_normalize=False)
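+        # two inference paths: processors that ship an action tokenizer decode the generated
+        # tokens via decode_actions; otherwise predict_action returns the actions directly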
+ with torch.no_grad():
+ if hasattr(self.processor, "action_tokenizer"):
+ generation_outputs = self.vla.predict_action(inputs)
+ raw_actions = self.processor.decode_actions(
+ generation_outputs=generation_outputs,
+ unnorm_key=self.unnorm_key,
+ )["actions"]
+ else:
+ raw_actions = self.vla.predict_action(**inputs)["actions"]
+ raw_actions = raw_actions.cpu().numpy()
+
+ if self.action_ensemble:
+ raw_actions = self.action_ensembler.ensemble_action(raw_actions)[None]
+
+ raw_action = {
+ "world_vector": np.array(raw_actions[0, :3]),
+ "rotation_delta": np.array(raw_actions[0, 3:6]),
+ "open_gripper": np.array(
+ raw_actions[0, 6:7]
+ ), # range [0, 1]; 1 = open; 0 = close
+ }
+
+ # process raw_action to obtain the action to be sent to the maniskill2 environment
+ action = {}
+ action["world_vector"] = raw_action["world_vector"] * self.action_scale
+ action_rotation_delta = np.asarray(
+ raw_action["rotation_delta"], dtype=np.float64
+ )
+ roll, pitch, yaw = action_rotation_delta
+ action_rotation_ax, action_rotation_angle = euler2axangle(roll, pitch, yaw)
+ action_rotation_axangle = action_rotation_ax * action_rotation_angle
+ action["rot_axangle"] = action_rotation_axangle * self.action_scale
+
+ if self.policy_setup == "google_robot":
+ action["gripper"] = 0
+ current_gripper_action = raw_action["open_gripper"]
+ if self.previous_gripper_action is None:
+ relative_gripper_action = np.array([0])
+ self.previous_gripper_action = current_gripper_action
+ else:
+ relative_gripper_action = self.previous_gripper_action - current_gripper_action
+ # fix a bug in the SIMPLER code here
+ # self.previous_gripper_action = current_gripper_action
+
+ if np.abs(relative_gripper_action) > 0.5 and (not self.sticky_action_is_on):
+ self.sticky_action_is_on = True
+ self.sticky_gripper_action = relative_gripper_action
+ self.previous_gripper_action = current_gripper_action
+
+ if self.sticky_action_is_on:
+ self.gripper_action_repeat += 1
+ relative_gripper_action = self.sticky_gripper_action
+
+ if self.gripper_action_repeat == self.sticky_gripper_num_repeat:
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+
+ action["gripper"] = relative_gripper_action
+
+ elif self.policy_setup == "widowx_bridge":
+ action["gripper"] = 2.0 * (raw_action["open_gripper"] > 0.5) - 1.0
+
+ action["terminate_episode"] = np.array([0.0])
+ return raw_action, action
+
+ def _resize_image(self, image: np.ndarray) -> np.ndarray:
+ image = cv.resize(image, tuple(self.image_size), interpolation=cv.INTER_AREA)
+ return image
+
+ def _add_image_to_history(self, image: np.ndarray) -> None:
+ if len(self.image_history) == 0:
+ self.image_history.extend([image] * self.obs_horizon)
+ else:
+ self.image_history.append(image)
+
+ def _obtain_image_history(self) -> List[Image.Image]:
+ image_history = list(self.image_history)
+ images = image_history[:: self.obs_interval]
+ images = [Image.fromarray(image).convert("RGB") for image in images]
+ return images
+
+ def visualize_epoch(
+ self,
+ predicted_raw_actions: Sequence[np.ndarray],
+ images: Sequence[np.ndarray],
+ save_path: str,
+ ) -> None:
+ images = [self._resize_image(image) for image in images]
+ ACTION_DIM_LABELS = ["x", "y", "z", "roll", "pitch", "yaw", "grasp"]
+
+ img_strip = np.concatenate(np.array(images[::3]), axis=1)
+
+ # set up plt figure
+ figure_layout = [["image"] * len(ACTION_DIM_LABELS), ACTION_DIM_LABELS]
+ plt.rcParams.update({"font.size": 12})
+ fig, axs = plt.subplot_mosaic(figure_layout)
+ fig.set_size_inches([45, 10])
+
+ # plot actions
+ pred_actions = np.array(
+ [
+ np.concatenate(
+ [a["world_vector"], a["rotation_delta"], a["open_gripper"]], axis=-1
+ )
+ for a in predicted_raw_actions
+ ]
+ )
+ for action_dim, action_label in enumerate(ACTION_DIM_LABELS):
+ # actions have batch, horizon, dim, in this example we just take the first action for simplicity
+ axs[action_label].plot(
+ pred_actions[:, action_dim], label="predicted action"
+ )
+ axs[action_label].set_title(action_label)
+ axs[action_label].set_xlabel("Time in one episode")
+
+ axs["image"].imshow(img_strip)
+ axs["image"].set_xlabel("Time in one episode (subsampled)")
+ plt.legend()
+ plt.savefig(save_path)
diff --git a/simpler_env/utils/action/action_ensemble.py b/simpler_env/utils/action/action_ensemble.py
index 367ccf91..fc88fed2 100644
--- a/simpler_env/utils/action/action_ensemble.py
+++ b/simpler_env/utils/action/action_ensemble.py
@@ -21,7 +21,7 @@ def ensemble_action(self, cur_action):
curr_act_preds = np.stack(
[pred_actions[i] for (i, pred_actions) in zip(range(num_actions - 1, -1, -1), self.action_history)]
)
- # more recent predictions get exponentially *less* weight than older predictions
+ # if temp > 0, more recent predictions get exponentially *less* weight than older predictions
weights = np.exp(-self.action_ensemble_temp * np.arange(num_actions))
weights = weights / weights.sum()
# compute the weighted average across all predictions for this timestep
diff --git a/simpler_env/utils/metrics.py b/simpler_env/utils/metrics.py
index 39a251c9..6a7f207c 100644
--- a/simpler_env/utils/metrics.py
+++ b/simpler_env/utils/metrics.py
@@ -3,8 +3,9 @@
from typing import Sequence, Optional
import numpy as np
-REAL_PERF = { # Real robot eval performance --> extract via: REAL_PERF[task][policy]
+REAL_PERF = { # Real robot eval performance --> extract via: REAL_PERF[task][policy]
"google_robot_pick_coke_can": {
"rt-2-x": 0.907,
"rt-1-converged": 0.853,
@@ -68,7 +69,7 @@
}
-SIMPLER_PERF = { # SIMPLER simulated eval performance --> extract via: SIMPLER_PERF[task][policy]
+SIMPLER_PERF = { # SIMPLER simulated eval performance --> extract via: SIMPLER_PERF[task][policy]
"google_robot_pick_coke_can": {
"rt-2-x": 0.787,
"rt-1-converged": 0.857,
@@ -131,6 +132,187 @@
},
}
+REF = {
+ "ckpt_name": [
+ "RT-1(Converged)",
+ "RT-1(15%)",
+ "RT-1-X",
+ "RT-2-X",
+ "Octo-Base",
+ "Octo-Small",
+ "RT-1(begin)",
+ "OpenVLA",
+ "RoboVLM",
+ ],
+ "coke_can/matching_avg": [0.857, 0.710, 0.567, 0.787, 0.170, "nan", 0.027, 0.163, 0.727],
+ "coke_can/variant_avg": [0.898, 0.813, 0.490, 0.823, 0.006, "nan", 0.022, 0.545, "nan"],
+ "coke_can/matching/horizontal": [
+ 0.960,
+ 0.860,
+ 0.820,
+ 0.740,
+ 0.210,
+ "nan",
+ 0.050,
+ 0.270,
+ 0.850,
+ ],
+ "coke_can/matching/vertical": [
+ 0.900,
+ 0.790,
+ 0.330,
+ 0.740,
+ 0.210,
+ "nan",
+ 0.000,
+ 0.030,
+ 0.430,
+ ],
+ "coke_can/matching/standing": [
+ 0.710,
+ 0.480,
+ 0.550,
+ 0.880,
+ 0.090,
+ "nan",
+ 0.030,
+ 0.190,
+ 0.900,
+ ],
+ "coke_can/variant/horizontal": [
+ 0.969,
+ 0.920,
+ 0.569,
+ 0.822,
+ 0.005,
+ "nan",
+ 0.022,
+ 0.711,
+ "nan",
+ ],
+ "coke_can/variant/vertical": [
+ 0.760,
+ 0.704,
+ 0.204,
+ 0.754,
+ 0.000,
+ "nan",
+ 0.013,
+ 0.271,
+ "nan",
+ ],
+ "coke_can/variant/standing": [
+ 0.964,
+ 0.813,
+ 0.698,
+ 0.893,
+ 0.013,
+ "nan",
+ 0.031,
+ 0.653,
+ "nan",
+ ],
+ "move_near/variant": [0.500, 0.446, 0.323, 0.792, 0.031, "nan", 0.040, 0.477, "nan"],
+ "move_near/matching": [0.442, 0.354, 0.317, 0.779, 0.042, "nan", 0.050, 0.462, 0.663],
+ "drawer/matching_avg": [0.730, 0.565, 0.597, 0.250, 0.227, "nan", 0.139, 0.356, 0.268],
+ "drawer/variant_avg": [0.323, 0.267, 0.294, 0.353, 0.011, "nan", 0.069, 0.177, "nan"],
+ "drawer/matching/open": [0.601, 0.463, 0.296, 0.157, 0.009, "nan", 0.000, 0.194, 0.287],
+ "drawer/matching/close": [0.861, 0.667, 0.891, 0.343, 0.444, "nan", 0.278, 0.518, 0.25],
+ "drawer/variant/open": [0.270, 0.212, 0.069, 0.333, 0.000, "nan", 0.005, 0.158, "nan"],
+ "drawer/variant/close": [0.376, 0.323, 0.519, 0.372, 0.021, "nan", 0.132, 0.195, "nan"],
+ "apple_in_drawer/matching_avg": [0.065, 0.130, 0.213, 0.037, 0.000, 0.000, 0.000, "nan", 0.361],
+ "apple_in_drawer/variant_avg": [0.026, 0.021, 0.101, 0.206, 0.000, 0.000, 0.000, "nan", "nan"],
+ "put_spoon_on_tablecloth/matching_partial": [
+ "nan",
+ "nan",
+ 0.167,
+ "nan",
+ 0.347,
+ 0.778,
+ "nan",
+ 0.041,
+ 0.375
+ ],
+ "put_spoon_on_tablecloth/matching_entire": [
+ "nan",
+ "nan",
+ 0.000,
+ "nan",
+ 0.125,
+ 0.472,
+ "nan",
+ 0.000,
+ 0.208
+ ],
+ "put_carrot_on_plate/matching_partial": [
+ "nan",
+ "nan",
+ 0.208,
+ "nan",
+ 0.528,
+ 0.278,
+ "nan",
+ 0.333,
+ 0.333
+ ],
+ "put_carrot_on_plate/matching_entire": [
+ "nan",
+ "nan",
+ 0.042,
+ "nan",
+ 0.083,
+ 0.097,
+ "nan",
+ 0.000,
+ 0.25
+ ],
+ "stack_green_block_on_yellow_block/matching_partial": [
+ "nan",
+ "nan",
+ 0.083,
+ "nan",
+ 0.319,
+ 0.403,
+ "nan",
+ 0.125,
+ 0.083
+ ],
+ "stack_green_block_on_yellow_block/matching_entire": [
+ "nan",
+ "nan",
+ 0.000,
+ "nan",
+ 0.000,
+ 0.042,
+ "nan",
+ 0.000,
+ 0.083
+ ],
+ "put_eggplant_in_basket/matching_partial": [
+ "nan",
+ "nan",
+ 0.000,
+ "nan",
+ 0.667,
+ 0.875,
+ "nan",
+ 0.083,
+ 0.000
+ ],
+ "put_eggplant_in_basket/matching_entire": [
+ "nan",
+ "nan",
+ 0.000,
+ "nan",
+ 0.431,
+ 0.569,
+ "nan",
+ 0.041,
+ 0.000
+ ],
+}
+
+
def pearson_correlation(perf_sim: Sequence[float], perf_real: Sequence[float]) -> float:
perf_sim, perf_real = np.array(perf_sim), np.array(perf_real)
assert perf_sim.shape == perf_real.shape
@@ -173,6 +355,7 @@ def print_all_kruskal_results(
Prints out the Kruskal-Wallis test for each checkpoint
"""
from scipy.stats import kruskal
+
sim, real = np.array(sim), np.array(real)
assert sim.shape == real.shape
print(title)
@@ -225,5 +408,4 @@ def get_dir_stats(
results.append(1)
elif succ_fail_pattern[1] in fname:
results.append(0)
-
return results
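Since metrics.py now ships the REF table alongside REAL_PERF and SIMPLER_PERF, the helpers above can be exercised directly; a minimal usage sketch (not part of the patch):

```python
from simpler_env.utils.metrics import REAL_PERF, SIMPLER_PERF, pearson_correlation

task = "google_robot_pick_coke_can"
policies = [p for p in REAL_PERF[task] if p in SIMPLER_PERF[task]]
real = [REAL_PERF[task][p] for p in policies]
sim = [SIMPLER_PERF[task][p] for p in policies]
print(f"{task}: pearson r = {pearson_correlation(sim, real):.3f}")
```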
diff --git a/simpler_env/utils/visualization.py b/simpler_env/utils/visualization.py
index 824f4c5b..15531a92 100644
--- a/simpler_env/utils/visualization.py
+++ b/simpler_env/utils/visualization.py
@@ -27,6 +27,20 @@ def write_video(path, images, fps=5):
images_npy = images
media.write_video(path, images_npy, fps=fps)
+def write_interval_video(path, images, fps=5, interval=20):
+ # images: list of numpy arrays
+ root_dir = Path(path).parent
+ if not os.path.exists(root_dir):
+ os.makedirs(root_dir)
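+ # NOTE: video writing is short-circuited below: only an empty placeholder file is
+ # created at `path` before returning, so the frame-encoding code underneath never runs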
+ print(f"skip writting video D:")
+ os.system(f"touch {path}")
+ return
+
+ if not isinstance(images[0], np.ndarray):
+ images_npy = [image.numpy() for image in images]
+ else:
+ images_npy = images
+ media.write_video(path, images_npy[::interval], fps=fps)
def plot_pred_and_gt_action_trajectory(predicted_actions, gt_actions, stacked_images):
"""
diff --git a/tools/calc_metrics_evaluation_videos.py b/tools/calc_metrics_evaluation_videos.py
index 1981ad25..4af72010 100644
--- a/tools/calc_metrics_evaluation_videos.py
+++ b/tools/calc_metrics_evaluation_videos.py
@@ -1,6 +1,7 @@
import argparse
import glob
from pathlib import Path
+import pandas as pd
import numpy as np
from scipy.stats import kruskal
@@ -13,6 +14,7 @@
print_all_kruskal_results,
)
+from simpler_env.utils.metrics import REF
# Calculate metrics for each task
@@ -21,28 +23,40 @@ def calc_pick_coke_can_stats(root_result_dir):
# If you use a new checkpoint, please update the real evaluation results here
coke_can_real_success = {
"horizontal": {
- "rt-2-x": 0.92,
- "rt-1-converged": 0.96,
- "rt-1-15pct": 1.0,
- "rt-1-x": 0.88,
- "rt-1-begin": 0.20,
- "octo-base": 0.44,
+ # "rt-2-x": 0.92,
+ # "rt-1-converged": 0.96,
+ # "rt-1-15pct": 1.0,
+ # "rt-1-x": 0.88,
+ # "rt-1-begin": 0.20,
+ # "octo-base": 0.44,
+ # "openvla-7b": 0.44,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
},
"vertical": {
- "rt-2-x": 0.80,
- "rt-1-converged": 0.88,
- "rt-1-15pct": 0.96,
- "rt-1-x": 0.56,
- "rt-1-begin": 0.00,
- "octo-base": 0.20,
+ # "rt-2-x": 0.80,
+ # "rt-1-converged": 0.88,
+ # "rt-1-15pct": 0.96,
+ # "rt-1-x": 0.56,
+ # "rt-1-begin": 0.00,
+ # "octo-base": 0.20,
+ # "openvla-7b": 0.20,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
},
"standing": {
- "rt-2-x": 1.00,
- "rt-1-converged": 0.72,
- "rt-1-15pct": 0.80,
- "rt-1-x": 0.84,
- "rt-1-begin": 0.20,
- "octo-base": 0.24,
+ # "rt-2-x": 1.00,
+ # "rt-1-converged": 0.72,
+ # "rt-1-15pct": 0.80,
+ # "rt-1-x": 0.84,
+ # "rt-1-begin": 0.20,
+ # "octo-base": 0.24,
+ # "openvla-7b": 0.24,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
},
}
@@ -52,17 +66,18 @@ def calc_pick_coke_can_stats(root_result_dir):
"vertical": "laid_vertically",
"standing": "upright",
}
- n_orientations = len(coke_can_orientation_map_dict) # number of coke can orientations
- n_trials_per_ckpt_per_orientation = (
- 25 # number of trials per checkpoint per coke can orientation; update if it is different
- )
+ n_orientations = len(
+ coke_can_orientation_map_dict
+ ) # number of coke can orientations
+ n_trials_per_ckpt_per_orientation = 25 # number of trials per checkpoint per coke can orientation; update if it is different
# extra patterns required in file name; if you are using different visual matching overlay image, please update here
extra_pattern_require_sim_variants = ["rgb_overlay_None"]
extra_pattern_require_visual_matching = ["rgb_overlay_google_coke_can_real_eval_1"]
# get simulation variant success
coke_can_sim_variant_success = {
- k1: {k2: [] for k2 in ckpt_alias_keys} for k1 in coke_can_orientation_map_dict.keys()
+ k1: {k2: [] for k2 in ckpt_alias_keys}
+ for k1 in coke_can_orientation_map_dict.keys()
}
# hardcoded variant aggregation result dirs; if you have new variants, please update here
@@ -105,9 +120,15 @@ def calc_pick_coke_can_stats(root_result_dir):
for coke_can_orientation in coke_can_orientation_map_dict.keys():
for ckpt_alias in ckpt_alias_keys:
for variant in (
- base_variants + background_variants + lighting_variants + distractor_variants + table_texture_variants
+ base_variants
+ + background_variants
+ + lighting_variants
+ + distractor_variants
+ + table_texture_variants
):
- variant = variant.format(coke_can_orientation_map_dict[coke_can_orientation])
+ variant = variant.format(
+ coke_can_orientation_map_dict[coke_can_orientation]
+ )
variant = f"{root_result_dir}/{CKPT_MAPPING[ckpt_alias]}/{variant}"
avg_sim_success = np.mean(
get_dir_stats(
@@ -117,8 +138,10 @@ def calc_pick_coke_can_stats(root_result_dir):
)
if np.isnan(avg_sim_success):
print(f"WARNING: avg_sim_success is nan for {variant}")
- coke_can_sim_variant_success[coke_can_orientation][ckpt_alias].append(avg_sim_success)
-
+ coke_can_sim_variant_success[coke_can_orientation][ckpt_alias].append(
+ avg_sim_success
+ )
+
coke_can_sim_variant_success[coke_can_orientation][ckpt_alias] = np.mean(
coke_can_sim_variant_success[coke_can_orientation][ckpt_alias]
)
@@ -157,29 +180,40 @@ def calc_pick_coke_can_stats(root_result_dir):
avg_orientation_sim_variant_results[-1].append(
coke_can_sim_variant_success[coke_can_orientation][ckpt_alias]
)
- avg_orientation_real_results[-1].append(coke_can_real_success[coke_can_orientation][ckpt_alias])
- avg_orientation_sim_variant_results[-1] = np.mean(avg_orientation_sim_variant_results[-1])
+ avg_orientation_real_results[-1].append(
+ coke_can_real_success[coke_can_orientation][ckpt_alias]
+ )
+ avg_orientation_sim_variant_results[-1] = np.mean(
+ avg_orientation_sim_variant_results[-1]
+ )
avg_orientation_real_results[-1] = np.mean(avg_orientation_real_results[-1])
print("avg_orientation_sim_variant_results", avg_orientation_sim_variant_results)
print("avg_orientation_real_results", avg_orientation_real_results)
print(
"mean_maximum_rank_violation(avg_orientation_sim_variant_results, avg_orientation_real_results)",
- mean_maximum_rank_violation(avg_orientation_sim_variant_results, avg_orientation_real_results),
+ mean_maximum_rank_violation(
+ avg_orientation_sim_variant_results, avg_orientation_real_results
+ ),
)
print(
"pearson_correlation(avg_orientation_sim_variant_results, avg_orientation_real_results)",
- pearson_correlation(avg_orientation_sim_variant_results, avg_orientation_real_results),
+ pearson_correlation(
+ avg_orientation_sim_variant_results, avg_orientation_real_results
+ ),
)
print("-" * 20)
# get visual matching success
coke_can_sim_visual_matching_success = {
- k1: {k2: [] for k2 in ckpt_alias_keys} for k1 in coke_can_orientation_map_dict.keys()
+ k1: {k2: [] for k2 in ckpt_alias_keys}
+ for k1 in coke_can_orientation_map_dict.keys()
}
for coke_can_orientation in coke_can_orientation_map_dict.keys():
for ckpt_alias in ckpt_alias_keys:
for variant in base_visual_matching_variants:
- variant = variant.format(coke_can_orientation_map_dict[coke_can_orientation])
+ variant = variant.format(
+ coke_can_orientation_map_dict[coke_can_orientation]
+ )
variant = f"{root_result_dir}/{CKPT_MAPPING[ckpt_alias]}/{variant}"
avg_sim_success = np.mean(
get_dir_stats(
@@ -189,10 +223,18 @@ def calc_pick_coke_can_stats(root_result_dir):
)
if np.isnan(avg_sim_success):
print(f"WARNING: avg_sim_success is nan for {variant}")
- coke_can_sim_visual_matching_success[coke_can_orientation][ckpt_alias].append(avg_sim_success)
- print(f"Orientation {coke_can_orientation}, ckpt {ckpt_alias} all robot arm visual matching success: {coke_can_sim_visual_matching_success[coke_can_orientation][ckpt_alias]}")
- coke_can_sim_visual_matching_success[coke_can_orientation][ckpt_alias] = np.mean(
- coke_can_sim_visual_matching_success[coke_can_orientation][ckpt_alias]
+ coke_can_sim_visual_matching_success[coke_can_orientation][
+ ckpt_alias
+ ].append(avg_sim_success)
+ print(
+ f"Orientation {coke_can_orientation}, ckpt {ckpt_alias} all robot arm visual matching success: {coke_can_sim_visual_matching_success[coke_can_orientation][ckpt_alias]}"
+ )
+ coke_can_sim_visual_matching_success[coke_can_orientation][ckpt_alias] = (
+ np.mean(
+ coke_can_sim_visual_matching_success[coke_can_orientation][
+ ckpt_alias
+ ]
+ )
)
for coke_can_orientation in coke_can_orientation_map_dict.keys():
@@ -207,21 +249,27 @@ def calc_pick_coke_can_stats(root_result_dir):
print(
f"{coke_can_orientation} MMRV",
mean_maximum_rank_violation(
- list(coke_can_sim_visual_matching_success[coke_can_orientation].values()),
+ list(
+ coke_can_sim_visual_matching_success[coke_can_orientation].values()
+ ),
list(coke_can_real_success[coke_can_orientation].values()),
),
)
print(
f"{coke_can_orientation} pearson correlation",
pearson_correlation(
- list(coke_can_sim_visual_matching_success[coke_can_orientation].values()),
+ list(
+ coke_can_sim_visual_matching_success[coke_can_orientation].values()
+ ),
list(coke_can_real_success[coke_can_orientation].values()),
),
)
print_all_kruskal_results(
construct_unordered_trial_results(
n_trials_per_ckpt_per_orientation,
- list(coke_can_sim_visual_matching_success[coke_can_orientation].values()),
+ list(
+ coke_can_sim_visual_matching_success[coke_can_orientation].values()
+ ),
),
construct_unordered_trial_results(
n_trials_per_ckpt_per_orientation,
@@ -239,8 +287,12 @@ def calc_pick_coke_can_stats(root_result_dir):
avg_orientation_sim_visual_matching_results[-1].append(
coke_can_sim_visual_matching_success[coke_can_orientation][ckpt_alias]
)
- avg_orientation_real_results[-1].append(coke_can_real_success[coke_can_orientation][ckpt_alias])
- avg_orientation_sim_visual_matching_results[-1] = np.mean(avg_orientation_sim_visual_matching_results[-1])
+ avg_orientation_real_results[-1].append(
+ coke_can_real_success[coke_can_orientation][ckpt_alias]
+ )
+ avg_orientation_sim_visual_matching_results[-1] = np.mean(
+ avg_orientation_sim_visual_matching_results[-1]
+ )
avg_orientation_real_results[-1] = np.mean(avg_orientation_real_results[-1])
print(
"avg_orientation_sim_visual_matching_results",
@@ -249,11 +301,15 @@ def calc_pick_coke_can_stats(root_result_dir):
print("avg_orientation_real_results", avg_orientation_real_results)
print(
"mean_maximum_rank_violation(avg_orientation_sim_visual_matching_results, avg_orientation_real_results)",
- mean_maximum_rank_violation(avg_orientation_sim_visual_matching_results, avg_orientation_real_results),
+ mean_maximum_rank_violation(
+ avg_orientation_sim_visual_matching_results, avg_orientation_real_results
+ ),
)
print(
"pearson_correlation(avg_orientation_sim_visual_matching_results, avg_orientation_real_results)",
- pearson_correlation(avg_orientation_sim_visual_matching_results, avg_orientation_real_results),
+ pearson_correlation(
+ avg_orientation_sim_visual_matching_results, avg_orientation_real_results
+ ),
)
print_all_kruskal_results(
construct_unordered_trial_results(
@@ -270,18 +326,31 @@ def calc_pick_coke_can_stats(root_result_dir):
print("*" * 20)
for _ in range(3):
print()
+ # process results and return
+ ret_dict = {}
+ ret_dict["coke_can/matching_avg"] = avg_orientation_sim_visual_matching_results
+ ret_dict["coke_can/variant_avg"] = avg_orientation_sim_variant_results
+ for key, val in coke_can_sim_visual_matching_success.items():
+ ret_dict[f"coke_can/matching/{key}"] = [succ for _, succ in val.items()]
+ for key, val in coke_can_sim_variant_success.items():
+ ret_dict[f"coke_can/variant/{key}"] = [succ for _, succ in val.items()]
+ return ret_dict
def calc_move_near_stats(root_result_dir):
print("***Move Near results***")
# If you use a new checkpoint, please update the real evaluation results here
move_near_real_success = {
- "rt-2-x": 0.733,
- "rt-1-converged": 0.633,
- "rt-1-15pct": 0.583,
- "rt-1-x": 0.45,
- "rt-1-begin": 0.017,
- "octo-base": 0.35,
+ # "rt-2-x": 0.733,
+ # "rt-1-converged": 0.633,
+ # "rt-1-15pct": 0.583,
+ # "rt-1-x": 0.45,
+ # "rt-1-begin": 0.017,
+ # "octo-base": 0.35,
+ # "openvla-7b": 0.35,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
}
ckpt_alias_keys = list(move_near_real_success.keys())
@@ -331,14 +400,24 @@ def calc_move_near_stats(root_result_dir):
for ckpt_alias in ckpt_alias_keys:
for variant in (
- base_variants + background_variants + lighting_variants + distractor_variants + table_texture_variants
+ base_variants
+ + background_variants
+ + lighting_variants
+ + distractor_variants
+ + table_texture_variants
):
variant = f"{root_result_dir}/{CKPT_MAPPING[ckpt_alias]}/{variant}"
- avg_sim_success = np.mean(get_dir_stats(variant, extra_pattern_require=extra_pattern_require_sim_variants))
+ avg_sim_success = np.mean(
+ get_dir_stats(
+ variant, extra_pattern_require=extra_pattern_require_sim_variants
+ )
+ )
if np.isnan(avg_sim_success):
print(f"WARNING: avg_sim_success is nan for {variant}")
move_near_sim_variant_success[ckpt_alias].append(avg_sim_success)
- move_near_sim_variant_success[ckpt_alias] = np.mean(move_near_sim_variant_success[ckpt_alias])
+ move_near_sim_variant_success[ckpt_alias] = np.mean(
+ move_near_sim_variant_success[ckpt_alias]
+ )
print("-" * 20)
print("sim variant avg success", move_near_sim_variant_success)
@@ -365,15 +444,20 @@ def calc_move_near_stats(root_result_dir):
for variant in base_visual_matching_variants:
variant = f"{root_result_dir}/{CKPT_MAPPING[ckpt_alias]}/{variant}"
avg_sim_success = np.mean(
- get_dir_stats(variant, extra_pattern_require=extra_pattern_require_visual_matching)
+ get_dir_stats(
+ variant, extra_pattern_require=extra_pattern_require_visual_matching
+ )
)
if np.isnan(avg_sim_success):
print(f"WARNING: avg_sim_success is nan for {variant}")
move_near_sim_visual_matching_success[ckpt_alias].append(avg_sim_success)
-
- print(f"Ckpt {ckpt_alias} all robot arm visual matching success: {move_near_sim_visual_matching_success[ckpt_alias]}")
- move_near_sim_visual_matching_success[ckpt_alias] = np.mean(move_near_sim_visual_matching_success[ckpt_alias])
+ print(
+ f"Ckpt {ckpt_alias} all robot arm visual matching success: {move_near_sim_visual_matching_success[ckpt_alias]}"
+ )
+ move_near_sim_visual_matching_success[ckpt_alias] = np.mean(
+ move_near_sim_visual_matching_success[ckpt_alias]
+ )
print("sim visual matching success", move_near_sim_visual_matching_success)
print("real success", move_near_real_success)
@@ -392,8 +476,12 @@ def calc_move_near_stats(root_result_dir):
),
)
print_all_kruskal_results(
- construct_unordered_trial_results(n_trials_per_ckpt, list(move_near_sim_visual_matching_success.values())),
- construct_unordered_trial_results(n_trials_per_ckpt, list(move_near_real_success.values())),
+ construct_unordered_trial_results(
+ n_trials_per_ckpt, list(move_near_sim_visual_matching_success.values())
+ ),
+ construct_unordered_trial_results(
+ n_trials_per_ckpt, list(move_near_real_success.values())
+ ),
"avg kruskal:",
)
@@ -401,26 +489,46 @@ def calc_move_near_stats(root_result_dir):
for _ in range(3):
print()
+ # process results and return
+ ret_dict = {
+ "move_near/variant": list(move_near_sim_variant_success.values()),
+ "move_near/matching": list(move_near_sim_visual_matching_success.values()),
+ }
+ return ret_dict
+
def calc_drawer_stats(root_result_dir):
print("***Drawer results***")
# If you use a new checkpoint, please update the real evaluation results here
drawer_real_success = {
"open": {
- "rt-2-x": 0.333,
- "rt-1-converged": 0.815,
- "rt-1-15pct": 0.704,
- "rt-1-x": 0.519,
- "rt-1-begin": 0.000,
- "octo-base": 0.148,
+ # "rt-2-x": 0.333,
+ # "rt-1-converged": 0.815,
+ # "rt-1-15pct": 0.704,
+ # "rt-1-x": 0.519,
+ # "rt-1-begin": 0.000,
+ # "octo-base": 0.148,
+ # "openvla-7b": 0.148,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
},
"close": {
- "rt-2-x": 0.630,
- "rt-1-converged": 0.926,
- "rt-1-15pct": 0.889,
- "rt-1-x": 0.741,
- "rt-1-begin": 0.000,
- "octo-base": 0.519,
+ # "rt-2-x": 0.630,
+ # "rt-1-converged": 0.926,
+ # "rt-1-15pct": 0.889,
+ # "rt-1-x": 0.741,
+ # "rt-1-begin": 0.000,
+ # "octo-base": 0.519,
+ # "openvla-7b": 0.519,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
},
}
@@ -438,15 +546,15 @@ def calc_drawer_stats(root_result_dir):
],
}
n_tasks = len(drawer_task_map_dict)
- n_trials_per_ckpt_per_task = (
- 27 # number of trials per checkpoint for all open / all close tasks; update if it is different
- )
+ n_trials_per_ckpt_per_task = 27 # number of trials per checkpoint for all open / all close tasks; update if it is different
# extra patterns required in file name; if you are using different visual matching overlay image, please update here
extra_pattern_require_sim_variants = ["rgb_overlay_None"]
extra_pattern_require_visual_matching = ["rgb_overlay_open_drawer"]
# get simulation variant success
- drawer_sim_variant_success = {k1: {k2: [] for k2 in ckpt_alias_keys} for k1 in drawer_task_map_dict.keys()}
+ drawer_sim_variant_success = {
+ k1: {k2: [] for k2 in ckpt_alias_keys} for k1 in drawer_task_map_dict.keys()
+ }
# hardcoded variant aggregation result dirs; if you have new variants, please update here
base_variants = [
@@ -481,7 +589,12 @@ def calc_drawer_stats(root_result_dir):
for drawer_task in drawer_task_map_dict.keys():
for ckpt_alias in ckpt_alias_keys:
for specific_task in drawer_task_map_dict[drawer_task]:
- for variant in base_variants + background_variants + lighting_variants + table_texture_variants:
+ for variant in (
+ base_variants
+ + background_variants
+ + lighting_variants
+ + table_texture_variants
+ ):
variant = variant.format(specific_task)
variant = f"{root_result_dir}/{CKPT_MAPPING[ckpt_alias]}/{variant}"
avg_sim_success = np.mean(
@@ -492,7 +605,9 @@ def calc_drawer_stats(root_result_dir):
)
if np.isnan(avg_sim_success):
print(f"WARNING: avg_sim_success is nan for {variant}")
- drawer_sim_variant_success[drawer_task][ckpt_alias].append(avg_sim_success)
+ drawer_sim_variant_success[drawer_task][ckpt_alias].append(
+ avg_sim_success
+ )
drawer_sim_variant_success[drawer_task][ckpt_alias] = np.mean(
drawer_sim_variant_success[drawer_task][ckpt_alias]
)
@@ -525,7 +640,9 @@ def calc_drawer_stats(root_result_dir):
avg_sim_variant_results.append([])
avg_real_results.append([])
for drawer_task in drawer_task_map_dict.keys():
- avg_sim_variant_results[-1].append(drawer_sim_variant_success[drawer_task][ckpt_alias])
+ avg_sim_variant_results[-1].append(
+ drawer_sim_variant_success[drawer_task][ckpt_alias]
+ )
avg_real_results[-1].append(drawer_real_success[drawer_task][ckpt_alias])
avg_sim_variant_results[-1] = np.mean(avg_sim_variant_results[-1])
avg_real_results[-1] = np.mean(avg_real_results[-1])
@@ -542,7 +659,9 @@ def calc_drawer_stats(root_result_dir):
print("-" * 20)
# get visual matching success
- drawer_sim_visual_matching_success = {k1: {k2: [] for k2 in ckpt_alias_keys} for k1 in drawer_task_map_dict.keys()}
+ drawer_sim_visual_matching_success = {
+ k1: {k2: [] for k2 in ckpt_alias_keys} for k1 in drawer_task_map_dict.keys()
+ }
for drawer_task in drawer_task_map_dict.keys():
for ckpt_alias in ckpt_alias_keys:
for specific_task in drawer_task_map_dict[drawer_task]:
@@ -557,13 +676,21 @@ def calc_drawer_stats(root_result_dir):
)
if np.isnan(avg_sim_success):
print(f"WARNING: avg_sim_success is nan for {variant}")
- drawer_sim_visual_matching_success[drawer_task][ckpt_alias].append(avg_sim_success)
+ drawer_sim_visual_matching_success[drawer_task][ckpt_alias].append(
+ avg_sim_success
+ )
tmp_variant_avg_each_robot_arm = []
for i in range(len(base_visual_matching_variants)):
tmp_variant_avg_each_robot_arm.append(
- np.mean(drawer_sim_visual_matching_success[drawer_task][ckpt_alias][i::len(drawer_task_map_dict[drawer_task])])
+ np.mean(
+ drawer_sim_visual_matching_success[drawer_task][ckpt_alias][
+ i :: len(drawer_task_map_dict[drawer_task])
+ ]
+ )
)
- print(f"Drawer task {drawer_task}, ckpt {ckpt_alias} all robot arm visual matching success: {tmp_variant_avg_each_robot_arm}")
+ print(
+ f"Drawer task {drawer_task}, ckpt {ckpt_alias} all robot arm visual matching success: {tmp_variant_avg_each_robot_arm}"
+ )
drawer_sim_visual_matching_success[drawer_task][ckpt_alias] = np.mean(
drawer_sim_visual_matching_success[drawer_task][ckpt_alias]
)
@@ -606,9 +733,13 @@ def calc_drawer_stats(root_result_dir):
avg_sim_visual_matching_results.append([])
avg_real_results.append([])
for drawer_task in drawer_task_map_dict.keys():
- avg_sim_visual_matching_results[-1].append(drawer_sim_visual_matching_success[drawer_task][ckpt_alias])
+ avg_sim_visual_matching_results[-1].append(
+ drawer_sim_visual_matching_success[drawer_task][ckpt_alias]
+ )
avg_real_results[-1].append(drawer_real_success[drawer_task][ckpt_alias])
- avg_sim_visual_matching_results[-1] = np.mean(avg_sim_visual_matching_results[-1])
+ avg_sim_visual_matching_results[-1] = np.mean(
+ avg_sim_visual_matching_results[-1]
+ )
avg_real_results[-1] = np.mean(avg_real_results[-1])
print("avg_sim_visual_matching_results", avg_sim_visual_matching_results)
print("avg_real_results", avg_real_results)
@@ -621,27 +752,46 @@ def calc_drawer_stats(root_result_dir):
pearson_correlation(avg_sim_visual_matching_results, avg_real_results),
)
print_all_kruskal_results(
- construct_unordered_trial_results(n_trials_per_ckpt_per_task * n_tasks, avg_sim_visual_matching_results),
- construct_unordered_trial_results(n_trials_per_ckpt_per_task * n_tasks, avg_real_results),
+ construct_unordered_trial_results(
+ n_trials_per_ckpt_per_task * n_tasks, avg_sim_visual_matching_results
+ ),
+ construct_unordered_trial_results(
+ n_trials_per_ckpt_per_task * n_tasks, avg_real_results
+ ),
"avg kruskal:",
)
print("*" * 20)
for _ in range(3):
print()
-
-
+
+ # process results and return
+ ret_dict = {
+ "drawer/matching_avg": avg_sim_visual_matching_results,
+ "drawer/variant_avg": avg_sim_variant_results,
+ }
+ for key, val in drawer_sim_visual_matching_success.items():
+ ret_dict[f"drawer/matching/{key}"] = [succ for _, succ in val.items()]
+ for key, val in drawer_sim_variant_success.items():
+ ret_dict[f"drawer/variant/{key}"] = [succ for _, succ in val.items()]
+ return ret_dict
+
+
def calc_long_horizon_apple_in_drawer_stats(root_result_dir):
print("***Drawer results***")
# If you use a new checkpoint, please update the real evaluation results here
drawer_real_success = {
"put_apple_into_top_drawer": {
- "rt-2-x": 0.074,
- "rt-1-converged": 0.185,
- "rt-1-15pct": 0.185,
- "rt-1-x": 0.407,
- "rt-1-begin": 0.000,
- "octo-base": 0.000,
+ # "rt-2-x": 0.074,
+ # "rt-1-converged": 0.185,
+ # "rt-1-15pct": 0.185,
+ # "rt-1-x": 0.407,
+ # "rt-1-begin": 0.000,
+ # "octo-base": 0.000,
+ # "openvla-7b": 0.000,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
},
}
@@ -652,9 +802,7 @@ def calc_long_horizon_apple_in_drawer_stats(root_result_dir):
],
}
n_tasks = len(drawer_task_map_dict)
- n_trials_per_ckpt_per_task = (
- 27 # number of trials per checkpoint for each key in drawer_task_map_dict; update if it is different
- )
+ n_trials_per_ckpt_per_task = 27 # number of trials per checkpoint for each key in drawer_task_map_dict; update if it is different
# extra patterns required in file name; if you are using different visual matching overlay image, please update here
extra_pattern_require_sim_variants = ["rgb_overlay_None", "apple"]
extra_pattern_require_visual_matching = ["rgb_overlay_open_drawer", "apple"]
@@ -662,7 +810,9 @@ def calc_long_horizon_apple_in_drawer_stats(root_result_dir):
extra_log_str_visual_matching = "model_ids_baked_apple_v2"
# get simulation variant success
- drawer_sim_variant_success = {k1: {k2: [] for k2 in ckpt_alias_keys} for k1 in drawer_task_map_dict.keys()}
+ drawer_sim_variant_success = {
+ k1: {k2: [] for k2 in ckpt_alias_keys} for k1 in drawer_task_map_dict.keys()
+ }
# hardcoded variant aggregation result dirs; if you have new variants, please update here
base_variants = [
@@ -697,8 +847,15 @@ def calc_long_horizon_apple_in_drawer_stats(root_result_dir):
for drawer_task in drawer_task_map_dict.keys():
for ckpt_alias in ckpt_alias_keys:
for specific_task in drawer_task_map_dict[drawer_task]:
- for variant in base_variants + background_variants + lighting_variants + table_texture_variants:
- variant = variant.format(specific_task) + f"_{extra_log_str_variant_agg}"
+ for variant in (
+ base_variants
+ + background_variants
+ + lighting_variants
+ + table_texture_variants
+ ):
+ variant = (
+ variant.format(specific_task) + f"_{extra_log_str_variant_agg}"
+ )
variant = f"{root_result_dir}/{CKPT_MAPPING[ckpt_alias]}/{variant}"
avg_sim_success = np.mean(
get_dir_stats(
@@ -708,7 +865,9 @@ def calc_long_horizon_apple_in_drawer_stats(root_result_dir):
)
if np.isnan(avg_sim_success):
print(f"WARNING: avg_sim_success is nan for {variant}")
- drawer_sim_variant_success[drawer_task][ckpt_alias].append(avg_sim_success)
+ drawer_sim_variant_success[drawer_task][ckpt_alias].append(
+ avg_sim_success
+ )
drawer_sim_variant_success[drawer_task][ckpt_alias] = np.mean(
drawer_sim_variant_success[drawer_task][ckpt_alias]
)
@@ -741,7 +900,9 @@ def calc_long_horizon_apple_in_drawer_stats(root_result_dir):
avg_sim_variant_results.append([])
avg_real_results.append([])
for drawer_task in drawer_task_map_dict.keys():
- avg_sim_variant_results[-1].append(drawer_sim_variant_success[drawer_task][ckpt_alias])
+ avg_sim_variant_results[-1].append(
+ drawer_sim_variant_success[drawer_task][ckpt_alias]
+ )
avg_real_results[-1].append(drawer_real_success[drawer_task][ckpt_alias])
avg_sim_variant_results[-1] = np.mean(avg_sim_variant_results[-1])
avg_real_results[-1] = np.mean(avg_real_results[-1])
@@ -758,12 +919,17 @@ def calc_long_horizon_apple_in_drawer_stats(root_result_dir):
print("-" * 20)
# get visual matching success
- drawer_sim_visual_matching_success = {k1: {k2: [] for k2 in ckpt_alias_keys} for k1 in drawer_task_map_dict.keys()}
+ drawer_sim_visual_matching_success = {
+ k1: {k2: [] for k2 in ckpt_alias_keys} for k1 in drawer_task_map_dict.keys()
+ }
for drawer_task in drawer_task_map_dict.keys():
for ckpt_alias in ckpt_alias_keys:
for specific_task in drawer_task_map_dict[drawer_task]:
for variant in base_visual_matching_variants:
- variant = variant.format(specific_task) + f"_{extra_log_str_visual_matching}"
+ variant = (
+ variant.format(specific_task)
+ + f"_{extra_log_str_visual_matching}"
+ )
variant = f"{root_result_dir}/{CKPT_MAPPING[ckpt_alias]}/{variant}"
avg_sim_success = np.mean(
get_dir_stats(
@@ -773,13 +939,21 @@ def calc_long_horizon_apple_in_drawer_stats(root_result_dir):
)
if np.isnan(avg_sim_success):
print(f"WARNING: avg_sim_success is nan for {variant}")
- drawer_sim_visual_matching_success[drawer_task][ckpt_alias].append(avg_sim_success)
+ drawer_sim_visual_matching_success[drawer_task][ckpt_alias].append(
+ avg_sim_success
+ )
tmp_variant_avg_each_robot_arm = []
for i in range(len(base_visual_matching_variants)):
tmp_variant_avg_each_robot_arm.append(
- np.mean(drawer_sim_visual_matching_success[drawer_task][ckpt_alias][i::len(drawer_task_map_dict[drawer_task])])
+ np.mean(
+ drawer_sim_visual_matching_success[drawer_task][ckpt_alias][
+ i :: len(drawer_task_map_dict[drawer_task])
+ ]
+ )
)
- print(f"Drawer task {drawer_task}, ckpt {ckpt_alias} all robot arm visual matching success: {tmp_variant_avg_each_robot_arm}")
+ print(
+ f"Drawer task {drawer_task}, ckpt {ckpt_alias} all robot arm visual matching success: {tmp_variant_avg_each_robot_arm}"
+ )
drawer_sim_visual_matching_success[drawer_task][ckpt_alias] = np.mean(
drawer_sim_visual_matching_success[drawer_task][ckpt_alias]
)
@@ -822,9 +996,13 @@ def calc_long_horizon_apple_in_drawer_stats(root_result_dir):
avg_sim_visual_matching_results.append([])
avg_real_results.append([])
for drawer_task in drawer_task_map_dict.keys():
- avg_sim_visual_matching_results[-1].append(drawer_sim_visual_matching_success[drawer_task][ckpt_alias])
+ avg_sim_visual_matching_results[-1].append(
+ drawer_sim_visual_matching_success[drawer_task][ckpt_alias]
+ )
avg_real_results[-1].append(drawer_real_success[drawer_task][ckpt_alias])
- avg_sim_visual_matching_results[-1] = np.mean(avg_sim_visual_matching_results[-1])
+ avg_sim_visual_matching_results[-1] = np.mean(
+ avg_sim_visual_matching_results[-1]
+ )
avg_real_results[-1] = np.mean(avg_real_results[-1])
print("avg_sim_visual_matching_results", avg_sim_visual_matching_results)
print("avg_real_results", avg_real_results)
@@ -837,14 +1015,29 @@ def calc_long_horizon_apple_in_drawer_stats(root_result_dir):
pearson_correlation(avg_sim_visual_matching_results, avg_real_results),
)
print_all_kruskal_results(
- construct_unordered_trial_results(n_trials_per_ckpt_per_task * n_tasks, avg_sim_visual_matching_results),
- construct_unordered_trial_results(n_trials_per_ckpt_per_task * n_tasks, avg_real_results),
+ construct_unordered_trial_results(
+ n_trials_per_ckpt_per_task * n_tasks, avg_sim_visual_matching_results
+ ),
+ construct_unordered_trial_results(
+ n_trials_per_ckpt_per_task * n_tasks, avg_real_results
+ ),
"avg kruskal:",
)
print("*" * 20)
for _ in range(3):
print()
+
+ # TODO: process ret dict
+ ret_dict = {
+ "apple_in_drawer/matching_avg": avg_sim_visual_matching_results,
+ "apple_in_drawer/variant_avg": avg_sim_variant_results,
+ }
+ # for key, val in drawer_sim_visual_matching_success.items():
+ # ret_dict[f"drawer/matching/{key}"] = [succ for _, succ in val.items()]
+ # for key, val in drawer_sim_variant_success.items():
+ # ret_dict[f"drawer/variant/{key}"] = [succ for _, succ in val.items()]
+ return ret_dict
def calc_bridge_put_on_env_stats(root_result_dir):
@@ -852,42 +1045,78 @@ def calc_bridge_put_on_env_stats(root_result_dir):
# If you use a new checkpoint, please update the real evaluation results here
real_partial_success_dict = {
"put_spoon_on_tablecloth": {
- "rt-1-x": 0.042,
- "octo-base": 0.500,
- "octo-small": 0.542,
+ # "rt-1-x": 0.042,
+ # "octo-base": 0.500,
+ # "openvla-7b": 0.500,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
+ # "octo-small": 0.542,
},
"put_carrot_on_plate": {
- "rt-1-x": 0.167,
- "octo-base": 0.500,
- "octo-small": 0.208,
+ # "rt-1-x": 0.167,
+ # "octo-base": 0.500,
+ # "openvla-7b": 0.500,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
+ # "octo-small": 0.208,
},
"stack_green_block_on_yellow_block": {
- "rt-1-x": 0.000,
- "octo-base": 0.292,
- "octo-small": 0.583,
+ # "rt-1-x": 0.000,
+ # "octo-base": 0.292,
+ # "openvla-7b": 0.292,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
+ # "octo-small": 0.583,
},
"put_eggplant_in_basket": {
- "rt-1-x": 0.000,
- "octo-base": 0.400,
- "octo-small": 0.600,
+ # "rt-1-x": 0.000,
+ # "octo-base": 0.400,
+ # "openvla-7b": 0.400,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
+ # "octo-small": 0.600,
},
}
real_success_dict = {
"put_spoon_on_tablecloth": {
- "rt-1-x": 0.000,
- "octo-base": 0.333,
- "octo-small": 0.417,
+ # "rt-1-x": 0.000,
+ # "octo-base": 0.333,
+ # "openvla-7b": 0.333,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
+ # "octo-small": 0.417,
+ },
+ "put_carrot_on_plate": {
+ # "rt-1-x": 0.00,
+ # "octo-base": 0.25,
+ # "octo-small": 0.083,
+ # "openvla-7b": 0,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
},
- "put_carrot_on_plate": {"rt-1-x": 0.00, "octo-base": 0.25, "octo-small": 0.083},
"stack_green_block_on_yellow_block": {
- "rt-1-x": 0.000,
- "octo-base": 0.000,
- "octo-small": 0.125,
+ # "rt-1-x": 0.000,
+ # "octo-base": 0.000,
+ # "openvla-7b": 0.000,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
+ # "octo-small": 0.125,
},
"put_eggplant_in_basket": {
- "rt-1-x": 0.000,
- "octo-base": 0.250,
- "octo-small": 0.400,
+ # "rt-1-x": 0.000,
+ # "octo-base": 0.250,
+ # "openvla-7b": 0.250,
+ # "internvla-1_8b": 0.44,
+ # "internvla_diff-1_8b": 0.44,
+ "ours": 0.0,
+ # "octo-small": 0.400,
},
}
@@ -931,6 +1160,7 @@ def calc_bridge_put_on_env_stats(root_result_dir):
# partial_succ_fail_pattern = ['consecutive_grasp_True', 'consecutive_grasp_False']
# get visual matching success
+ ret_dict = {}
for task in tasks:
real_success = real_success_dict[task]
real_partial_success = real_partial_success_dict[task]
@@ -944,32 +1174,49 @@ def calc_bridge_put_on_env_stats(root_result_dir):
# we average octo performance over different random seeds
tmp = []
for seed in octo_seed_range:
- tmp.extend([f"{variant}_octo_init_rng_{seed}" for variant in base_visual_matching_variants])
+ tmp.extend(
+ [
+ f"{variant}_octo_init_rng_{seed}"
+ for variant in base_visual_matching_variants
+ ]
+ )
base_visual_matching_variants = tmp
for variant in base_visual_matching_variants:
variant = f"{root_result_dir}/{CKPT_MAPPING[ckpt_alias]}/{variant}"
avg_sim_success = np.mean(
get_dir_stats(
variant,
- extra_pattern_require=extra_pattern_require_visual_matching[task],
+ extra_pattern_require=extra_pattern_require_visual_matching[
+ task
+ ],
succ_fail_pattern=succ_fail_pattern,
)
)
avg_sim_partial_success = np.mean(
get_dir_stats(
variant,
- extra_pattern_require=extra_pattern_require_visual_matching[task],
+ extra_pattern_require=extra_pattern_require_visual_matching[
+ task
+ ],
succ_fail_pattern=partial_succ_fail_pattern,
)
)
if np.isnan(avg_sim_success) or np.isnan(avg_sim_partial_success):
print(f"WARNING: avg_sim_success is nan for {variant}")
sim_visual_matching_success[ckpt_alias].append(avg_sim_success)
- sim_visual_matching_partial_success[ckpt_alias].append(avg_sim_partial_success)
- sim_visual_matching_success[ckpt_alias] = np.mean(sim_visual_matching_success[ckpt_alias])
- sim_visual_matching_partial_success[ckpt_alias] = np.mean(sim_visual_matching_partial_success[ckpt_alias])
+ sim_visual_matching_partial_success[ckpt_alias].append(
+ avg_sim_partial_success
+ )
+ sim_visual_matching_success[ckpt_alias] = np.mean(
+ sim_visual_matching_success[ckpt_alias]
+ )
+ sim_visual_matching_partial_success[ckpt_alias] = np.mean(
+ sim_visual_matching_partial_success[ckpt_alias]
+ )
- print("sim visual matching partial success", sim_visual_matching_partial_success)
+ print(
+ "sim visual matching partial success", sim_visual_matching_partial_success
+ )
print("real partial success", real_partial_success)
print(
"visual matching MMRV (partial success)",
@@ -986,8 +1233,12 @@ def calc_bridge_put_on_env_stats(root_result_dir):
),
)
print_all_kruskal_results(
- construct_unordered_trial_results(n_trials_per_ckpt, list(sim_visual_matching_partial_success.values())),
- construct_unordered_trial_results(n_trials_per_ckpt, list(real_partial_success.values())),
+ construct_unordered_trial_results(
+ n_trials_per_ckpt, list(sim_visual_matching_partial_success.values())
+ ),
+ construct_unordered_trial_results(
+ n_trials_per_ckpt, list(real_partial_success.values())
+ ),
"avg kruskal (partial success):",
)
@@ -995,15 +1246,23 @@ def calc_bridge_put_on_env_stats(root_result_dir):
print("real success", real_success)
print(
"visual matching MMRV",
- mean_maximum_rank_violation(list(sim_visual_matching_success.values()), list(real_success.values())),
+ mean_maximum_rank_violation(
+ list(sim_visual_matching_success.values()), list(real_success.values())
+ ),
)
print(
"visual matching pearson correlation",
- pearson_correlation(list(sim_visual_matching_success.values()), list(real_success.values())),
+ pearson_correlation(
+ list(sim_visual_matching_success.values()), list(real_success.values())
+ ),
)
print_all_kruskal_results(
- construct_unordered_trial_results(n_trials_per_ckpt, list(sim_visual_matching_success.values())),
- construct_unordered_trial_results(n_trials_per_ckpt, list(real_success.values())),
+ construct_unordered_trial_results(
+ n_trials_per_ckpt, list(sim_visual_matching_success.values())
+ ),
+ construct_unordered_trial_results(
+ n_trials_per_ckpt, list(real_success.values())
+ ),
"avg kruskal:",
)
@@ -1011,39 +1270,69 @@ def calc_bridge_put_on_env_stats(root_result_dir):
for _ in range(3):
print()
+ ret_dict[f"{task}/matching_partial"] = [
+ succ for _, succ in sim_visual_matching_partial_success.items()
+ ]
+ ret_dict[f"{task}/matching_entire"] = [
+ succ for _, succ in sim_visual_matching_success.items()
+ ]
+
+ return ret_dict
+
# Define checkpoint alias-to-directory mapping; If you use a new checkpoint, please update the dict
CKPT_MAPPING = {
- "rt-2-x": "rt_2_x",
- "rt-1-converged": "rt_1_tf_trained_for_000400120",
- "rt-1-15pct": "rt_1_tf_trained_for_000058240",
- "rt-1-x": "rt_1_x_tf_trained_for_002272480_step",
- "rt-1-begin": "rt_1_tf_trained_for_000001120",
- "octo-base": "octo-base",
- "octo-small": "octo-small",
- "octo-server": "octo-server",
+ # "rt-2-x": "rt_2_x",
+ # "rt-1-converged": "rt_1_tf_trained_for_000400120",
+ # "rt-1-15pct": "rt_1_tf_trained_for_000058240",
+ # "rt-1-x": "rt_1_x_tf_trained_for_002272480_step",
+ # "rt-1-begin": "rt_1_tf_trained_for_000001120",
+ # "octo-base": "octo-base",
+ # "openvla-7b": "openvla-7b",
+ # "internvla-1_8b": "internvla-1_8b",
+ # "internvla_diff-1_8b": "internvla_diff-1_8b",
+ "ours": "TODO",
+ # "octo-small": "octo-small",
+ # "octo-server": "octo-server",
}
parser = argparse.ArgumentParser()
-parser.add_argument("--task", type=str, default="pick_coke_can", help="task name")
-parser.add_argument("--log-dir-root", type=str, default="./results/", help="log directory")
-
+parser.add_argument(
+ "--log-dir-root", type=str, default="./results/", help="log directory"
+)
args = parser.parse_args()
-if args.task == "pick_coke_can":
- calc_pick_coke_can_stats(args.log_dir_root)
-elif args.task == "move_near":
- calc_move_near_stats(args.log_dir_root)
-elif args.task == "drawer":
- calc_drawer_stats(args.log_dir_root)
-elif args.task == "long_horizon_apple_in_drawer":
- calc_long_horizon_apple_in_drawer_stats(args.log_dir_root)
-elif args.task == "bridge_put_on":
- calc_bridge_put_on_env_stats(args.log_dir_root)
-else:
- raise ValueError(f"Unknown task: {args.task}")
+# NOTE: "ours" is mapped to the checkpoint result directory name taken from --log-dir-root
+CKPT_MAPPING["ours"] = Path(args.log_dir_root).name
+
+pick_coke_can_results = calc_pick_coke_can_stats(str(Path(args.log_dir_root).parent))
+move_near_real_results = calc_move_near_stats(str(Path(args.log_dir_root).parent))
+drawer_results = calc_drawer_stats(str(Path(args.log_dir_root).parent))
+long_horizon_apple_in_drawer_results = calc_long_horizon_apple_in_drawer_stats(
+ str(Path(args.log_dir_root).parent)
+)
+bridge_put_on_results = calc_bridge_put_on_env_stats(
+ str(Path(args.log_dir_root).parent)
+)
+
+results = {
+ **pick_coke_can_results,
+ **move_near_real_results,
+ **drawer_results,
+ **bridge_put_on_results,
+ **long_horizon_apple_in_drawer_results,
+ "ckpt_name": ["ours"],
+}
+
+df = pd.DataFrame(results)
+df_ref = pd.DataFrame(REF)
+results_df = pd.concat([df, df_ref], axis=0, ignore_index=True)
+results_df = results_df.transpose()
+results_df.to_markdown(f"{args.log_dir_root}/results.md", index=True)
+
+df.loc[0, "ckpt_name"] = CKPT_MAPPING["ours"]
+df.to_csv(f"{args.log_dir_root}/results.csv", index=False, float_format="%.3f")
+print(df)
exit(0)
"""
From fea24680013ec250c437feb970d22b608f2f8117 Mon Sep 17 00:00:00 2001
From: YN35 <95198883+YN35@users.noreply.github.com>
Date: Thu, 14 Aug 2025 07:00:40 +0000
Subject: [PATCH 02/57] stable run
---
.gitignore | 2 ++
1 file changed, 2 insertions(+)
diff --git a/.gitignore b/.gitignore
index dfb15106..399a25ee 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,8 @@ __pycache__
_ext
*.pyc
*.so
+.venv/
+results/
build/
dist/
*.egg-info/
From b6eb6eb0201880c93179efb9ca1681896a4ef4cd Mon Sep 17 00:00:00 2001
From: YN35 <95198883+YN35@users.noreply.github.com>
Date: Thu, 14 Aug 2025 07:00:53 +0000
Subject: [PATCH 03/57] stable run
---
ManiSkill2_real2sim | 2 +-
policies/lerobotpi/README.md | 18 -
policies/tes.py | 3 -
scripts/hsr_openpi/README.md | 17 +
scripts/hsr_openpi/evaluate_fractal.py | 39 ++
scripts/lerobotpi/README.md | 16 +
scripts/lerobotpi/evaluate_fractal.py | 22 +
scripts/openpi/README.md | 16 +
scripts/openpi/evaluate_fractal.py | 24 +
scripts/{ => originals}/bridge.sh | 1 +
scripts/{ => originals}/drawer_variant_agg.sh | 1 +
.../{ => originals}/drawer_visual_matching.sh | 1 +
.../{ => originals}/move_near_variant_agg.sh | 1 +
.../move_near_visual_matching.sh | 1 +
.../pick_coke_can_variant_agg.sh | 1 +
.../pick_coke_can_visual_matching.sh | 1 +
.../put_in_drawer_variant_agg.sh | 1 +
.../put_in_drawer_visual_matching.sh | 1 +
scripts/{ => originals}/run_cogact.sh | 0
scripts/{ => originals}/run_gr00t.sh | 0
scripts/{ => originals}/run_lerobotpifast.sh | 0
scripts/{ => originals}/run_octo.sh | 0
scripts/{ => originals}/run_openpifast.sh | 0
scripts/{ => originals}/run_openvla.sh | 0
scripts/{ => originals}/run_rt1.sh | 0
scripts/{ => originals}/run_spatialvla.sh | 0
simpler_env/evaluation/config.py | 90 +++
simpler_env/evaluation/evaluate.py | 634 ++++++++++++++++++
.../evaluation/maniskill2_evaluator.py | 24 +-
simpler_env/policies/adapter.py | 219 ++++++
simpler_env/policies/base.py | 12 +
.../policies/hsr_openpi/pi0_or_fast.py | 18 +
simpler_env/policies/lerobotpi/pi0_or_fast.py | 1 -
simpler_env/utils/geometry.py | 463 +++++++++++++
34 files changed, 1586 insertions(+), 41 deletions(-)
delete mode 100644 policies/lerobotpi/README.md
delete mode 100644 policies/tes.py
create mode 100644 scripts/hsr_openpi/README.md
create mode 100644 scripts/hsr_openpi/evaluate_fractal.py
create mode 100644 scripts/lerobotpi/README.md
create mode 100644 scripts/lerobotpi/evaluate_fractal.py
create mode 100644 scripts/openpi/README.md
create mode 100644 scripts/openpi/evaluate_fractal.py
rename scripts/{ => originals}/bridge.sh (99%)
rename scripts/{ => originals}/drawer_variant_agg.sh (98%)
rename scripts/{ => originals}/drawer_visual_matching.sh (99%)
rename scripts/{ => originals}/move_near_variant_agg.sh (99%)
rename scripts/{ => originals}/move_near_visual_matching.sh (97%)
rename scripts/{ => originals}/pick_coke_can_variant_agg.sh (99%)
rename scripts/{ => originals}/pick_coke_can_visual_matching.sh (97%)
rename scripts/{ => originals}/put_in_drawer_variant_agg.sh (98%)
rename scripts/{ => originals}/put_in_drawer_visual_matching.sh (98%)
rename scripts/{ => originals}/run_cogact.sh (100%)
rename scripts/{ => originals}/run_gr00t.sh (100%)
rename scripts/{ => originals}/run_lerobotpifast.sh (100%)
rename scripts/{ => originals}/run_octo.sh (100%)
rename scripts/{ => originals}/run_openpifast.sh (100%)
rename scripts/{ => originals}/run_openvla.sh (100%)
rename scripts/{ => originals}/run_rt1.sh (100%)
rename scripts/{ => originals}/run_spatialvla.sh (100%)
create mode 100644 simpler_env/evaluation/config.py
create mode 100644 simpler_env/evaluation/evaluate.py
create mode 100644 simpler_env/policies/adapter.py
create mode 100644 simpler_env/policies/base.py
create mode 100644 simpler_env/policies/hsr_openpi/pi0_or_fast.py
create mode 100644 simpler_env/utils/geometry.py
diff --git a/ManiSkill2_real2sim b/ManiSkill2_real2sim
index ef7a4d4f..91d154bf 160000
--- a/ManiSkill2_real2sim
+++ b/ManiSkill2_real2sim
@@ -1 +1 @@
-Subproject commit ef7a4d4fdf4b69f2c2154db5b15b9ac8dfe10682
+Subproject commit 91d154bfd864577f8d2e80f3fc2f8b4d9df9ae5c
diff --git a/policies/lerobotpi/README.md b/policies/lerobotpi/README.md
deleted file mode 100644
index 153db02a..00000000
--- a/policies/lerobotpi/README.md
+++ /dev/null
@@ -1,18 +0,0 @@
-
-```bash
-uv venv -p 3.10 policies/lerobotpi/.venv
-
-
-# git clone https://github.com/huggingface/lerobot.git
-
-# uv pip install -e .
-source $(pwd)/policies/lerobotpi/.venv/bin/activate
-uv pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124
-uv pip install lerobot[pi0]==0.3.2
-uv pip install -e .
-uv pip install pytest
-
-huggingface-cli login
-bash scripts/run_lerobotpifast.sh
-
-```
\ No newline at end of file
diff --git a/policies/tes.py b/policies/tes.py
deleted file mode 100644
index 8fcea7b6..00000000
--- a/policies/tes.py
+++ /dev/null
@@ -1,3 +0,0 @@
-import torch
-
-print(torch.cuda.is_available())
diff --git a/scripts/hsr_openpi/README.md b/scripts/hsr_openpi/README.md
new file mode 100644
index 00000000..412ec981
--- /dev/null
+++ b/scripts/hsr_openpi/README.md
@@ -0,0 +1,17 @@
+
+```bash
+uv venv -p 3.11 scripts/hsr_openpi/.venv
+
+git clone https://github.com/airoa-org/hsr_openpi.git
+cd hsr_openpi
+git checkout remotes/origin/release
+GIT_LFS_SKIP_SMUDGE=1 UV_PROJECT_ENVIRONMENT=../scripts/hsr_openpi/.venv uv sync
+GIT_LFS_SKIP_SMUDGE=1 uv pip install -e .
+cd ..
+
+source $(pwd)/scripts/hsr_openpi/.venv/bin/activate
+uv pip install -e .
+
+
+python scripts/hsr_openpi/evaluate_fractal.py --ckpt-path /data/checkpoints/pi0_fractal_low_mem_finetune2/my_experiment/17000
+```
\ No newline at end of file
diff --git a/scripts/hsr_openpi/evaluate_fractal.py b/scripts/hsr_openpi/evaluate_fractal.py
new file mode 100644
index 00000000..4ee03772
--- /dev/null
+++ b/scripts/hsr_openpi/evaluate_fractal.py
@@ -0,0 +1,39 @@
+import argparse
+
+from openpi.policies import policy_config as _policy_config
+from openpi.training import config as _config
+from openpi_client import action_chunk_broker
+
+from simpler_env.evaluation.evaluate import run_comprehensive_evaluation
+from simpler_env.policies.adapter import AiroaToSimplerFractalAdapter
+from simpler_env.policies.hsr_openpi.pi0_or_fast import OpenpiToAiroaPolicy
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = OpenpiToAiroaPolicy(
+ policy=action_chunk_broker.ActionChunkBroker(
+ policy=_policy_config.create_trained_policy(
+ _config.get_config("pi0_fractal_low_mem_finetune"),
+ ckpt_path,
+ ),
+ action_horizon=10,
+ ),
+ )
+
+ env_policy = AiroaToSimplerFractalAdapter(policy=policy)
+
+ print("Policy initialized. Starting evaluation...")
+
+ final_scores = run_comprehensive_evaluation(env_policy=env_policy, ckpt_path=args.ckpt_path)
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
diff --git a/scripts/lerobotpi/README.md b/scripts/lerobotpi/README.md
new file mode 100644
index 00000000..02bb6e59
--- /dev/null
+++ b/scripts/lerobotpi/README.md
@@ -0,0 +1,16 @@
+
+```bash
+uv venv -p 3.10 scripts/lerobotpi/.venv
+
+
+# uv pip install -e .
+source $(pwd)/scripts/lerobotpi/.venv/bin/activate
+uv pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124
+uv pip install git+https://github.com/huggingface/lerobot.git@ce2b9724bfe1b5a4c45e61b1890eef3f5ab0909c#egg=lerobot[pi0]
+uv pip install -e .
+uv pip install pytest
+
+huggingface-cli login
+CUDA_VISIBLE_DEVICES=1 python scripts/lerobotpi/evaluate_fractal.py --ckpt-path HaomingSong/lerobot-pi0-fractal
+CUDA_VISIBLE_DEVICES=1 python scripts/lerobotpi/evaluate_fractal.py --ckpt-path lerobot/pi0
+```
\ No newline at end of file
diff --git a/scripts/lerobotpi/evaluate_fractal.py b/scripts/lerobotpi/evaluate_fractal.py
new file mode 100644
index 00000000..0ed009a0
--- /dev/null
+++ b/scripts/lerobotpi/evaluate_fractal.py
@@ -0,0 +1,22 @@
+import argparse
+from simpler_env.policies.lerobotpi.pi0_or_fast import LerobotPiFastInference
+from simpler_env.evaluation.evaluate import run_comprehensive_evaluation
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = LerobotPiFastInference(saved_model_path=ckpt_path, policy_setup="google_robot", action_scale=1.0)
+
+ print("Policy initialized. Starting evaluation...")
+
+ final_scores = run_comprehensive_evaluation(env_policy=policy, ckpt_path=args.ckpt_path)
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
diff --git a/scripts/openpi/README.md b/scripts/openpi/README.md
new file mode 100644
index 00000000..efba3b67
--- /dev/null
+++ b/scripts/openpi/README.md
@@ -0,0 +1,16 @@
+
+```bash
+uv venv -p 3.11 scripts/openpi/.venv
+
+git clone https://github.com/airoa-org/hsr_openpi.git
+cd hsr_openpi
+GIT_LFS_SKIP_SMUDGE=1 UV_PROJECT_ENVIRONMENT=../scripts/openpi/.venv uv sync
+GIT_LFS_SKIP_SMUDGE=1 uv pip install -e .
+cd ..
+
+source $(pwd)/scripts/openpi/.venv/bin/activate
+uv pip install -e .
+
+
+python scripts/openpi/evaluate_fractal.py --ckpt-path /data/checkpoints/pi0_fractal_low_mem_finetune2/my_experiment/17000
+```
\ No newline at end of file
diff --git a/scripts/openpi/evaluate_fractal.py b/scripts/openpi/evaluate_fractal.py
new file mode 100644
index 00000000..6a3b512d
--- /dev/null
+++ b/scripts/openpi/evaluate_fractal.py
@@ -0,0 +1,24 @@
+import argparse
+
+from simpler_env.evaluation.evaluate import run_comprehensive_evaluation
+from simpler_env.policies.openpi.pi0_or_fast import OpenPiFastInference
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = OpenPiFastInference(saved_model_path=ckpt_path, policy_setup="google_robot", action_scale=1.0)
+
+ print("Policy initialized. Starting evaluation...")
+
+ final_scores = run_comprehensive_evaluation(env_policy=policy, ckpt_path=args.ckpt_path)
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
diff --git a/scripts/bridge.sh b/scripts/originals/bridge.sh
similarity index 99%
rename from scripts/bridge.sh
rename to scripts/originals/bridge.sh
index e346f543..720460d5 100644
--- a/scripts/bridge.sh
+++ b/scripts/originals/bridge.sh
@@ -1,3 +1,4 @@
+# scripts/bridge.sh
ckpt_path=$1
policy_model=$2
action_ensemble_temp=$3
diff --git a/scripts/drawer_variant_agg.sh b/scripts/originals/drawer_variant_agg.sh
similarity index 98%
rename from scripts/drawer_variant_agg.sh
rename to scripts/originals/drawer_variant_agg.sh
index 17113f21..a29de36d 100644
--- a/scripts/drawer_variant_agg.sh
+++ b/scripts/originals/drawer_variant_agg.sh
@@ -1,3 +1,4 @@
+# scripts/drawer_variant_agg.sh
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
ckpt_path=$1
policy_model=$2
diff --git a/scripts/drawer_visual_matching.sh b/scripts/originals/drawer_visual_matching.sh
similarity index 99%
rename from scripts/drawer_visual_matching.sh
rename to scripts/originals/drawer_visual_matching.sh
index 93869004..83655b1f 100644
--- a/scripts/drawer_visual_matching.sh
+++ b/scripts/originals/drawer_visual_matching.sh
@@ -1,3 +1,4 @@
+# scripts/drawer_visual_matching.sh
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
ckpt_path=$1
policy_model=$2
diff --git a/scripts/move_near_variant_agg.sh b/scripts/originals/move_near_variant_agg.sh
similarity index 99%
rename from scripts/move_near_variant_agg.sh
rename to scripts/originals/move_near_variant_agg.sh
index d4430537..55054ea6 100644
--- a/scripts/move_near_variant_agg.sh
+++ b/scripts/originals/move_near_variant_agg.sh
@@ -1,3 +1,4 @@
+# scripts/move_near_variant_agg.sh
ckpt_path=$1
policy_model=$2
action_ensemble_temp=$3
diff --git a/scripts/move_near_visual_matching.sh b/scripts/originals/move_near_visual_matching.sh
similarity index 97%
rename from scripts/move_near_visual_matching.sh
rename to scripts/originals/move_near_visual_matching.sh
index d14a2e0e..eaee31b1 100644
--- a/scripts/move_near_visual_matching.sh
+++ b/scripts/originals/move_near_visual_matching.sh
@@ -1,3 +1,4 @@
+# scripts/move_near_visual_matching.sh
ckpt_path=$1
policy_model=$2
action_ensemble_temp=$3
diff --git a/scripts/pick_coke_can_variant_agg.sh b/scripts/originals/pick_coke_can_variant_agg.sh
similarity index 99%
rename from scripts/pick_coke_can_variant_agg.sh
rename to scripts/originals/pick_coke_can_variant_agg.sh
index f5e1427a..9be76965 100644
--- a/scripts/pick_coke_can_variant_agg.sh
+++ b/scripts/originals/pick_coke_can_variant_agg.sh
@@ -1,3 +1,4 @@
+# scripts/pick_coke_can_variant_agg.sh
ckpt_path=$1
policy_model=$2
action_ensemble_temp=$3
diff --git a/scripts/pick_coke_can_visual_matching.sh b/scripts/originals/pick_coke_can_visual_matching.sh
similarity index 97%
rename from scripts/pick_coke_can_visual_matching.sh
rename to scripts/originals/pick_coke_can_visual_matching.sh
index 5fa0451a..f36e29bc 100644
--- a/scripts/pick_coke_can_visual_matching.sh
+++ b/scripts/originals/pick_coke_can_visual_matching.sh
@@ -1,3 +1,4 @@
+# scripts/pick_coke_can_visual_matching.sh
ckpt_path=$1
policy_model=$2
action_ensemble_temp=$3
diff --git a/scripts/put_in_drawer_variant_agg.sh b/scripts/originals/put_in_drawer_variant_agg.sh
similarity index 98%
rename from scripts/put_in_drawer_variant_agg.sh
rename to scripts/originals/put_in_drawer_variant_agg.sh
index e92d5454..37a0036d 100644
--- a/scripts/put_in_drawer_variant_agg.sh
+++ b/scripts/originals/put_in_drawer_variant_agg.sh
@@ -1,3 +1,4 @@
+# scripts/put_in_drawer_variant_agg.sh
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
ckpt_paths=($1)
policy_model=$2
diff --git a/scripts/put_in_drawer_visual_matching.sh b/scripts/originals/put_in_drawer_visual_matching.sh
similarity index 98%
rename from scripts/put_in_drawer_visual_matching.sh
rename to scripts/originals/put_in_drawer_visual_matching.sh
index a42dd4bd..fbfb3634 100644
--- a/scripts/put_in_drawer_visual_matching.sh
+++ b/scripts/originals/put_in_drawer_visual_matching.sh
@@ -1,3 +1,4 @@
+# scripts/put_in_drawer_visual_matching.sh
# shader_dir=rt means that we turn on ray-tracing rendering; this is quite crucial for the open / close drawer task as policies often rely on shadows to infer depth
ckpt_paths=($1)
policy_model=$2
diff --git a/scripts/run_cogact.sh b/scripts/originals/run_cogact.sh
similarity index 100%
rename from scripts/run_cogact.sh
rename to scripts/originals/run_cogact.sh
diff --git a/scripts/run_gr00t.sh b/scripts/originals/run_gr00t.sh
similarity index 100%
rename from scripts/run_gr00t.sh
rename to scripts/originals/run_gr00t.sh
diff --git a/scripts/run_lerobotpifast.sh b/scripts/originals/run_lerobotpifast.sh
similarity index 100%
rename from scripts/run_lerobotpifast.sh
rename to scripts/originals/run_lerobotpifast.sh
diff --git a/scripts/run_octo.sh b/scripts/originals/run_octo.sh
similarity index 100%
rename from scripts/run_octo.sh
rename to scripts/originals/run_octo.sh
diff --git a/scripts/run_openpifast.sh b/scripts/originals/run_openpifast.sh
similarity index 100%
rename from scripts/run_openpifast.sh
rename to scripts/originals/run_openpifast.sh
diff --git a/scripts/run_openvla.sh b/scripts/originals/run_openvla.sh
similarity index 100%
rename from scripts/run_openvla.sh
rename to scripts/originals/run_openvla.sh
diff --git a/scripts/run_rt1.sh b/scripts/originals/run_rt1.sh
similarity index 100%
rename from scripts/run_rt1.sh
rename to scripts/originals/run_rt1.sh
diff --git a/scripts/run_spatialvla.sh b/scripts/originals/run_spatialvla.sh
similarity index 100%
rename from scripts/run_spatialvla.sh
rename to scripts/originals/run_spatialvla.sh
diff --git a/simpler_env/evaluation/config.py b/simpler_env/evaluation/config.py
new file mode 100644
index 00000000..fc6e481a
--- /dev/null
+++ b/simpler_env/evaluation/config.py
@@ -0,0 +1,90 @@
+from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple, Union
+
+import numpy as np
+from sapien.core import Pose
+from transforms3d.euler import euler2quat
+
+
+def parse_range_tuple(t):
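+ """Expand a (start, stop, num) range triple into num evenly spaced values (argparse-style range arguments)."""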
+ return np.linspace(t[0], t[1], int(t[2]))
+
+
+@dataclass
+class ManiSkill2Config:
+ # Required parameters
+ env_name: str # required=True in argparse
+
+ # Policy settings
+ policy_model: str = "rt1"
+ policy_setup: str = "google_robot"
+ ckpt_path: Optional[str] = None
+
+ # Environment settings
+ scene_name: str = "google_pick_coke_can_1_v4"
+ enable_raytracing: bool = False
+ robot: str = "google_robot_static"
+ obs_camera_name: Optional[str] = None
+ action_scale: float = 1.0
+
+ # Control settings
+ control_freq: int = 3
+ sim_freq: int = 513
+ max_episode_steps: int = 80
+ rgb_overlay_path: Optional[str] = None
+
+ # Robot initialization ranges
+ robot_init_x_range: List[float] = field(default_factory=lambda: [0.35, 0.35, 1])
+ robot_init_y_range: List[float] = field(default_factory=lambda: [0.20, 0.20, 1])
+ robot_init_rot_quat_center: List[float] = field(default_factory=lambda: [1, 0, 0, 0])
+ robot_init_rot_rpy_range: List[float] = field(default_factory=lambda: [0, 0, 1, 0, 0, 1, 0, 0, 1])
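+ # The nine values above are [roll_min, roll_max, roll_num, pitch_min, pitch_max, pitch_num, yaw_min, yaw_max, yaw_num]; they are sliced in __post_init__.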
+
+ # Object variation settings
+ obj_variation_mode: str = "xy" # choices: ["xy", "episode"]
+ obj_episode_range: List[int] = field(default_factory=lambda: [0, 60])
+ obj_init_x_range: List[float] = field(default_factory=lambda: [-0.35, -0.12, 5])
+ obj_init_y_range: List[float] = field(default_factory=lambda: [-0.02, 0.42, 5])
+
+ # Additional settings
+ additional_env_build_kwargs: Optional[Dict[str, Any]] = None
+ additional_env_save_tags: Optional[str] = None
+ logging_dir: str = "./results"
+ tf_memory_limit: int = 3072
+ octo_init_rng: int = 0
+
+ # Computed attributes (will be set in __post_init__)
+ robot_init_xs: List[float] = field(init=False)
+ robot_init_ys: List[float] = field(init=False)
+ robot_init_quats: List[List[float]] = field(init=False)
+ obj_init_xs: Optional[List[float]] = field(init=False, default=None)
+ obj_init_ys: Optional[List[float]] = field(init=False, default=None)
+
+ def __post_init__(self):
+ """argparseの後処理と同等の計算を実行"""
+ # Robot pose calculations
+ self.robot_init_xs = parse_range_tuple(self.robot_init_x_range)
+ self.robot_init_ys = parse_range_tuple(self.robot_init_y_range)
+
+ # Robot orientation calculations
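+ # Each (roll, pitch, yaw) offset drawn from the rpy range is composed with robot_init_rot_quat_center via SAPIEN Pose multiplication.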
+ self.robot_init_quats = []
+ for r in parse_range_tuple(self.robot_init_rot_rpy_range[:3]):
+ for p in parse_range_tuple(self.robot_init_rot_rpy_range[3:6]):
+ for y in parse_range_tuple(self.robot_init_rot_rpy_range[6:]):
+ quat = (Pose(q=euler2quat(r, p, y)) * Pose(q=self.robot_init_rot_quat_center)).q
+ self.robot_init_quats.append(quat)
+
+ # Object position calculations
+ if self.obj_variation_mode == "xy":
+ self.obj_init_xs = parse_range_tuple(self.obj_init_x_range)
+ self.obj_init_ys = parse_range_tuple(self.obj_init_y_range)
+
+ # Update logging info if using different camera
+ if self.obs_camera_name is not None:
+ if self.additional_env_save_tags is None:
+ self.additional_env_save_tags = f"obs_camera_{self.obs_camera_name}"
+ else:
+ self.additional_env_save_tags = self.additional_env_save_tags + f"_obs_camera_{self.obs_camera_name}"
+
+ # Validate obj_variation_mode
+ if self.obj_variation_mode not in ["xy", "episode"]:
+ raise ValueError(f"obj_variation_mode must be 'xy' or 'episode', got {self.obj_variation_mode}")
diff --git a/simpler_env/evaluation/evaluate.py b/simpler_env/evaluation/evaluate.py
new file mode 100644
index 00000000..b5c5a147
--- /dev/null
+++ b/simpler_env/evaluation/evaluate.py
@@ -0,0 +1,634 @@
+from typing import Any, Dict, List, Tuple
+
+import numpy as np
+
+from ..policies.base import AiroaBasePolicy
+from .config import ManiSkill2Config
+from .maniskill2_evaluator import maniskill2_evaluator
+
+
+def calculate_robust_score(results: List[List[bool]], penalty_factor: float = 0.5) -> float:
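+ """Mean success rate across runs minus penalty_factor times their standard deviation, floored at 0."""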
+ if not results:
+ return 0.0
+ success_rates = [np.mean(run) for run in results if run]
+ if not success_rates:
+ return 0.0
+ mean_success_rate = np.mean(success_rates)
+ std_dev = np.std(success_rates)
+ robust_score = mean_success_rate - penalty_factor * std_dev
+ return max(0.0, robust_score)
+
+
+def _run_single_evaluation(env_policy: AiroaBasePolicy, cfg: ManiSkill2Config, ckpt_path: str) -> List[bool]:
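+ """Run a single evaluation config through maniskill2_evaluator and return the per-episode success list."""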
+ if cfg.additional_env_build_kwargs:
+ if "urdf_version" in cfg.additional_env_build_kwargs:
+ kwargs_info = cfg.additional_env_build_kwargs["urdf_version"]
+ else:
+ kwargs_info = cfg.additional_env_build_kwargs
+ else:
+ kwargs_info = None
+
+ print(f" ▶️ Running: env={cfg.env_name}, scene={cfg.scene_name}, " f"enable_rt={cfg.enable_raytracing}, kwargs={kwargs_info}")
+ success_arr = maniskill2_evaluator(env_policy, cfg)
+ print(f" ✅ Success Rate: {np.mean(success_arr):.2%}")
+ return success_arr
+
+
+# ----------------------------------------------------------------------
+# 1) pick_coke_can_variant_agg.sh
+# ----------------------------------------------------------------------
+def pick_coke_can_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- pick_coke_can_variant_agg ---")
+ results: List[List[bool]] = []
+
+ coke_can_options_arr = [
+ {"lr_switch": True},
+ {"upright": True},
+ {"laid_vertically": True},
+ ]
+
+ base_kwargs = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=80,
+ ckpt_path=ckpt_path,
+ robot_init_x_range=[0.35, 0.35, 1],
+ robot_init_y_range=[0.20, 0.20, 1],
+ obj_init_x_range=[-0.35, -0.12, 5],
+ obj_init_y_range=[-0.02, 0.42, 5],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ )
+
+ # base
+ for opt in coke_can_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleOpenedCokeCanInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs={**opt},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # table textures (baked)
+ baked_scenes = ["Baked_sc1_staging_objaverse_cabinet1_h870", "Baked_sc1_staging_objaverse_cabinet2_h870"]
+ for scene in baked_scenes:
+ for opt in coke_can_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleOpenedCokeCanInScene-v0",
+ scene_name=scene,
+ additional_env_build_kwargs={**opt},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # distractors (normal + more)
+ for opt in coke_can_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleOpenedCokeCanDistractorInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs={**opt},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleOpenedCokeCanDistractorInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs={**opt, "distractor_config": "more"},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # backgrounds
+ bg_scenes = ["google_pick_coke_can_1_v4_alt_background", "google_pick_coke_can_1_v4_alt_background_2"]
+ for scene in bg_scenes:
+ for opt in coke_can_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleOpenedCokeCanInScene-v0",
+ scene_name=scene,
+ additional_env_build_kwargs={**opt},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # lightings (darker / brighter)
+ for opt in coke_can_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleOpenedCokeCanInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs={**opt, "slightly_darker_lighting": True},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleOpenedCokeCanInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs={**opt, "slightly_brighter_lighting": True},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # camera orientations
+ alt_envs = ["GraspSingleOpenedCokeCanAltGoogleCameraInScene-v0", "GraspSingleOpenedCokeCanAltGoogleCamera2InScene-v0"]
+ for env in alt_envs:
+ for opt in coke_can_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name=env,
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs={**opt},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+# ----------------------------------------------------------------------
+# 2) pick_coke_can_visual_matching.sh
+# ----------------------------------------------------------------------
+def pick_coke_can_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- pick_coke_can_visual_matching ---")
+ results: List[List[bool]] = []
+
+ coke_can_options_arr = [
+ {"lr_switch": True},
+ {"upright": True},
+ {"laid_vertically": True},
+ ]
+ urdf_versions = [None, "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", "recolor_cabinet_visual_matching_1"]
+
+ base_kwargs = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=80,
+ ckpt_path=ckpt_path,
+ robot_init_x_range=[0.35, 0.35, 1],
+ robot_init_y_range=[0.20, 0.20, 1],
+ obj_init_x_range=[-0.35, -0.12, 5],
+ obj_init_y_range=[-0.02, 0.42, 5],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ )
+
+ for urdf in urdf_versions:
+ for opt in coke_can_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleOpenedCokeCanInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/google_coke_can_real_eval_1.png",
+ additional_env_build_kwargs={**opt, "urdf_version": urdf},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+# ----------------------------------------------------------------------
+# 3) move_near_variant_agg.sh
+# ----------------------------------------------------------------------
+def move_near_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- move_near_variant_agg ---")
+ results: List[List[bool]] = []
+
+ base_kwargs = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=80,
+ robot_init_x_range=[0.35, 0.35, 1],
+ robot_init_y_range=[0.21, 0.21, 1],
+ obj_variation_mode="episode",
+ obj_episode_range=[0, 60],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.09, -0.09, 1],
+ ckpt_path=ckpt_path,
+ )
+
+ # base
+ cfg = ManiSkill2Config(**base_kwargs, env_name="MoveNearGoogleInScene-v0", scene_name="google_pick_coke_can_1_v4")
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # distractor(no_distractor=True)
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="MoveNearGoogleInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs={"no_distractor": True},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # backgrounds
+ for scene in ["google_pick_coke_can_1_v4_alt_background", "google_pick_coke_can_1_v4_alt_background_2"]:
+ cfg = ManiSkill2Config(**base_kwargs, env_name="MoveNearGoogleInScene-v0", scene_name=scene)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # lighting
+ for k in [{"slightly_darker_lighting": True}, {"slightly_brighter_lighting": True}]:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="MoveNearGoogleInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs=k,
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # table textures (baked)
+ for scene in ["Baked_sc1_staging_objaverse_cabinet1_h870", "Baked_sc1_staging_objaverse_cabinet2_h870"]:
+ cfg = ManiSkill2Config(**base_kwargs, env_name="MoveNearGoogleInScene-v0", scene_name=scene)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # camera orientations
+ for env in ["MoveNearAltGoogleCameraInScene-v0", "MoveNearAltGoogleCamera2InScene-v0"]:
+ cfg = ManiSkill2Config(**base_kwargs, env_name=env, scene_name="google_pick_coke_can_1_v4")
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+# ----------------------------------------------------------------------
+# 4) move_near_visual_matching.sh
+# ----------------------------------------------------------------------
+def move_near_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- move_near_visual_matching ---")
+ results: List[List[bool]] = []
+
+ base_kwargs = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=80,
+ robot_init_x_range=[0.35, 0.35, 1],
+ robot_init_y_range=[0.21, 0.21, 1],
+ obj_variation_mode="episode",
+ obj_episode_range=[0, 60],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.09, -0.09, 1],
+ ckpt_path=ckpt_path,
+ )
+
+ urdf_versions = [None, "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", "recolor_cabinet_visual_matching_1"]
+ for urdf in urdf_versions:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="MoveNearGoogleBakedTexInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/google_move_near_real_eval_1.png",
+ additional_env_save_tags="baked_except_bpb_orange",
+ additional_env_build_kwargs={"urdf_version": urdf},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+# ----------------------------------------------------------------------
+# 5) put_in_drawer_variant_agg.sh
+# ----------------------------------------------------------------------
+def put_in_drawer_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- put_in_drawer_variant_agg ---")
+ results: List[List[bool]] = []
+
+ common = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=200,
+ ckpt_path=ckpt_path,
+ additional_env_build_kwargs={"model_ids": "apple"},
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ obj_init_x_range=[-0.08, -0.02, 3],
+ obj_init_y_range=[-0.02, 0.08, 3],
+ robot_init_x_range=[0.65, 0.65, 1],
+ robot_init_y_range=[-0.2, 0.2, 3],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0.0, 0.0, 1],
+ )
+
+ env_names = ["PlaceIntoClosedTopDrawerCustomInScene-v0"]
+
+ # base (enable raytracing)
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**common, env_name=env_name, scene_name="frl_apartment_stage_simple", enable_raytracing=True)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # backgrounds (shader_dir=rt)
+ for scene in ["modern_bedroom_no_roof", "modern_office_no_roof"]:
+ merged = common["additional_env_build_kwargs"].copy()
+ merged["shader_dir"] = "rt"
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**common, env_name=env_name, scene_name=scene, additional_env_build_kwargs=merged)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # lightings
+ for light_mode in ["brighter", "darker"]:
+ merged = common["additional_env_build_kwargs"].copy()
+ merged.update({"shader_dir": "rt", "light_mode": light_mode})
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**common, env_name=env_name, scene_name="frl_apartment_stage_simple", additional_env_build_kwargs=merged)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # new cabinets
+ for station in ["mk_station2", "mk_station3"]:
+ merged = common["additional_env_build_kwargs"].copy()
+ merged.update({"shader_dir": "rt", "station_name": station})
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**common, env_name=env_name, scene_name="frl_apartment_stage_simple", additional_env_build_kwargs=merged)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+# ----------------------------------------------------------------------
+# 6) put_in_drawer_visual_matching.sh
+# ----------------------------------------------------------------------
+def put_in_drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- put_in_drawer_visual_matching ---")
+ results: List[List[bool]] = []
+
+ env_names = ["PlaceIntoClosedTopDrawerCustomInScene-v0"]
+ urdf_versions = ["recolor_cabinet_visual_matching_1", "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", None]
+
+ base = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=200,
+ ckpt_path=ckpt_path,
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ obj_init_x_range=[-0.08, -0.02, 3],
+ obj_init_y_range=[-0.02, 0.08, 3],
+ enable_raytracing=True,
+ )
+
+ overlay_poses = [
+ # A0
+ dict(
+ robot_init_x_range=[0.644, 0.644, 1],
+ robot_init_y_range=[-0.179, -0.179, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.03, -0.03, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a0.png",
+ ),
+ # B0
+ dict(
+ robot_init_x_range=[0.652, 0.652, 1],
+ robot_init_y_range=[0.009, 0.009, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b0.png",
+ ),
+ # C0
+ dict(
+ robot_init_x_range=[0.665, 0.665, 1],
+ robot_init_y_range=[0.224, 0.224, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c0.png",
+ ),
+ ]
+
+ add_base = dict(station_name="mk_station_recolor", light_mode="simple", disable_bad_material=True, model_ids="baked_apple_v2")
+
+ for urdf in urdf_versions:
+ add_kwargs = {**add_base, "urdf_version": urdf}
+ for env_name in env_names:
+ for pose in overlay_poses:
+ cfg = ManiSkill2Config(
+ **base,
+ env_name=env_name,
+ scene_name="dummy_drawer",
+ additional_env_build_kwargs=add_kwargs,
+ **pose,
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+# ----------------------------------------------------------------------
+# 7) drawer_variant_agg.sh
+# ----------------------------------------------------------------------
+def drawer_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- drawer_variant_agg ---")
+ results: List[List[bool]] = []
+
+ env_names = [
+ "OpenTopDrawerCustomInScene-v0",
+ "OpenMiddleDrawerCustomInScene-v0",
+ "OpenBottomDrawerCustomInScene-v0",
+ "CloseTopDrawerCustomInScene-v0",
+ "CloseMiddleDrawerCustomInScene-v0",
+ "CloseBottomDrawerCustomInScene-v0",
+ ]
+
+ base = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=113,
+ ckpt_path=ckpt_path,
+ robot_init_x_range=[0.65, 0.85, 3],
+ robot_init_y_range=[-0.2, 0.2, 3],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0.0, 0.0, 1],
+ obj_init_x_range=[0, 0, 1],
+ obj_init_y_range=[0, 0, 1],
+ )
+
+ # base (enable raytracing)
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**base, env_name=env_name, scene_name="frl_apartment_stage_simple", enable_raytracing=True)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # backgrounds (shader_dir=rt)
+ for scene in ["modern_bedroom_no_roof", "modern_office_no_roof"]:
+ merged = {"shader_dir": "rt"}
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**base, env_name=env_name, scene_name=scene, additional_env_build_kwargs=merged)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # lightings
+ for light in ["brighter", "darker"]:
+ merged = {"shader_dir": "rt", "light_mode": light}
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**base, env_name=env_name, scene_name="frl_apartment_stage_simple", additional_env_build_kwargs=merged)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # new cabinets
+ for station in ["mk_station2", "mk_station3"]:
+ merged = {"shader_dir": "rt", "station_name": station}
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**base, env_name=env_name, scene_name="frl_apartment_stage_simple", additional_env_build_kwargs=merged)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+# ----------------------------------------------------------------------
+# 8) drawer_visual_matching.sh
+# ----------------------------------------------------------------------
+def drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- drawer_visual_matching ---")
+ results: List[List[bool]] = []
+
+ env_names = [
+ "OpenTopDrawerCustomInScene-v0",
+ "OpenMiddleDrawerCustomInScene-v0",
+ "OpenBottomDrawerCustomInScene-v0",
+ "CloseTopDrawerCustomInScene-v0",
+ "CloseMiddleDrawerCustomInScene-v0",
+ "CloseBottomDrawerCustomInScene-v0",
+ ]
+ urdf_versions = ["recolor_cabinet_visual_matching_1", "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", None]
+
+ base = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=113,
+ ckpt_path=ckpt_path,
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ obj_init_x_range=[0, 0, 1],
+ obj_init_y_range=[0, 0, 1],
+ enable_raytracing=True,
+ scene_name="dummy_drawer",
+ )
+
+ # 9 overlay poses (A0/A1/A2/B0/B1/B2/C0/C1/C2)
+ overlay_poses = [
+ # A0/A1/A2
+ dict(
+ robot_init_x_range=[0.644, 0.644, 1],
+ robot_init_y_range=[-0.179, -0.179, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.03, -0.03, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a0.png",
+ ),
+ dict(
+ robot_init_x_range=[0.765, 0.765, 1],
+ robot_init_y_range=[-0.182, -0.182, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.02, -0.02, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a1.png",
+ ),
+ dict(
+ robot_init_x_range=[0.889, 0.889, 1],
+ robot_init_y_range=[-0.203, -0.203, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.06, -0.06, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a2.png",
+ ),
+ # B0/B1/B2
+ dict(
+ robot_init_x_range=[0.652, 0.652, 1],
+ robot_init_y_range=[0.009, 0.009, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b0.png",
+ ),
+ dict(
+ robot_init_x_range=[0.752, 0.752, 1],
+ robot_init_y_range=[0.009, 0.009, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b1.png",
+ ),
+ dict(
+ robot_init_x_range=[0.851, 0.851, 1],
+ robot_init_y_range=[0.035, 0.035, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b2.png",
+ ),
+ # C0/C1/C2
+ dict(
+ robot_init_x_range=[0.665, 0.665, 1],
+ robot_init_y_range=[0.224, 0.224, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c0.png",
+ ),
+ dict(
+ robot_init_x_range=[0.765, 0.765, 1],
+ robot_init_y_range=[0.222, 0.222, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.025, -0.025, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c1.png",
+ ),
+ dict(
+ robot_init_x_range=[0.865, 0.865, 1],
+ robot_init_y_range=[0.222, 0.222, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.025, -0.025, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c2.png",
+ ),
+ ]
+
+ add_base = dict(station_name="mk_station_recolor", light_mode="simple", disable_bad_material=True)
+
+ for urdf in urdf_versions:
+ add_kwargs = {**add_base, "urdf_version": urdf}
+ for env_name in env_names:
+ for pose in overlay_poses:
+ cfg = ManiSkill2Config(
+ **base,
+ env_name=env_name,
+ additional_env_build_kwargs=add_kwargs,
+ **pose,
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+# ======================================================================
+# Comprehensive evaluation (weighting and scoring follow the original scheme)
+# ======================================================================
+SIM_WEIGHT = 0.3
+VISUAL_MATCHING_WEIGHT = 0.7
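+# The two weights sum to 1.0, so the division by total_weight below only matters if they are changed.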
+
+
+def run_comprehensive_evaluation(env_policy: AiroaBasePolicy, ckpt_path: str) -> Dict[str, float]:
+ print("=" * 80)
+ print(f"🚀 STARTING COMPREHENSIVE EVALUATION 🚀")
+ print(f"Checkpoint: {ckpt_path}")
+ print(f"Weights: Sim={SIM_WEIGHT}, VisualMatching={VISUAL_MATCHING_WEIGHT}")
+ print("=" * 80)
+
+ # Visual matching (overlay)
+ vm_results: List[List[bool]] = []
+ vm_results += pick_coke_can_visual_matching(env_policy, ckpt_path)
+ vm_results += move_near_visual_matching(env_policy, ckpt_path)
+ vm_results += put_in_drawer_visual_matching(env_policy, ckpt_path)
+ vm_results += drawer_visual_matching(env_policy, ckpt_path) # ★ added
+
+ # Simulation (variant_agg)
+ sim_results: List[List[bool]] = []
+ sim_results += pick_coke_can_variant_agg(env_policy, ckpt_path)
+ sim_results += move_near_variant_agg(env_policy, ckpt_path)
+ sim_results += put_in_drawer_variant_agg(env_policy, ckpt_path)
+ sim_results += drawer_variant_agg(env_policy, ckpt_path) # ★ added
+
+ # Robust scores
+ sim_score = calculate_robust_score(sim_results)
+ vm_score = calculate_robust_score(vm_results)
+
+ total_weight = SIM_WEIGHT + VISUAL_MATCHING_WEIGHT
+ final_score = 0.0 if total_weight == 0 else (sim_score * SIM_WEIGHT + vm_score * VISUAL_MATCHING_WEIGHT) / total_weight
+
+ print("\n" + "=" * 80)
+ print("📊 EVALUATION SUMMARY 📊")
+ print("-" * 80)
+ print(f"Simulation Score (Robust): {sim_score:.4f}")
+ print(f" - Total Simulation Runs: {len(sim_results)}")
+ print(f"Visual Matching Score (Robust): {vm_score:.4f}")
+ print(f" - Total Visual Matching Runs: {len(vm_results)}")
+ print("-" * 80)
+ print(f"🏆 Final Weighted Score: {final_score:.4f}")
+ print("=" * 80)
+
+ return {
+ "final_score": final_score,
+ "simulation_robust_score": sim_score,
+ "visual_matching_robust_score": vm_score,
+ }
diff --git a/simpler_env/evaluation/maniskill2_evaluator.py b/simpler_env/evaluation/maniskill2_evaluator.py
index a8244792..8d6796df 100644
--- a/simpler_env/evaluation/maniskill2_evaluator.py
+++ b/simpler_env/evaluation/maniskill2_evaluator.py
@@ -7,10 +7,7 @@
import numpy as np
from transforms3d.euler import quat2euler
-from simpler_env.utils.env.env_builder import (
- build_maniskill2_env,
- get_robot_control_mode,
-)
+from simpler_env.utils.env.env_builder import build_maniskill2_env, get_robot_control_mode
from simpler_env.utils.env.observation_utils import get_image_from_maniskill2_obs_dict
from simpler_env.utils.visualization import write_interval_video, write_video
@@ -124,9 +121,7 @@ def run_maniskill2_eval_single_episode(
# step the environment
obs, reward, done, truncated, info = env.step(
- np.concatenate(
- [action["world_vector"], action["rot_axangle"], action["gripper"]]
- ),
+ np.concatenate([action["world_vector"], action["rot_axangle"], action["gripper"]]),
)
success = "success" if done else "failure"
@@ -138,9 +133,7 @@ def run_maniskill2_eval_single_episode(
print(timestep, info)
- image = get_image_from_maniskill2_obs_dict(
- env, obs, camera_name=obs_camera_name
- )
+ image = get_image_from_maniskill2_obs_dict(env, obs, camera_name=obs_camera_name)
images.append(image)
task_descriptions.append(task_description)
timestep += 1
@@ -171,6 +164,7 @@ def run_maniskill2_eval_single_episode(
video_path = f"{scene_name}/{control_mode}/{env_save_name}/rob_{robot_init_x}_{robot_init_y}_rot_{r:.3f}_{p:.3f}_{y:.3f}_rgb_overlay_{rgb_overlay_path_str}/{video_name}"
video_path = os.path.join(logging_dir, video_path)
write_video(video_path, images, fps=5)
+ print(f"Video saved to {video_path}")
# save action trajectory
action_path = video_path.replace(".mp4", ".png")
@@ -220,14 +214,8 @@ def maniskill2_evaluator(model, args):
)
)
elif args.obj_variation_mode == "episode":
- for obj_episode_id in range(
- args.obj_episode_range[0], args.obj_episode_range[1]
- ):
- success_arr.append(
- run_maniskill2_eval_single_episode(
- obj_episode_id=obj_episode_id, **kwargs
- )
- )
+ for obj_episode_id in range(args.obj_episode_range[0], args.obj_episode_range[1]):
+ success_arr.append(run_maniskill2_eval_single_episode(obj_episode_id=obj_episode_id, **kwargs))
else:
raise NotImplementedError()
diff --git a/simpler_env/policies/adapter.py b/simpler_env/policies/adapter.py
new file mode 100644
index 00000000..6b7de3e8
--- /dev/null
+++ b/simpler_env/policies/adapter.py
@@ -0,0 +1,219 @@
+from typing import Dict, Optional, Sequence
+
+import matplotlib.pyplot as plt
+import numpy as np
+import tensorflow as tf
+
+from simpler_env.utils.geometry import euler2axangle, mat2euler, quat2mat
+
+
+class BaseAdapter:
+ def __init__(self, policy):
+ self.policy = policy
+
+ def reset(self, task_description):
+ pass
+
+ def preprocess(self, image: np.ndarray, prompt: str, eef_pos: np.ndarray) -> Dict:
+ pass
+
+ def postprocess(self, outputs: Dict) -> Dict:
+ pass
+
+ def step(self, image: np.ndarray, prompt: str, eef_pos: np.ndarray) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
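+ # Pipeline: adapter-specific preprocess -> policy.step -> adapter-specific postprocess.
+ # Both the raw policy output and the post-processed output are returned, each mapped to
+ # SIMPLER's {world_vector, rot_axangle, gripper, terminate_episode} action format.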
+ inputs = self.preprocess(image, prompt, eef_pos)
+ outputs = self.policy.step(inputs)
+ state_gripper = inputs["state"][-1]
+ action_gripper = outputs["actions"][-1]
+ # print(f"state: {state_gripper} action: {action_gripper}")
+ final_outputs = self.postprocess(outputs)
+ simpler_outputs = {
+ "world_vector": outputs["actions"][:3],
+ "rot_axangle": outputs["actions"][3:6],
+ "gripper": outputs["actions"][6:],
+ "terminate_episode": outputs["terminate_episode"],
+ }
+ final_simpler_outputs = {
+ "world_vector": final_outputs["actions"][:3],
+ "rot_axangle": final_outputs["actions"][3:6],
+ "gripper": final_outputs["actions"][6:],
+ "terminate_episode": final_outputs["terminate_episode"],
+ }
+ return simpler_outputs, final_simpler_outputs
+
+ def _resize_image(self, image: np.ndarray) -> np.ndarray:
+ image = tf.image.resize(
+ image,
+ size=(256, 256),
+ method="lanczos3",
+ antialias=True,
+ )
+ image = tf.cast(tf.clip_by_value(tf.round(image), 0, 255), tf.uint8).numpy()
+ return image
+
+ def visualize_epoch(self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str) -> None:
+ images = [self._resize_image(image) for image in images]
+ ACTION_DIM_LABELS = ["x", "y", "z", "roll", "pitch", "yaw", "grasp"]
+
+ img_strip = np.concatenate(np.array(images[::3]), axis=1)
+
+ # set up plt figure
+ figure_layout = [["image"] * len(ACTION_DIM_LABELS), ACTION_DIM_LABELS]
+ plt.rcParams.update({"font.size": 12})
+ fig, axs = plt.subplot_mosaic(figure_layout)
+ fig.set_size_inches([45, 10])
+
+ # plot actions
+ pred_actions = np.array([np.concatenate([a["world_vector"], a["rot_axangle"], a["gripper"]], axis=-1) for a in predicted_raw_actions])
+ for action_dim, action_label in enumerate(ACTION_DIM_LABELS):
+ # pred_actions has shape (episode_length, action_dim); plot each action dimension over the episode
+ axs[action_label].plot(pred_actions[:, action_dim], label="predicted action")
+ axs[action_label].set_title(action_label)
+ axs[action_label].set_xlabel("Time in one episode")
+
+ axs["image"].imshow(img_strip)
+ axs["image"].set_xlabel("Time in one episode (subsampled)")
+ plt.legend()
+ plt.savefig(save_path)
+
+
+# from https://github.com/allenzren/open-pi-zero/blob/main/src/agent/env_adapter/simpler.py
+class AiroaToSimplerBridgeAdapter(BaseAdapter):
+ def __init__(self, policy):
+ super().__init__(policy)
+ # EE pose in Bridge data was relative to a top-down pose, instead of robot base
+ self.default_rot = np.array(
+ [[0, 0, 1.0], [0, 1.0, 0], [-1.0, 0, 0]]
+ ) # https://github.com/rail-berkeley/bridge_data_robot/blob/b841131ecd512bafb303075bd8f8b677e0bf9f1f/widowx_envs/widowx_controller/src/widowx_controller/widowx_controller.py#L203
+
+ def reset(self, task_description):
+ pass
+
+ def preprocess(self, image: np.ndarray, prompt: str, eef_pos: np.ndarray) -> dict:
+
+ proprio = eef_pos
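+ # eef_pos layout (as indexed below): [:3] position, [3:7] wxyz quaternion, [7] gripper openness.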
+ rm_bridge = quat2mat(proprio[3:7])
+ rpy_bridge_converted = mat2euler(rm_bridge @ self.default_rot.T)
+ gripper_openness = proprio[7]
+ proprio = np.concatenate(
+ [
+ proprio[:3],
+ rpy_bridge_converted,
+ [gripper_openness],
+ ]
+ )
+
+ inputs = {
+ "image": image,
+ "prompt": prompt,
+ "state": proprio,
+ }
+ return inputs
+
+ def postprocess(self, outputs: dict) -> dict:
+ action = outputs["actions"]
+ roll, pitch, yaw = action[3:6]
+ action_rotation_ax, action_rotation_angle = euler2axangle(roll, pitch, yaw)
+
+ """from simpler octo inference: https://github.com/allenzren/SimplerEnv/blob/7d39d8a44e6d5ec02d4cdc9101bb17f5913bcd2a/simpler_env/policies/octo/octo_model.py#L234-L235"""
+ # trained with [0, 1], 0 for close, 1 for open
+ # convert to -1 close, 1 open for simpler
+ action_gripper = action[-1]
+ action_gripper = 2.0 * (action_gripper > 0.5) - 1.0
+
+ action = np.concatenate(
+ [
+ action[:3],
+ action_rotation_ax * action_rotation_angle,
+ [action_gripper],
+ ]
+ )
+ return {
+ "actions": action,
+ "terminate_episode": outputs["terminate_episode"],
+ }
+
+
+class AiroaToSimplerFractalAdapter(BaseAdapter):
+ def __init__(self, policy):
+ super().__init__(policy)
+ # Constants
+ self.sticky_gripper_num_repeat = 15 # same value as in Octo. Note this applies to every individual action, not every action chunk. Control freq is 3 Hz, so the gripper action stays sticky for roughly 5 seconds.
+
+ def reset(self, task_description):
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+
+ def preprocess(self, image: np.ndarray, prompt: str, eef_pos: np.ndarray) -> dict:
+ """convert wxyz quat from simpler to xyzw used in fractal"""
+ quat_xyzw = np.roll(eef_pos[3:7], -1)
+ gripper_width = eef_pos[7] # from simpler, 0 for close, 1 for open continuous
+ gripper_closedness = (
+ 1 - gripper_width
+ ) # TODO(allenzren): change fractal data processing in training so also use gripper openness in proprio (as in bridge) instead of closedness
+ proprio = np.concatenate(
+ (
+ eef_pos[:3],
+ quat_xyzw,
+ [gripper_closedness],
+ )
+ )
+
+ # H W C [0, 255]
+ inputs = {
+ "image": image,
+ "prompt": prompt,
+ "state": proprio,
+ }
+ return inputs
+
+ def postprocess(self, outputs: dict) -> dict:
+ action = outputs["actions"]
+ roll, pitch, yaw = action[3:6]
+ action_rotation_ax, action_rotation_angle = euler2axangle(roll, pitch, yaw)
+
+ """from simpler octo inference: https://github.com/allenzren/SimplerEnv/blob/7d39d8a44e6d5ec02d4cdc9101bb17f5913bcd2a/simpler_env/policies/octo/octo_model.py#L187"""
+ # the policy outputs absolute gripper actions in [0, 1]: 0 for close, 1 for open
+ # SIMPLER's google robot expects a relative action where -1 means open and 1 means close;
+ # the rescaling and negation below perform that conversion
+
+ gripper_action = action[-1]
+
+ gripper_action = (gripper_action * 2) - 1 # [0, 1] -> [-1, 1] -1 close, 1 open
+
+ # without sticky
+ relative_gripper_action = -gripper_action
+ # if self.previous_gripper_action is None:
+ # relative_gripper_action = -1 # open
+ # else:
+ # relative_gripper_action = -action
+ # self.previous_gripper_action = action
+
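+ # Sticky gripper: once a large gripper command appears, repeat it for sticky_gripper_num_repeat steps,
+ # presumably so brief flickers in the predicted gripper signal do not immediately undo a grasp.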
+ # switch to sticky closing
+ if np.abs(relative_gripper_action) > 0.5 and self.sticky_action_is_on is False:
+ self.sticky_action_is_on = True
+ self.sticky_gripper_action = relative_gripper_action
+
+ # sticky closing
+ if self.sticky_action_is_on:
+ self.gripper_action_repeat += 1
+ relative_gripper_action = self.sticky_gripper_action
+
+ # reaching maximum sticky
+ if self.gripper_action_repeat == self.sticky_gripper_num_repeat:
+ self.sticky_action_is_on = False
+ self.gripper_action_repeat = 0
+ self.sticky_gripper_action = 0.0
+
+ action = np.concatenate(
+ [
+ action[:3],
+ action_rotation_ax * action_rotation_angle,
+ [relative_gripper_action],
+ ]
+ )
+
+ return {
+ "actions": action,
+ "terminate_episode": outputs["terminate_episode"],
+ }
diff --git a/simpler_env/policies/base.py b/simpler_env/policies/base.py
new file mode 100644
index 00000000..73eb48a7
--- /dev/null
+++ b/simpler_env/policies/base.py
@@ -0,0 +1,12 @@
+from abc import abstractmethod
+from typing import Dict
+
+
+class AiroaBasePolicy:
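+ """Minimal policy interface used by the evaluation code: step(obs) returns an action dict, reset() clears state."""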
+ @abstractmethod
+ def step(self, obs: Dict) -> Dict:
+ pass
+
+ @abstractmethod
+ def reset(self) -> None:
+ pass
diff --git a/simpler_env/policies/hsr_openpi/pi0_or_fast.py b/simpler_env/policies/hsr_openpi/pi0_or_fast.py
new file mode 100644
index 00000000..a26c6813
--- /dev/null
+++ b/simpler_env/policies/hsr_openpi/pi0_or_fast.py
@@ -0,0 +1,18 @@
+from typing import Dict
+
+import numpy as np
+
+from simpler_env.policies.base import AiroaBasePolicy
+
+
+class OpenpiToAiroaPolicy(AiroaBasePolicy):
+ def __init__(self, policy):
+ self.policy = policy
+
+ def step(self, obs: Dict) -> Dict:
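+ # Delegate to the wrapped openpi policy; terminate_episode is always zero, so episodes never end early.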
+ outputs = self.policy.infer(obs)
+ outputs["terminate_episode"] = np.zeros(outputs["actions"].shape[0])
+ return outputs
+
+ def reset(self) -> None:
+ self.policy.reset()
diff --git a/simpler_env/policies/lerobotpi/pi0_or_fast.py b/simpler_env/policies/lerobotpi/pi0_or_fast.py
index 97e29227..3323b2cc 100644
--- a/simpler_env/policies/lerobotpi/pi0_or_fast.py
+++ b/simpler_env/policies/lerobotpi/pi0_or_fast.py
@@ -60,7 +60,6 @@ def __init__(
# TODO: add pi0 loading ...
PI0Policy = auto_model_fn(saved_model_path)
- print(f"DADSADADAS: {torch.cuda.is_available()}")
self.vla = PI0Policy.from_pretrained(saved_model_path)
self.vla.to(self.device)
self.vla.reset()
diff --git a/simpler_env/utils/geometry.py b/simpler_env/utils/geometry.py
new file mode 100644
index 00000000..f1e7164a
--- /dev/null
+++ b/simpler_env/utils/geometry.py
@@ -0,0 +1,463 @@
+"""
+Mostly copied from transforms3d library
+
+"""
+
+import math
+
+import numpy as np
+
+_FLOAT_EPS = np.finfo(np.float64).eps
+
+# axis sequences for Euler angles
+_NEXT_AXIS = [1, 2, 0, 1]
+
+# map axes strings to/from tuples of inner axis, parity, repetition, frame
+_AXES2TUPLE = {
+ "sxyz": (0, 0, 0, 0),
+ "sxyx": (0, 0, 1, 0),
+ "sxzy": (0, 1, 0, 0),
+ "sxzx": (0, 1, 1, 0),
+ "syzx": (1, 0, 0, 0),
+ "syzy": (1, 0, 1, 0),
+ "syxz": (1, 1, 0, 0),
+ "syxy": (1, 1, 1, 0),
+ "szxy": (2, 0, 0, 0),
+ "szxz": (2, 0, 1, 0),
+ "szyx": (2, 1, 0, 0),
+ "szyz": (2, 1, 1, 0),
+ "rzyx": (0, 0, 0, 1),
+ "rxyx": (0, 0, 1, 1),
+ "ryzx": (0, 1, 0, 1),
+ "rxzx": (0, 1, 1, 1),
+ "rxzy": (1, 0, 0, 1),
+ "ryzy": (1, 0, 1, 1),
+ "rzxy": (1, 1, 0, 1),
+ "ryxy": (1, 1, 1, 1),
+ "ryxz": (2, 0, 0, 1),
+ "rzxz": (2, 0, 1, 1),
+ "rxyz": (2, 1, 0, 1),
+ "rzyz": (2, 1, 1, 1),
+}
+
+_TUPLE2AXES = dict((v, k) for k, v in _AXES2TUPLE.items())
+
+# For testing whether a number is close to zero
+_EPS4 = np.finfo(float).eps * 4.0
+
+
+def mat2euler(mat, axes="sxyz"):
+ """Return Euler angles from rotation matrix for specified axis sequence.
+
+ Note that many Euler angle triplets can describe one matrix.
+
+ Parameters
+ ----------
+ mat : array-like shape (3, 3) or (4, 4)
+ Rotation matrix or affine.
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+
+ Examples
+ --------
+ >>> R0 = euler2mat(1, 2, 3, 'syxz')
+ >>> al, be, ga = mat2euler(R0, 'syxz')
+ >>> R1 = euler2mat(al, be, ga, 'syxz')
+ >>> np.allclose(R0, R1)
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis
+ j = _NEXT_AXIS[i + parity]
+ k = _NEXT_AXIS[i - parity + 1]
+
+ M = np.array(mat, dtype=np.float64, copy=False)[:3, :3]
+ if repetition:
+ sy = math.sqrt(M[i, j] * M[i, j] + M[i, k] * M[i, k])
+ if sy > _EPS4:
+ ax = math.atan2(M[i, j], M[i, k])
+ ay = math.atan2(sy, M[i, i])
+ az = math.atan2(M[j, i], -M[k, i])
+ else:
+ ax = math.atan2(-M[j, k], M[j, j])
+ ay = math.atan2(sy, M[i, i])
+ az = 0.0
+ else:
+ cy = math.sqrt(M[i, i] * M[i, i] + M[j, i] * M[j, i])
+ if cy > _EPS4:
+ ax = math.atan2(M[k, j], M[k, k])
+ ay = math.atan2(-M[k, i], cy)
+ az = math.atan2(M[j, i], M[i, i])
+ else:
+ ax = math.atan2(-M[j, k], M[j, j])
+ ay = math.atan2(-M[k, i], cy)
+ az = 0.0
+
+ if parity:
+ ax, ay, az = -ax, -ay, -az
+ if frame:
+ ax, az = az, ax
+ return ax, ay, az
+
+
+def quat2mat(q):
+ """Calculate rotation matrix corresponding to quaternion
+
+ Parameters
+ ----------
+ q : 4 element array-like
+
+ Returns
+ -------
+ M : (3,3) array
+ Rotation matrix corresponding to input quaternion *q*
+
+ Notes
+ -----
+ Rotation matrix applies to column vectors, and is applied to the
+ left of coordinate vectors. The algorithm here allows quaternions that
+ have not been normalized.
+
+ References
+ ----------
+ Algorithm from http://en.wikipedia.org/wiki/Rotation_matrix#Quaternion
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> M = quat2mat([1, 0, 0, 0]) # Identity quaternion
+ >>> np.allclose(M, np.eye(3))
+ True
+ >>> M = quat2mat([0, 1, 0, 0]) # 180 degree rotn around axis 0
+ >>> np.allclose(M, np.diag([1, -1, -1]))
+ True
+ """
+ w, x, y, z = q
+ Nq = w * w + x * x + y * y + z * z
+ if Nq < _FLOAT_EPS:
+ return np.eye(3)
+ s = 2.0 / Nq
+ X = x * s
+ Y = y * s
+ Z = z * s
+ wX = w * X
+ wY = w * Y
+ wZ = w * Z
+ xX = x * X
+ xY = x * Y
+ xZ = x * Z
+ yY = y * Y
+ yZ = y * Z
+ zZ = z * Z
+ return np.array(
+ [
+ [1.0 - (yY + zZ), xY - wZ, xZ + wY],
+ [xY + wZ, 1.0 - (xX + zZ), yZ - wX],
+ [xZ - wY, yZ + wX, 1.0 - (xX + yY)],
+ ]
+ )
+
+
+# Checks if a matrix is a valid rotation matrix.
+def isrotation(
+ R: np.ndarray,
+ thresh=1e-6,
+) -> bool:
+ Rt = np.transpose(R)
+ shouldBeIdentity = np.dot(Rt, R)
+ iden = np.identity(3, dtype=R.dtype)
+ n = np.linalg.norm(iden - shouldBeIdentity)
+ return n < thresh
+
+
+def euler2mat(ai, aj, ak, axes="sxyz"):
+ """Return rotation matrix from Euler angles and axis sequence.
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ mat : array (3, 3)
+ Rotation matrix or affine.
+
+ Examples
+ --------
+ >>> R = euler2mat(1, 2, 3, 'syxz')
+ >>> np.allclose(np.sum(R[0]), -1.34786452)
+ True
+ >>> R = euler2mat(1, 2, 3, (0, 1, 0, 1))
+ >>> np.allclose(np.sum(R[0]), -0.383436184)
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis
+ j = _NEXT_AXIS[i + parity]
+ k = _NEXT_AXIS[i - parity + 1]
+
+ if frame:
+ ai, ak = ak, ai
+ if parity:
+ ai, aj, ak = -ai, -aj, -ak
+
+ si, sj, sk = math.sin(ai), math.sin(aj), math.sin(ak)
+ ci, cj, ck = math.cos(ai), math.cos(aj), math.cos(ak)
+ cc, cs = ci * ck, ci * sk
+ sc, ss = si * ck, si * sk
+
+ M = np.eye(3)
+ if repetition:
+ M[i, i] = cj
+ M[i, j] = sj * si
+ M[i, k] = sj * ci
+ M[j, i] = sj * sk
+ M[j, j] = -cj * ss + cc
+ M[j, k] = -cj * cs - sc
+ M[k, i] = -sj * ck
+ M[k, j] = cj * sc + cs
+ M[k, k] = cj * cc - ss
+ else:
+ M[i, i] = cj * ck
+ M[i, j] = sj * sc - cs
+ M[i, k] = sj * cc + ss
+ M[j, i] = cj * sk
+ M[j, j] = sj * ss + cc
+ M[j, k] = sj * cs - sc
+ M[k, i] = -sj
+ M[k, j] = cj * si
+ M[k, k] = cj * ci
+ return M
+
+
+def euler2axangle(ai, aj, ak, axes="sxyz"):
+ """Return angle, axis corresponding to Euler angles, axis specification
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ vector : array shape (3,)
+ axis around which rotation occurs
+ theta : scalar
+ angle of rotation
+
+ Examples
+ --------
+ >>> vec, theta = euler2axangle(0, 1.5, 0, 'szyx')
+ >>> np.allclose(vec, [0, 1, 0])
+ True
+ >>> theta
+ 1.5
+ """
+ return quat2axangle(euler2quat(ai, aj, ak, axes))
+
+
+def euler2quat(ai, aj, ak, axes="sxyz"):
+ """Return `quaternion` from Euler angles and axis sequence `axes`
+
+ Parameters
+ ----------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ quat : array shape (4,)
+ Quaternion in w, x, y z (real, then vector) format
+
+ Examples
+ --------
+ >>> q = euler2quat(1, 2, 3, 'ryxz')
+ >>> np.allclose(q, [0.435953, 0.310622, -0.718287, 0.444435])
+ True
+ """
+ try:
+ firstaxis, parity, repetition, frame = _AXES2TUPLE[axes.lower()]
+ except (AttributeError, KeyError):
+ _TUPLE2AXES[axes] # validation
+ firstaxis, parity, repetition, frame = axes
+
+ i = firstaxis + 1
+ j = _NEXT_AXIS[i + parity - 1] + 1
+ k = _NEXT_AXIS[i - parity] + 1
+
+ if frame:
+ ai, ak = ak, ai
+ if parity:
+ aj = -aj
+
+ ai = ai / 2.0
+ aj = aj / 2.0
+ ak = ak / 2.0
+ ci = math.cos(ai)
+ si = math.sin(ai)
+ cj = math.cos(aj)
+ sj = math.sin(aj)
+ ck = math.cos(ak)
+ sk = math.sin(ak)
+ cc = ci * ck
+ cs = ci * sk
+ sc = si * ck
+ ss = si * sk
+
+ q = np.empty((4,))
+ if repetition:
+ q[0] = cj * (cc - ss)
+ q[i] = cj * (cs + sc)
+ q[j] = sj * (cc + ss)
+ q[k] = sj * (cs - sc)
+ else:
+ q[0] = cj * cc + sj * ss
+ q[i] = cj * sc - sj * cs
+ q[j] = cj * ss + sj * cc
+ q[k] = cj * cs - sj * sc
+ if parity:
+ q[j] *= -1.0
+
+ return q
+
+
+def quat2axangle(quat, identity_thresh=None):
+ """Convert quaternion to rotation of angle around axis
+
+ Parameters
+ ----------
+ quat : 4 element sequence
+ w, x, y, z forming quaternion.
+ identity_thresh : None or scalar, optional
+ Threshold below which the norm of the vector part of the quaternion (x,
+ y, z) is deemed to be 0, leading to the identity rotation. None (the
+ default) leads to a threshold estimated based on the precision of the
+ input.
+
+ Returns
+ -------
+ vector : array shape (3,)
+ axis around which rotation occurs.
+ theta : scalar
+ angle of rotation.
+
+ Examples
+ --------
+ >>> vec, theta = quat2axangle([0, 1, 0, 0])
+ >>> vec
+ array([1., 0., 0.])
+ >>> np.allclose(theta, np.pi)
+ True
+
+ If this is an identity rotation, we return a zero angle and an arbitrary
+ vector:
+
+ >>> quat2axangle([1, 0, 0, 0])
+ (array([1., 0., 0.]), 0.0)
+
+ If any of the quaternion values are not finite, we return a NaN in the
+ angle, and an arbitrary vector:
+
+ >>> quat2axangle([1, np.inf, 0, 0])
+ (array([1., 0., 0.]), nan)
+
+ Notes
+ -----
+ A quaternion for which x, y, z are all equal to 0, is an identity rotation.
+ In this case we return a 0 angle and an arbitrary vector, here [1, 0, 0].
+
+ The algorithm allows for quaternions that have not been normalized.
+ """
+ quat = np.asarray(quat)
+ Nq = np.sum(quat**2)
+ if not np.isfinite(Nq):
+ return np.array([1.0, 0, 0]), float("nan")
+ if identity_thresh is None:
+ try:
+ identity_thresh = np.finfo(Nq.type).eps * 3
+ except (AttributeError, ValueError): # Not a numpy type or not float
+ identity_thresh = _FLOAT_EPS * 3
+ if Nq < _FLOAT_EPS**2: # Results unreliable after normalization
+ return np.array([1.0, 0, 0]), 0.0
+ if Nq != 1: # Normalize if not normalized
+ s = math.sqrt(Nq)
+ quat = quat / s
+ xyz = quat[1:]
+ len2 = np.sum(xyz**2)
+ if len2 < identity_thresh**2:
+ # if vec is nearly 0,0,0, this is an identity rotation
+ return np.array([1.0, 0, 0]), 0.0
+ # Make sure w is not slightly above 1 or below -1
+ theta = 2 * math.acos(max(min(quat[0], 1), -1))
+ return xyz / math.sqrt(len2), theta
+
+
+def quat2euler(quaternion, axes="sxyz"):
+ """Euler angles from `quaternion` for specified axis sequence `axes`
+
+ Parameters
+ ----------
+ quaternion : 4 element sequence
+ w, x, y, z of quaternion
+ axes : str, optional
+ Axis specification; one of 24 axis sequences as string or encoded
+ tuple - e.g. ``sxyz`` (the default).
+
+ Returns
+ -------
+ ai : float
+ First rotation angle (according to `axes`).
+ aj : float
+ Second rotation angle (according to `axes`).
+ ak : float
+ Third rotation angle (according to `axes`).
+
+ Examples
+ --------
+ >>> angles = quat2euler([0.99810947, 0.06146124, 0, 0])
+ >>> np.allclose(angles, [0.123, 0, 0])
+ True
+ """
+ return mat2euler(quat2mat(quaternion), axes)
\ No newline at end of file
From f027da6c52d54c75a82f649dd6610d6646789db9 Mon Sep 17 00:00:00 2001
From: YN35 <95198883+YN35@users.noreply.github.com>
Date: Thu, 14 Aug 2025 09:42:58 +0000
Subject: [PATCH 04/57] add bridge
---
scripts/openpi/evaluate_bridge.py | 24 ++++++
simpler_env/evaluation/evaluate.py | 48 ++++++++++--
simpler_env/evaluation/evaluate_bridge.py | 92 +++++++++++++++++++++++
3 files changed, 159 insertions(+), 5 deletions(-)
create mode 100644 scripts/openpi/evaluate_bridge.py
create mode 100644 simpler_env/evaluation/evaluate_bridge.py
diff --git a/scripts/openpi/evaluate_bridge.py b/scripts/openpi/evaluate_bridge.py
new file mode 100644
index 00000000..9f8cd6fd
--- /dev/null
+++ b/scripts/openpi/evaluate_bridge.py
@@ -0,0 +1,24 @@
+import argparse
+
+from simpler_env.evaluation.evaluate_bridge import run_comprehensive_evaluation_bridge
+from simpler_env.policies.openpi.pi0_or_fast import OpenPiFastInference
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = OpenPiFastInference(saved_model_path=ckpt_path, policy_setup="google_robot", action_scale=1.0)
+
+ print("Policy initialized. Starting evaluation...")
+
+ final_scores = run_comprehensive_evaluation_bridge(env_policy=policy, ckpt_path=args.ckpt_path)
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
diff --git a/simpler_env/evaluation/evaluate.py b/simpler_env/evaluation/evaluate.py
index b5c5a147..aa7ebb61 100644
--- a/simpler_env/evaluation/evaluate.py
+++ b/simpler_env/evaluation/evaluate.py
@@ -28,12 +28,52 @@ def _run_single_evaluation(env_policy: AiroaBasePolicy, cfg: ManiSkill2Config, c
else:
kwargs_info = None
- print(f" ▶️ Running: env={cfg.env_name}, scene={cfg.scene_name}, " f"enable_rt={cfg.enable_raytracing}, kwargs={kwargs_info}")
+ print(f" ▶️ Running: env={cfg.env_name}, scene={cfg.scene_name}, kwargs={kwargs_info}")
success_arr = maniskill2_evaluator(env_policy, cfg)
print(f" ✅ Success Rate: {np.mean(success_arr):.2%}")
return success_arr
+# def pick_coke_can_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+# print("\n--- pick_coke_can_visual_matching ---")
+# results: List[List[bool]] = []
+
+# coke_can_options_arr = [
+# {"lr_switch": True},
+# {"upright": True},
+# {"laid_vertically": True},
+# ]
+# urdf_versions = [None, "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", "recolor_cabinet_visual_matching_1"]
+
+# base_kwargs = dict(
+# robot="google_robot_static",
+# policy_setup="google_robot",
+# control_freq=3,
+# sim_freq=513,
+# max_episode_steps=80,
+# ckpt_path=ckpt_path,
+# robot_init_x_range=[0.35, 0.35, 1],
+# robot_init_y_range=[0.20, 0.20, 1],
+# obj_init_x_range=[-0.35, -0.12, 5],
+# obj_init_y_range=[-0.02, 0.42, 5],
+# robot_init_rot_quat_center=[0, 0, 0, 1],
+# robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+# )
+
+# for urdf in urdf_versions:
+# for opt in coke_can_options_arr:
+# cfg = ManiSkill2Config(
+# **base_kwargs,
+# env_name="PickColaAndPlaceInDrawer-v0",
+# scene_name="google_pick_coke_can_1_v4",
+# rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/google_coke_can_real_eval_1.png",
+# additional_env_build_kwargs={**opt, "urdf_version": urdf},
+# )
+# results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+# return results
+
+
# ----------------------------------------------------------------------
# 1) pick_coke_can_variant_agg.sh
# ----------------------------------------------------------------------
@@ -367,7 +407,6 @@ def put_in_drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -
robot_init_rot_quat_center=[0, 0, 0, 1],
obj_init_x_range=[-0.08, -0.02, 3],
obj_init_y_range=[-0.02, 0.08, 3],
- enable_raytracing=True,
)
overlay_poses = [
@@ -499,7 +538,6 @@ def drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[
robot_init_rot_quat_center=[0, 0, 0, 1],
obj_init_x_range=[0, 0, 1],
obj_init_y_range=[0, 0, 1],
- enable_raytracing=True,
scene_name="dummy_drawer",
)
@@ -600,14 +638,14 @@ def run_comprehensive_evaluation(env_policy: AiroaBasePolicy, ckpt_path: str) ->
vm_results += pick_coke_can_visual_matching(env_policy, ckpt_path)
vm_results += move_near_visual_matching(env_policy, ckpt_path)
vm_results += put_in_drawer_visual_matching(env_policy, ckpt_path)
- vm_results += drawer_visual_matching(env_policy, ckpt_path) # ★ 追加
+ vm_results += drawer_visual_matching(env_policy, ckpt_path)
     # Simulation (variant_agg)
sim_results: List[List[bool]] = []
sim_results += pick_coke_can_variant_agg(env_policy, ckpt_path)
sim_results += move_near_variant_agg(env_policy, ckpt_path)
sim_results += put_in_drawer_variant_agg(env_policy, ckpt_path)
- sim_results += drawer_variant_agg(env_policy, ckpt_path) # ★ 追加
+ sim_results += drawer_variant_agg(env_policy, ckpt_path)
     # Robust score
sim_score = calculate_robust_score(sim_results)
diff --git a/simpler_env/evaluation/evaluate_bridge.py b/simpler_env/evaluation/evaluate_bridge.py
new file mode 100644
index 00000000..8f4bbbf2
--- /dev/null
+++ b/simpler_env/evaluation/evaluate_bridge.py
@@ -0,0 +1,92 @@
+from typing import Any, Dict, List
+
+from ..policies.base import AiroaBasePolicy
+from .config import ManiSkill2Config
+from .evaluate import _run_single_evaluation, calculate_robust_score
+
+
+def bridge(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ """
+    Faithfully reproduces scripts/bridge.sh in Python.
+    Runs the four evaluations (3 + 1) in order and returns each run's success array.
+ """
+ print("\n--- bridge (scripts/bridge.sh) ---")
+ results: List[List[bool]] = []
+
+ # ====== bridge_table_1_v1 ======
+ common_v1 = dict(
+ policy_setup="widowx_bridge",
+ robot="widowx",
+ control_freq=5,
+ sim_freq=500,
+ max_episode_steps=60,
+ rgb_overlay_path="ManiSkill2_real2sim/data/real_inpainting/bridge_real_eval_1.png",
+ robot_init_x_range=[0.147, 0.147, 1],
+ robot_init_y_range=[0.028, 0.028, 1],
+ obj_variation_mode="episode",
+ obj_episode_range=[0, 24],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ ckpt_path=ckpt_path,
+ )
+ envs_v1 = [
+ ("PutCarrotOnPlateInScene-v0", "bridge_table_1_v1"),
+ ("StackGreenCubeOnYellowCubeBakedTexInScene-v0", "bridge_table_1_v1"),
+ ("PutSpoonOnTableClothInScene-v0", "bridge_table_1_v1"),
+ ]
+ for env_name, scene_name in envs_v1:
+ cfg = ManiSkill2Config(
+ **common_v1,
+ env_name=env_name,
+ scene_name=scene_name,
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # ====== bridge_table_1_v2 ======
+    # Note: only this run uses max_episode_steps=120 and a different robot/camera setup
+ cfg_v2 = ManiSkill2Config(
+ policy_setup="widowx_bridge",
+ robot="widowx_sink_camera_setup",
+ control_freq=5,
+ sim_freq=500,
+ max_episode_steps=120,
+ env_name="PutEggplantInBasketScene-v0",
+ scene_name="bridge_table_1_v2",
+ rgb_overlay_path="ManiSkill2_real2sim/data/real_inpainting/bridge_sink.png",
+ robot_init_x_range=[0.127, 0.127, 1],
+ robot_init_y_range=[0.06, 0.06, 1],
+ obj_variation_mode="episode",
+ obj_episode_range=[0, 24],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ ckpt_path=ckpt_path,
+ )
+ results.append(_run_single_evaluation(env_policy, cfg_v2, ckpt_path))
+
+ return results
+
+
+def run_comprehensive_evaluation_bridge(env_policy: AiroaBasePolicy, ckpt_path: str) -> Dict[str, Any]:
+ """
+    Comprehensive evaluation for Bridge. Reproduces scripts/bridge.sh 1:1 and
+    also reports the robust score (convenience feature).
+ """
+ print("=" * 80)
+ print("🚧 STARTING BRIDGE EVALUATION (bridge.sh) 🚧")
+ print(f"Checkpoint: {ckpt_path}")
+ print("=" * 80)
+
+ runs = bridge(env_policy, ckpt_path)
+    vm_score = calculate_robust_score(runs)  # overlay-based runs, so report the robust score as Visual Matching
+
+ print("\n" + "=" * 80)
+ print("📊 BRIDGE EVALUATION SUMMARY 📊")
+ print("-" * 80)
+ print(f"Visual Matching Score (Robust): {vm_score:.4f}")
+ print(f" - Total Runs: {len(runs)}")
+ print("=" * 80)
+
+ return {
+ "visual_matching_robust_score": vm_score,
+ "num_runs": len(runs),
+ }
From e8bce2079b9d8e5e65a2462d0fc45d4297ab39ae Mon Sep 17 00:00:00 2001
From: YN35 <95198883+YN35@users.noreply.github.com>
Date: Thu, 14 Aug 2025 10:28:50 +0000
Subject: [PATCH 05/57] openpi: looks like the code needs to be rewritten
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
scripts/hsr_openpi/README.md | 2 -
scripts/openpi/README.md | 220 +++++++++++++++++++++++++++-
scripts/openpi/evaluate_bridge.py | 2 +-
scripts/openpi/evaluate_fractal2.py | 39 +++++
4 files changed, 255 insertions(+), 8 deletions(-)
create mode 100644 scripts/openpi/evaluate_fractal2.py
diff --git a/scripts/hsr_openpi/README.md b/scripts/hsr_openpi/README.md
index 412ec981..2ba2f9e0 100644
--- a/scripts/hsr_openpi/README.md
+++ b/scripts/hsr_openpi/README.md
@@ -1,7 +1,5 @@
```bash
-uv venv -p 3.11 scripts/openpi/.venv
-
git clone https://github.com/airoa-org/hsr_openpi.git
cd hsr_openpi
git checkout remotes/origin/release
diff --git a/scripts/openpi/README.md b/scripts/openpi/README.md
index efba3b67..8b01a13d 100644
--- a/scripts/openpi/README.md
+++ b/scripts/openpi/README.md
@@ -1,10 +1,9 @@
```bash
-uv venv -p 3.11 scripts/openpi/.venv
-
-git clone https://github.com/airoa-org/hsr_openpi.git
-cd hsr_openpi
+git clone https://github.com/Physical-Intelligence/openpi.git
+cd openpi
GIT_LFS_SKIP_SMUDGE=1 UV_PROJECT_ENVIRONMENT=../scripts/openpi/.venv uv sync
+source ../scripts/openpi/.venv/bin/activate
GIT_LFS_SKIP_SMUDGE=1 uv pip install -e .
cd ..
@@ -12,5 +11,216 @@ source $(pwd)/scripts/openpi/.venv/bin/activate
uv pip install -e .
-python scripts/openpi/evaluate_fractal.py --ckpt-path /data/checkpoints/pi0_fractal_low_mem_finetune2/my_experiment/17000
+huggingface-cli download --resume-download --repo-type model HaomingSong/openpi0-fractal-lora --local-dir /data/checkpoints/openpi0-fractal-lora
+
+python scripts/openpi/evaluate_fractal2.py --ckpt-path /data/checkpoints/openpi0-fractal-lora
+CUDA_VISIBLE_DEVICES=1 python scripts/openpi/evaluate_fractal.py --ckpt-path HaomingSong/openpi0-bridge-lora
+```
+
+
+https://huggingface.co/HaomingSong/openpi0-fast-fractal-fft
+```python
+@dataclasses.dataclass(frozen=True)
+class LeRobotFractalDataConfig(DataConfigFactory):
+ use_quantile_norm: bool = True
+
+ # Action keys that will be used to read the action sequence from the dataset.
+ action_sequence_keys: Sequence[str] = ("action",)
+
+ prompt_from_task: bool = True
+
+ @override
+ def create(self, assets_dirs: pathlib.Path, model_config: _model.BaseModelConfig) -> DataConfig:
+ # Make inputs look like they come from the Libero environment
+ repack_transform = _transforms.Group(
+ inputs=[
+ _transforms.RepackTransform(
+ {
+ "observation/primary_image": "observation.images.image",
+ "observation/state": "observation.state",
+ "actions": "action",
+ "prompt": "prompt",
+ }
+ )
+ ]
+ )
+
+ # Prepare data for policy training
+ # Convert images to uint8 numpy arrays, add masks
+ data_transforms = _transforms.Group(
+ inputs=[
+ fractal_policy.FractalInputs(
+ action_dim=model_config.action_dim,
+ model_type=model_config.model_type,
+ )
+ ],
+ outputs=[fractal_policy.FractalOutputs()],
+ )
+
+ # Model transforms include things like tokenizing the prompt and action targets
+ model_transforms = ModelTransformFactory()(model_config)
+
+ return dataclasses.replace(
+ self.create_base_config(assets_dirs),
+ repack_transforms=repack_transform,
+ data_transforms=data_transforms,
+ model_transforms=model_transforms,
+ use_quantile_norm=self.use_quantile_norm,
+ action_sequence_keys=self.action_sequence_keys,
+ prompt_from_task=self.prompt_from_task,
+ )
+
+```
+
+
+https://github.com/DelinQu/SimplerEnv-OpenVLA/issues/24
+```python
+@dataclasses.dataclass(frozen=True)
+class LeRobotBridgeDataConfig(DataConfigFactory):
+ use_quantile_norm: bool = True
+
+ # Action keys that will be used to read the action sequence from the dataset.
+ action_sequence_keys: Sequence[str] = ("action",)
+
+ prompt_from_task: bool = True
+
+ @override
+ def create(self, assets_dirs: pathlib.Path, model_config: _model.BaseModelConfig) -> DataConfig:
+ # Make inputs look like they come from the Libero environment
+ repack_transform = _transforms.Group(
+ inputs=[
+ _transforms.RepackTransform(
+ {
+ "observation/primary_image": "observation.images.image_0",
+ # "observation/left_yellow_image": "observation.images.image_1",
+ # "observation/right_blue_image": "observation.images.image_2",
+ # "observation/wirst_image": "observation.images.image_3",
+ "observation/state": "observation.state",
+ "actions": "action",
+ "prompt": "prompt",
+ }
+ )
+ ]
+ )
+
+ # Prepare data for policy training
+ # Convert images to uint8 numpy arrays, add masks
+ data_transforms = _transforms.Group(
+ inputs=[
+ ### THE FOLLOWING bridge_policy MODULE IS MISSING ###
+ bridge_policy.BridgeInputs(
+ action_dim=model_config.action_dim,
+ model_type=model_config.model_type,
+ )
+ ],
+ outputs=[bridge_policy.BridgeOutputs()],
+ )
+
+ # Model transforms include things like tokenizing the prompt and action targets
+ model_transforms = ModelTransformFactory()(model_config)
+
+ return dataclasses.replace(
+ self.create_base_config(assets_dirs),
+ repack_transforms=repack_transform,
+ data_transforms=data_transforms,
+ model_transforms=model_transforms,
+ use_quantile_norm=self.use_quantile_norm,
+ action_sequence_keys=self.action_sequence_keys,
+ prompt_from_task=self.prompt_from_task,
+ )
+```
+
+
+https://github.com/HaomingSong/openpi/blob/bridge/src/openpi/policies/bridge_policy.py
+```python
+import dataclasses
+
+import einops
+import numpy as np
+
+from openpi import transforms
+from openpi.models import model as _model
+import torch
+
+
+def make_bridge_example() -> dict:
+ """Creates a random input example for the Libero policy."""
+ return {
+ "observation/state": np.random.rand(8),
+ "observation/primary_image": np.random.randint(256, size=(224, 224, 3), dtype=np.uint8),
+ # "observation/left_yellow_image": np.random.randint(256, size=(224, 224, 3), dtype=np.uint8),
+ # "observation/right_blue_image": np.random.randint(256, size=(224, 224, 3), dtype=np.uint8),
+ # "observation/wirst_image": np.random.randint(256, size=(224, 224, 3), dtype=np.uint8),
+ "prompt": "do something",
+ }
+
+
+def _parse_image(image) -> np.ndarray:
+ image = np.asarray(image)
+ if np.issubdtype(image.dtype, np.floating):
+ image = (255 * image).astype(np.uint8)
+ if image.shape[0] == 3:
+ image = einops.rearrange(image, "c h w -> h w c")
+ return image
+
+
+@dataclasses.dataclass(frozen=True)
+class BridgeInputs(transforms.DataTransformFn):
+ # The action dimension of the model. Will be used to pad state and actions for pi0 model (not pi0-FAST).
+ action_dim: int
+
+ # Determines which model will be used.
+ model_type: _model.ModelType = _model.ModelType.PI0
+
+ def __call__(self, data: dict) -> dict:
+ mask_padding = self.model_type == _model.ModelType.PI0 # We don't mask for pi0-FAST.
+
+ # NOTE: for bridge dataset at IPEC-COMMUNITY/bridge_orig_lerobot, the state is 8-dim.
+ # Get the state. We are padding from 8 to the model action dim.
+ # state = data["observation/state"][:8]
+ state = torch.zeros(data["observation/state"].shape)
+ state = transforms.pad_to_dim(data["observation/state"], self.action_dim)
+
+ # Possibly need to parse images to uint8 (H,W,C) since LeRobot automatically
+ # stores as float32 (C,H,W), gets skipped for policy inference
+ primary_image = _parse_image(data["observation/primary_image"])
+ # left_yellow_image = _parse_image(data["observation/left_yellow_image"])
+ # right_blue_image = _parse_image(data["observation/right_blue_image"])
+ # wrist_image = _parse_image(data["observation/wrist_image"])
+
+ inputs = {
+ "state": state,
+ "image": {
+ "primary_image": primary_image,
+ # "left_yellow_image": left_yellow_image,
+ # "left_yellow_image": np.zeros_like(primary_image),
+ # "right_blue_image": right_blue_image,
+ # "right_blue_image": np.zeros_like(primary_image),
+ # "wrist_image": wrist_image,
+ },
+ "image_mask": {
+ "primary_image": np.True_,
+ # "left_wrist_0_rgb": np.False_ if mask_padding else np.True_,
+ # "right_wrist_0_rgb": np.False_ if mask_padding else np.True_,
+ },
+ }
+
+ # Actions are only available during training.
+ if "actions" in data:
+ # We are padding from 7 to the model action dim.
+ # For pi0-FAST, this is a no-op (since action_dim = 7).
+ actions = transforms.pad_to_dim(data["actions"], self.action_dim)
+ inputs["actions"] = actions
+
+ if "prompt" in data:
+ inputs["prompt"] = data["prompt"]
+
+ return inputs
+
+
+@dataclasses.dataclass(frozen=True)
+class BridgeOutputs(transforms.DataTransformFn):
+ def __call__(self, data: dict) -> dict:
+ # Only return the first 7 dims.
+ return {"actions": np.asarray(data["actions"][:, :7])}
```
\ No newline at end of file
diff --git a/scripts/openpi/evaluate_bridge.py b/scripts/openpi/evaluate_bridge.py
index 9f8cd6fd..8874e48c 100644
--- a/scripts/openpi/evaluate_bridge.py
+++ b/scripts/openpi/evaluate_bridge.py
@@ -14,7 +14,7 @@ def parse_args():
args = parse_args()
ckpt_path = args.ckpt_path
- policy = OpenPiFastInference(saved_model_path=ckpt_path, policy_setup="google_robot", action_scale=1.0)
+ policy = OpenPiFastInference(saved_model_path=ckpt_path, policy_setup="widowx_bridge", action_scale=1.0)
print("Policy initialized. Starting evaluation...")
diff --git a/scripts/openpi/evaluate_fractal2.py b/scripts/openpi/evaluate_fractal2.py
new file mode 100644
index 00000000..9e20bba0
--- /dev/null
+++ b/scripts/openpi/evaluate_fractal2.py
@@ -0,0 +1,39 @@
+import argparse
+
+from openpi_client import action_chunk_broker
+
+from openpi.policies import policy_config as _policy_config
+from openpi.training import config as _config
+from simpler_env.evaluation.evaluate import run_comprehensive_evaluation
+from simpler_env.policies.adapter import AiroaToSimplerFractalAdapter
+from simpler_env.policies.hsr_openpi.pi0_or_fast import OpenpiToAiroaPolicy
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = OpenpiToAiroaPolicy(
+ policy=action_chunk_broker.ActionChunkBroker(
+ policy=_policy_config.create_trained_policy(
+ _config.get_config("pi0_fractal_lora"),
+ ckpt_path,
+ ),
+ action_horizon=10,
+ ),
+ )
+
+ env_policy = AiroaToSimplerFractalAdapter(policy=policy)
+
+ print("Policy initialized. Starting evaluation...")
+
+ final_scores = run_comprehensive_evaluation(env_policy=env_policy, ckpt_path=args.ckpt_path)
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
From 019e4d2a0ac5d2e16e14e40b4ce5f85b2c29e37a Mon Sep 17 00:00:00 2001
From: YN35 <95198883+YN35@users.noreply.github.com>
Date: Thu, 14 Aug 2025 11:28:18 +0000
Subject: [PATCH 06/57] rt1 ok
---
.gitignore | 1 +
pyproject.toml | 7 ++++-
scripts/hsr_openpi/README.md | 2 +-
scripts/lerobotpi/README.md | 2 +-
scripts/openpi/README.md | 2 +-
scripts/rt1/README.md | 42 +++++++++++++++++++++++++++
scripts/rt1/evaluate_fractal.py | 24 +++++++++++++++
simpler_env/policies/rt1/rt1_model.py | 18 +++---------
8 files changed, 80 insertions(+), 18 deletions(-)
create mode 100644 scripts/rt1/README.md
create mode 100644 scripts/rt1/evaluate_fractal.py
diff --git a/.gitignore b/.gitignore
index 399a25ee..d8002eb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,7 @@ _ext
*.so
.venv/
results/
+checkpoints/
build/
dist/
*.egg-info/
diff --git a/pyproject.toml b/pyproject.toml
index d2b64bfd..c51b534d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,12 +7,17 @@ requires-python = ">=3.10,<3.12"
dependencies = [
"mani_skill2_real2sim",
"numpy<2.0",
- "tensorflow-cpu",
"mediapy",
"transforms3d",
"matplotlib",
]
+
+[project.optional-dependencies]
+torch = [
+ "tensorflow-cpu",
+]
+
[tool.black]
# https://github.com/psf/black
line-length = 120
diff --git a/scripts/hsr_openpi/README.md b/scripts/hsr_openpi/README.md
index 2ba2f9e0..fde4b226 100644
--- a/scripts/hsr_openpi/README.md
+++ b/scripts/hsr_openpi/README.md
@@ -8,7 +8,7 @@ GIT_LFS_SKIP_SMUDGE=1 uv pip install -e .
cd ..
source $(pwd)/scripts/hsr_openpi/.venv/bin/activate
-uv pip install -e .
+uv pip install -e . ".[torch]"
python scripts/hsr_openpi/evaluate_fractal.py --ckpt-path /data/checkpoints/pi0_fractal_low_mem_finetune2/my_experiment/17000
diff --git a/scripts/lerobotpi/README.md b/scripts/lerobotpi/README.md
index 02bb6e59..6f1b7bca 100644
--- a/scripts/lerobotpi/README.md
+++ b/scripts/lerobotpi/README.md
@@ -7,7 +7,7 @@ uv venv -p 3.10 scripts/lerobotpi/.venv
source $(pwd)/scripts/lerobotpi/.venv/bin/activate
uv pip install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124
uv pip install git+https://github.com/huggingface/lerobot.git@ce2b9724bfe1b5a4c45e61b1890eef3f5ab0909c#egg=lerobot[pi0]
-uv pip install -e .
+uv pip install -e . ".[torch]"
uv pip install pytest
huggingface-cli login
diff --git a/scripts/openpi/README.md b/scripts/openpi/README.md
index 8b01a13d..2c40daa0 100644
--- a/scripts/openpi/README.md
+++ b/scripts/openpi/README.md
@@ -8,7 +8,7 @@ GIT_LFS_SKIP_SMUDGE=1 uv pip install -e .
cd ..
source $(pwd)/scripts/openpi/.venv/bin/activate
-uv pip install -e .
+uv pip install -e . ".[torch]"
huggingface-cli download --resume-download --repo-type model HaomingSong/openpi0-fractal-lora --local-dir /data/checkpoints/openpi0-fractal-lora
diff --git a/scripts/rt1/README.md b/scripts/rt1/README.md
new file mode 100644
index 00000000..3060352e
--- /dev/null
+++ b/scripts/rt1/README.md
@@ -0,0 +1,42 @@
+
+```bash
+cd ..
+curl -O https://dl.google.com/dl/cloudsdk/channels/rapid/downloads/google-cloud-cli-linux-x86_64.tar.gz
+tar -xf google-cloud-cli-linux-x86_64.tar.gz
+./google-cloud-sdk/install.sh
+cd SimplerEnv
+
+uv venv -p 3.10 scripts/rt1/.venv
+
+source $(pwd)/scripts/rt1/.venv/bin/activate
+uv pip install tensorflow==2.15.0
+uv pip install -r requirements_full_install.txt
+uv pip install -e .
+uv pip install tensorflow[and-cuda]==2.15.1
+
+
+# Make a checkpoint dir:
+mkdir checkpoints
+
+# RT-1-X
+gsutil -m cp -r gs://gdm-robotics-open-x-embodiment/open_x_embodiment_and_rt_x_oss/rt_1_x_tf_trained_for_002272480_step.zip .
+unzip rt_1_x_tf_trained_for_002272480_step.zip
+mv rt_1_x_tf_trained_for_002272480_step checkpoints
+rm rt_1_x_tf_trained_for_002272480_step.zip
+
+# RT-1-Converged
+gsutil -m cp -r gs://gdm-robotics-open-x-embodiment/open_x_embodiment_and_rt_x_oss/rt_1_tf_trained_for_000400120 .
+mv rt_1_tf_trained_for_000400120 checkpoints
+
+# RT-1-15%
+gsutil -m cp -r gs://gdm-robotics-open-x-embodiment/open_x_embodiment_and_rt_x_oss/rt_1_tf_trained_for_000058240 .
+mv rt_1_tf_trained_for_000058240 checkpoints
+
+# RT-1-Begin
+gsutil -m cp -r gs://gdm-robotics-open-x-embodiment/open_x_embodiment_and_rt_x_oss/rt_1_tf_trained_for_000001120 .
+mv rt_1_tf_trained_for_000001120 checkpoints
+
+
+
+CUDA_VISIBLE_DEVICES=3 python scripts/rt1/evaluate_fractal.py --ckpt-path checkpoints/rt_1_tf_trained_for_000400120
+```
\ No newline at end of file
diff --git a/scripts/rt1/evaluate_fractal.py b/scripts/rt1/evaluate_fractal.py
new file mode 100644
index 00000000..936e6a76
--- /dev/null
+++ b/scripts/rt1/evaluate_fractal.py
@@ -0,0 +1,24 @@
+import argparse
+
+from simpler_env.evaluation.evaluate import run_comprehensive_evaluation
+from simpler_env.policies.rt1.rt1_model import RT1Inference
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = RT1Inference(saved_model_path=ckpt_path, policy_setup="google_robot")
+
+ print("Policy initialized. Starting evaluation...")
+
+ final_scores = run_comprehensive_evaluation(env_policy=policy, ckpt_path=args.ckpt_path)
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
diff --git a/simpler_env/policies/rt1/rt1_model.py b/simpler_env/policies/rt1/rt1_model.py
index cf52726d..9542e9b8 100644
--- a/simpler_env/policies/rt1/rt1_model.py
+++ b/simpler_env/policies/rt1/rt1_model.py
@@ -149,9 +149,7 @@ def _small_action_filter_google_robot(
)
return raw_action
- def step(
- self, image: np.ndarray, task_description: Optional[str] = None
- ) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
+ def step(self, image: np.ndarray, task_description: Optional[str] = None, *args, **kwargs) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
"""
Input:
image: np.ndarray of shape (H, W, 3), uint8
@@ -192,11 +190,7 @@ def step(
if self.action_rotation_mode == "axis_angle":
action_rotation_delta = np.asarray(raw_action["rotation_delta"], dtype=np.float64)
action_rotation_angle = np.linalg.norm(action_rotation_delta)
- action_rotation_ax = (
- action_rotation_delta / action_rotation_angle
- if action_rotation_angle > 1e-6
- else np.array([0.0, 1.0, 0.0])
- )
+ action_rotation_ax = action_rotation_delta / action_rotation_angle if action_rotation_angle > 1e-6 else np.array([0.0, 1.0, 0.0])
action["rot_axangle"] = action_rotation_ax * action_rotation_angle * self.action_scale
elif self.action_rotation_mode in ["rpy", "ypr", "pry"]:
if self.action_rotation_mode == "rpy":
@@ -233,9 +227,7 @@ def step(
return raw_action, action
- def visualize_epoch(
- self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str
- ) -> None:
+ def visualize_epoch(self, predicted_raw_actions: Sequence[np.ndarray], images: Sequence[np.ndarray], save_path: str) -> None:
images = [self._resize_image(image) for image in images]
predicted_action_name_to_values_over_time = defaultdict(list)
figure_layout = [
@@ -262,9 +254,7 @@ def visualize_epoch(
for action_sub_dimension in range(action[action_name].shape[0]):
# print(action_name, action_sub_dimension)
title = f"{action_name}_{action_sub_dimension}"
- predicted_action_name_to_values_over_time[title].append(
- predicted_raw_actions[i][action_name][action_sub_dimension]
- )
+ predicted_action_name_to_values_over_time[title].append(predicted_raw_actions[i][action_name][action_sub_dimension])
figure_layout = [["image"] * len(figure_layout), figure_layout]
From 2f40c11b3899deb2ed692084706c559d0296bea8 Mon Sep 17 00:00:00 2001
From: YN35 <95198883+YN35@users.noreply.github.com>
Date: Thu, 28 Aug 2025 16:18:23 +0000
Subject: [PATCH 07/57] Environment implementation complete
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
ManiSkill2_real2sim | 2 +-
scripts/README.md | 383 +++++++++++
simpler_env/evaluation/config.py | 2 +-
simpler_env/evaluation/evaluate.py | 618 ++++++++++--------
.../evaluation/maniskill2_evaluator.py | 4 +-
5 files changed, 725 insertions(+), 284 deletions(-)
create mode 100644 scripts/README.md
diff --git a/ManiSkill2_real2sim b/ManiSkill2_real2sim
index 91d154bf..5c920429 160000
--- a/ManiSkill2_real2sim
+++ b/ManiSkill2_real2sim
@@ -1 +1 @@
-Subproject commit 91d154bfd864577f8d2e80f3fc2f8b4d9df9ae5c
+Subproject commit 5c92042911212ec48282a79103f8da441eaadedf
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 00000000..91d27353
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,383 @@
+- Common options
+  - urdf_version
+    - None: white
+    - recolor_tabletop_visual_matching_1: yellow
+    - recolor_tabletop_visual_matching_2: slightly yellow
+    - recolor_cabinet_visual_matching_1: slightly white
+    - baked*: presumably means realistic textures were baked on?
+- pick_coke_can_visual_matching (see the config sketch below)
+  - robot_init_x_range=[0.35, 0.35, 1]
+  - robot_init_y_range=[0.20, 0.20, 1]
+  - obj_init_x_range=[-0.35, -0.12, 5]
+  - obj_init_y_range=[-0.02, 0.42, 5]
+  - env_options:
+    - lr_switch: (initial orientation of the coke can) lying on its side
+    - upright: (initial orientation of the coke can) standing upright
+    - laid_vertically: (initial orientation of the coke can) lying lengthwise
+    - urdf_version
+  - env_name
+    - GraspSingleOpenedCokeCanInScene-v0
+  - scene_name
+    - google_pick_coke_can_1_v4
+  - rgb_overlay_path
+    - ./ManiSkill2_real2sim/data/real_inpainting/google_coke_can_real_eval_1.png
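+
+A minimal, illustrative sketch of one such run (lr_switch orientation, default white URDF). It assumes the `ManiSkill2Config` dataclass in `simpler_env/evaluation/config.py`; control/sim frequency and the step budget mirror the base kwargs in `simpler_env/evaluation/evaluate.py`, and the checkpoint path is a placeholder:
+
+```python
+from simpler_env.evaluation.config import ManiSkill2Config
+
+cfg = ManiSkill2Config(
+    robot="google_robot_static",
+    policy_setup="google_robot",
+    control_freq=3,
+    sim_freq=513,
+    max_episode_steps=80,
+    env_name="GraspSingleOpenedCokeCanInScene-v0",
+    scene_name="google_pick_coke_can_1_v4",
+    rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/google_coke_can_real_eval_1.png",
+    robot_init_x_range=[0.35, 0.35, 1],
+    robot_init_y_range=[0.20, 0.20, 1],
+    obj_init_x_range=[-0.35, -0.12, 5],
+    obj_init_y_range=[-0.02, 0.42, 5],
+    robot_init_rot_quat_center=[0, 0, 0, 1],
+    robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+    additional_env_build_kwargs={"lr_switch": True, "urdf_version": None},
+    ckpt_path="/path/to/checkpoint",  # placeholder
+)
+```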
+- pick_coke_can_variant_agg
+  - robot_init_x_range=[0.35, 0.35, 1]
+  - robot_init_y_range=[0.20, 0.20, 1]
+  - obj_init_x_range=[-0.35, -0.12, 5]
+  - obj_init_y_range=[-0.02, 0.42, 5]
+  - env_options:
+    - lr_switch
+    - upright
+    - laid_vertically
+    - urdf_version
+    - (distractor_config = "more, less")
+      all of the following are added as distractor objects
+      - opened_7up_can (opened 7up can)
+      - opened_sprite_can (opened Sprite can)
+      - sponge
+      - orange
+      - opened_fanta_can (opened Fanta can)
+      - bridge_spoon_generated_modified
+    - (slightly_darker_lighting / slightly_brighter_lighting)
+      - true/false
+  - env_name
+    - GraspSingleOpenedCokeCanInScene-v0
+      - grasp an opened Coca-Cola can
+    - GraspSingleOpenedCokeCanDistractorInScene-v0
+      - harder environment with distractor objects added
+      - the target opened Coca-Cola can must be identified correctly
+    - GraspSingleOpenedCokeCanAltGoogleCameraInScene-v0
+      - environment with an alternative camera angle
+    - GraspSingleOpenedCokeCanAltGoogleCamera2InScene-v0
+      - yet another camera angle
+  - scene_name
+    - google_pick_coke_can_1_v4
+    - Baked_sc1_staging_objaverse_cabinet1_h870
+    - Baked_sc1_staging_objaverse_cabinet2_h870
+    - google_pick_coke_can_1_v4_alt_background
+    - google_pick_coke_can_1_v4_alt_background_2
+- move_near_variant_agg
+ - robot_init_x_range=[0.35, 0.35, 1]
+ - robot_init_y_range=[0.21, 0.21, 1]
+ - obj_variation_mode="episode"
+ - obj_episode_range=[0, 60]
+ - robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.09, -0.09, 1]
+ - env_options:
+    - no_distractor: True (no distractor objects)
+ - slightly_darker_lighting, slightly_brighter_lighting
+ - env_name:
+    - MoveNearGoogleInScene-v0 (move an object next to another object)
+    - MoveNearAltGoogleCameraInScene-v0, MoveNearAltGoogleCamera2InScene-v0 (alternative camera angles)
+ - scene_name:
+ - google_pick_coke_can_1_v4
+ - google_pick_coke_can_1_v4_alt_background, google_pick_coke_can_1_v4_alt_background_2
+ - Baked_sc1_staging_objaverse_cabinet1_h870, Baked_sc1_staging_objaverse_cabinet2_h870
+- move_near_visual_matching (see the config sketch below)
+ - robot_init_x_range=[0.35, 0.35, 1]
+ - robot_init_y_range=[0.21, 0.21, 1]
+ - obj_variation_mode="episode"
+ - obj_episode_range=[0, 60]
+ - robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.09, -0.09, 1]
+ - env_options:
+ - urdf_version
+ - env_name: MoveNearGoogleBakedTexInScene-v0
+ - scene_name: google_pick_coke_can_1_v4
+ - rgb_overlay_path: ./ManiSkill2_real2sim/data/real_inpainting/google_move_near_real_eval_1.png
+ - additional_env_save_tags: "baked_except_bpb_orange"
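+
+The same pattern for a move_near_visual_matching run (default URDF). Again a sketch assuming `ManiSkill2Config`, mirroring the base kwargs used for this suite in `simpler_env/evaluation/evaluate.py`; the checkpoint path is a placeholder:
+
+```python
+from simpler_env.evaluation.config import ManiSkill2Config
+
+cfg = ManiSkill2Config(
+    robot="google_robot_static",
+    policy_setup="google_robot",
+    control_freq=3,
+    sim_freq=513,
+    max_episode_steps=80,
+    env_name="MoveNearGoogleBakedTexInScene-v0",
+    scene_name="google_pick_coke_can_1_v4",
+    rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/google_move_near_real_eval_1.png",
+    robot_init_x_range=[0.35, 0.35, 1],
+    robot_init_y_range=[0.21, 0.21, 1],
+    obj_variation_mode="episode",
+    obj_episode_range=[0, 60],
+    robot_init_rot_quat_center=[0, 0, 0, 1],
+    robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.09, -0.09, 1],
+    additional_env_save_tags="baked_except_bpb_orange",
+    additional_env_build_kwargs={"urdf_version": None},
+    ckpt_path="/path/to/checkpoint",  # placeholder
+)
+```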
+- put_in_drawer_variant_agg
+ - max_episode_steps=200
+ - robot_init_x_range=[0.65, 0.65, 1]
+ - robot_init_y_range=[-0.2, 0.2, 3]
+ - obj_init_x_range=[-0.08, -0.02, 3]
+ - obj_init_y_range=[-0.02, 0.08, 3]
+ - env_options:
+ - model_ids: "apple" (リンゴオブジェクトを引き出しに配置)
+ - shader_dir: "rt" (レイトレーシング)
+ - light_mode: "brighter", "darker" (照明調整)
+ - station_name: "mk_station2", "mk_station3" (異なるキャビネット)
+ - env_name: PlaceIntoClosedTopDrawerCustomInScene-v0 (閉じた上段引き出しにオブジェクトを配置)
+ - scene_name:
+ - frl_apartment_stage_simple (基本シーン、レイトレーシング有効)
+ - modern_bedroom_no_roof, modern_office_no_roof (背景変更)
+ - enable_raytracing=True
+- put_in_drawer_visual_matching
+ - max_episode_steps=200
+ - obj_init_x_range=[-0.08, -0.02, 3]
+ - obj_init_y_range=[-0.02, 0.08, 3]
+ - env_options:
+ - urdf_version
+ - station_name: "mk_station_recolor"
+ - light_mode: "simple"
+ - disable_bad_material: True
+ - model_ids: "baked_apple_v2"
+ - env_name: PlaceIntoClosedTopDrawerCustomInScene-v0
+ - scene_name: dummy_drawer
+  - overlay_poses: 3 different poses (A0, B0, C0)
+ - A0: robot_init=[0.644, -0.179], rot=-0.03
+ - B0: robot_init=[0.652, 0.009], rot=0
+ - C0: robot_init=[0.665, 0.224], rot=0
+ - rgb_overlay_path: ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_{a0,b0,c0}.png
+- drawer_variant_agg
+ - max_episode_steps=113
+ - robot_init_x_range=[0.65, 0.85, 3]
+ - robot_init_y_range=[-0.2, 0.2, 3]
+ - obj_init_x_range=[0, 0, 1], obj_init_y_range=[0, 0, 1]
+ - env_options:
+ - shader_dir: "rt"
+ - light_mode: "brighter", "darker"
+ - station_name: "mk_station2", "mk_station3"
+ - env_name:
+    - OpenTopDrawerCustomInScene-v0, OpenMiddleDrawerCustomInScene-v0, OpenBottomDrawerCustomInScene-v0 (open the drawer)
+    - CloseTopDrawerCustomInScene-v0, CloseMiddleDrawerCustomInScene-v0, CloseBottomDrawerCustomInScene-v0 (close the drawer)
+  - scene_name:
+    - frl_apartment_stage_simple (base scene, ray tracing enabled)
+    - modern_bedroom_no_roof, modern_office_no_roof (alternative backgrounds)
+ - enable_raytracing=True
+- drawer_visual_matching (see the config sketch below)
+  - max_episode_steps=113
+  - obj_init_x_range=[0, 0, 1], obj_init_y_range=[0, 0, 1]
+  - env_options:
+    - urdf_version
+    - station_name: "mk_station_recolor"
+    - light_mode: "simple"
+    - disable_bad_material: True
+  - env_name: the same 6 environments as drawer_variant_agg above
+  - scene_name: dummy_drawer
+  - overlay_poses: 9 different poses (A0/A1/A2/B0/B1/B2/C0/C1/C2)
+    - A series: robot_init_y=[-0.179, -0.182, -0.203], robot_init_x=[0.644, 0.765, 0.889]
+    - B series: robot_init_y=[0.009, 0.009, 0.035], robot_init_x=[0.652, 0.752, 0.851]
+    - C series: robot_init_y=[0.224, 0.222, 0.222], robot_init_x=[0.665, 0.765, 0.865]
+  - rgb_overlay_path: ./ManiSkill2_real2sim/data/real_inpainting/open_drawer_{a0,a1,a2,b0,b1,b2,c0,c1,c2}.png
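+
+A sketch of a single drawer_visual_matching run (top drawer, overlay pose A0, recolored cabinet URDF). The values mirror the `base` and `overlay_poses` dictionaries in `simpler_env/evaluation/evaluate.py`; the checkpoint path is a placeholder:
+
+```python
+from simpler_env.evaluation.config import ManiSkill2Config
+
+cfg = ManiSkill2Config(
+    robot="google_robot_static",
+    policy_setup="google_robot",
+    control_freq=3,
+    sim_freq=513,
+    max_episode_steps=113,
+    env_name="OpenTopDrawerCustomInScene-v0",
+    scene_name="dummy_drawer",
+    robot_init_x_range=[0.644, 0.644, 1],
+    robot_init_y_range=[-0.179, -0.179, 1],
+    robot_init_rot_quat_center=[0, 0, 0, 1],
+    robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.03, -0.03, 1],
+    obj_init_x_range=[0, 0, 1],
+    obj_init_y_range=[0, 0, 1],
+    rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a0.png",
+    additional_env_build_kwargs={
+        "station_name": "mk_station_recolor",
+        "light_mode": "simple",
+        "disable_bad_material": True,
+        "urdf_version": "recolor_cabinet_visual_matching_1",
+    },
+    ckpt_path="/path/to/checkpoint",  # placeholder
+)
+```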
+
+
+
+**grasp_single**
+- GraspSingleInSceneEnv (CustomSceneEnv)
+ - options
+ - require_lifting_obj_for_success: bool = True
+ - success_from_episode_stats: bool = True
+ - distractor_model_ids: Optional[List[str]] = None
+ - slightly_darker_lighting: bool = False
+ - slightly_brighter_lighting: bool = False
+ - darker_lighting: bool = False
+ - prepackaged_config: bool = False
+ - GraspSingleCustomInSceneEnv (GraspSingleInSceneEnv, CustomOtherObjectsInSceneEnv)
+ - GraspSingleCustomOrientationInSceneEnv (GraspSingleCustomInSceneEnv)
+ - options
+      - upright: bool = False (stand the object upright)
+      - laid_vertically: bool = False (lay the object down lengthwise)
+      - lr_switch: bool = False (flip the object left/right)
+ - GraspSingleRandomObjectInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed)
+ - opened_pepsi_can, opened_coke_can, opened_sprite_can, opened_fanta_can
+ - opened_redbull_can, blue_plastic_bottle, apple, orange, sponge
+ - bridge_spoon_generated_modified, bridge_carrot_generated_modified
+ - green_cube_3cm, yellow_cube_3cm, eggplant
+    - GraspSingleCokeCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): coke_can
+    - GraspSingleOpenedCokeCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): opened_coke_can
+    - GraspSingleAltDensityOpenedCokeCanInScene-v0 (GraspSingleOpenedCokeCanInSceneEnv)
+      - density=100 (changed from the original density of 50 to match the 20 g mass of an empty opened can)
+    - GraspSingleDummy-v0 (GraspSingleOpenedCokeCanInSceneEnv)
+      - robot_init_options (fixed)
+        - init_xy: [100.0, 100.0] (places the robot far away)
+        - init_height: 50.0
+    - GraspSingleOpenedCokeCanAltGoogleCameraInScene-v0 (GraspSingleOpenedCokeCanInSceneEnv)
+      - robot_init_options (fixed)
+        - qpos: [..., -0.00285961, 0.9351361] (alternative camera orientation)
+    - GraspSingleOpenedCokeCanAltGoogleCamera2InScene-v0 (GraspSingleOpenedCokeCanInSceneEnv)
+      - robot_init_options (fixed)
+        - qpos: [..., -0.00285961, 0.6651361] (another camera orientation)
+    - GraspSingleOpenedCokeCanDistractorInScene-v0 (GraspSingleOpenedCokeCanInSceneEnv)
+      - options
+        - distractor_config: "less" or "more" (amount of distractor objects)
+    - GraspSinglePepsiCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): pepsi_can
+    - GraspSingleOpenedPepsiCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): opened_pepsi_can
+    - GraspSingle7upCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): 7up_can
+    - GraspSingleOpened7upCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): opened_7up_can
+    - GraspSingleSpriteCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): sprite_can
+    - GraspSingleOpenedSpriteCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): opened_sprite_can
+    - GraspSingleFantaCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): fanta_can
+    - GraspSingleOpenedFantaCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): opened_fanta_can
+    - GraspSingleRedBullCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): redbull_can
+    - GraspSingleOpenedRedBullCanInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): opened_redbull_can
+    - GraspSingleBluePlasticBottleInScene-v0 (GraspSingleCustomOrientationInSceneEnv)
+      - model_ids (fixed): blue_plastic_bottle
+    - GraspSingleAppleInScene-v0 (GraspSingleCustomInSceneEnv)
+      - model_ids (fixed): apple
+    - GraspSingleOrangeInScene-v0 (GraspSingleCustomInSceneEnv)
+      - model_ids (fixed): orange
+    - GraspSingleSpongeInScene-v0 (GraspSingleCustomInSceneEnv)
+      - model_ids (fixed): sponge
+    - GraspSingleBridgeSpoonInScene-v0 (GraspSingleCustomInSceneEnv)
+      - model_ids (fixed): bridge_spoon_generated_modified
+
+**move_near**
+- MoveNearInSceneEnv (CustomSceneEnv)
+ - options
+ - original_lighting: bool = False
+ - slightly_darker_lighting: bool = False
+ - slightly_brighter_lighting: bool = False
+ - ambient_only_lighting: bool = False
+ - prepackaged_config: bool = False
+ - MoveNearGoogleInScene-v0 (MoveNearInSceneEnv, CustomOtherObjectsInSceneEnv)
+ - options
+      - no_distractor: bool = False (mode without distractor objects)
+ - MoveNearGoogleBakedTexInScene-v0 (MoveNearGoogleInSceneEnv)
+ - DEFAULT_MODEL_JSON: "info_pick_custom_baked_tex_v0.json"
+ - MoveNearGoogleBakedTexInSceneEnvV1 (MoveNearGoogleInSceneEnv)
+ - DEFAULT_MODEL_JSON: "info_pick_custom_baked_tex_v1.json"
+ - options
+ - light_mode: "simple", "simple2" or None
+ - MoveNearAltGoogleCameraInScene-v0 (MoveNearGoogleInSceneEnv)
+    - robot_init_options (fixed)
+      - qpos: [..., -0.00285961, 0.9351361] (alternative camera orientation)
+  - MoveNearAltGoogleCamera2InScene-v0 (MoveNearGoogleInSceneEnv)
+    - robot_init_options (fixed)
+      - qpos: [..., -0.00285961, 0.6651361] (another camera orientation)
+
+**open_drawer**
+- OpenDrawerInSceneEnv (CustomSceneEnv)
+ - options
+ - light_mode: Optional[str] = None
+ - camera_mode: Optional[str] = None
+    - station_name: str = "mk_station" (type of cabinet)
+ - cabinet_joint_friction: float = 0.05
+ - prepackaged_config: bool = False
+ - OpenDrawerCustomInScene-v0 (OpenDrawerInSceneEnv, CustomOtherObjectsInSceneEnv)
+ - drawer_ids: ["top", "middle", "bottom"]
+ - OpenTopDrawerCustomInScene-v0 (OpenDrawerCustomInSceneEnv)
+ - drawer_ids: ["top"]
+ - OpenMiddleDrawerCustomInScene-v0 (OpenDrawerCustomInSceneEnv)
+ - drawer_ids: ["middle"]
+ - OpenBottomDrawerCustomInScene-v0 (OpenDrawerCustomInSceneEnv)
+ - drawer_ids: ["bottom"]
+
+**close_drawer**
+- CloseDrawerInSceneEnv (OpenDrawerInSceneEnv)
+    - task of closing a drawer (the drawer starts open)
+ - CloseDrawerCustomInScene-v0 (CloseDrawerInSceneEnv, CustomOtherObjectsInSceneEnv)
+ - drawer_ids: ["top", "middle", "bottom"]
+ - CloseTopDrawerCustomInScene-v0 (CloseDrawerCustomInSceneEnv)
+ - drawer_ids: ["top"]
+ - CloseMiddleDrawerCustomInScene-v0 (CloseDrawerCustomInSceneEnv)
+ - drawer_ids: ["middle"]
+ - CloseBottomDrawerCustomInScene-v0 (CloseDrawerCustomInSceneEnv)
+ - drawer_ids: ["bottom"]
+
+**place_object_in_closed_drawer**
+- PlaceObjectInClosedDrawerInSceneEnv (OpenDrawerInSceneEnv)
+ - options
+    - force_advance_subtask_time_steps: int = 100 (number of steps before forcibly advancing to the next subtask)
+ - PlaceIntoClosedDrawerCustomInScene-v0 (PlaceObjectInClosedDrawerInSceneEnv, CustomOtherObjectsInSceneEnv)
+ - DEFAULT_MODEL_JSON: "info_pick_custom_baked_tex_v1.json"
+ - drawer_ids: ["top", "middle", "bottom"]
+ - PlaceIntoClosedTopDrawerCustomInScene-v0 (PlaceIntoClosedDrawerCustomInSceneEnv)
+ - drawer_ids: ["top"]
+ - PlaceIntoClosedMiddleDrawerCustomInScene-v0 (PlaceIntoClosedDrawerCustomInSceneEnv)
+ - drawer_ids: ["middle"]
+ - PlaceIntoClosedBottomDrawerCustomInScene-v0 (PlaceIntoClosedDrawerCustomInSceneEnv)
+ - drawer_ids: ["bottom"]
+ - PlaceIntoClosedTopDrawerCustomInScene-v1 (PlaceIntoClosedDrawerCustomInSceneEnv)
+ - drawer_ids: ["top"]
+      - advances to the next subtask once the drawer has been opened automatically
+
+**put_on**
+- PutOnInSceneEnv (MoveNearInSceneEnv)
+  - task of placing an object on top of another object
+ - PutOnBridgeInSceneEnv (PutOnInSceneEnv, CustomBridgeObjectsInSceneEnv)
+ - options
+      - source_obj_name: str = None (object to move)
+      - target_obj_name: str = None (target object)
+      - xy_configs: List[np.ndarray] = None (position configurations)
+      - quat_configs: List[np.ndarray] = None (rotation configurations)
+    - PutSpoonOnTableClothInScene-v0 (PutOnBridgeInSceneEnv)
+      - source_obj_name (fixed): "bridge_spoon_generated_modified"
+      - target_obj_name (fixed): "table_cloth_generated_shorter"
+      - counts as success if the spoon is even partially on the tablecloth
+    - PutCarrotOnPlateInScene-v0 (PutOnBridgeInSceneEnv)
+      - source_obj_name (fixed): "bridge_carrot_generated_modified"
+      - target_obj_name (fixed): "bridge_plate_objaverse_larger"
+    - StackGreenCubeOnYellowCubeInScene-v0 (PutOnBridgeInSceneEnv)
+      - source_obj_name (fixed): "green_cube_3cm"
+      - target_obj_name (fixed): "yellow_cube_3cm"
+    - StackGreenCubeOnYellowCubeBakedTexInScene-v0 (StackGreenCubeOnYellowCubeInScene)
+      - DEFAULT_MODEL_JSON: "info_bridge_custom_baked_tex_v0.json"
+      - source_obj_name (fixed): "baked_green_cube_3cm"
+      - target_obj_name (fixed): "baked_yellow_cube_3cm"
+    - PutEggplantInBasketScene-v0 (PutOnBridgeInSceneEnv), see the config sketch below
+      - source_obj_name (fixed): "eggplant"
+      - target_obj_name (fixed): "dummy_sink_target_plane" (invisible target plane)
+      - robot (fixed): "widowx_sink_camera_setup"
+      - scene_name (fixed): "bridge_table_1_v2"
+      - rgb_always_overlay_objects: ['sink', 'dummy_sink_target_plane']
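+
+For the Bridge suite, a sketch of the PutEggplantInBasketScene-v0 run, mirroring `cfg_v2` in `simpler_env/evaluation/evaluate_bridge.py`; the checkpoint path is a placeholder:
+
+```python
+from simpler_env.evaluation.config import ManiSkill2Config
+
+cfg = ManiSkill2Config(
+    policy_setup="widowx_bridge",
+    robot="widowx_sink_camera_setup",  # sink camera setup, unlike the other bridge tasks
+    control_freq=5,
+    sim_freq=500,
+    max_episode_steps=120,
+    env_name="PutEggplantInBasketScene-v0",
+    scene_name="bridge_table_1_v2",
+    rgb_overlay_path="ManiSkill2_real2sim/data/real_inpainting/bridge_sink.png",
+    robot_init_x_range=[0.127, 0.127, 1],
+    robot_init_y_range=[0.06, 0.06, 1],
+    obj_variation_mode="episode",
+    obj_episode_range=[0, 24],
+    robot_init_rot_quat_center=[0, 0, 0, 1],
+    robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+    ckpt_path="/path/to/checkpoint",  # placeholder
+)
+```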
+
+**base_env**
+- CustomSceneEnv (BaseEnv)
+ - options
+ - robot: str = "google_robot_static"
+ - rgb_overlay_path: Optional[str] = None
+ - rgb_overlay_cameras: list = []
+ - rgb_overlay_mode: str = 'background'
+ - rgb_always_overlay_objects: List[str] = []
+ - disable_bad_material: bool = False
+ - asset_root: Optional[str] = None
+ - scene_root: Optional[str] = None
+ - scene_name: Optional[str] = None
+ - scene_offset: Optional[List[float]] = None
+ - scene_pose: Optional[List[float]] = None
+ - scene_table_height: float = 0.87
+ - model_json: Optional[str] = None
+ - model_ids: List[str] = ()
+ - model_db_override: Dict[str, Dict] = {}
+ - urdf_version: str = ""
+ - CustomOtherObjectsInSceneEnv (CustomSceneEnv)
+ - DEFAULT_ASSET_ROOT: "{ASSET_DIR}/custom"
+ - DEFAULT_SCENE_ROOT: "{ASSET_DIR}/hab2_bench_assets"
+ - DEFAULT_MODEL_JSON: "info_pick_custom_v0.json"
+ - obj_static_friction: 0.5
+ - obj_dynamic_friction: 0.5
+ - CustomBridgeObjectsInSceneEnv (CustomOtherObjectsInSceneEnv)
+ - DEFAULT_MODEL_JSON: "info_bridge_custom_v0.json"
\ No newline at end of file
diff --git a/simpler_env/evaluation/config.py b/simpler_env/evaluation/config.py
index fc6e481a..ec258fd7 100644
--- a/simpler_env/evaluation/config.py
+++ b/simpler_env/evaluation/config.py
@@ -34,7 +34,7 @@ class ManiSkill2Config:
rgb_overlay_path: Optional[str] = None
# Robot initialization ranges
- robot_init_x_range: List[float] = field(default_factory=lambda: [0.35, 0.35, 1])
+ robot_init_x_range: List[float] = field(default_factory=lambda: [0.35, 0.35, 1]) # start, end, len
robot_init_y_range: List[float] = field(default_factory=lambda: [0.20, 0.20, 1])
robot_init_rot_quat_center: List[float] = field(default_factory=lambda: [1, 0, 0, 0])
robot_init_rot_rpy_range: List[float] = field(default_factory=lambda: [0, 0, 1, 0, 0, 1, 0, 0, 1])
diff --git a/simpler_env/evaluation/evaluate.py b/simpler_env/evaluation/evaluate.py
index aa7ebb61..42e78a26 100644
--- a/simpler_env/evaluation/evaluate.py
+++ b/simpler_env/evaluation/evaluate.py
@@ -34,51 +34,49 @@ def _run_single_evaluation(env_policy: AiroaBasePolicy, cfg: ManiSkill2Config, c
return success_arr
-# def pick_coke_can_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
-# print("\n--- pick_coke_can_visual_matching ---")
-# results: List[List[bool]] = []
-
-# coke_can_options_arr = [
-# {"lr_switch": True},
-# {"upright": True},
-# {"laid_vertically": True},
-# ]
-# urdf_versions = [None, "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", "recolor_cabinet_visual_matching_1"]
-
-# base_kwargs = dict(
-# robot="google_robot_static",
-# policy_setup="google_robot",
-# control_freq=3,
-# sim_freq=513,
-# max_episode_steps=80,
-# ckpt_path=ckpt_path,
-# robot_init_x_range=[0.35, 0.35, 1],
-# robot_init_y_range=[0.20, 0.20, 1],
-# obj_init_x_range=[-0.35, -0.12, 5],
-# obj_init_y_range=[-0.02, 0.42, 5],
-# robot_init_rot_quat_center=[0, 0, 0, 1],
-# robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
-# )
-
-# for urdf in urdf_versions:
-# for opt in coke_can_options_arr:
-# cfg = ManiSkill2Config(
-# **base_kwargs,
-# env_name="PickColaAndPlaceInDrawer-v0",
-# scene_name="google_pick_coke_can_1_v4",
-# rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/google_coke_can_real_eval_1.png",
-# additional_env_build_kwargs={**opt, "urdf_version": urdf},
-# )
-# results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
-
-# return results
-
-
-# ----------------------------------------------------------------------
-# 1) pick_coke_can_variant_agg.sh
-# ----------------------------------------------------------------------
-def pick_coke_can_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
- print("\n--- pick_coke_can_variant_agg ---")
+def pick_object_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- pick_object_visual_matching ---")
+ results: List[List[bool]] = []
+
+ direction_options_arr = [
+ {"lr_switch": True},
+ {"upright": True},
+ {"laid_vertically": True},
+ ]
+
+ urdf_versions = [None, "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", "recolor_cabinet_visual_matching_1"]
+
+ base_kwargs = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=80,
+ ckpt_path=ckpt_path,
+ robot_init_x_range=[0.35, 0.35, 1],
+ robot_init_y_range=[0.20, 0.20, 1],
+ obj_init_x_range=[-0.35, -0.12, 8],
+ obj_init_y_range=[-0.02, 0.42, 8],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ )
+
+ for urdf in urdf_versions:
+ for opt in direction_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleRandomObjectInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/google_coke_can_real_eval_1.png",
+ additional_env_build_kwargs={**opt, "urdf_version": urdf},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+def pick_object_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- pick_object_variant_agg ---")
results: List[List[bool]] = []
coke_can_options_arr = [
@@ -106,7 +104,7 @@ def pick_coke_can_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> Li
for opt in coke_can_options_arr:
cfg = ManiSkill2Config(
**base_kwargs,
- env_name="GraspSingleOpenedCokeCanInScene-v0",
+ env_name="GraspSingleRandomObjectInScene-v0",
scene_name="google_pick_coke_can_1_v4",
additional_env_build_kwargs={**opt},
)
@@ -118,37 +116,19 @@ def pick_coke_can_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> Li
for opt in coke_can_options_arr:
cfg = ManiSkill2Config(
**base_kwargs,
- env_name="GraspSingleOpenedCokeCanInScene-v0",
+ env_name="GraspSingleRandomObjectInScene-v0",
scene_name=scene,
additional_env_build_kwargs={**opt},
)
results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
- # distractors (normal + more)
- for opt in coke_can_options_arr:
- cfg = ManiSkill2Config(
- **base_kwargs,
- env_name="GraspSingleOpenedCokeCanDistractorInScene-v0",
- scene_name="google_pick_coke_can_1_v4",
- additional_env_build_kwargs={**opt},
- )
- results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
-
- cfg = ManiSkill2Config(
- **base_kwargs,
- env_name="GraspSingleOpenedCokeCanDistractorInScene-v0",
- scene_name="google_pick_coke_can_1_v4",
- additional_env_build_kwargs={**opt, "distractor_config": "more"},
- )
- results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
-
# backgrounds
bg_scenes = ["google_pick_coke_can_1_v4_alt_background", "google_pick_coke_can_1_v4_alt_background_2"]
for scene in bg_scenes:
for opt in coke_can_options_arr:
cfg = ManiSkill2Config(
**base_kwargs,
- env_name="GraspSingleOpenedCokeCanInScene-v0",
+ env_name="GraspSingleRandomObjectInScene-v0",
scene_name=scene,
additional_env_build_kwargs={**opt},
)
@@ -158,7 +138,7 @@ def pick_coke_can_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> Li
for opt in coke_can_options_arr:
cfg = ManiSkill2Config(
**base_kwargs,
- env_name="GraspSingleOpenedCokeCanInScene-v0",
+ env_name="GraspSingleRandomObjectInScene-v0",
scene_name="google_pick_coke_can_1_v4",
additional_env_build_kwargs={**opt, "slightly_darker_lighting": True},
)
@@ -166,14 +146,14 @@ def pick_coke_can_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> Li
cfg = ManiSkill2Config(
**base_kwargs,
- env_name="GraspSingleOpenedCokeCanInScene-v0",
+ env_name="GraspSingleRandomObjectInScene-v0",
scene_name="google_pick_coke_can_1_v4",
additional_env_build_kwargs={**opt, "slightly_brighter_lighting": True},
)
results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
# camera orientations
- alt_envs = ["GraspSingleOpenedCokeCanAltGoogleCameraInScene-v0", "GraspSingleOpenedCokeCanAltGoogleCamera2InScene-v0"]
+ alt_envs = ["GraspSingleRandomObjectAltGoogleCameraInScene-v0", "GraspSingleRandomObjectAltGoogleCamera2InScene-v0"]
for env in alt_envs:
for opt in coke_can_options_arr:
cfg = ManiSkill2Config(
@@ -187,11 +167,8 @@ def pick_coke_can_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> Li
return results
-# ----------------------------------------------------------------------
-# 2) pick_coke_can_visual_matching.sh
-# ----------------------------------------------------------------------
-def pick_coke_can_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
- print("\n--- pick_coke_can_visual_matching ---")
+def pick_object_among_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- pick_object_among_visual_matching ---")
results: List[List[bool]] = []
coke_can_options_arr = [
@@ -199,6 +176,7 @@ def pick_coke_can_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -
{"upright": True},
{"laid_vertically": True},
]
+
urdf_versions = [None, "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", "recolor_cabinet_visual_matching_1"]
base_kwargs = dict(
@@ -220,181 +198,120 @@ def pick_coke_can_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -
for opt in coke_can_options_arr:
cfg = ManiSkill2Config(
**base_kwargs,
- env_name="GraspSingleOpenedCokeCanInScene-v0",
+ env_name="GraspSingleRandomObjectDistractorInScene-v0",
scene_name="google_pick_coke_can_1_v4",
rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/google_coke_can_real_eval_1.png",
- additional_env_build_kwargs={**opt, "urdf_version": urdf},
+ additional_env_build_kwargs={**opt, "urdf_version": urdf, "distractor_config": "less"},
)
results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
return results
-# ----------------------------------------------------------------------
-# 3) move_near_variant_agg.sh
-# ----------------------------------------------------------------------
-def move_near_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
- print("\n--- move_near_variant_agg ---")
+def pick_object_among_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- pick_object_among_variant_agg ---")
results: List[List[bool]] = []
+ coke_can_options_arr = [
+ {"lr_switch": True},
+ {"upright": True},
+ {"laid_vertically": True},
+ ]
+
base_kwargs = dict(
robot="google_robot_static",
policy_setup="google_robot",
control_freq=3,
sim_freq=513,
max_episode_steps=80,
+ ckpt_path=ckpt_path,
robot_init_x_range=[0.35, 0.35, 1],
- robot_init_y_range=[0.21, 0.21, 1],
- obj_variation_mode="episode",
- obj_episode_range=[0, 60],
+ robot_init_y_range=[0.20, 0.20, 1],
+ obj_init_x_range=[-0.35, -0.12, 5],
+ obj_init_y_range=[-0.02, 0.42, 5],
robot_init_rot_quat_center=[0, 0, 0, 1],
- robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.09, -0.09, 1],
- ckpt_path=ckpt_path,
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
)
# base
- cfg = ManiSkill2Config(**base_kwargs, env_name="MoveNearGoogleInScene-v0", scene_name="google_pick_coke_can_1_v4")
- results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
-
- # distractor(no_distractor=True)
- cfg = ManiSkill2Config(
- **base_kwargs,
- env_name="MoveNearGoogleInScene-v0",
- scene_name="google_pick_coke_can_1_v4",
- additional_env_build_kwargs={"no_distractor": True},
- )
- results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
-
- # backgrounds
- for scene in ["google_pick_coke_can_1_v4_alt_background", "google_pick_coke_can_1_v4_alt_background_2"]:
- cfg = ManiSkill2Config(**base_kwargs, env_name="MoveNearGoogleInScene-v0", scene_name=scene)
- results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
-
- # lighting
- for k in [{"slightly_darker_lighting": True}, {"slightly_brighter_lighting": True}]:
+ for opt in coke_can_options_arr:
cfg = ManiSkill2Config(
**base_kwargs,
- env_name="MoveNearGoogleInScene-v0",
+ env_name="GraspSingleRandomObjectDistractorInScene-v0",
scene_name="google_pick_coke_can_1_v4",
- additional_env_build_kwargs=k,
+ additional_env_build_kwargs={**opt, "distractor_config": "less"},
)
results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
# table textures (baked)
- for scene in ["Baked_sc1_staging_objaverse_cabinet1_h870", "Baked_sc1_staging_objaverse_cabinet2_h870"]:
- cfg = ManiSkill2Config(**base_kwargs, env_name="MoveNearGoogleInScene-v0", scene_name=scene)
- results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
-
- # camera orientations
- for env in ["MoveNearAltGoogleCameraInScene-v0", "MoveNearAltGoogleCamera2InScene-v0"]:
- cfg = ManiSkill2Config(**base_kwargs, env_name=env, scene_name="google_pick_coke_can_1_v4")
- results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
-
- return results
-
-
-# ----------------------------------------------------------------------
-# 4) move_near_visual_matching.sh
-# ----------------------------------------------------------------------
-def move_near_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
- print("\n--- move_near_visual_matching ---")
- results: List[List[bool]] = []
+ baked_scenes = ["Baked_sc1_staging_objaverse_cabinet1_h870", "Baked_sc1_staging_objaverse_cabinet2_h870"]
+ for scene in baked_scenes:
+ for opt in coke_can_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleRandomObjectDistractorInScene-v0",
+ scene_name=scene,
+ additional_env_build_kwargs={**opt, "distractor_config": "less"},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
- base_kwargs = dict(
- robot="google_robot_static",
- policy_setup="google_robot",
- control_freq=3,
- sim_freq=513,
- max_episode_steps=80,
- robot_init_x_range=[0.35, 0.35, 1],
- robot_init_y_range=[0.21, 0.21, 1],
- obj_variation_mode="episode",
- obj_episode_range=[0, 60],
- robot_init_rot_quat_center=[0, 0, 0, 1],
- robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.09, -0.09, 1],
- ckpt_path=ckpt_path,
- )
+ # backgrounds
+ bg_scenes = ["google_pick_coke_can_1_v4_alt_background", "google_pick_coke_can_1_v4_alt_background_2"]
+ for scene in bg_scenes:
+ for opt in coke_can_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleRandomObjectDistractorInScene-v0",
+ scene_name=scene,
+ additional_env_build_kwargs={**opt, "distractor_config": "less"},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
- urdf_versions = [None, "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", "recolor_cabinet_visual_matching_1"]
- for urdf in urdf_versions:
+ # lightings (darker / brighter)
+ for opt in coke_can_options_arr:
cfg = ManiSkill2Config(
**base_kwargs,
- env_name="MoveNearGoogleBakedTexInScene-v0",
+ env_name="GraspSingleRandomObjectDistractorInScene-v0",
scene_name="google_pick_coke_can_1_v4",
- rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/google_move_near_real_eval_1.png",
- additional_env_save_tags="baked_except_bpb_orange",
- additional_env_build_kwargs={"urdf_version": urdf},
+ additional_env_build_kwargs={**opt, "slightly_darker_lighting": True, "distractor_config": "less"},
)
results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
- return results
-
-
-# ----------------------------------------------------------------------
-# 5) put_in_drawer_variant_agg.sh
-# ----------------------------------------------------------------------
-def put_in_drawer_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
- print("\n--- put_in_drawer_variant_agg ---")
- results: List[List[bool]] = []
-
- common = dict(
- robot="google_robot_static",
- policy_setup="google_robot",
- control_freq=3,
- sim_freq=513,
- max_episode_steps=200,
- ckpt_path=ckpt_path,
- additional_env_build_kwargs={"model_ids": "apple"},
- robot_init_rot_quat_center=[0, 0, 0, 1],
- obj_init_x_range=[-0.08, -0.02, 3],
- obj_init_y_range=[-0.02, 0.08, 3],
- robot_init_x_range=[0.65, 0.65, 1],
- robot_init_y_range=[-0.2, 0.2, 3],
- robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0.0, 0.0, 1],
- )
-
- env_names = ["PlaceIntoClosedTopDrawerCustomInScene-v0"]
-
- # base (enable raytracing)
- for env_name in env_names:
- cfg = ManiSkill2Config(**common, env_name=env_name, scene_name="frl_apartment_stage_simple", enable_raytracing=True)
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="GraspSingleRandomObjectDistractorInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs={**opt, "slightly_brighter_lighting": True, "distractor_config": "less"},
+ )
results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
- # backgrounds (shader_dir=rt)
- for scene in ["modern_bedroom_no_roof", "modern_office_no_roof"]:
- merged = common["additional_env_build_kwargs"].copy()
- merged["shader_dir"] = "rt"
- for env_name in env_names:
- cfg = ManiSkill2Config(**common, env_name=env_name, scene_name=scene, additional_env_build_kwargs=merged)
- results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
-
- # lightings
- for light_mode in ["brighter", "darker"]:
- merged = common["additional_env_build_kwargs"].copy()
- merged.update({"shader_dir": "rt", "light_mode": light_mode})
- for env_name in env_names:
- cfg = ManiSkill2Config(**common, env_name=env_name, scene_name="frl_apartment_stage_simple", additional_env_build_kwargs=merged)
- results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
-
- # new cabinets
- for station in ["mk_station2", "mk_station3"]:
- merged = common["additional_env_build_kwargs"].copy()
- merged.update({"shader_dir": "rt", "station_name": station})
- for env_name in env_names:
- cfg = ManiSkill2Config(**common, env_name=env_name, scene_name="frl_apartment_stage_simple", additional_env_build_kwargs=merged)
+ # camera orientations
+ alt_envs = ["GraspSingleRandomObjectDistractorAltGoogleCameraInScene-v0", "GraspSingleRandomObjectDistractorAltGoogleCamera2InScene-v0"]
+ for env in alt_envs:
+ for opt in coke_can_options_arr:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name=env,
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs={**opt, "distractor_config": "less"},
+ )
results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
return results
-# ----------------------------------------------------------------------
-# 6) put_in_drawer_visual_matching.sh
-# ----------------------------------------------------------------------
-def put_in_drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
- print("\n--- put_in_drawer_visual_matching ---")
+def drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- drawer_visual_matching ---")
results: List[List[bool]] = []
- env_names = ["PlaceIntoClosedTopDrawerCustomInScene-v0"]
+ env_names = [
+ "OpenTopDrawerCustomInScene-v0",
+ "OpenMiddleDrawerCustomInScene-v0",
+ "OpenBottomDrawerCustomInScene-v0",
+ "CloseTopDrawerCustomInScene-v0",
+ "CloseMiddleDrawerCustomInScene-v0",
+ "CloseBottomDrawerCustomInScene-v0",
+ ]
urdf_versions = ["recolor_cabinet_visual_matching_1", "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", None]
base = dict(
@@ -402,38 +319,76 @@ def put_in_drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -
policy_setup="google_robot",
control_freq=3,
sim_freq=513,
- max_episode_steps=200,
+ max_episode_steps=113,
ckpt_path=ckpt_path,
robot_init_rot_quat_center=[0, 0, 0, 1],
- obj_init_x_range=[-0.08, -0.02, 3],
- obj_init_y_range=[-0.02, 0.08, 3],
+ obj_init_x_range=[0, 0, 1],
+ obj_init_y_range=[0, 0, 1],
+ scene_name="dummy_drawer",
)
+ # 9 overlay poses (A0/A1/A2/B0/B1/B2/C0/C1/C2)
overlay_poses = [
- # A0
+ # A0/A1/A2
dict(
robot_init_x_range=[0.644, 0.644, 1],
robot_init_y_range=[-0.179, -0.179, 1],
robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.03, -0.03, 1],
rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a0.png",
),
- # B0
+ dict(
+ robot_init_x_range=[0.765, 0.765, 1],
+ robot_init_y_range=[-0.182, -0.182, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.02, -0.02, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a1.png",
+ ),
+ dict(
+ robot_init_x_range=[0.889, 0.889, 1],
+ robot_init_y_range=[-0.203, -0.203, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.06, -0.06, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a2.png",
+ ),
+ # B0/B1/B2
dict(
robot_init_x_range=[0.652, 0.652, 1],
robot_init_y_range=[0.009, 0.009, 1],
robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b0.png",
),
- # C0
+ dict(
+ robot_init_x_range=[0.752, 0.752, 1],
+ robot_init_y_range=[0.009, 0.009, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b1.png",
+ ),
+ dict(
+ robot_init_x_range=[0.851, 0.851, 1],
+ robot_init_y_range=[0.035, 0.035, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b2.png",
+ ),
+ # C0/C1/C2
dict(
robot_init_x_range=[0.665, 0.665, 1],
robot_init_y_range=[0.224, 0.224, 1],
robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c0.png",
),
+ dict(
+ robot_init_x_range=[0.765, 0.765, 1],
+ robot_init_y_range=[0.222, 0.222, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.025, -0.025, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c1.png",
+ ),
+ dict(
+ robot_init_x_range=[0.865, 0.865, 1],
+ robot_init_y_range=[0.222, 0.222, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.025, -0.025, 1],
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c2.png",
+ ),
]
- add_base = dict(station_name="mk_station_recolor", light_mode="simple", disable_bad_material=True, model_ids="baked_apple_v2")
+ add_base = dict(station_name="mk_station_recolor", light_mode="simple", disable_bad_material=True)
for urdf in urdf_versions:
add_kwargs = {**add_base, "urdf_version": urdf}
@@ -442,7 +397,6 @@ def put_in_drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -
cfg = ManiSkill2Config(
**base,
env_name=env_name,
- scene_name="dummy_drawer",
additional_env_build_kwargs=add_kwargs,
**pose,
)
@@ -451,9 +405,6 @@ def put_in_drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -
return results
-# ----------------------------------------------------------------------
-# 7) drawer_variant_agg.sh
-# ----------------------------------------------------------------------
def drawer_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
print("\n--- drawer_variant_agg ---")
results: List[List[bool]] = []
@@ -511,21 +462,105 @@ def drawer_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List
return results
-# ----------------------------------------------------------------------
-# 8) drawer_visual_matching.sh
-# ----------------------------------------------------------------------
-def drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
- print("\n--- drawer_visual_matching ---")
+def move_near_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- move_near_variant_agg ---")
results: List[List[bool]] = []
- env_names = [
- "OpenTopDrawerCustomInScene-v0",
- "OpenMiddleDrawerCustomInScene-v0",
- "OpenBottomDrawerCustomInScene-v0",
- "CloseTopDrawerCustomInScene-v0",
- "CloseMiddleDrawerCustomInScene-v0",
- "CloseBottomDrawerCustomInScene-v0",
- ]
+ base_kwargs = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=80,
+ robot_init_x_range=[0.35, 0.35, 1],
+ robot_init_y_range=[0.21, 0.21, 1],
+ obj_variation_mode="episode",
+ obj_episode_range=[0, 60],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.09, -0.09, 1],
+ ckpt_path=ckpt_path,
+ )
+
+ # base
+ cfg = ManiSkill2Config(**base_kwargs, env_name="MoveNearGoogleInScene-v0", scene_name="google_pick_coke_can_1_v4")
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+    # distractor (no_distractor=True)
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="MoveNearGoogleInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs={"no_distractor": True},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # backgrounds
+ for scene in ["google_pick_coke_can_1_v4_alt_background", "google_pick_coke_can_1_v4_alt_background_2"]:
+ cfg = ManiSkill2Config(**base_kwargs, env_name="MoveNearGoogleInScene-v0", scene_name=scene)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # lighting
+ for k in [{"slightly_darker_lighting": True}, {"slightly_brighter_lighting": True}]:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="MoveNearGoogleInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ additional_env_build_kwargs=k,
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # table textures (baked)
+ for scene in ["Baked_sc1_staging_objaverse_cabinet1_h870", "Baked_sc1_staging_objaverse_cabinet2_h870"]:
+ cfg = ManiSkill2Config(**base_kwargs, env_name="MoveNearGoogleInScene-v0", scene_name=scene)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # camera orientations
+ for env in ["MoveNearAltGoogleCameraInScene-v0", "MoveNearAltGoogleCamera2InScene-v0"]:
+ cfg = ManiSkill2Config(**base_kwargs, env_name=env, scene_name="google_pick_coke_can_1_v4")
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+def move_near_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- move_near_visual_matching ---")
+ results: List[List[bool]] = []
+
+ base_kwargs = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=80,
+ robot_init_x_range=[0.35, 0.35, 1],
+ robot_init_y_range=[0.21, 0.21, 1],
+ obj_variation_mode="episode",
+ obj_episode_range=[0, 60],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.09, -0.09, 1],
+ ckpt_path=ckpt_path,
+ )
+
+ urdf_versions = [None, "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", "recolor_cabinet_visual_matching_1"]
+ for urdf in urdf_versions:
+ cfg = ManiSkill2Config(
+ **base_kwargs,
+ env_name="MoveNearGoogleBakedTexInScene-v0",
+ scene_name="google_pick_coke_can_1_v4",
+ rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/google_move_near_real_eval_1.png",
+ additional_env_save_tags="baked_except_bpb_orange",
+ additional_env_build_kwargs={"urdf_version": urdf},
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+def put_in_drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- put_in_drawer_visual_matching ---")
+ results: List[List[bool]] = []
+
+ env_names = ["PlaceIntoClosedTopDrawerCustomInScene-v0"]
urdf_versions = ["recolor_cabinet_visual_matching_1", "recolor_tabletop_visual_matching_1", "recolor_tabletop_visual_matching_2", None]
base = dict(
@@ -533,76 +568,38 @@ def drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[
policy_setup="google_robot",
control_freq=3,
sim_freq=513,
- max_episode_steps=113,
+ max_episode_steps=200,
ckpt_path=ckpt_path,
robot_init_rot_quat_center=[0, 0, 0, 1],
- obj_init_x_range=[0, 0, 1],
- obj_init_y_range=[0, 0, 1],
- scene_name="dummy_drawer",
+ obj_init_x_range=[-0.08, -0.02, 3],
+ obj_init_y_range=[-0.02, 0.08, 3],
)
- # 9 overlay poses (A0/A1/A2/B0/B1/B2/C0/C1/C2)
overlay_poses = [
- # A0/A1/A2
+ # A0
dict(
robot_init_x_range=[0.644, 0.644, 1],
robot_init_y_range=[-0.179, -0.179, 1],
robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.03, -0.03, 1],
rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a0.png",
),
- dict(
- robot_init_x_range=[0.765, 0.765, 1],
- robot_init_y_range=[-0.182, -0.182, 1],
- robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.02, -0.02, 1],
- rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a1.png",
- ),
- dict(
- robot_init_x_range=[0.889, 0.889, 1],
- robot_init_y_range=[-0.203, -0.203, 1],
- robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.06, -0.06, 1],
- rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_a2.png",
- ),
- # B0/B1/B2
+ # B0
dict(
robot_init_x_range=[0.652, 0.652, 1],
robot_init_y_range=[0.009, 0.009, 1],
robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b0.png",
),
- dict(
- robot_init_x_range=[0.752, 0.752, 1],
- robot_init_y_range=[0.009, 0.009, 1],
- robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
- rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b1.png",
- ),
- dict(
- robot_init_x_range=[0.851, 0.851, 1],
- robot_init_y_range=[0.035, 0.035, 1],
- robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
- rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_b2.png",
- ),
- # C0/C1/C2
+ # C0
dict(
robot_init_x_range=[0.665, 0.665, 1],
robot_init_y_range=[0.224, 0.224, 1],
robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c0.png",
),
- dict(
- robot_init_x_range=[0.765, 0.765, 1],
- robot_init_y_range=[0.222, 0.222, 1],
- robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.025, -0.025, 1],
- rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c1.png",
- ),
- dict(
- robot_init_x_range=[0.865, 0.865, 1],
- robot_init_y_range=[0.222, 0.222, 1],
- robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, -0.025, -0.025, 1],
- rgb_overlay_path="./ManiSkill2_real2sim/data/real_inpainting/open_drawer_c2.png",
- ),
]
- add_base = dict(station_name="mk_station_recolor", light_mode="simple", disable_bad_material=True)
+ add_base = dict(station_name="mk_station_recolor", light_mode="simple", disable_bad_material=True, model_ids="baked_apple_v2")
for urdf in urdf_versions:
add_kwargs = {**add_base, "urdf_version": urdf}
@@ -611,6 +608,7 @@ def drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[
cfg = ManiSkill2Config(
**base,
env_name=env_name,
+ scene_name="dummy_drawer",
additional_env_build_kwargs=add_kwargs,
**pose,
)
@@ -619,6 +617,60 @@ def drawer_visual_matching(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[
return results
+def put_in_drawer_variant_agg(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ print("\n--- put_in_drawer_variant_agg ---")
+ results: List[List[bool]] = []
+
+ common = dict(
+ robot="google_robot_static",
+ policy_setup="google_robot",
+ control_freq=3,
+ sim_freq=513,
+ max_episode_steps=200,
+ ckpt_path=ckpt_path,
+ additional_env_build_kwargs={"model_ids": "apple"},
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ obj_init_x_range=[-0.08, -0.02, 3],
+ obj_init_y_range=[-0.02, 0.08, 3],
+ robot_init_x_range=[0.65, 0.65, 1],
+ robot_init_y_range=[-0.2, 0.2, 3],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0.0, 0.0, 1],
+ )
+
+ env_names = ["PlaceIntoClosedTopDrawerCustomInScene-v0"]
+
+ # base (enable raytracing)
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**common, env_name=env_name, scene_name="frl_apartment_stage_simple", enable_raytracing=True)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # backgrounds (shader_dir=rt)
+ for scene in ["modern_bedroom_no_roof", "modern_office_no_roof"]:
+ merged = common["additional_env_build_kwargs"].copy()
+ merged["shader_dir"] = "rt"
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**common, env_name=env_name, scene_name=scene, additional_env_build_kwargs=merged)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # lightings
+ for light_mode in ["brighter", "darker"]:
+ merged = common["additional_env_build_kwargs"].copy()
+ merged.update({"shader_dir": "rt", "light_mode": light_mode})
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**common, env_name=env_name, scene_name="frl_apartment_stage_simple", additional_env_build_kwargs=merged)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ # new cabinets
+ for station in ["mk_station2", "mk_station3"]:
+ merged = common["additional_env_build_kwargs"].copy()
+ merged.update({"shader_dir": "rt", "station_name": station})
+ for env_name in env_names:
+ cfg = ManiSkill2Config(**common, env_name=env_name, scene_name="frl_apartment_stage_simple", additional_env_build_kwargs=merged)
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
# ======================================================================
# Comprehensive evaluation (weighting and scoring as before)
# ======================================================================
@@ -633,19 +685,23 @@ def run_comprehensive_evaluation(env_policy: AiroaBasePolicy, ckpt_path: str) ->
print(f"Weights: Sim={SIM_WEIGHT}, VisualMatching={VISUAL_MATCHING_WEIGHT}")
print("=" * 80)
    - # visual matching (overlay)
vm_results: List[List[bool]] = []
- vm_results += pick_coke_can_visual_matching(env_policy, ckpt_path)
- vm_results += move_near_visual_matching(env_policy, ckpt_path)
- vm_results += put_in_drawer_visual_matching(env_policy, ckpt_path)
+ sim_results: List[List[bool]] = []
+
+ # vm_results += pick_object_visual_matching(env_policy, ckpt_path)
+ sim_results += pick_object_variant_agg(env_policy, ckpt_path)
+
+ vm_results += pick_object_among_visual_matching(env_policy, ckpt_path)
+ sim_results += pick_object_among_variant_agg(env_policy, ckpt_path)
+
vm_results += drawer_visual_matching(env_policy, ckpt_path)
+ sim_results += drawer_variant_agg(env_policy, ckpt_path)
-    # simulation (variant_agg)
- sim_results: List[List[bool]] = []
- sim_results += pick_coke_can_variant_agg(env_policy, ckpt_path)
+ vm_results += move_near_visual_matching(env_policy, ckpt_path)
sim_results += move_near_variant_agg(env_policy, ckpt_path)
+
+ vm_results += put_in_drawer_visual_matching(env_policy, ckpt_path)
sim_results += put_in_drawer_variant_agg(env_policy, ckpt_path)
- sim_results += drawer_variant_agg(env_policy, ckpt_path)
    # robust scores
sim_score = calculate_robust_score(sim_results)
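The weighted combination of the two robust scores is outside this hunk; a minimal sketch of one plausible aggregation (an assumption, not the patch's actual formula), reusing the SIM_WEIGHT and VISUAL_MATCHING_WEIGHT constants printed above:

    # assumption: the final score is a weighted mean of the two robust scores
    vm_score = calculate_robust_score(vm_results)
    final_score = SIM_WEIGHT * sim_score + VISUAL_MATCHING_WEIGHT * vm_score
    print(f"Final weighted score: {final_score:.3f}")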
diff --git a/simpler_env/evaluation/maniskill2_evaluator.py b/simpler_env/evaluation/maniskill2_evaluator.py
index 8d6796df..c65b84f9 100644
--- a/simpler_env/evaluation/maniskill2_evaluator.py
+++ b/simpler_env/evaluation/maniskill2_evaluator.py
@@ -81,7 +81,9 @@ def run_maniskill2_eval_single_episode(
env_reset_options["obj_init_options"] = {
"episode_id": obj_episode_id,
}
- obs, _ = env.reset(options=env_reset_options)
+ episode_seed = np.random.randint(0, 100000)
+
+ obs, _ = env.reset(options=env_reset_options, seed=episode_seed)
# for long-horizon environments, we check if the current subtask is the final subtask
is_final_subtask = env.is_final_subtask()
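The reset seed above comes from NumPy's global RNG, so rollouts differ between runs unless np.random.seed is set beforehand. A minimal sketch of a reproducible alternative (hypothetical helper, not part of this patch), deriving the seed from a fixed run seed and the episode id:

    # hypothetical: deterministic per-episode seed instead of np.random.randint
    def episode_seed_for(run_seed: int, obj_episode_id: int) -> int:
        # mix a fixed run seed with the episode id so repeated runs reset identically
        return (run_seed * 100_003 + obj_episode_id) % 100_000

    obs, _ = env.reset(options=env_reset_options, seed=episode_seed_for(0, obj_episode_id))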
From 6e832d2e75c78867008576df27166d1234e4799f Mon Sep 17 00:00:00 2001
From: Penrose0v0
Date: Thu, 28 Aug 2025 23:31:23 +0000
Subject: [PATCH 08/57] Init bridge challenge
---
scripts/openpi/challenge_bridge.py | 39 +++
scripts/openpi/task_bridge_1.py | 24 ++
scripts/openpi/task_bridge_2.py | 24 ++
scripts/openpi/task_bridge_3.py | 24 ++
scripts/openpi/task_bridge_4.py | 24 ++
simpler_env/evaluation/bridge_tasks.py | 107 ++++++
simpler_env/evaluation/random_envs.py | 440 +++++++++++++++++++++++++
7 files changed, 682 insertions(+)
create mode 100644 scripts/openpi/challenge_bridge.py
create mode 100644 scripts/openpi/task_bridge_1.py
create mode 100644 scripts/openpi/task_bridge_2.py
create mode 100644 scripts/openpi/task_bridge_3.py
create mode 100644 scripts/openpi/task_bridge_4.py
create mode 100644 simpler_env/evaluation/bridge_tasks.py
create mode 100644 simpler_env/evaluation/random_envs.py
diff --git a/scripts/openpi/challenge_bridge.py b/scripts/openpi/challenge_bridge.py
new file mode 100644
index 00000000..15bf59db
--- /dev/null
+++ b/scripts/openpi/challenge_bridge.py
@@ -0,0 +1,39 @@
+import argparse
+
+from simpler_env.evaluation.bridge_tasks import (
+ widowx_task1_pick_object,
+ widowx_task2_stack_cube,
+ widowx_task3_put_object_on_top,
+ widowx_task4_put_object_in_basket
+)
+from simpler_env.policies.openpi.pi0_or_fast import OpenPiFastInference
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = OpenPiFastInference(saved_model_path=ckpt_path, policy_setup="widowx_bridge", action_scale=1.0)
+
+ print("Policy initialized. Starting evaluation...")
+
+ tasks = [
+ widowx_task1_pick_object,
+ widowx_task2_stack_cube,
+ widowx_task3_put_object_on_top,
+ widowx_task4_put_object_in_basket
+ ]
+
+ final_scores = []
+ for task in tasks:
+ cur_scores = task(env_policy=policy, ckpt_path=args.ckpt_path)
+ final_scores += cur_scores
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
diff --git a/scripts/openpi/task_bridge_1.py b/scripts/openpi/task_bridge_1.py
new file mode 100644
index 00000000..2a92cc91
--- /dev/null
+++ b/scripts/openpi/task_bridge_1.py
@@ -0,0 +1,24 @@
+import argparse
+
+from simpler_env.evaluation.bridge_tasks import widowx_task1_pick_object
+from simpler_env.policies.openpi.pi0_or_fast import OpenPiFastInference
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = OpenPiFastInference(saved_model_path=ckpt_path, policy_setup="widowx_bridge", action_scale=1.0)
+
+ print("Policy initialized. Starting evaluation...")
+
+ final_scores = widowx_task1_pick_object(env_policy=policy, ckpt_path=args.ckpt_path)
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
diff --git a/scripts/openpi/task_bridge_2.py b/scripts/openpi/task_bridge_2.py
new file mode 100644
index 00000000..ef63251b
--- /dev/null
+++ b/scripts/openpi/task_bridge_2.py
@@ -0,0 +1,24 @@
+import argparse
+
+from simpler_env.evaluation.bridge_tasks import widowx_task2_stack_cube
+from simpler_env.policies.openpi.pi0_or_fast import OpenPiFastInference
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = OpenPiFastInference(saved_model_path=ckpt_path, policy_setup="widowx_bridge", action_scale=1.0)
+
+ print("Policy initialized. Starting evaluation...")
+
+ final_scores = widowx_task2_stack_cube(env_policy=policy, ckpt_path=args.ckpt_path)
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
diff --git a/scripts/openpi/task_bridge_3.py b/scripts/openpi/task_bridge_3.py
new file mode 100644
index 00000000..bdc36d31
--- /dev/null
+++ b/scripts/openpi/task_bridge_3.py
@@ -0,0 +1,24 @@
+import argparse
+
+from simpler_env.evaluation.bridge_tasks import widowx_task3_put_object_on_top
+from simpler_env.policies.openpi.pi0_or_fast import OpenPiFastInference
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = OpenPiFastInference(saved_model_path=ckpt_path, policy_setup="widowx_bridge", action_scale=1.0)
+
+ print("Policy initialized. Starting evaluation...")
+
+ final_scores = widowx_task3_put_object_on_top(env_policy=policy, ckpt_path=args.ckpt_path)
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
diff --git a/scripts/openpi/task_bridge_4.py b/scripts/openpi/task_bridge_4.py
new file mode 100644
index 00000000..84241db5
--- /dev/null
+++ b/scripts/openpi/task_bridge_4.py
@@ -0,0 +1,24 @@
+import argparse
+
+from simpler_env.evaluation.bridge_tasks import widowx_task4_put_object_in_basket
+from simpler_env.policies.openpi.pi0_or_fast import OpenPiFastInference
+
+
+def parse_args():
+ parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
+ parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = parse_args()
+ ckpt_path = args.ckpt_path
+
+ policy = OpenPiFastInference(saved_model_path=ckpt_path, policy_setup="widowx_bridge", action_scale=1.0)
+
+ print("Policy initialized. Starting evaluation...")
+
+ final_scores = widowx_task4_put_object_in_basket(env_policy=policy, ckpt_path=args.ckpt_path)
+
+ print("\nEvaluation finished.")
+ print(f"Final calculated scores: {final_scores}")
diff --git a/simpler_env/evaluation/bridge_tasks.py b/simpler_env/evaluation/bridge_tasks.py
new file mode 100644
index 00000000..809625e4
--- /dev/null
+++ b/simpler_env/evaluation/bridge_tasks.py
@@ -0,0 +1,107 @@
+from typing import Any, Dict, List
+
+from ..policies.base import AiroaBasePolicy
+from .config import ManiSkill2Config
+from .evaluate import _run_single_evaluation
+
+from . import random_envs
+
+
+def widowx_task1_pick_object(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ results: List[List[bool]] = []
+
+ cfg = ManiSkill2Config(
+ policy_setup="widowx_bridge",
+ robot="widowx",
+ control_freq=5,
+ sim_freq=500,
+ max_episode_steps=60,
+ env_name="GraspRandomObjectInScene-v0",
+ scene_name="bridge_table_1_v1",
+ rgb_overlay_path="ManiSkill2_real2sim/data/real_inpainting/bridge_real_eval_1.png",
+ robot_init_x_range=[0.147, 0.147, 1],
+ robot_init_y_range=[0.028, 0.028, 1],
+ obj_variation_mode="episode",
+ obj_episode_range=[0, 24],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ ckpt_path=ckpt_path,
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+def widowx_task2_stack_cube(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ results: List[List[bool]] = []
+
+ cfg = ManiSkill2Config(
+ policy_setup="widowx_bridge",
+ robot="widowx",
+ control_freq=5,
+ sim_freq=500,
+ max_episode_steps=60,
+ env_name="StackRandomGreenYellowCubeInScene-v0",
+ scene_name="bridge_table_1_v1",
+ rgb_overlay_path="ManiSkill2_real2sim/data/real_inpainting/bridge_real_eval_1.png",
+ robot_init_x_range=[0.147, 0.147, 1],
+ robot_init_y_range=[0.028, 0.028, 1],
+ obj_variation_mode="episode",
+ obj_episode_range=[0, 24],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ ckpt_path=ckpt_path,
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+def widowx_task3_put_object_on_top(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ results: List[List[bool]] = []
+
+ cfg = ManiSkill2Config(
+ policy_setup="widowx_bridge",
+ robot="widowx",
+ control_freq=5,
+ sim_freq=500,
+ max_episode_steps=60,
+ env_name="PutRandomObjectOnRandomTopInScene-v0",
+ scene_name="bridge_table_1_v1",
+ rgb_overlay_path="ManiSkill2_real2sim/data/real_inpainting/bridge_real_eval_1.png",
+ robot_init_x_range=[0.147, 0.147, 1],
+ robot_init_y_range=[0.028, 0.028, 1],
+ obj_variation_mode="episode",
+ obj_episode_range=[0, 24],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ ckpt_path=ckpt_path,
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
+
+
+def widowx_task4_put_object_in_basket(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+ results: List[List[bool]] = []
+
+ cfg = ManiSkill2Config(
+ policy_setup="widowx_bridge",
+ robot="widowx_sink_camera_setup",
+ control_freq=5,
+ sim_freq=500,
+ max_episode_steps=120,
+ env_name="PutRandomObjectInBasketScene-v0",
+ scene_name="bridge_table_1_v2",
+ rgb_overlay_path="ManiSkill2_real2sim/data/real_inpainting/bridge_sink.png",
+ robot_init_x_range=[0.127, 0.127, 1],
+ robot_init_y_range=[0.06, 0.06, 1],
+ obj_variation_mode="episode",
+ obj_episode_range=[0, 24],
+ robot_init_rot_quat_center=[0, 0, 0, 1],
+ robot_init_rot_rpy_range=[0, 0, 1, 0, 0, 1, 0, 0, 1],
+ ckpt_path=ckpt_path,
+ )
+ results.append(_run_single_evaluation(env_policy, cfg, ckpt_path))
+
+ return results
diff --git a/simpler_env/evaluation/random_envs.py b/simpler_env/evaluation/random_envs.py
new file mode 100644
index 00000000..aeb50101
--- /dev/null
+++ b/simpler_env/evaluation/random_envs.py
@@ -0,0 +1,440 @@
+from collections import OrderedDict
+from typing import List
+
+import sapien.core as sapien
+from transforms3d.euler import euler2quat
+import numpy as np
+
+from ManiSkill2_real2sim.mani_skill2_real2sim.utils.registration import register_env
+from ManiSkill2_real2sim.mani_skill2_real2sim.utils.common import random_choice
+from ManiSkill2_real2sim.mani_skill2_real2sim.envs.custom_scenes.put_on_in_scene import PutOnBridgeInSceneEnv
+from ManiSkill2_real2sim.mani_skill2_real2sim import ASSET_DIR
+
+DEFAULT_OBJECTS = [
+ "yellow_cube_3cm",
+ "green_cube_3cm",
+ "eggplant",
+ "bridge_spoon_generated_modified",
+ "bridge_carrot_generated_modified",
+]
+
+DEFAULT_TOPS = [
+ "bridge_plate_objaverse_larger",
+ "table_cloth_generated_shorter",
+]
+
+
+# Task 1
+@register_env("GraspRandomObjectInScene-v0", max_episode_steps=60)
+class GraspRandomObjectInScene(PutOnBridgeInSceneEnv):
+ def __init__(self, candidate_source_names: List[str] = DEFAULT_OBJECTS, grasp_hold_seconds: float = 5.0, **kwargs):
+ xy_center = np.array([-0.16, 0.00])
+ half_edge_length_x = 0.075
+ half_edge_length_y = 0.075
+ grid_pos = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) * 2 - 1
+ grid_pos = (
+ grid_pos * np.array([half_edge_length_x, half_edge_length_y])[None]
+ + xy_center[None]
+ )
+
+ xy_configs = []
+ for i, p1 in enumerate(grid_pos):
+ for j, p2 in enumerate(grid_pos):
+ if i != j:
+ xy_configs.append(np.array([p1, p2]))
+
+ quat_configs = [
+ np.array([[1, 0, 0, 0], [1, 0, 0, 0]]),
+ np.array([euler2quat(0, 0, np.pi / 2), [1, 0, 0, 0]]),
+ np.array([euler2quat(0, 0, np.pi), [1, 0, 0, 0]]),
+ ]
+
+ self._placeholder_src = "__random_src_placeholder__"
+ self._user_src_pool = candidate_source_names
+
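+        # convert the hold duration (seconds) into environment steps; assumes the 5 Hz control frequency used by these WidowX tasks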
+ self._grasp_hold_steps = int(grasp_hold_seconds * 5)
+ self.consecutive_grasp = 0
+
+ super().__init__(
+ source_obj_name=self._placeholder_src,
+ target_obj_name="dummy_sink_target_plane",
+ xy_configs=xy_configs,
+ quat_configs=quat_configs,
+ **kwargs,
+ )
+
+ def _initialize_actors(self):
+ super()._initialize_actors()
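+        # task 1 only grasps, so hide the dummy target plane inherited from PutOnBridgeInSceneEnv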
+ if self.episode_target_obj is not None:
+ self.episode_target_obj.hide_visual()
+
+ def reset(self, seed=None, options=None):
+ if options is None:
+ options = dict()
+ options = options.copy()
+ self.set_episode_rng(seed)
+
+ if self._user_src_pool is not None:
+ src_candidates = list(self._user_src_pool)
+ else:
+ ban = {"sink", "dummy_sink_target_plane", "table_cloth_generated_shorter"}
+ ban_kw = ["sink", "plane", "cloth", "towel", "target"]
+ src_candidates = [
+ k for k in self.model_db.keys()
+ if (k not in ban) and all(kw not in k for kw in ban_kw)
+ ]
+ assert len(src_candidates) > 0, "No valid source objects to grasp."
+
+ self._source_obj_name = random_choice(src_candidates)
+
+ self.consecutive_grasp = 0
+ self._grasp_success_locked = False
+
+ obs, info = super().reset(seed=self._episode_seed, options=options)
+ info.update({"random_source_obj_name": self._source_obj_name})
+ return obs, info
+
+ def _initialize_episode_stats(self):
+ self.episode_stats = OrderedDict(
+ is_src_obj_grasped=False,
+ consecutive_grasp=False,
+ success=False,
+ )
+
+ def evaluate(self, **kwargs):
+ is_src_obj_grasped = self.agent.check_grasp(self.episode_source_obj)
+
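+        # once the grasp has been held long enough, success stays latched for the rest of the episode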
+ if self._grasp_success_locked:
+ success = True
+ consecutive_grasp = True
+ else:
+ if is_src_obj_grasped:
+ self.consecutive_grasp += 1
+ else:
+ self.consecutive_grasp = 0
+
+ consecutive_grasp = self.consecutive_grasp >= self._grasp_hold_steps
+ success = consecutive_grasp
+
+ if success:
+ self._grasp_success_locked = True
+
+ self.episode_stats["is_src_obj_grasped"] = (
+ self.episode_stats["is_src_obj_grasped"] or is_src_obj_grasped
+ )
+ self.episode_stats["consecutive_grasp"] = (
+ self.episode_stats["consecutive_grasp"] or consecutive_grasp
+ )
+ self.episode_stats["success"] = success
+
+ return dict(
+ is_src_obj_grasped=is_src_obj_grasped,
+ consecutive_grasp=consecutive_grasp,
+ episode_stats=self.episode_stats,
+ success=success,
+ )
+
+ def get_language_instruction(self, **kwargs):
+ src_name = self._get_instruction_obj_name(self.episode_source_obj.name)
+ return f"pick up {src_name}"
+
+
+# Task 2
+@register_env("StackRandomGreenYellowCubeInScene-v0", max_episode_steps=60)
+class StackRandomGreenYellowCubeInScene(PutOnBridgeInSceneEnv):
+ def __init__(self, **kwargs):
+ xy_center = np.array([-0.16, 0.00])
+ half_edge_length_xs = [0.05, 0.1]
+ half_edge_length_ys = [0.05, 0.1]
+ xy_configs = []
+ for (half_edge_length_x, half_edge_length_y) in zip(
+ half_edge_length_xs, half_edge_length_ys
+ ):
+ grid_pos = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) * 2 - 1
+ grid_pos = (
+ grid_pos * np.array([half_edge_length_x, half_edge_length_y])[None]
+ + xy_center[None]
+ )
+ for i, p1 in enumerate(grid_pos):
+ for j, p2 in enumerate(grid_pos):
+ if i != j:
+ xy_configs.append(np.array([p1, p2]))
+
+ quat_configs = [np.array([[1, 0, 0, 0], [1, 0, 0, 0]])]
+
+ self._placeholder_src = "__random_src_placeholder__"
+ self._placeholder_tgt = "__random_tgt_placeholder__"
+
+ super().__init__(
+ source_obj_name=self._placeholder_src,
+ target_obj_name=self._placeholder_tgt,
+ xy_configs=xy_configs,
+ quat_configs=quat_configs,
+ **kwargs,
+ )
+
+ def reset(self, seed=None, options=None):
+ if options is None:
+ options = dict()
+ options = options.copy()
+
+ self.set_episode_rng(seed)
+
+ green = "green_cube_3cm"
+ yellow = "yellow_cube_3cm"
+
+ if random_choice([0, 1]):
+ src, tgt = green, yellow
+ else:
+ src, tgt = yellow, green
+
+ self._source_obj_name = src
+ self._target_obj_name = tgt
+
+ obs, info = super().reset(seed=self._episode_seed, options=options)
+ info.update({
+ "top_cube": self._source_obj_name,
+ "bottom_cube": self._target_obj_name,
+ })
+ return obs, info
+
+ def get_language_instruction(self, **kwargs):
+ src = "green block" if "green" in self.episode_source_obj.name else "yellow block"
+ tgt = "yellow block" if "yellow" in self.episode_target_obj.name else "green block"
+ return f"stack the {src} on the {tgt}"
+
+
+# Task 3
+@register_env("PutRandomObjectOnRandomTopInScene-v0", max_episode_steps=60)
+class PutRandomObjectOnRandomTopInScene(PutOnBridgeInSceneEnv):
+ def __init__(
+ self,
+ candidate_source_names: List[str] = DEFAULT_OBJECTS,
+ candidate_target_names: List[str] = DEFAULT_TOPS,
+ **kwargs,
+ ):
+ xy_center = np.array([-0.16, 0.00])
+ half_edge_length_x = 0.075
+ half_edge_length_y = 0.075
+ grid_pos = np.array([[0, 0], [0, 1], [1, 0], [1, 1]]) * 2 - 1
+ grid_pos = (
+ grid_pos * np.array([half_edge_length_x, half_edge_length_y])[None]
+ + xy_center[None]
+ )
+
+ xy_configs = []
+ for i, p1 in enumerate(grid_pos):
+ for j, p2 in enumerate(grid_pos):
+ if i != j:
+ xy_configs.append(np.array([p1, p2]))
+
+ quat_configs = [
+ np.array([[1, 0, 0, 0], [1, 0, 0, 0]]),
+ np.array([euler2quat(0, 0, np.pi / 2), [1, 0, 0, 0]]),
+ np.array([euler2quat(0, 0, np.pi), [1, 0, 0, 0]]),
+ ]
+
+ self._placeholder_src = "__random_src_placeholder__"
+ self._placeholder_tgt = "__random_tgt_placeholder__"
+
+ self._user_src_pool = candidate_source_names
+ self._user_tgt_pool = candidate_target_names
+
+ super().__init__(
+ source_obj_name=self._placeholder_src,
+ target_obj_name=self._placeholder_tgt,
+ xy_configs=xy_configs,
+ quat_configs=quat_configs,
+ **kwargs,
+ )
+
+ def reset(self, seed=None, options=None):
+ if options is None:
+ options = dict()
+ options = options.copy()
+
+ self.set_episode_rng(seed)
+
+ if self._user_src_pool is not None:
+ src_candidates = list(self._user_src_pool)
+ else:
+ ban = {"sink", "dummy_sink_target_plane"}
+            ban_kw = ["sink", "plane", "cloth", "towel", "target"]  # the source object should not be a container or flat surface
+ src_candidates = [
+ k for k in self.model_db.keys()
+ if (k not in ban) and all(kw not in k for kw in ban_kw)
+ ]
+
+ if self._user_tgt_pool is not None:
+ tgt_candidates = list(self._user_tgt_pool)
+ else:
+ prefer_kw = ["plate", "bowl", "tray", "cloth", "towel"]
+ tgt_candidates = [
+ k for k in self.model_db.keys()
+ if any(kw in k for kw in prefer_kw)
+ ]
+
+ assert len(src_candidates) > 0, "No valid source objects for random put-on task."
+ assert len(tgt_candidates) > 0, "No valid container candidates for random put-on task."
+
+ chosen_src = random_choice(src_candidates)
+ chosen_tgt = random_choice(tgt_candidates)
+ if chosen_src == chosen_tgt and len(src_candidates) > 1:
+ alt = [x for x in src_candidates if x != chosen_tgt]
+ chosen_src = random_choice(alt, self._episode_rng)
+
+ self._source_obj_name = chosen_src
+ self._target_obj_name = chosen_tgt
+
+ obs, info = super().reset(seed=self._episode_seed, options=options)
+ info.update({
+ "random_source_obj_name": chosen_src,
+ "random_container_obj_name": chosen_tgt,
+ })
+ return obs, info
+
+ def evaluate(self, **kwargs):
+ tgt_name = self.episode_target_obj.name if self.episode_target_obj is not None else ""
+ soft = ("cloth" in tgt_name) or ("towel" in tgt_name)
+ if soft:
+ return super().evaluate(success_require_src_completely_on_target=False, **kwargs)
+ else:
+ return super().evaluate(success_require_src_completely_on_target=True, **kwargs)
+
+ def get_language_instruction(self, **kwargs):
+ src_name = self._get_instruction_obj_name(self.episode_source_obj.name)
+ tgt_name = self._get_instruction_obj_name(self.episode_target_obj.name)
+ tgt = "plate" if "plate" in tgt_name else "towel"
+ return f"put {src_name} on the {tgt}"
+
+
+# Task 4
+@register_env("PutRandomObjectInBasketScene-v0", max_episode_steps=120)
+class PutRandomObjectInBasketScene(PutOnBridgeInSceneEnv):
+ def __init__(self, candidate_source_names: List[str] = DEFAULT_OBJECTS, **kwargs):
+ target_obj_name = "dummy_sink_target_plane"
+
+ target_xy = np.array([-0.125, 0.025])
+ xy_center = [-0.105, 0.206]
+ half_span_x = 0.01
+ half_span_y = 0.015
+ num_x = 2
+ num_y = 4
+
+ grid_pos = []
+ for x in np.linspace(-half_span_x, half_span_x, num_x):
+ for y in np.linspace(-half_span_y, half_span_y, num_y):
+ grid_pos.append(np.array([x + xy_center[0], y + xy_center[1]]))
+ xy_configs = [np.stack([pos, target_xy], axis=0) for pos in grid_pos]
+
+ quat_configs = [
+ np.array([euler2quat(0, 0, 0, "sxyz"), [1, 0, 0, 0]]),
+ np.array([euler2quat(0, 0, 1 * np.pi / 4, "sxyz"), [1, 0, 0, 0]]),
+ np.array([euler2quat(0, 0, -1 * np.pi / 4, "sxyz"), [1, 0, 0, 0]]),
+ ]
+
+ self._placeholder_src = "__random_src_placeholder__"
+ self._candidate_source_names = candidate_source_names
+
+ super().__init__(
+ source_obj_name=self._placeholder_src,
+ target_obj_name=target_obj_name,
+ xy_configs=xy_configs,
+ quat_configs=quat_configs,
+ rgb_always_overlay_objects=["sink", "dummy_sink_target_plane"],
+ **kwargs,
+ )
+
+ def _load_model(self):
+ super()._load_model()
+ self.sink_id = "sink"
+ self.sink = self._build_actor_helper(
+ self.sink_id,
+ self._scene,
+ density=self.model_db[self.sink_id].get("density", 1000),
+ physical_material=self._scene.create_physical_material(
+ static_friction=self.obj_static_friction,
+ dynamic_friction=self.obj_dynamic_friction,
+ restitution=0.0,
+ ),
+ root_dir=self.asset_root,
+ )
+ self.sink.name = self.sink_id
+
+ def _initialize_actors(self):
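+        # temporarily park the robot far outside the scene while the sink and objects are positioned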
+ self.agent.robot.set_pose(sapien.Pose([-10, 0, 0]))
+ self.sink.set_pose(
+ sapien.Pose([-0.16, 0.13, 0.88], [1, 0, 0, 0])
+ )
+ self.sink.lock_motion()
+ super()._initialize_actors()
+
+ def evaluate(self, *args, **kwargs):
+ return super().evaluate(
+ success_require_src_completely_on_target=False,
+ z_flag_required_offset=0.06,
+ *args,
+ **kwargs,
+ )
+
+ def reset(self, seed=None, options=None):
+ if options is None:
+ options = dict()
+ options = options.copy()
+
+ self.set_episode_rng(seed)
+
+ if self._candidate_source_names is not None:
+ candidates = list(self._candidate_source_names)
+ else:
+ forbid = {
+ "sink",
+ "dummy_sink_target_plane",
+ "table_cloth_generated_shorter",
+ }
+ ban_keywords = ["sink", "plane", "cloth", "towel", "target"]
+ candidates = [
+ k
+ for k in self.model_db.keys()
+ if (k not in forbid)
+ and all(kw not in k for kw in ban_keywords)
+ ]
+
+ assert len(candidates) > 0, "No valid source objects found for random basket task."
+ self._source_obj_name = random_choice(candidates)
+
+ obs, info = super().reset(seed=self._episode_seed, options=options)
+ info.update({"random_source_obj_name": self._source_obj_name})
+ return obs, info
+
+ def _setup_prepackaged_env_init_config(self):
+ ret = super()._setup_prepackaged_env_init_config()
+ ret["robot"] = "widowx_sink_camera_setup"
+ ret["scene_name"] = "bridge_table_1_v2"
+ ret["rgb_overlay_path"] = str(ASSET_DIR / "real_inpainting/bridge_sink.png")
+ return ret
+
+ def _additional_prepackaged_config_reset(self, options):
+ options["robot_init_options"] = {
+ "init_xy": [0.127, 0.06],
+ "init_rot_quat": [0, 0, 0, 1],
+ }
+ return False
+
+ def _setup_lighting(self):
+ if self.bg_name is not None:
+ return
+ shadow = self.enable_shadow
+ self._scene.set_ambient_light([0.3, 0.3, 0.3])
+ self._scene.add_directional_light(
+ [0, 0, -1],
+ [0.3, 0.3, 0.3],
+ position=[0, 0, 1],
+ shadow=shadow,
+ scale=5,
+ shadow_map_size=2048,
+ )
+
+ def get_language_instruction(self, **kwargs):
+ src_name = self._get_instruction_obj_name(self.episode_source_obj.name)
+ return f"put {src_name} into yellow basket"
\ No newline at end of file
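A minimal smoke-test sketch for the newly registered environments (assumptions: the gymnasium-style API and the obs_mode/prepackaged_config kwargs used elsewhere in SimplerEnv, and that importing this module is what triggers the @register_env decorators):

    import gymnasium as gym

    import simpler_env.evaluation.random_envs  # noqa: F401  (importing registers the envs)

    env = gym.make("GraspRandomObjectInScene-v0", obs_mode="rgbd", prepackaged_config=True)
    obs, info = env.reset(seed=0)
    print(info["random_source_obj_name"], env.unwrapped.get_language_instruction())
    env.close()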
From be712c4ae6820950b337a4890bd35e6d00934431 Mon Sep 17 00:00:00 2001
From: Penrose0v0
Date: Thu, 28 Aug 2025 23:40:53 +0000
Subject: [PATCH 09/57] Changed name bridge -> widowx
---
scripts/openpi/{challenge_bridge.py => challenge_widowx.py} | 0
1 file changed, 0 insertions(+), 0 deletions(-)
rename scripts/openpi/{challenge_bridge.py => challenge_widowx.py} (100%)
diff --git a/scripts/openpi/challenge_bridge.py b/scripts/openpi/challenge_widowx.py
similarity index 100%
rename from scripts/openpi/challenge_bridge.py
rename to scripts/openpi/challenge_widowx.py
From 1a4c74acd7ab943c32fc662510c22cfc739f572b Mon Sep 17 00:00:00 2001
From: Penrose0v0
Date: Fri, 29 Aug 2025 00:21:41 +0000
Subject: [PATCH 10/57] Added control_freq
---
scripts/openpi/challenge_widowx.py | 5 +++-
scripts/openpi/task_bridge_1.py | 5 +++-
scripts/openpi/task_bridge_2.py | 5 +++-
scripts/openpi/task_bridge_3.py | 5 +++-
scripts/openpi/task_bridge_4.py | 5 +++-
simpler_env/evaluation/bridge_tasks.py | 36 ++++++++++++++++++++------
6 files changed, 48 insertions(+), 13 deletions(-)
diff --git a/scripts/openpi/challenge_widowx.py b/scripts/openpi/challenge_widowx.py
index 15bf59db..503997bc 100644
--- a/scripts/openpi/challenge_widowx.py
+++ b/scripts/openpi/challenge_widowx.py
@@ -12,6 +12,7 @@
def parse_args():
parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ parser.add_argument("--control-freq", type=int, default=5, help="Set control frequency (default->5)")
return parser.parse_args()
@@ -32,7 +33,9 @@ def parse_args():
final_scores = []
for task in tasks:
- cur_scores = task(env_policy=policy, ckpt_path=args.ckpt_path)
+ cur_scores = task(
+ env_policy=policy, ckpt_path=args.ckpt_path, control_freq=args.control_freq
+ )
final_scores += cur_scores
print("\nEvaluation finished.")
diff --git a/scripts/openpi/task_bridge_1.py b/scripts/openpi/task_bridge_1.py
index 2a92cc91..2c01f79d 100644
--- a/scripts/openpi/task_bridge_1.py
+++ b/scripts/openpi/task_bridge_1.py
@@ -7,6 +7,7 @@
def parse_args():
parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ parser.add_argument("--control-freq", type=int, default=5, help="Set control frequency (default->5)")
return parser.parse_args()
@@ -18,7 +19,9 @@ def parse_args():
print("Policy initialized. Starting evaluation...")
- final_scores = widowx_task1_pick_object(env_policy=policy, ckpt_path=args.ckpt_path)
+ final_scores = widowx_task1_pick_object(
+ env_policy=policy, ckpt_path=args.ckpt_path, control_freq=args.control_freq
+ )
print("\nEvaluation finished.")
print(f"Final calculated scores: {final_scores}")
diff --git a/scripts/openpi/task_bridge_2.py b/scripts/openpi/task_bridge_2.py
index ef63251b..e6761208 100644
--- a/scripts/openpi/task_bridge_2.py
+++ b/scripts/openpi/task_bridge_2.py
@@ -7,6 +7,7 @@
def parse_args():
parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ parser.add_argument("--control-freq", type=int, default=5, help="Set control frequency (default->5)")
return parser.parse_args()
@@ -18,7 +19,9 @@ def parse_args():
print("Policy initialized. Starting evaluation...")
- final_scores = widowx_task2_stack_cube(env_policy=policy, ckpt_path=args.ckpt_path)
+ final_scores = widowx_task2_stack_cube(
+ env_policy=policy, ckpt_path=args.ckpt_path, control_freq=args.control_freq
+ )
print("\nEvaluation finished.")
print(f"Final calculated scores: {final_scores}")
diff --git a/scripts/openpi/task_bridge_3.py b/scripts/openpi/task_bridge_3.py
index bdc36d31..b7b7d5c0 100644
--- a/scripts/openpi/task_bridge_3.py
+++ b/scripts/openpi/task_bridge_3.py
@@ -7,6 +7,7 @@
def parse_args():
parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ parser.add_argument("--control-freq", type=int, default=5, help="Set control frequency (default->5)")
return parser.parse_args()
@@ -18,7 +19,9 @@ def parse_args():
print("Policy initialized. Starting evaluation...")
- final_scores = widowx_task3_put_object_on_top(env_policy=policy, ckpt_path=args.ckpt_path)
+ final_scores = widowx_task3_put_object_on_top(
+ env_policy=policy, ckpt_path=args.ckpt_path, control_freq=args.control_freq
+ )
print("\nEvaluation finished.")
print(f"Final calculated scores: {final_scores}")
diff --git a/scripts/openpi/task_bridge_4.py b/scripts/openpi/task_bridge_4.py
index 84241db5..8fa919e3 100644
--- a/scripts/openpi/task_bridge_4.py
+++ b/scripts/openpi/task_bridge_4.py
@@ -7,6 +7,7 @@
def parse_args():
parser = argparse.ArgumentParser(description="Run Comprehensive ManiSkill2 Evaluation")
parser.add_argument("--ckpt-path", type=str, required=True, help="Path to the checkpoint to evaluate.")
+ parser.add_argument("--control-freq", type=int, default=5, help="Set control frequency (default->5)")
return parser.parse_args()
@@ -18,7 +19,9 @@ def parse_args():
print("Policy initialized. Starting evaluation...")
- final_scores = widowx_task4_put_object_in_basket(env_policy=policy, ckpt_path=args.ckpt_path)
+ final_scores = widowx_task4_put_object_in_basket(
+ env_policy=policy, ckpt_path=args.ckpt_path, control_freq=args.control_freq
+ )
print("\nEvaluation finished.")
print(f"Final calculated scores: {final_scores}")
diff --git a/simpler_env/evaluation/bridge_tasks.py b/simpler_env/evaluation/bridge_tasks.py
index 809625e4..11ac423f 100644
--- a/simpler_env/evaluation/bridge_tasks.py
+++ b/simpler_env/evaluation/bridge_tasks.py
@@ -7,13 +7,18 @@
from . import random_envs
-def widowx_task1_pick_object(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+def widowx_task1_pick_object(
+ env_policy: AiroaBasePolicy,
+ ckpt_path: str,
+ control_freq: int = 5
+ ) -> List[List[bool]]:
+
results: List[List[bool]] = []
cfg = ManiSkill2Config(
policy_setup="widowx_bridge",
robot="widowx",
- control_freq=5,
+ control_freq=control_freq,
sim_freq=500,
max_episode_steps=60,
env_name="GraspRandomObjectInScene-v0",
@@ -32,13 +37,18 @@ def widowx_task1_pick_object(env_policy: AiroaBasePolicy, ckpt_path: str) -> Lis
return results
-def widowx_task2_stack_cube(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+def widowx_task2_stack_cube(
+ env_policy: AiroaBasePolicy,
+ ckpt_path: str,
+ control_freq: int = 5
+ ) -> List[List[bool]]:
+
results: List[List[bool]] = []
cfg = ManiSkill2Config(
policy_setup="widowx_bridge",
robot="widowx",
- control_freq=5,
+ control_freq=control_freq,
sim_freq=500,
max_episode_steps=60,
env_name="StackRandomGreenYellowCubeInScene-v0",
@@ -57,13 +67,18 @@ def widowx_task2_stack_cube(env_policy: AiroaBasePolicy, ckpt_path: str) -> List
return results
-def widowx_task3_put_object_on_top(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+def widowx_task3_put_object_on_top(
+ env_policy: AiroaBasePolicy,
+ ckpt_path: str,
+ control_freq: int = 5
+ ) -> List[List[bool]]:
+
results: List[List[bool]] = []
cfg = ManiSkill2Config(
policy_setup="widowx_bridge",
robot="widowx",
- control_freq=5,
+ control_freq=control_freq,
sim_freq=500,
max_episode_steps=60,
env_name="PutRandomObjectOnRandomTopInScene-v0",
@@ -82,13 +97,18 @@ def widowx_task3_put_object_on_top(env_policy: AiroaBasePolicy, ckpt_path: str)
return results
-def widowx_task4_put_object_in_basket(env_policy: AiroaBasePolicy, ckpt_path: str) -> List[List[bool]]:
+def widowx_task4_put_object_in_basket(
+ env_policy: AiroaBasePolicy,
+ ckpt_path: str,
+ control_freq: int = 5
+ ) -> List[List[bool]]:
+
results: List[List[bool]] = []
cfg = ManiSkill2Config(
policy_setup="widowx_bridge",
robot="widowx_sink_camera_setup",
- control_freq=5,
+ control_freq=control_freq,
sim_freq=500,
max_episode_steps=120,
env_name="PutRandomObjectInBasketScene-v0",
From 99d3aab361ad9f7b4dc2c6c52293f86fd64901f9 Mon Sep 17 00:00:00 2001
From: Penrose0v0
Date: Fri, 29 Aug 2025 00:21:58 +0000
Subject: [PATCH 11/57] Init readme
---
scripts/README.md | 69 +++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 69 insertions(+)
create mode 100644 scripts/README.md
diff --git a/scripts/README.md b/scripts/README.md
new file mode 100644
index 00000000..0068fbd6
--- /dev/null
+++ b/scripts/README.md
@@ -0,0 +1,69 @@
+## Introduction
+
+### WidowX
+**Task Settings**
+
+| Task | Challenge | Task Definition |
+|--------|-------------|--------------------------------|
+| 1 | 1 | Pick `