
Commit de0280a

zrz-sh, XuS1994, and andylin-hao authored
feat(embodiment): add robocasa environment for openpi models (RLinf#437)
Signed-off-by: zhangruize <2290321870@qq.com>
Signed-off-by: Hao Lin <linhaomails@gmail.com>
Co-authored-by: xusi <xusiforwork@gmail.com>
Co-authored-by: Hao Lin <linhaomails@gmail.com>
1 parent d7205ce commit de0280a

File tree

24 files changed: +2214 / -8 lines


.github/workflows/docker-build.yml

Lines changed: 59 additions & 0 deletions
@@ -298,6 +298,65 @@ jobs:
           NO_MIRROR=true
         outputs: type=cacheonly
         tags: rlinf:embodied-calvin
+
+  build-embodied-robocasa:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Maximize storage space
+        run: |
+          # Remove Java (JDKs)
+          sudo rm -rf /usr/lib/jvm
+
+          # Remove .NET SDKs
+          sudo rm -rf /usr/share/dotnet
+
+          # Remove Swift toolchain
+          sudo rm -rf /usr/share/swift
+
+          # Remove Haskell (GHC)
+          sudo rm -rf /usr/local/.ghcup
+
+          # Remove Julia
+          sudo rm -rf /usr/local/julia*
+
+          # Remove Android SDKs
+          sudo rm -rf /usr/local/lib/android
+
+          # Remove Chromium (optional if not using for browser tests)
+          sudo rm -rf /usr/local/share/chromium
+
+          # Remove Microsoft/Edge and Google Chrome builds
+          sudo rm -rf /opt/microsoft /opt/google
+
+          # Remove Azure CLI
+          sudo rm -rf /opt/az
+
+          # Remove PowerShell
+          sudo rm -rf /usr/local/share/powershell
+
+          # Remove CodeQL and other toolcaches
+          sudo rm -rf /opt/hostedtoolcache
+
+          docker system prune -af || true
+          docker builder prune -af || true
+          df -h
+
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Build embodied-behavior
+        uses: docker/build-push-action@v6
+        with:
+          file: ./docker/Dockerfile
+          push: false
+          build-args: |
+            BUILD_TARGET=embodied-robocasa
+            NO_MIRROR=true
+          outputs: type=cacheonly
+          tags: rlinf:embodied-robocasa
 
   build-embodied-isaaclab:
     runs-on: ubuntu-latest

.github/workflows/embodied-e2e-tests.yml

Lines changed: 28 additions & 0 deletions
@@ -261,6 +261,34 @@ jobs:
           source .venv/bin/activate
           bash tests/e2e_tests/embodied/run.sh calvin_ppo_openpi
 
+      - name: Clean up
+        run: |
+          rm -rf .venv
+          uv cache prune
+
+  embodied-openpi-robocasa-test:
+    runs-on: embodied
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Create embodied environment
+        run: |
+          unset UV_DEFAULT_INDEX
+          export UV_PATH=/workspace/dataset/.uv
+          export UV_LINK_MODE=symlink
+          export UV_CACHE_DIR=/workspace/dataset/.uv_cache
+          export UV_PYTHON_INSTALL_DIR=/workspace/dataset/.uv_python
+          export ROBOCASA_PATH=/workspace/dataset/robocasa
+          bash requirements/install.sh embodied --model openpi --env robocasa
+
+      - name: Robocasa GRPO test
+        timeout-minutes: 20
+        run: |
+          export REPO_PATH=$(pwd)
+          source .venv/bin/activate
+          bash tests/e2e_tests/embodied/run.sh robocasa_grpo_openpi
+
       - name: Clean up
         run: |
           rm -rf .venv

README.md

Lines changed: 3 additions & 2 deletions
@@ -30,6 +30,7 @@ RLinf is a flexible and scalable open-source infrastructure designed for post-tr
 
 
 ## What's NEW!
+- [2025/12] 🔥 RLinf supports reinforcement learning fine-tuning for [RoboCasa](https://github.com/robocasa/robocasa). Doc: [RL on Robocasa](https://rlinf.readthedocs.io/en/latest/rst_source/examples/robocasa.html).
 - [2025/12] 🎉 RLinf official release of [v0.1](https://github.com/RLinf/RLinf/releases/tag/v0.1).
 - [2025/11] 🔥 RLinf supports reinforcement learning fine-tuning for [CALVIN](https://github.com/mees/calvin). Doc: [RL on CALVIN](https://rlinf.readthedocs.io/en/latest/rst_source/examples/calvin.html).
 - [2025/11] 🔥 RLinf supports reinforcement learning fine-tuning for [IsaacLab](https://github.com/isaac-sim/IsaacLab). Doc: [RL on IsaacLab](https://rlinf.readthedocs.io/en/latest/rst_source/examples/isaaclab.html).
@@ -71,7 +72,7 @@ RLinf is a flexible and scalable open-source infrastructure designed for post-tr
         <li><a href="https://rlinf.readthedocs.io/en/latest/rst_source/examples/metaworld.html">MetaWorld</a> ✅</li>
         <li><a href="https://rlinf.readthedocs.io/en/latest/rst_source/examples/isaaclab.html">IsaacLab</a> ✅</li>
         <li><a href="https://rlinf.readthedocs.io/en/latest/rst_source/examples/calvin.html">CALVIN</a> ✅</li>
-        <li>RoboCasa</li>
+        <li><a href="https://rlinf.readthedocs.io/en/latest/rst_source/examples/robocasa.html">RoboCasa</a> ✅</li>
         <li>More...</li>
       </ul>
     </td>
@@ -562,7 +563,7 @@ and exhibits greater stability.
 - [X] Support for Vision-Language Models (VLMs) training
 - [ ] Support for deep searcher agent training
 - [ ] Support for multi-agent training
-- [ ] Support for integration with more embodied simulators (e.g., [RoboCasa](https://github.com/robocasa/robocasa), [GENESIS](https://github.com/Genesis-Embodied-AI/Genesis), [RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin))
+- [ ] Support for integration with more embodied simulators (e.g., [GENESIS](https://github.com/Genesis-Embodied-AI/Genesis), [RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin))
 - [ ] Support for more Vision Language Action models (VLAs) (e.g., [WALL-OSS](https://huggingface.co/x-square-robot/wall-oss-flow))
 - [ ] Support for world model
 - [ ] Support for real-world RL embodied intelligence

README.zh-CN.md

Lines changed: 4 additions & 4 deletions
(Chinese README; content lines translated to English below, hunk-header context kept verbatim.)

@@ -30,6 +30,7 @@ RLinf 是一个灵活且可扩展的开源框架,专为利用强化学习进
 
 
 ## What's New
+- [2025/12] 🔥 Reinforcement learning fine-tuning based on [RoboCasa](https://github.com/robocasa/robocasa) is now available! Docs: [RL on RoboCasa](https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/robocasa.html)
 - [2025/12] 🎉 RLinf officially releases version [v0.1](https://github.com/RLinf/RLinf/releases/tag/v0.1).
 - [2025/11] 🔥 Reinforcement learning fine-tuning based on [CALVIN](https://github.com/mees/calvin) is now available! Docs: [RL on CALVIN](https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/calvin.html)
 - [2025/11] 🔥 Reinforcement learning fine-tuning based on [IsaacLab](https://github.com/isaac-sim/IsaacLab) is now available! Docs: [RL on IsaacLab](https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/isaaclab.html)
@@ -70,8 +71,7 @@ RLinf 是一个灵活且可扩展的开源框架,专为利用强化学习进
         <li><a href="https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/behavior.html">BEHAVIOR</a> ✅</li>
         <li><a href="https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/metaworld.html">MetaWorld</a> ✅</li>
         <li><a href="https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/isaaclab.html">IsaacLab</a> ✅</li>
-        <li><a href="https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/calvin.html">CALVIN</a> ✅</li>
-        <li>RoboCasa</li>
+        <li><a href="https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/robocasa.html">RoboCasa</a> ✅</li>
         <li>More...</li>
       </ul>
     </td>
@@ -89,7 +89,7 @@ RLinf 是一个灵活且可扩展的开源框架,专为利用强化学习进
         <li><a href="https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/pi0.html">π₀.₅</a> ✅</li>
         <li><a href="https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/maniskill.html">OpenVLA</a> ✅</li>
         <li><a href="https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/libero.html">OpenVLA-OFT</a> ✅</li>
-        <li><a href="https://rlinf.readthedocs.io/en/latest/rst_source/examples/gr00t.html">GR00T</a> ✅</li>
+        <li><a href="https://rlinf.readthedocs.io/zh-cn/latest/rst_source/examples/gr00t.html">GR00T</a> ✅</li>
       </ul>
       <li><b>VLM models</b></li>
       <ul>
@@ -565,7 +565,7 @@ RLinf 是一个灵活且可扩展的开源框架,专为利用强化学习进
 - [ ] Support for deep searcher agent training
 
 - [ ] Support for multi-agent training
-- [ ] Support for integration with more embodied simulators (e.g., [RoboCasa](https://github.com/robocasa/robocasa), [GENESIS](https://github.com/Genesis-Embodied-AI/Genesis), [RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin))
+- [ ] Support for integration with more embodied simulators (e.g., [GENESIS](https://github.com/Genesis-Embodied-AI/Genesis), [RoboTwin](https://github.com/RoboTwin-Platform/RoboTwin))
 - [ ] Support for more VLA models (e.g., [WALL-OSS](https://huggingface.co/x-square-robot/wall-oss-flow))
 - [ ] Support for world model (World Model)
 

docker/Dockerfile

Lines changed: 11 additions & 0 deletions
@@ -139,6 +139,17 @@ RUN link_assets
 # Set default env
 RUN echo "source ${UV_PATH}/openpi/bin/activate" >> ~/.bashrc
 
+FROM embodied-common-image AS embodied-robocasa-image
+
+# Install openpi env
+RUN bash requirements/install.sh embodied --venv openpi --model openpi --env robocasa
+
+RUN source switch_env openpi && download_assets --dir /opt/assets --assets openpi
+RUN link_assets
+
+# Set default env
+RUN echo "source ${UV_PATH}/openpi/bin/activate" >> ~/.bashrc
+
 FROM embodied-common-image AS embodied-isaaclab-image
 
 # Install gr00t env

docs/source-en/rst_source/examples/index.rst

Lines changed: 1 addition & 0 deletions
@@ -252,6 +252,7 @@ Thanks to this decoupled design, workers can be flexibly and dynamically schedul
    metaworld
    isaaclab
    calvin
+   robocasa
    pi0
    gr00t
    reasoning
Lines changed: 156 additions & 0 deletions
@@ -0,0 +1,156 @@
RL with RoboCasa Benchmark
====================================

.. |huggingface| image:: /_static/svg/hf-logo.svg
   :width: 16px
   :height: 16px
   :class: inline-icon

This document provides a comprehensive guide to reinforcement learning training with the RoboCasa benchmark in the RLinf framework.
RoboCasa is a large-scale robot-learning simulation framework focused on manipulation tasks in kitchen environments, featuring diverse kitchen layouts, objects, and manipulation tasks.

RoboCasa combines realistic kitchen environments with diverse manipulation challenges, making it an ideal benchmark for developing generalizable robotic policies.
The main goal is to train vision-language-action models capable of the following:

1. **Visual Understanding**: Process RGB images from multiple camera viewpoints.
2. **Language Understanding**: Interpret natural language task instructions.
3. **Manipulation Skills**: Execute complex kitchen tasks such as pick-and-place, opening/closing doors, and appliance control.
Environment Overview
--------------------

**RoboCasa Simulation Platform**

- **Environment**: RoboCasa Kitchen simulation environment (built on robosuite)
- **Robot**: Panda manipulator on a mobile base (PandaOmron), equipped with a parallel gripper
- **Tasks**: 24 atomic kitchen tasks covering multiple categories (excluding the NavigateKitchen task, which requires moving the base)
- **Observation**: Multi-view RGB images (robot view + wrist camera) plus proprioceptive state
- **Action Space**: 12-dimensional continuous actions

  - 3D arm position delta
  - 3D arm rotation delta
  - 1D gripper control (open/close)
  - 4D base control
  - 1D mode selection (control base or arm)
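The 12-dimensional layout above can be sketched as named slices into a flat action vector. This is a hypothetical helper for illustration only; the slice ordering is an assumption, so consult the RLinf/RoboCasa environment wrapper for the authoritative layout.

```python
import numpy as np

# Hypothetical slice layout for the 12-D action vector described above.
# The ordering here is assumed for illustration, not taken from the RLinf source.
ACTION_SLICES = {
    "arm_pos_delta": slice(0, 3),   # 3D arm position delta
    "arm_rot_delta": slice(3, 6),   # 3D arm rotation delta
    "gripper": slice(6, 7),         # 1D gripper control (open/close)
    "base": slice(7, 11),           # 4D base control
    "mode": slice(11, 12),          # 1D mode selection (base or arm)
}

def split_action(action: np.ndarray) -> dict:
    """Split a flat 12-D action into its named components."""
    assert action.shape == (12,), f"expected a 12-D action, got {action.shape}"
    return {name: action[s] for name, s in ACTION_SLICES.items()}

parts = split_action(np.zeros(12))
```

The component sizes sum to 12, matching the action-space description above.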
**Task Categories**

RoboCasa provides diverse atomic tasks organized into multiple categories:

*Door Manipulation Tasks*:

- ``OpenSingleDoor``: Open a cabinet or microwave door
- ``CloseSingleDoor``: Close a cabinet or microwave door
- ``OpenDoubleDoor``: Open double cabinet doors
- ``CloseDoubleDoor``: Close double cabinet doors
- ``OpenDrawer``: Open a drawer
- ``CloseDrawer``: Close a drawer

*Pick-and-Place Tasks*:

- ``PnPCounterToCab``: Pick from the counter and place into a cabinet
- ``PnPCabToCounter``: Pick from a cabinet and place on the counter
- ``PnPCounterToSink``: Pick from the counter and place in the sink
- ``PnPSinkToCounter``: Pick from the sink and place on the counter
- ``PnPCounterToStove``: Pick from the counter and place on the stove
- ``PnPStoveToCounter``: Pick from the stove and place on the counter
- ``PnPCounterToMicrowave``: Pick from the counter and place in the microwave
- ``PnPMicrowaveToCounter``: Pick from the microwave and place on the counter

*Appliance Control Tasks*:

- ``TurnOnMicrowave``: Turn on the microwave
- ``TurnOffMicrowave``: Turn off the microwave
- ``TurnOnSinkFaucet``: Turn on the sink faucet
- ``TurnOffSinkFaucet``: Turn off the sink faucet
- ``TurnSinkSpout``: Turn the sink spout
- ``TurnOnStove``: Turn on the stove
- ``TurnOffStove``: Turn off the stove

*Coffee Making Tasks*:

- ``CoffeeSetupMug``: Set up the coffee mug
- ``CoffeeServeMug``: Serve coffee into the mug
- ``CoffeePressButton``: Press the coffee machine button
**Observation Structure**

- **Base Camera Image** (``base_image``): Robot left-view camera (128×128 RGB)
- **Wrist Camera Image** (``wrist_image``): End-effector view camera (128×128 RGB)
- **Proprioceptive State** (``state``): 16-dimensional vector containing:

  - ``[0:2]`` Robot base position (x, y)
  - ``[2:5]`` Padding zeros
  - ``[5:9]`` End-effector quaternion relative to the base
  - ``[9:12]`` End-effector position relative to the base
  - ``[12:14]`` Gripper joint velocities
  - ``[14:16]`` Gripper joint positions
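The 16-dimensional state can be unpacked with the index ranges documented above. The helper below is a sketch written for this guide, not part of the RLinf API:

```python
import numpy as np

# Named views over the 16-D proprioceptive state, following the
# index ranges documented in the observation structure above.
STATE_SLICES = {
    "base_pos_xy": slice(0, 2),    # robot base position (x, y)
    "padding": slice(2, 5),        # padding zeros
    "eef_quat": slice(5, 9),       # end-effector quaternion w.r.t. base
    "eef_pos": slice(9, 12),       # end-effector position w.r.t. base
    "gripper_qvel": slice(12, 14), # gripper joint velocities
    "gripper_qpos": slice(14, 16), # gripper joint positions
}

def unpack_state(state: np.ndarray) -> dict:
    """Split a [..., 16] state array into named components."""
    assert state.shape[-1] == 16, "state must have 16 dimensions"
    return {name: state[..., s] for name, s in STATE_SLICES.items()}
```

Because the slices index the last axis, the same helper works on a single state vector or a ``[batch_size, 16]`` tensor.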
**Data Structure**

- **Images**: Base camera RGB tensor ``[batch_size, 3, 128, 128]`` and wrist camera tensor ``[batch_size, 3, 128, 128]``
- **State**: Proprioceptive state tensor ``[batch_size, 16]``
- **Task Description**: Natural language instructions
- **Actions**: 7-dimensional continuous actions (position, quaternion, gripper)
- **Reward**: Sparse reward based on task completion
Algorithm
---------

**Core Algorithm Components**

1. **PPO (Proximal Policy Optimization)**

   - Advantage estimation using GAE (Generalized Advantage Estimation)
   - Policy clipping with ratio limits
   - Value function clipping
   - Entropy regularization
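The "policy clipping with ratio limits" ingredient above is the clipped PPO surrogate. The following is a generic NumPy sketch of that loss, not RLinf's actual implementation (which additionally includes GAE, value clipping, and entropy terms):

```python
import numpy as np

def ppo_policy_loss(logp_new, logp_old, advantages, clip_ratio=0.2):
    """Clipped PPO surrogate loss (generic sketch, minimized by gradient descent).

    logp_new / logp_old: per-action log-probabilities under the current
    and behavior policies; advantages: per-action advantage estimates.
    """
    # Importance ratio pi_new(a|s) / pi_old(a|s)
    ratio = np.exp(np.asarray(logp_new) - np.asarray(logp_old))
    unclipped = ratio * advantages
    # Clip the ratio to [1 - eps, 1 + eps] to limit the policy update
    clipped = np.clip(ratio, 1.0 - clip_ratio, 1.0 + clip_ratio) * advantages
    # Pessimistic (min) bound, negated so lower loss = higher objective
    return -np.mean(np.minimum(unclipped, clipped))
```

When the new and old policies coincide the ratio is 1 everywhere, so the loss reduces to the negated mean advantage.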
2. **GRPO (Group Relative Policy Optimization)**

   - For every state/prompt, the policy generates *G* independent actions
   - The advantage of each action is computed by subtracting the group's mean reward
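The group-relative advantage described above is simple enough to sketch directly. This is an illustrative NumPy version; RLinf's implementation may differ in details such as reward normalization:

```python
import numpy as np

def grpo_advantages(rewards: np.ndarray) -> np.ndarray:
    """Group-relative advantages, as described above.

    rewards: array of shape [num_prompts, G], holding the reward of each
    of the G rollouts generated for every state/prompt. The advantage of
    each action is its reward minus the mean reward of its group.
    """
    return rewards - rewards.mean(axis=1, keepdims=True)
```

By construction the advantages within each group sum to zero, so better-than-average rollouts are reinforced and worse-than-average ones are suppressed.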
Dependency Installation
-----------------------

**Option 1: Docker Image**

Use the Docker image ``rlinf/rlinf:agentic-rlinf0.1-robocasa`` for the experiment.

**Option 2: Custom Environment**

Install the dependencies directly in your environment by running:

.. code:: bash

   pip install uv
   bash requirements/install.sh embodied --model openpi --env robocasa
   source .venv/bin/activate
Dataset Download
----------------

.. code:: bash

   python -m robocasa.scripts.download_kitchen_assets  # Caution: the assets to download are around 5 GB
Model Download
--------------

.. code-block:: bash

   # Download the model (choose either method)
   # Method 1: Using git clone
   git lfs install
   git clone https://huggingface.co/RLinf/RLinf-Pi0-RoboCasa

   # Method 2: Using huggingface-hub
   pip install huggingface-hub
   hf download RLinf/RLinf-Pi0-RoboCasa

docs/source-zh/rst_source/examples/index.rst

Lines changed: 1 addition & 0 deletions
@@ -247,6 +247,7 @@ RLinf的整体设计简洁且模块化,以Worker为抽象封装强化学习训
    metaworld
    isaaclab
    calvin
+   robocasa
    pi0
    gr00t
    reasoning
