diff --git a/.gitignore b/.gitignore index b47e22cbfa..359c0ad9e1 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,7 @@ node_modules/ poetry.lock uv.lock Pipfile.lock - +miniconda3 ### Build & Distribution ### build/ dist/ diff --git a/cuda.py b/cuda.py new file mode 100644 index 0000000000..ee25da6935 --- /dev/null +++ b/cuda.py @@ -0,0 +1,2 @@ +import torch +print(torch.cuda.is_available()) \ No newline at end of file diff --git a/keyboard_test.py b/keyboard_test.py new file mode 100644 index 0000000000..b0502cea6d --- /dev/null +++ b/keyboard_test.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +""" +Simple keyboard test utility. + +It collects all keys pressed on stdin and prints the batch every second. +Press Ctrl+C to exit. +""" + +from __future__ import annotations + +import select +import sys +import termios +import time +import tty + + +def _format_keys(keys: list[str]) -> str: + """Return a readable representation of the collected keys.""" + pretty = [] + for key in keys: + if key == "\n": + pretty.append("<enter>") + elif key == "\r": + continue # handled with newline on most terminals + elif key == "\t": + pretty.append("<tab>") + elif key == "\x7f": + pretty.append("<backspace>") + elif key.isprintable(): + pretty.append(key) + else: + pretty.append(f"<0x{ord(key):02x}>") + return " ".join(pretty) if pretty else "<no keys>" + + +def main() -> None: + fd = sys.stdin.fileno() + old_settings = termios.tcgetattr(fd) + + print("Keyboard test started. Press keys (Ctrl+C to quit).") + + try: + tty.setcbreak(fd) + window: list[str] = [] + last_tick = time.monotonic() + + while True: + # poll stdin; timeout keeps loop responsive for the 1s ticker + ready, _, _ = select.select([sys.stdin], [], [], 0.1) + if ready: + key = sys.stdin.read(1) + if key == "\x03": # Ctrl+C + raise KeyboardInterrupt + window.append(key) + + now = time.monotonic() + if now - last_tick >= 1.0: + stamp = time.strftime("%H:%M:%S") + print(f"[{stamp}] { _format_keys(window) }") + window.clear() + last_tick = now + + except KeyboardInterrupt: + print("\nExiting keyboard test.") + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + + +if __name__ == "__main__": + main() diff --git a/rk_client.py b/rk_client.py new file mode 100644 index 0000000000..d053d38b5b --- /dev/null +++ b/rk_client.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +"""Command line client for the RobotKinematics HTTP service.""" + +from __future__ import annotations + +import argparse +import json +import sys +from typing import Any +from urllib.error import HTTPError, URLError +from urllib.request import Request, urlopen + +import numpy as np + + +def _post_json(url: str, payload: dict[str, Any]) -> dict[str, Any]: + data = json.dumps(payload).encode("utf-8") + request = Request(url, data=data, headers={"Content-Type": "application/json"}) + + try: + with urlopen(request) as response: + body = response.read().decode("utf-8") + return json.loads(body) + except HTTPError as exc: + error_body = exc.read().decode("utf-8", errors="ignore") + raise RuntimeError(f"Server returned HTTP {exc.code}: {error_body}") from exc + except URLError as exc: + raise RuntimeError(f"Failed to reach {url}: {exc.reason}") from exc + + +def command_fk(args: argparse.Namespace) -> None: + payload = {"joint_positions": args.joint_positions} + response = _post_json(f"{args.url}/fk", payload) + print(json.dumps(response, indent=2)) + + +def command_ik(args: argparse.Namespace) -> None: + pose_matrix = np.asarray(args.pose, dtype=float).reshape(4, 4).tolist() + payload = {
"current_joint_positions": args.current_joint_positions, + "desired_pose": pose_matrix, + } + if args.position_weight is not None: + payload["position_weight"] = args.position_weight + if args.orientation_weight is not None: + payload["orientation_weight"] = args.orientation_weight + + response = _post_json(f"{args.url}/ik", payload) + print(json.dumps(response, indent=2)) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Client for the RobotKinematics HTTP service.") + parser.add_argument( + "--url", + default="http://127.0.0.1:8081", + help="Base URL for the rk_service (default: http://127.0.0.1:8081)", + ) + + subparsers = parser.add_subparsers(dest="command", required=True) + + fk_parser = subparsers.add_parser("fk", help="Call forward kinematics.") + fk_parser.add_argument( + "--joint-positions", + nargs="+", + type=float, + required=True, + metavar="ANGLE_DEG", + help="Joint positions in degrees.", + ) + fk_parser.set_defaults(func=command_fk) + + ik_parser = subparsers.add_parser("ik", help="Call inverse kinematics.") + ik_parser.add_argument( + "--current-joint-positions", + nargs="+", + type=float, + required=True, + metavar="ANGLE_DEG", + help="Current joint positions in degrees used as the IK seed.", + ) + ik_parser.add_argument( + "--pose", + nargs=16, + type=float, + required=True, + metavar="POSE_VALUE", + help="Desired end-effector 4x4 pose matrix in row-major order (16 values).", + ) + ik_parser.add_argument( + "--position-weight", + type=float, + default=None, + help="Optional position weight for IK.", + ) + ik_parser.add_argument( + "--orientation-weight", + type=float, + default=None, + help="Optional orientation weight for IK.", + ) + ik_parser.set_defaults(func=command_ik) + + return parser + + +def main(argv: list[str] | None = None) -> None: + parser = build_parser() + args = parser.parse_args(argv) + + try: + args.func(args) + except RuntimeError as exc: + parser.error(str(exc)) + + +if __name__ == "__main__": + main(sys.argv[1:]) + diff --git a/rk_service.py b/rk_service.py new file mode 100644 index 0000000000..d220ffe2a7 --- /dev/null +++ b/rk_service.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 +"""Simple HTTP service exposing RobotKinematics forward and inverse kinematics. 
+ +Run on a machine that has the `placo` dependency installed so RobotKinematics works: + + python rk_service.py --urdf-path /path/to/robot.urdf --port 8000 + +Endpoints: + POST /fk + Body: {"joint_positions": [deg, ...]} + Response: {"pose": [[...], ...]} + + POST /ik + Body: { + "current_joint_positions": [deg, ...], + "desired_pose": [[...], ...], + "position_weight": 1.0, # optional + "orientation_weight": 0.01 # optional + } + Response: {"joint_positions": [deg, ...]} +""" + +from __future__ import annotations + +import argparse +import json +import logging +from http import HTTPStatus +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from typing import Any, ClassVar + +import numpy as np + +from lerobot.model.kinematics import RobotKinematics + + +LOGGER = logging.getLogger("rk_service") + + +def _json_response(handler: BaseHTTPRequestHandler, status: HTTPStatus, payload: dict[str, Any]) -> None: + """Serialize payload and send it as the HTTP response.""" + body = json.dumps(payload).encode("utf-8") + handler.send_response(status.value) + handler.send_header("Content-Type", "application/json") + handler.send_header("Content-Length", str(len(body))) + handler.end_headers() + handler.wfile.write(body) + + +class KinematicsRequestHandler(BaseHTTPRequestHandler): + """HTTP request handler exposing RobotKinematics operations.""" + + kinematics: ClassVar[RobotKinematics] + suppress_logging: ClassVar[bool] = False + + def log_message(self, format: str, *args: Any) -> None: # noqa: D401 - `BaseHTTPRequestHandler` signature + if not self.suppress_logging: + super().log_message(format, *args) + + def do_GET(self) -> None: # noqa: N802 D401 - `BaseHTTPRequestHandler` signature + if self.path == "/healthz": + _json_response(self, HTTPStatus.OK, {"status": "ok"}) + else: + _json_response(self, HTTPStatus.NOT_FOUND, {"error": "Endpoint not found"}) + + def do_POST(self) -> None: # noqa: N802 D401 - `BaseHTTPRequestHandler` signature + content_length = int(self.headers.get("Content-Length", "0")) + body = self.rfile.read(content_length) if content_length else b"" + + try: + payload = json.loads(body.decode("utf-8")) + except json.JSONDecodeError as exc: + LOGGER.exception("Failed to decode request body as JSON") + _json_response(self, HTTPStatus.BAD_REQUEST, {"error": f"Invalid JSON: {exc}"}) + return + + if self.path == "/fk": + self._handle_forward_kinematics(payload) + elif self.path == "/ik": + self._handle_inverse_kinematics(payload) + else: + _json_response(self, HTTPStatus.NOT_FOUND, {"error": "Endpoint not found"}) + + def _handle_forward_kinematics(self, payload: dict[str, Any]) -> None: + try: + joints = payload["joint_positions"] + joint_array = np.asarray(joints, dtype=float) + except (KeyError, TypeError, ValueError) as exc: + _json_response(self, HTTPStatus.BAD_REQUEST, {"error": f"Invalid joint_positions: {exc}"}) + return + + try: + pose_matrix = self.kinematics.forward_kinematics(joint_array) + except Exception as exc: # placo may raise C++ exceptions that surface as generic Exception + LOGGER.exception("Forward kinematics failed") + _json_response(self, HTTPStatus.INTERNAL_SERVER_ERROR, {"error": f"forward_kinematics failed: {exc}"}) + return + + _json_response(self, HTTPStatus.OK, {"pose": pose_matrix.tolist()}) + + def _handle_inverse_kinematics(self, payload: dict[str, Any]) -> None: + try: + current = np.asarray(payload["current_joint_positions"], dtype=float) + desired_pose = np.asarray(payload["desired_pose"], dtype=float) + except (KeyError, TypeError, 
ValueError) as exc: + _json_response( + self, + HTTPStatus.BAD_REQUEST, + {"error": f"Invalid current_joint_positions or desired_pose: {exc}"}, + ) + return + + if desired_pose.shape != (4, 4): + _json_response( + self, + HTTPStatus.BAD_REQUEST, + {"error": f"desired_pose must be 4x4 matrix, got shape {desired_pose.shape}"}, + ) + return + + position_weight = float(payload.get("position_weight", 1.0)) + orientation_weight = float(payload.get("orientation_weight", 0.01)) + + try: + joint_solution = self.kinematics.inverse_kinematics( + current_joint_pos=current, + desired_ee_pose=desired_pose, + position_weight=position_weight, + orientation_weight=orientation_weight, + ) + except Exception as exc: # placo errors + LOGGER.exception("Inverse kinematics failed") + _json_response(self, HTTPStatus.INTERNAL_SERVER_ERROR, {"error": f"inverse_kinematics failed: {exc}"}) + return + + _json_response(self, HTTPStatus.OK, {"joint_positions": joint_solution.tolist()}) + + +def build_arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Expose RobotKinematics via a simple HTTP service.") + parser.add_argument("--host", default="127.0.0.1", help="Interface to bind the HTTP server (default: 127.0.0.1)") + parser.add_argument("--port", type=int, default=8081, help="Port for the HTTP server (default: 8081)") + parser.add_argument("--urdf-path", default="Simulation/SO101/so101_new_calib.urdf", help="Path to the robot URDF file.") + parser.add_argument( + "--target-frame", + default="gripper_frame_link", + help="End-effector frame name (default: gripper_frame_link)", + ) + parser.add_argument( + "--joint-names", + nargs="+", + default=None, + help="Optional list of joint names; defaults to URDF order when omitted.", + ) + parser.add_argument( + "--suppress-request-logs", + action="store_true", + help="Disable per-request logging emitted by the HTTP handler.", + ) + return parser + + +def main() -> None: + args = build_arg_parser().parse_args() + + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s") + + LOGGER.info("Loading RobotKinematics with URDF: %s", args.urdf_path) + kinematics = RobotKinematics( + urdf_path=args.urdf_path, + target_frame_name=args.target_frame, + joint_names=args.joint_names, + ) + + KinematicsRequestHandler.kinematics = kinematics + KinematicsRequestHandler.suppress_logging = bool(args.suppress_request_logs) + + server_address = (args.host, args.port) + httpd = ThreadingHTTPServer(server_address, KinematicsRequestHandler) + LOGGER.info("RobotKinematics service listening on http://%s:%d", args.host, args.port) + + try: + httpd.serve_forever() + except KeyboardInterrupt: + LOGGER.info("Shutting down RobotKinematics service.") + finally: + httpd.server_close() + + +if __name__ == "__main__": + main() + diff --git a/run_actor.sh b/run_actor.sh new file mode 100644 index 0000000000..5f1974fef3 --- /dev/null +++ b/run_actor.sh @@ -0,0 +1,5 @@ +export HF_ENDPOINT=https://hf-mirror.com +#export HF_ENDPOINT=/root/.cache/huggingface/lerobot/ + +export HF_TOKEN=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 +python -m lerobot.rl.actor --config_path src/lerobot/configs/train_config_hilserl_so101_rule_reward_actor.json \ No newline at end of file diff --git a/run_docker.sh b/run_docker.sh new file mode 100644 index 0000000000..9237e0af57 --- /dev/null +++ b/run_docker.sh @@ -0,0 +1 @@ +docker run --runtime=nvidia --device=/dev/video3 --device=/dev/video5 --device=/dev/ttyACM3 --device=/dev/ttyACM1 --device=/dev/ttyACM0 -v 
~/.cache:/root/.cache -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=:1 -it --rm -v "$PWD":/workspace -w /workspace --ulimit memlock=-1 --ulimit stack=6700108864 --shm-size 7000000000 mk:lerobot2 diff --git a/run_learner.sh b/run_learner.sh new file mode 100644 index 0000000000..9838cd2da2 --- /dev/null +++ b/run_learner.sh @@ -0,0 +1,3 @@ +export HF_ENDPOINT=https://hf-mirror.com +export HF_TOKEN=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 +python -m lerobot.rl.learner --config_path src/lerobot/configs/train_config_hilserl_so101_rule_reward_learner.json \ No newline at end of file diff --git a/s.sh b/s.sh new file mode 100644 index 0000000000..2445210703 --- /dev/null +++ b/s.sh @@ -0,0 +1,4 @@ +sudo chmod 666 /dev/ttyACM0 +sudo chmod 666 /dev/ttyACM1 +sudo chmod 666 /dev/ttyACM2 +sudo chmod 666 /dev/ttyACM3 \ No newline at end of file diff --git a/src/lerobot/configs/env_config_so101.json b/src/lerobot/configs/env_config_so101.json new file mode 100644 index 0000000000..93ce034470 --- /dev/null +++ b/src/lerobot/configs/env_config_so101.json @@ -0,0 +1,91 @@ +{ + "env": { + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 30 + }, + "right": { + "type": "opencv", + "index_or_path": 7, + "height": 480, + "width": 640, + "fps": 30 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + "image_preprocessing": { + "crop_params_dict": { + + "observation.images.top": [0, 136, 360, 360] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "urdf_path": "/home/sr/so101/SO-ARM100-main/Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_classifier": { + "pretrained_path": null, + "success_threshold": 0.5, + "success_reward": 1.0 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 10 + }, + "dataset": { + "repo_id": "mengke/test11", + "root": null, + "task": "ss", + "num_episodes_to_record": 1, + "replay_episode": null, + "push_to_hub": false + }, + "mode": "record", + "device": "cpu" +} \ No newline at end of file diff --git a/src/lerobot/configs/env_config_so101_eval.json b/src/lerobot/configs/env_config_so101_eval.json new file mode 100644 index 0000000000..b9cc02b004 --- /dev/null +++ b/src/lerobot/configs/env_config_so101_eval.json @@ -0,0 +1,90 @@ +{ + "env": { + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 15 + }, + "right": { + "type": "opencv", + "index_or_path": 3, + "height": 480, + "width": 640, + "fps": 15 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + 
"image_preprocessing": { + "crop_params_dict": { + "observation.images.top": [49, 189, 262, 305] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "urdf_path": "/home/sr/so101/SO-ARM100-main/Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_classifier": { + "pretrained_path": "outputs/train/2025-10-30/05-49-53_reward-classifier/checkpoints/last/pretrained_model", + "success_threshold": 0.5, + "success_reward": 1.0 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 10 + }, + "dataset": { + "repo_id": null, + "root": null, + "task": "eval", + "num_episodes_to_record": 10, + "replay_episode": null, + "push_to_hub": false + }, + "mode": null, + "device": "cpu" +} \ No newline at end of file diff --git a/src/lerobot/configs/env_config_so101_rule_reward.json b/src/lerobot/configs/env_config_so101_rule_reward.json new file mode 100644 index 0000000000..cb9f4e13a7 --- /dev/null +++ b/src/lerobot/configs/env_config_so101_rule_reward.json @@ -0,0 +1,93 @@ +{ + "env": { + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 30 + }, + "right": { + "type": "opencv", + "index_or_path": 7, + "height": 480, + "width": 640, + "fps": 30 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + "image_preprocessing": { + "crop_params_dict": { + + "observation.images.top": [0, 136, 360, 360] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "urdf_path": "/home/sr/so101/SO-ARM100-main/Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_rule": { + "target_x": 0.26157, + "target_y": -0.1039, + "target_z": 0.060122, + "radius": 0.05, + "radius_z": 0.05 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 10 + }, + "dataset": { + "repo_id": "mengke/test11", + "root": "/home/sr/so101/lerobot/dataset/test7", + "task": "ss", + "num_episodes_to_record": 15, + "replay_episode": null, + "push_to_hub": false + }, + "mode": "record", + "device": "cpu" +} \ No newline at end of file diff --git a/src/lerobot/configs/train_config_hilserl_so101.json b/src/lerobot/configs/train_config_hilserl_so101.json new file mode 100644 index 0000000000..d4638a970e --- /dev/null +++ b/src/lerobot/configs/train_config_hilserl_so101.json @@ -0,0 +1,275 @@ +{ + "output_dir": null, + "job_name": "default", + "resume": false, + "seed": 1000, + "num_workers": 4, + 
"batch_size": 256, + "steps": 100000, + "log_freq": 1, + "save_checkpoint": true, + "save_freq": 1000, + "wandb": { + "enable": true, + "project": "so101_real", + "disable_artifact": true + }, + "dataset": { + "repo_id": "test6", + "root": "/workspace/dataset/test6" + }, + "policy": { + "type": "sac", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ENV": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "device": "cuda", + "use_amp": false, + "dataset_stats": { + "observation.images.top": { + "mean": [ + 0.485, + 0.456, + 0.406 + ], + "std": [ + 0.229, + 0.224, + 0.225 + ] + }, + "observation.state": { + "min": [ + -15.0, + -20.0, + 10.0, + 65.0, + 0.0, + 0.0, + -40.0, + -60.0, + -50.0, + -80.0, + -10.0, + -110.0, + 0.26, + -0.06, + 0.25 + ], + "max": [ + 15.0, + 15.0, + 40.0, + 110.0, + 0.5, + 31.0, + 53.0, + 61.0, + 67.0, + 90.0, + 6.0, + 100.0, + 0.32, + 0.06, + 0.35 + ] + }, + "action": { + "min": [ + -1.0, + -1.0, + -1.0 + ], + "max": [ + 1.0, + 1.0, + 1.0 + ] + } + }, + "num_discrete_actions": 3, + "storage_device": "cuda", + "vision_encoder_name": "helper2424/resnet10", + "freeze_vision_encoder": true, + "image_encoder_hidden_dim": 32, + "shared_encoder": true, + "online_steps": 1000000, + "online_buffer_capacity": 30000, + "offline_buffer_capacity": 10000, + "online_step_before_learning": 10, + "policy_update_freq": 1, + "discount": 0.97, + "async_prefetch": false, + "temperature_init": 0.01, + "num_critics": 2, + "num_subsample_critics": null, + "critic_lr": 0.0003, + "actor_lr": 0.0003, + "temperature_lr": 0.0003, + "critic_target_update_weight": 0.005, + "utd_ratio": 2, + "state_encoder_hidden_dim": 256, + "latent_dim": 256, + "target_entropy": null, + "use_backup_entropy": true, + "grad_clip_norm": 40.0, + "critic_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true, + "final_activation": null + }, + "actor_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true + }, + "policy_kwargs": { + "use_tanh_squash": true, + "std_min": -5, + "std_max": 2, + "init_final": 0.05 + }, + "actor_learner_config": { + "learner_host": "127.0.0.1", + "learner_port": 50051, + "policy_parameters_push_frequency": 40 + }, + "concurrency": { + "actor": "threads", + "learner": "threads" + }, + "repo_id": "aractingi/pick_lift_cube_policy" + }, + "env": { + "type": "gym_manipulator", + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 15 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + "image_preprocessing": { + "crop_params_dict": { + "observation.images.top": [49, 189, 262, 305] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "use_service": true, + 
"service_url": "http://127.0.0.1:8081", + "urdf_path": "Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_classifier": { + "pretrained_path": null, + "success_threshold": 0.5, + "success_reward": 1.0 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 1, + + "task": "", + "features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "features_map": {"observation.images.top": "observation.images.top", "observation.state": "observation.state","action":"action"} + } +} \ No newline at end of file diff --git a/src/lerobot/configs/train_config_hilserl_so101.json_bk b/src/lerobot/configs/train_config_hilserl_so101.json_bk new file mode 100644 index 0000000000..39f5b9b4f6 --- /dev/null +++ b/src/lerobot/configs/train_config_hilserl_so101.json_bk @@ -0,0 +1,329 @@ +{ + "output_dir": null, + "job_name": "default", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 256, + "steps": 100000, + "log_freq": 500, + "save_checkpoint": true, + "save_freq": 2000000, + "wandb": { + "enable": true, + "project": "so100_real", + "disable_artifact": true + }, + "dataset": { + "repo_id": "hf_username/dataset_name", + "use_imagenet_stats": false + }, + "policy": { + "type": "sac", + "n_obs_steps": 1, + "repo_id": "hf_username/policy_name", + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ENV": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.images.side": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 15 + ] + } + }, + "device": "cuda", + "use_amp": false, + "dataset_stats": { + "observation.images.side": { + "mean": [ + 0.485, + 0.456, + 0.406 + ], + "std": [ + 0.229, + 0.224, + 0.225 + ] + }, + "observation.images.front": { + "mean": [ + 0.485, + 0.456, + 0.406 + ], + "std": [ + 0.229, + 0.224, + 0.225 + ] + }, + "observation.state": { + "min": [ + -15.0, + -20.0, + 10.0, + 65.0, + 0.0, + 0.0, + -40.0, + -60.0, + -50.0, + -80.0, + -10.0, + -110.0, + -15.0, + -20.0, + 15.0, + 70.0, + -5.0, + 0.0, + 0.26, + -0.06, + 0.25 + ], + "max": [ + 15.0, + 15.0, + 40.0, + 110.0, + 0.5, + 31.0, + 53.0, + 61.0, + 67.0, + 90.0, + 6.0, + 100.0, + 15.0, + 12.0, + 40.0, + 110.0, + 0.5, + 31.0, + 0.32, + 0.06, + 0.35 + ] + }, + "action": { + "min": [ + -0.03, + -0.03, + -0.03 + ], + "max": [ + 0.03, + 0.03, + 0.03 + ] + } + }, + "num_discrete_actions": 3, + "storage_device": "cuda", + "vision_encoder_name": "helper2424/resnet10", + "freeze_vision_encoder": true, + "image_encoder_hidden_dim": 32, + "shared_encoder": true, + "online_steps": 1000000, + "online_env_seed": 10000, + "online_buffer_capacity": 30000, + "offline_buffer_capacity": 10000, + "online_step_before_learning": 100, + "policy_update_freq": 1, + "discount": 0.97, + "async_prefetch": false, + "temperature_init": 0.01, + "num_critics": 2, + "num_subsample_critics": null, + "critic_lr": 0.0003, + "actor_lr": 0.0003, + "temperature_lr": 0.0003, + "critic_target_update_weight": 0.005, + "utd_ratio": 2, + "state_encoder_hidden_dim": 256, + 
"latent_dim": 256, + "target_entropy": null, + "use_backup_entropy": true, + "grad_clip_norm": 40.0, + "critic_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true, + "final_activation": null + }, + "actor_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true + }, + "policy_kwargs": { + "use_tanh_squash": true, + "std_min": -5, + "std_max": 2, + "init_final": 0.05 + }, + "actor_learner_config": { + "learner_host": "127.0.0.1", + "learner_port": 50051, + "policy_parameters_push_frequency": 4 + }, + "concurrency": { + "actor": "threads", + "learner": "threads" + } + }, + "env": { + "type": "gym_manipulator", + "robot": { + "type": "so100_follower_end_effector", + "port": "/dev/ttyACM0", + "urdf_path": "path/to/your/robot.urdf", + "target_frame_name": "gripper_frame_link", + "cameras": { + "front": { + "type": "opencv", + "index_or_path": 2, + "height": 480, + "width": 640, + "fps": 30 + }, + "wrist": { + "type": "opencv", + "index_or_path": 8, + "height": 480, + "width": 640, + "fps": 30 + } + }, + "end_effector_bounds": { + "min": [ + -1.0, + -1.0, + -1.0 + ], + "max": [ + 1.0, + 1.0, + 1.0 + ] + }, + "max_gripper_pos": 30 + }, + "teleop": { + "type": "gamepad", + "use_gripper": true + }, + "wrapper": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true, + "add_ee_pose_to_observation": true, + "crop_params_dict": { + "observation.images.front": [ + 270, + 170, + 90, + 190 + ], + "observation.images.wrist": [ + 0, + 0, + 480, + 640 + ] + }, + "resize_size": [ + 128, + 128 + ], + "control_time_s": 20.0, + "use_gripper": true, + "gripper_quantization_threshold": null, + "gripper_penalty": -0.02, + "gripper_penalty_in_reward": false, + "fixed_reset_joint_positions": [ + 0.0, + -20.0, + 20.0, + 90.0, + 0.0, + 30.0 + ], + "reset_time_s": 2.5, + "control_mode": "gamepad" + }, + "name": "real_robot", + "mode": null, + "repo_id": null, + "dataset_root": null, + "task": "", + "num_episodes": 0, + "episode": 0, + "pretrained_policy_name_or_path": null, + "device": "cuda", + "push_to_hub": true, + "fps": 10, + "features": { + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 15 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "features_map": { + "observation.images.front": "observation.images.side", + "observation.images.wrist": "observation.images.wrist", + "observation.state": "observation.state", + "action": "action" + } + } +} \ No newline at end of file diff --git a/src/lerobot/configs/train_config_hilserl_so101_rule_reward_actor.json b/src/lerobot/configs/train_config_hilserl_so101_rule_reward_actor.json new file mode 100644 index 0000000000..fabf8cbec4 --- /dev/null +++ b/src/lerobot/configs/train_config_hilserl_so101_rule_reward_actor.json @@ -0,0 +1,277 @@ +{ + "output_dir": "output/test2/actor", + "job_name": "default", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 256, + "steps": 100000, + "log_freq": 10, + "save_checkpoint": true, + "save_freq": 1000, + "wandb": { + "enable": true, + "project": "so101_real", + "disable_artifact": true + }, + "dataset": { + "repo_id": "test6", + "root": "/workspace/dataset/test7" + }, + "policy": { + "type": "sac", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", 
+ "ENV": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "device": "cuda", + "use_amp": false, + "dataset_stats": { + "observation.images.top": { + "mean": [ + 0.485, + 0.456, + 0.406 + ], + "std": [ + 0.229, + 0.224, + 0.225 + ] + }, + "observation.state": { + "min": [ + -15.0, + -20.0, + 10.0, + 65.0, + 0.0, + 0.0, + -40.0, + -60.0, + -50.0, + -80.0, + -10.0, + -110.0, + 0.26, + -0.06, + 0.25 + ], + "max": [ + 15.0, + 15.0, + 40.0, + 110.0, + 0.5, + 31.0, + 53.0, + 61.0, + 67.0, + 90.0, + 6.0, + 100.0, + 0.32, + 0.06, + 0.35 + ] + }, + "action": { + "min": [ + -1.0, + -1.0, + -1.0 + ], + "max": [ + 1.0, + 1.0, + 1.0 + ] + } + }, + "num_discrete_actions": 3, + "storage_device": "cuda", + "vision_encoder_name": "helper2424/resnet10", + "freeze_vision_encoder": true, + "image_encoder_hidden_dim": 32, + "shared_encoder": true, + "online_steps": 1000000, + "online_buffer_capacity": 30000, + "offline_buffer_capacity": 10000, + "online_step_before_learning": 10, + "policy_update_freq": 10, + "discount": 0.97, + "async_prefetch": false, + "temperature_init": 0.01, + "num_critics": 2, + "num_subsample_critics": null, + "critic_lr": 0.0003, + "actor_lr": 0.0003, + "temperature_lr": 0.0003, + "critic_target_update_weight": 0.005, + "utd_ratio": 2, + "state_encoder_hidden_dim": 256, + "latent_dim": 256, + "target_entropy": null, + "use_backup_entropy": true, + "grad_clip_norm": 40.0, + "critic_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true, + "final_activation": null + }, + "actor_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true + }, + "policy_kwargs": { + "use_tanh_squash": true, + "std_min": -5, + "std_max": 2, + "init_final": 0.05 + }, + "actor_learner_config": { + "learner_host": "127.0.0.1", + "learner_port": 50051, + "policy_parameters_push_frequency": 40 + }, + "concurrency": { + "actor": "threads", + "learner": "threads" + }, + "repo_id": "aractingi/pick_lift_cube_policy" + }, + "env": { + "type": "gym_manipulator", + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 15 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + "image_preprocessing": { + "crop_params_dict": { + "observation.images.top": [49, 189, 262, 305] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "use_service": true, + "service_url": "http://127.0.0.1:8081", + "urdf_path": "Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_rule": { + "target_x": 0.26157, + "target_y": -0.1039, + "target_z": 0.060122, + 
"radius": 0.06, + "radius_z": 0.08 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 5, + + "task": "", + "features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "features_map": {"observation.images.top": "observation.images.top", "observation.state": "observation.state","action":"action"} + } +} \ No newline at end of file diff --git a/src/lerobot/configs/train_config_hilserl_so101_rule_reward_learner.json b/src/lerobot/configs/train_config_hilserl_so101_rule_reward_learner.json new file mode 100644 index 0000000000..8964f95556 --- /dev/null +++ b/src/lerobot/configs/train_config_hilserl_so101_rule_reward_learner.json @@ -0,0 +1,277 @@ +{ + "output_dir": "output/test2/learner", + "job_name": "default", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 256, + "steps": 100000, + "log_freq": 100, + "save_checkpoint": true, + "save_freq": 500, + "wandb": { + "enable": true, + "project": "so101_real", + "disable_artifact": true + }, + "dataset": { + "repo_id": "test6", + "root": "/workspace/dataset/test7" + }, + "policy": { + "type": "sac", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ENV": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "device": "cuda", + "use_amp": false, + "dataset_stats": { + "observation.images.top": { + "mean": [ + 0.485, + 0.456, + 0.406 + ], + "std": [ + 0.229, + 0.224, + 0.225 + ] + }, + "observation.state": { + "min": [ + -15.0, + -20.0, + 10.0, + 65.0, + 0.0, + 0.0, + -40.0, + -60.0, + -50.0, + -80.0, + -10.0, + -110.0, + 0.26, + -0.06, + 0.25 + ], + "max": [ + 15.0, + 15.0, + 40.0, + 110.0, + 0.5, + 31.0, + 53.0, + 61.0, + 67.0, + 90.0, + 6.0, + 100.0, + 0.32, + 0.06, + 0.35 + ] + }, + "action": { + "min": [ + -1.0, + -1.0, + -1.0 + ], + "max": [ + 1.0, + 1.0, + 1.0 + ] + } + }, + "num_discrete_actions": 3, + "storage_device": "cuda", + "vision_encoder_name": "helper2424/resnet10", + "freeze_vision_encoder": true, + "image_encoder_hidden_dim": 32, + "shared_encoder": true, + "online_steps": 1000000, + "online_buffer_capacity": 30000, + "offline_buffer_capacity": 10000, + "online_step_before_learning": 10, + "policy_update_freq": 10, + "discount": 0.97, + "async_prefetch": false, + "temperature_init": 0.01, + "num_critics": 2, + "num_subsample_critics": null, + "critic_lr": 0.0003, + "actor_lr": 0.0003, + "temperature_lr": 0.0003, + "critic_target_update_weight": 0.005, + "utd_ratio": 2, + "state_encoder_hidden_dim": 256, + "latent_dim": 256, + "target_entropy": null, + "use_backup_entropy": true, + "grad_clip_norm": 40.0, + "critic_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true, + "final_activation": null + }, + "actor_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true + }, + "policy_kwargs": { + "use_tanh_squash": true, + "std_min": -5, + "std_max": 2, + "init_final": 0.05 + }, + "actor_learner_config": { + "learner_host": "127.0.0.1", + "learner_port": 50051, + "policy_parameters_push_frequency": 40 + }, + "concurrency": { + "actor": "threads", + "learner": "threads" 
+ }, + "repo_id": "aractingi/pick_lift_cube_policy" + }, + "env": { + "type": "gym_manipulator", + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 15 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + "image_preprocessing": { + "crop_params_dict": { + "observation.images.top": [49, 189, 262, 305] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "use_service": true, + "service_url": "http://127.0.0.1:8081", + "urdf_path": "Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_rule": { + "target_x": 0.26157, + "target_y": -0.1039, + "target_z": 0.060122, + "radius": 0.06, + "radius_z": 0.08 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 5, + + "task": "", + "features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "features_map": {"observation.images.top": "observation.images.top", "observation.state": "observation.state","action":"action"} + } +} \ No newline at end of file diff --git a/src/lerobot/datasets/utils.py b/src/lerobot/datasets/utils.py index 37d8432b2b..24fae2065f 100644 --- a/src/lerobot/datasets/utils.py +++ b/src/lerobot/datasets/utils.py @@ -712,8 +712,8 @@ def dataset_to_policy_features(features: dict[str, dict]) -> dict[str, PolicyFea shape = (shape[2], shape[0], shape[1]) elif key == OBS_ENV_STATE: type = FeatureType.ENV - elif key.startswith(OBS_STR): - type = FeatureType.STATE + # elif key.startswith(OBS_STR): + # type = FeatureType.STATE elif key.startswith(ACTION): type = FeatureType.ACTION else: diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index dc526114dc..bd66af709c 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -157,11 +157,22 @@ class RewardClassifierConfig: success_threshold: float = 0.5 success_reward: float = 1.0 +@dataclass +class RewardRuleConfig: + """Configuration for reward rule.""" + + target_x: float = 0.0 + target_y: float = 0.0 + target_z: float = 0.0 + radius: float = 0.01 + radius_z: float = 0.01 @dataclass class InverseKinematicsConfig: """Configuration for inverse kinematics processing.""" + use_service: bool = False + service_url: str | None = None urdf_path: str | None = None target_frame_name: str | None = None end_effector_bounds: dict[str, list[float]] | None = None @@ -206,6 +217,7 @@ class HILSerlProcessorConfig: reset: ResetConfig | None = None inverse_kinematics: InverseKinematicsConfig | None = None reward_classifier: RewardClassifierConfig | None = None + reward_rule: RewardRuleConfig | None = None max_gripper_pos: float | None = 100.0 diff --git a/src/lerobot/model/kinematics.py 
b/src/lerobot/model/kinematics.py index 95d3b235c6..39f35fa8ba 100644 --- a/src/lerobot/model/kinematics.py +++ b/src/lerobot/model/kinematics.py @@ -12,9 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Protocol + import numpy as np +class KinematicsLike(Protocol): + """Protocol describing the required kinematics interface.""" + + def forward_kinematics(self, joint_pos_deg: np.ndarray) -> np.ndarray: ... + + def inverse_kinematics( + self, + current_joint_pos: np.ndarray, + desired_ee_pose: np.ndarray, + position_weight: float = 1.0, + orientation_weight: float = 0.01, + ) -> np.ndarray: ... + + class RobotKinematics: """Robot kinematics using placo library for forward and inverse kinematics.""" diff --git a/src/lerobot/policies/sac/reward_model/modeling_classifier.py b/src/lerobot/policies/sac/reward_model/modeling_classifier.py index dba6a174b4..f56f4903bd 100644 --- a/src/lerobot/policies/sac/reward_model/modeling_classifier.py +++ b/src/lerobot/policies/sac/reward_model/modeling_classifier.py @@ -20,6 +20,7 @@ from torch import Tensor, nn from lerobot.policies.pretrained import PreTrainedPolicy + from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig from lerobot.utils.constants import OBS_IMAGE, REWARD @@ -144,6 +145,14 @@ def __init__( encoder = self._create_single_encoder() self.encoders[image_key] = encoder + # self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats) + # self.normalize_targets = Normalize( + # config.output_features, config.normalization_mapping, dataset_stats + # ) + # self.unnormalize_outputs = Unnormalize( + # config.output_features, config.normalization_mapping, dataset_stats + # ) + self._build_classifier_head() def _setup_cnn_backbone(self): @@ -269,15 +278,15 @@ def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict[str, Tensor]]: def predict_reward(self, batch, threshold=0.5): """Eval method. 
Returns predicted reward with the decision threshold as argument.""" # Check for both OBS_IMAGE and OBS_IMAGES prefixes - batch = self.normalize_inputs(batch) - batch = self.normalize_targets(batch) + # batch = self.normalize_inputs(batch) + # batch = self.normalize_targets(batch) #mengke # Extract images from batch dict images = [batch[key] for key in self.config.input_features if key.startswith(OBS_IMAGE)] if self.config.num_classes == 2: probs = self.predict(images).probabilities - logging.debug(f"Predicted reward images: {probs}") + logging.warning(f"Predicted reward images: {probs}") return (probs > threshold).float() else: return torch.argmax(self.predict(images).probabilities, dim=1) diff --git a/src/lerobot/processor/__init__.py b/src/lerobot/processor/__init__.py index be11ac1af4..febff1bf79 100644 --- a/src/lerobot/processor/__init__.py +++ b/src/lerobot/processor/__init__.py @@ -47,6 +47,7 @@ ImageCropResizeProcessorStep, InterventionActionProcessorStep, RewardClassifierProcessorStep, + RewardRuleProcessorStep, TimeLimitProcessorStep, ) from .joint_observations_processor import JointVelocityProcessorStep, MotorCurrentProcessorStep @@ -116,6 +117,7 @@ "RobotObservation", "RenameObservationsProcessorStep", "RewardClassifierProcessorStep", + "RewardRuleProcessorStep", "RewardProcessorStep", "DataProcessorPipeline", "TimeLimitProcessorStep", diff --git a/src/lerobot/processor/hil_processor.py b/src/lerobot/processor/hil_processor.py index f0dbac9c3c..194462569b 100644 --- a/src/lerobot/processor/hil_processor.py +++ b/src/lerobot/processor/hil_processor.py @@ -115,7 +115,7 @@ def complementary_data(self, complementary_data: dict) -> dict: Returns: A new dictionary with the teleoperator action added under the `teleop_action` key. - """ + """ #mengke new_complementary_data = dict(complementary_data) new_complementary_data[TELEOP_ACTION_KEY] = self.teleop_device.get_action() return new_complementary_data @@ -432,9 +432,14 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: terminate_episode = info.get(TeleopEvents.TERMINATE_EPISODE, False) success = info.get(TeleopEvents.SUCCESS, False) rerecord_episode = info.get(TeleopEvents.RERECORD_EPISODE, False) + use_policy = info.get(TeleopEvents.USE_POLICY, False) new_transition = transition.copy() + if (not use_policy): + new_transition[TransitionKey.ACTION] = torch.tensor([0.0, 0.0, 0.0, 1.0], dtype=torch.float32) + else: + print("use_policy") # Override action if intervention is active if is_intervention and teleop_action is not None: if isinstance(teleop_action, dict): @@ -594,3 +599,71 @@ def transform_features( self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: return features + + +@ProcessorStepRegistry.register("reward_rule_processor") +@dataclass +class RewardRuleProcessorStep(ProcessorStep): + """ + Assigns a binary reward when the end-effector is inside a cylindrical target region. + + The region is defined by a center position (`target_x`, `target_y`, `target_z`), + a radial tolerance `radius` on the xy-plane, and a vertical tolerance `z_radius`. + If the current end-effector pose satisfies both constraints, the reward is set to 1.0; + otherwise it is 0.0. 
+ """ + + target_x: float + target_y: float + target_z: float + radius: float + radius_z: float + terminate_on_success: bool + + def __call__(self, transition: EnvTransition) -> EnvTransition: + new_transition = transition.copy() + observation = new_transition.get(TransitionKey.OBSERVATION) or {} + + if not observation: + return new_transition + + def _to_float(value: Any) -> float: + if isinstance(value, torch.Tensor): + return float(value.squeeze().item()) + return float(value) + + try: + ee_x = _to_float(observation["ee.x"]) + ee_y = _to_float(observation["ee.y"]) + ee_z = _to_float(observation["ee.z"]) + except KeyError: + # Required pose components not available; keep reward unchanged. + print("RewardRuleProcessorStep failed, no ee pose") + return new_transition + + dx = ee_x - self.target_x + dy = ee_y - self.target_y + within_radius = dx * dx + dy * dy <= self.radius * self.radius + within_height = abs(ee_z - self.target_z) <= self.radius_z + + + reward = new_transition.get(TransitionKey.REWARD, 0.0) + terminated = new_transition.get(TransitionKey.DONE, False) + + if (within_radius and within_height): + reward = 1.0 + if self.terminate_on_success: + terminated = True + else: + reward = 0.0 + + new_transition[TransitionKey.REWARD] = reward + new_transition[TransitionKey.DONE] = terminated + print(f"ee_x: {ee_x}, ee_y: {ee_y}, ee_z: {ee_z}, target_x: {self.target_x}, target_y: {self.target_y}, target_z: {self.target_z}") + + return new_transition + + def transform_features( + self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] + ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: + return features diff --git a/src/lerobot/processor/pipeline.py b/src/lerobot/processor/pipeline.py index e14d8b0b9c..a111ff6037 100644 --- a/src/lerobot/processor/pipeline.py +++ b/src/lerobot/processor/pipeline.py @@ -1693,8 +1693,11 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: complementary_data = new_transition.get(TransitionKey.COMPLEMENTARY_DATA) if complementary_data is None or not isinstance(complementary_data, dict): raise ValueError("ComplementaryDataProcessorStep requires complementary data in the transition.") - + #print("complementary_data") + #print(complementary_data) processed_complementary_data = self.complementary_data(complementary_data.copy()) + #print("processed_complementary_data") + #print(processed_complementary_data) new_transition[TransitionKey.COMPLEMENTARY_DATA] = processed_complementary_data return new_transition diff --git a/src/lerobot/rl/actor.py b/src/lerobot/rl/actor.py index 54d0fba69f..4ce6c4fc75 100644 --- a/src/lerobot/rl/actor.py +++ b/src/lerobot/rl/actor.py @@ -65,8 +65,18 @@ from lerobot.processor import TransitionKey from lerobot.rl.process import ProcessSignalHandler from lerobot.rl.queue import get_last_item_from_queue -from lerobot.robots import so100_follower # noqa: F401 -from lerobot.teleoperators import gamepad, so101_leader # noqa: F401 +from lerobot.robots import ( # noqa: F401 + RobotConfig, + make_robot_from_config, + so100_follower, + so101_follower, +) +from lerobot.teleoperators import ( + gamepad, # noqa: F401 + keyboard, # noqa: F401 + make_teleoperator_from_config, + so101_leader, # noqa: F401 +) from lerobot.teleoperators.utils import TeleopEvents from lerobot.transport import services_pb2, services_pb2_grpc from lerobot.transport.utils import ( @@ -290,6 +300,8 @@ def act_with_policy( with policy_timer: # Extract observation from transition for policy action = policy.select_action(batch=observation) + 
action[0][3] = 0.0 + print("policy action:", action) policy_fps = policy_timer.fps_last log_policy_frequency_issue(policy_fps=policy_fps, cfg=cfg, interaction_step=interaction_step) diff --git a/src/lerobot/rl/buffer.py b/src/lerobot/rl/buffer.py index 81aa29c480..dbb574bda3 100644 --- a/src/lerobot/rl/buffer.py +++ b/src/lerobot/rl/buffer.py @@ -200,6 +200,7 @@ def add( """Saves a transition, ensuring tensors are stored on the designated storage device.""" # Initialize storage if this is the first transition if not self.initialized: + print("action1: ", action) self._initialize_storage(state=state, action=action, complementary_info=complementary_info) # Store the transition in pre-allocated tensors @@ -209,7 +210,8 @@ def add( if not self.optimize_memory: # Only store next_states if not optimizing memory self.next_states[key][self.position].copy_(next_state[key].squeeze(dim=0)) - + # print("action3:", action) + # print("action4:", self.actions[self.position].shape) self.actions[self.position].copy_(action.squeeze(dim=0)) self.rewards[self.position] = reward self.dones[self.position] = done @@ -478,7 +480,7 @@ def from_lerobot_dataset( first_complementary_info = { k: v.to(device) for k, v in first_transition["complementary_info"].items() } - + print("action2: ", first_action) replay_buffer._initialize_storage( state=first_state, action=first_action, complementary_info=first_complementary_info ) diff --git a/src/lerobot/rl/gym_manipulator.py b/src/lerobot/rl/gym_manipulator.py index f9c9d0d7a7..21bfa77e23 100644 --- a/src/lerobot/rl/gym_manipulator.py +++ b/src/lerobot/rl/gym_manipulator.py @@ -14,10 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import logging import time from dataclasses import dataclass from typing import Any +from urllib.error import HTTPError, URLError +from urllib.request import Request, urlopen import gymnasium as gym import numpy as np @@ -44,6 +47,7 @@ MotorCurrentProcessorStep, Numpy2TorchActionProcessorStep, RewardClassifierProcessorStep, + RewardRuleProcessorStep, RobotActionToPolicyActionProcessorStep, TimeLimitProcessorStep, Torch2NumpyActionProcessorStep, @@ -56,6 +60,7 @@ RobotConfig, make_robot_from_config, so100_follower, + so101_follower, ) from lerobot.robots.robot import Robot from lerobot.robots.so100_follower.robot_kinematic_processor import ( @@ -78,6 +83,63 @@ from lerobot.utils.utils import log_say logging.basicConfig(level=logging.INFO) +LOGGER = logging.getLogger(__name__) + + +class RobotKinematicsServiceClient: + """HTTP client that mimics the RobotKinematics API.""" + + def __init__(self, base_url: str, timeout: float = 5.0) -> None: + self._base_url = base_url.rstrip("/") + self._timeout = timeout + + def _post(self, endpoint: str, payload: dict[str, Any]) -> dict[str, Any]: + """Send a JSON POST request to the configured service.""" + request = Request( + url=f"{self._base_url}{endpoint}", + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + ) + try: + with urlopen(request, timeout=self._timeout) as response: + body = response.read().decode("utf-8") + except HTTPError as exc: + error_body = exc.read().decode("utf-8", errors="ignore") + raise RuntimeError(f"HTTP {exc.code} from kinematics service: {error_body}") from exc + except URLError as exc: + raise RuntimeError( + f"Failed to reach kinematics service at {self._base_url}{endpoint}: {exc.reason}" + ) from exc + + try: + return json.loads(body) + except 
json.JSONDecodeError as exc: + raise RuntimeError(f"Invalid JSON response from kinematics service: {body}") from exc + + def forward_kinematics(self, joint_pos_deg: np.ndarray) -> np.ndarray: + payload = {"joint_positions": np.asarray(joint_pos_deg, dtype=float).tolist()} + response = self._post("/fk", payload) + if "pose" not in response: + raise RuntimeError("Kinematics service response missing 'pose'.") + return np.asarray(response["pose"], dtype=float) + + def inverse_kinematics( + self, + current_joint_pos: np.ndarray, + desired_ee_pose: np.ndarray, + position_weight: float = 1.0, + orientation_weight: float = 0.01, + ) -> np.ndarray: + payload = { + "current_joint_positions": np.asarray(current_joint_pos, dtype=float).tolist(), + "desired_pose": np.asarray(desired_ee_pose, dtype=float).tolist(), + "position_weight": position_weight, + "orientation_weight": orientation_weight, + } + response = self._post("/ik", payload) + if "joint_positions" not in response: + raise RuntimeError("Kinematics service response missing 'joint_positions'.") + return np.asarray(response["joint_positions"], dtype=float) @dataclass @@ -115,6 +177,8 @@ def reset_follower_position(robot_arm: Robot, target_position: np.ndarray) -> No action_dict = dict(zip(current_position_dict, pose, strict=False)) robot_arm.bus.sync_write("Goal_Position", action_dict) busy_wait(0.015) + current_position_dict = robot_arm.bus.sync_read("Present_Position") + print("current_position_dict:", current_position_dict) class RobotEnv(gym.Env): @@ -234,9 +298,9 @@ def reset( # self.robot.reset() start_time = time.perf_counter() if self.reset_pose is not None: - log_say("Reset the environment.", play_sounds=True) + print("Reset the environment.") reset_follower_position(self.robot, np.array(self.reset_pose)) - log_say("Reset the environment done.", play_sounds=True) + print("Reset the environment done.") busy_wait(self.reset_time_s - (time.perf_counter() - start_time)) @@ -252,7 +316,12 @@ def reset( def step(self, action) -> tuple[dict[str, np.ndarray], float, bool, bool, dict[str, Any]]: """Execute one environment step with given action.""" joint_targets_dict = {f"{key}.pos": action[i] for i, key in enumerate(self.robot.bus.motors.keys())} + + print("send action: ", joint_targets_dict) + if joint_targets_dict["elbow_flex.pos"] < 3.7037: + joint_targets_dict["elbow_flex.pos"] = 3.7037 + print(joint_targets_dict["elbow_flex.pos"]) self.robot.send_action(joint_targets_dict) obs = self._get_observation() @@ -396,12 +465,19 @@ def make_processors( # Set up kinematics solver if inverse kinematics is configured kinematics_solver = None - if cfg.processor.inverse_kinematics is not None: - kinematics_solver = RobotKinematics( - urdf_path=cfg.processor.inverse_kinematics.urdf_path, - target_frame_name=cfg.processor.inverse_kinematics.target_frame_name, - joint_names=motor_names, - ) + inverse_cfg = cfg.processor.inverse_kinematics + if inverse_cfg is not None: + if inverse_cfg.use_service: + if not inverse_cfg.service_url: + raise ValueError("cfg.processor.inverse_kinematics.service_url must be set when use_service is True.") + LOGGER.info("Using RobotKinematics HTTP service at %s", inverse_cfg.service_url) + kinematics_solver = RobotKinematicsServiceClient(base_url=inverse_cfg.service_url) + else: + kinematics_solver = RobotKinematics( + urdf_path=inverse_cfg.urdf_path, + target_frame_name=inverse_cfg.target_frame_name, + joint_names=motor_names, + ) env_pipeline_steps = [VanillaObservationProcessorStep()] @@ -455,6 +531,15 @@ def 
@@ -455,6 +531,15 @@ def make_processors(
                 terminate_on_success=terminate_on_success,
             )
         )
+    elif (cfg.processor.reward_rule is not None):
+        env_pipeline_steps.append(RewardRuleProcessorStep(
+            target_x=cfg.processor.reward_rule.target_x,
+            target_y=cfg.processor.reward_rule.target_y,
+            target_z=cfg.processor.reward_rule.target_z,
+            radius=cfg.processor.reward_rule.radius,
+            radius_z=cfg.processor.reward_rule.radius_z,
+            terminate_on_success=terminate_on_success,
+        ))

     env_pipeline_steps.append(AddBatchDimensionProcessorStep())
     env_pipeline_steps.append(DeviceProcessorStep(device=device))
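Editor's note (not part of the patch): RewardRuleProcessorStep is registered elsewhere and its body is not included in this diff. Its constructor arguments (target_x/y/z, radius, radius_z, terminate_on_success) suggest a distance-to-target success rule along these lines; this is a hypothetical sketch, not the actual implementation:

# Hypothetical sketch of the rule implied by the parameters above; the real step may differ.
import numpy as np

def reward_rule(ee_xyz: np.ndarray, target_x: float, target_y: float, target_z: float,
                radius: float, radius_z: float) -> tuple[float, bool]:
    """Return (reward, success): success when the EE sits inside a cylinder around the target."""
    dist_xy = float(np.linalg.norm(ee_xyz[:2] - np.array([target_x, target_y])))
    dist_z = abs(float(ee_xyz[2]) - target_z)
    success = dist_xy <= radius and dist_z <= radius_z
    return (1.0 if success else 0.0), success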
@@ -642,13 +727,14 @@ def control_loop(
     episode_start_time = time.perf_counter()
     while episode_idx < cfg.dataset.num_episodes_to_record:
+        print("episode_idx: ", episode_idx)
         step_start_time = time.perf_counter()

         # Create a neutral action (no movement)
         neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)
         if use_gripper:
             neutral_action = torch.cat([neutral_action, torch.tensor([1.0])])  # Gripper stay
-
+        #print(neutral_action)
         # Use the new step function
         transition = step_env_and_process_transition(
             env=env,
@@ -657,6 +743,7 @@
             env_processor=env_processor,
             action_processor=action_processor,
         )
+        # print(transition)

         terminated = transition.get(TransitionKey.DONE, False)
         truncated = transition.get(TransitionKey.TRUNCATED, False)
@@ -764,6 +851,7 @@ def main(cfg: GymManipulatorConfig) -> None:
         exit()

     control_loop(env, env_processor, action_processor, teleop_device, cfg)
+    print("finish")


 if __name__ == "__main__":
diff --git a/src/lerobot/rl/learner.py b/src/lerobot/rl/learner.py
index d9758d3a35..dbb2acb6ae 100644
--- a/src/lerobot/rl/learner.py
+++ b/src/lerobot/rl/learner.py
@@ -69,8 +69,18 @@
 from lerobot.rl.buffer import ReplayBuffer, concatenate_batch_transitions
 from lerobot.rl.process import ProcessSignalHandler
 from lerobot.rl.wandb_utils import WandBLogger
-from lerobot.robots import so100_follower  # noqa: F401
-from lerobot.teleoperators import gamepad, so101_leader  # noqa: F401
+from lerobot.robots import (  # noqa: F401
+    RobotConfig,
+    make_robot_from_config,
+    so100_follower,
+    so101_follower,
+)
+from lerobot.teleoperators import (
+    gamepad,  # noqa: F401
+    keyboard,  # noqa: F401
+    make_teleoperator_from_config,
+    so101_leader,  # noqa: F401
+)
 from lerobot.teleoperators.utils import TeleopEvents
 from lerobot.transport import services_pb2_grpc
 from lerobot.transport.utils import (
@@ -100,7 +110,7 @@
     init_logging,
 )

-from .learner_service import MAX_WORKERS, SHUTDOWN_TIMEOUT, LearnerService
+from lerobot.rl.learner_service import MAX_WORKERS, SHUTDOWN_TIMEOUT, LearnerService


 @parser.wrap()
@@ -326,7 +336,8 @@ def add_actor_information_and_train(
     log_training_info(cfg=cfg, policy=policy)

-    replay_buffer = initialize_replay_buffer(cfg, device, storage_device)
+    # replay_buffer = initialize_replay_buffer(cfg, device, storage_device)
+    replay_buffer = initialize_offline_replay_buffer(cfg, device, storage_device)
     batch_size = cfg.batch_size
     offline_replay_buffer = None
@@ -378,6 +389,7 @@ def add_actor_information_and_train(
         # Wait until the replay buffer has enough samples to start training
         if len(replay_buffer) < online_step_before_learning:
+            #print("continue")
             continue

         if online_iterator is None:
diff --git a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py
index 87e832db6e..e2c9bb0d22 100644
--- a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py
+++ b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py
@@ -20,7 +20,7 @@
 import numpy as np

 from lerobot.configs.types import FeatureType, PipelineFeatureType, PolicyFeature
-from lerobot.model.kinematics import RobotKinematics
+from lerobot.model.kinematics import KinematicsLike
 from lerobot.processor import (
     EnvTransition,
     ObservationProcessorStep,
@@ -60,7 +60,7 @@ class EEReferenceAndDelta(RobotActionProcessorStep):
         _command_when_disabled: Internal state to hold the last command while disabled.
     """

-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     end_effector_step_sizes: dict
     motor_names: list[str]
     use_latched_reference: bool = (
@@ -226,7 +226,7 @@ def action(self, action: RobotAction) -> RobotAction:
                 n = float(np.linalg.norm(dpos))
                 if n > self.max_ee_step_m and n > 0:
                     pos = self._last_pos + dpos * (self.max_ee_step_m / n)
-                    raise ValueError(f"EE jump {n:.3f}m > {self.max_ee_step_m}m")
+                    # raise ValueError(f"EE jump {n:.3f}m > {self.max_ee_step_m}m")

             self._last_pos = pos
@@ -265,7 +265,7 @@ class InverseKinematicsEEToJoints(RobotActionProcessorStep):
             If False, use the solution from the previous step.
     """

-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     motor_names: list[str]
     q_curr: np.ndarray | None = field(default=None, init=False, repr=False)
     initial_guess_current_joints: bool = True
@@ -401,7 +401,7 @@ def transform_features(

 def compute_forward_kinematics_joints_to_ee(
-    joints: dict[str, Any], kinematics: RobotKinematics, motor_names: list[str]
+    joints: dict[str, Any], kinematics: KinematicsLike, motor_names: list[str]
 ) -> dict[str, Any]:
     motor_joint_values = [joints[f"{n}.pos"] for n in motor_names]
@@ -435,7 +435,7 @@ class ForwardKinematicsJointsToEEObservation(ObservationProcessorStep):
         kinematics: The robot's kinematic model.
     """

-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     motor_names: list[str]

     def observation(self, observation: dict[str, Any]) -> dict[str, Any]:
@@ -468,7 +468,7 @@ class ForwardKinematicsJointsToEEAction(RobotActionProcessorStep):
         kinematics: The robot's kinematic model.
     """

-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     motor_names: list[str]

     def action(self, action: RobotAction) -> RobotAction:
@@ -491,7 +491,7 @@ def transform_features(
 @ProcessorStepRegistry.register(name="forward_kinematics_joints_to_ee")
 @dataclass
 class ForwardKinematicsJointsToEE(ProcessorStep):
-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     motor_names: list[str]

     def __post_init__(self):
@@ -528,7 +528,7 @@ class InverseKinematicsRLStep(ProcessorStep):
     This is modified from the InverseKinematicsEEToJoints step to be used in the RL pipeline.
     """

-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     motor_names: list[str]
     q_curr: np.ndarray | None = field(default=None, init=False, repr=False)
     initial_guess_current_joints: bool = True
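Editor's note (not part of the patch): the type-hint change from RobotKinematics to KinematicsLike lets these processor steps accept either the local URDF solver or the HTTP client added above. The actual definition lives in lerobot.model.kinematics and is not shown in this diff; a sketch consistent with how both implementations are called would be:

# Hypothetical sketch of the interface both RobotKinematics and RobotKinematicsServiceClient
# are assumed to satisfy; the real definition in lerobot.model.kinematics may differ.
from typing import Protocol
import numpy as np

class KinematicsLike(Protocol):
    def forward_kinematics(self, joint_pos_deg: np.ndarray) -> np.ndarray:
        """Joint angles (degrees) -> 4x4 end-effector pose."""
        ...

    def inverse_kinematics(
        self,
        current_joint_pos: np.ndarray,
        desired_ee_pose: np.ndarray,
        position_weight: float = 1.0,
        orientation_weight: float = 0.01,
    ) -> np.ndarray:
        """Seed joint angles plus desired pose -> joint angles (degrees)."""
        ...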
""" - kinematics: RobotKinematics + kinematics: KinematicsLike motor_names: list[str] q_curr: np.ndarray | None = field(default=None, init=False, repr=False) initial_guess_current_joints: bool = True diff --git a/src/lerobot/robots/so101_follower/config_so101_follower.py b/src/lerobot/robots/so101_follower/config_so101_follower.py index 03c3530c2f..b2a412ab0c 100644 --- a/src/lerobot/robots/so101_follower/config_so101_follower.py +++ b/src/lerobot/robots/so101_follower/config_so101_follower.py @@ -39,3 +39,23 @@ class SO101FollowerConfig(RobotConfig): # Set to `True` for backward compatibility with previous policies/dataset use_degrees: bool = False + + + + # Default bounds for the end-effector position (in meters) + end_effector_bounds: dict[str, list[float]] = field( # bounds for the end-effector in x,y,z direction + default_factory=lambda: { + "min": [-1.0, -1.0, -1.0], # min x, y, z + "max": [1.0, 1.0, 1.0], # max x, y, z + } + ) + + max_gripper_pos: float = 50 # maximum gripper position that the gripper will be open at + + end_effector_step_sizes: dict[str, float] = field( # maximum step size for the end-effector in x,y,z direction + default_factory=lambda: { + "x": 0.02, + "y": 0.02, + "z": 0.02, + } + ) diff --git a/src/lerobot/scripts/lerobot_find_joint_limits.py b/src/lerobot/scripts/lerobot_find_joint_limits.py index 07d57a7608..0ce429ad03 100644 --- a/src/lerobot/scripts/lerobot_find_joint_limits.py +++ b/src/lerobot/scripts/lerobot_find_joint_limits.py @@ -42,6 +42,7 @@ koch_follower, make_robot_from_config, so100_follower, + so101_follower, ) from lerobot.teleoperators import ( # noqa: F401 TeleoperatorConfig, @@ -49,6 +50,7 @@ koch_leader, make_teleoperator_from_config, so100_leader, + so101_leader, ) from lerobot.utils.robot_utils import busy_wait diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 0cc6e037fd..657f17fc09 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -182,7 +182,7 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): if is_main_process: logging.info("Creating dataset") dataset = make_dataset(cfg) - + logging.info("Creating dataset Done") accelerator.wait_for_everyone() # Now all other processes can safely load the dataset diff --git a/src/lerobot/teleoperators/keyboard/teleop_keyboard.py b/src/lerobot/teleoperators/keyboard/teleop_keyboard.py index 6f53a17c7d..f37097ef43 100644 --- a/src/lerobot/teleoperators/keyboard/teleop_keyboard.py +++ b/src/lerobot/teleoperators/keyboard/teleop_keyboard.py @@ -94,6 +94,7 @@ def connect(self) -> None: on_release=self._on_release, ) self.listener.start() + print("listener.start()") else: logging.info("pynput not available - skipping local keyboard listener.") self.listener = None @@ -107,7 +108,7 @@ def _on_press(self, key): def _on_release(self, key): if hasattr(key, "char"): - self.event_queue.put((key.char, False)) + self.event_queue.put((key.char, True)) if key == keyboard.Key.esc: logging.info("ESC pressed, disconnecting.") self.disconnect() @@ -161,6 +162,8 @@ def __init__(self, config: KeyboardEndEffectorTeleopConfig): super().__init__(config) self.config = config self.misc_keys_queue = Queue() + self.is_intervention = False + self.use_policy = False @property def action_features(self) -> dict: @@ -191,36 +194,39 @@ def get_action(self) -> dict[str, Any]: # Generate action based on current key states for key, val in self.current_pressed.items(): - if key == keyboard.Key.up: + if key == 'd': delta_y 
diff --git a/src/lerobot/scripts/lerobot_find_joint_limits.py b/src/lerobot/scripts/lerobot_find_joint_limits.py
index 07d57a7608..0ce429ad03 100644
--- a/src/lerobot/scripts/lerobot_find_joint_limits.py
+++ b/src/lerobot/scripts/lerobot_find_joint_limits.py
@@ -42,6 +42,7 @@
     koch_follower,
     make_robot_from_config,
     so100_follower,
+    so101_follower,
 )
 from lerobot.teleoperators import (  # noqa: F401
     TeleoperatorConfig,
@@ -49,6 +50,7 @@
     koch_leader,
     make_teleoperator_from_config,
     so100_leader,
+    so101_leader,
 )
 from lerobot.utils.robot_utils import busy_wait
diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py
index 0cc6e037fd..657f17fc09 100644
--- a/src/lerobot/scripts/lerobot_train.py
+++ b/src/lerobot/scripts/lerobot_train.py
@@ -182,7 +182,7 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
     if is_main_process:
         logging.info("Creating dataset")
         dataset = make_dataset(cfg)
-
+    logging.info("Creating dataset Done")
     accelerator.wait_for_everyone()

     # Now all other processes can safely load the dataset
diff --git a/src/lerobot/teleoperators/keyboard/teleop_keyboard.py b/src/lerobot/teleoperators/keyboard/teleop_keyboard.py
index 6f53a17c7d..f37097ef43 100644
--- a/src/lerobot/teleoperators/keyboard/teleop_keyboard.py
+++ b/src/lerobot/teleoperators/keyboard/teleop_keyboard.py
@@ -94,6 +94,7 @@ def connect(self) -> None:
                 on_release=self._on_release,
             )
             self.listener.start()
+            print("listener.start()")
         else:
             logging.info("pynput not available - skipping local keyboard listener.")
             self.listener = None
@@ -107,7 +108,7 @@ def _on_press(self, key):

     def _on_release(self, key):
         if hasattr(key, "char"):
-            self.event_queue.put((key.char, False))
+            self.event_queue.put((key.char, True))
         if key == keyboard.Key.esc:
             logging.info("ESC pressed, disconnecting.")
             self.disconnect()
@@ -161,6 +162,8 @@ def __init__(self, config: KeyboardEndEffectorTeleopConfig):
         super().__init__(config)
         self.config = config
         self.misc_keys_queue = Queue()
+        self.is_intervention = False
+        self.use_policy = False

     @property
     def action_features(self) -> dict:
@@ -191,36 +194,39 @@ def get_action(self) -> dict[str, Any]:

         # Generate action based on current key states
         for key, val in self.current_pressed.items():
-            if key == keyboard.Key.up:
+            if key == 'd':
                 delta_y = -int(val)
-            elif key == keyboard.Key.down:
+            elif key == 'a':
                 delta_y = int(val)
-            elif key == keyboard.Key.left:
+            elif key == 'w':
                 delta_x = int(val)
-            elif key == keyboard.Key.right:
+            elif key == 's':
                 delta_x = -int(val)
-            elif key == keyboard.Key.shift:
+            elif key == 'f':
                 delta_z = -int(val)
-            elif key == keyboard.Key.shift_r:
+            elif key == 'g':
                 delta_z = int(val)
-            elif key == keyboard.Key.ctrl_r:
-                # Gripper actions are expected to be between 0 (close), 1 (stay), 2 (open)
-                gripper_action = int(val) + 1
-            elif key == keyboard.Key.ctrl_l:
-                gripper_action = int(val) - 1
             elif val:
                 # If the key is pressed, add it to the misc_keys_queue
                 # this will record key presses that are not part of the delta_x, delta_y, delta_z
                 # this is useful for retrieving other events like interventions for RL, episode success, etc.
                 self.misc_keys_queue.put(key)

+        print("self.current_pressed:", self.current_pressed)
         self.current_pressed.clear()

+        # Check if any movement keys are currently pressed (indicates intervention)
+        if (delta_x != 0.0 or delta_y != 0.0 or delta_z != 0.0):
+            self.is_intervention = True
+        else:
+            self.is_intervention = False
+
         action_dict = {
             "delta_x": delta_x,
             "delta_y": delta_y,
             "delta_z": delta_z,
         }
+        print("action_dict: ", action_dict)

         if self.config.use_gripper:
             action_dict["gripper"] = gripper_action
@@ -251,20 +257,11 @@ def get_teleop_events(self) -> dict[str, Any]:
                 TeleopEvents.TERMINATE_EPISODE: False,
                 TeleopEvents.SUCCESS: False,
                 TeleopEvents.RERECORD_EPISODE: False,
+                TeleopEvents.USE_POLICY: False,
             }

         # Check if any movement keys are currently pressed (indicates intervention)
-        movement_keys = [
-            keyboard.Key.up,
-            keyboard.Key.down,
-            keyboard.Key.left,
-            keyboard.Key.right,
-            keyboard.Key.shift,
-            keyboard.Key.shift_r,
-            keyboard.Key.ctrl_r,
-            keyboard.Key.ctrl_l,
-        ]
-        is_intervention = any(self.current_pressed.get(key, False) for key in movement_keys)
+        # is_intervention = any(self.current_pressed.get(key, False) for key in movement_keys)

         # Check for episode control commands from misc_keys_queue
         terminate_episode = False
@@ -274,7 +271,7 @@ def get_teleop_events(self) -> dict[str, Any]:
         # Process any pending misc keys
         while not self.misc_keys_queue.empty():
             key = self.misc_keys_queue.get_nowait()
-            if key == "s":
+            if key == "p":
                 success = True
             elif key == "r":
                 terminate_episode = True
@@ -282,10 +279,14 @@ def get_teleop_events(self) -> dict[str, Any]:
             elif key == "q":
                 terminate_episode = True
                 success = False
+            elif key == "t":
+                self.use_policy = not self.use_policy
+                print("use_policy: ", self.use_policy)

         return {
-            TeleopEvents.IS_INTERVENTION: is_intervention,
+            TeleopEvents.IS_INTERVENTION: self.is_intervention,
             TeleopEvents.TERMINATE_EPISODE: terminate_episode,
             TeleopEvents.SUCCESS: success,
             TeleopEvents.RERECORD_EPISODE: rerecord_episode,
+            TeleopEvents.USE_POLICY: self.use_policy,
         }
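Editor's note (not part of the patch): with the remapping above, translation is driven by w/a/s/d, f/g moves z, 'p' marks success, 'r'/'q' end the episode, and 't' toggles the new USE_POLICY event. A minimal polling loop might look like the sketch below; the config construction is abbreviated and other required fields are omitted for brevity.

# Illustrative polling loop for the remapped keyboard teleop (construction details elided).
teleop = KeyboardEndEffectorTeleop(KeyboardEndEffectorTeleopConfig(use_gripper=True))
teleop.connect()
while True:
    action = teleop.get_action()          # {"delta_x": ..., "delta_y": ..., "delta_z": ..., "gripper": ...}
    events = teleop.get_teleop_events()   # now includes TeleopEvents.USE_POLICY
    if events[TeleopEvents.TERMINATE_EPISODE]:
        break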
("DISPLAY" not in os.environ) and ("linux" in sys.platform): + logging.info("No DISPLAY set. Skipping pynput import.") + raise ImportError("pynput blocked intentionally due to no display.") + from pynput import keyboard +except ImportError: + keyboard = None + PYNPUT_AVAILABLE = False +except Exception as e: + keyboard = None + PYNPUT_AVAILABLE = False + logging.info(f"Could not import pynput: {e}") +##hashot class SO101Leader(Teleoperator): """ @@ -55,10 +73,55 @@ def __init__(self, config: SO101LeaderConfig): calibration=self.calibration, ) + self.event_queue = Queue() + self.current_pressed = Queue() + self.listener = None + self.logs = {} + self.is_keyboard_connected = False + + def keyboard_connect(self) -> None: + if self.is_keyboard_connected: + raise DeviceAlreadyConnectedError( + "Keyboard is already connected. Do not run `robot.connect()` twice." + ) + + if PYNPUT_AVAILABLE: + logging.info("pynput is available - enabling local keyboard listener.") + self.listener = keyboard.Listener( + on_press=self._on_press, + on_release=self._on_release, + ) + self.listener.start() + else: + logging.info("pynput not available - skipping local keyboard listener.") + self.listener = None + + def _drain_pressed_keys(self): + while not self.event_queue.empty(): + key_char, is_pressed = self.event_queue.get_nowait() + if is_pressed: + self.current_pressed.put(key_char) + + def _on_press(self, key): + if hasattr(key, "char"): + self.event_queue.put((key.char, True)) + + def _on_release(self, key): + if hasattr(key, "char"): + self.event_queue.put((key.char, False)) + if key == keyboard.Key.esc: + logging.info("ESC pressed, disconnecting.") + self.disconnect() + @property def action_features(self) -> dict[str, type]: - return {f"{motor}.pos": float for motor in self.bus.motors} - + #return {f"{motor}.pos": float for motor in self.bus.motors} + action_names = ["delta_x_ee", "delta_y_ee", "delta_z_ee","gripper_delta"] + # if cfg.wrapper.use_gripper: + # action_names.append("gripper_delta") + return { "dtype": "float32", + "shape": (len(action_names),), + "names": action_names,} @property def feedback_features(self) -> dict[str, type]: return {} @@ -79,6 +142,7 @@ def connect(self, calibrate: bool = True) -> None: self.calibrate() self.configure() + self.keyboard_connect() logger.info(f"{self} connected.") @property @@ -141,9 +205,73 @@ def get_action(self) -> dict[str, float]: action = self.bus.sync_read("Present_Position") action = {f"{motor}.pos": val for motor, val in action.items()} dt_ms = (time.perf_counter() - start) * 1e3 - logger.debug(f"{self} read action: {dt_ms:.1f}ms") + logger.info(f"{self} read action: {dt_ms:.1f}ms") return action + from typing import Any + def get_teleop_events(self) -> dict[str, Any]: + """ + Get extra control events from the keyboard such as intervention status, + episode termination, success indicators, etc. 
+ + Keyboard mappings: + - Any movement keys pressed = intervention active + - 's' key = success (terminate episode successfully) + - 'r' key = rerecord episode (terminate and rerecord) + - 'q' key = quit episode (terminate without success) + + Returns: + Dictionary containing: + - is_intervention: bool - Whether human is currently intervening + - terminate_episode: bool - Whether to terminate the current episode + - success: bool - Whether the episode was successful + - rerecord_episode: bool - Whether to rerecord the episode + """ + if not self.is_connected: + return { + TeleopEvents.IS_INTERVENTION: False, + TeleopEvents.TERMINATE_EPISODE: False, + TeleopEvents.SUCCESS: False, + TeleopEvents.RERECORD_EPISODE: False, + } + + # Check if any movement keys are currently pressed (indicates intervention) + movement_keys = [ + keyboard.Key.up, + keyboard.Key.down, + keyboard.Key.left, + keyboard.Key.right, + keyboard.Key.shift, + keyboard.Key.shift_r, + keyboard.Key.ctrl_r, + keyboard.Key.ctrl_l, + ] + is_intervention = True + + self._drain_pressed_keys() + # Check for episode control commands from misc_keys_queue + terminate_episode = False + success = False + rerecord_episode = False + + # Process any pending misc keys + while not self.current_pressed.empty(): + key = self.current_pressed.get_nowait() + if key == "s": + success = True + elif key == "r": + terminate_episode = True + rerecord_episode = True + elif key == "q": + terminate_episode = True + success = False + + return { + TeleopEvents.IS_INTERVENTION: is_intervention, + TeleopEvents.TERMINATE_EPISODE: terminate_episode, + TeleopEvents.SUCCESS: success, + TeleopEvents.RERECORD_EPISODE: rerecord_episode, + } def send_feedback(self, feedback: dict[str, float]) -> None: # TODO(rcadene, aliberts): Implement force feedback raise NotImplementedError @@ -154,3 +282,7 @@ def disconnect(self) -> None: self.bus.disconnect() logger.info(f"{self} disconnected.") + from typing import Any + + + \ No newline at end of file diff --git a/src/lerobot/teleoperators/utils.py b/src/lerobot/teleoperators/utils.py index 2103a1669c..bc8eea85cb 100644 --- a/src/lerobot/teleoperators/utils.py +++ b/src/lerobot/teleoperators/utils.py @@ -29,6 +29,7 @@ class TeleopEvents(Enum): RERECORD_EPISODE = "rerecord_episode" IS_INTERVENTION = "is_intervention" TERMINATE_EPISODE = "terminate_episode" + USE_POLICY = "use_policy" def make_teleoperator_from_config(config: TeleoperatorConfig) -> Teleoperator: diff --git a/t_checkport.sh b/t_checkport.sh new file mode 100644 index 0000000000..0ea1da9cb1 --- /dev/null +++ b/t_checkport.sh @@ -0,0 +1 @@ +lerobot-find-port diff --git a/t_record.sh b/t_record.sh new file mode 100644 index 0000000000..5f7d19fdb7 --- /dev/null +++ b/t_record.sh @@ -0,0 +1 @@ +python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config_so101.json diff --git a/tty.sh b/tty.sh new file mode 100644 index 0000000000..c10b828fe8 --- /dev/null +++ b/tty.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# List of device strings +device_list=( + "/dev/ttyACM0" + "/dev/ttyACM1" + "/dev/ttyACM2" + "/dev/ttyACM3" + "/dev/ttyACM4" +) + +# Loop through each device in the list +for stringx in "${device_list[@]}"; do + # Check for "follower" identifier (5AAF219986) + result_follower=$(udevadm info "$stringx" | grep "5AAF219986") + + # Check for "leader" identifier (5AAF218179) + result_leader=$(udevadm info "$stringx" | grep "5AAF218179") + + # Determine if it's a follower or leader based on the result + if [[ -n "$result_follower" ]]; then + echo 
"$stringx : follower!!!!" + elif [[ -n "$result_leader" ]]; then + echo "$stringx : leader!!!!" + + fi +done \ No newline at end of file diff --git a/viz.sh b/viz.sh new file mode 100644 index 0000000000..ee38a96f44 --- /dev/null +++ b/viz.sh @@ -0,0 +1 @@ +python3 src/lerobot/scripts/lerobot_dataset_viz.py --repo-id mengke/test10 --episode-index 0 \ No newline at end of file