diff --git a/.gitignore b/.gitignore index b47e22cbfa..359c0ad9e1 100644 --- a/.gitignore +++ b/.gitignore @@ -27,7 +27,7 @@ node_modules/ poetry.lock uv.lock Pipfile.lock - +miniconda3 ### Build & Distribution ### build/ dist/ diff --git a/cuda.py b/cuda.py new file mode 100644 index 0000000000..ee25da6935 --- /dev/null +++ b/cuda.py @@ -0,0 +1,2 @@ +import torch +print(torch.cuda.is_available()) \ No newline at end of file diff --git a/keyboard_test.py b/keyboard_test.py new file mode 100644 index 0000000000..b0502cea6d --- /dev/null +++ b/keyboard_test.py @@ -0,0 +1,72 @@ +#!/usr/bin/env python3 + +""" +Simple keyboard test utility. + +It collects all keys pressed on stdin and prints the batch every second. +Press Ctrl+C to exit. +""" + +from __future__ import annotations + +import select +import sys +import termios +import time +import tty + + +def _format_keys(keys: list[str]) -> str: + """Return a readable representation of the collected keys.""" + pretty = [] + for key in keys: + if key == "\n": + pretty.append("<enter>") + elif key == "\r": + continue # handled with newline on most terminals + elif key == "\t": + pretty.append("<tab>") + elif key == "\x7f": + pretty.append("<backspace>") + elif key.isprintable(): + pretty.append(key) + else: + pretty.append(f"<0x{ord(key):02x}>") + return " ".join(pretty) if pretty else "<no keys>" + + +def main() -> None: + fd = sys.stdin.fileno() + old_settings = termios.tcgetattr(fd) + + print("Keyboard test started. Press keys (Ctrl+C to quit).") + + try: + tty.setcbreak(fd) + window: list[str] = [] + last_tick = time.monotonic() + + while True: + # poll stdin; timeout keeps loop responsive for the 1s ticker + ready, _, _ = select.select([sys.stdin], [], [], 0.1) + if ready: + key = sys.stdin.read(1) + if key == "\x03": # Ctrl+C + raise KeyboardInterrupt + window.append(key) + + now = time.monotonic() + if now - last_tick >= 1.0: + stamp = time.strftime("%H:%M:%S") + print(f"[{stamp}] { _format_keys(window) }") + window.clear() + last_tick = now + + except KeyboardInterrupt: + print("\nExiting keyboard test.") + finally: + termios.tcsetattr(fd, termios.TCSADRAIN, old_settings) + + +if __name__ == "__main__": + main() diff --git a/rk_client.py b/rk_client.py new file mode 100644 index 0000000000..d053d38b5b --- /dev/null +++ b/rk_client.py @@ -0,0 +1,119 @@ +#!/usr/bin/env python3 +"""Command line client for the RobotKinematics HTTP service.""" + +from __future__ import annotations + +import argparse +import json +import sys +from typing import Any +from urllib.error import HTTPError, URLError +from urllib.request import Request, urlopen + +import numpy as np + + +def _post_json(url: str, payload: dict[str, Any]) -> dict[str, Any]: + data = json.dumps(payload).encode("utf-8") + request = Request(url, data=data, headers={"Content-Type": "application/json"}) + + try: + with urlopen(request) as response: + body = response.read().decode("utf-8") + return json.loads(body) + except HTTPError as exc: + error_body = exc.read().decode("utf-8", errors="ignore") + raise RuntimeError(f"Server returned HTTP {exc.code}: {error_body}") from exc + except URLError as exc: + raise RuntimeError(f"Failed to reach {url}: {exc.reason}") from exc + + +def command_fk(args: argparse.Namespace) -> None: + payload = {"joint_positions": args.joint_positions} + response = _post_json(f"{args.url}/fk", payload) + print(json.dumps(response, indent=2)) + + +def command_ik(args: argparse.Namespace) -> None: + pose_matrix = np.asarray(args.pose, dtype=float).reshape(4, 4).tolist() + payload = {
"current_joint_positions": args.current_joint_positions, + "desired_pose": pose_matrix, + } + if args.position_weight is not None: + payload["position_weight"] = args.position_weight + if args.orientation_weight is not None: + payload["orientation_weight"] = args.orientation_weight + + response = _post_json(f"{args.url}/ik", payload) + print(json.dumps(response, indent=2)) + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Client for the RobotKinematics HTTP service.") + parser.add_argument( + "--url", + default="http://127.0.0.1:8081", + help="Base URL for the rk_service (default: http://127.0.0.1:8081)", + ) + + subparsers = parser.add_subparsers(dest="command", required=True) + + fk_parser = subparsers.add_parser("fk", help="Call forward kinematics.") + fk_parser.add_argument( + "--joint-positions", + nargs="+", + type=float, + required=True, + metavar="ANGLE_DEG", + help="Joint positions in degrees.", + ) + fk_parser.set_defaults(func=command_fk) + + ik_parser = subparsers.add_parser("ik", help="Call inverse kinematics.") + ik_parser.add_argument( + "--current-joint-positions", + nargs="+", + type=float, + required=True, + metavar="ANGLE_DEG", + help="Current joint positions in degrees used as the IK seed.", + ) + ik_parser.add_argument( + "--pose", + nargs=16, + type=float, + required=True, + metavar="POSE_VALUE", + help="Desired end-effector 4x4 pose matrix in row-major order (16 values).", + ) + ik_parser.add_argument( + "--position-weight", + type=float, + default=None, + help="Optional position weight for IK.", + ) + ik_parser.add_argument( + "--orientation-weight", + type=float, + default=None, + help="Optional orientation weight for IK.", + ) + ik_parser.set_defaults(func=command_ik) + + return parser + + +def main(argv: list[str] | None = None) -> None: + parser = build_parser() + args = parser.parse_args(argv) + + try: + args.func(args) + except RuntimeError as exc: + parser.error(str(exc)) + + +if __name__ == "__main__": + main(sys.argv[1:]) + diff --git a/rk_service.py b/rk_service.py new file mode 100644 index 0000000000..d220ffe2a7 --- /dev/null +++ b/rk_service.py @@ -0,0 +1,192 @@ +#!/usr/bin/env python3 +"""Simple HTTP service exposing RobotKinematics forward and inverse kinematics. 
+ +Run on a machine that has the `placo` dependency installed so RobotKinematics works: + + python rk_service.py --urdf-path /path/to/robot.urdf --port 8000 + +Endpoints: + POST /fk + Body: {"joint_positions": [deg, ...]} + Response: {"pose": [[...], ...]} + + POST /ik + Body: { + "current_joint_positions": [deg, ...], + "desired_pose": [[...], ...], + "position_weight": 1.0, # optional + "orientation_weight": 0.01 # optional + } + Response: {"joint_positions": [deg, ...]} +""" + +from __future__ import annotations + +import argparse +import json +import logging +from http import HTTPStatus +from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer +from typing import Any, ClassVar + +import numpy as np + +from lerobot.model.kinematics import RobotKinematics + + +LOGGER = logging.getLogger("rk_service") + + +def _json_response(handler: BaseHTTPRequestHandler, status: HTTPStatus, payload: dict[str, Any]) -> None: + """Serialize payload and send it as the HTTP response.""" + body = json.dumps(payload).encode("utf-8") + handler.send_response(status.value) + handler.send_header("Content-Type", "application/json") + handler.send_header("Content-Length", str(len(body))) + handler.end_headers() + handler.wfile.write(body) + + +class KinematicsRequestHandler(BaseHTTPRequestHandler): + """HTTP request handler exposing RobotKinematics operations.""" + + kinematics: ClassVar[RobotKinematics] + suppress_logging: ClassVar[bool] = False + + def log_message(self, format: str, *args: Any) -> None: # noqa: D401 - `BaseHTTPRequestHandler` signature + if not self.suppress_logging: + super().log_message(format, *args) + + def do_GET(self) -> None: # noqa: N802 D401 - `BaseHTTPRequestHandler` signature + if self.path == "/healthz": + _json_response(self, HTTPStatus.OK, {"status": "ok"}) + else: + _json_response(self, HTTPStatus.NOT_FOUND, {"error": "Endpoint not found"}) + + def do_POST(self) -> None: # noqa: N802 D401 - `BaseHTTPRequestHandler` signature + content_length = int(self.headers.get("Content-Length", "0")) + body = self.rfile.read(content_length) if content_length else b"" + + try: + payload = json.loads(body.decode("utf-8")) + except json.JSONDecodeError as exc: + LOGGER.exception("Failed to decode request body as JSON") + _json_response(self, HTTPStatus.BAD_REQUEST, {"error": f"Invalid JSON: {exc}"}) + return + + if self.path == "/fk": + self._handle_forward_kinematics(payload) + elif self.path == "/ik": + self._handle_inverse_kinematics(payload) + else: + _json_response(self, HTTPStatus.NOT_FOUND, {"error": "Endpoint not found"}) + + def _handle_forward_kinematics(self, payload: dict[str, Any]) -> None: + try: + joints = payload["joint_positions"] + joint_array = np.asarray(joints, dtype=float) + except (KeyError, TypeError, ValueError) as exc: + _json_response(self, HTTPStatus.BAD_REQUEST, {"error": f"Invalid joint_positions: {exc}"}) + return + + try: + pose_matrix = self.kinematics.forward_kinematics(joint_array) + except Exception as exc: # placo may raise C++ exceptions that surface as generic Exception + LOGGER.exception("Forward kinematics failed") + _json_response(self, HTTPStatus.INTERNAL_SERVER_ERROR, {"error": f"forward_kinematics failed: {exc}"}) + return + + _json_response(self, HTTPStatus.OK, {"pose": pose_matrix.tolist()}) + + def _handle_inverse_kinematics(self, payload: dict[str, Any]) -> None: + try: + current = np.asarray(payload["current_joint_positions"], dtype=float) + desired_pose = np.asarray(payload["desired_pose"], dtype=float) + except (KeyError, TypeError, 
ValueError) as exc: + _json_response( + self, + HTTPStatus.BAD_REQUEST, + {"error": f"Invalid current_joint_positions or desired_pose: {exc}"}, + ) + return + + if desired_pose.shape != (4, 4): + _json_response( + self, + HTTPStatus.BAD_REQUEST, + {"error": f"desired_pose must be 4x4 matrix, got shape {desired_pose.shape}"}, + ) + return + + position_weight = float(payload.get("position_weight", 1.0)) + orientation_weight = float(payload.get("orientation_weight", 0.01)) + + try: + joint_solution = self.kinematics.inverse_kinematics( + current_joint_pos=current, + desired_ee_pose=desired_pose, + position_weight=position_weight, + orientation_weight=orientation_weight, + ) + except Exception as exc: # placo errors + LOGGER.exception("Inverse kinematics failed") + _json_response(self, HTTPStatus.INTERNAL_SERVER_ERROR, {"error": f"inverse_kinematics failed: {exc}"}) + return + + _json_response(self, HTTPStatus.OK, {"joint_positions": joint_solution.tolist()}) + + +def build_arg_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Expose RobotKinematics via a simple HTTP service.") + parser.add_argument("--host", default="127.0.0.1", help="Interface to bind the HTTP server (default: 127.0.0.1)") + parser.add_argument("--port", type=int, default=8081, help="Port for the HTTP server (default: 8081)") + parser.add_argument("--urdf-path", default="Simulation/SO101/so101_new_calib.urdf", help="Path to the robot URDF file.") + parser.add_argument( + "--target-frame", + default="gripper_frame_link", + help="End-effector frame name (default: gripper_frame_link)", + ) + parser.add_argument( + "--joint-names", + nargs="+", + default=None, + help="Optional list of joint names; defaults to URDF order when omitted.", + ) + parser.add_argument( + "--suppress-request-logs", + action="store_true", + help="Disable per-request logging emitted by the HTTP handler.", + ) + return parser + + +def main() -> None: + args = build_arg_parser().parse_args() + + logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(name)s: %(message)s") + + LOGGER.info("Loading RobotKinematics with URDF: %s", args.urdf_path) + kinematics = RobotKinematics( + urdf_path=args.urdf_path, + target_frame_name=args.target_frame, + joint_names=args.joint_names, + ) + + KinematicsRequestHandler.kinematics = kinematics + KinematicsRequestHandler.suppress_logging = bool(args.suppress_request_logs) + + server_address = (args.host, args.port) + httpd = ThreadingHTTPServer(server_address, KinematicsRequestHandler) + LOGGER.info("RobotKinematics service listening on http://%s:%d", args.host, args.port) + + try: + httpd.serve_forever() + except KeyboardInterrupt: + LOGGER.info("Shutting down RobotKinematics service.") + finally: + httpd.server_close() + + +if __name__ == "__main__": + main() + diff --git a/run_actor.sh b/run_actor.sh new file mode 100644 index 0000000000..5f1974fef3 --- /dev/null +++ b/run_actor.sh @@ -0,0 +1,5 @@ +export HF_ENDPOINT=https://hf-mirror.com +#export HF_ENDPOINT=/root/.cache/huggingface/lerobot/ + +export HF_TOKEN=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 +python -m lerobot.rl.actor --config_path src/lerobot/configs/train_config_hilserl_so101_rule_reward_actor.json \ No newline at end of file diff --git a/run_docker.sh b/run_docker.sh new file mode 100644 index 0000000000..9237e0af57 --- /dev/null +++ b/run_docker.sh @@ -0,0 +1 @@ +docker run --runtime=nvidia --device=/dev/video3 --device=/dev/video5 --device=/dev/ttyACM3 --device=/dev/ttyACM1 --device=/dev/ttyACM0 -v 
~/.cache:/root/.cache -v /tmp/.X11-unix:/tmp/.X11-unix -e DISPLAY=:1 -it --rm -v "$PWD":/workspace -w /workspace --ulimit memlock=-1 --ulimit stack=6700108864 --shm-size 7000000000 mk:lerobot2 diff --git a/run_learner.sh b/run_learner.sh new file mode 100644 index 0000000000..9838cd2da2 --- /dev/null +++ b/run_learner.sh @@ -0,0 +1,3 @@ +export HF_ENDPOINT=https://hf-mirror.com +export HF_TOKEN=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 +python -m lerobot.rl.learner --config_path src/lerobot/configs/train_config_hilserl_so101_rule_reward_learner.json \ No newline at end of file diff --git a/s.sh b/s.sh new file mode 100644 index 0000000000..2445210703 --- /dev/null +++ b/s.sh @@ -0,0 +1,4 @@ +sudo chmod 666 /dev/ttyACM0 +sudo chmod 666 /dev/ttyACM1 +sudo chmod 666 /dev/ttyACM2 +sudo chmod 666 /dev/ttyACM3 \ No newline at end of file diff --git a/src/lerobot/configs/env_config_so101.json b/src/lerobot/configs/env_config_so101.json new file mode 100644 index 0000000000..93ce034470 --- /dev/null +++ b/src/lerobot/configs/env_config_so101.json @@ -0,0 +1,91 @@ +{ + "env": { + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 30 + }, + "right": { + "type": "opencv", + "index_or_path": 7, + "height": 480, + "width": 640, + "fps": 30 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + "image_preprocessing": { + "crop_params_dict": { + + "observation.images.top": [0, 136, 360, 360] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "urdf_path": "/home/sr/so101/SO-ARM100-main/Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_classifier": { + "pretrained_path": null, + "success_threshold": 0.5, + "success_reward": 1.0 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 10 + }, + "dataset": { + "repo_id": "mengke/test11", + "root": null, + "task": "ss", + "num_episodes_to_record": 1, + "replay_episode": null, + "push_to_hub": false + }, + "mode": "record", + "device": "cpu" +} \ No newline at end of file diff --git a/src/lerobot/configs/env_config_so101_eval.json b/src/lerobot/configs/env_config_so101_eval.json new file mode 100644 index 0000000000..b9cc02b004 --- /dev/null +++ b/src/lerobot/configs/env_config_so101_eval.json @@ -0,0 +1,90 @@ +{ + "env": { + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 15 + }, + "right": { + "type": "opencv", + "index_or_path": 3, + "height": 480, + "width": 640, + "fps": 15 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + 
"image_preprocessing": { + "crop_params_dict": { + "observation.images.top": [49, 189, 262, 305] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "urdf_path": "/home/sr/so101/SO-ARM100-main/Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_classifier": { + "pretrained_path": "outputs/train/2025-10-30/05-49-53_reward-classifier/checkpoints/last/pretrained_model", + "success_threshold": 0.5, + "success_reward": 1.0 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 10 + }, + "dataset": { + "repo_id": null, + "root": null, + "task": "eval", + "num_episodes_to_record": 10, + "replay_episode": null, + "push_to_hub": false + }, + "mode": null, + "device": "cpu" +} \ No newline at end of file diff --git a/src/lerobot/configs/env_config_so101_rule_reward.json b/src/lerobot/configs/env_config_so101_rule_reward.json new file mode 100644 index 0000000000..cb9f4e13a7 --- /dev/null +++ b/src/lerobot/configs/env_config_so101_rule_reward.json @@ -0,0 +1,93 @@ +{ + "env": { + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 30 + }, + "right": { + "type": "opencv", + "index_or_path": 7, + "height": 480, + "width": 640, + "fps": 30 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + "image_preprocessing": { + "crop_params_dict": { + + "observation.images.top": [0, 136, 360, 360] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "urdf_path": "/home/sr/so101/SO-ARM100-main/Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_rule": { + "target_x": 0.26157, + "target_y": -0.1039, + "target_z": 0.060122, + "radius": 0.05, + "radius_z": 0.05 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 10 + }, + "dataset": { + "repo_id": "mengke/test11", + "root": "/home/sr/so101/lerobot/dataset/test7", + "task": "ss", + "num_episodes_to_record": 15, + "replay_episode": null, + "push_to_hub": false + }, + "mode": "record", + "device": "cpu" +} \ No newline at end of file diff --git a/src/lerobot/configs/train_config_hilserl_so101.json b/src/lerobot/configs/train_config_hilserl_so101.json new file mode 100644 index 0000000000..d4638a970e --- /dev/null +++ b/src/lerobot/configs/train_config_hilserl_so101.json @@ -0,0 +1,275 @@ +{ + "output_dir": null, + "job_name": "default", + "resume": false, + "seed": 1000, + "num_workers": 4, + 
"batch_size": 256, + "steps": 100000, + "log_freq": 1, + "save_checkpoint": true, + "save_freq": 1000, + "wandb": { + "enable": true, + "project": "so101_real", + "disable_artifact": true + }, + "dataset": { + "repo_id": "test6", + "root": "/workspace/dataset/test6" + }, + "policy": { + "type": "sac", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ENV": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "device": "cuda", + "use_amp": false, + "dataset_stats": { + "observation.images.top": { + "mean": [ + 0.485, + 0.456, + 0.406 + ], + "std": [ + 0.229, + 0.224, + 0.225 + ] + }, + "observation.state": { + "min": [ + -15.0, + -20.0, + 10.0, + 65.0, + 0.0, + 0.0, + -40.0, + -60.0, + -50.0, + -80.0, + -10.0, + -110.0, + 0.26, + -0.06, + 0.25 + ], + "max": [ + 15.0, + 15.0, + 40.0, + 110.0, + 0.5, + 31.0, + 53.0, + 61.0, + 67.0, + 90.0, + 6.0, + 100.0, + 0.32, + 0.06, + 0.35 + ] + }, + "action": { + "min": [ + -1.0, + -1.0, + -1.0 + ], + "max": [ + 1.0, + 1.0, + 1.0 + ] + } + }, + "num_discrete_actions": 3, + "storage_device": "cuda", + "vision_encoder_name": "helper2424/resnet10", + "freeze_vision_encoder": true, + "image_encoder_hidden_dim": 32, + "shared_encoder": true, + "online_steps": 1000000, + "online_buffer_capacity": 30000, + "offline_buffer_capacity": 10000, + "online_step_before_learning": 10, + "policy_update_freq": 1, + "discount": 0.97, + "async_prefetch": false, + "temperature_init": 0.01, + "num_critics": 2, + "num_subsample_critics": null, + "critic_lr": 0.0003, + "actor_lr": 0.0003, + "temperature_lr": 0.0003, + "critic_target_update_weight": 0.005, + "utd_ratio": 2, + "state_encoder_hidden_dim": 256, + "latent_dim": 256, + "target_entropy": null, + "use_backup_entropy": true, + "grad_clip_norm": 40.0, + "critic_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true, + "final_activation": null + }, + "actor_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true + }, + "policy_kwargs": { + "use_tanh_squash": true, + "std_min": -5, + "std_max": 2, + "init_final": 0.05 + }, + "actor_learner_config": { + "learner_host": "127.0.0.1", + "learner_port": 50051, + "policy_parameters_push_frequency": 40 + }, + "concurrency": { + "actor": "threads", + "learner": "threads" + }, + "repo_id": "aractingi/pick_lift_cube_policy" + }, + "env": { + "type": "gym_manipulator", + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 15 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + "image_preprocessing": { + "crop_params_dict": { + "observation.images.top": [49, 189, 262, 305] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "use_service": true, + 
"service_url": "http://127.0.0.1:8081", + "urdf_path": "Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_classifier": { + "pretrained_path": null, + "success_threshold": 0.5, + "success_reward": 1.0 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 1, + + "task": "", + "features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "features_map": {"observation.images.top": "observation.images.top", "observation.state": "observation.state","action":"action"} + } +} \ No newline at end of file diff --git a/src/lerobot/configs/train_config_hilserl_so101.json_bk b/src/lerobot/configs/train_config_hilserl_so101.json_bk new file mode 100644 index 0000000000..39f5b9b4f6 --- /dev/null +++ b/src/lerobot/configs/train_config_hilserl_so101.json_bk @@ -0,0 +1,329 @@ +{ + "output_dir": null, + "job_name": "default", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 256, + "steps": 100000, + "log_freq": 500, + "save_checkpoint": true, + "save_freq": 2000000, + "wandb": { + "enable": true, + "project": "so100_real", + "disable_artifact": true + }, + "dataset": { + "repo_id": "hf_username/dataset_name", + "use_imagenet_stats": false + }, + "policy": { + "type": "sac", + "n_obs_steps": 1, + "repo_id": "hf_username/policy_name", + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ENV": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.images.side": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 15 + ] + } + }, + "device": "cuda", + "use_amp": false, + "dataset_stats": { + "observation.images.side": { + "mean": [ + 0.485, + 0.456, + 0.406 + ], + "std": [ + 0.229, + 0.224, + 0.225 + ] + }, + "observation.images.front": { + "mean": [ + 0.485, + 0.456, + 0.406 + ], + "std": [ + 0.229, + 0.224, + 0.225 + ] + }, + "observation.state": { + "min": [ + -15.0, + -20.0, + 10.0, + 65.0, + 0.0, + 0.0, + -40.0, + -60.0, + -50.0, + -80.0, + -10.0, + -110.0, + -15.0, + -20.0, + 15.0, + 70.0, + -5.0, + 0.0, + 0.26, + -0.06, + 0.25 + ], + "max": [ + 15.0, + 15.0, + 40.0, + 110.0, + 0.5, + 31.0, + 53.0, + 61.0, + 67.0, + 90.0, + 6.0, + 100.0, + 15.0, + 12.0, + 40.0, + 110.0, + 0.5, + 31.0, + 0.32, + 0.06, + 0.35 + ] + }, + "action": { + "min": [ + -0.03, + -0.03, + -0.03 + ], + "max": [ + 0.03, + 0.03, + 0.03 + ] + } + }, + "num_discrete_actions": 3, + "storage_device": "cuda", + "vision_encoder_name": "helper2424/resnet10", + "freeze_vision_encoder": true, + "image_encoder_hidden_dim": 32, + "shared_encoder": true, + "online_steps": 1000000, + "online_env_seed": 10000, + "online_buffer_capacity": 30000, + "offline_buffer_capacity": 10000, + "online_step_before_learning": 100, + "policy_update_freq": 1, + "discount": 0.97, + "async_prefetch": false, + "temperature_init": 0.01, + "num_critics": 2, + "num_subsample_critics": null, + "critic_lr": 0.0003, + "actor_lr": 0.0003, + "temperature_lr": 0.0003, + "critic_target_update_weight": 0.005, + "utd_ratio": 2, + "state_encoder_hidden_dim": 256, + 
"latent_dim": 256, + "target_entropy": null, + "use_backup_entropy": true, + "grad_clip_norm": 40.0, + "critic_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true, + "final_activation": null + }, + "actor_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true + }, + "policy_kwargs": { + "use_tanh_squash": true, + "std_min": -5, + "std_max": 2, + "init_final": 0.05 + }, + "actor_learner_config": { + "learner_host": "127.0.0.1", + "learner_port": 50051, + "policy_parameters_push_frequency": 4 + }, + "concurrency": { + "actor": "threads", + "learner": "threads" + } + }, + "env": { + "type": "gym_manipulator", + "robot": { + "type": "so100_follower_end_effector", + "port": "/dev/ttyACM0", + "urdf_path": "path/to/your/robot.urdf", + "target_frame_name": "gripper_frame_link", + "cameras": { + "front": { + "type": "opencv", + "index_or_path": 2, + "height": 480, + "width": 640, + "fps": 30 + }, + "wrist": { + "type": "opencv", + "index_or_path": 8, + "height": 480, + "width": 640, + "fps": 30 + } + }, + "end_effector_bounds": { + "min": [ + -1.0, + -1.0, + -1.0 + ], + "max": [ + 1.0, + 1.0, + 1.0 + ] + }, + "max_gripper_pos": 30 + }, + "teleop": { + "type": "gamepad", + "use_gripper": true + }, + "wrapper": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true, + "add_ee_pose_to_observation": true, + "crop_params_dict": { + "observation.images.front": [ + 270, + 170, + 90, + 190 + ], + "observation.images.wrist": [ + 0, + 0, + 480, + 640 + ] + }, + "resize_size": [ + 128, + 128 + ], + "control_time_s": 20.0, + "use_gripper": true, + "gripper_quantization_threshold": null, + "gripper_penalty": -0.02, + "gripper_penalty_in_reward": false, + "fixed_reset_joint_positions": [ + 0.0, + -20.0, + 20.0, + 90.0, + 0.0, + 30.0 + ], + "reset_time_s": 2.5, + "control_mode": "gamepad" + }, + "name": "real_robot", + "mode": null, + "repo_id": null, + "dataset_root": null, + "task": "", + "num_episodes": 0, + "episode": 0, + "pretrained_policy_name_or_path": null, + "device": "cuda", + "push_to_hub": true, + "fps": 10, + "features": { + "observation.images.front": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.images.wrist": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 15 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "features_map": { + "observation.images.front": "observation.images.side", + "observation.images.wrist": "observation.images.wrist", + "observation.state": "observation.state", + "action": "action" + } + } +} \ No newline at end of file diff --git a/src/lerobot/configs/train_config_hilserl_so101_rule_reward_actor.json b/src/lerobot/configs/train_config_hilserl_so101_rule_reward_actor.json new file mode 100644 index 0000000000..fabf8cbec4 --- /dev/null +++ b/src/lerobot/configs/train_config_hilserl_so101_rule_reward_actor.json @@ -0,0 +1,277 @@ +{ + "output_dir": "output/test2/actor", + "job_name": "default", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 256, + "steps": 100000, + "log_freq": 10, + "save_checkpoint": true, + "save_freq": 1000, + "wandb": { + "enable": true, + "project": "so101_real", + "disable_artifact": true + }, + "dataset": { + "repo_id": "test6", + "root": "/workspace/dataset/test7" + }, + "policy": { + "type": "sac", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", 
+ "ENV": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "device": "cuda", + "use_amp": false, + "dataset_stats": { + "observation.images.top": { + "mean": [ + 0.485, + 0.456, + 0.406 + ], + "std": [ + 0.229, + 0.224, + 0.225 + ] + }, + "observation.state": { + "min": [ + -15.0, + -20.0, + 10.0, + 65.0, + 0.0, + 0.0, + -40.0, + -60.0, + -50.0, + -80.0, + -10.0, + -110.0, + 0.26, + -0.06, + 0.25 + ], + "max": [ + 15.0, + 15.0, + 40.0, + 110.0, + 0.5, + 31.0, + 53.0, + 61.0, + 67.0, + 90.0, + 6.0, + 100.0, + 0.32, + 0.06, + 0.35 + ] + }, + "action": { + "min": [ + -1.0, + -1.0, + -1.0 + ], + "max": [ + 1.0, + 1.0, + 1.0 + ] + } + }, + "num_discrete_actions": 3, + "storage_device": "cuda", + "vision_encoder_name": "helper2424/resnet10", + "freeze_vision_encoder": true, + "image_encoder_hidden_dim": 32, + "shared_encoder": true, + "online_steps": 1000000, + "online_buffer_capacity": 30000, + "offline_buffer_capacity": 10000, + "online_step_before_learning": 10, + "policy_update_freq": 10, + "discount": 0.97, + "async_prefetch": false, + "temperature_init": 0.01, + "num_critics": 2, + "num_subsample_critics": null, + "critic_lr": 0.0003, + "actor_lr": 0.0003, + "temperature_lr": 0.0003, + "critic_target_update_weight": 0.005, + "utd_ratio": 2, + "state_encoder_hidden_dim": 256, + "latent_dim": 256, + "target_entropy": null, + "use_backup_entropy": true, + "grad_clip_norm": 40.0, + "critic_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true, + "final_activation": null + }, + "actor_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true + }, + "policy_kwargs": { + "use_tanh_squash": true, + "std_min": -5, + "std_max": 2, + "init_final": 0.05 + }, + "actor_learner_config": { + "learner_host": "127.0.0.1", + "learner_port": 50051, + "policy_parameters_push_frequency": 40 + }, + "concurrency": { + "actor": "threads", + "learner": "threads" + }, + "repo_id": "aractingi/pick_lift_cube_policy" + }, + "env": { + "type": "gym_manipulator", + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 15 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + "image_preprocessing": { + "crop_params_dict": { + "observation.images.top": [49, 189, 262, 305] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "use_service": true, + "service_url": "http://127.0.0.1:8081", + "urdf_path": "Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_rule": { + "target_x": 0.26157, + "target_y": -0.1039, + "target_z": 0.060122, + 
"radius": 0.06, + "radius_z": 0.08 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 5, + + "task": "", + "features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "features_map": {"observation.images.top": "observation.images.top", "observation.state": "observation.state","action":"action"} + } +} \ No newline at end of file diff --git a/src/lerobot/configs/train_config_hilserl_so101_rule_reward_learner.json b/src/lerobot/configs/train_config_hilserl_so101_rule_reward_learner.json new file mode 100644 index 0000000000..8964f95556 --- /dev/null +++ b/src/lerobot/configs/train_config_hilserl_so101_rule_reward_learner.json @@ -0,0 +1,277 @@ +{ + "output_dir": "output/test2/learner", + "job_name": "default", + "resume": false, + "seed": 1000, + "num_workers": 4, + "batch_size": 256, + "steps": 100000, + "log_freq": 100, + "save_checkpoint": true, + "save_freq": 500, + "wandb": { + "enable": true, + "project": "so101_real", + "disable_artifact": true + }, + "dataset": { + "repo_id": "test6", + "root": "/workspace/dataset/test7" + }, + "policy": { + "type": "sac", + "n_obs_steps": 1, + "normalization_mapping": { + "VISUAL": "MEAN_STD", + "STATE": "MIN_MAX", + "ENV": "MIN_MAX", + "ACTION": "MIN_MAX" + }, + "input_features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + } + }, + "output_features": { + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "device": "cuda", + "use_amp": false, + "dataset_stats": { + "observation.images.top": { + "mean": [ + 0.485, + 0.456, + 0.406 + ], + "std": [ + 0.229, + 0.224, + 0.225 + ] + }, + "observation.state": { + "min": [ + -15.0, + -20.0, + 10.0, + 65.0, + 0.0, + 0.0, + -40.0, + -60.0, + -50.0, + -80.0, + -10.0, + -110.0, + 0.26, + -0.06, + 0.25 + ], + "max": [ + 15.0, + 15.0, + 40.0, + 110.0, + 0.5, + 31.0, + 53.0, + 61.0, + 67.0, + 90.0, + 6.0, + 100.0, + 0.32, + 0.06, + 0.35 + ] + }, + "action": { + "min": [ + -1.0, + -1.0, + -1.0 + ], + "max": [ + 1.0, + 1.0, + 1.0 + ] + } + }, + "num_discrete_actions": 3, + "storage_device": "cuda", + "vision_encoder_name": "helper2424/resnet10", + "freeze_vision_encoder": true, + "image_encoder_hidden_dim": 32, + "shared_encoder": true, + "online_steps": 1000000, + "online_buffer_capacity": 30000, + "offline_buffer_capacity": 10000, + "online_step_before_learning": 10, + "policy_update_freq": 10, + "discount": 0.97, + "async_prefetch": false, + "temperature_init": 0.01, + "num_critics": 2, + "num_subsample_critics": null, + "critic_lr": 0.0003, + "actor_lr": 0.0003, + "temperature_lr": 0.0003, + "critic_target_update_weight": 0.005, + "utd_ratio": 2, + "state_encoder_hidden_dim": 256, + "latent_dim": 256, + "target_entropy": null, + "use_backup_entropy": true, + "grad_clip_norm": 40.0, + "critic_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true, + "final_activation": null + }, + "actor_network_kwargs": { + "hidden_dims": [ + 256, + 256 + ], + "activate_final": true + }, + "policy_kwargs": { + "use_tanh_squash": true, + "std_min": -5, + "std_max": 2, + "init_final": 0.05 + }, + "actor_learner_config": { + "learner_host": "127.0.0.1", + "learner_port": 50051, + "policy_parameters_push_frequency": 40 + }, + "concurrency": { + "actor": "threads", + "learner": "threads" 
+ }, + "repo_id": "aractingi/pick_lift_cube_policy" + }, + "env": { + "type": "gym_manipulator", + "robot": { + "type": "so101_follower", + "port": "/dev/ttyACM2", + "id": "follower2", + "cameras": { + "top": { + "type": "opencv", + "index_or_path": 5, + "height": 480, + "width": 640, + "fps": 15 + } + } + }, + "teleop": { + "type": "keyboard_ee" + }, + "processor": { + "control_mode": "keyboard_ee", + "observation": { + "display_cameras": false, + "add_joint_velocity_to_observation": true, + "add_current_to_observation": true + }, + "image_preprocessing": { + "crop_params_dict": { + "observation.images.top": [49, 189, 262, 305] + }, + "resize_size": [128, 128] + }, + "gripper": { + "use_gripper": true, + "gripper_penalty": -0.02 + }, + "reset": { + "fixed_reset_joint_positions": [ + 0.0, + 0.0, + 0.0, + 0.0, + 0.0, + 0.0 + ], + "reset_time_s": 2.5, + "control_time_s": 20.0, + "terminate_on_success": true + }, + "inverse_kinematics": { + "use_service": true, + "service_url": "http://127.0.0.1:8081", + "urdf_path": "Simulation/SO101/so101_new_calib.urdf", + "target_frame_name": "gripper_frame_link", + "end_effector_bounds": { + "min": [-1.0, -1.0, -1.0], + "max": [1.0, 1.0, 1.0] + }, + "end_effector_step_sizes": { + "x": 0.02, + "y": 0.02, + "z": 0.02 + } + }, + "reward_rule": { + "target_x": 0.26157, + "target_y": -0.1039, + "target_z": 0.060122, + "radius": 0.06, + "radius_z": 0.08 + }, + "max_gripper_pos": 30 + }, + "name": "real_robot", + "fps": 5, + + "task": "", + "features": { + "observation.images.top": { + "type": "VISUAL", + "shape": [ + 3, + 128, + 128 + ] + }, + "observation.state": { + "type": "STATE", + "shape": [ + 18 + ] + }, + "action": { + "type": "ACTION", + "shape": [ + 3 + ] + } + }, + "features_map": {"observation.images.top": "observation.images.top", "observation.state": "observation.state","action":"action"} + } +} \ No newline at end of file diff --git a/src/lerobot/datasets/utils.py b/src/lerobot/datasets/utils.py index 37d8432b2b..24fae2065f 100644 --- a/src/lerobot/datasets/utils.py +++ b/src/lerobot/datasets/utils.py @@ -712,8 +712,8 @@ def dataset_to_policy_features(features: dict[str, dict]) -> dict[str, PolicyFea shape = (shape[2], shape[0], shape[1]) elif key == OBS_ENV_STATE: type = FeatureType.ENV - elif key.startswith(OBS_STR): - type = FeatureType.STATE + # elif key.startswith(OBS_STR): + # type = FeatureType.STATE elif key.startswith(ACTION): type = FeatureType.ACTION else: diff --git a/src/lerobot/envs/configs.py b/src/lerobot/envs/configs.py index dc526114dc..bd66af709c 100644 --- a/src/lerobot/envs/configs.py +++ b/src/lerobot/envs/configs.py @@ -157,11 +157,22 @@ class RewardClassifierConfig: success_threshold: float = 0.5 success_reward: float = 1.0 +@dataclass +class RewardRuleConfig: + """Configuration for reward rule.""" + + target_x: float = 0.0 + target_y: float = 0.0 + target_z: float = 0.0 + radius: float = 0.01 + radius_z: float = 0.01 @dataclass class InverseKinematicsConfig: """Configuration for inverse kinematics processing.""" + use_service: bool = False + service_url: str | None = None urdf_path: str | None = None target_frame_name: str | None = None end_effector_bounds: dict[str, list[float]] | None = None @@ -206,6 +217,7 @@ class HILSerlProcessorConfig: reset: ResetConfig | None = None inverse_kinematics: InverseKinematicsConfig | None = None reward_classifier: RewardClassifierConfig | None = None + reward_rule: RewardRuleConfig | None = None max_gripper_pos: float | None = 100.0 diff --git a/src/lerobot/model/kinematics.py 
b/src/lerobot/model/kinematics.py index 95d3b235c6..39f35fa8ba 100644 --- a/src/lerobot/model/kinematics.py +++ b/src/lerobot/model/kinematics.py @@ -12,9 +12,25 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Protocol + import numpy as np +class KinematicsLike(Protocol): + """Protocol describing the required kinematics interface.""" + + def forward_kinematics(self, joint_pos_deg: np.ndarray) -> np.ndarray: ... + + def inverse_kinematics( + self, + current_joint_pos: np.ndarray, + desired_ee_pose: np.ndarray, + position_weight: float = 1.0, + orientation_weight: float = 0.01, + ) -> np.ndarray: ... + + class RobotKinematics: """Robot kinematics using placo library for forward and inverse kinematics.""" diff --git a/src/lerobot/policies/sac/reward_model/modeling_classifier.py b/src/lerobot/policies/sac/reward_model/modeling_classifier.py index dba6a174b4..f56f4903bd 100644 --- a/src/lerobot/policies/sac/reward_model/modeling_classifier.py +++ b/src/lerobot/policies/sac/reward_model/modeling_classifier.py @@ -20,6 +20,7 @@ from torch import Tensor, nn from lerobot.policies.pretrained import PreTrainedPolicy + from lerobot.policies.sac.reward_model.configuration_classifier import RewardClassifierConfig from lerobot.utils.constants import OBS_IMAGE, REWARD @@ -144,6 +145,14 @@ def __init__( encoder = self._create_single_encoder() self.encoders[image_key] = encoder + # self.normalize_inputs = Normalize(config.input_features, config.normalization_mapping, dataset_stats) + # self.normalize_targets = Normalize( + # config.output_features, config.normalization_mapping, dataset_stats + # ) + # self.unnormalize_outputs = Unnormalize( + # config.output_features, config.normalization_mapping, dataset_stats + # ) + self._build_classifier_head() def _setup_cnn_backbone(self): @@ -269,15 +278,15 @@ def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict[str, Tensor]]: def predict_reward(self, batch, threshold=0.5): """Eval method. 
Returns predicted reward with the decision threshold as argument.""" # Check for both OBS_IMAGE and OBS_IMAGES prefixes - batch = self.normalize_inputs(batch) - batch = self.normalize_targets(batch) + # batch = self.normalize_inputs(batch) + # batch = self.normalize_targets(batch) #mengke # Extract images from batch dict images = [batch[key] for key in self.config.input_features if key.startswith(OBS_IMAGE)] if self.config.num_classes == 2: probs = self.predict(images).probabilities - logging.debug(f"Predicted reward images: {probs}") + logging.warning(f"Predicted reward images: {probs}") return (probs > threshold).float() else: return torch.argmax(self.predict(images).probabilities, dim=1) diff --git a/src/lerobot/processor/__init__.py b/src/lerobot/processor/__init__.py index be11ac1af4..febff1bf79 100644 --- a/src/lerobot/processor/__init__.py +++ b/src/lerobot/processor/__init__.py @@ -47,6 +47,7 @@ ImageCropResizeProcessorStep, InterventionActionProcessorStep, RewardClassifierProcessorStep, + RewardRuleProcessorStep, TimeLimitProcessorStep, ) from .joint_observations_processor import JointVelocityProcessorStep, MotorCurrentProcessorStep @@ -116,6 +117,7 @@ "RobotObservation", "RenameObservationsProcessorStep", "RewardClassifierProcessorStep", + "RewardRuleProcessorStep", "RewardProcessorStep", "DataProcessorPipeline", "TimeLimitProcessorStep", diff --git a/src/lerobot/processor/hil_processor.py b/src/lerobot/processor/hil_processor.py index f0dbac9c3c..194462569b 100644 --- a/src/lerobot/processor/hil_processor.py +++ b/src/lerobot/processor/hil_processor.py @@ -115,7 +115,7 @@ def complementary_data(self, complementary_data: dict) -> dict: Returns: A new dictionary with the teleoperator action added under the `teleop_action` key. - """ + """ #mengke new_complementary_data = dict(complementary_data) new_complementary_data[TELEOP_ACTION_KEY] = self.teleop_device.get_action() return new_complementary_data @@ -432,9 +432,14 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: terminate_episode = info.get(TeleopEvents.TERMINATE_EPISODE, False) success = info.get(TeleopEvents.SUCCESS, False) rerecord_episode = info.get(TeleopEvents.RERECORD_EPISODE, False) + use_policy = info.get(TeleopEvents.USE_POLICY, False) new_transition = transition.copy() + if (not use_policy): + new_transition[TransitionKey.ACTION] = torch.tensor([0.0, 0.0, 0.0, 1.0], dtype=torch.float32) + else: + print("use_policy") # Override action if intervention is active if is_intervention and teleop_action is not None: if isinstance(teleop_action, dict): @@ -594,3 +599,71 @@ def transform_features( self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: return features + + +@ProcessorStepRegistry.register("reward_rule_processor") +@dataclass +class RewardRuleProcessorStep(ProcessorStep): + """ + Assigns a binary reward when the end-effector is inside a cylindrical target region. + + The region is defined by a center position (`target_x`, `target_y`, `target_z`), + a radial tolerance `radius` on the xy-plane, and a vertical tolerance `z_radius`. + If the current end-effector pose satisfies both constraints, the reward is set to 1.0; + otherwise it is 0.0. 
+ """ + + target_x: float + target_y: float + target_z: float + radius: float + radius_z: float + terminate_on_success: bool + + def __call__(self, transition: EnvTransition) -> EnvTransition: + new_transition = transition.copy() + observation = new_transition.get(TransitionKey.OBSERVATION) or {} + + if not observation: + return new_transition + + def _to_float(value: Any) -> float: + if isinstance(value, torch.Tensor): + return float(value.squeeze().item()) + return float(value) + + try: + ee_x = _to_float(observation["ee.x"]) + ee_y = _to_float(observation["ee.y"]) + ee_z = _to_float(observation["ee.z"]) + except KeyError: + # Required pose components not available; keep reward unchanged. + print("RewardRuleProcessorStep failed, no ee pose") + return new_transition + + dx = ee_x - self.target_x + dy = ee_y - self.target_y + within_radius = dx * dx + dy * dy <= self.radius * self.radius + within_height = abs(ee_z - self.target_z) <= self.radius_z + + + reward = new_transition.get(TransitionKey.REWARD, 0.0) + terminated = new_transition.get(TransitionKey.DONE, False) + + if (within_radius and within_height): + reward = 1.0 + if self.terminate_on_success: + terminated = True + else: + reward = 0.0 + + new_transition[TransitionKey.REWARD] = reward + new_transition[TransitionKey.DONE] = terminated + print(f"ee_x: {ee_x}, ee_y: {ee_y}, ee_z: {ee_z}, target_x: {self.target_x}, target_y: {self.target_y}, target_z: {self.target_z}") + + return new_transition + + def transform_features( + self, features: dict[PipelineFeatureType, dict[str, PolicyFeature]] + ) -> dict[PipelineFeatureType, dict[str, PolicyFeature]]: + return features diff --git a/src/lerobot/processor/pipeline.py b/src/lerobot/processor/pipeline.py index e14d8b0b9c..a111ff6037 100644 --- a/src/lerobot/processor/pipeline.py +++ b/src/lerobot/processor/pipeline.py @@ -1693,8 +1693,11 @@ def __call__(self, transition: EnvTransition) -> EnvTransition: complementary_data = new_transition.get(TransitionKey.COMPLEMENTARY_DATA) if complementary_data is None or not isinstance(complementary_data, dict): raise ValueError("ComplementaryDataProcessorStep requires complementary data in the transition.") - + #print("complementary_data") + #print(complementary_data) processed_complementary_data = self.complementary_data(complementary_data.copy()) + #print("processed_complementary_data") + #print(processed_complementary_data) new_transition[TransitionKey.COMPLEMENTARY_DATA] = processed_complementary_data return new_transition diff --git a/src/lerobot/rl/actor.py b/src/lerobot/rl/actor.py index 54d0fba69f..4ce6c4fc75 100644 --- a/src/lerobot/rl/actor.py +++ b/src/lerobot/rl/actor.py @@ -65,8 +65,18 @@ from lerobot.processor import TransitionKey from lerobot.rl.process import ProcessSignalHandler from lerobot.rl.queue import get_last_item_from_queue -from lerobot.robots import so100_follower # noqa: F401 -from lerobot.teleoperators import gamepad, so101_leader # noqa: F401 +from lerobot.robots import ( # noqa: F401 + RobotConfig, + make_robot_from_config, + so100_follower, + so101_follower, +) +from lerobot.teleoperators import ( + gamepad, # noqa: F401 + keyboard, # noqa: F401 + make_teleoperator_from_config, + so101_leader, # noqa: F401 +) from lerobot.teleoperators.utils import TeleopEvents from lerobot.transport import services_pb2, services_pb2_grpc from lerobot.transport.utils import ( @@ -290,6 +300,8 @@ def act_with_policy( with policy_timer: # Extract observation from transition for policy action = policy.select_action(batch=observation) + 
action[0][3] = 0.0 + print("policy action:", action) policy_fps = policy_timer.fps_last log_policy_frequency_issue(policy_fps=policy_fps, cfg=cfg, interaction_step=interaction_step) diff --git a/src/lerobot/rl/buffer.py b/src/lerobot/rl/buffer.py index 81aa29c480..dbb574bda3 100644 --- a/src/lerobot/rl/buffer.py +++ b/src/lerobot/rl/buffer.py @@ -200,6 +200,7 @@ def add( """Saves a transition, ensuring tensors are stored on the designated storage device.""" # Initialize storage if this is the first transition if not self.initialized: + print("action1: ", action) self._initialize_storage(state=state, action=action, complementary_info=complementary_info) # Store the transition in pre-allocated tensors @@ -209,7 +210,8 @@ def add( if not self.optimize_memory: # Only store next_states if not optimizing memory self.next_states[key][self.position].copy_(next_state[key].squeeze(dim=0)) - + # print("action3:", action) + # print("action4:", self.actions[self.position].shape) self.actions[self.position].copy_(action.squeeze(dim=0)) self.rewards[self.position] = reward self.dones[self.position] = done @@ -478,7 +480,7 @@ def from_lerobot_dataset( first_complementary_info = { k: v.to(device) for k, v in first_transition["complementary_info"].items() } - + print("action2: ", first_action) replay_buffer._initialize_storage( state=first_state, action=first_action, complementary_info=first_complementary_info ) diff --git a/src/lerobot/rl/gym_manipulator.py b/src/lerobot/rl/gym_manipulator.py index f9c9d0d7a7..21bfa77e23 100644 --- a/src/lerobot/rl/gym_manipulator.py +++ b/src/lerobot/rl/gym_manipulator.py @@ -14,10 +14,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import json import logging import time from dataclasses import dataclass from typing import Any +from urllib.error import HTTPError, URLError +from urllib.request import Request, urlopen import gymnasium as gym import numpy as np @@ -44,6 +47,7 @@ MotorCurrentProcessorStep, Numpy2TorchActionProcessorStep, RewardClassifierProcessorStep, + RewardRuleProcessorStep, RobotActionToPolicyActionProcessorStep, TimeLimitProcessorStep, Torch2NumpyActionProcessorStep, @@ -56,6 +60,7 @@ RobotConfig, make_robot_from_config, so100_follower, + so101_follower, ) from lerobot.robots.robot import Robot from lerobot.robots.so100_follower.robot_kinematic_processor import ( @@ -78,6 +83,63 @@ from lerobot.utils.utils import log_say logging.basicConfig(level=logging.INFO) +LOGGER = logging.getLogger(__name__) + + +class RobotKinematicsServiceClient: + """HTTP client that mimics the RobotKinematics API.""" + + def __init__(self, base_url: str, timeout: float = 5.0) -> None: + self._base_url = base_url.rstrip("/") + self._timeout = timeout + + def _post(self, endpoint: str, payload: dict[str, Any]) -> dict[str, Any]: + """Send a JSON POST request to the configured service.""" + request = Request( + url=f"{self._base_url}{endpoint}", + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + ) + try: + with urlopen(request, timeout=self._timeout) as response: + body = response.read().decode("utf-8") + except HTTPError as exc: + error_body = exc.read().decode("utf-8", errors="ignore") + raise RuntimeError(f"HTTP {exc.code} from kinematics service: {error_body}") from exc + except URLError as exc: + raise RuntimeError( + f"Failed to reach kinematics service at {self._base_url}{endpoint}: {exc.reason}" + ) from exc + + try: + return json.loads(body) + except 
json.JSONDecodeError as exc: + raise RuntimeError(f"Invalid JSON response from kinematics service: {body}") from exc + + def forward_kinematics(self, joint_pos_deg: np.ndarray) -> np.ndarray: + payload = {"joint_positions": np.asarray(joint_pos_deg, dtype=float).tolist()} + response = self._post("/fk", payload) + if "pose" not in response: + raise RuntimeError("Kinematics service response missing 'pose'.") + return np.asarray(response["pose"], dtype=float) + + def inverse_kinematics( + self, + current_joint_pos: np.ndarray, + desired_ee_pose: np.ndarray, + position_weight: float = 1.0, + orientation_weight: float = 0.01, + ) -> np.ndarray: + payload = { + "current_joint_positions": np.asarray(current_joint_pos, dtype=float).tolist(), + "desired_pose": np.asarray(desired_ee_pose, dtype=float).tolist(), + "position_weight": position_weight, + "orientation_weight": orientation_weight, + } + response = self._post("/ik", payload) + if "joint_positions" not in response: + raise RuntimeError("Kinematics service response missing 'joint_positions'.") + return np.asarray(response["joint_positions"], dtype=float) @dataclass @@ -115,6 +177,8 @@ def reset_follower_position(robot_arm: Robot, target_position: np.ndarray) -> No action_dict = dict(zip(current_position_dict, pose, strict=False)) robot_arm.bus.sync_write("Goal_Position", action_dict) busy_wait(0.015) + current_position_dict = robot_arm.bus.sync_read("Present_Position") + print("current_position_dict:", current_position_dict) class RobotEnv(gym.Env): @@ -234,9 +298,9 @@ def reset( # self.robot.reset() start_time = time.perf_counter() if self.reset_pose is not None: - log_say("Reset the environment.", play_sounds=True) + print("Reset the environment.") reset_follower_position(self.robot, np.array(self.reset_pose)) - log_say("Reset the environment done.", play_sounds=True) + print("Reset the environment done.") busy_wait(self.reset_time_s - (time.perf_counter() - start_time)) @@ -252,7 +316,12 @@ def reset( def step(self, action) -> tuple[dict[str, np.ndarray], float, bool, bool, dict[str, Any]]: """Execute one environment step with given action.""" joint_targets_dict = {f"{key}.pos": action[i] for i, key in enumerate(self.robot.bus.motors.keys())} + + print("send action: ", joint_targets_dict) + if joint_targets_dict["elbow_flex.pos"] < 3.7037: + joint_targets_dict["elbow_flex.pos"] = 3.7037 + print(joint_targets_dict["elbow_flex.pos"]) self.robot.send_action(joint_targets_dict) obs = self._get_observation() @@ -396,12 +465,19 @@ def make_processors( # Set up kinematics solver if inverse kinematics is configured kinematics_solver = None - if cfg.processor.inverse_kinematics is not None: - kinematics_solver = RobotKinematics( - urdf_path=cfg.processor.inverse_kinematics.urdf_path, - target_frame_name=cfg.processor.inverse_kinematics.target_frame_name, - joint_names=motor_names, - ) + inverse_cfg = cfg.processor.inverse_kinematics + if inverse_cfg is not None: + if inverse_cfg.use_service: + if not inverse_cfg.service_url: + raise ValueError("cfg.processor.inverse_kinematics.service_url must be set when use_service is True.") + LOGGER.info("Using RobotKinematics HTTP service at %s", inverse_cfg.service_url) + kinematics_solver = RobotKinematicsServiceClient(base_url=inverse_cfg.service_url) + else: + kinematics_solver = RobotKinematics( + urdf_path=inverse_cfg.urdf_path, + target_frame_name=inverse_cfg.target_frame_name, + joint_names=motor_names, + ) env_pipeline_steps = [VanillaObservationProcessorStep()] @@ -455,6 +531,15 @@ def 
@@ -455,6 +531,15 @@ def make_processors(
                 terminate_on_success=terminate_on_success,
             )
         )
+    elif (cfg.processor.reward_rule is not None):
+        env_pipeline_steps.append(RewardRuleProcessorStep(
+            target_x=cfg.processor.reward_rule.target_x,
+            target_y=cfg.processor.reward_rule.target_y,
+            target_z=cfg.processor.reward_rule.target_z,
+            radius=cfg.processor.reward_rule.radius,
+            radius_z=cfg.processor.reward_rule.radius_z,
+            terminate_on_success=terminate_on_success,
+        ))

     env_pipeline_steps.append(AddBatchDimensionProcessorStep())
     env_pipeline_steps.append(DeviceProcessorStep(device=device))
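Editor's note (not part of the patch): RewardRuleProcessorStep is registered elsewhere and its body is not included in this diff. Its constructor arguments (target_x/y/z, radius, radius_z, terminate_on_success) suggest a distance-to-target success rule along these lines; this is a hypothetical sketch, not the actual implementation:

# Hypothetical sketch of the rule implied by the parameters above; the real step may differ.
import numpy as np

def reward_rule(ee_xyz: np.ndarray, target_x: float, target_y: float, target_z: float,
                radius: float, radius_z: float) -> tuple[float, bool]:
    """Return (reward, success): success when the EE sits inside a cylinder around the target."""
    dist_xy = float(np.linalg.norm(ee_xyz[:2] - np.array([target_x, target_y])))
    dist_z = abs(float(ee_xyz[2]) - target_z)
    success = dist_xy <= radius and dist_z <= radius_z
    return (1.0 if success else 0.0), success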
@@ -642,13 +727,14 @@ def control_loop(
     episode_start_time = time.perf_counter()
     while episode_idx < cfg.dataset.num_episodes_to_record:
+        print("episode_idx: ", episode_idx)
         step_start_time = time.perf_counter()

         # Create a neutral action (no movement)
         neutral_action = torch.tensor([0.0, 0.0, 0.0], dtype=torch.float32)
         if use_gripper:
             neutral_action = torch.cat([neutral_action, torch.tensor([1.0])])  # Gripper stay
-
+        #print(neutral_action)
         # Use the new step function
         transition = step_env_and_process_transition(
             env=env,
@@ -657,6 +743,7 @@
             env_processor=env_processor,
             action_processor=action_processor,
         )
+        # print(transition)

         terminated = transition.get(TransitionKey.DONE, False)
         truncated = transition.get(TransitionKey.TRUNCATED, False)
@@ -764,6 +851,7 @@ def main(cfg: GymManipulatorConfig) -> None:
         exit()

     control_loop(env, env_processor, action_processor, teleop_device, cfg)
+    print("finish")


 if __name__ == "__main__":
diff --git a/src/lerobot/rl/learner.py b/src/lerobot/rl/learner.py
index d9758d3a35..dbb2acb6ae 100644
--- a/src/lerobot/rl/learner.py
+++ b/src/lerobot/rl/learner.py
@@ -69,8 +69,18 @@
 from lerobot.rl.buffer import ReplayBuffer, concatenate_batch_transitions
 from lerobot.rl.process import ProcessSignalHandler
 from lerobot.rl.wandb_utils import WandBLogger
-from lerobot.robots import so100_follower  # noqa: F401
-from lerobot.teleoperators import gamepad, so101_leader  # noqa: F401
+from lerobot.robots import (  # noqa: F401
+    RobotConfig,
+    make_robot_from_config,
+    so100_follower,
+    so101_follower,
+)
+from lerobot.teleoperators import (
+    gamepad,  # noqa: F401
+    keyboard,  # noqa: F401
+    make_teleoperator_from_config,
+    so101_leader,  # noqa: F401
+)
 from lerobot.teleoperators.utils import TeleopEvents
 from lerobot.transport import services_pb2_grpc
 from lerobot.transport.utils import (
@@ -100,7 +110,7 @@
     init_logging,
 )

-from .learner_service import MAX_WORKERS, SHUTDOWN_TIMEOUT, LearnerService
+from lerobot.rl.learner_service import MAX_WORKERS, SHUTDOWN_TIMEOUT, LearnerService


 @parser.wrap()
@@ -326,7 +336,8 @@ def add_actor_information_and_train(
     log_training_info(cfg=cfg, policy=policy)

-    replay_buffer = initialize_replay_buffer(cfg, device, storage_device)
+    # replay_buffer = initialize_replay_buffer(cfg, device, storage_device)
+    replay_buffer = initialize_offline_replay_buffer(cfg, device, storage_device)
     batch_size = cfg.batch_size
     offline_replay_buffer = None
@@ -378,6 +389,7 @@ def add_actor_information_and_train(
         # Wait until the replay buffer has enough samples to start training
         if len(replay_buffer) < online_step_before_learning:
+            #print("continue")
             continue

         if online_iterator is None:
diff --git a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py
index 87e832db6e..e2c9bb0d22 100644
--- a/src/lerobot/robots/so100_follower/robot_kinematic_processor.py
+++ b/src/lerobot/robots/so100_follower/robot_kinematic_processor.py
@@ -20,7 +20,7 @@
 import numpy as np

 from lerobot.configs.types import FeatureType, PipelineFeatureType, PolicyFeature
-from lerobot.model.kinematics import RobotKinematics
+from lerobot.model.kinematics import KinematicsLike
 from lerobot.processor import (
     EnvTransition,
     ObservationProcessorStep,
@@ -60,7 +60,7 @@ class EEReferenceAndDelta(RobotActionProcessorStep):
         _command_when_disabled: Internal state to hold the last command while disabled.
     """

-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     end_effector_step_sizes: dict
     motor_names: list[str]
     use_latched_reference: bool = (
@@ -226,7 +226,7 @@ def action(self, action: RobotAction) -> RobotAction:
                 n = float(np.linalg.norm(dpos))
                 if n > self.max_ee_step_m and n > 0:
                     pos = self._last_pos + dpos * (self.max_ee_step_m / n)
-                    raise ValueError(f"EE jump {n:.3f}m > {self.max_ee_step_m}m")
+                    # raise ValueError(f"EE jump {n:.3f}m > {self.max_ee_step_m}m")

             self._last_pos = pos
@@ -265,7 +265,7 @@ class InverseKinematicsEEToJoints(RobotActionProcessorStep):
             If False, use the solution from the previous step.
     """

-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     motor_names: list[str]
     q_curr: np.ndarray | None = field(default=None, init=False, repr=False)
     initial_guess_current_joints: bool = True
@@ -401,7 +401,7 @@ def transform_features(

 def compute_forward_kinematics_joints_to_ee(
-    joints: dict[str, Any], kinematics: RobotKinematics, motor_names: list[str]
+    joints: dict[str, Any], kinematics: KinematicsLike, motor_names: list[str]
 ) -> dict[str, Any]:
     motor_joint_values = [joints[f"{n}.pos"] for n in motor_names]
@@ -435,7 +435,7 @@ class ForwardKinematicsJointsToEEObservation(ObservationProcessorStep):
         kinematics: The robot's kinematic model.
     """

-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     motor_names: list[str]

     def observation(self, observation: dict[str, Any]) -> dict[str, Any]:
@@ -468,7 +468,7 @@ class ForwardKinematicsJointsToEEAction(RobotActionProcessorStep):
         kinematics: The robot's kinematic model.
     """

-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     motor_names: list[str]

     def action(self, action: RobotAction) -> RobotAction:
@@ -491,7 +491,7 @@ def transform_features(
 @ProcessorStepRegistry.register(name="forward_kinematics_joints_to_ee")
 @dataclass
 class ForwardKinematicsJointsToEE(ProcessorStep):
-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     motor_names: list[str]

     def __post_init__(self):
@@ -528,7 +528,7 @@ class InverseKinematicsRLStep(ProcessorStep):
     This is modified from the InverseKinematicsEEToJoints step to be used in the RL pipeline.
     """

-    kinematics: RobotKinematics
+    kinematics: KinematicsLike
     motor_names: list[str]
     q_curr: np.ndarray | None = field(default=None, init=False, repr=False)
     initial_guess_current_joints: bool = True
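Editor's note (not part of the patch): the type-hint change from RobotKinematics to KinematicsLike lets these processor steps accept either the local URDF solver or the HTTP client added above. The actual definition lives in lerobot.model.kinematics and is not shown in this diff; a sketch consistent with how both implementations are called would be:

# Hypothetical sketch of the interface both RobotKinematics and RobotKinematicsServiceClient
# are assumed to satisfy; the real definition in lerobot.model.kinematics may differ.
from typing import Protocol
import numpy as np

class KinematicsLike(Protocol):
    def forward_kinematics(self, joint_pos_deg: np.ndarray) -> np.ndarray:
        """Joint angles (degrees) -> 4x4 end-effector pose."""
        ...

    def inverse_kinematics(
        self,
        current_joint_pos: np.ndarray,
        desired_ee_pose: np.ndarray,
        position_weight: float = 1.0,
        orientation_weight: float = 0.01,
    ) -> np.ndarray:
        """Seed joint angles plus desired pose -> joint angles (degrees)."""
        ...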
""" - kinematics: RobotKinematics + kinematics: KinematicsLike motor_names: list[str] q_curr: np.ndarray | None = field(default=None, init=False, repr=False) initial_guess_current_joints: bool = True diff --git a/src/lerobot/robots/so101_follower/config_so101_follower.py b/src/lerobot/robots/so101_follower/config_so101_follower.py index 03c3530c2f..b2a412ab0c 100644 --- a/src/lerobot/robots/so101_follower/config_so101_follower.py +++ b/src/lerobot/robots/so101_follower/config_so101_follower.py @@ -39,3 +39,23 @@ class SO101FollowerConfig(RobotConfig): # Set to `True` for backward compatibility with previous policies/dataset use_degrees: bool = False + + + + # Default bounds for the end-effector position (in meters) + end_effector_bounds: dict[str, list[float]] = field( # bounds for the end-effector in x,y,z direction + default_factory=lambda: { + "min": [-1.0, -1.0, -1.0], # min x, y, z + "max": [1.0, 1.0, 1.0], # max x, y, z + } + ) + + max_gripper_pos: float = 50 # maximum gripper position that the gripper will be open at + + end_effector_step_sizes: dict[str, float] = field( # maximum step size for the end-effector in x,y,z direction + default_factory=lambda: { + "x": 0.02, + "y": 0.02, + "z": 0.02, + } + ) diff --git a/src/lerobot/scripts/lerobot_find_joint_limits.py b/src/lerobot/scripts/lerobot_find_joint_limits.py index 07d57a7608..0ce429ad03 100644 --- a/src/lerobot/scripts/lerobot_find_joint_limits.py +++ b/src/lerobot/scripts/lerobot_find_joint_limits.py @@ -42,6 +42,7 @@ koch_follower, make_robot_from_config, so100_follower, + so101_follower, ) from lerobot.teleoperators import ( # noqa: F401 TeleoperatorConfig, @@ -49,6 +50,7 @@ koch_leader, make_teleoperator_from_config, so100_leader, + so101_leader, ) from lerobot.utils.robot_utils import busy_wait diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py index 0cc6e037fd..657f17fc09 100644 --- a/src/lerobot/scripts/lerobot_train.py +++ b/src/lerobot/scripts/lerobot_train.py @@ -182,7 +182,7 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None): if is_main_process: logging.info("Creating dataset") dataset = make_dataset(cfg) - + logging.info("Creating dataset Done") accelerator.wait_for_everyone() # Now all other processes can safely load the dataset diff --git a/src/lerobot/teleoperators/keyboard/teleop_keyboard.py b/src/lerobot/teleoperators/keyboard/teleop_keyboard.py index 6f53a17c7d..f37097ef43 100644 --- a/src/lerobot/teleoperators/keyboard/teleop_keyboard.py +++ b/src/lerobot/teleoperators/keyboard/teleop_keyboard.py @@ -94,6 +94,7 @@ def connect(self) -> None: on_release=self._on_release, ) self.listener.start() + print("listener.start()") else: logging.info("pynput not available - skipping local keyboard listener.") self.listener = None @@ -107,7 +108,7 @@ def _on_press(self, key): def _on_release(self, key): if hasattr(key, "char"): - self.event_queue.put((key.char, False)) + self.event_queue.put((key.char, True)) if key == keyboard.Key.esc: logging.info("ESC pressed, disconnecting.") self.disconnect() @@ -161,6 +162,8 @@ def __init__(self, config: KeyboardEndEffectorTeleopConfig): super().__init__(config) self.config = config self.misc_keys_queue = Queue() + self.is_intervention = False + self.use_policy = False @property def action_features(self) -> dict: @@ -191,36 +194,39 @@ def get_action(self) -> dict[str, Any]: # Generate action based on current key states for key, val in self.current_pressed.items(): - if key == keyboard.Key.up: + if key == 'd': delta_y 
diff --git a/src/lerobot/scripts/lerobot_find_joint_limits.py b/src/lerobot/scripts/lerobot_find_joint_limits.py
index 07d57a7608..0ce429ad03 100644
--- a/src/lerobot/scripts/lerobot_find_joint_limits.py
+++ b/src/lerobot/scripts/lerobot_find_joint_limits.py
@@ -42,6 +42,7 @@
     koch_follower,
     make_robot_from_config,
     so100_follower,
+    so101_follower,
 )
 from lerobot.teleoperators import (  # noqa: F401
     TeleoperatorConfig,
@@ -49,6 +50,7 @@
     koch_leader,
     make_teleoperator_from_config,
     so100_leader,
+    so101_leader,
 )
 from lerobot.utils.robot_utils import busy_wait
diff --git a/src/lerobot/scripts/lerobot_train.py b/src/lerobot/scripts/lerobot_train.py
index 0cc6e037fd..657f17fc09 100644
--- a/src/lerobot/scripts/lerobot_train.py
+++ b/src/lerobot/scripts/lerobot_train.py
@@ -182,7 +182,7 @@ def train(cfg: TrainPipelineConfig, accelerator: Accelerator | None = None):
     if is_main_process:
         logging.info("Creating dataset")
         dataset = make_dataset(cfg)
-
+    logging.info("Creating dataset Done")
     accelerator.wait_for_everyone()

     # Now all other processes can safely load the dataset
diff --git a/src/lerobot/teleoperators/keyboard/teleop_keyboard.py b/src/lerobot/teleoperators/keyboard/teleop_keyboard.py
index 6f53a17c7d..f37097ef43 100644
--- a/src/lerobot/teleoperators/keyboard/teleop_keyboard.py
+++ b/src/lerobot/teleoperators/keyboard/teleop_keyboard.py
@@ -94,6 +94,7 @@ def connect(self) -> None:
                 on_release=self._on_release,
             )
             self.listener.start()
+            print("listener.start()")
         else:
             logging.info("pynput not available - skipping local keyboard listener.")
             self.listener = None
@@ -107,7 +108,7 @@ def _on_press(self, key):

     def _on_release(self, key):
         if hasattr(key, "char"):
-            self.event_queue.put((key.char, False))
+            self.event_queue.put((key.char, True))
         if key == keyboard.Key.esc:
             logging.info("ESC pressed, disconnecting.")
             self.disconnect()
@@ -161,6 +162,8 @@ def __init__(self, config: KeyboardEndEffectorTeleopConfig):
         super().__init__(config)
         self.config = config
         self.misc_keys_queue = Queue()
+        self.is_intervention = False
+        self.use_policy = False

     @property
     def action_features(self) -> dict:
@@ -191,36 +194,39 @@ def get_action(self) -> dict[str, Any]:

         # Generate action based on current key states
         for key, val in self.current_pressed.items():
-            if key == keyboard.Key.up:
+            if key == 'd':
                 delta_y = -int(val)
-            elif key == keyboard.Key.down:
+            elif key == 'a':
                 delta_y = int(val)
-            elif key == keyboard.Key.left:
+            elif key == 'w':
                 delta_x = int(val)
-            elif key == keyboard.Key.right:
+            elif key == 's':
                 delta_x = -int(val)
-            elif key == keyboard.Key.shift:
+            elif key == 'f':
                 delta_z = -int(val)
-            elif key == keyboard.Key.shift_r:
+            elif key == 'g':
                 delta_z = int(val)
-            elif key == keyboard.Key.ctrl_r:
-                # Gripper actions are expected to be between 0 (close), 1 (stay), 2 (open)
-                gripper_action = int(val) + 1
-            elif key == keyboard.Key.ctrl_l:
-                gripper_action = int(val) - 1
             elif val:
                 # If the key is pressed, add it to the misc_keys_queue
                 # this will record key presses that are not part of the delta_x, delta_y, delta_z
                 # this is useful for retrieving other events like interventions for RL, episode success, etc.
                 self.misc_keys_queue.put(key)

+        print("self.current_pressed:", self.current_pressed)
         self.current_pressed.clear()

+        # Check if any movement keys are currently pressed (indicates intervention)
+        if (delta_x != 0.0 or delta_y != 0.0 or delta_z != 0.0):
+            self.is_intervention = True
+        else:
+            self.is_intervention = False
+
         action_dict = {
             "delta_x": delta_x,
             "delta_y": delta_y,
             "delta_z": delta_z,
         }
+        print("action_dict: ", action_dict)

         if self.config.use_gripper:
             action_dict["gripper"] = gripper_action
@@ -251,20 +257,11 @@ def get_teleop_events(self) -> dict[str, Any]:
                 TeleopEvents.TERMINATE_EPISODE: False,
                 TeleopEvents.SUCCESS: False,
                 TeleopEvents.RERECORD_EPISODE: False,
+                TeleopEvents.USE_POLICY: False,
             }

         # Check if any movement keys are currently pressed (indicates intervention)
-        movement_keys = [
-            keyboard.Key.up,
-            keyboard.Key.down,
-            keyboard.Key.left,
-            keyboard.Key.right,
-            keyboard.Key.shift,
-            keyboard.Key.shift_r,
-            keyboard.Key.ctrl_r,
-            keyboard.Key.ctrl_l,
-        ]
-        is_intervention = any(self.current_pressed.get(key, False) for key in movement_keys)
+        # is_intervention = any(self.current_pressed.get(key, False) for key in movement_keys)

         # Check for episode control commands from misc_keys_queue
         terminate_episode = False
@@ -274,7 +271,7 @@ def get_teleop_events(self) -> dict[str, Any]:
         # Process any pending misc keys
         while not self.misc_keys_queue.empty():
             key = self.misc_keys_queue.get_nowait()
-            if key == "s":
+            if key == "p":
                 success = True
             elif key == "r":
                 terminate_episode = True
@@ -282,10 +279,14 @@ def get_teleop_events(self) -> dict[str, Any]:
             elif key == "q":
                 terminate_episode = True
                 success = False
+            elif key == "t":
+                self.use_policy = not self.use_policy
+                print("use_policy: ", self.use_policy)

         return {
-            TeleopEvents.IS_INTERVENTION: is_intervention,
+            TeleopEvents.IS_INTERVENTION: self.is_intervention,
             TeleopEvents.TERMINATE_EPISODE: terminate_episode,
             TeleopEvents.SUCCESS: success,
             TeleopEvents.RERECORD_EPISODE: rerecord_episode,
+            TeleopEvents.USE_POLICY: self.use_policy,
         }
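Editor's note (not part of the patch): with the remapping above, translation is driven by w/a/s/d, f/g moves z, 'p' marks success, 'r'/'q' end the episode, and 't' toggles the new USE_POLICY event. A minimal polling loop might look like the sketch below; the config construction is abbreviated and other required fields are omitted for brevity.

# Illustrative polling loop for the remapped keyboard teleop (construction details elided).
teleop = KeyboardEndEffectorTeleop(KeyboardEndEffectorTeleopConfig(use_gripper=True))
teleop.connect()
while True:
    action = teleop.get_action()          # {"delta_x": ..., "delta_y": ..., "delta_z": ..., "gripper": ...}
    events = teleop.get_teleop_events()   # now includes TeleopEvents.USE_POLICY
    if events[TeleopEvents.TERMINATE_EPISODE]:
        break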
("DISPLAY" not in os.environ) and ("linux" in sys.platform): + logging.info("No DISPLAY set. Skipping pynput import.") + raise ImportError("pynput blocked intentionally due to no display.") + from pynput import keyboard +except ImportError: + keyboard = None + PYNPUT_AVAILABLE = False +except Exception as e: + keyboard = None + PYNPUT_AVAILABLE = False + logging.info(f"Could not import pynput: {e}") +##hashot class SO101Leader(Teleoperator): """ @@ -55,10 +73,55 @@ def __init__(self, config: SO101LeaderConfig): calibration=self.calibration, ) + self.event_queue = Queue() + self.current_pressed = Queue() + self.listener = None + self.logs = {} + self.is_keyboard_connected = False + + def keyboard_connect(self) -> None: + if self.is_keyboard_connected: + raise DeviceAlreadyConnectedError( + "Keyboard is already connected. Do not run `robot.connect()` twice." + ) + + if PYNPUT_AVAILABLE: + logging.info("pynput is available - enabling local keyboard listener.") + self.listener = keyboard.Listener( + on_press=self._on_press, + on_release=self._on_release, + ) + self.listener.start() + else: + logging.info("pynput not available - skipping local keyboard listener.") + self.listener = None + + def _drain_pressed_keys(self): + while not self.event_queue.empty(): + key_char, is_pressed = self.event_queue.get_nowait() + if is_pressed: + self.current_pressed.put(key_char) + + def _on_press(self, key): + if hasattr(key, "char"): + self.event_queue.put((key.char, True)) + + def _on_release(self, key): + if hasattr(key, "char"): + self.event_queue.put((key.char, False)) + if key == keyboard.Key.esc: + logging.info("ESC pressed, disconnecting.") + self.disconnect() + @property def action_features(self) -> dict[str, type]: - return {f"{motor}.pos": float for motor in self.bus.motors} - + #return {f"{motor}.pos": float for motor in self.bus.motors} + action_names = ["delta_x_ee", "delta_y_ee", "delta_z_ee","gripper_delta"] + # if cfg.wrapper.use_gripper: + # action_names.append("gripper_delta") + return { "dtype": "float32", + "shape": (len(action_names),), + "names": action_names,} @property def feedback_features(self) -> dict[str, type]: return {} @@ -79,6 +142,7 @@ def connect(self, calibrate: bool = True) -> None: self.calibrate() self.configure() + self.keyboard_connect() logger.info(f"{self} connected.") @property @@ -141,9 +205,73 @@ def get_action(self) -> dict[str, float]: action = self.bus.sync_read("Present_Position") action = {f"{motor}.pos": val for motor, val in action.items()} dt_ms = (time.perf_counter() - start) * 1e3 - logger.debug(f"{self} read action: {dt_ms:.1f}ms") + logger.info(f"{self} read action: {dt_ms:.1f}ms") return action + from typing import Any + def get_teleop_events(self) -> dict[str, Any]: + """ + Get extra control events from the keyboard such as intervention status, + episode termination, success indicators, etc. 
+ + Keyboard mappings: + - Any movement keys pressed = intervention active + - 's' key = success (terminate episode successfully) + - 'r' key = rerecord episode (terminate and rerecord) + - 'q' key = quit episode (terminate without success) + + Returns: + Dictionary containing: + - is_intervention: bool - Whether human is currently intervening + - terminate_episode: bool - Whether to terminate the current episode + - success: bool - Whether the episode was successful + - rerecord_episode: bool - Whether to rerecord the episode + """ + if not self.is_connected: + return { + TeleopEvents.IS_INTERVENTION: False, + TeleopEvents.TERMINATE_EPISODE: False, + TeleopEvents.SUCCESS: False, + TeleopEvents.RERECORD_EPISODE: False, + } + + # Check if any movement keys are currently pressed (indicates intervention) + movement_keys = [ + keyboard.Key.up, + keyboard.Key.down, + keyboard.Key.left, + keyboard.Key.right, + keyboard.Key.shift, + keyboard.Key.shift_r, + keyboard.Key.ctrl_r, + keyboard.Key.ctrl_l, + ] + is_intervention = True + + self._drain_pressed_keys() + # Check for episode control commands from misc_keys_queue + terminate_episode = False + success = False + rerecord_episode = False + + # Process any pending misc keys + while not self.current_pressed.empty(): + key = self.current_pressed.get_nowait() + if key == "s": + success = True + elif key == "r": + terminate_episode = True + rerecord_episode = True + elif key == "q": + terminate_episode = True + success = False + + return { + TeleopEvents.IS_INTERVENTION: is_intervention, + TeleopEvents.TERMINATE_EPISODE: terminate_episode, + TeleopEvents.SUCCESS: success, + TeleopEvents.RERECORD_EPISODE: rerecord_episode, + } def send_feedback(self, feedback: dict[str, float]) -> None: # TODO(rcadene, aliberts): Implement force feedback raise NotImplementedError @@ -154,3 +282,7 @@ def disconnect(self) -> None: self.bus.disconnect() logger.info(f"{self} disconnected.") + from typing import Any + + + \ No newline at end of file diff --git a/src/lerobot/teleoperators/utils.py b/src/lerobot/teleoperators/utils.py index 2103a1669c..bc8eea85cb 100644 --- a/src/lerobot/teleoperators/utils.py +++ b/src/lerobot/teleoperators/utils.py @@ -29,6 +29,7 @@ class TeleopEvents(Enum): RERECORD_EPISODE = "rerecord_episode" IS_INTERVENTION = "is_intervention" TERMINATE_EPISODE = "terminate_episode" + USE_POLICY = "use_policy" def make_teleoperator_from_config(config: TeleoperatorConfig) -> Teleoperator: diff --git a/t_checkport.sh b/t_checkport.sh new file mode 100644 index 0000000000..0ea1da9cb1 --- /dev/null +++ b/t_checkport.sh @@ -0,0 +1 @@ +lerobot-find-port diff --git a/t_record.sh b/t_record.sh new file mode 100644 index 0000000000..5f7d19fdb7 --- /dev/null +++ b/t_record.sh @@ -0,0 +1 @@ +python -m lerobot.scripts.rl.gym_manipulator --config_path src/lerobot/configs/env_config_so101.json diff --git a/tty.sh b/tty.sh new file mode 100644 index 0000000000..c10b828fe8 --- /dev/null +++ b/tty.sh @@ -0,0 +1,27 @@ +#!/bin/bash + +# List of device strings +device_list=( + "/dev/ttyACM0" + "/dev/ttyACM1" + "/dev/ttyACM2" + "/dev/ttyACM3" + "/dev/ttyACM4" +) + +# Loop through each device in the list +for stringx in "${device_list[@]}"; do + # Check for "follower" identifier (5AAF219986) + result_follower=$(udevadm info "$stringx" | grep "5AAF219986") + + # Check for "leader" identifier (5AAF218179) + result_leader=$(udevadm info "$stringx" | grep "5AAF218179") + + # Determine if it's a follower or leader based on the result + if [[ -n "$result_follower" ]]; then + echo 
"$stringx : follower!!!!" + elif [[ -n "$result_leader" ]]; then + echo "$stringx : leader!!!!" + + fi +done \ No newline at end of file diff --git a/viz.sh b/viz.sh new file mode 100644 index 0000000000..ee38a96f44 --- /dev/null +++ b/viz.sh @@ -0,0 +1 @@ +python3 src/lerobot/scripts/lerobot_dataset_viz.py --repo-id mengke/test10 --episode-index 0 \ No newline at end of file