33 changes: 33 additions & 0 deletions README.md
@@ -290,6 +290,39 @@ lerobot-train --config_path=lerobot/diffusion_pusht

reproduces SOTA results for Diffusion Policy on the PushT task.

### Remote policy evaluation (experimental)
If your model is served through an HTTP API, you can delegate action selection to that service with the `remote` policy.
Install the dedicated dependencies and start the demo server:

```bash
pip install -e ".[server]"
uvicorn examples.remote.remote_policy_server:app --host 0.0.0.0 --port 8000
```

The sample FastAPI app returns zero-valued actions with the requested shape, which is useful for validating end-to-end wiring before deploying a real model.
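
To sanity-check the server before running a full evaluation, you can pack a dummy observation and POST it to `/predict` yourself. This is only a sketch: it assumes the demo server above is running locally, that `pack_msg`/`unpack_msg` round-trip dicts of tensors the same way the `remote` policy does, and the observation key name is arbitrary (the demo server only uses it to infer the batch size).

```python
import requests
import torch

from lerobot.utils.messaging import pack_msg, unpack_msg

# Dummy batch: one state vector plus the extra keys the demo server looks for.
payload = {
    "observation.state": torch.zeros(1, 8),
    "dataset_info": {"action_dof": 7},
    "inference_config": {"n_action_steps": 10},
}

resp = requests.post(
    "http://localhost:8000/predict",
    data=pack_msg(payload),
    headers={"Content-Type": "application/octet-stream"},
    timeout=30,
)
resp.raise_for_status()

actions = torch.as_tensor(unpack_msg(resp.content))
print(actions.shape)  # expected: (1, 10, 7), filled with zeros
```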

To evaluate the Libero benchmark through the remote policy, run:

```bash
lerobot-eval \
--env.type=libero \
--env.task=libero_spatial \
--env.max_parallel_tasks=1 \
--eval.batch_size=1 \
--eval.n_episodes=3 \
--policy.type=remote \
--policy.server_url=http://localhost:8000 \
--policy.timeout=30 \
--policy.attempts=3 \
--policy.n_action_steps=10 \
--policy.additional_args='{"dataset_info":{"action_type":"eef","robot_embodiment":"single_arm","robot_type":"franka","stereo_replace_depth":false,"handheld":false,"no_state":false,"obs_dof":8,"action_dof":7},"inference_config":{"n_actions":6,"n_inference_steps":10}}' \
--rename_map='{"observation.images.image":"observation.images.static1","observation.images.image2":"observation.images.wrist1"}' \
--output_dir=./eval_logs_libero_spatial
```

The `additional_args` payload is forwarded to the remote server alongside the observation batch and can be adjusted to match your remote model’s expectations.
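
For readability, this is the same payload written out as the Python dict that `--policy.additional_args` expands to on `RemoteConfig.additional_args`:

```python
additional_args = {
    "dataset_info": {
        "action_type": "eef",
        "robot_embodiment": "single_arm",
        "robot_type": "franka",
        "stereo_replace_depth": False,
        "handheld": False,
        "no_state": False,
        "obs_dof": 8,
        "action_dof": 7,
    },
    "inference_config": {"n_actions": 6, "n_inference_steps": 10},
}
```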

## Contribute

If you would like to contribute to 🤗 LeRobot, please check out our [contribution guide](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md).
31 changes: 31 additions & 0 deletions examples/remote/remote_policy_server.py
@@ -0,0 +1,31 @@
import torch
import numpy as np
from fastapi import FastAPI, Request, Response

from lerobot.utils.messaging import pack_msg, unpack_msg

app = FastAPI()


@app.post("/predict")
async def predict(request: Request):
    data = await request.body()
    obs_input = unpack_msg(data)

    inf_cfg = obs_input.get("inference_config", {})
    dataset_info = obs_input.get("dataset_info", {})
    n_action_steps = inf_cfg.get("n_action_steps", 10)
    action_dim = dataset_info.get("action_dof", 7)

    # Try to infer batch size from any array-like input
    B = None
    for v in obs_input.values():
        if isinstance(v, (torch.Tensor, np.ndarray)):
            if v.ndim >= 1:
                B = int(v.shape[0])
                break

    if B is None:
        # Fail loudly instead of letting torch.zeros choke on B=None.
        raise ValueError(
            "Could not infer batch size from obs_input; expected at least one "
            "torch.Tensor or np.ndarray with a leading batch dimension."
        )

    actions = torch.zeros((B, n_action_steps, action_dim), dtype=torch.float32)

    packed = pack_msg(actions)
    return Response(content=packed, media_type="application/octet-stream")
8 changes: 8 additions & 0 deletions pyproject.toml
@@ -134,6 +134,13 @@ pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk v
libero = ["lerobot[transformers-dep]", "libero @ git+https://github.com/huggingface/lerobot-libero.git@main#egg=libero"]
metaworld = ["metaworld==3.0.0"]

# HTTP server extra
server = [
"fastapi>=0.115.0,<1.0.0",
"uvicorn[standard]>=0.30.0,<1.0.0",
"msgpack>=1.0.8,<2.0.0",
]

# All
all = [
"lerobot[dynamixel]",
@@ -155,6 +162,7 @@ all = [
"lerobot[phone]",
"lerobot[libero]",
"lerobot[metaworld]",
"lerobot[server]",
]

[project.scripts]
2 changes: 1 addition & 1 deletion src/lerobot/async_inference/constants.py
@@ -23,7 +23,7 @@
DEFAULT_OBS_QUEUE_TIMEOUT = 2

# All action chunking policies
SUPPORTED_POLICIES = ["act", "smolvla", "diffusion", "tdmpc", "vqbet", "pi0", "pi05"]
SUPPORTED_POLICIES = ["act", "smolvla", "diffusion", "tdmpc", "vqbet", "pi0", "pi05", "remote"]

# TODO: Add all other robots
SUPPORTED_ROBOTS = ["so100_follower", "so101_follower", "bi_so100_follower"]
2 changes: 2 additions & 0 deletions src/lerobot/policies/__init__.py
@@ -20,6 +20,7 @@
from .smolvla.processor_smolvla import SmolVLANewLineProcessor
from .tdmpc.configuration_tdmpc import TDMPCConfig as TDMPCConfig
from .vqbet.configuration_vqbet import VQBeTConfig as VQBeTConfig
from .remote.configuration_remote import RemoteConfig as RemoteConfig

__all__ = [
"ACTConfig",
@@ -29,4 +30,5 @@
"SmolVLAConfig",
"TDMPCConfig",
"VQBeTConfig",
"RemoteConfig",
]
18 changes: 18 additions & 0 deletions src/lerobot/policies/factory.py
@@ -38,6 +38,7 @@
from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig
from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig
from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig
from lerobot.policies.remote.configuration_remote import RemoteConfig
from lerobot.processor import PolicyAction, PolicyProcessorPipeline
from lerobot.processor.converters import (
batch_to_transition,
@@ -101,6 +102,10 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:
        from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy

        return SmolVLAPolicy
    elif name == "remote":
        from lerobot.policies.remote.modeling_remote import RemotePolicy

        return RemotePolicy
    else:
        raise NotImplementedError(f"Policy with name {name} is not implemented.")

@@ -142,6 +147,8 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
        return SmolVLAConfig(**kwargs)
    elif policy_type == "reward_classifier":
        return RewardClassifierConfig(**kwargs)
    elif policy_type == "remote":
        return RemoteConfig(**kwargs)
    else:
        raise ValueError(f"Policy type '{policy_type}' is not available.")

@@ -292,6 +299,17 @@ def make_pre_post_processors(
            config=policy_cfg,
            dataset_stats=kwargs.get("dataset_stats"),
        )

    elif isinstance(policy_cfg, RemoteConfig):
        from lerobot.policies.remote.processor_remote import make_remote_pre_post_processors

        overrides = kwargs.get("preprocessor_overrides") or {}

        processors = make_remote_pre_post_processors(
            config=policy_cfg,
            dataset_stats=kwargs.get("dataset_stats"),
            rename_map=overrides.get("rename_observations_processor", {}).get("rename_map", {}),
        )

    else:
        raise NotImplementedError(f"Processor for policy type '{policy_cfg.type}' is not implemented.")
5 changes: 5 additions & 0 deletions src/lerobot/policies/remote/__init__.py
@@ -0,0 +1,5 @@
from .configuration_remote import RemoteConfig
from .modeling_remote import RemotePolicy
from .processor_remote import make_remote_pre_post_processors

__all__ = ["RemoteConfig", "RemotePolicy", "make_remote_pre_post_processors"]
55 changes: 55 additions & 0 deletions src/lerobot/policies/remote/configuration_remote.py
@@ -0,0 +1,55 @@
from dataclasses import dataclass, field
from typing import Any

from lerobot.configs.policies import PreTrainedConfig
from lerobot.optim.optimizers import AdamWConfig

@PreTrainedConfig.register_subclass("remote")
@dataclass
class RemoteConfig(PreTrainedConfig):
    # Identity and device placement
    type: str = field(default="remote", metadata={"help": "Policy type name"})
    device: str = field(default="cpu", metadata={"help": "Device used for returned tensors"})

    # Action execution
    # How many environment steps to execute per policy call. Used by the runtime action queue.
    n_action_steps: int = field(default=1, metadata={"help": "Number of env steps to execute per call"})

    # Remote-specific
    server_url: str = field(default="http://localhost:8000", metadata={"help": "Remote policy server URL"})
    timeout: float = field(default=30.0, metadata={"help": "HTTP timeout in seconds"})
    attempts: int = field(default=1, metadata={"help": "Number of retry attempts for failed requests"})

    # Additional arguments to inject directly into the observation dict (e.g. {"inference_config": {...}})
    additional_args: dict[str, Any] = field(
        default_factory=dict,
        metadata={"help": "Extra observation keys to inject directly into observation"},
    )

    # --- Abstract API implementations required by PreTrainedConfig ---
    def get_optimizer_preset(self) -> AdamWConfig:
        """Remote policy is inference-only; return an inert preset for API compatibility."""
        return AdamWConfig(lr=1e-5, weight_decay=0.0, grad_clip_norm=1.0)

    def get_scheduler_preset(self):
        # No scheduler needed for inference-only policy
        return None

    def validate_features(self) -> None:
        # Minimal validation: allow any combination, but require at least one input feature
        if not self.input_features:
            raise ValueError("RemoteConfig requires at least one input feature to be defined.")

    @property
    def observation_delta_indices(self):
        # No temporal deltas required for observations by default
        return None

    @property
    def action_delta_indices(self):
        # Minimal behavior: align deltas to n_action_steps
        return list(range(self.n_action_steps))

    @property
    def reward_delta_indices(self):
        return None
94 changes: 94 additions & 0 deletions src/lerobot/policies/remote/modeling_remote.py
@@ -0,0 +1,94 @@
from collections import deque
import threading

import numpy as np
import requests
import torch
from torch import Tensor

from lerobot.utils.messaging import pack_msg, unpack_msg
from lerobot.policies.pretrained import PreTrainedPolicy
from .configuration_remote import RemoteConfig


class RemotePolicy(PreTrainedPolicy):
"""
A policy that proxies inference to a remote HTTP server.
"""

config_class = RemoteConfig
name = "remote"

def __init__(self, config: RemoteConfig):
super().__init__(config)
self.server_url = config.server_url.rstrip("/")
self.timeout = config.timeout
self._thread_state = threading.local()
self.reset()

def get_optim_params(self) -> dict:
return {}

def reset(self):
# Reinitialize thread-local state so each worker gets its own queue/session
self._thread_state = threading.local()

def _state(self):
state = self._thread_state
if not hasattr(state, "session"):
state.session = requests.Session()
if not hasattr(state, "action_queue"):
state.action_queue = deque(maxlen=self.config.n_action_steps)
return state

def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict] | tuple[Tensor, None]:
raise NotImplementedError("RemotePolicy is inference-only")

    @torch.no_grad()
    def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
        state = self._state()

        # Build payload with raw tensors/arrays; pack_msg handles encoding
        add_args = self.config.additional_args or {}
        payload = batch | add_args

        packed = pack_msg(payload)

        last_exception = None
        for _ in range(self.config.attempts):
            try:
                resp = state.session.post(
                    f"{self.server_url}/predict",
                    data=packed,
                    headers={"Content-Type": "application/octet-stream"},
                    timeout=self.timeout,
                )
                resp.raise_for_status()
                # Clear any failure recorded by a previous attempt before exiting the retry loop.
                last_exception = None
                break
            except requests.RequestException as e:
                last_exception = e

        if last_exception is not None:
            raise last_exception

        unpacked = unpack_msg(resp.content)
        if isinstance(unpacked, torch.Tensor):
            actions = unpacked
        else:
            actions_np = np.asarray(unpacked)
            actions = torch.from_numpy(actions_np)

        device = torch.device(self.config.device)
        return actions.to(device=device, dtype=torch.float32)

    @torch.no_grad()
    def select_action(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
        self.eval()

        queue = self._state().action_queue

        if len(queue) == 0:
            actions = self.predict_action_chunk(batch)[:, : self.config.n_action_steps]
            queue.extend(actions.transpose(0, 1))  # [(B, A)] x T

        return queue.popleft()
65 changes: 65 additions & 0 deletions src/lerobot/policies/remote/processor_remote.py
@@ -0,0 +1,65 @@
from typing import Any

import torch

from lerobot.policies.remote.configuration_remote import RemoteConfig
from lerobot.processor import (
    AddBatchDimensionProcessorStep,
    PolicyAction,
    PolicyProcessorPipeline,
    ProcessorStep,
    RenameObservationsProcessorStep,
)
from lerobot.processor.converters import policy_action_to_transition, transition_to_policy_action
from lerobot.utils.constants import (
    POLICY_POSTPROCESSOR_DEFAULT_NAME,
    POLICY_PREPROCESSOR_DEFAULT_NAME,
)


def make_remote_pre_post_processors(
    config: RemoteConfig,
    dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
    rename_map: dict[str, str] | None = None,
) -> tuple[
    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    PolicyProcessorPipeline[PolicyAction, PolicyAction],
]:
"""
Custom pre/post processors for the Remote policy.

Pre:
- Normalizer (if stats provided)
- AddBatchDimension
- AppendInferenceConfig (copies config.inference_config into the batch)
- Device placement

Post:
- Device to CPU
- Unnormalize outputs (if stats provided)
"""

    # Pre: allow renaming features and add a batch dim. The rename map can be overridden at runtime
    # through preprocessor_overrides with the key "rename_observations_processor".
    input_steps: list[ProcessorStep] = [
        RenameObservationsProcessorStep(rename_map=rename_map or {}),
        AddBatchDimensionProcessorStep(),
    ]

    # Minimal postprocessor: identity (no steps)
    output_steps: list[ProcessorStep] = []

    return (
        PolicyProcessorPipeline[dict[str, Any], dict[str, Any]](
            steps=input_steps,
            name=POLICY_PREPROCESSOR_DEFAULT_NAME,
        ),
        PolicyProcessorPipeline[PolicyAction, PolicyAction](
            steps=output_steps,
            name=POLICY_POSTPROCESSOR_DEFAULT_NAME,
            to_transition=policy_action_to_transition,
            to_output=transition_to_policy_action,
        ),
    )
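
For illustration, a minimal sketch of building the remote config and its processors directly from Python, outside `lerobot-eval`. This is hypothetical usage rather than part of the PR: it assumes `RemoteConfig` can be constructed from its inherited defaults, and the rename map simply mirrors the README example.

```python
from lerobot.policies.factory import make_policy_config
from lerobot.policies.remote import make_remote_pre_post_processors

# Build the config the same way `--policy.type=remote ...` does on the CLI.
cfg = make_policy_config(
    "remote",
    server_url="http://localhost:8000",
    timeout=30.0,
    attempts=3,
    n_action_steps=10,
)

# Pre/post processing pipelines; the rename map mirrors the README's --rename_map example.
preprocessor, postprocessor = make_remote_pre_post_processors(
    config=cfg,
    rename_map={"observation.images.image": "observation.images.static1"},
)
```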