33 changes: 33 additions & 0 deletions README.md
@@ -290,6 +290,39 @@ lerobot-train --config_path=lerobot/diffusion_pusht

reproduces SOTA results for Diffusion Policy on the PushT task.

### Remote policy evaluation (experimental)
If your model is served through an HTTP API, you can delegate action selection to that service with the `remote` policy.
Install the dedicated dependencies and start the demo server:

```bash
pip install -e ".[server]"
uvicorn examples.remote.remote_policy_server:app --host 0.0.0.0 --port 8000
```

The sample FastAPI app returns zero-valued actions with the requested shape, which is useful for validating end-to-end wiring before deploying a real model.
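
To sanity-check the server before running a full evaluation, you can pack a dummy observation and POST it to `/predict` yourself. This is only a sketch: it assumes the demo server above is running locally, that `pack_msg`/`unpack_msg` round-trip dicts of tensors the same way the `remote` policy does, and the observation key name is arbitrary (the demo server only uses it to infer the batch size).

```python
import requests
import torch

from lerobot.utils.messaging import pack_msg, unpack_msg

# Dummy batch: one state vector plus the extra keys the demo server looks for.
payload = {
    "observation.state": torch.zeros(1, 8),
    "dataset_info": {"action_dof": 7},
    "inference_config": {"n_action_steps": 10},
}

resp = requests.post(
    "http://localhost:8000/predict",
    data=pack_msg(payload),
    headers={"Content-Type": "application/octet-stream"},
    timeout=30,
)
resp.raise_for_status()

actions = torch.as_tensor(unpack_msg(resp.content))
print(actions.shape)  # expected: (1, 10, 7), filled with zeros
```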

To evaluate the Libero benchmark through the remote policy, run:

```bash
lerobot-eval \
--env.type=libero \
--env.task=libero_spatial \
--env.max_parallel_tasks=1 \
--eval.batch_size=1 \
--eval.n_episodes=3 \
--policy.type=remote \
--policy.server_url=http://localhost:8000 \
--policy.timeout=30 \
--policy.attempts=3 \
--policy.n_action_steps=10 \
--policy.additional_args='{"dataset_info":{"action_type":"eef","robot_embodiment":"single_arm","robot_type":"franka","stereo_replace_depth":false,"handheld":false,"no_state":false,"obs_dof":8,"action_dof":7},"inference_config":{"n_actions":6,"n_inference_steps":10}}' \
--rename_map='{"observation.images.image":"observation.images.static1","observation.images.image2":"observation.images.wrist1"}' \
--output_dir=./eval_logs_libero_spatial
```

The `additional_args` payload is forwarded to the remote server alongside the observation batch and can be adjusted to match your remote model’s expectations.
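
For readability, this is the same payload written out as the Python dict that `--policy.additional_args` expands to on `RemoteConfig.additional_args`:

```python
additional_args = {
    "dataset_info": {
        "action_type": "eef",
        "robot_embodiment": "single_arm",
        "robot_type": "franka",
        "stereo_replace_depth": False,
        "handheld": False,
        "no_state": False,
        "obs_dof": 8,
        "action_dof": 7,
    },
    "inference_config": {"n_actions": 6, "n_inference_steps": 10},
}
```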

## Contribute

If you would like to contribute to 🤗 LeRobot, please check out our [contribution guide](https://github.com/huggingface/lerobot/blob/main/CONTRIBUTING.md).
31 changes: 31 additions & 0 deletions examples/remote/remote_policy_server.py
@@ -0,0 +1,31 @@
import torch
import numpy as np
from fastapi import FastAPI, Request, Response

from lerobot.utils.messaging import pack_msg, unpack_msg

app = FastAPI()


@app.post("/predict")
async def predict(request: Request):
    data = await request.body()
    obs_input = unpack_msg(data)

    inf_cfg = obs_input.get("inference_config", {})
    dataset_info = obs_input.get("dataset_info", {})
    n_action_steps = inf_cfg.get("n_action_steps", 10)
    action_dim = dataset_info.get("action_dof", 7)

    # Try to infer batch size from any array-like input
    B = None
    for v in obs_input.values():
        if isinstance(v, (torch.Tensor, np.ndarray)):
            if v.ndim >= 1:
                B = int(v.shape[0])
                break

    if B is None:
        # Fail loudly instead of letting torch.zeros choke on B=None.
        raise ValueError(
            "Could not infer batch size from obs_input; expected at least one "
            "torch.Tensor or np.ndarray with a leading batch dimension."
        )

    actions = torch.zeros((B, n_action_steps, action_dim), dtype=torch.float32)

    packed = pack_msg(actions)
    return Response(content=packed, media_type="application/octet-stream")
8 changes: 8 additions & 0 deletions pyproject.toml
@@ -134,6 +134,13 @@ pusht = ["gym-pusht>=0.1.5,<0.2.0", "pymunk>=6.6.0,<7.0.0"] # TODO: Fix pymunk v
libero = ["lerobot[transformers-dep]", "libero @ git+https://github.com/huggingface/lerobot-libero.git@main#egg=libero"]
metaworld = ["metaworld==3.0.0"]

# HTTP server extra
server = [
"fastapi>=0.115.0,<1.0.0",
"uvicorn[standard]>=0.30.0,<1.0.0",
"msgpack>=1.0.8,<2.0.0",
]

# All
all = [
"lerobot[dynamixel]",
@@ -155,6 +162,7 @@ all = [
"lerobot[phone]",
"lerobot[libero]",
"lerobot[metaworld]",
"lerobot[server]",
]

[project.scripts]
2 changes: 1 addition & 1 deletion src/lerobot/async_inference/constants.py
@@ -23,7 +23,7 @@
DEFAULT_OBS_QUEUE_TIMEOUT = 2

# All action chunking policies
SUPPORTED_POLICIES = ["act", "smolvla", "diffusion", "tdmpc", "vqbet", "pi0", "pi05"]
SUPPORTED_POLICIES = ["act", "smolvla", "diffusion", "tdmpc", "vqbet", "pi0", "pi05", "remote"]

# TODO: Add all other robots
SUPPORTED_ROBOTS = ["so100_follower", "so101_follower", "bi_so100_follower"]
2 changes: 2 additions & 0 deletions src/lerobot/policies/__init__.py
@@ -20,6 +20,7 @@
from .smolvla.processor_smolvla import SmolVLANewLineProcessor
from .tdmpc.configuration_tdmpc import TDMPCConfig as TDMPCConfig
from .vqbet.configuration_vqbet import VQBeTConfig as VQBeTConfig
from .remote.configuration_remote import RemoteConfig as RemoteConfig

__all__ = [
"ACTConfig",
@@ -29,4 +30,5 @@
"SmolVLAConfig",
"TDMPCConfig",
"VQBeTConfig",
"RemoteConfig",
]
18 changes: 18 additions & 0 deletions src/lerobot/policies/factory.py
@@ -38,6 +38,7 @@
from lerobot.policies.smolvla.configuration_smolvla import SmolVLAConfig
from lerobot.policies.tdmpc.configuration_tdmpc import TDMPCConfig
from lerobot.policies.vqbet.configuration_vqbet import VQBeTConfig
from lerobot.policies.remote.configuration_remote import RemoteConfig
from lerobot.processor import PolicyAction, PolicyProcessorPipeline
from lerobot.processor.converters import (
batch_to_transition,
@@ -101,6 +102,10 @@ def get_policy_class(name: str) -> type[PreTrainedPolicy]:
        from lerobot.policies.smolvla.modeling_smolvla import SmolVLAPolicy

        return SmolVLAPolicy
    elif name == "remote":
        from lerobot.policies.remote.modeling_remote import RemotePolicy

        return RemotePolicy
    else:
        raise NotImplementedError(f"Policy with name {name} is not implemented.")

@@ -142,6 +147,8 @@ def make_policy_config(policy_type: str, **kwargs) -> PreTrainedConfig:
        return SmolVLAConfig(**kwargs)
    elif policy_type == "reward_classifier":
        return RewardClassifierConfig(**kwargs)
    elif policy_type == "remote":
        return RemoteConfig(**kwargs)
    else:
        raise ValueError(f"Policy type '{policy_type}' is not available.")

@@ -292,6 +299,17 @@ def make_pre_post_processors(
            config=policy_cfg,
            dataset_stats=kwargs.get("dataset_stats"),
        )

    elif isinstance(policy_cfg, RemoteConfig):
        from lerobot.policies.remote.processor_remote import make_remote_pre_post_processors

        overrides = kwargs.get("preprocessor_overrides") or {}

        processors = make_remote_pre_post_processors(
            config=policy_cfg,
            dataset_stats=kwargs.get("dataset_stats"),
            rename_map=overrides.get("rename_observations_processor", {}).get("rename_map", {}),
        )

    else:
        raise NotImplementedError(f"Processor for policy type '{policy_cfg.type}' is not implemented.")
5 changes: 5 additions & 0 deletions src/lerobot/policies/remote/__init__.py
@@ -0,0 +1,5 @@
from .configuration_remote import RemoteConfig
from .modeling_remote import RemotePolicy
from .processor_remote import make_remote_pre_post_processors

__all__ = ["RemoteConfig", "RemotePolicy", "make_remote_pre_post_processors"]
55 changes: 55 additions & 0 deletions src/lerobot/policies/remote/configuration_remote.py
@@ -0,0 +1,55 @@
from dataclasses import dataclass, field
from typing import Any

from lerobot.configs.policies import PreTrainedConfig
from lerobot.optim.optimizers import AdamWConfig

@PreTrainedConfig.register_subclass("remote")
@dataclass
class RemoteConfig(PreTrainedConfig):
    # Identity and device placement
    type: str = field(default="remote", metadata={"help": "Policy type name"})
    device: str = field(default="cpu", metadata={"help": "Device used for returned tensors"})

    # Action execution
    # How many environment steps to execute per policy call. Used by the runtime action queue.
    n_action_steps: int = field(default=1, metadata={"help": "Number of env steps to execute per call"})

    # Remote-specific
    server_url: str = field(default="http://localhost:8000", metadata={"help": "Remote policy server URL"})
    timeout: float = field(default=30.0, metadata={"help": "HTTP timeout in seconds"})
    attempts: int = field(default=1, metadata={"help": "Number of retry attempts for failed requests"})

    # Additional arguments to inject directly into the observation dict (e.g. {"inference_config": {...}})
    additional_args: dict[str, Any] = field(
        default_factory=dict,
        metadata={"help": "Extra observation keys to inject directly into observation"},
    )

    # --- Abstract API implementations required by PreTrainedConfig ---
    def get_optimizer_preset(self) -> AdamWConfig:
        """Remote policy is inference-only; return an inert preset for API compatibility."""
        return AdamWConfig(lr=1e-5, weight_decay=0.0, grad_clip_norm=1.0)

    def get_scheduler_preset(self):
        # No scheduler needed for inference-only policy
        return None

    def validate_features(self) -> None:
        # Minimal validation: allow any combination, but require at least one input feature
        if not self.input_features:
            raise ValueError("RemoteConfig requires at least one input feature to be defined.")

    @property
    def observation_delta_indices(self):
        # No temporal deltas required for observations by default
        return None

    @property
    def action_delta_indices(self):
        # Minimal behavior: align deltas to n_action_steps
        return list(range(self.n_action_steps))

    @property
    def reward_delta_indices(self):
        return None
94 changes: 94 additions & 0 deletions src/lerobot/policies/remote/modeling_remote.py
@@ -0,0 +1,94 @@
from collections import deque
import threading

import numpy as np
import requests
import torch
from torch import Tensor

from lerobot.utils.messaging import pack_msg, unpack_msg
from lerobot.policies.pretrained import PreTrainedPolicy
from .configuration_remote import RemoteConfig


class RemotePolicy(PreTrainedPolicy):
"""
A policy that proxies inference to a remote HTTP server.
"""

config_class = RemoteConfig
name = "remote"

def __init__(self, config: RemoteConfig):
super().__init__(config)
self.server_url = config.server_url.rstrip("/")
self.timeout = config.timeout
self._thread_state = threading.local()
self.reset()

def get_optim_params(self) -> dict:
return {}

def reset(self):
# Reinitialize thread-local state so each worker gets its own queue/session
self._thread_state = threading.local()

def _state(self):
state = self._thread_state
if not hasattr(state, "session"):
state.session = requests.Session()
if not hasattr(state, "action_queue"):
state.action_queue = deque(maxlen=self.config.n_action_steps)
return state

def forward(self, batch: dict[str, Tensor]) -> tuple[Tensor, dict] | tuple[Tensor, None]:
raise NotImplementedError("RemotePolicy is inference-only")

    @torch.no_grad()
    def predict_action_chunk(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
        state = self._state()

        # Build payload with raw tensors/arrays; pack_msg handles encoding
        add_args = self.config.additional_args or {}
        payload = batch | add_args

        packed = pack_msg(payload)

        last_exception = None
        for _ in range(self.config.attempts):
            try:
                resp = state.session.post(
                    f"{self.server_url}/predict",
                    data=packed,
                    headers={"Content-Type": "application/octet-stream"},
                    timeout=self.timeout,
                )
                resp.raise_for_status()
                # Clear any failure recorded by a previous attempt before exiting the retry loop.
                last_exception = None
                break
            except requests.RequestException as e:
                last_exception = e

        if last_exception is not None:
            raise last_exception

        unpacked = unpack_msg(resp.content)
        if isinstance(unpacked, torch.Tensor):
            actions = unpacked
        else:
            actions_np = np.asarray(unpacked)
            actions = torch.from_numpy(actions_np)

        device = torch.device(self.config.device)
        return actions.to(device=device, dtype=torch.float32)

    @torch.no_grad()
    def select_action(self, batch: dict[str, Tensor], **kwargs) -> Tensor:
        self.eval()

        queue = self._state().action_queue

        if len(queue) == 0:
            actions = self.predict_action_chunk(batch)[:, : self.config.n_action_steps]
            queue.extend(actions.transpose(0, 1))  # [(B, A)] x T

        return queue.popleft()
65 changes: 65 additions & 0 deletions src/lerobot/policies/remote/processor_remote.py
@@ -0,0 +1,65 @@
from typing import Any

import torch

from lerobot.policies.remote.configuration_remote import RemoteConfig
from lerobot.processor import (
    AddBatchDimensionProcessorStep,
    PolicyAction,
    PolicyProcessorPipeline,
    ProcessorStep,
    RenameObservationsProcessorStep,
)
from lerobot.processor.converters import policy_action_to_transition, transition_to_policy_action
from lerobot.utils.constants import (
    POLICY_POSTPROCESSOR_DEFAULT_NAME,
    POLICY_PREPROCESSOR_DEFAULT_NAME,
)


def make_remote_pre_post_processors(
    config: RemoteConfig,
    dataset_stats: dict[str, dict[str, torch.Tensor]] | None = None,
    rename_map: dict[str, str] | None = None,
) -> tuple[
    PolicyProcessorPipeline[dict[str, Any], dict[str, Any]],
    PolicyProcessorPipeline[PolicyAction, PolicyAction],
]:
"""
Custom pre/post processors for the Remote policy.

Pre:
- Normalizer (if stats provided)
- AddBatchDimension
- AppendInferenceConfig (copies config.inference_config into the batch)
- Device placement

Post:
- Device to CPU
- Unnormalize outputs (if stats provided)
"""

    # Pre: allow renaming features and add a batch dim. The rename map can be overridden at runtime
    # through preprocessor_overrides with the key "rename_observations_processor".
    input_steps: list[ProcessorStep] = [
        RenameObservationsProcessorStep(rename_map=rename_map or {}),
        AddBatchDimensionProcessorStep(),
    ]

    # Minimal postprocessor: identity (no steps)
    output_steps: list[ProcessorStep] = []

    return (
        PolicyProcessorPipeline[dict[str, Any], dict[str, Any]](
            steps=input_steps,
            name=POLICY_PREPROCESSOR_DEFAULT_NAME,
        ),
        PolicyProcessorPipeline[PolicyAction, PolicyAction](
            steps=output_steps,
            name=POLICY_POSTPROCESSOR_DEFAULT_NAME,
            to_transition=policy_action_to_transition,
            to_output=transition_to_policy_action,
        ),
    )
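
For illustration, a minimal sketch of building the remote config and its processors directly from Python, outside `lerobot-eval`. This is hypothetical usage rather than part of the PR: it assumes `RemoteConfig` can be constructed from its inherited defaults, and the rename map simply mirrors the README example.

```python
from lerobot.policies.factory import make_policy_config
from lerobot.policies.remote import make_remote_pre_post_processors

# Build the config the same way `--policy.type=remote ...` does on the CLI.
cfg = make_policy_config(
    "remote",
    server_url="http://localhost:8000",
    timeout=30.0,
    attempts=3,
    n_action_steps=10,
)

# Pre/post processing pipelines; the rename map mirrors the README's --rename_map example.
preprocessor, postprocessor = make_remote_pre_post_processors(
    config=cfg,
    rename_map={"observation.images.image": "observation.images.static1"},
)
```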