Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/core/containers/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@
"""Container runtime providers."""

from .providers import ContainerProvider, KubernetesProvider, LocalDockerProvider
from .uv_provider import UVProvider

__all__ = [
"ContainerProvider",
"LocalDockerProvider",
"KubernetesProvider",
"UVProvider",
]
183 changes: 183 additions & 0 deletions src/core/containers/runtime/uv_provider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""Providers for launching Hugging Face Spaces via ``uv run``."""

from __future__ import annotations

import os
import socket
import subprocess
import time
from dataclasses import dataclass, field
from typing import Dict, Optional

import requests

from .providers import ContainerProvider
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since uv is not a container per se, I would rename the abstract class to RuntimeProvider (or simply Runtime?) and the abstract methods to start, stop, and wait_for_ready (instead of start_container, etc.). I feel that would be semantically more accurate.

(just my 2c, feel free to ignore 🤗 )

Copy link
Collaborator Author

@burtenshaw burtenshaw Nov 10, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with you @Wauplin , we should clarify naming here. However, I'm reluctant to let this PR swell whilst we're trying to move fast.

For now, I've added a new ABC named RuntimeProvider which has no container specific methods and is used by the UVProvider.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Understandable! Works like this as well :)



def _poll_health(health_url: str, timeout_s: float) -> None:
    """Poll a health endpoint until it returns HTTP 200 or times out.

    Args:
        health_url: URL requested with ``requests.get`` on every attempt.
        timeout_s: Overall time budget in seconds. A budget of zero or less
            raises immediately without issuing any request.

    Raises:
        TimeoutError: If no HTTP 200 response is seen before the budget is
            spent. The most recent request error, if any, is attached as
            the exception's cause.
    """
    # A monotonic clock keeps the deadline immune to wall-clock adjustments
    # that could otherwise shorten or extend the wait.
    deadline = time.monotonic() + timeout_s
    last_error = None

    while time.monotonic() < deadline:
        try:
            response = requests.get(health_url, timeout=2.0)
            if response.status_code == 200:
                return
        except requests.RequestException as exc:
            # Keep the latest failure so the final TimeoutError can carry it.
            last_error = exc

        time.sleep(0.5)

    raise TimeoutError(
        f"Server did not become ready within {timeout_s:.1f} seconds"
    ) from last_error


def _create_uv_command(
    repo_id: str,
    host: str,
    port: int,
    reload: bool,
    project_url: Optional[str] = None,
) -> list[str]:
    """Build the ``uv run`` argument vector that starts a Space server.

    Args:
        repo_id: Hugging Face Space id, used to derive the default project.
        host: Value passed to the server's ``--host`` option.
        port: Value passed to the server's ``--port`` option.
        reload: When true, ``--reload`` is appended to the command.
        project_url: Explicit ``--project`` value; when empty or ``None`` the
            Space's ``git+https://huggingface.co/spaces/...`` URL is used.

    Returns:
        The command as a list of strings, suitable for ``subprocess.Popen``.
    """
    if project_url:
        project = project_url
    else:
        project = f"git+https://huggingface.co/spaces/{repo_id}"

    uv_args = ["uv", "run", "--project", project, "--"]
    server_args = ["server", "--host", host, "--port", str(port)]
    reload_args = ["--reload"] if reload else []
    return [*uv_args, *server_args, *reload_args]


@dataclass
class UVProvider(ContainerProvider):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
class UVProvider(ContainerProvider):
class UVRuntime(RuntimeProvider):

(naming suggestion to be aligned with the "runtime/" folder)

"""ContainerProvider implementation backed by ``uv run``."""

repo_id: str
host: str = "0.0.0.0"
port: Optional[int] = None
reload: bool = False
project_url: Optional[str] = None
connect_host: Optional[str] = None
extra_env: Optional[Dict[str, str]] = None
context_timeout_s: float = 60.0

_process: subprocess.Popen | None = field(init=False, default=None)
_base_url: str | None = field(init=False, default=None)

def start_container(
    self,
    image: str,
    port: Optional[int] = None,
    env_vars: Optional[Dict[str, str]] = None,
    **_: Dict[str, str],
) -> str:
    """Launch the Space with ``uv run`` and return the URL clients should use.

    This only spawns the process; it does not wait for the server to
    answer health checks.

    Args:
        image: Space repo id to launch. When empty, the already configured
            ``repo_id`` is kept; otherwise it replaces ``self.repo_id``.
        port: Port to bind. Falls back to ``self.port`` and then to a port
            chosen by ``_find_free_port``.
        env_vars: Environment variables for the child process. They are
            applied after ``extra_env``, so they win on conflicting keys.
        **_: Any further keyword arguments are accepted and ignored.

    Returns:
        The base URL (``http://<host>:<port>``), also stored on the
        instance. ``self.port`` is updated to the port actually used.

    Raises:
        RuntimeError: If a process started by this provider is still
            running, if the ``uv`` executable cannot be found, or if the
            operating system fails to launch it.
    """
    if self._process is not None and self._process.poll() is None:
        raise RuntimeError("UVProvider is already running")

    self.repo_id = image or self.repo_id

    bind_port = port or self.port or self._find_free_port()

    command = _create_uv_command(
        self.repo_id,
        self.host,
        bind_port,
        self.reload,
        project_url=self.project_url,
    )

    # Layer the environment: inherited < provider-level < per-call.
    env = os.environ.copy()
    if self.extra_env:
        env.update(self.extra_env)
    if env_vars:
        env.update(env_vars)

    try:
        self._process = subprocess.Popen(command, env=env)
    except FileNotFoundError as exc:
        raise RuntimeError(
            "`uv` executable not found. Install uv from "
            "https://github.com/astral-sh/uv and ensure it is on PATH."
        ) from exc
    except OSError as exc:
        raise RuntimeError(f"Failed to launch `uv run`: {exc}") from exc

    # A wildcard bind address is not a connectable address, so clients are
    # pointed at loopback unless ``connect_host`` says otherwise.
    client_host = self.connect_host or (
        "127.0.0.1" if self.host in {"0.0.0.0", "::"} else self.host
    )
    # A bare IPv6 literal must be bracketed inside a URL, otherwise its
    # colons cannot be told apart from the port separator.
    if ":" in client_host and not client_host.startswith("["):
        client_host = f"[{client_host}]"

    self._base_url = f"http://{client_host}:{bind_port}"
    self.port = bind_port
    return self._base_url

def wait_for_ready(self, base_url: str, timeout_s: float = 60.0) -> None:
    """Wait until ``{base_url}/health`` responds, failing fast on a dead process.

    Args:
        base_url: Server base URL; ``/health`` is appended to it.
        timeout_s: Time budget in seconds handed to ``_poll_health``.

    Raises:
        RuntimeError: If the tracked ``uv`` process has already exited when
            this method is called.
        TimeoutError: Propagated from ``_poll_health`` when the endpoint
            does not become healthy in time.
    """
    proc = self._process
    if proc and proc.poll() is not None:
        exit_code = proc.returncode
        raise RuntimeError(
            f"uv process exited prematurely with code {exit_code}"
        )

    health_url = f"{base_url}/health"
    _poll_health(health_url, timeout_s)

def stop_container(self) -> None:
    """Stop the tracked ``uv`` process, if any, and clear the stored state.

    A process that is still running is asked to terminate and given 10
    seconds to exit; after that it is killed and given 5 more seconds.
    Does nothing when no process is tracked.
    """
    proc = self._process
    if proc is None:
        return

    still_running = proc.poll() is None
    if still_running:
        proc.terminate()
        try:
            proc.wait(timeout=10.0)
        except subprocess.TimeoutExpired:
            # Graceful shutdown took too long; force the process down.
            proc.kill()
            proc.wait(timeout=5.0)

    self._base_url = None
    self._process = None

def start(self) -> str:
    """Start the configured Space and return its base URL.

    Argument-free convenience around ``start_container`` that uses the
    instance's own ``repo_id`` and ``port``.
    """
    url = self.start_container(self.repo_id, port=self.port)
    return url

def stop(self) -> None:
    """Stop the server; argument-free counterpart of ``start``.

    Delegates to ``stop_container``.
    """
    self.stop_container()
    return None
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do like having start and stop methods not taking any argument. In fact I think that the abstract class should define these instead of start_container/stop_container. Now that we have two classes inheriting from ContainerProvider I would take advantage of it to factorize what can be (when it makes sense at least).

Same for wait_for_ready, which should not take any argument IMO.


def wait_for_ready_default(self, timeout_s: float | None = None) -> None:
if self._base_url is None:
raise RuntimeError("UVProvider has not been started")
self.wait_for_ready(
self._base_url,
timeout_s or self.context_timeout_s,
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would keep only one way to define the timeout (either in wait_for_ready or in __init__, but not in both).

)

def close(self) -> None:
    """Release the provider by stopping its process.

    Delegates to ``stop_container``.
    """
    self.stop_container()
    return None

def __enter__(self) -> "UVProvider":
    """Start the server on context entry unless a base URL is already set.

    When no base URL is stored, the process is launched via
    ``start_container`` and ``wait_for_ready`` is called with
    ``context_timeout_s``.

    Returns:
        This provider instance.

    Raises:
        Any exception raised by ``start_container`` or ``wait_for_ready``.
        If the readiness wait fails, the launched process is stopped
        before the exception propagates.
    """
    if self._base_url is None:
        base_url = self.start_container(self.repo_id, port=self.port)
        try:
            self.wait_for_ready(base_url, timeout_s=self.context_timeout_s)
        except BaseException:
            # ``__exit__`` is not invoked when ``__enter__`` raises, so the
            # child process has to be cleaned up here to avoid leaking it.
            self.stop_container()
            raise
    return self

def __exit__(self, exc_type, exc, tb) -> None:
    """Stop the server on context exit.

    The exception details are ignored. Returning ``None`` means an
    exception raised inside the ``with`` body is not suppressed.
    """
    self.stop_container()
    return None

def _find_free_port(self) -> int:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same implementation as in docker provider (with different name). I would move the implementation to the parent class and reuse it.

with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
sock.bind(("", 0))
sock.listen(1)
return sock.getsockname()[1]

@property
def base_url(self) -> str:
    """Base URL recorded by the last successful ``start_container`` call.

    Raises:
        RuntimeError: If no base URL is currently stored, i.e. the provider
            has not been started or has been stopped.
    """
    url = self._base_url
    if url is not None:
        return url
    raise RuntimeError("UVProvider has not been started")


80 changes: 64 additions & 16 deletions src/core/http_env_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import requests

from .client_types import StepResult
from .containers.runtime import LocalDockerProvider
from .containers.runtime import LocalDockerProvider, UVProvider

if TYPE_CHECKING:
from .containers.runtime import ContainerProvider
Expand Down Expand Up @@ -106,22 +106,70 @@ def from_docker_image(
return cls(base_url=base_url, provider=provider)

@classmethod
def from_hub(cls: Type[EnvClientT], repo_id: str, provider: Optional["ContainerProvider"] = None, **kwargs: Any) -> EnvClientT:
"""
Create an environment client by pulling from a Hugging Face model hub.
def from_hub(
cls: Type[EnvClientT],
repo_id: str,
*,
use_docker: bool = False,
provider: Optional["ContainerProvider"] = None,
host: str = "0.0.0.0",
port: Optional[int] = None,
reload: bool = False,
timeout_s: float = 60.0,
runner: Optional[UVProvider] = None,
project_url: Optional[str] = None,
connect_host: Optional[str] = None,
extra_env: Optional[Dict[str, str]] = None,
**provider_kwargs: Any,
) -> EnvClientT:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find this signature very hard to understand without context since it's mixing kwargs for docker and for uv. Also I don't think one needs use_docker, provider, and runner.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My suggestion would be to remove provider and runner. Also remove project_url and connect_host. Have a single env (instead of env for docker and extra_env for uv). And use some typed dict + typing.overload so that IDEs can have correct autocompletion. Here is a simplified example:

from typing import Any, Dict, NotRequired, TypedDict, Unpack, overload


class DockerKwargs(TypedDict, total=False):
    tag: NotRequired[str]
    env: NotRequired[Dict[str, str]]


class UVProvider(TypedDict):
    host: NotRequired[str]
    port: NotRequired[int]
    reload: NotRequired[bool]
    timeout_s: NotRequired[float]
    env: NotRequired[Dict[str, str]]


@overload
def from_hub(repo_id: str, *, use_docker: bool = True, **kwargs: Unpack[DockerKwargs]) -> str: ...


@overload
def from_hub(repo_id: str, *, use_docker: bool = False, **kwargs: Unpack[UVProvider]) -> str: ...


def from_hub(repo_id: str, *, use_docker: bool = False, **kwargs: Any) -> str:
    raise NotImplementedError()
Image Image

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that I'm not a fan of overloads and typed dict but it's the only solution I see to correctly document the signature while keeping a single method.

Another solution is to have from_hub_docker and from_hub_uv (more explicit but less elegant)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for this. I've simplified the signatures right down, but I haven't added type overloading in this PR.

"""Create a client from a Hugging Face Space.

Set ``use_docker=True`` to launch the registry image with a container
provider. The default ``use_docker=False`` runs the Space locally using
``uv run`` through :class:`UVProvider`.
"""

if provider is None:
provider = LocalDockerProvider()

if "tag" in kwargs:
tag = kwargs["tag"]
else:
tag = "latest"

base_url = f"registry.hf.space/{repo_id.replace('/', '-')}:{tag}"

return cls.from_docker_image(image=base_url, provider=provider)

if use_docker:
if provider is None:
provider = LocalDockerProvider()

tag = provider_kwargs.pop("tag", "latest")
image = provider_kwargs.pop(
"image",
f"registry.hf.space/{repo_id.replace('/', '-')}:" f"{tag}",
)

base_url = provider.start_container(image, **provider_kwargs)
provider.wait_for_ready(base_url, timeout_s=timeout_s)
return cls(base_url=base_url, provider=provider)

uv_runner = runner or UVProvider(
repo_id=repo_id,
host=host,
port=port,
reload=reload,
project_url=project_url,
connect_host=connect_host,
extra_env=extra_env,
)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
uv_runner = runner or UVProvider(
repo_id=repo_id,
host=host,
port=port,
reload=reload,
project_url=project_url,
connect_host=connect_host,
extra_env=extra_env,
)
runner = UVProvider(
project=f"git+https://huggingface.co/spaces/{repo_id}",
host=host,
port=port,
reload=reload,
env=env,
)

With all the suggestions above, that would be the runner call (i.e. provide a project url, remove connect_host, remove project_url, rename extra_env to env, and remove runner). I don't think the end user loses much with this simplification.


non_docker_kwargs = dict(provider_kwargs)
env_vars = non_docker_kwargs.pop("env_vars", None)

base_url = uv_runner.start_container(
repo_id,
port=port,
env_vars=env_vars,
**non_docker_kwargs,
)

try:
uv_runner.wait_for_ready(base_url, timeout_s=timeout_s)
except Exception:
uv_runner.stop_container()
raise
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Once the runner is defined and instantiated, I feel that the start and wait calls should be made in the __init__ instead. This way you get the same behavior both in from_hub and from_docker. With the current implementation, one stops the runner if it fails to start and the other doesn't (which is inconsistent).


return cls(base_url=base_url, provider=uv_runner)

@abstractmethod
def _step_payload(self, action: ActT) -> dict:
Expand Down