Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
741df3a
feat(client): first pass at .run() helper
dgellow Apr 17, 2025
cfcf1a7
fix linter issues
dtmeadows Apr 17, 2025
36a65e6
Update run() return type to Any
dgellow Apr 23, 2025
38cdddc
Fix formatting
dgellow Apr 23, 2025
ffdaeda
Fix async run()
dgellow Apr 23, 2025
c6f76bb
Add run() tests
dgellow Apr 23, 2025
4a25c43
Add run() tests
dgellow Apr 23, 2025
85d1942
Merge remote-tracking branch 'origin/next' into sam/run-helper
dtmeadows May 5, 2025
00723b4
add back separate models
dtmeadows May 5, 2025
46b1f8d
clean up tests
dtmeadows May 5, 2025
b969e71
add support for more ref types
dtmeadows May 5, 2025
7b1a1cc
forgot the file
dtmeadows May 5, 2025
c9f34c5
add support for use_file_output
dtmeadows May 5, 2025
1804f7c
temp fix to generated type
dtmeadows May 5, 2025
c80c7f3
fix up tests
dtmeadows May 5, 2025
82fe905
fix types and skip 2 remaining tests
dtmeadows May 5, 2025
eaf0a07
implement encode_json for input and thread through
dtmeadows May 5, 2025
6824251
adjust typing to just be Any for PredictionOutput
dtmeadows May 6, 2025
eadfb53
Merge remote-tracking branch 'origin/next' into sam/run-helper
dtmeadows May 6, 2025
cf383c6
finish last bits of todos
dtmeadows May 6, 2025
24d30f4
updates to support async example
dtmeadows May 6, 2025
11e09d3
clean up testing a bit more
dtmeadows May 6, 2025
0257edf
move docs up into client
dtmeadows May 6, 2025
00970c0
Merge remote-tracking branch 'origin/next' into sam/run-helper
dtmeadows May 7, 2025
c7216db
clean up helpers to match underlying api changes
dtmeadows May 7, 2025
80b28fc
clean up and use better import
dtmeadows May 7, 2025
a5c02d4
fixup!
dtmeadows May 7, 2025
aef2230
Merge remote-tracking branch 'origin/next' into sam/run-helper
dtmeadows May 7, 2025
0ec2897
fix files uploading
dtmeadows May 7, 2025
2143731
add test for file upload
dtmeadows May 7, 2025
0beb9ef
Revert "add test for file upload"
dtmeadows May 7, 2025
c9c4b8c
Revert "fix files uploading"
dtmeadows May 7, 2025
ad4da10
fixup!
dtmeadows May 7, 2025
e8af3f1
Merge remote-tracking branch 'origin/next' into sam/run-helper
dtmeadows May 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions examples/run_a_model.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import rich

from replicate import Replicate

# A single client instance is enough for the whole script.
replicate = Replicate()

# Run the model and block until the prediction has finished.
images = replicate.run(
    "black-forest-labs/flux-schnell",
    input={"prompt": "astronaut riding a rocket like a horse"},
)
rich.print(images)

# Persist every generated image to the working directory.
for position, image in enumerate(images):
    destination = f"output_{position}.webp"
    with open(destination, "wb") as image_file:
        image_file.write(image.read())
20 changes: 20 additions & 0 deletions examples/run_async.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import asyncio

from replicate import AsyncReplicate

client = AsyncReplicate()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we rename client to replicate here? Will that work?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done!

(we can't change the from replicate import AsyncReplicate unfortunately though here, since the module client only returns the sync client)


# https://replicate.com/stability-ai/sdxl
model_version = "stability-ai/sdxl:39ed52f2a78e934b3ba6e2a89f5b1c712de7dfea535525255b1aa35c5565e08b"
prompts = [f"A chariot pulled by a team of {count} rainbow unicorns" for count in ["two", "four", "six", "eight"]]


async def main() -> None:
    """Start one prediction per prompt concurrently and print all results."""
    pending = [client.run(model_version, input={"prompt": prompt}) for prompt in prompts]
    # gather awaits every prediction in parallel and preserves input order.
    results = await asyncio.gather(*pending)
    print(results)


asyncio.run(main())
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"anyio>=3.5.0, <5",
"distro>=1.7.0, <2",
"sniffio",
# NOTE: "asyncio" must not be listed here — it is part of the Python
# standard library, and the PyPI package of that name is an obsolete
# Python 3.3-era backport.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this isn't quite right, asyncio is stdlib

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed!

]
requires-python = ">= 3.8"
classifiers = [
Expand Down
8 changes: 8 additions & 0 deletions src/replicate/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from ._version import __title__, __version__
from ._response import APIResponse as APIResponse, AsyncAPIResponse as AsyncAPIResponse
from ._constants import DEFAULT_TIMEOUT, DEFAULT_MAX_RETRIES, DEFAULT_CONNECTION_LIMITS
from .lib._files import FileOutput as FileOutput, AsyncFileOutput as AsyncFileOutput
from ._exceptions import (
APIError,
ConflictError,
Expand All @@ -38,6 +39,7 @@
UnprocessableEntityError,
APIResponseValidationError,
)
from .lib._models import Model as Model, Version as Version, ModelVersionIdentifier as ModelVersionIdentifier
from ._base_client import DefaultHttpxClient, DefaultAsyncHttpxClient
from ._utils._logs import setup_logging as _setup_logging

Expand Down Expand Up @@ -80,6 +82,11 @@
"DEFAULT_CONNECTION_LIMITS",
"DefaultHttpxClient",
"DefaultAsyncHttpxClient",
"FileOutput",
"AsyncFileOutput",
"Model",
"Version",
"ModelVersionIdentifier",
]

_setup_logging()
Expand Down Expand Up @@ -230,6 +237,7 @@ def _reset_client() -> None: # type: ignore[reportUnusedFunction]


from ._module_client import (
run as run,
models as models,
account as account,
hardware as hardware,
Expand Down
58 changes: 56 additions & 2 deletions src/replicate/_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING, Any, Union, Mapping
from typing_extensions import Self, override
from typing import TYPE_CHECKING, Any, Union, Mapping, Optional
from typing_extensions import Self, Unpack, override

import httpx

from replicate.lib._files import FileEncodingStrategy
from replicate.lib._predictions import Model, Version, ModelVersionIdentifier
from replicate.types.prediction_create_params import PredictionCreateParamsWithoutVersion

from . import _exceptions
from ._qs import Querystring
from ._types import (
Expand Down Expand Up @@ -164,6 +168,10 @@ def with_raw_response(self) -> ReplicateWithRawResponse:
def with_streaming_response(self) -> ReplicateWithStreamedResponse:
return ReplicateWithStreamedResponse(self)

@cached_property
def poll_interval(self) -> float:
    """Seconds to wait between prediction status polls.

    Read once from the ``REPLICATE_POLL_INTERVAL`` environment variable;
    defaults to 0.5 seconds when the variable is unset.
    """
    raw_interval = os.environ.get("REPLICATE_POLL_INTERVAL", "0.5")
    return float(raw_interval)

@property
@override
def qs(self) -> Querystring:
Expand All @@ -184,6 +192,27 @@ def default_headers(self) -> dict[str, str | Omit]:
**self._custom_headers,
}

def run(
    self,
    ref: Union[Model, Version, ModelVersionIdentifier, str],
    *,
    file_encoding_strategy: Optional["FileEncodingStrategy"] = None,
    use_file_output: bool = True,
    wait: Union[int, bool, NotGiven] = NOT_GIVEN,
    **params: Unpack[PredictionCreateParamsWithoutVersion],
) -> Any:
    """Run a model and wait for its output.

    Args:
        ref: What to run — a ``Model``, ``Version``,
            ``ModelVersionIdentifier``, or a string reference.
        file_encoding_strategy: Optional strategy used when encoding
            file inputs; forwarded unchanged to the helper.
        use_file_output: Forwarded to the helper; presumably controls
            whether file results are wrapped as file-output objects.
        wait: Forwarded to the helper — see
            ``replicate.lib._predictions.run`` for its semantics.
        **params: Remaining prediction-creation parameters.
    """
    # Deferred import — presumably avoids a circular import at module load.
    from .lib._predictions import run as _run_prediction

    return _run_prediction(
        self,
        ref,
        wait=wait,
        file_encoding_strategy=file_encoding_strategy,
        use_file_output=use_file_output,
        **params,
    )

def copy(
self,
*,
Expand Down Expand Up @@ -380,6 +409,10 @@ def with_raw_response(self) -> AsyncReplicateWithRawResponse:
def with_streaming_response(self) -> AsyncReplicateWithStreamedResponse:
return AsyncReplicateWithStreamedResponse(self)

@cached_property
def poll_interval(self) -> float:
    """Polling cadence in seconds for prediction status checks.

    Configurable via the ``REPLICATE_POLL_INTERVAL`` environment
    variable; falls back to 0.5 seconds when it is not set.
    """
    configured = os.environ.get("REPLICATE_POLL_INTERVAL")
    return float(configured if configured is not None else "0.5")

@property
@override
def qs(self) -> Querystring:
Expand All @@ -400,6 +433,27 @@ def default_headers(self) -> dict[str, str | Omit]:
**self._custom_headers,
}

async def run(
    self,
    ref: Union[Model, Version, ModelVersionIdentifier, str],
    *,
    use_file_output: bool = True,
    file_encoding_strategy: Optional["FileEncodingStrategy"] = None,
    wait: Union[int, bool, NotGiven] = NOT_GIVEN,
    **params: Unpack[PredictionCreateParamsWithoutVersion],
) -> Any:
    """Run a model and wait for its output.

    Args:
        ref: What to run — a ``Model``, ``Version``,
            ``ModelVersionIdentifier``, or a string reference.
        use_file_output: Forwarded to the helper; presumably controls
            whether file results are wrapped as async file-output objects.
        file_encoding_strategy: Optional strategy used when encoding
            file inputs; forwarded unchanged.
        wait: Forwarded to the helper — see
            ``replicate.lib._predictions.async_run`` for its semantics.
        **params: Remaining prediction-creation parameters.
    """
    # Deferred import — presumably avoids a circular import at module load.
    from .lib._predictions import async_run as _async_run

    prediction_output = await _async_run(
        self,
        ref,
        wait=wait,
        file_encoding_strategy=file_encoding_strategy,
        use_file_output=use_file_output,
        **params,
    )
    return prediction_output

def copy(
self,
*,
Expand Down
13 changes: 13 additions & 0 deletions src/replicate/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@

import httpx

from replicate.types.prediction import Prediction

__all__ = [
"BadRequestError",
"AuthenticationError",
Expand All @@ -15,6 +17,7 @@
"UnprocessableEntityError",
"RateLimitError",
"InternalServerError",
"ModelError",
]


Expand Down Expand Up @@ -106,3 +109,13 @@ class RateLimitError(APIStatusError):

class InternalServerError(APIStatusError):
pass


class ModelError(ReplicateError):
    """An error from user's code in a model.

    Wraps the failed ``Prediction`` so callers can inspect it; the
    prediction's ``error`` field is used as the exception message.
    """

    # The prediction whose error produced this exception.
    prediction: Prediction

    def __init__(self, prediction: Prediction) -> None:
        self.prediction = prediction
        super().__init__(prediction.error)
16 changes: 15 additions & 1 deletion src/replicate/_module_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing_extensions import override
from typing_extensions import cast, override

if TYPE_CHECKING:
from .resources.account import AccountResource
Expand Down Expand Up @@ -67,6 +67,20 @@ def __load__(self) -> PredictionsResource:
return _load_client().predictions


if TYPE_CHECKING:
    from ._client import Replicate

    # get the type checker to infer the run symbol to the same type
    # as the method on the client so we don't have to define it twice
    # (the cast of an empty dict is never executed — TYPE_CHECKING is
    # False at runtime, so this branch exists purely for type analysis)
    __client: Replicate = cast(Replicate, {})
    run = __client.run
else:

    # Runtime implementation: defer client construction until the first
    # call so importing the module stays side-effect free.
    def _run(*args, **kwargs):
        return _load_client().run(*args, **kwargs)

    run = _run

models: ModelsResource = ModelsResourceProxy().__as_proxied__()
account: AccountResource = AccountResourceProxy().__as_proxied__()
hardware: HardwareResource = HardwareResourceProxy().__as_proxied__()
Expand Down
Loading