131 changes: 131 additions & 0 deletions README.md
@@ -436,6 +436,137 @@ with Replicate() as replicate:
# HTTP client is now closed
```

## Experimental: Using `replicate.use()`

> [!WARNING]
> The `replicate.use()` interface is experimental and subject to change. We welcome your feedback on this new API design.

The `use()` method provides a concise way to call Replicate models as functions, offering a more Pythonic interface for running models:

```python
import replicate

# Create a model function
flux_dev = replicate.use("black-forest-labs/flux-dev")

# Call it like a regular Python function
outputs = flux_dev(
    prompt="a cat wearing a wizard hat, digital art",
    num_outputs=1,
    aspect_ratio="1:1",
    output_format="webp",
)

# outputs is a list of URLPath objects that auto-download when accessed
for output in outputs:
    print(output)  # e.g., Path(/tmp/a1b2c3/output.webp)
```

### Language models with streaming

Many models, particularly language models, support streaming output. Use the `streaming=True` parameter to get results as they're generated:

```python
import replicate

# Create a streaming language model function
llama = replicate.use("meta/meta-llama-3-8b-instruct", streaming=True)

# Stream the output
output = llama(prompt="Write a haiku about Python programming", max_tokens=50)

for chunk in output:
    print(chunk, end="", flush=True)
```

### Chaining models

You can easily chain models together by passing the output of one model as input to another:

```python
import replicate

# Create two model functions
flux_dev = replicate.use("black-forest-labs/flux-dev")
llama = replicate.use("meta/meta-llama-3-8b-instruct")

# Generate an image
images = flux_dev(prompt="a mysterious ancient artifact")

# Describe the image
description = llama(
    prompt="Describe this image in detail",
    image=images[0],  # Pass the first image directly
)

print(description)
```

### Async support

For async/await patterns, use the `use_async=True` parameter:

```python
import asyncio
import replicate


async def main():
    # Create an async model function
    flux_dev = replicate.use("black-forest-labs/flux-dev", use_async=True)

    # Await the result
    outputs = await flux_dev(prompt="futuristic city at sunset")

    for output in outputs:
        print(output)


asyncio.run(main())
```
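
Streaming and async can be combined: pass both `streaming=True` and `use_async=True`, await the call, then iterate with `async for`. This follows the pattern in `examples/use_demo.py` from this PR:

```python
import asyncio

import replicate


async def main():
    # Create an async, streaming model function
    llama = replicate.use("meta/meta-llama-3-8b-instruct", streaming=True, use_async=True)

    # Await the call, then consume chunks as they are generated
    output = await llama(prompt="Write a short poem about async/await", max_tokens=50)
    async for chunk in output:
        print(chunk, end="", flush=True)


asyncio.run(main())
```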

### Accessing URLs without downloading

If you need the URL without downloading the file, use the `get_path_url()` helper:

```python
import replicate
from replicate.lib._predictions_use import get_path_url

flux_dev = replicate.use("black-forest-labs/flux-dev")
outputs = flux_dev(prompt="a serene landscape")

for output in outputs:
    url = get_path_url(output)
    print(f"URL: {url}")  # https://replicate.delivery/...
```

### Creating predictions without waiting

To create a prediction without waiting for it to complete, use the `create()` method:

```python
import replicate

llama = replicate.use("meta/meta-llama-3-8b-instruct")

# Start the prediction
run = llama.create(prompt="Explain quantum computing")

# Check logs while it's running
print(run.logs())

# Get the output when ready
result = run.output()
print(result)
```

### Current limitations

- The `use()` method must be called at the module level, not inside functions or classes (see the sketch below)
- Type hints are limited compared to the standard client interface
- This is an experimental API and may change in future releases
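
A minimal sketch of the module-level constraint; the function name and prompt are illustrative:

```python
import replicate

# Supported: create the model function at module level
flux_dev = replicate.use("black-forest-labs/flux-dev")


def generate_image():
    # Not supported: calling replicate.use() inside a function or class.
    # Instead, call the module-level function from here.
    return flux_dev(prompt="a sunrise over mountains")
```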

## Versioning

This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions:
99 changes: 99 additions & 0 deletions examples/use_demo.py
@@ -0,0 +1,99 @@
#!/usr/bin/env python3

# TODO: Add proper type annotations
# type: ignore

"""
Example of using the experimental replicate.use() interface
"""

import replicate

print("Testing replicate.use() functionality...")

# Test 1: Simple text model
print("\n1. Testing simple text model...")
try:
    hello_world = replicate.use("replicate/hello-world")
    result = hello_world(text="Alice")
    print(f"Result: {result}")
except Exception as e:
    print(f"Error: {type(e).__name__}: {e}")

# Test 2: Image generation model
print("\n2. Testing image generation model...")
try:
    from replicate.lib._predictions_use import get_path_url

    flux_dev = replicate.use("black-forest-labs/flux-dev")
    outputs = flux_dev(
        prompt="a cat wearing a wizard hat, digital art",
        num_outputs=1,
        aspect_ratio="1:1",
        output_format="webp",
        guidance=3.5,
        num_inference_steps=28,
    )
    print(f"Generated output: {outputs}")
    if isinstance(outputs, list):
        print(f"Generated {len(outputs)} image(s)")
        for i, output in enumerate(outputs):
            print(f"  Image {i}: {output}")
            # Get the URL without downloading
            url = get_path_url(output)
            if url:
                print(f"  URL: {url}")
    else:
        print(f"Single output: {outputs}")
        url = get_path_url(outputs)
        if url:
            print(f"  URL: {url}")
except Exception as e:
    print(f"Error: {type(e).__name__}: {e}")
    import traceback

    traceback.print_exc()

# Test 3: Language model with streaming
print("\n3. Testing language model with streaming...")
try:
    llama = replicate.use("meta/meta-llama-3-8b-instruct", streaming=True)
    output = llama(prompt="Write a haiku about Python programming", max_tokens=50)
    print("Streaming output:")
    for chunk in output:
        print(chunk, end="", flush=True)
    print()
except Exception as e:
    print(f"Error: {type(e).__name__}: {e}")
    import traceback

    traceback.print_exc()

# Test 4: Using async
print("\n4. Testing async functionality...")
import asyncio


async def test_async():
    try:
        hello_world = replicate.use("replicate/hello-world", use_async=True)
        result = await hello_world(text="Bob")
        print(f"Async result: {result}")

        print("\n4b. Testing async streaming...")
        llama = replicate.use("meta/meta-llama-3-8b-instruct", streaming=True, use_async=True)
        output = await llama(prompt="Write a short poem about async/await", max_tokens=50)
        print("Async streaming output:")
        async for chunk in output:
            print(chunk, end="", flush=True)
        print()
    except Exception as e:
        print(f"Error: {type(e).__name__}: {e}")
        import traceback

        traceback.print_exc()


asyncio.run(test_async())

print("\nDone!")
1 change: 1 addition & 0 deletions src/replicate/__init__.py
@@ -243,6 +243,7 @@ def _reset_client() -> None:  # type: ignore[reportUnusedFunction]

from ._module_client import (
    run as run,
    use as use,
    files as files,
    models as models,
    account as account,
102 changes: 97 additions & 5 deletions src/replicate/_client.py
@@ -3,13 +3,25 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING, Any, Union, Mapping, Optional
from typing_extensions import Self, Unpack, override
from typing import (
    TYPE_CHECKING,
    Any,
    Union,
    Literal,
    Mapping,
    TypeVar,
    Callable,
    Iterator,
    Optional,
    AsyncIterator,
    overload,
)
from typing_extensions import Self, Unpack, ParamSpec, override

import httpx

from replicate.lib._files import FileEncodingStrategy
from replicate.lib._predictions import Model, Version, ModelVersionIdentifier
from replicate.lib._predictions_run import Model, Version, ModelVersionIdentifier
from replicate.types.prediction_create_params import PredictionCreateParamsWithoutVersion

from . import _exceptions
@@ -46,6 +58,12 @@
from .resources.webhooks.webhooks import WebhooksResource, AsyncWebhooksResource
from .resources.deployments.deployments import DeploymentsResource, AsyncDeploymentsResource

if TYPE_CHECKING:
    from .lib._predictions_use import Function, FunctionRef, AsyncFunction

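# Typing helpers for use(): Input captures a model function's call signature
# via ParamSpec, and Output is its return type.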
Input = ParamSpec("Input")
Output = TypeVar("Output")

__all__ = [
    "Timeout",
    "Transport",
@@ -236,7 +254,7 @@ def run(
            ValueError: If the reference format is invalid
            TypeError: If both wait and prefer parameters are provided
        """
        from .lib._predictions import run
        from .lib._predictions_run import run

        return run(
            self,
@@ -247,6 +265,43 @@ def run(
            **params,
        )

    @overload
    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: Literal[False] = False,
    ) -> "Function[Input, Output]": ...

    @overload
    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: Literal[True],
    ) -> "Function[Input, Iterator[Output]]": ...

    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: bool = False,
    ) -> Union["Function[Input, Output]", "Function[Input, Iterator[Output]]"]:
        """
        Use a Replicate model as a function.

        Example:
            flux_dev = replicate.use("black-forest-labs/flux-dev")
            output = flux_dev(prompt="make me a sandwich")
        """
        from .lib._predictions_use import use as _use

        # TODO: Fix mypy overload matching for streaming parameter
        return _use(self, ref, hint=hint, streaming=streaming)  # type: ignore[call-overload, no-any-return]

    def copy(
        self,
        *,
@@ -510,7 +565,7 @@ async def run(
            ValueError: If the reference format is invalid
            TypeError: If both wait and prefer parameters are provided
        """
        from .lib._predictions import async_run
        from .lib._predictions_run import async_run

        return await async_run(
            self,
@@ -521,6 +576,43 @@ async def run(
            **params,
        )

    @overload
    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: Literal[False] = False,
    ) -> "AsyncFunction[Input, Output]": ...

    @overload
    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: Literal[True],
    ) -> "AsyncFunction[Input, AsyncIterator[Output]]": ...

    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: bool = False,
    ) -> Union["AsyncFunction[Input, Output]", "AsyncFunction[Input, AsyncIterator[Output]]"]:
        """
        Use a Replicate model as an async function.

        Example:
            flux_dev = replicate.use("black-forest-labs/flux-dev", use_async=True)
            output = await flux_dev(prompt="make me a sandwich")
        """
        from .lib._predictions_use import use as _use

        # TODO: Fix mypy overload matching for streaming parameter
        return _use(self, ref, hint=hint, streaming=streaming)  # type: ignore[call-overload, no-any-return]

    def copy(
        self,
        *,