131 changes: 131 additions & 0 deletions README.md
@@ -436,6 +436,137 @@ with Replicate() as replicate:
# HTTP client is now closed
```

## Experimental: Using `replicate.use()`

> [!WARNING]
> The `replicate.use()` interface is experimental and subject to change. We welcome your feedback on this new API design.

The `use()` method provides a concise way to call Replicate models as functions, offering a more Pythonic interface for running models:

```python
import replicate

# Create a model function
flux_dev = replicate.use("black-forest-labs/flux-dev")

# Call it like a regular Python function
outputs = flux_dev(
    prompt="a cat wearing a wizard hat, digital art",
    num_outputs=1,
    aspect_ratio="1:1",
    output_format="webp",
)

# outputs is a list of URLPath objects that auto-download when accessed
for output in outputs:
    print(output)  # e.g., Path(/tmp/a1b2c3/output.webp)
```

### Language models with streaming

Many models, particularly language models, support streaming output. Use the `streaming=True` parameter to get results as they're generated:

```python
import replicate

# Create a streaming language model function
llama = replicate.use("meta/meta-llama-3-8b-instruct", streaming=True)

# Stream the output
output = llama(prompt="Write a haiku about Python programming", max_tokens=50)

for chunk in output:
    print(chunk, end="", flush=True)
```

### Chaining models

You can easily chain models together by passing the output of one model as input to another:

```python
import replicate

# Create two model functions
flux_dev = replicate.use("black-forest-labs/flux-dev")
llama = replicate.use("meta/meta-llama-3-8b-instruct")

# Generate an image
images = flux_dev(prompt="a mysterious ancient artifact")

# Describe the image
description = llama(
    prompt="Describe this image in detail",
    image=images[0],  # Pass the first image directly
)

print(description)
```

### Async support

For async/await patterns, use the `use_async=True` parameter:

```python
import asyncio
import replicate


async def main():
    # Create an async model function
    flux_dev = replicate.use("black-forest-labs/flux-dev", use_async=True)

    # Await the result
    outputs = await flux_dev(prompt="futuristic city at sunset")

    for output in outputs:
        print(output)


asyncio.run(main())
```
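
Streaming and async can be combined: pass both `streaming=True` and `use_async=True`, await the call, then iterate with `async for`. This follows the pattern in `examples/use_demo.py` from this PR:

```python
import asyncio

import replicate


async def main():
    # Create an async, streaming model function
    llama = replicate.use("meta/meta-llama-3-8b-instruct", streaming=True, use_async=True)

    # Await the call, then consume chunks as they are generated
    output = await llama(prompt="Write a short poem about async/await", max_tokens=50)
    async for chunk in output:
        print(chunk, end="", flush=True)


asyncio.run(main())
```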

### Accessing URLs without downloading

If you need the URL without downloading the file, use the `get_path_url()` helper:

```python
import replicate
from replicate.lib._predictions_use import get_path_url

flux_dev = replicate.use("black-forest-labs/flux-dev")
outputs = flux_dev(prompt="a serene landscape")

for output in outputs:
    url = get_path_url(output)
    print(f"URL: {url}")  # https://replicate.delivery/...
```

### Creating predictions without waiting

To create a prediction without waiting for it to complete, use the `create()` method:

```python
import replicate

llama = replicate.use("meta/meta-llama-3-8b-instruct")

# Start the prediction
run = llama.create(prompt="Explain quantum computing")

# Check logs while it's running
print(run.logs())

# Get the output when ready
result = run.output()
print(result)
```

### Current limitations

- The `use()` method must be called at the module level, not inside functions or classes (see the sketch below)
- Type hints are limited compared to the standard client interface
- This is an experimental API and may change in future releases
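
A minimal sketch of the module-level constraint; the function name and prompt are illustrative:

```python
import replicate

# Supported: create the model function at module level
flux_dev = replicate.use("black-forest-labs/flux-dev")


def generate_image():
    # Not supported: calling replicate.use() inside a function or class.
    # Instead, call the module-level function from here.
    return flux_dev(prompt="a sunrise over mountains")
```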

## Versioning

This package generally follows [SemVer](https://semver.org/spec/v2.0.0.html) conventions, though certain backwards-incompatible changes may be released as minor versions:
99 changes: 99 additions & 0 deletions examples/use_demo.py
@@ -0,0 +1,99 @@
#!/usr/bin/env python3

# TODO: Add proper type annotations
# type: ignore

"""
Example of using the experimental replicate.use() interface
"""

import replicate

print("Testing replicate.use() functionality...")

# Test 1: Simple text model
print("\n1. Testing simple text model...")
try:
    hello_world = replicate.use("replicate/hello-world")
    result = hello_world(text="Alice")
    print(f"Result: {result}")
except Exception as e:
    print(f"Error: {type(e).__name__}: {e}")

# Test 2: Image generation model
print("\n2. Testing image generation model...")
try:
    from replicate.lib._predictions_use import get_path_url

    flux_dev = replicate.use("black-forest-labs/flux-dev")
    outputs = flux_dev(
        prompt="a cat wearing a wizard hat, digital art",
        num_outputs=1,
        aspect_ratio="1:1",
        output_format="webp",
        guidance=3.5,
        num_inference_steps=28,
    )
    print(f"Generated output: {outputs}")
    if isinstance(outputs, list):
        print(f"Generated {len(outputs)} image(s)")
        for i, output in enumerate(outputs):
            print(f"  Image {i}: {output}")
            # Get the URL without downloading
            url = get_path_url(output)
            if url:
                print(f"  URL: {url}")
    else:
        print(f"Single output: {outputs}")
        url = get_path_url(outputs)
        if url:
            print(f"  URL: {url}")
except Exception as e:
    print(f"Error: {type(e).__name__}: {e}")
    import traceback

    traceback.print_exc()

# Test 3: Language model with streaming
print("\n3. Testing language model with streaming...")
try:
    llama = replicate.use("meta/meta-llama-3-8b-instruct", streaming=True)
    output = llama(prompt="Write a haiku about Python programming", max_tokens=50)
    print("Streaming output:")
    for chunk in output:
        print(chunk, end="", flush=True)
    print()
except Exception as e:
    print(f"Error: {type(e).__name__}: {e}")
    import traceback

    traceback.print_exc()

# Test 4: Using async
print("\n4. Testing async functionality...")
import asyncio


async def test_async():
    try:
        hello_world = replicate.use("replicate/hello-world", use_async=True)
        result = await hello_world(text="Bob")
        print(f"Async result: {result}")

        print("\n4b. Testing async streaming...")
        llama = replicate.use("meta/meta-llama-3-8b-instruct", streaming=True, use_async=True)
        output = await llama(prompt="Write a short poem about async/await", max_tokens=50)
        print("Async streaming output:")
        async for chunk in output:
            print(chunk, end="", flush=True)
        print()
    except Exception as e:
        print(f"Error: {type(e).__name__}: {e}")
        import traceback

        traceback.print_exc()


asyncio.run(test_async())

print("\nDone!")
1 change: 1 addition & 0 deletions src/replicate/__init__.py
@@ -243,6 +243,7 @@ def _reset_client() -> None:  # type: ignore[reportUnusedFunction]

from ._module_client import (
    run as run,
    use as use,
    files as files,
    models as models,
    account as account,
102 changes: 97 additions & 5 deletions src/replicate/_client.py
@@ -3,13 +3,25 @@
from __future__ import annotations

import os
from typing import TYPE_CHECKING, Any, Union, Mapping, Optional
from typing_extensions import Self, Unpack, override
from typing import (
    TYPE_CHECKING,
    Any,
    Union,
    Literal,
    Mapping,
    TypeVar,
    Callable,
    Iterator,
    Optional,
    AsyncIterator,
    overload,
)
from typing_extensions import Self, Unpack, ParamSpec, override

import httpx

from replicate.lib._files import FileEncodingStrategy
from replicate.lib._predictions import Model, Version, ModelVersionIdentifier
from replicate.lib._predictions_run import Model, Version, ModelVersionIdentifier
from replicate.types.prediction_create_params import PredictionCreateParamsWithoutVersion

from . import _exceptions
@@ -46,6 +58,12 @@
from .resources.webhooks.webhooks import WebhooksResource, AsyncWebhooksResource
from .resources.deployments.deployments import DeploymentsResource, AsyncDeploymentsResource

if TYPE_CHECKING:
    from .lib._predictions_use import Function, FunctionRef, AsyncFunction

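# Typing helpers for use(): Input captures a model function's call signature
# via ParamSpec, and Output is its return type.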
Input = ParamSpec("Input")
Output = TypeVar("Output")

__all__ = [
    "Timeout",
    "Transport",
@@ -236,7 +254,7 @@ def run(
            ValueError: If the reference format is invalid
            TypeError: If both wait and prefer parameters are provided
        """
        from .lib._predictions import run
        from .lib._predictions_run import run

        return run(
            self,
@@ -247,6 +265,43 @@ def run(
            **params,
        )

    @overload
    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: Literal[False] = False,
    ) -> "Function[Input, Output]": ...

    @overload
    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: Literal[True],
    ) -> "Function[Input, Iterator[Output]]": ...

    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: bool = False,
    ) -> Union["Function[Input, Output]", "Function[Input, Iterator[Output]]"]:
        """
        Use a Replicate model as a function.

        Example:
            flux_dev = replicate.use("black-forest-labs/flux-dev")
            output = flux_dev(prompt="make me a sandwich")
        """
        from .lib._predictions_use import use as _use

        # TODO: Fix mypy overload matching for streaming parameter
        return _use(self, ref, hint=hint, streaming=streaming)  # type: ignore[call-overload, no-any-return]

    def copy(
        self,
        *,
@@ -510,7 +565,7 @@ async def run(
            ValueError: If the reference format is invalid
            TypeError: If both wait and prefer parameters are provided
        """
        from .lib._predictions import async_run
        from .lib._predictions_run import async_run

        return await async_run(
            self,
@@ -521,6 +576,43 @@ async def run(
            **params,
        )

    @overload
    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: Literal[False] = False,
    ) -> "AsyncFunction[Input, Output]": ...

    @overload
    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: Literal[True],
    ) -> "AsyncFunction[Input, AsyncIterator[Output]]": ...

    def use(
        self,
        ref: Union[str, "FunctionRef[Input, Output]"],
        *,
        hint: Optional[Callable["Input", "Output"]] = None,
        streaming: bool = False,
    ) -> Union["AsyncFunction[Input, Output]", "AsyncFunction[Input, AsyncIterator[Output]]"]:
        """
        Use a Replicate model as an async function.

        Example:
            flux_dev = replicate.use("black-forest-labs/flux-dev", use_async=True)
            output = await flux_dev(prompt="make me a sandwich")
        """
        from .lib._predictions_use import use as _use

        # TODO: Fix mypy overload matching for streaming parameter
        return _use(self, ref, hint=hint, streaming=streaming)  # type: ignore[call-overload, no-any-return]

    def copy(
        self,
        *,