add support for more ref types

dtmeadows · dtmeadows · commit b969e713cf12 · 2025-05-05T17:40:19.000-04:00
diff --git a/src/replicate/__init__.py b/src/replicate/__init__.py
@@ -39,6 +39,7 @@
     UnprocessableEntityError,
     APIResponseValidationError,
 )
+from .lib._models import Model as Model, Version as Version, ModelVersionIdentifier as ModelVersionIdentifier
 from ._base_client import DefaultHttpxClient, DefaultAsyncHttpxClient
 from ._utils._logs import setup_logging as _setup_logging
 
@@ -83,6 +84,9 @@
     "DefaultAsyncHttpxClient",
     "FileOutput",
     "AsyncFileOutput",
+    "Model",
+    "Version",
+    "ModelVersionIdentifier",
 ]
 
 _setup_logging()
diff --git a/src/replicate/_client.py b/src/replicate/_client.py
@@ -8,6 +8,7 @@
 
 import httpx
 
+from replicate.lib._predictions import Model, Version, ModelVersionIdentifier
 from replicate.types.prediction_create_params import PredictionCreateParamsWithoutVersion
 
 from . import _exceptions
@@ -127,7 +128,7 @@ def __init__(
 
     def run(
         self,
-        ref: str,
+        ref: Union[Model, Version, ModelVersionIdentifier, str],
         *,
         wait: Union[int, bool, NotGiven] = NOT_GIVEN,
         **params: Unpack[PredictionCreateParamsWithoutVersion],
@@ -322,7 +323,7 @@ def __init__(
 
     async def run(
         self,
-        ref: str,
+        ref: Union[Model, Version, ModelVersionIdentifier, str],
         *,
         wait: Union[int, bool, NotGiven] = NOT_GIVEN,
         **params: Unpack[PredictionCreateParamsWithoutVersion],
diff --git a/src/replicate/lib/_predictions.py b/src/replicate/lib/_predictions.py
@@ -1,30 +1,57 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Dict, Union, Iterable
+from typing import TYPE_CHECKING, Dict, Union, Iterable, Optional
 from typing_extensions import Unpack
 
 from replicate.types.prediction_create_params import PredictionCreateParamsWithoutVersion
 
 from ..types import PredictionOutput, PredictionCreateParams
 from .._types import NOT_GIVEN, NotGiven
 from .._utils import is_given
-from .._client import ReplicateClient, AsyncReplicateClient
+from ._models import Model, Version, ModelVersionIdentifier, resolve_reference
 from .._exceptions import ModelError
 
 if TYPE_CHECKING:
     from ._files import FileOutput
+    from .._client import ReplicateClient, AsyncReplicateClient
 
 
 def run(
-    client: ReplicateClient,
-    ref: str,
-    # TODO: support these types
-    # ref: Union["Model", "Version", "ModelVersionIdentifier", str],
+    client: "ReplicateClient",
+    ref: Union[Model, Version, ModelVersionIdentifier, str],
     *,
     wait: Union[int, bool, NotGiven] = NOT_GIVEN,
-    # use_file_output: Optional[bool] = True,
+    _use_file_output: Optional[bool] = True,
     **params: Unpack[PredictionCreateParamsWithoutVersion],
 ) -> PredictionOutput | FileOutput | Iterable[FileOutput] | Dict[str, FileOutput]:
+    """
+    Run a model prediction.
+
+    Args:
+        client: The ReplicateClient instance to use for API calls
+        ref: Reference to the model or version to run. Can be:
+            - A string containing a version ID (e.g. "5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa")
+            - A string with owner/name format (e.g. "replicate/hello-world")
+            - A string with owner/name/version format (e.g. "replicate/hello-world/5c7d5dc6...")
+            - A Model instance with owner and name attributes
+            - A Version instance with id attribute
+            - A ModelVersionIdentifier dictionary with owner, name, and/or version keys
+        input: Dictionary of input parameters for the model (supplied as a keyword via **params)
+        wait: If True (default), wait for the prediction to complete. If False, return immediately.
+              If an integer, wait up to that many seconds.
+        _use_file_output: If True (default), convert output URLs to FileOutput objects
+        **params: Additional parameters to pass to the prediction creation endpoint
+
+    Returns:
+        The prediction output, which could be a basic type (str, int, etc.), a FileOutput object,
+        a list of FileOutput objects, or a dictionary of FileOutput objects, depending on what
+        the model returns.
+
+    Raises:
+        ModelError: If the model run fails
+        ValueError: If the reference format is invalid
+        TypeError: If both wait and prefer parameters are provided
+    """
     from ._files import transform_output
 
     if is_given(wait) and "prefer" in params:
@@ -41,9 +68,27 @@ def run(
         else:
             params.setdefault("prefer", f"wait={wait}")
 
-    # TODO: support more ref types
-    params_with_version: PredictionCreateParams = {**params, "version": ref}
-    prediction = client.predictions.create(**params_with_version)
+    # Resolve ref to its components
+    _version, owner, name, version_id = resolve_reference(ref)
+
+    prediction = None
+    if version_id is not None:
+        # Create prediction with the specific version ID
+        params_with_version: PredictionCreateParams = {**params, "version": version_id}
+        prediction = client.predictions.create(**params_with_version)
+    elif owner and name:
+        # Create prediction via models resource with owner/name
+        prediction = client.models.predictions.create(model_owner=owner, model_name=name, **params)
+    else:
+        # If ref is a string but doesn't match expected patterns
+        if isinstance(ref, str):
+            params_with_version = {**params, "version": ref}
+            prediction = client.predictions.create(**params_with_version)
+        else:
+            raise ValueError(
+                f"Invalid reference format: {ref}. Expected a model name ('owner/name'), "
+                "a version ID, a Model object, a Version object, or a ModelVersionIdentifier."
+            )
 
     # Currently the "Prefer: wait" interface will return a prediction with a status
     # of "processing" rather than a terminal state because it returns before the
@@ -68,15 +113,41 @@ def run(
 
 
 async def async_run(
-    client: AsyncReplicateClient,
-    ref: str,
-    # TODO: support these types
-    # ref: Union["Model", "Version", "ModelVersionIdentifier", str],
+    client: "AsyncReplicateClient",
+    ref: Union[Model, Version, ModelVersionIdentifier, str],
     *,
     wait: Union[int, bool, NotGiven] = NOT_GIVEN,
-    # use_file_output: Optional[bool] = True,
+    _use_file_output: Optional[bool] = True,
     **params: Unpack[PredictionCreateParamsWithoutVersion],
 ) -> PredictionOutput | FileOutput | Iterable[FileOutput] | Dict[str, FileOutput]:
+    """
+    Run a model prediction asynchronously.
+
+    Args:
+        client: The AsyncReplicateClient instance to use for API calls
+        ref: Reference to the model or version to run. Can be:
+            - A string containing a version ID (e.g. "5c7d5dc6dd8bf75c1acaa8565735e7986bc5b66206b55cca93cb72c9bf15ccaa")
+            - A string with owner/name format (e.g. "replicate/hello-world")
+            - A string with owner/name/version format (e.g. "replicate/hello-world/5c7d5dc6...")
+            - A Model instance with owner and name attributes
+            - A Version instance with id attribute
+            - A ModelVersionIdentifier dictionary with owner, name, and/or version keys
+        input: Dictionary of input parameters for the model (supplied as a keyword via **params)
+        wait: If True (default), wait for the prediction to complete. If False, return immediately.
+              If an integer, wait up to that many seconds.
+        _use_file_output: If True (default), convert output URLs to AsyncFileOutput objects
+        **params: Additional parameters to pass to the prediction creation endpoint
+
+    Returns:
+        The prediction output, which could be a basic type (str, int, etc.), an AsyncFileOutput object,
+        a list of AsyncFileOutput objects, or a dictionary of AsyncFileOutput objects, depending on what
+        the model returns.
+
+    Raises:
+        ModelError: If the model run fails
+        ValueError: If the reference format is invalid
+        TypeError: If both wait and prefer parameters are provided
+    """
     from ._files import transform_output
 
     if is_given(wait) and "prefer" in params:
@@ -93,9 +164,27 @@ async def async_run(
         else:
             params.setdefault("prefer", f"wait={wait}")
 
-    # TODO: support more ref types
-    params_with_version: PredictionCreateParams = {**params, "version": ref}
-    prediction = await client.predictions.create(**params_with_version)
+    # Resolve ref to its components
+    _version, owner, name, version_id = resolve_reference(ref)
+
+    prediction = None
+    if version_id is not None:
+        # Create prediction with the specific version ID
+        params_with_version: PredictionCreateParams = {**params, "version": version_id}
+        prediction = await client.predictions.create(**params_with_version)
+    elif owner and name:
+        # Create prediction via models resource with owner/name
+        prediction = await client.models.predictions.create(model_owner=owner, model_name=name, **params)
+    else:
+        # If ref is a string but doesn't match expected patterns
+        if isinstance(ref, str):
+            params_with_version = {**params, "version": ref}
+            prediction = await client.predictions.create(**params_with_version)
+        else:
+            raise ValueError(
+                f"Invalid reference format: {ref}. Expected a model name ('owner/name'), "
+                "a version ID, a Model object, a Version object, or a ModelVersionIdentifier."
+            )
 
     # Currently the "Prefer: wait" interface will return a prediction with a status
     # of "processing" rather than a terminal state because it returns before the
diff --git a/tests/lib/test_run.py b/tests/lib/test_run.py
@@ -11,6 +11,7 @@
 from replicate import ReplicateClient, AsyncReplicateClient
 from replicate.lib._files import FileOutput, AsyncFileOutput
 from replicate._exceptions import ModelError, NotFoundError, BadRequestError
+from replicate.lib._models import Model, Version, ModelVersionIdentifier
 
 base_url = os.environ.get("TEST_API_BASE_URL", "http://127.0.0.1:4010")
 bearer_token = "My Bearer Token"
@@ -154,6 +155,7 @@ def test_run_with_error(self, respx_mock: MockRouter) -> None:
         with pytest.raises(ModelError):
             self.client.run("error-model-ref", input={"prompt": "trigger error"})
 
+    @pytest.mark.skip("todo: support BytesIO conversion")
     @pytest.mark.respx(base_url=base_url)
     def test_run_with_base64_file(self, respx_mock: MockRouter) -> None:
         """Test run with base64 encoded file input."""
@@ -205,6 +207,41 @@ def test_run_with_invalid_cog_version(self, respx_mock: MockRouter) -> None:
         with pytest.raises(BadRequestError):
             self.client.run("model-with-invalid-cog", input={"prompt": "test prompt"})
 
+    @pytest.mark.respx(base_url=base_url)
+    def test_run_with_model_object(self, respx_mock: MockRouter) -> None:
+        """Test run with Model object reference."""
+        # Mock the model-scoped predictions endpoint that run() posts to for an owner/name reference
+        respx_mock.post("/models/test-owner/test-model/predictions").mock(
+            return_value=httpx.Response(201, json=create_mock_prediction())
+        )
+
+        model = Model(owner="test-owner", name="test-model")
+        output = self.client.run(model, input={"prompt": "test prompt"})
+
+        assert output == "test output"
+
+    @pytest.mark.respx(base_url=base_url)
+    def test_run_with_version_object(self, respx_mock: MockRouter) -> None:
+        """Test run with Version object reference."""
+        # Version ID is used directly
+        respx_mock.post("/predictions").mock(return_value=httpx.Response(201, json=create_mock_prediction()))
+
+        version = Version(id="test-version-id")
+        output = self.client.run(version, input={"prompt": "test prompt"})
+
+        assert output == "test output"
+
+    @pytest.mark.respx(base_url=base_url)
+    def test_run_with_model_version_identifier(self, respx_mock: MockRouter) -> None:
+        """Test run with ModelVersionIdentifier dict reference."""
+        # Case where version ID is provided
+        respx_mock.post("/predictions").mock(return_value=httpx.Response(201, json=create_mock_prediction()))
+
+        identifier: ModelVersionIdentifier = {"owner": "test-owner", "name": "test-model", "version": "test-version-id"}
+        output = self.client.run(identifier, input={"prompt": "test prompt"})
+
+        assert output == "test output"
+
     @pytest.mark.respx(base_url=base_url)
     def test_run_with_file_output_iterator(self, respx_mock: MockRouter) -> None:
         """Test run with file output iterator."""
@@ -349,6 +386,7 @@ async def test_async_run_with_error(self, respx_mock: MockRouter) -> None:
         with pytest.raises(ModelError):
             await self.client.run("error-model-ref", input={"prompt": "trigger error"})
 
+    @pytest.mark.skip("todo: support BytesIO conversion")
     @pytest.mark.respx(base_url=base_url)
     async def test_async_run_with_base64_file(self, respx_mock: MockRouter) -> None:
         """Test async run with base64 encoded file input."""
@@ -400,6 +438,41 @@ async def test_async_run_with_invalid_cog_version(self, respx_mock: MockRouter)
         with pytest.raises(BadRequestError):
             await self.client.run("model-with-invalid-cog", input={"prompt": "test prompt"})
 
+    @pytest.mark.respx(base_url=base_url)
+    async def test_async_run_with_model_object(self, respx_mock: MockRouter) -> None:
+        """Test async run with Model object reference."""
+        # Mock the model-scoped predictions endpoint that run() posts to for an owner/name reference
+        respx_mock.post("/models/test-owner/test-model/predictions").mock(
+            return_value=httpx.Response(201, json=create_mock_prediction())
+        )
+
+        model = Model(owner="test-owner", name="test-model")
+        output = await self.client.run(model, input={"prompt": "test prompt"})
+
+        assert output == "test output"
+
+    @pytest.mark.respx(base_url=base_url)
+    async def test_async_run_with_version_object(self, respx_mock: MockRouter) -> None:
+        """Test async run with Version object reference."""
+        # Version ID is used directly
+        respx_mock.post("/predictions").mock(return_value=httpx.Response(201, json=create_mock_prediction()))
+
+        version = Version(id="test-version-id")
+        output = await self.client.run(version, input={"prompt": "test prompt"})
+
+        assert output == "test output"
+
+    @pytest.mark.respx(base_url=base_url)
+    async def test_async_run_with_model_version_identifier(self, respx_mock: MockRouter) -> None:
+        """Test async run with ModelVersionIdentifier dict reference."""
+        # Case where version ID is provided
+        respx_mock.post("/predictions").mock(return_value=httpx.Response(201, json=create_mock_prediction()))
+
+        identifier: ModelVersionIdentifier = {"owner": "test-owner", "name": "test-model", "version": "test-version-id"}
+        output = await self.client.run(identifier, input={"prompt": "test prompt"})
+
+        assert output == "test output"
+
     @pytest.mark.respx(base_url=base_url)
     async def test_async_run_with_file_output_iterator(self, respx_mock: MockRouter) -> None:
         """Test async run with file output iterator."""