Skip to content

Commit cf383c6

Browse files
committed
finish last bits of todos
1 parent eadfb53 commit cf383c6

File tree

5 files changed: +124 additions, −24 deletions

src/replicate/_client.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -167,7 +167,7 @@ def with_raw_response(self) -> ReplicateWithRawResponse:
167167
@cached_property
168168
def with_streaming_response(self) -> ReplicateWithStreamedResponse:
169169
return ReplicateWithStreamedResponse(self)
170-
170+
171171
@cached_property
172172
def poll_interval(self) -> float:
173173
return float(os.environ.get("REPLICATE_POLL_INTERVAL", "0.5"))
@@ -191,7 +191,7 @@ def default_headers(self) -> dict[str, str | Omit]:
191191
"X-Stainless-Async": "false",
192192
**self._custom_headers,
193193
}
194-
194+
195195
def run(
196196
self,
197197
ref: Union[Model, Version, ModelVersionIdentifier, str],
@@ -408,7 +408,7 @@ def with_raw_response(self) -> AsyncReplicateWithRawResponse:
408408
@cached_property
409409
def with_streaming_response(self) -> AsyncReplicateWithStreamedResponse:
410410
return AsyncReplicateWithStreamedResponse(self)
411-
411+
412412
@cached_property
413413
def poll_interval(self) -> float:
414414
return float(os.environ.get("REPLICATE_POLL_INTERVAL", "0.5"))
@@ -432,7 +432,7 @@ def default_headers(self) -> dict[str, str | Omit]:
432432
"X-Stainless-Async": f"async:{get_async_library()}",
433433
**self._custom_headers,
434434
}
435-
435+
436436
async def run(
437437
self,
438438
ref: Union[Model, Version, ModelVersionIdentifier, str],

src/replicate/_module_client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ class PredictionsResourceProxy(LazyProxy["PredictionsResource"]):
6666
def __load__(self) -> PredictionsResource:
6767
return _load_client().predictions
6868

69+
6970
if TYPE_CHECKING:
7071
from ._client import Replicate
7172

@@ -74,6 +75,7 @@ def __load__(self) -> PredictionsResource:
7475
__client: Replicate = cast(Replicate, {})
7576
run = __client.run
7677
else:
78+
7779
def _run(*args, **kwargs):
7880
return _load_client().run(*args, **kwargs)
7981

src/replicate/lib/_models.py

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from __future__ import annotations
22

3-
from typing import Tuple, Union, Optional
3+
from typing import Any, Dict, Tuple, Union, Optional
44
from typing_extensions import TypedDict
55

66

@@ -12,11 +12,27 @@ def __init__(self, owner: str, name: str):
1212
self.name = name
1313

1414

15-
class Version:
16-
"""A specific version of a Replicate model."""
15+
import datetime
1716

18-
def __init__(self, id: str):
19-
self.id = id
17+
from pydantic import BaseModel
18+
19+
20+
class Version(BaseModel):
21+
"""
22+
A version of a model.
23+
"""
24+
25+
id: str
26+
"""The unique ID of the version."""
27+
28+
created_at: datetime.datetime
29+
"""When the version was created."""
30+
31+
cog_version: str
32+
"""The version of the Cog used to create the version."""
33+
34+
openapi_schema: Dict[str, Any]
35+
"""An OpenAPI description of the model inputs and outputs."""
2036

2137

2238
class ModelVersionIdentifier(TypedDict, total=False):
@@ -29,7 +45,7 @@ class ModelVersionIdentifier(TypedDict, total=False):
2945

3046
def resolve_reference(
3147
ref: Union[Model, Version, ModelVersionIdentifier, str],
32-
) -> Tuple[Optional[str], Optional[str], Optional[str], Optional[str]]:
48+
) -> Tuple[Optional[Version], Optional[str], Optional[str], Optional[str]]:
3349
"""
3450
Resolve a reference to a model or version to its components.
3551

src/replicate/lib/_predictions.py

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,13 @@
11
from __future__ import annotations
22

3-
from typing import TYPE_CHECKING, Dict, Union, Iterable, Optional
3+
import time
4+
from typing import TYPE_CHECKING, Any, Dict, List, Union, Iterable, Iterator, Optional
5+
from collections.abc import AsyncIterator
46
from typing_extensions import Unpack
57

68
from replicate.lib._files import FileEncodingStrategy
9+
from replicate.lib._schema import make_schema_backwards_compatible
10+
from replicate.types.prediction import Prediction
711
from replicate.types.prediction_create_params import PredictionCreateParamsWithoutVersion
812

913
from ..types import PredictionOutput, PredictionCreateParams
@@ -71,7 +75,7 @@ def run(
7175
params.setdefault("prefer", f"wait={wait}")
7276

7377
# Resolve ref to its components
74-
_version, owner, name, version_id = resolve_reference(ref)
78+
version, owner, name, version_id = resolve_reference(ref)
7579

7680
prediction = None
7781
if version_id is not None:
@@ -104,14 +108,18 @@ def run(
104108
# "processing".
105109
in_terminal_state = is_blocking and prediction.status != "starting"
106110
if not in_terminal_state:
107-
# TODO: Return a "polling" iterator if the model has an output iterator array type.
111+
# Return a "polling" iterator if the model has an output iterator array type.
112+
if version and _has_output_iterator_array_type(version):
113+
return (transform_output(chunk, client) for chunk in output_iterator(prediction=prediction, client=client))
108114

109115
prediction = client.predictions.wait(prediction.id)
110116

111117
if prediction.status == "failed":
112118
raise ModelError(prediction)
113119

114-
# TODO: Return an iterator for completed output if the model has an output iterator array type.
120+
# Return an iterator for the completed prediction when needed.
121+
if version and _has_output_iterator_array_type(version) and prediction.output is not None:
122+
return (transform_output(chunk, client) for chunk in prediction.output)
115123

116124
if use_file_output:
117125
return transform_output(prediction.output, client) # type: ignore[no-any-return]
@@ -173,7 +181,7 @@ async def async_run(
173181
params.setdefault("prefer", f"wait={wait}")
174182

175183
# Resolve ref to its components
176-
_version, owner, name, version_id = resolve_reference(ref)
184+
version, owner, name, version_id = resolve_reference(ref)
177185

178186
prediction = None
179187
if version_id is not None:
@@ -210,16 +218,56 @@ async def async_run(
210218
# "processing".
211219
in_terminal_state = is_blocking and prediction.status != "starting"
212220
if not in_terminal_state:
213-
# TODO: Return a "polling" iterator if the model has an output iterator array type.
221+
# Return a "polling" iterator if the model has an output iterator array type.
222+
# if version and _has_output_iterator_array_type(version):
223+
# return (
224+
# transform_output(chunk, client)
225+
# async for chunk in prediction.async_output_iterator()
226+
# )
214227

215228
prediction = await client.predictions.wait(prediction.id)
216229

217230
if prediction.status == "failed":
218231
raise ModelError(prediction)
219232

220-
# TODO: Return an iterator for completed output if the model has an output iterator array type.
221-
233+
# Return an iterator for completed output if the model has an output iterator array type.
234+
if version and _has_output_iterator_array_type(version) and prediction.output is not None:
235+
return (transform_output(chunk, client) async for chunk in _make_async_iterator(prediction.output))
222236
if use_file_output:
223237
return transform_output(prediction.output, client) # type: ignore[no-any-return]
224238

225239
return prediction.output
240+
241+
242+
def _has_output_iterator_array_type(version: Version) -> bool:
243+
schema = make_schema_backwards_compatible(version.openapi_schema, version.cog_version)
244+
output = schema.get("components", {}).get("schemas", {}).get("Output", {})
245+
return output.get("type") == "array" and output.get("x-cog-array-type") == "iterator" # type: ignore[no-any-return]
246+
247+
248+
async def _make_async_iterator(list: List[Any]) -> AsyncIterator[Any]:
249+
for item in list:
250+
yield item
251+
252+
253+
def output_iterator(prediction: Prediction, client: Replicate) -> Iterator[Any]:
254+
"""
255+
Return an iterator of the prediction output.
256+
"""
257+
258+
# TODO: check output is list
259+
previous_output: Any = prediction.output or []
260+
while prediction.status not in ["succeeded", "failed", "canceled"]:
261+
output: Any = prediction.output or []
262+
new_output = output[len(previous_output) :]
263+
yield from new_output
264+
previous_output = output
265+
time.sleep(client.poll_interval)
266+
prediction = client.predictions.get(prediction.id)
267+
268+
if prediction.status == "failed":
269+
raise ModelError(prediction=prediction)
270+
271+
output = prediction.output or []
272+
new_output = output[len(previous_output) :]
273+
yield from new_output

tests/lib/test_run.py

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import io
44
import os
5+
import datetime
56
from typing import Any, Dict, Optional
67

78
import httpx
@@ -48,6 +49,41 @@ def create_mock_prediction(
4849
}
4950

5051

52+
def _version_with_schema(id: str = "v1", output_schema: Optional[object] = None) -> Version:
53+
return Version(
54+
id=id,
55+
created_at=datetime.datetime.fromisoformat("2022-03-16T00:35:56.210272"),
56+
cog_version="dev",
57+
openapi_schema={
58+
"openapi": "3.0.2",
59+
"info": {"title": "Cog", "version": "0.1.0"},
60+
"paths": {},
61+
"components": {
62+
"schemas": {
63+
"Input": {
64+
"type": "object",
65+
"title": "Input",
66+
"required": ["text"],
67+
"properties": {
68+
"text": {
69+
"type": "string",
70+
"title": "Text",
71+
"x-order": 0,
72+
"description": "The text input",
73+
},
74+
},
75+
},
76+
"Output": output_schema
77+
or {
78+
"type": "string",
79+
"title": "Output",
80+
},
81+
}
82+
},
83+
},
84+
)
85+
86+
5187
class TestRun:
5288
client = Replicate(base_url=base_url, bearer_token=bearer_token, _strict_response_validation=True)
5389

@@ -227,7 +263,7 @@ def test_run_with_version_object(self, respx_mock: MockRouter) -> None:
227263
# Version ID is used directly
228264
respx_mock.post("/predictions").mock(return_value=httpx.Response(201, json=create_mock_prediction()))
229265

230-
version = Version(id="test-version-id")
266+
version = _version_with_schema("test-version-id")
231267
output = self.client.run(version, input={"prompt": "test prompt"})
232268

233269
assert output == "test output"
@@ -243,7 +279,6 @@ def test_run_with_model_version_identifier(self, respx_mock: MockRouter) -> None
243279

244280
assert output == "test output"
245281

246-
@pytest.mark.skip("todo: support file output iterator")
247282
@pytest.mark.respx(base_url=base_url)
248283
def test_run_with_file_output_iterator(self, respx_mock: MockRouter) -> None:
249284
"""Test run with file output iterator."""
@@ -270,7 +305,7 @@ def test_run_with_file_output_iterator(self, respx_mock: MockRouter) -> None:
270305
)
271306

272307
output: list[FileOutput] = self.client.run(
273-
"some-model-ref", use_file_output=True, input={"prompt": "generate file iterator"}
308+
"some-model-ref", use_file_output=True, wait=False, input={"prompt": "generate file iterator"}
274309
)
275310

276311
assert isinstance(output, list)
@@ -460,7 +495,7 @@ async def test_async_run_with_version_object(self, respx_mock: MockRouter) -> No
460495
# Version ID is used directly
461496
respx_mock.post("/predictions").mock(return_value=httpx.Response(201, json=create_mock_prediction()))
462497

463-
version = Version(id="test-version-id")
498+
version = _version_with_schema("test-version-id")
464499
output = await self.client.run(version, input={"prompt": "test prompt"})
465500

466501
assert output == "test output"
@@ -476,7 +511,6 @@ async def test_async_run_with_model_version_identifier(self, respx_mock: MockRou
476511

477512
assert output == "test output"
478513

479-
@pytest.mark.skip("todo: support file output iterator")
480514
@pytest.mark.respx(base_url=base_url)
481515
async def test_async_run_with_file_output_iterator(self, respx_mock: MockRouter) -> None:
482516
"""Test async run with file output iterator."""
@@ -503,7 +537,7 @@ async def test_async_run_with_file_output_iterator(self, respx_mock: MockRouter)
503537
)
504538

505539
output: list[AsyncFileOutput] = await self.client.run(
506-
"some-model-ref", use_file_output=True, input={"prompt": "generate file iterator"}
540+
"some-model-ref", use_file_output=True, wait=False, input={"prompt": "generate file iterator"}
507541
)
508542

509543
assert isinstance(output, list)

0 commit comments

Comments (0)