Skip to content

Commit bf5c295

Browse files
authored
fix IndexError when streaming OpenAI (#181)
1 parent ff7015a commit bf5c295

File tree

5 files changed

+78
-53
lines changed

5 files changed

+78
-53
lines changed

pydantic_ai_slim/pydantic_ai/models/groq.py

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@
3737
from groq.types import chat
3838
from groq.types.chat import ChatCompletion, ChatCompletionChunk
3939
from groq.types.chat.chat_completion_chunk import ChoiceDeltaToolCall
40-
except ImportError as e:
40+
except ImportError as _import_error:
4141
raise ImportError(
4242
'Please install `groq` to use the Groq model, '
4343
"you can use the `groq` optional group — `pip install 'pydantic-ai[groq]'`"
44-
) from e
44+
) from _import_error
4545

4646
GroqModelName = Literal[
4747
'llama-3.1-70b-versatile',
@@ -209,33 +209,29 @@ def _process_response(response: chat.ChatCompletion) -> ModelAnyResponse:
209209
@staticmethod
210210
async def _process_streamed_response(response: AsyncStream[ChatCompletionChunk]) -> EitherStreamedResponse:
211211
"""Process a streamed response, and prepare a streaming response to return."""
212-
try:
213-
first_chunk = await response.__anext__()
214-
except StopAsyncIteration as e: # pragma: no cover
215-
raise UnexpectedModelBehavior('Streamed response ended without content or tool calls') from e
216-
timestamp = datetime.fromtimestamp(first_chunk.created, tz=timezone.utc)
217-
delta = first_chunk.choices[0].delta
218-
start_cost = _map_cost(first_chunk)
219-
220-
# the first chunk may only contain `role`, so we iterate until we get either `tool_calls` or `content`
221-
while delta.tool_calls is None and delta.content is None:
212+
timestamp: datetime | None = None
213+
start_cost = Cost()
214+
# the first chunk may not contain enough information, so we iterate until we get either `tool_calls` or `content`
215+
while True:
222216
try:
223-
next_chunk = await response.__anext__()
217+
chunk = await response.__anext__()
224218
except StopAsyncIteration as e:
225219
raise UnexpectedModelBehavior('Streamed response ended without content or tool calls') from e
226-
delta = next_chunk.choices[0].delta
227-
start_cost += _map_cost(next_chunk)
228-
229-
if delta.content is not None:
230-
return GroqStreamTextResponse(delta.content, response, timestamp, start_cost)
231-
else:
232-
assert delta.tool_calls is not None, f'Expected delta with tool_calls, got {delta}'
233-
return GroqStreamStructuredResponse(
234-
response,
235-
{c.index: c for c in delta.tool_calls},
236-
timestamp,
237-
start_cost,
238-
)
220+
timestamp = timestamp or datetime.fromtimestamp(chunk.created, tz=timezone.utc)
221+
start_cost += _map_cost(chunk)
222+
223+
if chunk.choices:
224+
delta = chunk.choices[0].delta
225+
226+
if delta.content is not None:
227+
return GroqStreamTextResponse(delta.content, response, timestamp, start_cost)
228+
elif delta.tool_calls is not None:
229+
return GroqStreamStructuredResponse(
230+
response,
231+
{c.index: c for c in delta.tool_calls},
232+
timestamp,
233+
start_cost,
234+
)
239235

240236
@staticmethod
241237
def _map_message(message: Message) -> chat.ChatCompletionMessageParam:

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 23 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -37,11 +37,11 @@
3737
from openai.types import ChatModel, chat
3838
from openai.types.chat import ChatCompletionChunk
3939
from openai.types.chat.chat_completion_chunk import ChoiceDeltaToolCall
40-
except ImportError as e:
40+
except ImportError as _import_error:
4141
raise ImportError(
4242
'Please install `openai` to use the OpenAI model, '
4343
"you can use the `openai` optional group — `pip install 'pydantic-ai[openai]'`"
44-
) from e
44+
) from _import_error
4545

4646

4747
@dataclass(init=False)
@@ -189,33 +189,31 @@ def _process_response(response: chat.ChatCompletion) -> ModelAnyResponse:
189189
@staticmethod
190190
async def _process_streamed_response(response: AsyncStream[ChatCompletionChunk]) -> EitherStreamedResponse:
191191
"""Process a streamed response, and prepare a streaming response to return."""
192-
try:
193-
first_chunk = await response.__anext__()
194-
except StopAsyncIteration as e: # pragma: no cover
195-
raise UnexpectedModelBehavior('Streamed response ended without content or tool calls') from e
196-
timestamp = datetime.fromtimestamp(first_chunk.created, tz=timezone.utc)
197-
delta = first_chunk.choices[0].delta
198-
start_cost = _map_cost(first_chunk)
199-
200-
# the first chunk may only contain `role`, so we iterate until we get either `tool_calls` or `content`
201-
while delta.tool_calls is None and delta.content is None:
192+
timestamp: datetime | None = None
193+
start_cost = Cost()
194+
# the first chunk may not contain enough information, so we iterate until we get either `tool_calls` or `content`
195+
while True:
202196
try:
203-
next_chunk = await response.__anext__()
197+
chunk = await response.__anext__()
204198
except StopAsyncIteration as e:
205199
raise UnexpectedModelBehavior('Streamed response ended without content or tool calls') from e
206-
delta = next_chunk.choices[0].delta
207-
start_cost += _map_cost(next_chunk)
208200

209-
if delta.content is not None:
210-
return OpenAIStreamTextResponse(delta.content, response, timestamp, start_cost)
211-
else:
212-
assert delta.tool_calls is not None, f'Expected delta with tool_calls, got {delta}'
213-
return OpenAIStreamStructuredResponse(
214-
response,
215-
{c.index: c for c in delta.tool_calls},
216-
timestamp,
217-
start_cost,
218-
)
201+
timestamp = timestamp or datetime.fromtimestamp(chunk.created, tz=timezone.utc)
202+
start_cost += _map_cost(chunk)
203+
204+
if chunk.choices:
205+
delta = chunk.choices[0].delta
206+
207+
if delta.content is not None:
208+
return OpenAIStreamTextResponse(delta.content, response, timestamp, start_cost)
209+
elif delta.tool_calls is not None:
210+
return OpenAIStreamStructuredResponse(
211+
response,
212+
{c.index: c for c in delta.tool_calls},
213+
timestamp,
214+
start_cost,
215+
)
216+
# else continue until we get either delta.content or delta.tool_calls
219217

220218
@staticmethod
221219
def _map_message(message: Message) -> chat.ChatCompletionMessageParam:

pydantic_ai_slim/pydantic_ai/models/vertexai.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@
1818
from google.auth.credentials import Credentials as BaseCredentials
1919
from google.auth.transport.requests import Request
2020
from google.oauth2.service_account import Credentials as ServiceAccountCredentials
21-
except ImportError as e:
21+
except ImportError as _import_error:
2222
raise ImportError(
2323
'Please install `google-auth` to use the VertexAI model, '
2424
"you can use the `vertexai` optional group — `pip install 'pydantic-ai[vertexai]'`"
25-
) from e
25+
) from _import_error
2626

2727
VERTEX_AI_URL_TEMPLATE = (
2828
'https://{region}-aiplatform.googleapis.com/v1'

tests/models/test_groq.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -450,3 +450,16 @@ async def test_no_content(allow_model_requests: None):
450450
with pytest.raises(UnexpectedModelBehavior, match='Streamed response ended without con'):
451451
async with agent.run_stream(''):
452452
pass # pragma: no cover
453+
454+
455+
async def test_no_delta(allow_model_requests: None):
456+
stream = chunk([]), text_chunk('hello '), text_chunk('world')
457+
mock_client = MockGroq.create_mock_stream(stream)
458+
m = GroqModel('llama-3.1-70b-versatile', groq_client=mock_client)
459+
agent = Agent(m)
460+
461+
async with agent.run_stream('') as result:
462+
assert not result.is_structured
463+
assert not result.is_complete
464+
assert [c async for c in result.stream(debounce_by=None)] == snapshot(['hello ', 'hello world'])
465+
assert result.is_complete

tests/models/test_openai.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,3 +440,21 @@ async def test_no_content(allow_model_requests: None):
440440
with pytest.raises(UnexpectedModelBehavior, match='Streamed response ended without con'):
441441
async with agent.run_stream(''):
442442
pass
443+
444+
445+
async def test_no_delta(allow_model_requests: None):
446+
stream = (
447+
chunk([]),
448+
text_chunk('hello '),
449+
text_chunk('world'),
450+
)
451+
mock_client = MockOpenAI.create_mock_stream(stream)
452+
m = OpenAIModel('gpt-4', openai_client=mock_client)
453+
agent = Agent(m)
454+
455+
async with agent.run_stream('') as result:
456+
assert not result.is_structured
457+
assert not result.is_complete
458+
assert [c async for c in result.stream(debounce_by=None)] == snapshot(['hello ', 'hello world'])
459+
assert result.is_complete
460+
assert result.cost() == snapshot(Cost(request_tokens=6, response_tokens=3, total_tokens=9))

0 commit comments

Comments
 (0)