
Commit 35185ad
fix: #885
1 parent: 66fa21a

File tree: 8 files changed, +2975 -2414 lines

pydantic_ai_slim/pydantic_ai/messages.py (3 additions, 0 deletions)

@@ -977,6 +977,9 @@ class ModelResponse:
     provider_response_id: str | None = None
     """request ID as specified by the model provider. This can be used to track the specific request to the model."""
 
+    finish_reason: str | None = None
+    """Reason the model finished generating the response. Used to populate gen_ai.response.finish_reasons in OpenTelemetry."""
+
     def price(self) -> genai_types.PriceCalculation:
         """Calculate the price of the usage.
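
For orientation, here is a minimal sketch of how the new field surfaces on a response object. The values and model name are made up, and the construction assumes `ModelResponse`'s other fields keep their defaults; only `finish_reason` is the field added by this commit.

```python
from pydantic_ai.messages import ModelResponse, TextPart

# Illustrative only: construct a response the way a model adapter would,
# with the new OTEL-style finish_reason populated.
response = ModelResponse(
    parts=[TextPart(content='The answer is 4.')],
    model_name='gpt-4o',   # hypothetical model name
    finish_reason='stop',  # new field: OTEL finish reason, or None if unknown
)
assert response.finish_reason == 'stop'
```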

pydantic_ai_slim/pydantic_ai/models/__init__.py (4 additions, 0 deletions)

@@ -554,6 +554,8 @@ class StreamedResponse(ABC):
     model_request_parameters: ModelRequestParameters
 
     final_result_event: FinalResultEvent | None = field(default=None, init=False)
+    provider_response_id: str | None = field(default=None, init=False)
+    finish_reason: str | None = field(default=None, init=False)
 
     _parts_manager: ModelResponsePartsManager = field(default_factory=ModelResponsePartsManager, init=False)
     _event_iterator: AsyncIterator[ModelResponseStreamEvent] | None = field(default=None, init=False)
@@ -609,6 +611,8 @@ def get(self) -> ModelResponse:
             timestamp=self.timestamp,
             usage=self.usage(),
             provider_name=self.provider_name,
+            provider_response_id=self.provider_response_id,
+            finish_reason=self.finish_reason,
         )
 
     def usage(self) -> RequestUsage:
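
A hedged sketch of the intended data flow: a provider-specific subclass sets the two new fields while iterating its stream, and `get()` then carries them onto the `ModelResponse`. The chunk attributes below are hypothetical, not a real provider type; the OpenAI changes later in this commit follow this same pattern.

```python
# Sketch of a provider subclass's event iterator (chunk shape is illustrative;
# real subclasses adapt their own provider's stream types):
async def _get_event_iterator(self):
    async for chunk in self._response:
        if chunk.id and self.provider_response_id is None:
            self.provider_response_id = chunk.id  # usually on the first chunk
        if chunk.finish_reason:
            self.finish_reason = chunk.finish_reason  # usually on the last chunk
        ...

# Once streaming completes, get() includes both values on the ModelResponse.
```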

pydantic_ai_slim/pydantic_ai/models/google.py (77 additions, 4 deletions)

@@ -1,10 +1,13 @@
 from __future__ import annotations as _annotations
 
 import base64
+import os
+import platform
 from collections.abc import AsyncIterator, Awaitable
 from contextlib import asynccontextmanager
 from dataclasses import dataclass, field
-from datetime import datetime
+from datetime import datetime, timezone
+from pathlib import Path
 from typing import Any, Literal, cast, overload
 from uuid import uuid4
 
@@ -374,6 +377,62 @@ async def _build_content_and_config(
         )
         return contents, config
 
+    @staticmethod
+    def _execution_context() -> dict[str, Any]:
+        """Return execution context details for debugging/provider details."""
+        try:
+            pwd = os.getcwd()
+        except Exception:
+            pwd = None
+        home = str(Path.home())
+
+        sys_platform = platform.system()
+        if sys_platform == 'Darwin':
+            os_platform = 'MacOS'
+        elif sys_platform in ('Windows', 'Linux'):
+            os_platform = sys_platform
+        else:
+            os_platform = sys_platform or None
+
+        now = datetime.now(timezone.utc).replace(microsecond=0).isoformat().replace('+00:00', 'Z')
+
+        shell_path = os.environ.get('SHELL') or os.environ.get('COMSPEC')
+        shell_name = os.path.basename(shell_path) if shell_path else None
+        shell_version = (
+            os.environ.get('ZSH_VERSION')
+            or os.environ.get('BASH_VERSION')
+            or os.environ.get('FISH_VERSION')
+            or os.environ.get('POWERSHELL_VERSION')
+            or None
+        )
+        return {
+            'execution_context': {
+                'directory_state': {'pwd': pwd, 'home': home},
+                'operating_system': {'platform': os_platform},
+                'current_time': now,
+                'shell': {'name': shell_name, 'version': shell_version},
+            }
+        }
+
+    @staticmethod
+    def _map_finish_reason_to_otel(raw: str | None) -> str | None:
+        """Map provider-specific finish reasons to OpenTelemetry gen_ai.response.finish_reasons values.
+
+        Only returns a value if it matches a known OTEL value; otherwise returns None.
+        """
+        if raw is None:
+            return None
+        upper = raw.upper()
+        # Known mappings for Google Gemini
+        if upper == 'STOP':
+            return 'stop'
+        if upper in {'MAX_TOKENS', 'MAX_OUTPUT_TOKENS'}:
+            return 'length'
+        if upper in {'SAFETY', 'BLOCKLIST', 'PROHIBITED_CONTENT', 'SPII'}:
+            return 'content_filter'
+        # Unknown or provider-specific value — do not set
+        return None
+
     def _process_response(self, response: GenerateContentResponse) -> ModelResponse:
         if not response.candidates or len(response.candidates) != 1:
             raise UnexpectedModelBehavior('Expected exactly one candidate in Gemini response')  # pragma: no cover
@@ -387,10 +446,21 @@ def _process_response(self, response: GenerateContentResponse) -> ModelResponse:
         )  # pragma: no cover
         parts = candidate.content.parts or []
         vendor_id = response.response_id or None
-        vendor_details: dict[str, Any] | None = None
+        details: dict[str, Any] = {}
         finish_reason = candidate.finish_reason
-        if finish_reason:  # pragma: no branch
-            vendor_details = {'finish_reason': finish_reason.value}
+        # Raw finish_reason from provider (enum -> string)
+        raw_finish_reason = None
+        if finish_reason is not None:
+            raw_finish_reason = getattr(finish_reason, 'value', str(finish_reason))
+            details['finish_reason'] = raw_finish_reason
+        # OTEL-mapped finish_reason
+        mapped_finish_reason = self._map_finish_reason_to_otel(raw_finish_reason) if raw_finish_reason else None
+        if mapped_finish_reason is not None:
+            details['final_reason'] = mapped_finish_reason
+        if vendor_id:
+            details['provider_response_id'] = vendor_id
+        details.update(self._execution_context())
+        vendor_details: dict[str, Any] | None = details or None
         usage = _metadata_as_usage(response)
         return _process_response_from_parts(
             parts,
@@ -399,6 +469,7 @@ def _process_response(self, response: GenerateContentResponse) -> ModelResponse:
             usage,
             vendor_id=vendor_id,
             vendor_details=vendor_details,
+            finish_reason=mapped_finish_reason,
         )
 
     async def _process_streamed_response(
@@ -615,6 +686,7 @@ def _process_response_from_parts(
     usage: usage.RequestUsage,
    vendor_id: str | None,
     vendor_details: dict[str, Any] | None = None,
+    finish_reason: str | None = None,
 ) -> ModelResponse:
     items: list[ModelResponsePart] = []
     for part in parts:
@@ -655,6 +727,7 @@ def _process_response_from_parts(
         provider_response_id=vendor_id,
         provider_details=vendor_details,
         provider_name=provider_name,
+        finish_reason=finish_reason,
     )
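To make the Gemini mapping concrete, a few illustrative input/output pairs for `_map_finish_reason_to_otel`, each following one branch of the method above. The enclosing class is assumed here to be `GoogleModel`; since the method is a `@staticmethod`, it can be called unbound.

```python
from pydantic_ai.models.google import GoogleModel  # class name assumed

# Each assertion exercises one branch of the mapping in the diff above:
assert GoogleModel._map_finish_reason_to_otel('STOP') == 'stop'
assert GoogleModel._map_finish_reason_to_otel('MAX_TOKENS') == 'length'
assert GoogleModel._map_finish_reason_to_otel('SAFETY') == 'content_filter'
# Unknown provider-specific values are deliberately left unmapped:
assert GoogleModel._map_finish_reason_to_otel('MALFORMED_FUNCTION_CALL') is None
assert GoogleModel._map_finish_reason_to_otel(None) is None
```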
pydantic_ai_slim/pydantic_ai/models/instrumented.py (22 additions, 6 deletions)

@@ -419,13 +419,29 @@ def _record_metrics():
         if not span.is_recording():
             return
 
-        self.instrumentation_settings.handle_messages(messages, response, system, span)
-        span.set_attributes(
-            {
-                **response.usage.opentelemetry_attributes(),
-                'gen_ai.response.model': response_model,
+        events = self.instrumentation_settings.messages_to_otel_events(messages)
+        for event in self.instrumentation_settings.messages_to_otel_events([response]):
+            choice_body: dict[str, Any] = {
+                'index': 0,
+                'message': event.body,
             }
-        )
+            if response.finish_reason is not None:
+                choice_body['finish_reason'] = response.finish_reason
+            events.append(
+                Event(
+                    'gen_ai.choice',
+                    body=choice_body,
+                )
+            )
+        response_attributes = {
+            **response.usage.opentelemetry_attributes(),
+            'gen_ai.response.model': response_model,
+        }
+        if response.provider_response_id is not None:
+            response_attributes['gen_ai.response.id'] = response.provider_response_id
+        if response.finish_reason is not None:
+            response_attributes['gen_ai.response.finish_reasons'] = [response.finish_reason]
+        span.set_attributes(response_attributes)
         span.update_name(f'{operation} {request_model}')
 
         yield finish
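
For reference, a hedged sketch of the span data this change produces for a completed request. The attribute keys are the OpenTelemetry GenAI semantic-convention names used above; all values, and the exact keys emitted by `opentelemetry_attributes()`, are illustrative.

```python
# What span.set_attributes(response_attributes) might receive (illustrative):
response_attributes = {
    'gen_ai.usage.input_tokens': 42,
    'gen_ai.usage.output_tokens': 7,
    'gen_ai.response.model': 'gpt-4o-2024-08-06',
    'gen_ai.response.id': 'chatcmpl-abc123',     # from provider_response_id
    'gen_ai.response.finish_reasons': ['stop'],  # semconv expects a list
}

# And the matching gen_ai.choice event body emitted per response message:
choice_body = {
    'index': 0,
    'message': {'role': 'assistant', 'content': 'Hello!'},
    'finish_reason': 'stop',
}
```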

pydantic_ai_slim/pydantic_ai/models/openai.py (65 additions, 1 deletion)

@@ -512,6 +512,7 @@ def _process_response(self, response: chat.ChatCompletion | str) -> ModelResponse:
             provider_details=vendor_details,
             provider_response_id=response.id,
             provider_name=self._provider.name,
+            finish_reason=choice.finish_reason,
         )
 
     async def _process_streamed_response(
@@ -603,6 +604,39 @@ def _map_tool_call(t: ToolCallPart) -> ChatCompletionMessageFunctionToolCallParam:
             function={'name': t.tool_name, 'arguments': t.args_as_json_str()},
         )
 
+
+def _map_openai_responses_finish(status: str | None, incomplete_reason: str | None) -> tuple[str | None, str | None]:
+    """Map OpenAI Responses status/incomplete_details to (raw, OTEL-mapped) finish reasons.
+
+    Raw holds provider data for provider_details, while the mapped value is used for ModelResponse.finish_reason
+    to comply with gen_ai.response.finish_reasons.
+    """
+    if status is None:
+        return None, None
+
+    # Incomplete: use the reason for more specific mapping
+    if status == 'incomplete':
+        raw = incomplete_reason or status
+        if incomplete_reason == 'max_output_tokens':
+            return raw, 'length'
+        if incomplete_reason == 'content_filter':
+            return raw, 'content_filter'
+        if incomplete_reason == 'timeout':
+            return raw, 'timeout'
+        # Unknown reason for incomplete
+        return raw, 'other'
+
+    # Completed/cancelled/failed map to stop/cancelled/error
+    if status == 'completed':
+        return status, 'stop'
+    if status == 'cancelled':
+        return status, 'cancelled'
+    if status == 'failed':
+        return status, 'error'
+
+    # Unknown/other statuses -> keep raw, do not set mapped
+    return status, None
+
 def _map_json_schema(self, o: OutputObjectDefinition) -> chat.completion_create_params.ResponseFormat:
     response_format_param: chat.completion_create_params.ResponseFormatJSONSchema = {  # pyright: ignore[reportPrivateImportUsage]
         'type': 'json_schema',
@@ -820,13 +854,26 @@ def _process_response(self, response: responses.Response) -> ModelResponse:
                 items.append(TextPart(content.text))
             elif item.type == 'function_call':
                 items.append(ToolCallPart(item.name, item.arguments, tool_call_id=item.call_id))
+
+        # Map OpenAI Responses status/incomplete_details to OTEL-compliant finish_reasons
+        incomplete_reason = getattr(getattr(response, 'incomplete_details', None), 'reason', None)
+        raw_finish, mapped_finish = _map_openai_responses_finish(response.status, incomplete_reason)
+
+        provider_details: dict[str, Any] | None = None
+        if raw_finish is not None or mapped_finish is not None:
+            provider_details = {'finish_reason': raw_finish}
+            if mapped_finish is not None:
+                provider_details['final_reason'] = mapped_finish
+
         return ModelResponse(
             parts=items,
             usage=_map_usage(response),
             model_name=response.model,
             provider_response_id=response.id,
             timestamp=timestamp,
             provider_name=self._provider.name,
+            finish_reason=mapped_finish,
+            provider_details=provider_details,
         )
 
     async def _process_streamed_response(
@@ -1166,11 +1213,19 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
         async for chunk in self._response:
             self._usage += _map_usage(chunk)
 
+            # Capture the response ID from the chunk
+            if chunk.id and self.provider_response_id is None:
+                self.provider_response_id = chunk.id
+
             try:
                 choice = chunk.choices[0]
             except IndexError:
                 continue
 
+            # Capture the finish_reason when it becomes available
+            if choice.finish_reason:
+                self.finish_reason = choice.finish_reason
+
             # Handle the text part of the response
             content = choice.delta.content
             if content is not None:
@@ -1229,6 +1284,13 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
            # NOTE: You can inspect the builtin tools used checking the `ResponseCompletedEvent`.
            if isinstance(chunk, responses.ResponseCompletedEvent):
                self._usage += _map_usage(chunk.response)
+                # Capture id and mapped finish_reason from completed response
+                if chunk.response.id and self.provider_response_id is None:
+                    self.provider_response_id = chunk.response.id
+                if self.finish_reason is None:
+                    incomplete_reason = getattr(getattr(chunk.response, 'incomplete_details', None), 'reason', None)
+                    _raw, mapped = _map_openai_responses_finish(chunk.response.status, incomplete_reason)
+                    self.finish_reason = mapped
 
            elif isinstance(chunk, responses.ResponseContentPartAddedEvent):
                pass  # there's nothing we need to do here
@@ -1237,7 +1299,9 @@ async def _get_event_iterator(self) -> AsyncIterator[ModelResponseStreamEvent]:
                pass  # there's nothing we need to do here
 
            elif isinstance(chunk, responses.ResponseCreatedEvent):
-                pass  # there's nothing we need to do here
+                # Capture id from created response
+                if chunk.response.id and self.provider_response_id is None:
+                    self.provider_response_id = chunk.response.id
 
            elif isinstance(chunk, responses.ResponseFailedEvent):  # pragma: no cover
                self._usage += _map_usage(chunk.response)
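
A few worked mappings for `_map_openai_responses_finish`, following the branches in the function above. The import assumes the helper is module-level in `openai.py`, as its unqualified call sites suggest; it is a private helper, so this is test-style illustration rather than public API.

```python
from pydantic_ai.models.openai import _map_openai_responses_finish  # private helper, assumed module-level

assert _map_openai_responses_finish('completed', None) == ('completed', 'stop')
assert _map_openai_responses_finish('incomplete', 'max_output_tokens') == ('max_output_tokens', 'length')
assert _map_openai_responses_finish('incomplete', None) == ('incomplete', 'other')  # unknown incomplete reason
assert _map_openai_responses_finish('failed', None) == ('failed', 'error')
assert _map_openai_responses_finish('queued', None) == ('queued', None)  # unknown status: raw kept, no mapping
```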

tests/models/mock_openai.py (14 additions, 6 deletions)

@@ -77,16 +77,24 @@ def get_mock_chat_completion_kwargs(async_open_ai: AsyncOpenAI) -> list[dict[str
 
 
 def completion_message(
-    message: ChatCompletionMessage, *, usage: CompletionUsage | None = None, logprobs: ChoiceLogprobs | None = None
+    message: ChatCompletionMessage,
+    *,
+    usage: CompletionUsage | None = None,
+    logprobs: ChoiceLogprobs | None = None,
+    provider_response_id: str | None = None,
+    finish_reason: str | None = None,
+    model: str | None = None,
+    created: int | None = None,
 ) -> chat.ChatCompletion:
-    choices = [Choice(finish_reason='stop', index=0, message=message)]
+    fr = finish_reason or 'stop'
+    choices = [Choice(finish_reason=fr, index=0, message=message)]
     if logprobs:
-        choices = [Choice(finish_reason='stop', index=0, message=message, logprobs=logprobs)]
+        choices = [Choice(finish_reason=fr, index=0, message=message, logprobs=logprobs)]
     return chat.ChatCompletion(
-        id='123',
+        id=provider_response_id or '123',
         choices=choices,
-        created=1704067200,  # 2024-01-01
-        model='gpt-4o-123',
+        created=created or 1704067200,  # 2024-01-01
+        model=model or 'gpt-4o-123',
         object='chat.completion',
         usage=usage,
    )
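
A hedged usage sketch for the extended test helper, assuming it runs in the mock module's context. The new keyword arguments default to the old hard-coded values, so existing tests keep passing; overriding them lets tests assert on the new fields. Message content and ids below are illustrative.

```python
from openai.types.chat import ChatCompletionMessage

completion = completion_message(
    ChatCompletionMessage(content='world', role='assistant'),
    provider_response_id='resp-123',     # flows into ChatCompletion.id
    finish_reason='length',              # flows into the choice's finish_reason
    model='gpt-4o-2024-11-20',           # hypothetical model string
)
assert completion.id == 'resp-123'
assert completion.choices[0].finish_reason == 'length'
assert completion.model == 'gpt-4o-2024-11-20'
```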
