Add operation.cost metric to instrumented models (#3013)

alexmojaki · Copilot · web-flow · commit 3430187ddf6c · 2025-09-27T00:00:13.000+02:00
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
diff --git a/pydantic_ai_slim/pydantic_ai/models/instrumented.py b/pydantic_ai_slim/pydantic_ai/models/instrumented.py
@@ -9,6 +9,7 @@
 from typing import Any, Literal, cast
 from urllib.parse import urlparse
 
+from genai_prices.types import PriceCalculation
 from opentelemetry._events import (
     Event,  # pyright: ignore[reportPrivateImportUsage]
     EventLogger,  # pyright: ignore[reportPrivateImportUsage]
@@ -169,6 +170,11 @@ def __init__(
             self.tokens_histogram = self.meter.create_histogram(
                 **tokens_histogram_kwargs,  # pyright: ignore
             )
+        self.cost_histogram = self.meter.create_histogram(
+            'operation.cost',
+            unit='{USD}',
+            description='Monetary cost',
+        )
 
     def messages_to_otel_events(self, messages: list[ModelMessage]) -> list[Event]:
         """Convert a list of model messages to OpenTelemetry events.
@@ -302,6 +308,21 @@ def _emit_events(self, span: Span, events: list[Event]) -> None:
                 }
             )
 
+    def record_metrics(
+        self,
+        response: ModelResponse,
+        price_calculation: PriceCalculation | None,
+        attributes: dict[str, AttributeValue],
+    ):
+        for typ in ['input', 'output']:
+            if not (tokens := getattr(response.usage, f'{typ}_tokens', 0)):  # pragma: no cover
+                continue
+            token_attributes = {**attributes, 'gen_ai.token.type': typ}
+            self.tokens_histogram.record(tokens, token_attributes)
+            if price_calculation:
+                cost = float(getattr(price_calculation, f'{typ}_price'))
+                self.cost_histogram.record(cost, token_attributes)
+
 
 GEN_AI_SYSTEM_ATTRIBUTE = 'gen_ai.system'
 GEN_AI_REQUEST_MODEL_ATTRIBUTE = 'gen_ai.request.model'
@@ -395,6 +416,7 @@ def finish(response: ModelResponse):
                     system = cast(str, attributes[GEN_AI_SYSTEM_ATTRIBUTE])
 
                     response_model = response.model_name or request_model
+                    price_calculation = None
 
                     def _record_metrics():
                         metric_attributes = {
@@ -403,16 +425,7 @@ def _record_metrics():
                             'gen_ai.request.model': request_model,
                             'gen_ai.response.model': response_model,
                         }
-                        if response.usage.input_tokens:  # pragma: no branch
-                            self.instrumentation_settings.tokens_histogram.record(
-                                response.usage.input_tokens,
-                                {**metric_attributes, 'gen_ai.token.type': 'input'},
-                            )
-                        if response.usage.output_tokens:  # pragma: no branch
-                            self.instrumentation_settings.tokens_histogram.record(
-                                response.usage.output_tokens,
-                                {**metric_attributes, 'gen_ai.token.type': 'output'},
-                            )
+                        self.instrumentation_settings.record_metrics(response, price_calculation, metric_attributes)
 
                     nonlocal record_metrics
                     record_metrics = _record_metrics
@@ -427,14 +440,17 @@ def _record_metrics():
                         'gen_ai.response.model': response_model,
                     }
                     try:
-                        attributes_to_set['operation.cost'] = float(response.cost().total_price)
+                        price_calculation = response.cost()
                     except LookupError:
                         # The cost of this provider/model is unknown, which is common.
                         pass
                     except Exception as e:
                         warnings.warn(
                             f'Failed to get cost from response: {type(e).__name__}: {e}', CostCalculationFailedWarning
                         )
+                    else:
+                        attributes_to_set['operation.cost'] = float(price_calculation.total_price)
+
                     if response.provider_response_id is not None:
                         attributes_to_set['gen_ai.response.id'] = response.provider_response_id
                     if response.finish_reason is not None:
diff --git a/tests/models/test_instrumented.py b/tests/models/test_instrumented.py
@@ -42,7 +42,7 @@
 from pydantic_ai.settings import ModelSettings
 from pydantic_ai.usage import RequestUsage
 
-from ..conftest import IsStr, try_import
+from ..conftest import IsInt, IsStr, try_import
 
 with try_import() as imports_successful:
     from logfire.testing import CaptureLogfire
@@ -831,6 +831,115 @@ async def test_instrumented_model_attributes_mode(capfire: CaptureLogfire, instr
             ]
         )
 
+    assert capfire.get_collected_metrics() == snapshot(
+        [
+            {
+                'name': 'gen_ai.client.token.usage',
+                'description': 'Measures number of input and output tokens used',
+                'unit': '{token}',
+                'data': {
+                    'data_points': [
+                        {
+                            'attributes': {
+                                'gen_ai.system': 'openai',
+                                'gen_ai.operation.name': 'chat',
+                                'gen_ai.request.model': 'gpt-4o',
+                                'gen_ai.response.model': 'gpt-4o-2024-11-20',
+                                'gen_ai.token.type': 'input',
+                            },
+                            'start_time_unix_nano': IsInt(),
+                            'time_unix_nano': IsInt(),
+                            'count': 1,
+                            'sum': 100,
+                            'scale': 20,
+                            'zero_count': 0,
+                            'positive': {'offset': 6966588, 'bucket_counts': [1]},
+                            'negative': {'offset': 0, 'bucket_counts': [0]},
+                            'flags': 0,
+                            'min': 100,
+                            'max': 100,
+                            'exemplars': [],
+                        },
+                        {
+                            'attributes': {
+                                'gen_ai.system': 'openai',
+                                'gen_ai.operation.name': 'chat',
+                                'gen_ai.request.model': 'gpt-4o',
+                                'gen_ai.response.model': 'gpt-4o-2024-11-20',
+                                'gen_ai.token.type': 'output',
+                            },
+                            'start_time_unix_nano': IsInt(),
+                            'time_unix_nano': IsInt(),
+                            'count': 1,
+                            'sum': 200,
+                            'scale': 20,
+                            'zero_count': 0,
+                            'positive': {'offset': 8015164, 'bucket_counts': [1]},
+                            'negative': {'offset': 0, 'bucket_counts': [0]},
+                            'flags': 0,
+                            'min': 200,
+                            'max': 200,
+                            'exemplars': [],
+                        },
+                    ],
+                    'aggregation_temporality': 1,
+                },
+            },
+            {
+                'name': 'operation.cost',
+                'description': 'Monetary cost',
+                'unit': '{USD}',
+                'data': {
+                    'data_points': [
+                        {
+                            'attributes': {
+                                'gen_ai.system': 'openai',
+                                'gen_ai.operation.name': 'chat',
+                                'gen_ai.request.model': 'gpt-4o',
+                                'gen_ai.response.model': 'gpt-4o-2024-11-20',
+                                'gen_ai.token.type': 'input',
+                            },
+                            'start_time_unix_nano': IsInt(),
+                            'time_unix_nano': IsInt(),
+                            'count': 1,
+                            'sum': 0.00025,
+                            'scale': 20,
+                            'zero_count': 0,
+                            'positive': {'offset': -12547035, 'bucket_counts': [1]},
+                            'negative': {'offset': 0, 'bucket_counts': [0]},
+                            'flags': 0,
+                            'min': 0.00025,
+                            'max': 0.00025,
+                            'exemplars': [],
+                        },
+                        {
+                            'attributes': {
+                                'gen_ai.system': 'openai',
+                                'gen_ai.operation.name': 'chat',
+                                'gen_ai.request.model': 'gpt-4o',
+                                'gen_ai.response.model': 'gpt-4o-2024-11-20',
+                                'gen_ai.token.type': 'output',
+                            },
+                            'start_time_unix_nano': IsInt(),
+                            'time_unix_nano': IsInt(),
+                            'count': 1,
+                            'sum': 0.002,
+                            'scale': 20,
+                            'zero_count': 0,
+                            'positive': {'offset': -9401307, 'bucket_counts': [1]},
+                            'negative': {'offset': 0, 'bucket_counts': [0]},
+                            'flags': 0,
+                            'min': 0.002,
+                            'max': 0.002,
+                            'exemplars': [],
+                        },
+                    ],
+                    'aggregation_temporality': 1,
+                },
+            },
+        ]
+    )
+
 
 def test_messages_to_otel_events_serialization_errors():
     class Foo: