Skip to content

Commit 3430187

Browse files
alexmojaki and Copilot authored
Add operation.cost metric to instrumented models (#3013)
Co-authored-by: Copilot <[email protected]>
1 parent bfcccba commit 3430187

File tree

2 files changed

+137
-12
lines changed

2 files changed

+137
-12
lines changed

pydantic_ai_slim/pydantic_ai/models/instrumented.py

Lines changed: 27 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
from typing import Any, Literal, cast
1010
from urllib.parse import urlparse
1111

12+
from genai_prices.types import PriceCalculation
1213
from opentelemetry._events import (
1314
Event, # pyright: ignore[reportPrivateImportUsage]
1415
EventLogger, # pyright: ignore[reportPrivateImportUsage]
@@ -169,6 +170,11 @@ def __init__(
169170
self.tokens_histogram = self.meter.create_histogram(
170171
**tokens_histogram_kwargs, # pyright: ignore
171172
)
173+
self.cost_histogram = self.meter.create_histogram(
174+
'operation.cost',
175+
unit='{USD}',
176+
description='Monetary cost',
177+
)
172178

173179
def messages_to_otel_events(self, messages: list[ModelMessage]) -> list[Event]:
174180
"""Convert a list of model messages to OpenTelemetry events.
@@ -302,6 +308,21 @@ def _emit_events(self, span: Span, events: list[Event]) -> None:
302308
}
303309
)
304310

311+
def record_metrics(
    self,
    response: ModelResponse,
    price_calculation: PriceCalculation | None,
    attributes: dict[str, AttributeValue],
):
    """Record token-usage and cost histogram data points for one response.

    For each of the `input` and `output` token types, the count read from
    `response.usage` is recorded on `self.tokens_histogram`. When a price
    calculation is supplied, the matching `<type>_price` value is converted
    to `float` and recorded on `self.cost_histogram`.

    Both data points for a token type share one attribute mapping:
    `attributes` plus a `gen_ai.token.type` entry naming the type. A token
    type whose count is missing or zero produces neither a token nor a cost
    data point.
    """
    usage = response.usage
    for token_type in ('input', 'output'):
        token_count = getattr(usage, f'{token_type}_tokens', 0)
        if not token_count:  # pragma: no cover
            continue

        # One mapping is built per token type and passed to both histograms.
        data_point_attributes = {**attributes, 'gen_ai.token.type': token_type}
        self.tokens_histogram.record(token_count, data_point_attributes)

        if price_calculation:
            price = getattr(price_calculation, f'{token_type}_price')
            self.cost_histogram.record(float(price), data_point_attributes)
325+
305326

306327
GEN_AI_SYSTEM_ATTRIBUTE = 'gen_ai.system'
307328
GEN_AI_REQUEST_MODEL_ATTRIBUTE = 'gen_ai.request.model'
@@ -395,6 +416,7 @@ def finish(response: ModelResponse):
395416
system = cast(str, attributes[GEN_AI_SYSTEM_ATTRIBUTE])
396417

397418
response_model = response.model_name or request_model
419+
price_calculation = None
398420

399421
def _record_metrics():
400422
metric_attributes = {
@@ -403,16 +425,7 @@ def _record_metrics():
403425
'gen_ai.request.model': request_model,
404426
'gen_ai.response.model': response_model,
405427
}
406-
if response.usage.input_tokens: # pragma: no branch
407-
self.instrumentation_settings.tokens_histogram.record(
408-
response.usage.input_tokens,
409-
{**metric_attributes, 'gen_ai.token.type': 'input'},
410-
)
411-
if response.usage.output_tokens: # pragma: no branch
412-
self.instrumentation_settings.tokens_histogram.record(
413-
response.usage.output_tokens,
414-
{**metric_attributes, 'gen_ai.token.type': 'output'},
415-
)
428+
self.instrumentation_settings.record_metrics(response, price_calculation, metric_attributes)
416429

417430
nonlocal record_metrics
418431
record_metrics = _record_metrics
@@ -427,14 +440,17 @@ def _record_metrics():
427440
'gen_ai.response.model': response_model,
428441
}
429442
try:
430-
attributes_to_set['operation.cost'] = float(response.cost().total_price)
443+
price_calculation = response.cost()
431444
except LookupError:
432445
# The cost of this provider/model is unknown, which is common.
433446
pass
434447
except Exception as e:
435448
warnings.warn(
436449
f'Failed to get cost from response: {type(e).__name__}: {e}', CostCalculationFailedWarning
437450
)
451+
else:
452+
attributes_to_set['operation.cost'] = float(price_calculation.total_price)
453+
438454
if response.provider_response_id is not None:
439455
attributes_to_set['gen_ai.response.id'] = response.provider_response_id
440456
if response.finish_reason is not None:

tests/models/test_instrumented.py

Lines changed: 110 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@
4242
from pydantic_ai.settings import ModelSettings
4343
from pydantic_ai.usage import RequestUsage
4444

45-
from ..conftest import IsStr, try_import
45+
from ..conftest import IsInt, IsStr, try_import
4646

4747
with try_import() as imports_successful:
4848
from logfire.testing import CaptureLogfire
@@ -831,6 +831,115 @@ async def test_instrumented_model_attributes_mode(capfire: CaptureLogfire, instr
831831
]
832832
)
833833

834+
assert capfire.get_collected_metrics() == snapshot(
835+
[
836+
{
837+
'name': 'gen_ai.client.token.usage',
838+
'description': 'Measures number of input and output tokens used',
839+
'unit': '{token}',
840+
'data': {
841+
'data_points': [
842+
{
843+
'attributes': {
844+
'gen_ai.system': 'openai',
845+
'gen_ai.operation.name': 'chat',
846+
'gen_ai.request.model': 'gpt-4o',
847+
'gen_ai.response.model': 'gpt-4o-2024-11-20',
848+
'gen_ai.token.type': 'input',
849+
},
850+
'start_time_unix_nano': IsInt(),
851+
'time_unix_nano': IsInt(),
852+
'count': 1,
853+
'sum': 100,
854+
'scale': 20,
855+
'zero_count': 0,
856+
'positive': {'offset': 6966588, 'bucket_counts': [1]},
857+
'negative': {'offset': 0, 'bucket_counts': [0]},
858+
'flags': 0,
859+
'min': 100,
860+
'max': 100,
861+
'exemplars': [],
862+
},
863+
{
864+
'attributes': {
865+
'gen_ai.system': 'openai',
866+
'gen_ai.operation.name': 'chat',
867+
'gen_ai.request.model': 'gpt-4o',
868+
'gen_ai.response.model': 'gpt-4o-2024-11-20',
869+
'gen_ai.token.type': 'output',
870+
},
871+
'start_time_unix_nano': IsInt(),
872+
'time_unix_nano': IsInt(),
873+
'count': 1,
874+
'sum': 200,
875+
'scale': 20,
876+
'zero_count': 0,
877+
'positive': {'offset': 8015164, 'bucket_counts': [1]},
878+
'negative': {'offset': 0, 'bucket_counts': [0]},
879+
'flags': 0,
880+
'min': 200,
881+
'max': 200,
882+
'exemplars': [],
883+
},
884+
],
885+
'aggregation_temporality': 1,
886+
},
887+
},
888+
{
889+
'name': 'operation.cost',
890+
'description': 'Monetary cost',
891+
'unit': '{USD}',
892+
'data': {
893+
'data_points': [
894+
{
895+
'attributes': {
896+
'gen_ai.system': 'openai',
897+
'gen_ai.operation.name': 'chat',
898+
'gen_ai.request.model': 'gpt-4o',
899+
'gen_ai.response.model': 'gpt-4o-2024-11-20',
900+
'gen_ai.token.type': 'input',
901+
},
902+
'start_time_unix_nano': IsInt(),
903+
'time_unix_nano': IsInt(),
904+
'count': 1,
905+
'sum': 0.00025,
906+
'scale': 20,
907+
'zero_count': 0,
908+
'positive': {'offset': -12547035, 'bucket_counts': [1]},
909+
'negative': {'offset': 0, 'bucket_counts': [0]},
910+
'flags': 0,
911+
'min': 0.00025,
912+
'max': 0.00025,
913+
'exemplars': [],
914+
},
915+
{
916+
'attributes': {
917+
'gen_ai.system': 'openai',
918+
'gen_ai.operation.name': 'chat',
919+
'gen_ai.request.model': 'gpt-4o',
920+
'gen_ai.response.model': 'gpt-4o-2024-11-20',
921+
'gen_ai.token.type': 'output',
922+
},
923+
'start_time_unix_nano': IsInt(),
924+
'time_unix_nano': IsInt(),
925+
'count': 1,
926+
'sum': 0.002,
927+
'scale': 20,
928+
'zero_count': 0,
929+
'positive': {'offset': -9401307, 'bucket_counts': [1]},
930+
'negative': {'offset': 0, 'bucket_counts': [0]},
931+
'flags': 0,
932+
'min': 0.002,
933+
'max': 0.002,
934+
'exemplars': [],
935+
},
936+
],
937+
'aggregation_temporality': 1,
938+
},
939+
},
940+
]
941+
)
942+
834943

835944
def test_messages_to_otel_events_serialization_errors():
836945
class Foo:

0 commit comments

Comments
 (0)