Skip to content

Commit 6f71a1b

Browse files
authored
Use span.data instead of measurements for token usage (#4567)
Store AI token usage in `span.data` instead of the deprecated `measurements`. In `relay` there is already code in place that copies the data from the deprecated `span.measurements` to `span.data` and uses `span.data` for calculating the cost of token usage. So this PR can be deployed in a minor release without risk. See also the `relay` PR: getsentry/relay#4768
1 parent 1df6c9a commit 6f71a1b

File tree

6 files changed

+58
-54
lines changed

6 files changed

+58
-54
lines changed

sentry_sdk/ai/monitoring.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,15 +102,19 @@ def record_token_usage(
102102
ai_pipeline_name = get_ai_pipeline_name()
103103
if ai_pipeline_name:
104104
span.set_data(SPANDATA.AI_PIPELINE_NAME, ai_pipeline_name)
105+
105106
if prompt_tokens is not None:
106-
span.set_measurement("ai_prompt_tokens_used", value=prompt_tokens)
107+
span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, prompt_tokens)
108+
107109
if completion_tokens is not None:
108-
span.set_measurement("ai_completion_tokens_used", value=completion_tokens)
110+
span.set_data(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, completion_tokens)
111+
109112
if (
110113
total_tokens is None
111114
and prompt_tokens is not None
112115
and completion_tokens is not None
113116
):
114117
total_tokens = prompt_tokens + completion_tokens
118+
115119
if total_tokens is not None:
116-
span.set_measurement("ai_total_tokens_used", total_tokens)
120+
span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, total_tokens)

tests/integrations/anthropic/test_anthropic.py

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -125,9 +125,9 @@ def test_nonstreaming_create_message(
125125
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
126126
assert SPANDATA.AI_RESPONSES not in span["data"]
127127

128-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
129-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 20
130-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
128+
assert span["data"]["gen_ai.usage.input_tokens"] == 10
129+
assert span["data"]["gen_ai.usage.output_tokens"] == 20
130+
assert span["data"]["gen_ai.usage.total_tokens"] == 30
131131
assert span["data"][SPANDATA.AI_STREAMING] is False
132132

133133

@@ -193,9 +193,9 @@ async def test_nonstreaming_create_message_async(
193193
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
194194
assert SPANDATA.AI_RESPONSES not in span["data"]
195195

196-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
197-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 20
198-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
196+
assert span["data"]["gen_ai.usage.input_tokens"] == 10
197+
assert span["data"]["gen_ai.usage.output_tokens"] == 20
198+
assert span["data"]["gen_ai.usage.total_tokens"] == 30
199199
assert span["data"][SPANDATA.AI_STREAMING] is False
200200

201201

@@ -293,9 +293,9 @@ def test_streaming_create_message(
293293
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
294294
assert SPANDATA.AI_RESPONSES not in span["data"]
295295

296-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
297-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 30
298-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 40
296+
assert span["data"]["gen_ai.usage.input_tokens"] == 10
297+
assert span["data"]["gen_ai.usage.output_tokens"] == 30
298+
assert span["data"]["gen_ai.usage.total_tokens"] == 40
299299
assert span["data"][SPANDATA.AI_STREAMING] is True
300300

301301

@@ -396,9 +396,9 @@ async def test_streaming_create_message_async(
396396
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
397397
assert SPANDATA.AI_RESPONSES not in span["data"]
398398

399-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
400-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 30
401-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 40
399+
assert span["data"]["gen_ai.usage.input_tokens"] == 10
400+
assert span["data"]["gen_ai.usage.output_tokens"] == 30
401+
assert span["data"]["gen_ai.usage.total_tokens"] == 40
402402
assert span["data"][SPANDATA.AI_STREAMING] is True
403403

404404

@@ -525,9 +525,9 @@ def test_streaming_create_message_with_input_json_delta(
525525
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
526526
assert SPANDATA.AI_RESPONSES not in span["data"]
527527

528-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 366
529-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 51
530-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 417
528+
assert span["data"]["gen_ai.usage.input_tokens"] == 366
529+
assert span["data"]["gen_ai.usage.output_tokens"] == 51
530+
assert span["data"]["gen_ai.usage.total_tokens"] == 417
531531
assert span["data"][SPANDATA.AI_STREAMING] is True
532532

533533

@@ -662,9 +662,9 @@ async def test_streaming_create_message_with_input_json_delta_async(
662662
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
663663
assert SPANDATA.AI_RESPONSES not in span["data"]
664664

665-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 366
666-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 51
667-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 417
665+
assert span["data"]["gen_ai.usage.input_tokens"] == 366
666+
assert span["data"]["gen_ai.usage.output_tokens"] == 51
667+
assert span["data"]["gen_ai.usage.total_tokens"] == 417
668668
assert span["data"][SPANDATA.AI_STREAMING] is True
669669

670670

@@ -807,10 +807,10 @@ def test_add_ai_data_to_span_with_input_json_delta(sentry_init):
807807
content_blocks=["{'test': 'data',", "'more': 'json'}"],
808808
)
809809

810-
assert span._data.get(SPANDATA.AI_RESPONSES) == [
810+
assert span._data.get("ai.responses") == [
811811
{"type": "text", "text": "{'test': 'data','more': 'json'}"}
812812
]
813-
assert span._data.get(SPANDATA.AI_STREAMING) is True
814-
assert span._measurements.get("ai_prompt_tokens_used")["value"] == 10
815-
assert span._measurements.get("ai_completion_tokens_used")["value"] == 20
816-
assert span._measurements.get("ai_total_tokens_used")["value"] == 30
813+
assert span._data.get("ai.streaming") is True
814+
assert span._data.get("gen_ai.usage.input_tokens") == 10
815+
assert span._data.get("gen_ai.usage.output_tokens") == 20
816+
assert span._data.get("gen_ai.usage.total_tokens") == 30

tests/integrations/cohere/test_cohere.py

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,9 +64,9 @@ def test_nonstreaming_chat(
6464
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
6565
assert SPANDATA.AI_RESPONSES not in span["data"]
6666

67-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 10
68-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20
69-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
67+
assert span["data"]["gen_ai.usage.output_tokens"] == 10
68+
assert span["data"]["gen_ai.usage.input_tokens"] == 20
69+
assert span["data"]["gen_ai.usage.total_tokens"] == 30
7070

7171

7272
# noinspection PyTypeChecker
@@ -135,9 +135,9 @@ def test_streaming_chat(sentry_init, capture_events, send_default_pii, include_p
135135
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
136136
assert SPANDATA.AI_RESPONSES not in span["data"]
137137

138-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 10
139-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20
140-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
138+
assert span["data"]["gen_ai.usage.output_tokens"] == 10
139+
assert span["data"]["gen_ai.usage.input_tokens"] == 20
140+
assert span["data"]["gen_ai.usage.total_tokens"] == 30
141141

142142

143143
def test_bad_chat(sentry_init, capture_events):
@@ -199,8 +199,8 @@ def test_embed(sentry_init, capture_events, send_default_pii, include_prompts):
199199
else:
200200
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
201201

202-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 10
203-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 10
202+
assert span["data"]["gen_ai.usage.input_tokens"] == 10
203+
assert span["data"]["gen_ai.usage.total_tokens"] == 10
204204

205205

206206
def test_span_origin_chat(sentry_init, capture_events):

tests/integrations/huggingface_hub/test_huggingface_hub.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,7 @@ def test_nonstreaming_chat_completion(
7575
assert SPANDATA.AI_RESPONSES not in span["data"]
7676

7777
if details_arg:
78-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 10
78+
assert span["data"]["gen_ai.usage.total_tokens"] == 10
7979

8080

8181
@pytest.mark.parametrize(
@@ -134,7 +134,7 @@ def test_streaming_chat_completion(
134134
assert SPANDATA.AI_RESPONSES not in span["data"]
135135

136136
if details_arg:
137-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 10
137+
assert span["data"]["gen_ai.usage.total_tokens"] == 10
138138

139139

140140
def test_bad_chat_completion(sentry_init, capture_events):

tests/integrations/langchain/test_langchain.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -186,8 +186,8 @@ def test_langchain_agent(
186186
assert len(list(x for x in tx["spans"] if x["op"] == "ai.run.langchain")) > 0
187187

188188
if use_unknown_llm_type:
189-
assert "ai_prompt_tokens_used" in chat_spans[0]["measurements"]
190-
assert "ai_total_tokens_used" in chat_spans[0]["measurements"]
189+
assert "gen_ai.usage.input_tokens" in chat_spans[0]["data"]
190+
assert "gen_ai.usage.total_tokens" in chat_spans[0]["data"]
191191
else:
192192
# important: to avoid double counting, we do *not* measure
193193
# tokens used if we have an explicit integration (e.g. OpenAI)

tests/integrations/openai/test_openai.py

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -90,9 +90,9 @@ def test_nonstreaming_chat_completion(
9090
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
9191
assert SPANDATA.AI_RESPONSES not in span["data"]
9292

93-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 10
94-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20
95-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
93+
assert span["data"]["gen_ai.usage.output_tokens"] == 10
94+
assert span["data"]["gen_ai.usage.input_tokens"] == 20
95+
assert span["data"]["gen_ai.usage.total_tokens"] == 30
9696

9797

9898
@pytest.mark.asyncio
@@ -132,9 +132,9 @@ async def test_nonstreaming_chat_completion_async(
132132
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
133133
assert SPANDATA.AI_RESPONSES not in span["data"]
134134

135-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 10
136-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20
137-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
135+
assert span["data"]["gen_ai.usage.output_tokens"] == 10
136+
assert span["data"]["gen_ai.usage.input_tokens"] == 20
137+
assert span["data"]["gen_ai.usage.total_tokens"] == 30
138138

139139

140140
def tiktoken_encoding_if_installed():
@@ -228,9 +228,9 @@ def test_streaming_chat_completion(
228228
try:
229229
import tiktoken # type: ignore # noqa # pylint: disable=unused-import
230230

231-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 2
232-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 1
233-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 3
231+
assert span["data"]["gen_ai.usage.output_tokens"] == 2
232+
assert span["data"]["gen_ai.usage.input_tokens"] == 1
233+
assert span["data"]["gen_ai.usage.total_tokens"] == 3
234234
except ImportError:
235235
pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly
236236

@@ -324,9 +324,9 @@ async def test_streaming_chat_completion_async(
324324
try:
325325
import tiktoken # type: ignore # noqa # pylint: disable=unused-import
326326

327-
assert span["measurements"]["ai_completion_tokens_used"]["value"] == 2
328-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 1
329-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 3
327+
assert span["data"]["gen_ai.usage.output_tokens"] == 2
328+
assert span["data"]["gen_ai.usage.input_tokens"] == 1
329+
assert span["data"]["gen_ai.usage.total_tokens"] == 3
330330
except ImportError:
331331
pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly
332332

@@ -409,8 +409,8 @@ def test_embeddings_create(
409409
else:
410410
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
411411

412-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20
413-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
412+
assert span["data"]["gen_ai.usage.input_tokens"] == 20
413+
assert span["data"]["gen_ai.usage.total_tokens"] == 30
414414

415415

416416
@pytest.mark.asyncio
@@ -457,8 +457,8 @@ async def test_embeddings_create_async(
457457
else:
458458
assert SPANDATA.AI_INPUT_MESSAGES not in span["data"]
459459

460-
assert span["measurements"]["ai_prompt_tokens_used"]["value"] == 20
461-
assert span["measurements"]["ai_total_tokens_used"]["value"] == 30
460+
assert span["data"]["gen_ai.usage.input_tokens"] == 20
461+
assert span["data"]["gen_ai.usage.total_tokens"] == 30
462462

463463

464464
@pytest.mark.parametrize(

0 commit comments

Comments
 (0)