Skip to content

Commit d62e5da

Browse files
committed
refactor logic for token estimation
1 parent d091fed commit d62e5da

File tree

2 files changed

+15
-10
lines changed
  • instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions

2 files changed

+15
-10
lines changed

instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import io
2222
import json
2323
import logging
24-
import math
2524
from timeit import default_timer
2625
from typing import Any
2726

@@ -34,6 +33,7 @@
3433
_Choice,
3534
genai_capture_message_content,
3635
message_to_event,
36+
estimate_token_count,
3737
)
3838
from opentelemetry.instrumentation.botocore.extensions.types import (
3939
_AttributeMapT,
@@ -105,9 +105,6 @@
105105
]
106106

107107
_MODEL_ID_KEY: str = "modelId"
108-
# estimate 6 chars per token for models that don't provide input/output token count in response body.
109-
# https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
110-
_CHARS_PER_TOKEN: int = 6
111108

112109
class _BedrockRuntimeExtension(_AwsSdkExtension):
113110
"""
@@ -293,7 +290,7 @@ def _extract_claude_attributes(self, attributes, request_body):
293290
def _extract_command_r_attributes(self, attributes, request_body):
294291
prompt = request_body.get("message")
295292
self._set_if_not_none(
296-
attributes, GEN_AI_USAGE_INPUT_TOKENS, math.ceil(len(prompt) / _CHARS_PER_TOKEN)
293+
attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt)
297294
)
298295
self._set_if_not_none(
299296
attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")
@@ -311,7 +308,7 @@ def _extract_command_r_attributes(self, attributes, request_body):
311308
def _extract_command_attributes(self, attributes, request_body):
312309
prompt = request_body.get("prompt")
313310
self._set_if_not_none(
314-
attributes, GEN_AI_USAGE_INPUT_TOKENS, math.ceil(len(prompt) / _CHARS_PER_TOKEN)
311+
attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt)
315312
)
316313
self._set_if_not_none(
317314
attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")
@@ -342,7 +339,7 @@ def _extract_mistral_attributes(self, attributes, request_body):
342339
prompt = request_body.get("prompt")
343340
if prompt:
344341
self._set_if_not_none(
345-
attributes, GEN_AI_USAGE_INPUT_TOKENS, math.ceil(len(prompt) / _CHARS_PER_TOKEN)
342+
attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt)
346343
)
347344
self._set_if_not_none(
348345
attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")
@@ -840,7 +837,7 @@ def _handle_cohere_command_r_response(
840837
):
841838
if "text" in response_body:
842839
span.set_attribute(
843-
GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body["text"]) / _CHARS_PER_TOKEN)
840+
GEN_AI_USAGE_OUTPUT_TOKENS, estimate_token_count(response_body["text"])
844841
)
845842
if "finish_reason" in response_body:
846843
span.set_attribute(
@@ -864,7 +861,7 @@ def _handle_cohere_command_response(
864861
generations = response_body["generations"][0]
865862
if "text" in generations:
866863
span.set_attribute(
867-
GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(generations["text"]) / _CHARS_PER_TOKEN)
864+
GEN_AI_USAGE_OUTPUT_TOKENS, estimate_token_count(generations["text"])
868865
)
869866
if "finish_reason" in generations:
870867
span.set_attribute(
@@ -913,7 +910,7 @@ def _handle_mistral_ai_response(
913910
if "outputs" in response_body:
914911
outputs = response_body["outputs"][0]
915912
if "text" in outputs:
916-
span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(outputs["text"]) / _CHARS_PER_TOKEN))
913+
span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, estimate_token_count(outputs["text"]))
917914
if "stop_reason" in outputs:
918915
span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]])
919916

instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
import json
18+
import math
1819
from os import environ
1920
from typing import Any, Callable, Dict, Iterator, Sequence, Union
2021

@@ -357,6 +358,13 @@ def _process_anthropic_claude_chunk(self, chunk):
357358
self._stream_done_callback(self._response)
358359
return
359360

def estimate_token_count(message: str, chars_per_token: int = 6) -> int:
    """Estimate the token count of *message* via a characters-per-token heuristic.

    Used for Bedrock models that do not report input/output token usage in the
    response body. AWS documents ~6 characters per token as a reasonable
    approximation:
    https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html

    Args:
        message: The prompt or completion text to estimate.
        chars_per_token: Average characters per token; defaults to 6 per the
            AWS guidance above.

    Returns:
        The estimated token count, rounded up so any non-empty message counts
        as at least one token.
    """
    return math.ceil(len(message) / chars_per_token)
360368

361369
def genai_capture_message_content() -> bool:
362370
capture_content = environ.get(

0 commit comments

Comments
 (0)