Skip to content

Commit d62e5da

Browse files
committed
refactor logic for token estimation
1 parent d091fed commit d62e5da

File tree

2 files changed

+15
-10
lines changed
  • instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions

2 files changed

+15
-10
lines changed

instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import io
2222
import json
2323
import logging
24-
import math
2524
from timeit import default_timer
2625
from typing import Any
2726

@@ -34,6 +33,7 @@
3433
_Choice,
3534
genai_capture_message_content,
3635
message_to_event,
36+
estimate_token_count,
3737
)
3838
from opentelemetry.instrumentation.botocore.extensions.types import (
3939
_AttributeMapT,
@@ -105,9 +105,6 @@
105105
]
106106

107107
_MODEL_ID_KEY: str = "modelId"
108-
# estimate 6 chars per token for models that don't provide input/output token count in response body.
109-
# https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html
110-
_CHARS_PER_TOKEN: int = 6
111108

112109
class _BedrockRuntimeExtension(_AwsSdkExtension):
113110
"""
@@ -293,7 +290,7 @@ def _extract_claude_attributes(self, attributes, request_body):
293290
def _extract_command_r_attributes(self, attributes, request_body):
294291
prompt = request_body.get("message")
295292
self._set_if_not_none(
296-
attributes, GEN_AI_USAGE_INPUT_TOKENS, math.ceil(len(prompt) / _CHARS_PER_TOKEN)
293+
attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt)
297294
)
298295
self._set_if_not_none(
299296
attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")
@@ -311,7 +308,7 @@ def _extract_command_r_attributes(self, attributes, request_body):
311308
def _extract_command_attributes(self, attributes, request_body):
312309
prompt = request_body.get("prompt")
313310
self._set_if_not_none(
314-
attributes, GEN_AI_USAGE_INPUT_TOKENS, math.ceil(len(prompt) / _CHARS_PER_TOKEN)
311+
attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt)
315312
)
316313
self._set_if_not_none(
317314
attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")
@@ -342,7 +339,7 @@ def _extract_mistral_attributes(self, attributes, request_body):
342339
prompt = request_body.get("prompt")
343340
if prompt:
344341
self._set_if_not_none(
345-
attributes, GEN_AI_USAGE_INPUT_TOKENS, math.ceil(len(prompt) / _CHARS_PER_TOKEN)
342+
attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt)
346343
)
347344
self._set_if_not_none(
348345
attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens")
@@ -840,7 +837,7 @@ def _handle_cohere_command_r_response(
840837
):
841838
if "text" in response_body:
842839
span.set_attribute(
843-
GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body["text"]) / _CHARS_PER_TOKEN)
840+
GEN_AI_USAGE_OUTPUT_TOKENS, estimate_token_count(response_body["text"])
844841
)
845842
if "finish_reason" in response_body:
846843
span.set_attribute(
@@ -864,7 +861,7 @@ def _handle_cohere_command_response(
864861
generations = response_body["generations"][0]
865862
if "text" in generations:
866863
span.set_attribute(
867-
GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(generations["text"]) / _CHARS_PER_TOKEN)
864+
GEN_AI_USAGE_OUTPUT_TOKENS, estimate_token_count(generations["text"])
868865
)
869866
if "finish_reason" in generations:
870867
span.set_attribute(
@@ -913,7 +910,7 @@ def _handle_mistral_ai_response(
913910
if "outputs" in response_body:
914911
outputs = response_body["outputs"][0]
915912
if "text" in outputs:
916-
span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(outputs["text"]) / _CHARS_PER_TOKEN))
913+
span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, estimate_token_count(outputs["text"]))
917914
if "stop_reason" in outputs:
918915
span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]])
919916

instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from __future__ import annotations
1616

1717
import json
18+
import math
1819
from os import environ
1920
from typing import Any, Callable, Dict, Iterator, Sequence, Union
2021

@@ -357,6 +358,13 @@ def _process_anthropic_claude_chunk(self, chunk):
357358
self._stream_done_callback(self._response)
358359
return
359360

def estimate_token_count(message: str, chars_per_token: int = 6) -> int:
    """Estimate the token count of *message* via a characters-per-token heuristic.

    Used for Bedrock models that do not report input/output token usage in the
    response body. AWS documents ~6 characters per token as a reasonable
    approximation:
    https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html

    Args:
        message: The prompt or completion text to estimate.
        chars_per_token: Average characters per token; defaults to 6 per the
            AWS guidance above.

    Returns:
        The estimated token count, rounded up so any non-empty message counts
        as at least one token.
    """
    return math.ceil(len(message) / chars_per_token)
360368

361369
def genai_capture_message_content() -> bool:
362370
capture_content = environ.get(

0 commit comments

Comments
 (0)