 )
 from opentelemetry.trace.span import Span
 
+
 _AGENT_ID: str = "agentId"
 _KNOWLEDGE_BASE_ID: str = "knowledgeBaseId"
 _DATA_SOURCE_ID: str = "dataSourceId"
@@ -245,205 +246,3 @@ def on_success(self, span: Span, result: _BotoResultT, instrumentor_context: _Bo
                 AWS_BEDROCK_GUARDRAIL_ARN,
                 guardrail_arn,
             )
-
-
-class _BedrockRuntimeExtension(_AwsSdkExtension):
-    """
-    This class is an extension for <a
-    href="https://docs.aws.amazon.com/bedrock/latest/APIReference/API_Operations_Amazon_Bedrock_Runtime.html">
-    Amazon Bedrock Runtime</a>.
-    """
-
-    def extract_attributes(self, attributes: _AttributeMapT):
-        attributes[GEN_AI_SYSTEM] = _AWS_BEDROCK_SYSTEM
-
-        model_id = self._call_context.params.get(_MODEL_ID)
-        if model_id:
-            attributes[GEN_AI_REQUEST_MODEL] = model_id
-
-            # Get the request body if it exists
-            body = self._call_context.params.get("body")
-            if body:
-                try:
-                    request_body = json.loads(body)
-
-                    if "amazon.titan" in model_id:
-                        self._extract_titan_attributes(attributes, request_body)
-                    if "amazon.nova" in model_id:
-                        self._extract_nova_attributes(attributes, request_body)
-                    elif "anthropic.claude" in model_id:
-                        self._extract_claude_attributes(attributes, request_body)
-                    elif "meta.llama" in model_id:
-                        self._extract_llama_attributes(attributes, request_body)
-                    elif "cohere.command" in model_id:
-                        self._extract_cohere_attributes(attributes, request_body)
-                    elif "ai21.jamba" in model_id:
-                        self._extract_ai21_attributes(attributes, request_body)
-                    elif "mistral" in model_id:
-                        self._extract_mistral_attributes(attributes, request_body)
-
-                except json.JSONDecodeError:
-                    _logger.debug("Error: Unable to parse the body as JSON")
-
-    def _extract_titan_attributes(self, attributes, request_body):
-        config = request_body.get("textGenerationConfig", {})
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("topP"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("maxTokenCount"))
-
-    def _extract_nova_attributes(self, attributes, request_body):
-        config = request_body.get("inferenceConfig", {})
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("top_p"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("max_new_tokens"))
-
-    def _extract_claude_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_cohere_attributes(self, attributes, request_body):
-        prompt = request_body.get("message")
-        if prompt:
-            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p"))
-
-    def _extract_ai21_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_llama_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_gen_len"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    def _extract_mistral_attributes(self, attributes, request_body):
-        prompt = request_body.get("prompt")
-        if prompt:
-            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
-
-    @staticmethod
-    def _set_if_not_none(attributes, key, value):
-        if value is not None:
-            attributes[key] = value
-
-    # pylint: disable=too-many-branches
-    def on_success(self, span: Span, result: Dict[str, Any], instrumentor_context: _BotocoreInstrumentorContext):
-        model_id = self._call_context.params.get(_MODEL_ID)
-
-        if not model_id:
-            return
-
-        if "body" in result and isinstance(result["body"], StreamingBody):
-            original_body = None
-            try:
-                original_body = result["body"]
-                body_content = original_body.read()
-
-                # Use one stream for telemetry
-                stream = io.BytesIO(body_content)
-                telemetry_content = stream.read()
-                response_body = json.loads(telemetry_content.decode("utf-8"))
-                if "amazon.titan" in model_id:
-                    self._handle_amazon_titan_response(span, response_body)
-                if "amazon.nova" in model_id:
-                    self._handle_amazon_nova_response(span, response_body)
-                elif "anthropic.claude" in model_id:
-                    self._handle_anthropic_claude_response(span, response_body)
-                elif "meta.llama" in model_id:
-                    self._handle_meta_llama_response(span, response_body)
-                elif "cohere.command" in model_id:
-                    self._handle_cohere_command_response(span, response_body)
-                elif "ai21.jamba" in model_id:
-                    self._handle_ai21_jamba_response(span, response_body)
-                elif "mistral" in model_id:
-                    self._handle_mistral_mistral_response(span, response_body)
-                # Replenish stream for downstream application use
-                new_stream = io.BytesIO(body_content)
-                result["body"] = StreamingBody(new_stream, len(body_content))
-
-            except json.JSONDecodeError:
-                _logger.debug("Error: Unable to parse the response body as JSON")
-            except Exception as e:  # pylint: disable=broad-exception-caught, invalid-name
-                _logger.debug("Error processing response: %s", e)
-            finally:
-                if original_body is not None:
-                    original_body.close()
-
-    # pylint: disable=no-self-use
-    def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any]):
-        if "inputTextTokenCount" in response_body:
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"])
-        if "results" in response_body and response_body["results"]:
-            result = response_body["results"][0]
-            if "tokenCount" in result:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"])
-            if "completionReason" in result:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result["completionReason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_amazon_nova_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "inputTokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["inputTokens"])
-            if "outputTokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["outputTokens"])
-        if "stopReason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stopReason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "input_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["input_tokens"])
-            if "output_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["output_tokens"])
-        if "stop_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_cohere_command_response(self, span: Span, response_body: Dict[str, Any]):
-        # Output tokens: Approximate from the response text
-        if "text" in response_body:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body["text"]) / 6))
-        if "finish_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["finish_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any]):
-        if "usage" in response_body:
-            usage = response_body["usage"]
-            if "prompt_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["prompt_tokens"])
-            if "completion_tokens" in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["completion_tokens"])
-        if "choices" in response_body:
-            choices = response_body["choices"][0]
-            if "finish_reason" in choices:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices["finish_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_meta_llama_response(self, span: Span, response_body: Dict[str, Any]):
-        if "prompt_token_count" in response_body:
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["prompt_token_count"])
-        if "generation_token_count" in response_body:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, response_body["generation_token_count"])
-        if "stop_reason" in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
-
-    # pylint: disable=no-self-use
-    def _handle_mistral_mistral_response(self, span: Span, response_body: Dict[str, Any]):
-        if "outputs" in response_body:
-            outputs = response_body["outputs"][0]
-            if "text" in outputs:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(outputs["text"]) / 6))
-            if "stop_reason" in outputs:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]])
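The removed `on_success` path above hinges on one non-obvious detail: botocore's `StreamingBody` is a one-shot stream, so once the instrumentation reads the response body for telemetry it must hand the application a rebuilt body of identical content and length, or the caller's own `read()` would return nothing. Below is a minimal, self-contained sketch of that read-then-replenish pattern; the `read_and_replenish` helper and the in-memory payload are hypothetical illustrations, not the extension's actual API.

# Sketch of the read-then-replenish pattern, assuming only botocore is installed.
import io
import json

from botocore.response import StreamingBody


def read_and_replenish(result: dict) -> dict:
    """Parse result["body"] for telemetry, then restore it for the caller."""
    original_body = result["body"]
    body_content = original_body.read()  # consumes the one-shot stream
    original_body.close()

    response_body = json.loads(body_content.decode("utf-8"))
    # ... a real extension would record token counts / finish reasons here ...

    # Hand the caller a fresh stream with identical content and length.
    result["body"] = StreamingBody(io.BytesIO(body_content), len(body_content))
    return response_body


# Hypothetical usage, with an in-memory payload standing in for a Bedrock response:
payload = json.dumps({"results": [{"tokenCount": 42}]}).encode("utf-8")
result = {"body": StreamingBody(io.BytesIO(payload), len(payload))}
parsed = read_and_replenish(result)
assert parsed["results"][0]["tokenCount"] == 42
assert result["body"].read() == payload  # a downstream read still succeeds

Note also that for Cohere and Mistral the removed code did not read token usage from the response at all; those bodies carry no usage block, so it estimated token counts as `math.ceil(len(text) / 6)`, a rough characters-per-token heuristic rather than a true count.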