
Commit dcc616a

Author: Jeel Mehta
Gen-AI python implementation
1 parent 4d7a014 commit dcc616a

2 files changed: +196 -11 lines

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py

Lines changed: 6 additions & 2 deletions
@@ -342,6 +342,7 @@ def on_success(self, span: Span, result: Dict[str, Any]):
         result['body'].close()
 
     def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any]):
+        #print("This is the response body :", response_body)
         if 'inputTextTokenCount' in response_body:
             span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body['inputTextTokenCount'])
 
@@ -352,6 +353,7 @@ def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any
             span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result['completionReason']])
 
     def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str, Any]):
+        #print("This is the response body :", response_body)
         if 'usage' in response_body:
             usage = response_body['usage']
             if 'input_tokens' in usage:
@@ -362,6 +364,7 @@ def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str,
             span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body['stop_reason']])
 
     def _handle_cohere_command_response(self, span: Span, response_body: Dict[str, Any]):
+        print("This is the response body :", response_body)
         # Input tokens: Approximate from the user's message in chat history
         if 'chat_history' in response_body:
             user_messages = [msg['message'] for msg in response_body['chat_history'] if msg['role'] == 'USER']
@@ -387,15 +390,16 @@ def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any])
             span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices['finish_reason']])
 
     def _handle_meta_llama_response(self, span: Span, response_body: Dict[str, Any]):
-        #print("This is the response body :", response_body)
+        print("This is the response body :", response_body)
         if 'prompt_token_count' in response_body:
             span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body['prompt_token_count'])
         if 'generation_token_count' in response_body:
             span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, response_body['generation_token_count'])
         if 'stop_reason' in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, response_body['stop_reason'])
+            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body['stop_reason']])
 
     def _handle_mistral_mistral_response(self, span: Span, response_body: Dict[str, Any]):
+        print("This is the response body :", response_body)
         if "outputs" in response_body:
             outputs = response_body["outputs"][0]
             if "text" in outputs:

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py

Lines changed: 190 additions & 9 deletions
@@ -4,6 +4,9 @@
 from typing import Any, Dict
 from unittest import TestCase
 from unittest.mock import MagicMock, patch
+from io import BytesIO
+import json
+from botocore.response import StreamingBody
 
 import gevent.monkey
 import pkg_resources
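
These three imports let the tests fabricate the body field of a real Bedrock response: StreamingBody wraps any file-like object plus a content length, giving the same read-once stream botocore returns. A standalone sketch of the round trip used throughout the new tests:

    import json
    from io import BytesIO
    from botocore.response import StreamingBody

    payload = {"inputTextTokenCount": 123}
    raw = json.dumps(payload).encode("utf-8")
    body = StreamingBody(BytesIO(raw), len(raw))

    # Like a live response body, the stream can only be consumed once.
    assert json.loads(body.read()) == payload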
@@ -211,12 +214,189 @@ def _test_patched_botocore_instrumentation(self):
         bedrock_agent_runtime_sucess_attributes: Dict[str, str] = _do_on_success_bedrock("bedrock-agent-runtime")
         self.assertEqual(len(bedrock_agent_runtime_sucess_attributes), 0)
 
-        # BedrockRuntime
+        # BedrockRuntime - Amazon Titan Models
+        self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        request_body = {
+            "textGenerationConfig": {
+                "maxTokenCount": 512,
+                "temperature": 0.9,
+                "topP": 0.75,
+            }
+        }
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime",
+            model_id="amazon.titan",
+            request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "amazon.titan")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.9)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.75)
+        response_body = {
+            "inputTextTokenCount": 123,
+            "results": [{
+                "tokenCount": 456,
+                "outputText": "testing",
+                "completionReason": "FINISH",
+            }]
+        }
+        json_bytes = json.dumps(response_body).encode('utf-8')
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime",
+            model_id="amazon.titan",
+            streaming_body=streaming_body
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 123)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 456)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["FINISH"])
+
+        # BedrockRuntime - Anthropic Claude Models
         self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
-        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock("bedrock-runtime")
-        self.assertEqual(len(bedrock_runtime_attributes), 2)
+        request_body = {
+            "anthropic_version": "bedrock-2023-05-31",
+            "max_tokens": 512,
+            "temperature": 0.5,
+            "top_p": 0.999,
+        }
+
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime",
+            model_id="anthropic.claude",
+            request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 5)
         self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
-        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], _GEN_AI_REQUEST_MODEL)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "anthropic.claude")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.999)
+        response_body = {
+            'stop_reason': 'end_turn', 'stop_sequence': None, 'usage': {'input_tokens': 23, 'output_tokens': 36}
+        }
+        json_bytes = json.dumps(response_body).encode('utf-8')
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime",
+            model_id="anthropic.claude",
+            streaming_body=streaming_body
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 23)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 36)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["end_turn"])
+
+        # BedrockRuntime - Cohere Command Models - testing pending
+        # self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        # request_body = {
+        #     "max_tokens": 512,
+        #     "temperature": 0.5,
+        #     "p": 0.75,
+        # }
+
+        # bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+        #     "bedrock-runtime",
+        #     model_id="cohere.command",
+        #     request_body=json.dumps(request_body)
+        # )
+        # self.assertEqual(len(bedrock_runtime_attributes), 5)
+        # self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        # self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "cohere.command")
+        # self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        # self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        # self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.75)
+        # response_body = {
+        #     'finish_reason': 'COMPLETE'
+        # }
+        # json_bytes = json.dumps(response_body).encode('utf-8')
+        # body_bytes = BytesIO(json_bytes)
+        # streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        # bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+        #     "bedrock-runtime",
+        #     model_id="cohere.command",
+        #     streaming_body=streaming_body
+        # )
+        # self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 23)
+        # self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 36)
+        # self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["COMPLETE"])
+
+        # BedrockRuntime - AI21 Jamba Models
+        self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        request_body = {
+            "max_tokens": 512,
+            "temperature": 0.5,
+            "top_p": 0.9,
+        }
+
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime",
+            model_id="ai21.jamba",
+            request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "ai21.jamba")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.9)
+        response_body = {
+            'choices': [{'finish_reason': "stop"}],
+            'usage': {'prompt_tokens': 24,
+                      'completion_tokens': 31,
+                      'total_tokens': 55}
+        }
+        json_bytes = json.dumps(response_body).encode('utf-8')
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime",
+            model_id="ai21.jamba",
+            streaming_body=streaming_body
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 24)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 31)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["stop"])
+
+        # BedrockRuntime - Meta Llama Models
+        self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        request_body = {
+            "max_gen_len": 512,
+            "temperature": 0.5,
+            "top_p": 0.9,
+        }
+
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime",
+            model_id="meta.llama",
+            request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "meta.llama")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.9)
+        response_body = {
+            'prompt_token_count': 31,
+            'generation_token_count': 36,
+            'stop_reason': 'stop'
+        }
+        json_bytes = json.dumps(response_body).encode('utf-8')
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime",
+            model_id="meta.llama",
+            streaming_body=streaming_body
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 31)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 36)
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["stop"])
+
 
         # SecretsManager
         self.assertTrue("secretsmanager" in _KNOWN_EXTENSIONS)
@@ -385,26 +565,27 @@ def _do_extract_sqs_attributes() -> Dict[str, str]:
     return _do_extract_attributes(service_name, params)
 
 
-def _do_extract_attributes_bedrock(service, operation=None) -> Dict[str, str]:
+def _do_extract_attributes_bedrock(service, operation=None, model_id=None, request_body=None) -> Dict[str, str]:
     params: Dict[str, Any] = {
         "agentId": _BEDROCK_AGENT_ID,
         "dataSourceId": _BEDROCK_DATASOURCE_ID,
         "knowledgeBaseId": _BEDROCK_KNOWLEDGEBASE_ID,
         "guardrailId": _BEDROCK_GUARDRAIL_ID,
-        "modelId": _GEN_AI_REQUEST_MODEL,
+        "modelId": model_id,
+        "body": request_body,
     }
     return _do_extract_attributes(service, params, operation)
 
 
-def _do_on_success_bedrock(service, operation=None) -> Dict[str, str]:
+def _do_on_success_bedrock(service, operation=None, model_id=None, streaming_body=None) -> Dict[str, str]:
     result: Dict[str, Any] = {
         "agentId": _BEDROCK_AGENT_ID,
         "dataSourceId": _BEDROCK_DATASOURCE_ID,
         "knowledgeBaseId": _BEDROCK_KNOWLEDGEBASE_ID,
         "guardrailId": _BEDROCK_GUARDRAIL_ID,
-        "modelId": _GEN_AI_REQUEST_MODEL,
+        "body": streaming_body,
     }
-    return _do_on_success(service, result, operation)
+    return _do_on_success(service, result, operation, params={"modelId": model_id})
 
 
 def _do_extract_secretsmanager_attributes() -> Dict[str, str]:
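
These helper changes encode the new runtime contract: the model ID travels in the request params (forwarded to on_success so the extension can pick the right model handler), while body carries the request payload or the streaming response. A usage sketch of the extract helper, with values taken from the tests above:

    request_body = {"max_tokens": 512, "temperature": 0.5, "top_p": 0.9}
    attributes = _do_extract_attributes_bedrock(
        "bedrock-runtime",
        model_id="ai21.jamba",
        request_body=json.dumps(request_body),
    )
    assert attributes["gen_ai.request.model"] == "ai21.jamba"
    assert attributes["gen_ai.request.max_tokens"] == 512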
