
Commit 551fa85

Author: Jeel Mehta

Gen-AI python implementation
1 parent dcc616a commit 551fa85

2 files changed: 79 additions & 42 deletions


aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py

Lines changed: 4 additions & 10 deletions
@@ -279,6 +279,9 @@ def _extract_claude_attributes(self, attributes, request_body):
         self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get('top_p'))
 
     def _extract_cohere_attributes(self, attributes, request_body):
+        prompt = request_body.get('message')
+        if prompt:
+            attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
         self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get('max_tokens'))
         self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get('temperature'))
         self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get('p'))
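The added Cohere branch reuses the same fallback the Mistral extractor already applies: when the request body carries no token counts, input tokens are estimated from the prompt's character length at roughly six characters per token, rounded up. A minimal standalone sketch of that heuristic (the helper name estimate_tokens is illustrative, not part of the patch):

import math

def estimate_tokens(text: str) -> int:
    # Assume ~6 characters per token and round up, mirroring
    # math.ceil(len(prompt) / 6) in the extractor above.
    return math.ceil(len(text) / 6)

# A 60-character prompt is estimated at ceil(60 / 6) = 10 tokens.
assert estimate_tokens("x" * 60) == 10
assert estimate_tokens("") == 0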
@@ -294,6 +297,7 @@ def _extract_llama_attributes(self, attributes, request_body):
         self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get('top_p'))
 
     def _extract_mistral_attributes(self, attributes, request_body):
+        print("This is the request body:", request_body)
         prompt = request_body.get('prompt')
         if prompt:
             attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
@@ -342,7 +346,6 @@ def on_success(self, span: Span, result: Dict[str, Any]):
             result['body'].close()
 
     def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any]):
-        #print("This is the response body :", response_body)
         if 'inputTextTokenCount' in response_body:
             span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body['inputTextTokenCount'])
 
@@ -353,7 +356,6 @@ def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any
             span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result['completionReason']])
 
     def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str, Any]):
-        #print("This is the response body :", response_body)
         if 'usage' in response_body:
             usage = response_body['usage']
             if 'input_tokens' in usage:
@@ -364,20 +366,13 @@ def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str,
             span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body['stop_reason']])
 
     def _handle_cohere_command_response(self, span: Span, response_body: Dict[str, Any]):
-        print("This is the response body :", response_body)
-        # Input tokens: Approximate from the user's message in chat history
-        if 'chat_history' in response_body:
-            user_messages = [msg['message'] for msg in response_body['chat_history'] if msg['role'] == 'USER']
-            input_text = ' '.join(user_messages)
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, math.ceil(len(input_text) / 6))
         # Output tokens: Approximate from the response text
         if 'text' in response_body:
             span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body['text']) / 6))
         if 'finish_reason' in response_body:
             span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body['finish_reason']])
 
     def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any]):
-        print("This is the response body :", response_body)
         if 'usage' in response_body:
             usage = response_body['usage']
             if 'prompt_tokens' in usage:
@@ -390,7 +385,6 @@ def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any])
             span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices['finish_reason']])
 
     def _handle_meta_llama_response(self, span: Span, response_body: Dict[str, Any]):
-        print("This is the response body :", response_body)
         if 'prompt_token_count' in response_body:
             span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body['prompt_token_count'])
         if 'generation_token_count' in response_body:
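After this change the response handlers fall into two patterns: providers whose bodies report exact token counts (Titan's inputTextTokenCount, Claude's and Jamba's usage blocks, Llama's prompt_token_count and generation_token_count) and providers where counts are estimated from text length (Cohere Command here, Mistral output in the tests below). A condensed sketch of the two patterns against a stand-in span recorder (FakeSpan and the handler names are hypothetical, for illustration only):

import math
from typing import Any, Dict

class FakeSpan:
    # Minimal stand-in that records set_attribute calls in a dict.
    def __init__(self) -> None:
        self.attributes: Dict[str, Any] = {}
    def set_attribute(self, key: str, value: Any) -> None:
        self.attributes[key] = value

def handle_exact(span: FakeSpan, body: Dict[str, Any]) -> None:
    # Exact pattern: copy the provider-reported count straight through.
    if 'prompt_token_count' in body:
        span.set_attribute("gen_ai.usage.input_tokens", body['prompt_token_count'])

def handle_estimated(span: FakeSpan, body: Dict[str, Any]) -> None:
    # Heuristic pattern: no counts in the body, so estimate from text length.
    if 'text' in body:
        span.set_attribute("gen_ai.usage.output_tokens", math.ceil(len(body['text']) / 6))

span = FakeSpan()
handle_exact(span, {'prompt_token_count': 31})
handle_estimated(span, {'text': 'Goodbye, World'})  # 14 chars -> ceil(14/6) = 3
assert span.attributes == {"gen_ai.usage.input_tokens": 31,
                           "gen_ai.usage.output_tokens": 3}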

aws-opentelemetry-distro/tests/amazon/opentelemetry/distro/test_instrumentation_patch.py

Lines changed: 75 additions & 32 deletions
@@ -7,6 +7,7 @@
 from io import BytesIO
 import json
 from botocore.response import StreamingBody
+import math
 
 import gevent.monkey
 import pkg_resources
@@ -290,39 +291,42 @@ def _test_patched_botocore_instrumentation(self):
         self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 36)
         self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["end_turn"])
 
-        #BedrockRuntime - Cohere Command Models _testing Pending
-        # self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
-        # request_body = {
-        #     "max_tokens": 512,
-        #     "temperature": 0.5,
-        #     "p":0.75,
-        # }
+        #BedrockRuntime - Cohere Command Models
+        self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        request_body = {
+            'message': "Describe the purpose of a 'hello world' program in one line.",
+            "max_tokens": 512,
+            "temperature": 0.5,
+            "p": 0.75,
+        }
 
-        # bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
-        #     "bedrock-runtime",
-        #     model_id="cohere.command",
-        #     request_body=json.dumps(request_body)
-        # )
-        # self.assertEqual(len(bedrock_runtime_attributes), 5)
-        # self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
-        # self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "cohere.command")
-        # self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
-        # self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
-        # self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.75)
-        # response_body = {
-        #     'finish_reason': 'COMPLETE'
-        # }
-        # json_bytes = json.dumps(response_body).encode('utf-8')
-        # body_bytes = BytesIO(json_bytes)
-        # streaming_body = StreamingBody(body_bytes, len(json_bytes))
-        # bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
-        #     "bedrock-runtime",
-        #     model_id="cohere.command",
-        #     streaming_body=streaming_body
-        # )
-        # self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 23)
-        # self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 36)
-        # self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["COMPLETE"])
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime",
+            model_id="cohere.command",
+            request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 6)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "cohere.command")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.75)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.usage.input_tokens"], math.ceil(len(request_body['message']) / 6))
+        response_body = {
+            'text': 'A "hello world" program serves as a simple introduction to programming, helping developers confirm their setup and test their coding environment.',
+            'finish_reason': 'COMPLETE'
+        }
+        json_bytes = json.dumps(response_body).encode('utf-8')
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime",
+            model_id="cohere.command",
+            streaming_body=streaming_body
+        )
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], math.ceil(len(response_body['text']) / 6))
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["COMPLETE"])
 
         #BedrockRuntime - AI21 Jamba Models
         self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
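The fake Cohere response above is built the way botocore delivers a real one: the dict is serialized to JSON bytes and wrapped in a StreamingBody along with its content length. A small helper capturing that pattern (make_streaming_body is a hypothetical name, not part of the test suite):

import json
from io import BytesIO
from botocore.response import StreamingBody

def make_streaming_body(payload: dict) -> StreamingBody:
    # Serialize the payload and wrap it exactly as botocore would.
    raw = json.dumps(payload).encode('utf-8')
    return StreamingBody(BytesIO(raw), len(raw))

body = make_streaming_body({'finish_reason': 'COMPLETE'})
assert json.loads(body.read()) == {'finish_reason': 'COMPLETE'}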
@@ -397,6 +401,45 @@ def _test_patched_botocore_instrumentation(self):
         self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], 36)
         self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["stop"])
 
+        #BedrockRuntime - Mistral Models
+        self.assertTrue("bedrock-runtime" in _KNOWN_EXTENSIONS)
+        msg = "Hello, World"
+        formatted_prompt = f"<s>[INST] {msg} [/INST]"
+        request_body = {
+            'prompt': formatted_prompt,
+            "max_tokens": 512,
+            "temperature": 0.5,
+            "top_p": 0.9,
+        }
+
+        bedrock_runtime_attributes: Dict[str, str] = _do_extract_attributes_bedrock(
+            "bedrock-runtime",
+            model_id="mistral",
+            request_body=json.dumps(request_body)
+        )
+        self.assertEqual(len(bedrock_runtime_attributes), 6)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.system"], _GEN_AI_SYSTEM)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.model"], "mistral")
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.max_tokens"], 512)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.temperature"], 0.5)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.request.top_p"], 0.9)
+        self.assertEqual(bedrock_runtime_attributes["gen_ai.usage.input_tokens"], math.ceil(len(request_body['prompt']) / 6))
+        response_body = {
+            'outputs': [{
+                'text': 'Goodbye, World',
+                'stop_reason': 'stop'
+            }]
+        }
+        json_bytes = json.dumps(response_body).encode('utf-8')
+        body_bytes = BytesIO(json_bytes)
+        streaming_body = StreamingBody(body_bytes, len(json_bytes))
+        bedrock_runtime_success_attributes: Dict[str, str] = _do_on_success_bedrock(
+            "bedrock-runtime",
+            model_id="mistral",
+            streaming_body=streaming_body
+        )
+        # self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.input_tokens"], 31)  # Still have concerns regarding these lines
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.usage.output_tokens"], math.ceil(len(response_body['outputs'][0]['text']) / 6))
+        self.assertEqual(bedrock_runtime_success_attributes["gen_ai.response.finish_reasons"], ["stop"])
 
         # SecretsManager
         self.assertTrue("secretsmanager" in _KNOWN_EXTENSIONS)
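Note that the Mistral input-token estimate is computed over the full formatted prompt, instruction wrapper included, not just the user message. A quick check of the arithmetic for the values used in the test above:

import math

msg = "Hello, World"                           # 12 characters
formatted_prompt = f"<s>[INST] {msg} [/INST]"  # 30 characters
assert math.ceil(len(msg) / 6) == 2
assert math.ceil(len(formatted_prompt) / 6) == 5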
