Commit f22f8b1
Jeel Mehta committed
Gen-AI python implementation
1 parent 95e31e3 · commit f22f8b1

File tree: 3 files changed, +172 -182 lines changed

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/_aws_span_processing_util.py
Lines changed: 1 addition & 0 deletions

@@ -36,6 +36,7 @@
 GEN_AI_USAGE_INPUT_TOKENS: str = "gen_ai.usage.input_tokens"
 GEN_AI_USAGE_OUTPUT_TOKENS: str = "gen_ai.usage.output_tokens"
 
+
 # Get dialect keywords retrieved from dialect_keywords.json file.
 # Only meant to be invoked by SQL_KEYWORD_PATTERN and unit tests
 def _get_dialect_keywords() -> List[str]:

aws-opentelemetry-distro/src/amazon/opentelemetry/distro/patches/_bedrock_patches.py
Lines changed: 101 additions & 90 deletions

@@ -2,10 +2,11 @@
 # SPDX-License-Identifier: Apache-2.0
 import abc
 import inspect
-from typing import Any, Dict, Optional
 import json
-from botocore.response import StreamingBody
 import math
+from typing import Any, Dict, Optional
+
+from botocore.response import StreamingBody
 
 from amazon.opentelemetry.distro._aws_attribute_keys import (
     AWS_BEDROCK_AGENT_ID,
@@ -14,7 +15,16 @@
     AWS_BEDROCK_GUARDRAIL_ID,
     AWS_BEDROCK_KNOWLEDGE_BASE_ID,
 )
-from amazon.opentelemetry.distro._aws_span_processing_util import GEN_AI_REQUEST_MODEL, GEN_AI_SYSTEM, GEN_AI_REQUEST_MAX_TOKENS, GEN_AI_REQUEST_TEMPERATURE, GEN_AI_REQUEST_TOP_P, GEN_AI_RESPONSE_FINISH_REASONS, GEN_AI_USAGE_INPUT_TOKENS, GEN_AI_USAGE_OUTPUT_TOKENS
+from amazon.opentelemetry.distro._aws_span_processing_util import (
+    GEN_AI_REQUEST_MAX_TOKENS,
+    GEN_AI_REQUEST_MODEL,
+    GEN_AI_REQUEST_TEMPERATURE,
+    GEN_AI_REQUEST_TOP_P,
+    GEN_AI_RESPONSE_FINISH_REASONS,
+    GEN_AI_SYSTEM,
+    GEN_AI_USAGE_INPUT_TOKENS,
+    GEN_AI_USAGE_OUTPUT_TOKENS,
+)
 from opentelemetry.instrumentation.botocore.extensions.types import (
     _AttributeMapT,
     _AwsSdkCallContext,
@@ -241,100 +251,101 @@ def extract_attributes(self, attributes: _AttributeMapT):
         attributes[GEN_AI_SYSTEM] = _AWS_BEDROCK_SYSTEM
 
         model_id = self._call_context.params.get(_MODEL_ID)
-        #attributes["Testing"]= "Test"
+        # attributes["Testing"]= "Test"
         if model_id:
-            attributes[GEN_AI_REQUEST_MODEL] = model_id
-
+            attributes[GEN_AI_REQUEST_MODEL] = model_id
+
         # Get the request body if it exists
-        body = self._call_context.params.get('body')
-        #print("This is the body :",body)
+        body = self._call_context.params.get("body")
+        # print("This is the body :",body)
         if body:
             try:
                 request_body = json.loads(body)
-
-                if 'amazon.titan' in model_id:
+
+                if "amazon.titan" in model_id:
                     self._extract_titan_attributes(attributes, request_body)
-                elif 'anthropic.claude' in model_id:
+                elif "anthropic.claude" in model_id:
                     self._extract_claude_attributes(attributes, request_body)
-                elif 'meta.llama' in model_id:
+                elif "meta.llama" in model_id:
                     self._extract_llama_attributes(attributes, request_body)
-                elif 'cohere.command' in model_id:
+                elif "cohere.command" in model_id:
                     self._extract_cohere_attributes(attributes, request_body)
-                elif 'ai21.jamba' in model_id:
+                elif "ai21.jamba" in model_id:
                     self._extract_ai21_attributes(attributes, request_body)
-                elif 'mistral' in model_id:
+                elif "mistral" in model_id:
                     self._extract_mistral_attributes(attributes, request_body)
-
+
             except json.JSONDecodeError:
                 print("Error: Unable to parse the body as JSON")
+
     def _extract_titan_attributes(self, attributes, request_body):
-        config = request_body.get('textGenerationConfig', {})
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get('temperature'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get('topP'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get('maxTokenCount'))
+        config = request_body.get("textGenerationConfig", {})
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, config.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, config.get("topP"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, config.get("maxTokenCount"))
 
     def _extract_claude_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get('max_tokens'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get('temperature'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get('top_p'))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
 
     def _extract_cohere_attributes(self, attributes, request_body):
-        prompt = request_body.get('message')
+        prompt = request_body.get("message")
         if prompt:
             attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get('max_tokens'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get('temperature'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get('p'))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p"))
 
     def _extract_ai21_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get('max_tokens'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get('temperature'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get('top_p'))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
 
     def _extract_llama_attributes(self, attributes, request_body):
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get('max_gen_len'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get('temperature'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get('top_p'))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_gen_len"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
 
     def _extract_mistral_attributes(self, attributes, request_body):
         print("This is the request body:", request_body)
-        prompt = request_body.get('prompt')
+        prompt = request_body.get("prompt")
         if prompt:
             attributes[GEN_AI_USAGE_INPUT_TOKENS] = math.ceil(len(prompt) / 6)
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get('max_tokens'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get('temperature'))
-        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get('top_p'))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_MAX_TOKENS, request_body.get("max_tokens"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TEMPERATURE, request_body.get("temperature"))
+        self._set_if_not_none(attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p"))
 
     @staticmethod
     def _set_if_not_none(attributes, key, value):
         if value is not None:
             attributes[key] = value
-
+
     def on_success(self, span: Span, result: Dict[str, Any]):
         super().on_success(span, result)
-
+
         model_id = self._call_context.params.get(_MODEL_ID)
         if not model_id:
             return
 
-        if 'body' in result and isinstance(result['body'], StreamingBody):
+        if "body" in result and isinstance(result["body"], StreamingBody):
             try:
                 # Read the entire content of the StreamingBody
-                body_content = result['body'].read()
+                body_content = result["body"].read()
                 # Decode the bytes to string and parse as JSON
-                response_body = json.loads(body_content.decode('utf-8'))
-
-                if 'amazon.titan' in model_id:
+                response_body = json.loads(body_content.decode("utf-8"))
+
+                if "amazon.titan" in model_id:
                     self._handle_amazon_titan_response(span, response_body)
-                elif 'anthropic.claude' in model_id:
+                elif "anthropic.claude" in model_id:
                     self._handle_anthropic_claude_response(span, response_body)
-                elif 'meta.llama' in model_id:
+                elif "meta.llama" in model_id:
                     self._handle_meta_llama_response(span, response_body)
-                elif 'cohere.command' in model_id:
+                elif "cohere.command" in model_id:
                     self._handle_cohere_command_response(span, response_body)
-                elif 'ai21.jamba' in model_id:
+                elif "ai21.jamba" in model_id:
                     self._handle_ai21_jamba_response(span, response_body)
-                elif 'mistral' in model_id:
+                elif "mistral" in model_id:
                     self._handle_mistral_mistral_response(span, response_body)
 
             except json.JSONDecodeError:
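
The extractors above map vendor-specific request fields onto common gen_ai.* span attributes; Cohere and Mistral requests carry no token counts, so input tokens are approximated as math.ceil(len(prompt) / 6), roughly six characters per token (a 120-character prompt is recorded as 20 tokens). Below is a minimal standalone sketch of the Claude mapping. It is not part of the commit: the gen_ai.request.* key strings are assumed from the OpenTelemetry GenAI semantic conventions (the patch imports them as constants from _aws_span_processing_util), and the request payload is illustrative.

import json
import math

# Assumed attribute key strings (per OTel GenAI semconv); the patched code
# imports these as constants rather than spelling them out.
GEN_AI_REQUEST_MAX_TOKENS = "gen_ai.request.max_tokens"
GEN_AI_REQUEST_TEMPERATURE = "gen_ai.request.temperature"
GEN_AI_REQUEST_TOP_P = "gen_ai.request.top_p"


def extract_claude_attributes(body: str) -> dict:
    """Mirror of _extract_claude_attributes: parse the InvokeModel body and
    copy whichever request parameters are present onto the attribute map."""
    request_body = json.loads(body)
    attributes = {}
    for attr_key, body_field in [
        (GEN_AI_REQUEST_MAX_TOKENS, "max_tokens"),
        (GEN_AI_REQUEST_TEMPERATURE, "temperature"),
        (GEN_AI_REQUEST_TOP_P, "top_p"),
    ]:
        value = request_body.get(body_field)
        if value is not None:  # same guard as _set_if_not_none
            attributes[attr_key] = value
    return attributes


# Illustrative Claude Messages payload for an anthropic.claude model ID.
body = json.dumps(
    {"max_tokens": 512, "temperature": 0.5, "top_p": 0.9,
     "messages": [{"role": "user", "content": "Hello"}]}
)
print(extract_claude_attributes(body))
# {'gen_ai.request.max_tokens': 512, 'gen_ai.request.temperature': 0.5,
#  'gen_ai.request.top_p': 0.9}
print(math.ceil(len("A" * 120) / 6))  # Cohere/Mistral input-token estimate: 20
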
@@ -343,60 +354,60 @@ def on_success(self, span: Span, result: Dict[str, Any]):
                 print(f"Error processing response: {str(e)}")
             finally:
                 # Make sure to close the stream
-                result['body'].close()
+                result["body"].close()
 
     def _handle_amazon_titan_response(self, span: Span, response_body: Dict[str, Any]):
-        if 'inputTextTokenCount' in response_body:
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body['inputTextTokenCount'])
-
-        result = response_body['results'][0]
-        if 'tokenCount' in result:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, result['tokenCount'])
-        if 'completionReason' in result:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result['completionReason']])
-
+        if "inputTextTokenCount" in response_body:
+            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["inputTextTokenCount"])
+
+        result = response_body["results"][0]
+        if "tokenCount" in result:
+            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, result["tokenCount"])
+        if "completionReason" in result:
+            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [result["completionReason"]])
+
     def _handle_anthropic_claude_response(self, span: Span, response_body: Dict[str, Any]):
-        if 'usage' in response_body:
-            usage = response_body['usage']
-            if 'input_tokens' in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage['input_tokens'])
-            if 'output_tokens' in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage['output_tokens'])
-        if 'stop_reason' in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body['stop_reason']])
+        if "usage" in response_body:
+            usage = response_body["usage"]
+            if "input_tokens" in usage:
+                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["input_tokens"])
+            if "output_tokens" in usage:
+                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["output_tokens"])
+        if "stop_reason" in response_body:
+            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
 
     def _handle_cohere_command_response(self, span: Span, response_body: Dict[str, Any]):
         # Output tokens: Approximate from the response text
-        if 'text' in response_body:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body['text']) / 6))
-        if 'finish_reason' in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body['finish_reason']])
+        if "text" in response_body:
+            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(response_body["text"]) / 6))
+        if "finish_reason" in response_body:
+            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["finish_reason"]])
 
     def _handle_ai21_jamba_response(self, span: Span, response_body: Dict[str, Any]):
-        if 'usage' in response_body:
-            usage = response_body['usage']
-            if 'prompt_tokens' in usage:
-                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage['prompt_tokens'])
-            if 'completion_tokens' in usage:
-                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage['completion_tokens'])
-        if 'choices' in response_body:
-            choices = response_body['choices'][0]
-            if 'finish_reason' in choices:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices['finish_reason']])
+        if "usage" in response_body:
+            usage = response_body["usage"]
+            if "prompt_tokens" in usage:
+                span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, usage["prompt_tokens"])
+            if "completion_tokens" in usage:
+                span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, usage["completion_tokens"])
+        if "choices" in response_body:
+            choices = response_body["choices"][0]
+            if "finish_reason" in choices:
+                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [choices["finish_reason"]])
 
     def _handle_meta_llama_response(self, span: Span, response_body: Dict[str, Any]):
-        if 'prompt_token_count' in response_body:
-            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body['prompt_token_count'])
-        if 'generation_token_count' in response_body:
-            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, response_body['generation_token_count'])
-        if 'stop_reason' in response_body:
-            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body['stop_reason']])
-
+        if "prompt_token_count" in response_body:
+            span.set_attribute(GEN_AI_USAGE_INPUT_TOKENS, response_body["prompt_token_count"])
+        if "generation_token_count" in response_body:
+            span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, response_body["generation_token_count"])
+        if "stop_reason" in response_body:
+            span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]])
+
     def _handle_mistral_mistral_response(self, span: Span, response_body: Dict[str, Any]):
         print("This is the response body :", response_body)
         if "outputs" in response_body:
             outputs = response_body["outputs"][0]
             if "text" in outputs:
                 span.set_attribute(GEN_AI_USAGE_OUTPUT_TOKENS, math.ceil(len(outputs["text"]) / 6))
-            if 'stop_reason' in outputs:
-                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs['stop_reason']])
+            if "stop_reason" in outputs:
+                span.set_attribute(GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]])
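
On the response side, on_success drains the StreamingBody, parses it as JSON, and records token usage and finish reasons per vendor. Below is a standalone sketch of the amazon.titan path, again not part of the commit and with an illustrative payload; Titan reports real token counts, while the Cohere and Mistral handlers fall back to the same ceil(len(text) / 6) character heuristic.

import json
import math
from io import BytesIO

from botocore.response import StreamingBody

# Illustrative Titan InvokeModel response, wrapped the way botocore returns it.
payload = json.dumps(
    {"inputTextTokenCount": 12,
     "results": [{"tokenCount": 45, "completionReason": "FINISH", "outputText": "..."}]}
).encode("utf-8")
body = StreamingBody(BytesIO(payload), content_length=len(payload))

# Same steps as on_success: read the stream fully, decode, parse, close.
response_body = json.loads(body.read().decode("utf-8"))
body.close()

result = response_body["results"][0]
print(response_body["inputTextTokenCount"])  # -> GEN_AI_USAGE_INPUT_TOKENS: 12
print(result["tokenCount"])                  # -> GEN_AI_USAGE_OUTPUT_TOKENS: 45
print([result["completionReason"]])          # -> GEN_AI_RESPONSE_FINISH_REASONS: ['FINISH']
print(math.ceil(len("x" * 300) / 6))         # Cohere/Mistral fallback: 50 output tokens

Note that .read() consumes the stream and the finally block closes it, so the caller's own result["body"] is exhausted after the hook runs unless the body is re-buffered.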
