Update Claude support to replace legacy model

ezhang6811 · ezhang6811 · commit 07ace1f773aa · 2024-11-21T01:49:08.000Z
diff --git a/src/OpenTelemetry.Instrumentation.AWS/Implementation/AWSLlmModelProcessor.cs b/src/OpenTelemetry.Instrumentation.AWS/Implementation/AWSLlmModelProcessor.cs
@@ -10,7 +10,7 @@ namespace OpenTelemetry.Instrumentation.AWS.Implementation;
 
 internal class AWSLlmModelProcessor
 {
-    internal static void ProcessGenAiAttributes<T>(Activity activity, T message, string model, bool isRequest)
+    internal static void ProcessGenAiAttributes<T>(Activity activity, T message, string modelName, bool isRequest)
     {
         // message can be either a request or a response. isRequest is used by the model-specific methods to determine
         // whether to extract the request or response attributes.
@@ -37,26 +37,29 @@ internal static void ProcessGenAiAttributes<T>(Activity activity, T message, str
                     }
 
                     // extract model specific attributes based on model name
-                    switch (model)
+                    if (modelName.Contains("amazon.titan"))
                     {
-                        case "amazon.titan":
-                            ProcessTitanModelAttributes(activity, jsonObject, isRequest);
-                            break;
-                        case "anthropic.claude":
-                            ProcessClaudeModelAttributes(activity, jsonObject, isRequest);
-                            break;
-                        case "meta.llama3":
-                            ProcessLlamaModelAttributes(activity, jsonObject, isRequest);
-                            break;
-                        case "cohere.command":
-                            ProcessCommandModelAttributes(activity, jsonObject, isRequest);
-                            break;
-                        case "ai21.jamba":
-                            ProcessJambaModelAttributes(activity, jsonObject, isRequest);
-                            break;
-                        case "mistral.mistral":
-                            ProcessMistralModelAttributes(activity, jsonObject, isRequest);
-                            break;
+                        ProcessTitanModelAttributes(activity, jsonObject, isRequest);
+                    }
+                    else if (modelName.Contains("anthropic.claude"))
+                    {
+                        ProcessClaudeModelAttributes(activity, jsonObject, isRequest);
+                    }
+                    else if (modelName.Contains("meta.llama3"))
+                    {
+                        ProcessLlamaModelAttributes(activity, jsonObject, isRequest);
+                    }
+                    else if (modelName.Contains("cohere.command"))
+                    {
+                        ProcessCommandModelAttributes(activity, jsonObject, isRequest);
+                    }
+                    else if (modelName.Contains("ai21.jamba"))
+                    {
+                        ProcessJambaModelAttributes(activity, jsonObject, isRequest);
+                    }
+                    else if (modelName.Contains("mistral.mistral"))
+                    {
+                        ProcessMistralModelAttributes(activity, jsonObject, isRequest);
                     }
                 }
                 catch (Exception ex)
@@ -135,28 +138,27 @@ private static void ProcessClaudeModelAttributes(Activity activity, Dictionary<s
                     activity.SetTag(AWSSemanticConventions.AttributeGenAiTemperature, temperature.GetDouble());
                 }
 
-                if (jsonBody.TryGetValue("max_tokens_to_sample", out var maxTokens))
+                if (jsonBody.TryGetValue("max_tokens", out var maxTokens))
                 {
                     activity.SetTag(AWSSemanticConventions.AttributeGenAiMaxTokens, maxTokens.GetInt32());
                 }
-
-                // input tokens not provided in Claude response body, so we estimate the value based on input length
-                if (jsonBody.TryGetValue("prompt", out var input))
-                {
-                    activity.SetTag(AWSSemanticConventions.AttributeGenAiInputTokens, Convert.ToInt32(Math.Ceiling((double) input.GetString().Length / 6)));
-                }
             }
             else
             {
-                if (jsonBody.TryGetValue("stop_reason", out var finishReasons))
+                if (jsonBody.TryGetValue("usage", out var usage))
                 {
-                    activity.SetTag(AWSSemanticConventions.AttributeGenAiFinishReasons, new string[] { finishReasons.GetString() });
+                    if (usage.TryGetProperty("input_tokens", out var inputTokens))
+                    {
+                        activity.SetTag(AWSSemanticConventions.AttributeGenAiInputTokens, inputTokens.GetInt32());
+                    }
+                    if (usage.TryGetProperty("output_tokens", out var outputTokens))
+                    {
+                        activity.SetTag(AWSSemanticConventions.AttributeGenAiOutputTokens, outputTokens.GetInt32());
+                    }
                 }
-
-                // output tokens not provided in Claude response body, so we estimate the value based on output length
-                if (jsonBody.TryGetValue("completion", out var output))
+                if (jsonBody.TryGetValue("stop_reason", out var finishReasons))
                 {
-                    activity.SetTag(AWSSemanticConventions.AttributeGenAiOutputTokens, Convert.ToInt32(Math.Ceiling((double) output.GetString().Length / 6)));
+                    activity.SetTag(AWSSemanticConventions.AttributeGenAiFinishReasons, new string[] { finishReasons.GetString() });
                 }
             }
         }
diff --git a/src/OpenTelemetry.Instrumentation.AWS/Implementation/AWSTracingPipelineHandler.cs b/src/OpenTelemetry.Instrumentation.AWS/Implementation/AWSTracingPipelineHandler.cs
@@ -168,8 +168,7 @@ private static void AddRequestSpecificInformation(Activity activity, IRequestCon
                                 var modelString = model.ToString();
                                 if (modelString != null)
                                 {
-                                    var modelName = modelString.Split('-')[0];
-                                    AWSLlmModelProcessor.ProcessGenAiAttributes(activity, request, modelName, true);
+                                    AWSLlmModelProcessor.ProcessGenAiAttributes(activity, request, modelString, true);
                                 }
                             }
                         }
@@ -252,8 +251,7 @@ private static void AddResponseSpecificInformation(Activity activity, IResponseC
                 var modelString = model.ToString();
                 if (modelString != null)
                 {
-                    var modelName = modelString.Split('-')[0];
-                    AWSLlmModelProcessor.ProcessGenAiAttributes(activity, responseContext.Response, modelName, false);
+                    AWSLlmModelProcessor.ProcessGenAiAttributes(activity, responseContext.Response, modelString, false);
                 }
             }
         }
diff --git a/test/contract-tests/images/applications/TestSimpleApp.AWSSDK.Core/BedrockTests.cs b/test/contract-tests/images/applications/TestSimpleApp.AWSSDK.Core/BedrockTests.cs
@@ -83,14 +83,27 @@ public void InvokeModelAnthropicClaude()
     {
         bedrockRuntime.InvokeModelAsync(new InvokeModelRequest
         {
-            ModelId = "anthropic.claude-v2:1",
+            ModelId = "us.anthropic.claude-3-5-haiku-20241022-v1:0",
             Body = new MemoryStream(Encoding.UTF8.GetBytes(JsonSerializer.Serialize(new
             {
-                // prompt is 72 chars long, input_tokens should be estimated as ceil(72/6) = 12
-                prompt = "sample input text sample input text sample input text sample input text ",
+                messages = new object[]
+                {
+                    new
+                    {
+                        role = "user",
+                        content = new object[]
+                        {
+                            new
+                            {
+                                type = "text",
+                                text = "sample input text",
+                            }
+                        }
+                    },
+                },
                 temperature = 0.123,
                 top_p = 0.456,
-                max_tokens_to_sample = 123,
+                max_tokens = 123,
             }))),
             ContentType = "application/json",
         });
@@ -101,8 +114,11 @@ public object InvokeModelAnthropicClaudeResponse()
     {
         return new
         {
-            // response is 56 chars long, output_tokens should be estimated as ceil(56/6) = 10
-            completion = "sample output text sample output text sample output text",
+            usage = new
+            {
+                input_tokens = 456,
+                output_tokens = 789,
+            },
             stop_reason = "finish_reason",
         };
     }
diff --git a/test/contract-tests/images/applications/TestSimpleApp.AWSSDK.Core/Program.cs b/test/contract-tests/images/applications/TestSimpleApp.AWSSDK.Core/Program.cs
@@ -174,7 +174,7 @@
 app.MapGet("guardrails/test-guardrail", (BedrockTests bedrock) => bedrock.GetGuardrailResponse());
 // For invoke model, we have one test case for each of the 6 supported models.
 app.MapPost("model/amazon.titan-text-express-v1/invoke", (BedrockTests bedrock) => bedrock.InvokeModelAmazonTitanResponse());
-app.MapPost("model/anthropic.claude-v2:1/invoke", (BedrockTests bedrock) => bedrock.InvokeModelAnthropicClaudeResponse());
+app.MapPost("model/us.anthropic.claude-3-5-haiku-20241022-v1:0/invoke", (BedrockTests bedrock) => bedrock.InvokeModelAnthropicClaudeResponse());
 app.MapPost("model/meta.llama3-8b-instruct-v1:0/invoke", (BedrockTests bedrock) => bedrock.InvokeModelMetaLlamaResponse());
 app.MapPost("model/cohere.command-r-v1:0/invoke", (BedrockTests bedrock) => bedrock.InvokeModelCohereCommandResponse());
 app.MapPost("model/ai21.jamba-1-5-large-v1:0/invoke", (BedrockTests bedrock) => bedrock.InvokeModelAi21JambaResponse());
diff --git a/test/contract-tests/tests/test/amazon/awssdk/awssdk_test.py b/test/contract-tests/tests/test/amazon/awssdk/awssdk_test.py
@@ -360,15 +360,15 @@ def test_bedrock_runtime_invoke_model_claude(self):
             remote_service="AWS::BedrockRuntime",
             remote_operation="InvokeModel",
             remote_resource_type="AWS::Bedrock::Model",
-            remote_resource_identifier="anthropic.claude-v2:1",
+            remote_resource_identifier="us.anthropic.claude-3-5-haiku-20241022-v1:0",
             request_response_specific_attributes={
                 _GEN_AI_SYSTEM: "aws_bedrock",
-                _GEN_AI_REQUEST_MODEL: "anthropic.claude-v2:1",
+                _GEN_AI_REQUEST_MODEL: "us.anthropic.claude-3-5-haiku-20241022-v1:0",
                 _GEN_AI_REQUEST_TEMPERATURE: 0.123,
                 _GEN_AI_REQUEST_TOP_P: 0.456,
                 _GEN_AI_REQUEST_MAX_TOKENS: 123,
-                _GEN_AI_USAGE_INPUT_TOKENS: 12,
-                _GEN_AI_USAGE_OUTPUT_TOKENS: 10,
+                _GEN_AI_USAGE_INPUT_TOKENS: 456,
+                _GEN_AI_USAGE_OUTPUT_TOKENS: 789,
                 _GEN_AI_RESPONSE_FINISH_REASONS: ["finish_reason"],
             },
             span_name="Bedrock Runtime.InvokeModel",
@@ -739,7 +739,7 @@ def _filter_bedrock_metrics(self, target_metrics: List[Metric]):
             "GET knowledgebases/test-knowledge-base/datasources/test-data-source",
             "POST agents/test-agent/agentAliases/test-agent-alias/sessions/test-session/text",
             "POST model/amazon.titan-text-express-v1/invoke",
-            "POST model/anthropic.claude-v2:1/invoke",
+            "POST model/us.anthropic.claude-3-5-haiku-20241022-v1:0/invoke",
             "POST model/meta.llama3-8b-instruct-v1:0/invoke",
             "POST model/cohere.command-r-v1:0/invoke",
             "POST model/ai21.jamba-1-5-large-v1:0/invoke",

Original file line number	Diff line number	Diff line change
`@@ -168,8 +168,7 @@ private static void AddRequestSpecificInformation(Activity activity, IRequestCon`
`168`	`168`	`var modelString = model.ToString();`
`169`	`169`	`if (modelString != null)`
`170`	`170`	`{`
`171`		`- var modelName = modelString.Split('-')[0];`
`172`		`- AWSLlmModelProcessor.ProcessGenAiAttributes(activity, request, modelName, true);`
	`171`	`+ AWSLlmModelProcessor.ProcessGenAiAttributes(activity, request, modelString, true);`
`173`	`172`	`}`
`174`	`173`	`}`
`175`	`174`	`}`
`@@ -252,8 +251,7 @@ private static void AddResponseSpecificInformation(Activity activity, IResponseC`
`252`	`251`	`var modelString = model.ToString();`
`253`	`252`	`if (modelString != null)`
`254`	`253`	`{`
`255`		`- var modelName = modelString.Split('-')[0];`
`256`		`- AWSLlmModelProcessor.ProcessGenAiAttributes(activity, responseContext.Response, modelName, false);`
	`254`	`+ AWSLlmModelProcessor.ProcessGenAiAttributes(activity, responseContext.Response, modelString, false);`
`257`	`255`	`}`
`258`	`256`	`}`
`259`	`257`	`}`