Add Bedrock InvokeModel contract test for the Anthropic Claude model (existing InvokeModel test renamed to Titan)

ezhang6811 · ezhang6811 · commit e3cc6506d997 · 2024-11-08T22:01:13.000Z
diff --git a/test/contract-tests/images/applications/TestSimpleApp.AWSSDK.Core/BedrockTests.cs b/test/contract-tests/images/applications/TestSimpleApp.AWSSDK.Core/BedrockTests.cs
@@ -39,7 +39,7 @@ public GetGuardrailResponse GetGuardrailResponse()
         };
     }
 
-    public void InvokeModel()
+    public void InvokeModelAmazonTitan()
     {
         bedrockRuntime.InvokeModelAsync(new InvokeModelRequest
         {
@@ -59,7 +59,7 @@ public void InvokeModel()
         return;
     }
 
-    public object InvokeModelResponse()
+    public object InvokeModelAmazonTitanResponse()
     {
         return new
         {
@@ -68,14 +68,42 @@ public object InvokeModelResponse()
             {
                 new
                 {
-                    outputText = "\nsample output text\n",
+                    outputText = "sample output text",
                     tokenCount = 789,
                     completionReason = "finish_reason"
                 },
             },
         };
     }
 
+    public void InvokeModelAnthropicClaude() // Sends one InvokeModel request for model "anthropic.claude-v2:1"; mapped to the "invoke-model-claude" route in Program.cs.
+    {
+        bedrockRuntime.InvokeModelAsync(new InvokeModelRequest // NOTE(review): the result of InvokeModelAsync is discarded rather than awaited — confirm this is intended.
+        {
+            ModelId = "anthropic.claude-v2:1",
+            Body = new MemoryStream(Encoding.UTF8.GetBytes(JsonSerializer.Serialize(new
+            {
+                // The prompt is 72 chars long (4 x "sample input text ", trailing space included); the contract test expects an estimated input token count of ceil(72/6) = 12.
+                prompt = "sample input text sample input text sample input text sample input text ",
+                temperature = 0.123, // asserted as _GEN_AI_REQUEST_TEMPERATURE in awssdk_test.py
+                top_p = 0.456, // asserted as _GEN_AI_REQUEST_TOP_P in awssdk_test.py
+                max_tokens_to_sample = 123, // asserted as _GEN_AI_REQUEST_MAX_TOKENS in awssdk_test.py
+            }))),
+            ContentType = "application/json",
+        });
+        return;
+    }
+
+    public object InvokeModelAnthropicClaudeResponse() // Mock response body returned for POST "model/anthropic.claude-v2:1/invoke" (see Program.cs).
+    {
+        return new
+        {
+            // The completion is 56 chars long (3 x "sample output text" joined by single spaces); the contract test expects an estimated output token count of ceil(56/6) = 10.
+            completion = "sample output text sample output text sample output text",
+            stop_reason = "finish_reason", // the contract test expects this value in _GEN_AI_RESPONSE_FINISH_REASONS
+        };
+    }
+
     public Task<GetAgentResponse> GetAgent()
     {
         return bedrockAgent.GetAgentAsync(new GetAgentRequest
diff --git a/test/contract-tests/images/applications/TestSimpleApp.AWSSDK.Core/Program.cs b/test/contract-tests/images/applications/TestSimpleApp.AWSSDK.Core/Program.cs
@@ -125,8 +125,11 @@
     .WithName("get-guardrail")
     .WithOpenApi();
 
-app.MapGet("bedrock/invokemodel/invoke-model", (BedrockTests bedrock) => bedrock.InvokeModel())
-    .WithName("invoke-model")
+app.MapGet("bedrock/invokemodel/invoke-model-titan", (BedrockTests bedrock) => bedrock.InvokeModelAmazonTitan())
+    .WithName("invoke-model-titan")
+    .WithOpenApi();
+app.MapGet("bedrock/invokemodel/invoke-model-claude", (BedrockTests bedrock) => bedrock.InvokeModelAnthropicClaude())
+    .WithName("invoke-model-claude")
     .WithOpenApi();
 
 app.MapGet("bedrock/getagent/get-agent", (BedrockTests bedrock) => bedrock.GetAgent())
@@ -152,7 +155,8 @@
 // Reroute the Bedrock API calls to our mock responses in BedrockTests. While other services use localstack to handle the requests,
 // we write our own responses with the necessary data to mimic the expected behavior of the Bedrock services.
 app.MapGet("guardrails/test-guardrail", (BedrockTests bedrock) => bedrock.GetGuardrailResponse());
-app.MapPost("model/amazon.titan-text-express-v1/invoke", (BedrockTests bedrock) => bedrock.InvokeModelResponse());
+app.MapPost("model/amazon.titan-text-express-v1/invoke", (BedrockTests bedrock) => bedrock.InvokeModelAmazonTitanResponse());
+app.MapPost("model/anthropic.claude-v2:1/invoke", (BedrockTests bedrock) => bedrock.InvokeModelAnthropicClaudeResponse());
 app.MapGet("agents/test-agent", (BedrockTests bedrock) => bedrock.GetAgentResponse());
 app.MapGet("knowledgebases/test-knowledge-base", (BedrockTests bedrock) => bedrock.GetKnowledgeBaseResponse());
 app.MapGet("knowledgebases/test-knowledge-base/datasources/test-data-source", (BedrockTests bedrock) => bedrock.GetDataSourceResponse());
diff --git a/test/contract-tests/tests/test/amazon/awssdk/awssdk_test.py b/test/contract-tests/tests/test/amazon/awssdk/awssdk_test.py
@@ -323,9 +323,9 @@ def test_bedrock_get_guardrail(self):
             span_name="Bedrock.GetGuardrail",
         )
 
-    def test_bedrock_runtime_invoke_model(self):
+    def test_bedrock_runtime_invoke_model_titan(self):
         self.do_test_requests(
-            "bedrock/invokemodel/invoke-model",
+            "bedrock/invokemodel/invoke-model-titan",
             "GET",
             200,
             0,
@@ -346,6 +346,30 @@ def test_bedrock_runtime_invoke_model(self):
             },
             span_name="Bedrock Runtime.InvokeModel",
         )
+
+    def test_bedrock_runtime_invoke_model_claude(self):  # contract test for InvokeModel against anthropic.claude-v2:1
+        self.do_test_requests(
+            "bedrock/invokemodel/invoke-model-claude",  # sample-app route that triggers the Claude InvokeModel call
+            "GET",
+            200,  # presumably the expected HTTP status code — TODO confirm against do_test_requests
+            0,
+            0,
+            rpc_service="Bedrock Runtime",
+            remote_service="AWS::BedrockRuntime",
+            remote_operation="InvokeModel",
+            remote_resource_type="AWS::Bedrock::Model",
+            remote_resource_identifier="anthropic.claude-v2:1",
+            request_specific_attributes={
+                _GEN_AI_REQUEST_MODEL: "anthropic.claude-v2:1",
+                _GEN_AI_REQUEST_TEMPERATURE: 0.123,  # same value as `temperature` in the sample app's request body
+                _GEN_AI_REQUEST_TOP_P: 0.456,  # same value as `top_p` in the sample app's request body
+                _GEN_AI_REQUEST_MAX_TOKENS: 123,  # same value as `max_tokens_to_sample` in the sample app's request body
+                _GEN_AI_USAGE_INPUT_TOKENS: 12,  # ceil(72 / 6): the sample prompt is 72 characters long
+                _GEN_AI_USAGE_OUTPUT_TOKENS: 10,  # ceil(56 / 6): the mocked completion is 56 characters long
+                _GEN_AI_RESPONSE_FINISH_REASONS: ["finish_reason"],  # same value as `stop_reason` in the mocked response
+            },
+            span_name="Bedrock Runtime.InvokeModel",
+        )
 
     def test_bedrock_agent_runtime_invoke_agent(self):
         self.do_test_requests(
@@ -612,6 +636,7 @@ def _filter_bedrock_metrics(self, target_metrics: List[Metric]):
             "GET knowledgebases/test-knowledge-base/datasources/test-data-source",
             "POST agents/test-agent/agentAliases/test-agent-alias/sessions/test-session/text",
             "POST model/amazon.titan-text-express-v1/invoke",
+            "POST model/anthropic.claude-v2:1/invoke",
             "POST knowledgebases/test-knowledge-base/retrieve"
         }
         for metric in target_metrics: