
Commit 4797a7d

added assertions for response and usage inference parameter attributes
1 parent 60eec28 commit 4797a7d

File tree

2 files changed: +174, -66 lines changed

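Note: this commit makes the mock Bedrock server return a canned InvokeModel response body for each supported model, and extends the contract tests to assert the `gen_ai.response.finish_reasons`, `gen_ai.usage.input_tokens`, and `gen_ai.usage.output_tokens` span attributes derived from those bodies. For `cohere.command` and `mistral.mistral`, whose canned bodies carry no token counts, the test expectations fall back to a rough six-characters-per-token estimate. A minimal sketch of that arithmetic (the `estimateTokens` helper name is illustrative, not part of the diff):

```js
// Rough ~6-characters-per-token estimate, mirrored by the Python tests
// as math.ceil(len(text) / 6). Assumed here to match the instrumentation's
// fallback when a model's response reports no usage counts.
const estimateTokens = (text) => Math.ceil(text.length / 6);

const prompt = "Describe the purpose of a 'hello world' program in one line.";
console.log(estimateTokens(prompt));             // expected gen_ai.usage.input_tokens
console.log(estimateTokens('test-output-text')); // expected gen_ai.usage.output_tokens
```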

contract-tests/images/applications/aws-sdk/server.js

Lines changed: 130 additions & 59 deletions
@@ -10,7 +10,6 @@ const { S3Client, CreateBucketCommand, PutObjectCommand, GetObjectCommand } = re
 const { DynamoDBClient, CreateTableCommand, PutItemCommand } = require('@aws-sdk/client-dynamodb');
 const { SQSClient, CreateQueueCommand, SendMessageCommand, ReceiveMessageCommand } = require('@aws-sdk/client-sqs');
 const { KinesisClient, CreateStreamCommand, PutRecordCommand } = require('@aws-sdk/client-kinesis');
-const fetch = require('node-fetch');
 const { BedrockClient, GetGuardrailCommand } = require('@aws-sdk/client-bedrock');
 const { BedrockAgentClient, GetKnowledgeBaseCommand, GetDataSourceCommand, GetAgentCommand } = require('@aws-sdk/client-bedrock-agent');
 const { BedrockRuntimeClient, InvokeModelCommand } = require('@aws-sdk/client-bedrock-runtime');
@@ -553,28 +552,44 @@ async function handleBedrockRequest(req, res, path) {
       });
       res.statusCode = 200;
     } else if (path.includes('invokemodel/invoke-model')) {
-      await withInjected200Success(bedrockRuntimeClient, ['InvokeModelCommand'], {}, async () => {
-        let modelId = ''
-        let body = {}
-        const userMessage = "Describe the purpose of a 'hello world' program in one line.";
-        const prompt = `<s>[INST] ${userMessage} [/INST]`;
-
-        if (path.includes('amazon.titan')) {
+      const get_model_request_response = function () {
+        const prompt = "Describe the purpose of a 'hello world' program in one line.";
+        let modelId = ''
+        let request_body = {}
+        let response_body = {}
+
+        if (path.includes('amazon.titan')) {
+
           modelId = 'amazon.titan-text-premier-v1:0';
-          body = JSON.stringify({
+
+          request_body = {
             inputText: prompt,
             textGenerationConfig: {
               maxTokenCount: 3072,
               stopSequences: [],
               temperature: 0.7,
               topP: 0.9,
             },
-          });
-        }
+          };
+
+          response_body = {
+            inputTextTokenCount: 15,
+            results: [
+              {
+                tokenCount: 13,
+                outputText: 'text-test-response',
+                completionReason: 'CONTENT_FILTERED',
+              },
+            ],
+          }
+
+        }
 
-        if (path.includes('anthropic.claude')) {
+        if (path.includes('anthropic.claude')) {
+
           modelId = 'anthropic.claude-v2:1';
-          body = JSON.stringify({
+
+          request_body = {
             anthropic_version: 'bedrock-2023-05-31',
             max_tokens: 1000,
             temperature: 0.99,
@@ -585,64 +600,120 @@ async function handleBedrockRequest(req, res, path) {
               content: [{ type: 'text', text: prompt }],
             },
           ],
-          });
-        }
-
-        if (path.includes('meta.llama')) {
-          modelId = 'meta.llama2-13b-chat-v1';
-          body = JSON.stringify({
-            prompt,
-            max_gen_len: 512,
-            temperature: 0.5,
-            top_p: 0.9
-          });
-        }
+          };
 
-        if (path.includes('cohere.command')) {
-          modelId = 'cohere.command-light-text-v14';
-          body = JSON.stringify({
-            prompt,
-            max_tokens: 512,
-            temperature: 0.5,
-            p: 0.65,
-          });
-        }
-
-        if (path.includes('ai21.jamba')) {
-          modelId = 'ai21.jamba-1-5-large-v1:0';
-          body = JSON.stringify({
-            messages: [
-              {
-                role: 'user',
-                content: prompt,
+          response_body = {
+            stop_reason: 'end_turn',
+            usage: {
+              input_tokens: 15,
+              output_tokens: 13,
             },
-            ],
-            top_p: 0.8,
-            temperature: 0.6,
-            max_tokens: 512,
-          });
-        }
-
-        if (path.includes('mistral.mistral')) {
-          modelId = 'mistral.mistral-7b-instruct-v0:2';
-          body = JSON.stringify({
-            prompt,
-            max_tokens: 4096,
-            temperature: 0.75,
-            top_p: 0.99,
-          });
+          }
+        }
+
+        if (path.includes('meta.llama')) {
+          modelId = 'meta.llama2-13b-chat-v1';
+
+          request_body = {
+            prompt,
+            max_gen_len: 512,
+            temperature: 0.5,
+            top_p: 0.9
+          };
+
+          response_body = {
+            prompt_token_count: 31,
+            generation_token_count: 49,
+            stop_reason: 'stop'
+          }
+        }
+
+        if (path.includes('cohere.command')) {
+          modelId = 'cohere.command-light-text-v14';
+
+          request_body = {
+            prompt,
+            max_tokens: 512,
+            temperature: 0.5,
+            p: 0.65,
+          };
+
+          response_body = {
+            generations: [
+              {
+                finish_reason: 'COMPLETE',
+                text: 'test-generation-text',
+              },
+            ],
+            prompt: prompt,
+          };
+        }
+
+        if (path.includes('ai21.jamba')) {
+          modelId = 'ai21.jamba-1-5-large-v1:0';
+
+          request_body = {
+            messages: [
+              {
+                role: 'user',
+                content: prompt,
+              },
+            ],
+            top_p: 0.8,
+            temperature: 0.6,
+            max_tokens: 512,
+          };
+
+          response_body = {
+            stop_reason: 'end_turn',
+            usage: {
+              prompt_tokens: 21,
+              completion_tokens: 24,
+            },
+            choices: [
+              {
+                finish_reason: 'stop',
+              },
+            ],
+          }
+        }
+
+        if (path.includes('mistral.mistral')) {
+          modelId = 'mistral.mistral-7b-instruct-v0:2';
+
+          request_body = {
+            prompt,
+            max_tokens: 4096,
+            temperature: 0.75,
+            top_p: 0.99,
+          };
+
+          response_body = {
+            outputs: [
+              {
+                text: 'test-output-text',
+                stop_reason: 'stop',
+              },
+            ]
+          }
+        }
+
+        return [modelId, JSON.stringify(request_body), new TextEncoder().encode(JSON.stringify(response_body))]
       }
+
+      const [modelId, request_body, response_body] = get_model_request_response();
 
+      await withInjected200Success(bedrockRuntimeClient, ['InvokeModelCommand'], { body: response_body }, async () => {
         await bedrockRuntimeClient.send(
           new InvokeModelCommand({
-            body: body,
+            body: request_body,
             modelId: modelId,
             accept: 'application/json',
             contentType: 'application/json',
           })
        );
       });
-
+
       res.statusCode = 200;
     } else {
       res.statusCode = 404;
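Two things to note in the server change above: request/response pairs are now built per model by `get_model_request_response`, and the canned body is handed to `withInjected200Success` as `{ body: response_body }` so the instrumentation under test has real fields to parse. The body is encoded with `TextEncoder` because a real InvokeModel response exposes its body as bytes. `withInjected200Success` itself is defined elsewhere in server.js and is not part of this diff; the sketch below is only a guess at its shape, inferred from its call sites and the AWS SDK v3 middleware API:

```js
// Hypothetical sketch -- NOT the helper's actual implementation. Its call
// sites suggest a deserialize-step middleware that skips the network round
// trip and resolves matching commands with a canned 200 output.
async function withInjected200SuccessSketch(client, commandNames, extraOutput, fn) {
  client.middlewareStack.add(
    (next, context) => async (args) => {
      if (!commandNames.includes(context.commandName)) return next(args);
      return {
        response: { statusCode: 200 },
        output: { $metadata: { httpStatusCode: 200 }, ...extraOutput },
      };
    },
    { step: 'deserialize', name: 'inject200Success', override: true }
  );
  try {
    await fn(); // e.g. client.send(new InvokeModelCommand({ ... }))
  } finally {
    client.middlewareStack.remove('inject200Success');
  }
}
```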

contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py

Lines changed: 44 additions & 7 deletions
@@ -1,6 +1,7 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
 from logging import INFO, Logger, getLogger
+import math
 from typing import Dict, List
 
 from docker.types import EndpointConfig
@@ -37,7 +38,9 @@
 _GEN_AI_REQUEST_TEMPERATURE: str = "gen_ai.request.temperature"
 _GEN_AI_REQUEST_TOP_P: str = "gen_ai.request.top_p"
 _GEN_AI_REQUEST_MAX_TOKENS: str = "gen_ai.request.max_tokens"
-
+_GEN_AI_RESPONSE_FINISH_REASONS: str = "gen_ai.response.finish_reasons"
+_GEN_AI_USAGE_INPUT_TOKENS: str = 'gen_ai.usage.input_tokens'
+_GEN_AI_USAGE_OUTPUT_TOKENS: str = 'gen_ai.usage.output_tokens'
 
 # pylint: disable=too-many-public-methods
 class AWSSDKTest(ContractTestBase):
@@ -410,7 +413,7 @@ def test_kinesis_fault(self):
         )
 
     def test_bedrock_runtime_invoke_model_amazon_titan(self):
-        self.do_test_requests(
+        result = self.do_test_requests(
             "bedrock/invokemodel/invoke-model/amazon.titan-text-premier-v1:0",
             "GET",
             200,
@@ -428,9 +431,15 @@ def test_bedrock_runtime_invoke_model_amazon_titan(self):
                 _GEN_AI_REQUEST_TEMPERATURE: 0.7,
                 _GEN_AI_REQUEST_TOP_P: 0.9
             },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ['CONTENT_FILTERED'],
+                _GEN_AI_USAGE_INPUT_TOKENS: 15,
+                _GEN_AI_USAGE_OUTPUT_TOKENS: 13
+            },
+
             span_name="BedrockRuntime.InvokeModel"
         )
-
+
     def test_bedrock_runtime_invoke_model_anthropic_claude(self):
         self.do_test_requests(
             "bedrock/invokemodel/invoke-model/anthropic.claude-v2:1",
@@ -450,6 +459,11 @@ def test_bedrock_runtime_invoke_model_anthropic_claude(self):
                 _GEN_AI_REQUEST_TEMPERATURE: 0.99,
                 _GEN_AI_REQUEST_TOP_P: 1
             },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ['end_turn'],
+                _GEN_AI_USAGE_INPUT_TOKENS: 15,
+                _GEN_AI_USAGE_OUTPUT_TOKENS: 13
+            },
             span_name="BedrockRuntime.InvokeModel"
         )
 
@@ -472,6 +486,11 @@ def test_bedrock_runtime_invoke_model_meta_llama(self):
                 _GEN_AI_REQUEST_TEMPERATURE: 0.5,
                 _GEN_AI_REQUEST_TOP_P: 0.9
             },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ['stop'],
+                _GEN_AI_USAGE_INPUT_TOKENS: 31,
+                _GEN_AI_USAGE_OUTPUT_TOKENS: 49
+            },
             span_name="BedrockRuntime.InvokeModel"
         )
 
@@ -494,6 +513,11 @@ def test_bedrock_runtime_invoke_model_cohere_command(self):
                 _GEN_AI_REQUEST_TEMPERATURE: 0.5,
                 _GEN_AI_REQUEST_TOP_P: 0.65
             },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ['COMPLETE'],
+                _GEN_AI_USAGE_INPUT_TOKENS: math.ceil(len("Describe the purpose of a 'hello world' program in one line.") / 6),
+                _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-generation-text") / 6)
+            },
             span_name="BedrockRuntime.InvokeModel"
         )
 
@@ -516,6 +540,11 @@ def test_bedrock_runtime_invoke_model_ai21_jamba(self):
                 _GEN_AI_REQUEST_TEMPERATURE: 0.6,
                 _GEN_AI_REQUEST_TOP_P: 0.8
             },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ['stop'],
+                _GEN_AI_USAGE_INPUT_TOKENS: 21,
+                _GEN_AI_USAGE_OUTPUT_TOKENS: 24
+            },
             span_name="BedrockRuntime.InvokeModel"
         )
 
@@ -538,6 +567,11 @@ def test_bedrock_runtime_invoke_model_mistral_mistral(self):
                 _GEN_AI_REQUEST_TEMPERATURE: 0.75,
                 _GEN_AI_REQUEST_TOP_P: 0.99
             },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ['stop'],
+                _GEN_AI_USAGE_INPUT_TOKENS: math.ceil(len("Describe the purpose of a 'hello world' program in one line.") / 6),
+                _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-output-text") / 6)
+            },
             span_name="BedrockRuntime.InvokeModel"
         )
 
@@ -654,9 +688,6 @@ def test_bedrock_agent_get_data_source(self):
             },
             span_name="BedrockAgent.GetDataSource",
         )
-
-    # def test_bedrock_agent_runtime_invoke_agent(self):
-    #     return None
 
     @override
     def _assert_aws_span_attributes(self, resource_scope_spans: List[ResourceScopeSpan], path: str, **kwargs) -> None:
@@ -726,6 +757,7 @@ def _assert_semantic_conventions_span_attributes(
             kwargs.get("remote_operation"),
             status_code,
             kwargs.get("request_specific_attributes", {}),
+            kwargs.get("response_specific_attributes", {}),
         )
 
     # pylint: disable=unidiomatic-typecheck
@@ -736,6 +768,7 @@ def _assert_semantic_conventions_attributes(
         operation: str,
         status_code: int,
         request_specific_attributes: dict,
+        response_specific_attributes: dict,
     ) -> None:
         attributes_dict: Dict[str, AnyValue] = self._get_attributes_dict(attributes_list)
         self._assert_str_attribute(attributes_dict, SpanAttributes.RPC_METHOD, operation)
@@ -744,7 +777,11 @@ def _assert_semantic_conventions_attributes(
         self._assert_int_attribute(attributes_dict, SpanAttributes.HTTP_STATUS_CODE, status_code)
         # TODO: aws sdk instrumentation is not respecting PEER_SERVICE
         # self._assert_str_attribute(attributes_dict, SpanAttributes.PEER_SERVICE, "backend:8080")
-        for key, value in request_specific_attributes.items():
+        self._assert_specific_attributes(attributes_dict, request_specific_attributes)
+        self._assert_specific_attributes(attributes_dict, response_specific_attributes)
+
+    def _assert_specific_attributes(self, attributes_dict: Dict[str, AnyValue], specific_attributes: Dict[str, AnyValue]) -> None:
+        for key, value in specific_attributes.items():
             if isinstance(value, str):
                 self._assert_str_attribute(attributes_dict, key, value)
             elif isinstance(value, int):
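The new `response_specific_attributes` expectations line up field-by-field with the canned bodies the mock server injects; each model family reports finish reasons and token usage under different names (`completionReason` vs. `stop_reason` vs. `finish_reason`, `inputTextTokenCount` vs. `usage.input_tokens` vs. `prompt_token_count`, and so on). A self-contained sketch of the amazon.titan case, showing how the byte-encoded body decodes into the values the test asserts (variable names are illustrative):

```js
// The mock injects the titan body as bytes, matching a real InvokeModel response.
const responseBody = new TextEncoder().encode(JSON.stringify({
  inputTextTokenCount: 15,
  results: [{ tokenCount: 13, outputText: 'text-test-response', completionReason: 'CONTENT_FILTERED' }],
}));

// Decoding side: the fields the asserted span attributes are derived from.
const parsed = JSON.parse(new TextDecoder().decode(responseBody));
console.log(parsed.results.map((r) => r.completionReason)); // gen_ai.response.finish_reasons -> ['CONTENT_FILTERED']
console.log(parsed.inputTextTokenCount);                    // gen_ai.usage.input_tokens -> 15
console.log(parsed.results[0].tokenCount);                  // gen_ai.usage.output_tokens -> 13
```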
