diff --git a/contract-tests/images/applications/aws-sdk/server.js b/contract-tests/images/applications/aws-sdk/server.js
index 0a841550..6fd65c7d 100644
--- a/contract-tests/images/applications/aws-sdk/server.js
+++ b/contract-tests/images/applications/aws-sdk/server.js
@@ -10,7 +10,6 @@ const { S3Client, CreateBucketCommand, PutObjectCommand, GetObjectCommand } = re
 const { DynamoDBClient, CreateTableCommand, PutItemCommand } = require('@aws-sdk/client-dynamodb');
 const { SQSClient, CreateQueueCommand, SendMessageCommand, ReceiveMessageCommand } = require('@aws-sdk/client-sqs');
 const { KinesisClient, CreateStreamCommand, PutRecordCommand } = require('@aws-sdk/client-kinesis');
-const fetch = require('node-fetch');
 const { BedrockClient, GetGuardrailCommand } = require('@aws-sdk/client-bedrock');
 const { BedrockAgentClient, GetKnowledgeBaseCommand, GetDataSourceCommand, GetAgentCommand } = require('@aws-sdk/client-bedrock-agent');
 const { BedrockRuntimeClient, InvokeModelCommand } = require('@aws-sdk/client-bedrock-runtime');
@@ -553,30 +552,190 @@ async function handleBedrockRequest(req, res, path) {
     });
     res.statusCode = 200;
   } else if (path.includes('invokemodel/invoke-model')) {
-    await withInjected200Success(bedrockRuntimeClient, ['InvokeModelCommand'], {}, async () => {
-      const modelId = 'amazon.titan-text-premier-v1:0';
-      const userMessage = "Describe the purpose of a 'hello world' program in one line.";
-      const prompt = `[INST] ${userMessage} [/INST]`;
-
-      const body = JSON.stringify({
-        inputText: prompt,
-        textGenerationConfig: {
-          maxTokenCount: 3072,
-          stopSequences: [],
-          temperature: 0.7,
-          topP: 0.9,
-        },
-      });
+    // Build the mocked InvokeModel request/response pair for the model named in the path.
+    const get_model_request_response = function () {
+      const prompt = "Describe the purpose of a 'hello world' program in one line.";
+      let modelId = '';
+      let request_body = {};
+      let response_body = {};
+
+      if (path.includes('amazon.titan')) {
+        modelId = 'amazon.titan-text-premier-v1:0';
+
+        request_body = {
+          inputText: prompt,
+          textGenerationConfig: {
+            maxTokenCount: 3072,
+            stopSequences: [],
+            temperature: 0.7,
+            topP: 0.9,
+          },
+        };
+
+        response_body = {
+          inputTextTokenCount: 15,
+          results: [
+            {
+              tokenCount: 13,
+              outputText: 'text-test-response',
+              completionReason: 'CONTENT_FILTERED',
+            },
+          ],
+        };
+      }
+
+      if (path.includes('anthropic.claude')) {
+        modelId = 'anthropic.claude-v2:1';
+
+        request_body = {
+          anthropic_version: 'bedrock-2023-05-31',
+          max_tokens: 1000,
+          temperature: 0.99,
+          top_p: 1,
+          messages: [
+            {
+              role: 'user',
+              content: [{ type: 'text', text: prompt }],
+            },
+          ],
+        };
+
+        response_body = {
+          stop_reason: 'end_turn',
+          usage: {
+            input_tokens: 15,
+            output_tokens: 13,
+          },
+        };
+      }
+
+      if (path.includes('meta.llama')) {
+        modelId = 'meta.llama2-13b-chat-v1';
+
+        request_body = {
+          prompt,
+          max_gen_len: 512,
+          temperature: 0.5,
+          top_p: 0.9,
+        };
+
+        response_body = {
+          prompt_token_count: 31,
+          generation_token_count: 49,
+          stop_reason: 'stop',
+        };
+      }
+
+      if (path.includes('cohere.command')) {
+        modelId = 'cohere.command-light-text-v14';
+
+        request_body = {
+          prompt,
+          max_tokens: 512,
+          temperature: 0.5,
+          p: 0.65,
+        };
+
+        response_body = {
+          generations: [
+            {
+              finish_reason: 'COMPLETE',
+              text: 'test-generation-text',
+            },
+          ],
+          prompt: prompt,
+        };
+      }
+
+      // 'cohere.command-r' paths also match the 'cohere.command' branch above, so this
+      // branch must stay below it to overwrite modelId and both bodies for Command R.
+      if (path.includes('cohere.command-r')) {
+        modelId = 'cohere.command-r-v1:0';
+
+        request_body = {
+          message: prompt,
+          max_tokens: 512,
+          temperature: 0.5,
+          p: 0.65,
+        };
+
+        response_body = {
+          finish_reason: 'COMPLETE',
+          text: 'test-generation-text',
+          prompt: prompt,
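+          // Note (assumption): the Command R mock also echoes the original request's
+          // modelId back in the response body; the Node instrumentation appears to
+          // expect this shape when deriving span attributes for Command R.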
+          request: {
+            commandInput: {
+              modelId: modelId,
+            },
+          },
+        };
+      }
+
+      if (path.includes('ai21.jamba')) {
+        modelId = 'ai21.jamba-1-5-large-v1:0';
+
+        request_body = {
+          messages: [
+            {
+              role: 'user',
+              content: prompt,
+            },
+          ],
+          top_p: 0.8,
+          temperature: 0.6,
+          max_tokens: 512,
+        };
+
+        response_body = {
+          stop_reason: 'end_turn',
+          usage: {
+            prompt_tokens: 21,
+            completion_tokens: 24,
+          },
+          choices: [
+            {
+              finish_reason: 'stop',
+            },
+          ],
+        };
+      }
+
+      if (path.includes('mistral')) {
+        modelId = 'mistral.mistral-7b-instruct-v0:2';
+
+        request_body = {
+          prompt,
+          max_tokens: 4096,
+          temperature: 0.75,
+          top_p: 0.99,
+        };
+
+        response_body = {
+          outputs: [
+            {
+              text: 'test-output-text',
+              stop_reason: 'stop',
+            },
+          ],
+        };
+      }
+
+      // Encode the response body as bytes, matching the Uint8Array body of a real InvokeModel response.
+      return [modelId, JSON.stringify(request_body), new TextEncoder().encode(JSON.stringify(response_body))];
+    };
+
+    const [modelId, request_body, response_body] = get_model_request_response();
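+    // Inject a canned 200 response whose body matches the selected model, so the
+    // instrumentation sees deterministic output for every InvokeModel variant.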
+    await withInjected200Success(bedrockRuntimeClient, ['InvokeModelCommand'], { body: response_body }, async () => {
       await bedrockRuntimeClient.send(
         new InvokeModelCommand({
-          body: body,
+          body: request_body,
           modelId: modelId,
           accept: 'application/json',
           contentType: 'application/json',
         })
       );
     });
+
     res.statusCode = 200;
   } else {
     res.statusCode = 404;
@@ -624,3 +783,4 @@ prepareAwsServer().then(() => {
     console.log('Ready');
   });
 });
+
diff --git a/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py b/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py
index bb2ce3ca..31f72d2a 100644
--- a/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py
+++ b/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py
@@ -1,6 +1,7 @@
 # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 # SPDX-License-Identifier: Apache-2.0
 from logging import INFO, Logger, getLogger
+import math
 from typing import Dict, List

 from docker.types import EndpointConfig
@@ -34,7 +35,12 @@
 _AWS_BEDROCK_KNOWLEDGE_BASE_ID: str = "aws.bedrock.knowledge_base.id"
 _AWS_BEDROCK_DATA_SOURCE_ID: str = "aws.bedrock.data_source.id"
 _GEN_AI_REQUEST_MODEL: str = "gen_ai.request.model"
-
+_GEN_AI_REQUEST_TEMPERATURE: str = "gen_ai.request.temperature"
+_GEN_AI_REQUEST_TOP_P: str = "gen_ai.request.top_p"
+_GEN_AI_REQUEST_MAX_TOKENS: str = "gen_ai.request.max_tokens"
+_GEN_AI_RESPONSE_FINISH_REASONS: str = "gen_ai.response.finish_reasons"
+_GEN_AI_USAGE_INPUT_TOKENS: str = "gen_ai.usage.input_tokens"
+_GEN_AI_USAGE_OUTPUT_TOKENS: str = "gen_ai.usage.output_tokens"

 # pylint: disable=too-many-public-methods
 class AWSSDKTest(ContractTestBase):
@@ -406,9 +412,37 @@ def test_kinesis_fault(self):
             span_name="Kinesis.PutRecord",
         )

-    def test_bedrock_runtime_invoke_model(self):
+    def test_bedrock_runtime_invoke_model_amazon_titan(self):
+        self.do_test_requests(
+            "bedrock/invokemodel/invoke-model/amazon.titan-text-premier-v1:0",
+            "GET",
+            200,
+            0,
+            0,
+            local_operation="GET /bedrock",
+            rpc_service="BedrockRuntime",
+            remote_service="AWS::BedrockRuntime",
+            remote_operation="InvokeModel",
+            remote_resource_type="AWS::Bedrock::Model",
+            remote_resource_identifier="amazon.titan-text-premier-v1:0",
+            request_specific_attributes={
+                _GEN_AI_REQUEST_MODEL: "amazon.titan-text-premier-v1:0",
+                _GEN_AI_REQUEST_MAX_TOKENS: 3072,
+                _GEN_AI_REQUEST_TEMPERATURE: 0.7,
+                _GEN_AI_REQUEST_TOP_P: 0.9,
+            },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ["CONTENT_FILTERED"],
+                _GEN_AI_USAGE_INPUT_TOKENS: 15,
+                _GEN_AI_USAGE_OUTPUT_TOKENS: 13,
+            },
+            span_name="BedrockRuntime.InvokeModel",
+        )
+
+    def test_bedrock_runtime_invoke_model_anthropic_claude(self):
         self.do_test_requests(
-            "bedrock/invokemodel/invoke-model",
+            "bedrock/invokemodel/invoke-model/anthropic.claude-v2:1",
             "GET",
             200,
             0,
@@ -418,13 +452,157 @@
             remote_service="AWS::BedrockRuntime",
             remote_operation="InvokeModel",
             remote_resource_type="AWS::Bedrock::Model",
-            remote_resource_identifier="amazon.titan-text-premier-v1:0",
+            remote_resource_identifier="anthropic.claude-v2:1",
             request_specific_attributes={
-                _GEN_AI_REQUEST_MODEL: "amazon.titan-text-premier-v1:0",
-            },
-            span_name="BedrockRuntime.InvokeModel",
+                _GEN_AI_REQUEST_MODEL: "anthropic.claude-v2:1",
+                _GEN_AI_REQUEST_MAX_TOKENS: 1000,
+                _GEN_AI_REQUEST_TEMPERATURE: 0.99,
+                _GEN_AI_REQUEST_TOP_P: 1,
+            },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ["end_turn"],
+                _GEN_AI_USAGE_INPUT_TOKENS: 15,
+                _GEN_AI_USAGE_OUTPUT_TOKENS: 13,
+            },
+            span_name="BedrockRuntime.InvokeModel",
+        )
+
+    def test_bedrock_runtime_invoke_model_meta_llama(self):
+        self.do_test_requests(
+            "bedrock/invokemodel/invoke-model/meta.llama2-13b-chat-v1",
+            "GET",
+            200,
+            0,
+            0,
+            local_operation="GET /bedrock",
+            rpc_service="BedrockRuntime",
+            remote_service="AWS::BedrockRuntime",
+            remote_operation="InvokeModel",
+            remote_resource_type="AWS::Bedrock::Model",
+            remote_resource_identifier="meta.llama2-13b-chat-v1",
+            request_specific_attributes={
+                _GEN_AI_REQUEST_MODEL: "meta.llama2-13b-chat-v1",
+                _GEN_AI_REQUEST_MAX_TOKENS: 512,
+                _GEN_AI_REQUEST_TEMPERATURE: 0.5,
+                _GEN_AI_REQUEST_TOP_P: 0.9,
+            },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ["stop"],
+                _GEN_AI_USAGE_INPUT_TOKENS: 31,
+                _GEN_AI_USAGE_OUTPUT_TOKENS: 49,
+            },
+            span_name="BedrockRuntime.InvokeModel",
+        )
+
+    def test_bedrock_runtime_invoke_model_cohere_command_r(self):
+        self.do_test_requests(
+            "bedrock/invokemodel/invoke-model/cohere.command-r-v1:0",
+            "GET",
+            200,
+            0,
+            0,
+            local_operation="GET /bedrock",
+            rpc_service="BedrockRuntime",
+            remote_service="AWS::BedrockRuntime",
+            remote_operation="InvokeModel",
+            remote_resource_type="AWS::Bedrock::Model",
+            remote_resource_identifier="cohere.command-r-v1:0",
+            request_specific_attributes={
+                _GEN_AI_REQUEST_MODEL: "cohere.command-r-v1:0",
+                _GEN_AI_REQUEST_MAX_TOKENS: 512,
+                _GEN_AI_REQUEST_TEMPERATURE: 0.5,
+                _GEN_AI_REQUEST_TOP_P: 0.65,
+            },
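+            # The Command R mock reports no token usage, so these expectations mirror
+            # the instrumentation's estimate of roughly one token per six characters
+            # of prompt and generated text (hence math.ceil(len(...) / 6)).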
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ["COMPLETE"],
+                _GEN_AI_USAGE_INPUT_TOKENS: math.ceil(len("Describe the purpose of a 'hello world' program in one line.") / 6),
+                _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-generation-text") / 6),
+            },
+            span_name="BedrockRuntime.InvokeModel",
+        )
+
+    # Remove once this model is fully deprecated in the Node instrumentation
+    def test_bedrock_runtime_invoke_model_cohere_command(self):
+        self.do_test_requests(
+            "bedrock/invokemodel/invoke-model/cohere.command-light-text-v14",
+            "GET",
+            200,
+            0,
+            0,
+            local_operation="GET /bedrock",
+            rpc_service="BedrockRuntime",
+            remote_service="AWS::BedrockRuntime",
+            remote_operation="InvokeModel",
+            remote_resource_type="AWS::Bedrock::Model",
+            remote_resource_identifier="cohere.command-light-text-v14",
+            request_specific_attributes={
+                _GEN_AI_REQUEST_MODEL: "cohere.command-light-text-v14",
+                _GEN_AI_REQUEST_MAX_TOKENS: 512,
+                _GEN_AI_REQUEST_TEMPERATURE: 0.5,
+                _GEN_AI_REQUEST_TOP_P: 0.65,
+            },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ["COMPLETE"],
+                _GEN_AI_USAGE_INPUT_TOKENS: math.ceil(len("Describe the purpose of a 'hello world' program in one line.") / 6),
+                _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-generation-text") / 6),
+            },
+            span_name="BedrockRuntime.InvokeModel",
+        )
+
+    def test_bedrock_runtime_invoke_model_ai21_jamba(self):
+        self.do_test_requests(
+            "bedrock/invokemodel/invoke-model/ai21.jamba-1-5-large-v1:0",
+            "GET",
+            200,
+            0,
+            0,
+            local_operation="GET /bedrock",
+            rpc_service="BedrockRuntime",
+            remote_service="AWS::BedrockRuntime",
+            remote_operation="InvokeModel",
+            remote_resource_type="AWS::Bedrock::Model",
+            remote_resource_identifier="ai21.jamba-1-5-large-v1:0",
+            request_specific_attributes={
+                _GEN_AI_REQUEST_MODEL: "ai21.jamba-1-5-large-v1:0",
+                _GEN_AI_REQUEST_MAX_TOKENS: 512,
+                _GEN_AI_REQUEST_TEMPERATURE: 0.6,
+                _GEN_AI_REQUEST_TOP_P: 0.8,
+            },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ["stop"],
+                _GEN_AI_USAGE_INPUT_TOKENS: 21,
+                _GEN_AI_USAGE_OUTPUT_TOKENS: 24,
+            },
+            span_name="BedrockRuntime.InvokeModel",
+        )
+
+    def test_bedrock_runtime_invoke_model_mistral_mistral(self):
+        self.do_test_requests(
+            "bedrock/invokemodel/invoke-model/mistral.mistral-7b-instruct-v0:2",
+            "GET",
+            200,
+            0,
+            0,
+            local_operation="GET /bedrock",
+            rpc_service="BedrockRuntime",
+            remote_service="AWS::BedrockRuntime",
+            remote_operation="InvokeModel",
+            remote_resource_type="AWS::Bedrock::Model",
+            remote_resource_identifier="mistral.mistral-7b-instruct-v0:2",
+            request_specific_attributes={
+                _GEN_AI_REQUEST_MODEL: "mistral.mistral-7b-instruct-v0:2",
+                _GEN_AI_REQUEST_MAX_TOKENS: 4096,
+                _GEN_AI_REQUEST_TEMPERATURE: 0.75,
+                _GEN_AI_REQUEST_TOP_P: 0.99,
+            },
+            response_specific_attributes={
+                _GEN_AI_RESPONSE_FINISH_REASONS: ["stop"],
+                _GEN_AI_USAGE_INPUT_TOKENS: math.ceil(len("Describe the purpose of a 'hello world' program in one line.") / 6),
+                _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-output-text") / 6),
+            },
+            span_name="BedrockRuntime.InvokeModel",
+        )
+
     def test_bedrock_get_guardrail(self):
         self.do_test_requests(
             "bedrock/getguardrail/get-guardrail",
@@ -591,6 +769,7 @@ def _assert_aws_attributes(
     def _assert_semantic_conventions_span_attributes(
         self, resource_scope_spans: List[ResourceScopeSpan], method: str, path: str, status_code: int, **kwargs
     ) -> None:
+
         target_spans: List[Span] = []
         for resource_scope_span in resource_scope_spans:
             # pylint: disable=no-member
@@ -606,6 +785,7 @@ def _assert_semantic_conventions_span_attributes(
                     kwargs.get("remote_operation"),
                     status_code,
                     kwargs.get("request_specific_attributes", {}),
+                    kwargs.get("response_specific_attributes", {}),
                 )

     # pylint: disable=unidiomatic-typecheck
@@ -616,6 +796,7 @@ def _assert_semantic_conventions_attributes(
         operation: str,
         status_code: int,
         request_specific_attributes: dict,
+        response_specific_attributes: dict,
     ) -> None:
         attributes_dict: Dict[str, AnyValue] = self._get_attributes_dict(attributes_list)
         self._assert_str_attribute(attributes_dict, SpanAttributes.RPC_METHOD, operation)
@@ -624,11 +805,17 @@ def _assert_semantic_conventions_attributes(
         self._assert_int_attribute(attributes_dict, SpanAttributes.HTTP_STATUS_CODE, status_code)
         # TODO: aws sdk instrumentation is not respecting PEER_SERVICE
         # self._assert_str_attribute(attributes_dict, SpanAttributes.PEER_SERVICE, "backend:8080")
-        for key, value in request_specific_attributes.items():
+        self._assert_specific_attributes(attributes_dict, request_specific_attributes)
+        self._assert_specific_attributes(attributes_dict, response_specific_attributes)
+
+    def _assert_specific_attributes(self, attributes_dict: Dict[str, AnyValue], specific_attributes: Dict[str, AnyValue]) -> None:
+        for key, value in specific_attributes.items():
             if isinstance(value, str):
                 self._assert_str_attribute(attributes_dict, key, value)
             elif isinstance(value, int):
                 self._assert_int_attribute(attributes_dict, key, value)
+            elif isinstance(value, float):
+                self._assert_float_attribute(attributes_dict, key, value)
             else:
                 self._assert_array_value_ddb_table_name(attributes_dict, key, value)

diff --git a/contract-tests/tests/test/amazon/base/contract_test_base.py b/contract-tests/tests/test/amazon/base/contract_test_base.py
index af6557c7..ea522f8e 100644
--- a/contract-tests/tests/test/amazon/base/contract_test_base.py
+++ b/contract-tests/tests/test/amazon/base/contract_test_base.py
@@ -230,6 +230,12 @@ def _assert_int_attribute(self, attributes_dict: Dict[str, AnyValue], key: str,
         self.assertIsNotNone(actual_value)
         self.assertEqual(expected_value, actual_value.int_value)

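+    # OTLP AnyValue carries floating point data in double_value, which is where
+    # float expectations such as temperature and top_p are asserted.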
+    def _assert_float_attribute(self, attributes_dict: Dict[str, AnyValue], key: str, expected_value: float) -> None:
+        self.assertIn(key, attributes_dict)
+        actual_value: AnyValue = attributes_dict[key]
+        self.assertIsNotNone(actual_value)
+        self.assertEqual(expected_value, actual_value.double_value)
+
     def check_sum(self, metric_name: str, actual_sum: float, expected_sum: float) -> None:
         if metric_name is LATENCY_METRIC:
             self.assertTrue(0 < actual_sum < expected_sum)