From b53d557276c689508a7f8fbd910dfcf7a5892fb6 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 5 Nov 2024 21:26:38 +0000 Subject: [PATCH 1/6] - added support for additional gen ai attributes - added contract tests for amazon titan and anthropic claude models - added support to compare float attributes --- .../images/applications/aws-sdk/server.js | 41 +++++++++++++---- .../tests/test/amazon/aws-sdk/aws_sdk_test.py | 46 ++++++++++++++++--- .../test/amazon/base/contract_test_base.py | 6 +++ 3 files changed, 77 insertions(+), 16 deletions(-) diff --git a/contract-tests/images/applications/aws-sdk/server.js b/contract-tests/images/applications/aws-sdk/server.js index 0a841550..d1ce4222 100644 --- a/contract-tests/images/applications/aws-sdk/server.js +++ b/contract-tests/images/applications/aws-sdk/server.js @@ -554,19 +554,39 @@ async function handleBedrockRequest(req, res, path) { res.statusCode = 200; } else if (path.includes('invokemodel/invoke-model')) { await withInjected200Success(bedrockRuntimeClient, ['InvokeModelCommand'], {}, async () => { - const modelId = 'amazon.titan-text-premier-v1:0'; + let modelId = '' + let body = {} const userMessage = "Describe the purpose of a 'hello world' program in one line."; const prompt = `[INST] ${userMessage} [/INST]`; - const body = JSON.stringify({ - inputText: prompt, - textGenerationConfig: { - maxTokenCount: 3072, - stopSequences: [], - temperature: 0.7, - topP: 0.9, - }, - }); + if (path.includes('amazon.titan')) { + modelId = 'amazon.titan-text-premier-v1:0'; + body = JSON.stringify({ + inputText: prompt, + textGenerationConfig: { + maxTokenCount: 3072, + stopSequences: [], + temperature: 0.7, + topP: 0.9, + }, + }); + } + + if (path.includes('anthropic.claude')) { + modelId = 'anthropic.claude-v2:1'; + body = JSON.stringify({ + anthropic_version: 'bedrock-2023-05-31', + max_tokens: 1000, + temperature: 1.1, + top_p: 1, + messages: [ + { + role: 'user', + content: [{ type: 'text', text: prompt }], + }, + ], + }); + } await bedrockRuntimeClient.send( new InvokeModelCommand({ @@ -577,6 +597,7 @@ async function handleBedrockRequest(req, res, path) { }) ); }); + res.statusCode = 200; } else { res.statusCode = 404; diff --git a/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py b/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py index bb2ce3ca..ffd0e852 100644 --- a/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py +++ b/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py @@ -34,6 +34,9 @@ _AWS_BEDROCK_KNOWLEDGE_BASE_ID: str = "aws.bedrock.knowledge_base.id" _AWS_BEDROCK_DATA_SOURCE_ID: str = "aws.bedrock.data_source.id" _GEN_AI_REQUEST_MODEL: str = "gen_ai.request.model" +_GEN_AI_REQUEST_TEMPERATURE: str = "gen_ai.request.temperature" +_GEN_AI_REQUEST_TOP_P: str = "gen_ai.request.top_p" +_GEN_AI_REQUEST_MAX_TOKENS: str = "gen_ai.request.max_tokens" # pylint: disable=too-many-public-methods @@ -406,9 +409,9 @@ def test_kinesis_fault(self): span_name="Kinesis.PutRecord", ) - def test_bedrock_runtime_invoke_model(self): + def test_bedrock_runtime_invoke_model_amazon_titan(self): self.do_test_requests( - "bedrock/invokemodel/invoke-model", + "bedrock/invokemodel/invoke-model/amazon.titan-text-premier-v1:0", "GET", 200, 0, @@ -418,11 +421,36 @@ def test_bedrock_runtime_invoke_model(self): remote_service="AWS::BedrockRuntime", remote_operation="InvokeModel", remote_resource_type="AWS::Bedrock::Model", - remote_resource_identifier="amazon.titan-text-premier-v1:0", + remote_resource_identifier='amazon.titan-text-premier-v1:0', request_specific_attributes={ - _GEN_AI_REQUEST_MODEL: "amazon.titan-text-premier-v1:0", - }, - span_name="BedrockRuntime.InvokeModel", + _GEN_AI_REQUEST_MODEL: 'amazon.titan-text-premier-v1:0', + _GEN_AI_REQUEST_MAX_TOKENS: 3072, + _GEN_AI_REQUEST_TEMPERATURE: 0.7, + _GEN_AI_REQUEST_TOP_P: 0.9 + }, + span_name="BedrockRuntime.InvokeModel" + ) + + def test_bedrock_runtime_invoke_model_anthropic_claude(self): + self.do_test_requests( + "bedrock/invokemodel/invoke-model/anthropic.claude-v2:1", + "GET", + 200, + 0, + 0, + local_operation="GET /bedrock", + rpc_service="BedrockRuntime", + remote_service="AWS::BedrockRuntime", + remote_operation="InvokeModel", + remote_resource_type="AWS::Bedrock::Model", + remote_resource_identifier='anthropic.claude-v2:1', + request_specific_attributes={ + _GEN_AI_REQUEST_MODEL: 'anthropic.claude-v2:1', + _GEN_AI_REQUEST_MAX_TOKENS: 1000, + _GEN_AI_REQUEST_TEMPERATURE: 1.1, + _GEN_AI_REQUEST_TOP_P: 1 + }, + span_name="BedrockRuntime.InvokeModel" ) def test_bedrock_get_guardrail(self): @@ -538,6 +566,9 @@ def test_bedrock_agent_get_data_source(self): }, span_name="BedrockAgent.GetDataSource", ) + + # def test_bedrock_agent_runtime_invoke_agent(self): + # return None @override def _assert_aws_span_attributes(self, resource_scope_spans: List[ResourceScopeSpan], path: str, **kwargs) -> None: @@ -591,6 +622,7 @@ def _assert_aws_attributes( def _assert_semantic_conventions_span_attributes( self, resource_scope_spans: List[ResourceScopeSpan], method: str, path: str, status_code: int, **kwargs ) -> None: + target_spans: List[Span] = [] for resource_scope_span in resource_scope_spans: # pylint: disable=no-member @@ -629,6 +661,8 @@ def _assert_semantic_conventions_attributes( self._assert_str_attribute(attributes_dict, key, value) elif isinstance(value, int): self._assert_int_attribute(attributes_dict, key, value) + elif isinstance(value, float): + self._assert_float_attribute(attributes_dict, key, value) else: self._assert_array_value_ddb_table_name(attributes_dict, key, value) diff --git a/contract-tests/tests/test/amazon/base/contract_test_base.py b/contract-tests/tests/test/amazon/base/contract_test_base.py index af6557c7..ea522f8e 100644 --- a/contract-tests/tests/test/amazon/base/contract_test_base.py +++ b/contract-tests/tests/test/amazon/base/contract_test_base.py @@ -230,6 +230,12 @@ def _assert_int_attribute(self, attributes_dict: Dict[str, AnyValue], key: str, self.assertIsNotNone(actual_value) self.assertEqual(expected_value, actual_value.int_value) + def _assert_float_attribute(self, attributes_dict: Dict[str, AnyValue], key: str, expected_value: float) -> None: + self.assertIn(key, attributes_dict) + actual_value: AnyValue = attributes_dict[key] + self.assertIsNotNone(actual_value) + self.assertEqual(expected_value, actual_value.double_value) + def check_sum(self, metric_name: str, actual_sum: float, expected_sum: float) -> None: if metric_name is LATENCY_METRIC: self.assertTrue(0 < actual_sum < expected_sum) From 60eec2841061d928c5aa6b9da42c8a0c2d5a5ad9 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 5 Nov 2024 23:08:30 +0000 Subject: [PATCH 2/6] adding tests for rest of the models --- .../images/applications/aws-sdk/server.js | 49 +++++++++- .../tests/test/amazon/aws-sdk/aws_sdk_test.py | 90 ++++++++++++++++++- 2 files changed, 136 insertions(+), 3 deletions(-) diff --git a/contract-tests/images/applications/aws-sdk/server.js b/contract-tests/images/applications/aws-sdk/server.js index d1ce4222..510f4c65 100644 --- a/contract-tests/images/applications/aws-sdk/server.js +++ b/contract-tests/images/applications/aws-sdk/server.js @@ -577,7 +577,7 @@ async function handleBedrockRequest(req, res, path) { body = JSON.stringify({ anthropic_version: 'bedrock-2023-05-31', max_tokens: 1000, - temperature: 1.1, + temperature: 0.99, top_p: 1, messages: [ { @@ -586,7 +586,52 @@ async function handleBedrockRequest(req, res, path) { }, ], }); - } + } + + if (path.includes('meta.llama')) { + modelId = 'meta.llama2-13b-chat-v1'; + body = JSON.stringify({ + prompt, + max_gen_len: 512, + temperature: 0.5, + top_p: 0.9 + }); + } + + if (path.includes('cohere.command')) { + modelId = 'cohere.command-light-text-v14'; + body = JSON.stringify({ + prompt, + max_tokens: 512, + temperature: 0.5, + p: 0.65, + }); + } + + if (path.includes('ai21.jamba')) { + modelId = 'ai21.jamba-1-5-large-v1:0'; + body = JSON.stringify({ + messages: [ + { + role: 'user', + content: prompt, + }, + ], + top_p: 0.8, + temperature: 0.6, + max_tokens: 512, + }); + } + + if (path.includes('mistral.mistral')) { + modelId = 'mistral.mistral-7b-instruct-v0:2'; + body = JSON.stringify({ + prompt, + max_tokens: 4096, + temperature: 0.75, + top_p: 0.99, + }); + } await bedrockRuntimeClient.send( new InvokeModelCommand({ diff --git a/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py b/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py index ffd0e852..7fa8c4f5 100644 --- a/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py +++ b/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py @@ -447,12 +447,100 @@ def test_bedrock_runtime_invoke_model_anthropic_claude(self): request_specific_attributes={ _GEN_AI_REQUEST_MODEL: 'anthropic.claude-v2:1', _GEN_AI_REQUEST_MAX_TOKENS: 1000, - _GEN_AI_REQUEST_TEMPERATURE: 1.1, + _GEN_AI_REQUEST_TEMPERATURE: 0.99, _GEN_AI_REQUEST_TOP_P: 1 }, span_name="BedrockRuntime.InvokeModel" ) + def test_bedrock_runtime_invoke_model_meta_llama(self): + self.do_test_requests( + "bedrock/invokemodel/invoke-model/meta.llama2-13b-chat-v1", + "GET", + 200, + 0, + 0, + local_operation="GET /bedrock", + rpc_service="BedrockRuntime", + remote_service="AWS::BedrockRuntime", + remote_operation="InvokeModel", + remote_resource_type="AWS::Bedrock::Model", + remote_resource_identifier='meta.llama2-13b-chat-v1', + request_specific_attributes={ + _GEN_AI_REQUEST_MODEL: 'meta.llama2-13b-chat-v1', + _GEN_AI_REQUEST_MAX_TOKENS: 512, + _GEN_AI_REQUEST_TEMPERATURE: 0.5, + _GEN_AI_REQUEST_TOP_P: 0.9 + }, + span_name="BedrockRuntime.InvokeModel" + ) + + def test_bedrock_runtime_invoke_model_cohere_command(self): + self.do_test_requests( + "bedrock/invokemodel/invoke-model/cohere.command-light-text-v14", + "GET", + 200, + 0, + 0, + local_operation="GET /bedrock", + rpc_service="BedrockRuntime", + remote_service="AWS::BedrockRuntime", + remote_operation="InvokeModel", + remote_resource_type="AWS::Bedrock::Model", + remote_resource_identifier='cohere.command-light-text-v14', + request_specific_attributes={ + _GEN_AI_REQUEST_MODEL: 'cohere.command-light-text-v14', + _GEN_AI_REQUEST_MAX_TOKENS: 512, + _GEN_AI_REQUEST_TEMPERATURE: 0.5, + _GEN_AI_REQUEST_TOP_P: 0.65 + }, + span_name="BedrockRuntime.InvokeModel" + ) + + def test_bedrock_runtime_invoke_model_ai21_jamba(self): + self.do_test_requests( + "bedrock/invokemodel/invoke-model/ai21.jamba-1-5-large-v1:0", + "GET", + 200, + 0, + 0, + local_operation="GET /bedrock", + rpc_service="BedrockRuntime", + remote_service="AWS::BedrockRuntime", + remote_operation="InvokeModel", + remote_resource_type="AWS::Bedrock::Model", + remote_resource_identifier='ai21.jamba-1-5-large-v1:0', + request_specific_attributes={ + _GEN_AI_REQUEST_MODEL: 'ai21.jamba-1-5-large-v1:0', + _GEN_AI_REQUEST_MAX_TOKENS: 512, + _GEN_AI_REQUEST_TEMPERATURE: 0.6, + _GEN_AI_REQUEST_TOP_P: 0.8 + }, + span_name="BedrockRuntime.InvokeModel" + ) + + def test_bedrock_runtime_invoke_model_mistral_mistral(self): + self.do_test_requests( + "bedrock/invokemodel/invoke-model/mistral.mistral-7b-instruct-v0:2", + "GET", + 200, + 0, + 0, + local_operation="GET /bedrock", + rpc_service="BedrockRuntime", + remote_service="AWS::BedrockRuntime", + remote_operation="InvokeModel", + remote_resource_type="AWS::Bedrock::Model", + remote_resource_identifier='mistral.mistral-7b-instruct-v0:2', + request_specific_attributes={ + _GEN_AI_REQUEST_MODEL: 'mistral.mistral-7b-instruct-v0:2', + _GEN_AI_REQUEST_MAX_TOKENS: 4096, + _GEN_AI_REQUEST_TEMPERATURE: 0.75, + _GEN_AI_REQUEST_TOP_P: 0.99 + }, + span_name="BedrockRuntime.InvokeModel" + ) + def test_bedrock_get_guardrail(self): self.do_test_requests( "bedrock/getguardrail/get-guardrail", From 4797a7d37797b4eba23c7dc8d4037c11ac8983fe Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 7 Nov 2024 22:59:10 +0000 Subject: [PATCH 3/6] added assertions for response and usage inference parameter attributes --- .../images/applications/aws-sdk/server.js | 189 ++++++++++++------ .../tests/test/amazon/aws-sdk/aws_sdk_test.py | 51 ++++- 2 files changed, 174 insertions(+), 66 deletions(-) diff --git a/contract-tests/images/applications/aws-sdk/server.js b/contract-tests/images/applications/aws-sdk/server.js index 510f4c65..f0182456 100644 --- a/contract-tests/images/applications/aws-sdk/server.js +++ b/contract-tests/images/applications/aws-sdk/server.js @@ -10,7 +10,6 @@ const { S3Client, CreateBucketCommand, PutObjectCommand, GetObjectCommand } = re const { DynamoDBClient, CreateTableCommand, PutItemCommand } = require('@aws-sdk/client-dynamodb'); const { SQSClient, CreateQueueCommand, SendMessageCommand, ReceiveMessageCommand } = require('@aws-sdk/client-sqs'); const { KinesisClient, CreateStreamCommand, PutRecordCommand } = require('@aws-sdk/client-kinesis'); -const fetch = require('node-fetch'); const { BedrockClient, GetGuardrailCommand } = require('@aws-sdk/client-bedrock'); const { BedrockAgentClient, GetKnowledgeBaseCommand, GetDataSourceCommand, GetAgentCommand } = require('@aws-sdk/client-bedrock-agent'); const { BedrockRuntimeClient, InvokeModelCommand } = require('@aws-sdk/client-bedrock-runtime'); @@ -553,15 +552,17 @@ async function handleBedrockRequest(req, res, path) { }); res.statusCode = 200; } else if (path.includes('invokemodel/invoke-model')) { - await withInjected200Success(bedrockRuntimeClient, ['InvokeModelCommand'], {}, async () => { - let modelId = '' - let body = {} - const userMessage = "Describe the purpose of a 'hello world' program in one line."; - const prompt = `[INST] ${userMessage} [/INST]`; - - if (path.includes('amazon.titan')) { + const get_model_request_response = function () { + const prompt = "Describe the purpose of a 'hello world' program in one line."; + let modelId = '' + let request_body = {} + let response_body = {} + + if (path.includes('amazon.titan')) { + modelId = 'amazon.titan-text-premier-v1:0'; - body = JSON.stringify({ + + request_body = { inputText: prompt, textGenerationConfig: { maxTokenCount: 3072, @@ -569,12 +570,26 @@ async function handleBedrockRequest(req, res, path) { temperature: 0.7, topP: 0.9, }, - }); - } + }; + + response_body = { + inputTextTokenCount: 15, + results: [ + { + tokenCount: 13, + outputText: 'text-test-response', + completionReason: 'CONTENT_FILTERED', + }, + ], + } + + } - if (path.includes('anthropic.claude')) { + if (path.includes('anthropic.claude')) { + modelId = 'anthropic.claude-v2:1'; - body = JSON.stringify({ + + request_body = { anthropic_version: 'bedrock-2023-05-31', max_tokens: 1000, temperature: 0.99, @@ -585,64 +600,120 @@ async function handleBedrockRequest(req, res, path) { content: [{ type: 'text', text: prompt }], }, ], - }); - } - - if (path.includes('meta.llama')) { - modelId = 'meta.llama2-13b-chat-v1'; - body = JSON.stringify({ - prompt, - max_gen_len: 512, - temperature: 0.5, - top_p: 0.9 - }); - } + }; - if (path.includes('cohere.command')) { - modelId = 'cohere.command-light-text-v14'; - body = JSON.stringify({ - prompt, - max_tokens: 512, - temperature: 0.5, - p: 0.65, - }); - } - - if (path.includes('ai21.jamba')) { - modelId = 'ai21.jamba-1-5-large-v1:0'; - body = JSON.stringify({ - messages: [ - { - role: 'user', - content: prompt, + response_body = { + stop_reason: 'end_turn', + usage: { + input_tokens: 15, + output_tokens: 13, }, - ], - top_p: 0.8, - temperature: 0.6, - max_tokens: 512, - }); - } - - if (path.includes('mistral.mistral')) { - modelId = 'mistral.mistral-7b-instruct-v0:2'; - body = JSON.stringify({ - prompt, - max_tokens: 4096, - temperature: 0.75, - top_p: 0.99, - }); + } + } + + if (path.includes('meta.llama')) { + modelId = 'meta.llama2-13b-chat-v1'; + + request_body = { + prompt, + max_gen_len: 512, + temperature: 0.5, + top_p: 0.9 + }; + + response_body = { + prompt_token_count: 31, + generation_token_count: 49, + stop_reason: 'stop' + } + } + + if (path.includes('cohere.command')) { + modelId = 'cohere.command-light-text-v14'; + + request_body = { + prompt, + max_tokens: 512, + temperature: 0.5, + p: 0.65, + }; + + response_body = { + generations: [ + { + finish_reason: 'COMPLETE', + text: 'test-generation-text', + }, + ], + prompt: prompt, + }; + } + + if (path.includes('ai21.jamba')) { + modelId = 'ai21.jamba-1-5-large-v1:0'; + + request_body = { + messages: [ + { + role: 'user', + content: prompt, + }, + ], + top_p: 0.8, + temperature: 0.6, + max_tokens: 512, + }; + + response_body = { + stop_reason: 'end_turn', + usage: { + prompt_tokens: 21, + completion_tokens: 24, + }, + choices: [ + { + finish_reason: 'stop', + }, + ], + } + } + + if (path.includes('mistral.mistral')) { + modelId = 'mistral.mistral-7b-instruct-v0:2'; + + request_body = { + prompt, + max_tokens: 4096, + temperature: 0.75, + top_p: 0.99, + }; + + response_body = { + outputs: [ + { + text: 'test-output-text', + stop_reason: 'stop', + }, + ] + } + } + + return [modelId, JSON.stringify(request_body), new TextEncoder().encode(JSON.stringify(response_body))] } + + const [modelId, request_body, response_body] = get_model_request_response(); + await withInjected200Success(bedrockRuntimeClient, ['InvokeModelCommand'], { body: response_body }, async () => { await bedrockRuntimeClient.send( new InvokeModelCommand({ - body: body, + body: request_body, modelId: modelId, accept: 'application/json', contentType: 'application/json', }) ); }); - + res.statusCode = 200; } else { res.statusCode = 404; diff --git a/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py b/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py index 7fa8c4f5..e69cd13d 100644 --- a/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py +++ b/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py @@ -1,6 +1,7 @@ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 from logging import INFO, Logger, getLogger +import math from typing import Dict, List from docker.types import EndpointConfig @@ -37,7 +38,9 @@ _GEN_AI_REQUEST_TEMPERATURE: str = "gen_ai.request.temperature" _GEN_AI_REQUEST_TOP_P: str = "gen_ai.request.top_p" _GEN_AI_REQUEST_MAX_TOKENS: str = "gen_ai.request.max_tokens" - +_GEN_AI_RESPONSE_FINISH_REASONS: str = "gen_ai.response.finish_reasons" +_GEN_AI_USAGE_INPUT_TOKENS: str = 'gen_ai.usage.input_tokens' +_GEN_AI_USAGE_OUTPUT_TOKENS: str = 'gen_ai.usage.output_tokens' # pylint: disable=too-many-public-methods class AWSSDKTest(ContractTestBase): @@ -410,7 +413,7 @@ def test_kinesis_fault(self): ) def test_bedrock_runtime_invoke_model_amazon_titan(self): - self.do_test_requests( + result = self.do_test_requests( "bedrock/invokemodel/invoke-model/amazon.titan-text-premier-v1:0", "GET", 200, @@ -428,9 +431,15 @@ def test_bedrock_runtime_invoke_model_amazon_titan(self): _GEN_AI_REQUEST_TEMPERATURE: 0.7, _GEN_AI_REQUEST_TOP_P: 0.9 }, + response_specific_attributes={ + _GEN_AI_RESPONSE_FINISH_REASONS: ['CONTENT_FILTERED'], + _GEN_AI_USAGE_INPUT_TOKENS: 15, + _GEN_AI_USAGE_OUTPUT_TOKENS: 13 + }, + span_name="BedrockRuntime.InvokeModel" ) - + def test_bedrock_runtime_invoke_model_anthropic_claude(self): self.do_test_requests( "bedrock/invokemodel/invoke-model/anthropic.claude-v2:1", @@ -450,6 +459,11 @@ def test_bedrock_runtime_invoke_model_anthropic_claude(self): _GEN_AI_REQUEST_TEMPERATURE: 0.99, _GEN_AI_REQUEST_TOP_P: 1 }, + response_specific_attributes={ + _GEN_AI_RESPONSE_FINISH_REASONS: ['end_turn'], + _GEN_AI_USAGE_INPUT_TOKENS: 15, + _GEN_AI_USAGE_OUTPUT_TOKENS: 13 + }, span_name="BedrockRuntime.InvokeModel" ) @@ -472,6 +486,11 @@ def test_bedrock_runtime_invoke_model_meta_llama(self): _GEN_AI_REQUEST_TEMPERATURE: 0.5, _GEN_AI_REQUEST_TOP_P: 0.9 }, + response_specific_attributes={ + _GEN_AI_RESPONSE_FINISH_REASONS: ['stop'], + _GEN_AI_USAGE_INPUT_TOKENS: 31, + _GEN_AI_USAGE_OUTPUT_TOKENS: 49 + }, span_name="BedrockRuntime.InvokeModel" ) @@ -494,6 +513,11 @@ def test_bedrock_runtime_invoke_model_cohere_command(self): _GEN_AI_REQUEST_TEMPERATURE: 0.5, _GEN_AI_REQUEST_TOP_P: 0.65 }, + response_specific_attributes={ + _GEN_AI_RESPONSE_FINISH_REASONS: ['COMPLETE'], + _GEN_AI_USAGE_INPUT_TOKENS: math.ceil(len("Describe the purpose of a 'hello world' program in one line.") / 6), + _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-generation-text") / 6) + }, span_name="BedrockRuntime.InvokeModel" ) @@ -516,6 +540,11 @@ def test_bedrock_runtime_invoke_model_ai21_jamba(self): _GEN_AI_REQUEST_TEMPERATURE: 0.6, _GEN_AI_REQUEST_TOP_P: 0.8 }, + response_specific_attributes={ + _GEN_AI_RESPONSE_FINISH_REASONS: ['stop'], + _GEN_AI_USAGE_INPUT_TOKENS: 21, + _GEN_AI_USAGE_OUTPUT_TOKENS: 24 + }, span_name="BedrockRuntime.InvokeModel" ) @@ -538,6 +567,11 @@ def test_bedrock_runtime_invoke_model_mistral_mistral(self): _GEN_AI_REQUEST_TEMPERATURE: 0.75, _GEN_AI_REQUEST_TOP_P: 0.99 }, + response_specific_attributes={ + _GEN_AI_RESPONSE_FINISH_REASONS: ['stop'], + _GEN_AI_USAGE_INPUT_TOKENS: math.ceil(len("Describe the purpose of a 'hello world' program in one line.") / 6), + _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-output-text") / 6) + }, span_name="BedrockRuntime.InvokeModel" ) @@ -654,9 +688,6 @@ def test_bedrock_agent_get_data_source(self): }, span_name="BedrockAgent.GetDataSource", ) - - # def test_bedrock_agent_runtime_invoke_agent(self): - # return None @override def _assert_aws_span_attributes(self, resource_scope_spans: List[ResourceScopeSpan], path: str, **kwargs) -> None: @@ -726,6 +757,7 @@ def _assert_semantic_conventions_span_attributes( kwargs.get("remote_operation"), status_code, kwargs.get("request_specific_attributes", {}), + kwargs.get("response_specific_attributes", {}), ) # pylint: disable=unidiomatic-typecheck @@ -736,6 +768,7 @@ def _assert_semantic_conventions_attributes( operation: str, status_code: int, request_specific_attributes: dict, + response_specific_attributes: dict, ) -> None: attributes_dict: Dict[str, AnyValue] = self._get_attributes_dict(attributes_list) self._assert_str_attribute(attributes_dict, SpanAttributes.RPC_METHOD, operation) @@ -744,7 +777,11 @@ def _assert_semantic_conventions_attributes( self._assert_int_attribute(attributes_dict, SpanAttributes.HTTP_STATUS_CODE, status_code) # TODO: aws sdk instrumentation is not respecting PEER_SERVICE # self._assert_str_attribute(attributes_dict, SpanAttributes.PEER_SERVICE, "backend:8080") - for key, value in request_specific_attributes.items(): + self._assert_specific_attributes(attributes_dict, request_specific_attributes) + self._assert_specific_attributes(attributes_dict, response_specific_attributes) + + def _assert_specific_attributes(self, attributes_dict: Dict[str, AnyValue], specific_attributes: Dict[str, AnyValue]) -> None: + for key, value in specific_attributes.items(): if isinstance(value, str): self._assert_str_attribute(attributes_dict, key, value) elif isinstance(value, int): From 506481dff5bcfaca95ffe9f1388cf4c85d9c25e7 Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 7 Nov 2024 23:24:32 +0000 Subject: [PATCH 4/6] empty test --- test | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 test diff --git a/test b/test new file mode 100644 index 00000000..e69de29b From 2d94b6ef47a13e8618bd495dcf3df2e2f0d0241a Mon Sep 17 00:00:00 2001 From: liustve Date: Thu, 7 Nov 2024 23:30:28 +0000 Subject: [PATCH 5/6] rerun --- test | 0 1 file changed, 0 insertions(+), 0 deletions(-) delete mode 100644 test diff --git a/test b/test deleted file mode 100644 index e69de29b..00000000 From f8c036f0c6322d325d135dbe4a15228fcbf5e919 Mon Sep 17 00:00:00 2001 From: liustve Date: Tue, 12 Nov 2024 21:54:27 +0000 Subject: [PATCH 6/6] add contract tests for cohere command-r model --- .../images/applications/aws-sdk/server.js | 25 ++++++++++++++++- .../tests/test/amazon/aws-sdk/aws_sdk_test.py | 28 +++++++++++++++++++ 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/contract-tests/images/applications/aws-sdk/server.js b/contract-tests/images/applications/aws-sdk/server.js index f0182456..6fd65c7d 100644 --- a/contract-tests/images/applications/aws-sdk/server.js +++ b/contract-tests/images/applications/aws-sdk/server.js @@ -648,6 +648,28 @@ async function handleBedrockRequest(req, res, path) { prompt: prompt, }; } + + if (path.includes('cohere.command-r')) { + modelId = 'cohere.command-r-v1:0'; + + request_body = { + message: prompt, + max_tokens: 512, + temperature: 0.5, + p: 0.65, + }; + + response_body = { + finish_reason: 'COMPLETE', + text: 'test-generation-text', + prompt: prompt, + request: { + commandInput: { + modelId: modelId, + }, + }, + } + } if (path.includes('ai21.jamba')) { modelId = 'ai21.jamba-1-5-large-v1:0'; @@ -678,7 +700,7 @@ async function handleBedrockRequest(req, res, path) { } } - if (path.includes('mistral.mistral')) { + if (path.includes('mistral')) { modelId = 'mistral.mistral-7b-instruct-v0:2'; request_body = { @@ -761,3 +783,4 @@ prepareAwsServer().then(() => { console.log('Ready'); }); }); + diff --git a/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py b/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py index e69cd13d..31f72d2a 100644 --- a/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py +++ b/contract-tests/tests/test/amazon/aws-sdk/aws_sdk_test.py @@ -493,7 +493,35 @@ def test_bedrock_runtime_invoke_model_meta_llama(self): }, span_name="BedrockRuntime.InvokeModel" ) + + def test_bedrock_runtime_invoke_model_cohere_command_r(self): + self.do_test_requests( + "bedrock/invokemodel/invoke-model/cohere.command-r-v1:0", + "GET", + 200, + 0, + 0, + local_operation="GET /bedrock", + rpc_service="BedrockRuntime", + remote_service="AWS::BedrockRuntime", + remote_operation="InvokeModel", + remote_resource_type="AWS::Bedrock::Model", + remote_resource_identifier='cohere.command-r-v1:0', + request_specific_attributes={ + _GEN_AI_REQUEST_MODEL: 'cohere.command-r-v1:0', + _GEN_AI_REQUEST_MAX_TOKENS: 512, + _GEN_AI_REQUEST_TEMPERATURE: 0.5, + _GEN_AI_REQUEST_TOP_P: 0.65 + }, + response_specific_attributes={ + _GEN_AI_RESPONSE_FINISH_REASONS: ['COMPLETE'], + _GEN_AI_USAGE_INPUT_TOKENS: math.ceil(len("Describe the purpose of a 'hello world' program in one line.") / 6), + _GEN_AI_USAGE_OUTPUT_TOKENS: math.ceil(len("test-generation-text") / 6) + }, + span_name="BedrockRuntime.InvokeModel" + ) + # Delete once this model is fully deprecated on node def test_bedrock_runtime_invoke_model_cohere_command(self): self.do_test_requests( "bedrock/invokemodel/invoke-model/cohere.command-light-text-v14",