diff --git a/CHANGELOG.md b/CHANGELOG.md index 720d2fccb1..056fe9686b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - `opentelemetry-instrumentation-asyncio` Fix duplicate instrumentation ([#3383](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/3383)) +- `opentelemetry-instrumentation-botocore` Add GenAI instrumentation for additional Bedrock models for InvokeModel API + ([#3419](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3419)) + ## Version 1.32.0/0.53b0 (2025-04-10) ### Added diff --git a/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py b/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py index e56624b6d4..96935f0351 100644 --- a/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py +++ b/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py @@ -31,6 +31,7 @@ ConverseStreamWrapper, InvokeModelWithResponseStreamWrapper, _Choice, + estimate_token_count, genai_capture_message_content, message_to_event, ) @@ -223,6 +224,23 @@ def extract_attributes(self, attributes: _AttributeMapT): self._extract_claude_attributes( attributes, request_body ) + elif "cohere.command-r" in model_id: + self._extract_command_r_attributes( + attributes, request_body + ) + elif "cohere.command" in model_id: + self._extract_command_attributes( + attributes, request_body + ) + elif "meta.llama" in model_id: + self._extract_llama_attributes( + attributes, request_body + ) + elif "mistral" in model_id: + self._extract_mistral_attributes( + attributes, request_body + ) + except json.JSONDecodeError: _logger.debug("Error: Unable to parse the body as JSON") @@ -280,6 +298,95 @@ def _extract_claude_attributes(self, attributes, request_body): request_body.get("stop_sequences"), ) + def _extract_command_r_attributes(self, attributes, request_body): + prompt = request_body.get("message") + self._set_if_not_none( + attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt) + ) + self._set_if_not_none( + attributes, + GEN_AI_REQUEST_MAX_TOKENS, + request_body.get("max_tokens"), + ) + self._set_if_not_none( + attributes, + GEN_AI_REQUEST_TEMPERATURE, + request_body.get("temperature"), + ) + self._set_if_not_none( + attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p") + ) + self._set_if_not_none( + attributes, + GEN_AI_REQUEST_STOP_SEQUENCES, + request_body.get("stop_sequences"), + ) + + def _extract_command_attributes(self, attributes, request_body): + prompt = request_body.get("prompt") + self._set_if_not_none( + attributes, GEN_AI_USAGE_INPUT_TOKENS, estimate_token_count(prompt) + ) + self._set_if_not_none( + attributes, + GEN_AI_REQUEST_MAX_TOKENS, + request_body.get("max_tokens"), + ) + self._set_if_not_none( + attributes, + GEN_AI_REQUEST_TEMPERATURE, + request_body.get("temperature"), + ) + self._set_if_not_none( + attributes, GEN_AI_REQUEST_TOP_P, request_body.get("p") + ) + self._set_if_not_none( + attributes, + GEN_AI_REQUEST_STOP_SEQUENCES, + request_body.get("stop_sequences"), + ) + + def _extract_llama_attributes(self, attributes, request_body): + self._set_if_not_none( + attributes, + GEN_AI_REQUEST_MAX_TOKENS, + request_body.get("max_gen_len"), + ) + self._set_if_not_none( + attributes, + 
GEN_AI_REQUEST_TEMPERATURE, + request_body.get("temperature"), + ) + self._set_if_not_none( + attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p") + ) + # request for meta llama models does not contain stop_sequences field + + def _extract_mistral_attributes(self, attributes, request_body): + prompt = request_body.get("prompt") + if prompt: + self._set_if_not_none( + attributes, + GEN_AI_USAGE_INPUT_TOKENS, + estimate_token_count(prompt), + ) + self._set_if_not_none( + attributes, + GEN_AI_REQUEST_MAX_TOKENS, + request_body.get("max_tokens"), + ) + self._set_if_not_none( + attributes, + GEN_AI_REQUEST_TEMPERATURE, + request_body.get("temperature"), + ) + self._set_if_not_none( + attributes, GEN_AI_REQUEST_TOP_P, request_body.get("top_p") + ) + self._set_if_not_none( + attributes, GEN_AI_REQUEST_STOP_SEQUENCES, request_body.get("stop") + ) + @staticmethod def _set_if_not_none(attributes, key, value): if value is not None: @@ -287,7 +394,6 @@ def _set_if_not_none(attributes, key, value): def _get_request_messages(self): """Extracts and normalize system and user / assistant messages""" - input_text = None if system := self._call_context.params.get("system", []): system_messages = [{"role": "system", "content": system}] else: @@ -304,15 +410,37 @@ def _get_request_messages(self): system_messages = [{"role": "system", "content": content}] messages = decoded_body.get("messages", []) + # if no messages interface, convert to messages format from generic API if not messages: - # transform old school amazon titan invokeModel api to messages - if input_text := decoded_body.get("inputText"): - messages = [ - {"role": "user", "content": [{"text": input_text}]} - ] + model_id = self._call_context.params.get(_MODEL_ID_KEY) + if "amazon.titan" in model_id: + messages = self._get_messages_from_input_text( + decoded_body, "inputText" + ) + elif "cohere.command-r" in model_id: + # chat_history can be converted to messages; for now, just use message + messages = self._get_messages_from_input_text( + decoded_body, "message" + ) + elif ( + "cohere.command" in model_id + or "meta.llama" in model_id + or "mistral.mistral" in model_id + ): + messages = self._get_messages_from_input_text( + decoded_body, "prompt" + ) return system_messages + messages + # pylint: disable=no-self-use + def _get_messages_from_input_text( + self, decoded_body: dict[str, Any], input_name: str + ): + if input_text := decoded_body.get(input_name): + return [{"role": "user", "content": [{"text": input_text}]}] + return [] + def before_service_call( self, span: Span, instrumentor_context: _BotocoreInstrumentorContext ): @@ -439,6 +567,22 @@ def _invoke_model_on_success( self._handle_anthropic_claude_response( span, response_body, instrumentor_context, capture_content ) + elif "cohere.command-r" in model_id: + self._handle_cohere_command_r_response( + span, response_body, instrumentor_context, capture_content + ) + elif "cohere.command" in model_id: + self._handle_cohere_command_response( + span, response_body, instrumentor_context, capture_content + ) + elif "meta.llama" in model_id: + self._handle_meta_llama_response( + span, response_body, instrumentor_context, capture_content + ) + elif "mistral" in model_id: + self._handle_mistral_ai_response( + span, response_body, instrumentor_context, capture_content + ) except json.JSONDecodeError: _logger.debug("Error: Unable to parse the response body as JSON") except Exception as exc: # pylint: disable=broad-exception-caught @@ -725,6 +869,106 @@ def _handle_anthropic_claude_response( 
output_tokens, output_attributes ) + def _handle_cohere_command_r_response( + self, + span: Span, + response_body: dict[str, Any], + instrumentor_context: _BotocoreInstrumentorContext, + capture_content: bool, + ): + if "text" in response_body: + span.set_attribute( + GEN_AI_USAGE_OUTPUT_TOKENS, + estimate_token_count(response_body["text"]), + ) + if "finish_reason" in response_body: + span.set_attribute( + GEN_AI_RESPONSE_FINISH_REASONS, + [response_body["finish_reason"]], + ) + + event_logger = instrumentor_context.event_logger + choice = _Choice.from_invoke_cohere_command_r( + response_body, capture_content + ) + event_logger.emit(choice.to_choice_event()) + + def _handle_cohere_command_response( + self, + span: Span, + response_body: dict[str, Any], + instrumentor_context: _BotocoreInstrumentorContext, + capture_content: bool, + ): + if "generations" in response_body and response_body["generations"]: + generations = response_body["generations"][0] + if "text" in generations: + span.set_attribute( + GEN_AI_USAGE_OUTPUT_TOKENS, + estimate_token_count(generations["text"]), + ) + if "finish_reason" in generations: + span.set_attribute( + GEN_AI_RESPONSE_FINISH_REASONS, + [generations["finish_reason"]], + ) + + event_logger = instrumentor_context.event_logger + choice = _Choice.from_invoke_cohere_command( + response_body, capture_content + ) + event_logger.emit(choice.to_choice_event()) + + def _handle_meta_llama_response( + self, + span: Span, + response_body: dict[str, Any], + instrumentor_context: _BotocoreInstrumentorContext, + capture_content: bool, + ): + if "prompt_token_count" in response_body: + span.set_attribute( + GEN_AI_USAGE_INPUT_TOKENS, response_body["prompt_token_count"] + ) + if "generation_token_count" in response_body: + span.set_attribute( + GEN_AI_USAGE_OUTPUT_TOKENS, + response_body["generation_token_count"], + ) + if "stop_reason" in response_body: + span.set_attribute( + GEN_AI_RESPONSE_FINISH_REASONS, [response_body["stop_reason"]] + ) + + event_logger = instrumentor_context.event_logger + choice = _Choice.from_invoke_meta_llama(response_body, capture_content) + event_logger.emit(choice.to_choice_event()) + + def _handle_mistral_ai_response( + self, + span: Span, + response_body: dict[str, Any], + instrumentor_context: _BotocoreInstrumentorContext, + capture_content: bool, + ): + if "outputs" in response_body: + outputs = response_body["outputs"][0] + if "text" in outputs: + span.set_attribute( + GEN_AI_USAGE_OUTPUT_TOKENS, + estimate_token_count(outputs["text"]), + ) + if "stop_reason" in outputs: + span.set_attribute( + GEN_AI_RESPONSE_FINISH_REASONS, [outputs["stop_reason"]] + ) + + event_logger = instrumentor_context.event_logger + choice = _Choice.from_invoke_mistral_mistral( + response_body, capture_content + ) + event_logger.emit(choice.to_choice_event()) + def on_error( self, span: Span, diff --git a/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py b/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py index 08d7a0820b..c0d8b537a0 100644 --- a/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py +++ b/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py @@ -15,6 +15,7 @@ from __future__ import annotations import json +import math from os import environ from typing 
import Any, Callable, Dict, Iterator, Sequence, Union @@ -358,6 +359,12 @@ def _process_anthropic_claude_chunk(self, chunk): return +def estimate_token_count(message: str) -> int: + # https://docs.aws.amazon.com/bedrock/latest/userguide/model-customization-prepare.html + # use 6 chars per token to approximate token count when not provided in response body + return math.ceil(len(message) / 6) + + def genai_capture_message_content() -> bool: capture_content = environ.get( OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT, "false" @@ -519,6 +526,48 @@ def from_invoke_anthropic_claude( message["content"] = response["content"] return cls(message, response["stop_reason"], index=0) + @classmethod + def from_invoke_cohere_command_r( + cls, response: dict[str, Any], capture_content: bool + ) -> _Choice: + if capture_content: + message = {"content": response["text"]} + else: + message = {} + return cls(message, response["finish_reason"], index=0) + + @classmethod + def from_invoke_cohere_command( + cls, response: dict[str, Any], capture_content: bool + ) -> _Choice: + result = response["generations"][0] + if capture_content: + message = {"content": result["text"]} + else: + message = {} + return cls(message, result["finish_reason"], index=0) + + @classmethod + def from_invoke_meta_llama( + cls, response: dict[str, Any], capture_content: bool + ) -> _Choice: + if capture_content: + message = {"content": response["generation"]} + else: + message = {} + return cls(message, response["stop_reason"], index=0) + + @classmethod + def from_invoke_mistral_mistral( + cls, response: dict[str, Any], capture_content: bool + ) -> _Choice: + result = response["outputs"][0] + if capture_content: + message = {"content": result["text"]} + else: + message = {} + return cls(message, result["stop_reason"], index=0) + def _to_body_dict(self) -> dict[str, Any]: return { "finish_reason": self.finish_reason, diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/README.md b/instrumentation/opentelemetry-instrumentation-botocore/tests/README.md index c2e47a80cd..4c32c6d5fb 100644 --- a/instrumentation/opentelemetry-instrumentation-botocore/tests/README.md +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/README.md @@ -1,7 +1,8 @@ ## Recording calls If you need to record calls you may need to export authentication variables and the default region as environment -variables in order to have the code work properly. +variables in order to have the code work properly. The recorded tests assume the region us-east-1, so ensure that +AWS_DEFAULT_REGION is set accordingly when recording new calls. 
Since tox blocks environment variables by default you need to override its configuration to let them pass: ``` diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/bedrock_utils.py b/instrumentation/opentelemetry-instrumentation-botocore/tests/bedrock_utils.py index b520171fc3..0fb368e799 100644 --- a/instrumentation/opentelemetry-instrumentation-botocore/tests/bedrock_utils.py +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/bedrock_utils.py @@ -15,6 +15,7 @@ from __future__ import annotations import json +import math from typing import Any from botocore.response import StreamingBody @@ -40,7 +41,7 @@ ) -# pylint: disable=too-many-branches, too-many-locals +# pylint: disable=too-many-branches, too-many-locals, too-many-statements def assert_completion_attributes_from_streaming_body( span: ReadableSpan, request_model: str, @@ -54,6 +55,7 @@ def assert_completion_attributes_from_streaming_body( input_tokens = None output_tokens = None finish_reason = None + request_prompt = "Say this is a test" if response is not None: original_body = response["body"] body_content = original_body.read() @@ -89,6 +91,33 @@ def assert_completion_attributes_from_streaming_body( finish_reason = (response["stop_reason"],) else: finish_reason = None + elif "cohere.command-r" in request_model: + input_tokens = math.ceil(len(request_prompt) / 6) + text = response.get("text") + if text: + output_tokens = math.ceil(len(text) / 6) + finish_reason = (response["finish_reason"],) + elif "cohere.command" in request_model: + input_tokens = math.ceil(len(request_prompt) / 6) + generations = response.get("generations") + if generations: + first_generation = generations[0] + output_tokens = math.ceil(len(first_generation["text"]) / 6) + finish_reason = (first_generation["finish_reason"],) + elif "meta.llama" in request_model: + if "prompt_token_count" in response: + input_tokens = response.get("prompt_token_count") + if "generation_token_count" in response: + output_tokens = response.get("generation_token_count") + if "stop_reason" in response: + finish_reason = (response["stop_reason"],) + elif "mistral.mistral" in request_model: + input_tokens = math.ceil(len(request_prompt) / 6) + outputs = response.get("outputs") + if outputs: + first_output = outputs[0] + output_tokens = math.ceil(len(first_output["text"]) / 6) + finish_reason = (first_output["stop_reason"],) return assert_all_attributes( span, diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[cohere.command-r].yaml b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[cohere.command-r].yaml new file mode 100644 index 0000000000..626db5aad9 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[cohere.command-r].yaml @@ -0,0 +1,71 @@ +interactions: +- request: + body: |- + { + "message": "Say this is a test", + "max_tokens": 10, + "temperature": 0.8, + "p": 0.9, + "stop_sequences": [ + "|" + ] + } + headers: + Content-Length: + - '106' + User-Agent: + - Boto3/1.35.56 md/Botocore#1.35.56 ua/2.0 os/macos#24.3.0 md/arch#arm64 lang/python#3.10.16 + md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.35.56 + X-Amz-Date: + - 20250410T165615Z + X-Amz-Security-Token: + - test_aws_security_token + X-Amzn-Trace-Id: + - Root=1-d11162cc-5e3ef3bd07c5f9eb3ad25214;Parent=0a40817ca75eead7;Sampled=1 + amz-sdk-invocation-id: + - 54acc108-181d-4437-a0a2-8293496f892d + 
amz-sdk-request: + - attempt=1 + authorization: + - Bearer test_aws_authorization + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/cohere.command-r-v1%3A0/invoke + response: + body: + string: |- + { + "response_id": "379ed018/64bcb4c2-902c-4646-9e54-3e5e90c3b11e", + "text": "This is a test.<", + "generation_id": "dcf7f3e3-a611-4fb5-85fe-ae09e6b2eaea", + "chat_history": [ + { + "role": "USER", + "message": "Say this is a test" + }, + { + "role": "CHATBOT", + "message": "This is a test.<" + } + ], + "finish_reason": "STOP_SEQUENCE" + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 10 Apr 2025 16:56:15 GMT + Set-Cookie: test_set_cookie + X-Amzn-Bedrock-Input-Token-Count: + - '5' + X-Amzn-Bedrock-Invocation-Latency: + - '141' + X-Amzn-Bedrock-Output-Token-Count: + - '5' + x-amzn-RequestId: + - 4d1ce10c-03d3-4579-8bab-a0e4eb14a8d0 + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[cohere.command].yaml b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[cohere.command].yaml new file mode 100644 index 0000000000..546554933c --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[cohere.command].yaml @@ -0,0 +1,66 @@ +interactions: +- request: + body: |- + { + "prompt": "Say this is a test", + "max_tokens": 10, + "temperature": 0.8, + "p": 1, + "stop_sequences": [ + "|" + ] + } + headers: + Content-Length: + - '103' + User-Agent: + - Boto3/1.35.56 md/Botocore#1.35.56 ua/2.0 os/macos#24.3.0 md/arch#arm64 lang/python#3.10.16 + md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.35.56 + X-Amz-Date: + - 20250410T183021Z + X-Amz-Security-Token: + - test_aws_security_token + X-Amzn-Trace-Id: + - Root=1-668b3eaf-1927d32dd30fc4f0fedf3a02;Parent=6b49f910f73172c6;Sampled=1 + amz-sdk-invocation-id: + - 98cb0162-a3bb-4d4e-bcbf-290664700ecf + amz-sdk-request: + - attempt=1 + authorization: + - Bearer test_aws_authorization + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/cohere.command-light-text-v14/invoke + response: + body: + string: |- + { + "id": "b68fba0b-67f6-46dd-a659-dab9a99ee354", + "generations": [ + { + "id": "195692c2-6457-4ae0-9e5e-f0472a429cdc", + "text": " I would be more than happy to assist you with", + "finish_reason": "MAX_TOKENS" + } + ], + "prompt": "Say this is a test" + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 10 Apr 2025 18:30:21 GMT + Set-Cookie: test_set_cookie + X-Amzn-Bedrock-Input-Token-Count: + - '5' + X-Amzn-Bedrock-Invocation-Latency: + - '309' + X-Amzn-Bedrock-Output-Token-Count: + - '10' + x-amzn-RequestId: + - b68fba0b-67f6-46dd-a659-dab9a99ee354 + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[meta.llama].yaml b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[meta.llama].yaml new file mode 100644 index 0000000000..1b3c88e31d --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[meta.llama].yaml @@ -0,0 +1,58 @@ +interactions: +- request: + body: |- + { + "prompt": "Say this is a test", + "max_gen_len": 10, + "temperature": 0.8, + "top_p": 1 + } + headers: + Content-Length: + 
- '83' + User-Agent: + - Boto3/1.35.56 md/Botocore#1.35.56 ua/2.0 os/macos#24.3.0 md/arch#arm64 lang/python#3.10.16 + md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.35.56 + X-Amz-Date: + - 20250410T192408Z + X-Amz-Security-Token: + - test_aws_security_token + X-Amzn-Trace-Id: + - Root=1-db2cb3ce-1a9a7362f6f0e5eb7dc0ce56;Parent=a301dca505171c7f;Sampled=1 + amz-sdk-invocation-id: + - 87acada6-b95b-4280-b002-50ef98edf99d + amz-sdk-request: + - attempt=1 + authorization: + - Bearer test_aws_authorization + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/meta.llama3-1-70b-instruct-v1%3A0/invoke + response: + body: + string: |- + { + "generation": " and you are a test subject. You are not", + "prompt_token_count": 5, + "generation_token_count": 10, + "stop_reason": "length" + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 10 Apr 2025 19:24:09 GMT + Set-Cookie: test_set_cookie + X-Amzn-Bedrock-Input-Token-Count: + - '5' + X-Amzn-Bedrock-Invocation-Latency: + - '617' + X-Amzn-Bedrock-Output-Token-Count: + - '10' + x-amzn-RequestId: + - 51b421ef-fd84-4c69-b933-8c928b735d17 + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[mistral.mistral].yaml b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[mistral.mistral].yaml new file mode 100644 index 0000000000..9a753e5a27 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_no_content[mistral.mistral].yaml @@ -0,0 +1,63 @@ +interactions: +- request: + body: |- + { + "prompt": "Say this is a test", + "max_tokens": 10, + "temperature": 0.8, + "top_p": 1, + "stop": [ + "|" + ] + } + headers: + Content-Length: + - '97' + User-Agent: + - Boto3/1.35.56 md/Botocore#1.35.56 ua/2.0 os/macos#24.3.0 md/arch#arm64 lang/python#3.10.16 + md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.35.56 + X-Amz-Date: + - 20250410T183022Z + X-Amz-Security-Token: + - test_aws_security_token + X-Amzn-Trace-Id: + - Root=1-e325095b-1fab9945412a3dcac8474715;Parent=ae78dbba36fcf5b0;Sampled=1 + amz-sdk-invocation-id: + - cbfbbad5-adc8-4a71-82f0-0777f6c78f83 + amz-sdk-request: + - attempt=1 + authorization: + - Bearer test_aws_authorization + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/mistral.mistral-7b-instruct-v0%3A2/invoke + response: + body: + string: |- + { + "outputs": [ + { + "text": " to find out whether you think an animal is cute", + "stop_reason": "length" + } + ] + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 10 Apr 2025 18:30:22 GMT + Set-Cookie: test_set_cookie + X-Amzn-Bedrock-Input-Token-Count: + - '6' + X-Amzn-Bedrock-Invocation-Latency: + - '219' + X-Amzn-Bedrock-Output-Token-Count: + - '10' + x-amzn-RequestId: + - 97084131-1cef-4927-9bab-53a36390153e + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[cohere.command-r].yaml b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[cohere.command-r].yaml new file mode 100644 index 0000000000..90cd91b297 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[cohere.command-r].yaml @@ -0,0 +1,71 @@ +interactions: +- request: + 
body: |- + { + "message": "Say this is a test", + "max_tokens": 10, + "temperature": 0.8, + "p": 0.99, + "stop_sequences": [ + "|" + ] + } + headers: + Content-Length: + - '107' + User-Agent: + - Boto3/1.35.56 md/Botocore#1.35.56 ua/2.0 os/macos#24.3.0 md/arch#arm64 lang/python#3.10.16 + md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.35.56 + X-Amz-Date: + - 20250410T224018Z + X-Amz-Security-Token: + - test_aws_security_token + X-Amzn-Trace-Id: + - Root=1-51673f20-3dd018601b078c785c032a50;Parent=c05b84f54e2c35ee;Sampled=1 + amz-sdk-invocation-id: + - c81a224e-d8b3-4416-aaca-858b478b7db4 + amz-sdk-request: + - attempt=1 + authorization: + - Bearer test_aws_authorization + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/cohere.command-r-v1%3A0/invoke + response: + body: + string: |- + { + "response_id": "379ed018/aa2df2bf-edc8-483f-8cd0-d22d04ba34ba", + "text": "This is a test. How are you doing today", + "generation_id": "2d74c447-266d-4286-8425-a92ad7fc0cbc", + "chat_history": [ + { + "role": "USER", + "message": "Say this is a test" + }, + { + "role": "CHATBOT", + "message": "This is a test. How are you doing today" + } + ], + "finish_reason": "MAX_TOKENS" + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 10 Apr 2025 22:40:18 GMT + Set-Cookie: test_set_cookie + X-Amzn-Bedrock-Input-Token-Count: + - '5' + X-Amzn-Bedrock-Invocation-Latency: + - '196' + X-Amzn-Bedrock-Output-Token-Count: + - '10' + x-amzn-RequestId: + - c1fd38df-b669-4464-9f4b-4cf32c32fde8 + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[cohere.command].yaml b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[cohere.command].yaml new file mode 100644 index 0000000000..01d617158c --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[cohere.command].yaml @@ -0,0 +1,66 @@ +interactions: +- request: + body: |- + { + "prompt": "Say this is a test", + "max_tokens": 10, + "temperature": 0.8, + "p": 1, + "stop_sequences": [ + "|" + ] + } + headers: + Content-Length: + - '103' + User-Agent: + - Boto3/1.35.56 md/Botocore#1.35.56 ua/2.0 os/macos#24.3.0 md/arch#arm64 lang/python#3.10.16 + md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.35.56 + X-Amz-Date: + - 20250410T223753Z + X-Amz-Security-Token: + - test_aws_security_token + X-Amzn-Trace-Id: + - Root=1-461bf523-e7a0388221ce8a1d7a31fc5b;Parent=a55a9398f0f3d364;Sampled=1 + amz-sdk-invocation-id: + - 17fbe175-373f-4c10-8047-8bba2fd8e22c + amz-sdk-request: + - attempt=1 + authorization: + - Bearer test_aws_authorization + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/cohere.command-light-text-v14/invoke + response: + body: + string: |- + { + "id": "a09c1c60-6608-482a-b98d-764e4d87fcd1", + "generations": [ + { + "id": "8b38ce59-5f77-4b79-82c3-58eb72d4b1a2", + "text": " Let it be a test of knowledge, skills,", + "finish_reason": "MAX_TOKENS" + } + ], + "prompt": "Say this is a test" + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 10 Apr 2025 22:37:53 GMT + Set-Cookie: test_set_cookie + X-Amzn-Bedrock-Input-Token-Count: + - '5' + X-Amzn-Bedrock-Invocation-Latency: + - '248' + X-Amzn-Bedrock-Output-Token-Count: + - '10' + x-amzn-RequestId: + - a09c1c60-6608-482a-b98d-764e4d87fcd1 + status: + code: 200 + 
message: OK +version: 1 diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[meta.llama].yaml b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[meta.llama].yaml new file mode 100644 index 0000000000..d6697c70eb --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[meta.llama].yaml @@ -0,0 +1,58 @@ +interactions: +- request: + body: |- + { + "prompt": "Say this is a test", + "max_gen_len": 10, + "temperature": 0.8, + "top_p": 1 + } + headers: + Content-Length: + - '83' + User-Agent: + - Boto3/1.35.56 md/Botocore#1.35.56 ua/2.0 os/macos#24.3.0 md/arch#arm64 lang/python#3.10.16 + md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.35.56 + X-Amz-Date: + - 20250410T224059Z + X-Amz-Security-Token: + - test_aws_security_token + X-Amzn-Trace-Id: + - Root=1-9930be15-db431eb0bf3c21be1ed23dde;Parent=0c4e46fcf0474877;Sampled=1 + amz-sdk-invocation-id: + - c3afc5f6-9912-4892-b48e-dff457911e1c + amz-sdk-request: + - attempt=1 + authorization: + - Bearer test_aws_authorization + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/meta.llama3-1-70b-instruct-v1%3A0/invoke + response: + body: + string: |- + { + "generation": " post. This is a test post. This is", + "prompt_token_count": 5, + "generation_token_count": 10, + "stop_reason": "length" + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 10 Apr 2025 22:40:59 GMT + Set-Cookie: test_set_cookie + X-Amzn-Bedrock-Input-Token-Count: + - '5' + X-Amzn-Bedrock-Invocation-Latency: + - '604' + X-Amzn-Bedrock-Output-Token-Count: + - '10' + x-amzn-RequestId: + - 4b8da626-00b1-4535-b75b-a7e985e9877e + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[mistral.mistral].yaml b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[mistral.mistral].yaml new file mode 100644 index 0000000000..b949c240f6 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/cassettes/test_invoke_model_with_content[mistral.mistral].yaml @@ -0,0 +1,63 @@ +interactions: +- request: + body: |- + { + "prompt": "Say this is a test", + "max_tokens": 10, + "temperature": 0.8, + "top_p": 1, + "stop": [ + "|" + ] + } + headers: + Content-Length: + - '97' + User-Agent: + - Boto3/1.35.56 md/Botocore#1.35.56 ua/2.0 os/macos#24.3.0 md/arch#arm64 lang/python#3.10.16 + md/pyimpl#CPython cfg/retry-mode#legacy Botocore/1.35.56 + X-Amz-Date: + - 20250410T224059Z + X-Amz-Security-Token: + - test_aws_security_token + X-Amzn-Trace-Id: + - Root=1-332feae9-215a9553da54b4789bf12414;Parent=f3899b50f888d5d7;Sampled=1 + amz-sdk-invocation-id: + - 2d410cd2-ef68-48af-9cad-b7ddf3b1d844 + amz-sdk-request: + - attempt=1 + authorization: + - Bearer test_aws_authorization + method: POST + uri: https://bedrock-runtime.us-east-1.amazonaws.com/model/mistral.mistral-7b-instruct-v0%3A2/invoke + response: + body: + string: |- + { + "outputs": [ + { + "text": "\n\nA man stands before a crowd of people", + "stop_reason": "length" + } + ] + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 10 Apr 2025 22:41:00 GMT + Set-Cookie: test_set_cookie + X-Amzn-Bedrock-Input-Token-Count: + - '6' + X-Amzn-Bedrock-Invocation-Latency: + - '174' + 
X-Amzn-Bedrock-Output-Token-Count: + - '10' + x-amzn-RequestId: + - 68547039-f30c-4aff-b368-31bb6d9b9d07 + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py b/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py index bd2211c56c..de5589607b 100644 --- a/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py @@ -1172,6 +1172,37 @@ def set_if_not_none(config, key, value): set_if_not_none(body, "temperature", temperature) set_if_not_none(body, "top_p", top_p) set_if_not_none(body, "stop_sequences", stop_sequences) + elif "cohere.command-r" in llm_model: + body = { + "message": prompt, + "max_tokens": max_tokens, + "temperature": temperature, + "p": top_p, + "stop_sequences": stop_sequences, + } + elif "cohere.command" in llm_model: + body = { + "prompt": prompt, + "max_tokens": max_tokens, + "temperature": temperature, + "p": top_p, + "stop_sequences": stop_sequences, + } + elif "meta.llama" in llm_model: + body = { + "prompt": prompt, + "max_gen_len": max_tokens, + "temperature": temperature, + "top_p": top_p, + } + elif "mistral.mistral" in llm_model: + body = { + "prompt": prompt, + "max_tokens": max_tokens, + "temperature": temperature, + "top_p": top_p, + "stop": stop_sequences, + } else: raise ValueError(f"No config for {llm_model}") @@ -1183,13 +1214,25 @@ def get_model_name_from_family(llm_model): "amazon.titan": "amazon.titan-text-lite-v1", "amazon.nova": "amazon.nova-micro-v1:0", "anthropic.claude": "anthropic.claude-v2", + "cohere.command-r": "cohere.command-r-v1:0", + "cohere.command": "cohere.command-light-text-v14", + "meta.llama": "meta.llama3-1-70b-instruct-v1:0", + "mistral.mistral": "mistral.mistral-7b-instruct-v0:2", } return llm_model_name[llm_model] @pytest.mark.parametrize( "model_family", - ["amazon.nova", "amazon.titan", "anthropic.claude"], + [ + "amazon.nova", + "amazon.titan", + "anthropic.claude", + "cohere.command-r", + "cohere.command", + "meta.llama", + "mistral.mistral", + ], ) @pytest.mark.vcr() def test_invoke_model_with_content( @@ -1201,7 +1244,12 @@ def test_invoke_model_with_content( ): # pylint:disable=too-many-locals llm_model_value = get_model_name_from_family(model_family) - max_tokens, temperature, top_p, stop_sequences = 10, 0.8, 1, ["|"] + max_tokens, temperature, top_p, stop_sequences = ( + 10, + 0.8, + 0.99 if model_family == "cohere.command-r" else 1, + ["|"], + ) body = get_invoke_model_body( llm_model_value, max_tokens, temperature, top_p, stop_sequences ) @@ -1219,7 +1267,7 @@ def test_invoke_model_with_content( top_p, temperature, max_tokens, - stop_sequences, + None if model_family == "meta.llama" else stop_sequences, ) logs = log_exporter.get_finished_logs() @@ -1247,6 +1295,26 @@ def test_invoke_model_with_content( ], } finish_reason = "max_tokens" + elif model_family == "cohere.command-r": + message = { + "content": "This is a test. How are you doing today", + } + finish_reason = "MAX_TOKENS" + elif model_family == "cohere.command": + message = { + "content": " Let it be a test of knowledge, skills,", + } + finish_reason = "MAX_TOKENS" + elif model_family == "meta.llama": + message = { + "content": " post. This is a test post. 
This is", + } + finish_reason = "length" + elif model_family == "mistral.mistral": + message = { + "content": "\n\nA man stands before a crowd of people", + } + finish_reason = "length" assert_message_in_logs(logs[0], "gen_ai.user.message", user_content, span) choice_body = { "index": 0, @@ -1780,7 +1848,15 @@ def test_invoke_model_with_content_tool_call( @pytest.mark.parametrize( "model_family", - ["amazon.nova", "amazon.titan", "anthropic.claude"], + [ + "amazon.nova", + "amazon.titan", + "anthropic.claude", + "cohere.command-r", + "cohere.command", + "meta.llama", + "mistral.mistral", + ], ) @pytest.mark.vcr() def test_invoke_model_no_content( @@ -1792,7 +1868,12 @@ def test_invoke_model_no_content( ): # pylint:disable=too-many-locals llm_model_value = get_model_name_from_family(model_family) - max_tokens, temperature, top_p, stop_sequences = 10, 0.8, 1, ["|"] + max_tokens, temperature, top_p, stop_sequences = ( + 10, + 0.8, + 0.99 if model_family == "cohere.command-r" else 1, + ["|"], + ) body = get_invoke_model_body( llm_model_value, max_tokens, temperature, top_p, stop_sequences ) @@ -1810,7 +1891,7 @@ def test_invoke_model_no_content( top_p, temperature, max_tokens, - stop_sequences, + None if model_family == "meta.llama" else stop_sequences, ) logs = log_exporter.get_finished_logs() @@ -1825,6 +1906,18 @@ def test_invoke_model_no_content( elif model_family == "amazon.titan": choice_message = {} finish_reason = "LENGTH" + elif model_family == "cohere.command-r": + choice_message = {} + finish_reason = "STOP_SEQUENCE" + elif model_family == "cohere.command": + choice_message = {} + finish_reason = "MAX_TOKENS" + elif model_family == "meta.llama": + choice_message = {} + finish_reason = "length" + elif model_family == "mistral.mistral": + choice_message = {} + finish_reason = "length" choice_body = { "index": 0, "finish_reason": finish_reason,