diff --git a/newrelic/hooks/external_aiobotocore.py b/newrelic/hooks/external_aiobotocore.py
index c398a5b68..7e289212a 100644
--- a/newrelic/hooks/external_aiobotocore.py
+++ b/newrelic/hooks/external_aiobotocore.py
@@ -11,9 +11,8 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import logging
-import sys
-import traceback
 from io import BytesIO
 
 from aiobotocore.response import StreamingBody
@@ -23,7 +22,9 @@
 from newrelic.hooks.external_botocore import (
     EMBEDDING_STREAMING_UNSUPPORTED_LOG_MESSAGE,
     RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE,
+    REQUEST_EXTRACTOR_FAILURE_LOG_MESSAGE,
     AsyncEventStreamWrapper,
+    extract_bedrock_converse_attrs,
     handle_bedrock_exception,
     run_bedrock_request_extractor,
     run_bedrock_response_extractor,
@@ -97,23 +98,36 @@ async def wrap_client__make_api_call(wrapped, instance, args, kwargs):
     response_extractor = getattr(instance, "_nr_response_extractor", None)
     stream_extractor = getattr(instance, "_nr_stream_extractor", None)
     response_streaming = getattr(instance, "_nr_response_streaming", False)
-
+    is_converse = getattr(instance, "_nr_is_converse", False)
     ft = getattr(instance, "_nr_ft", None)
-    if len(args) >= 2:
-        model = args[1].get("modelId")
-        request_body = args[1].get("body")
-        is_embedding = "embed" in model
-    else:
-        model = ""
-        request_body = None
-        is_embedding = False
+    try:
+        bedrock_args = args[1] if len(args) >= 2 else {}
+        model = bedrock_args.get("modelId") or ""
+        is_embedding = "embed" in model
+        request_body = {} if is_converse else bedrock_args.get("body")
+    except Exception:
+        bedrock_args = {}
+        model = ""
+        is_embedding = False
+        request_body = {} if is_converse else None
+        _logger.warning(REQUEST_EXTRACTOR_FAILURE_LOG_MESSAGE, exc_info=True)
 
     try:
         response = await wrapped(*args, **kwargs)
     except Exception as exc:
         handle_bedrock_exception(
-            exc, is_embedding, model, span_id, trace_id, 
request_extractor, request_body, ft, transaction + exc, + is_embedding, + model, + span_id, + trace_id, + request_extractor, + request_body, + ft, + transaction, + bedrock_args, + is_converse, ) raise @@ -132,36 +146,43 @@ async def wrap_client__make_api_call(wrapped, instance, args, kwargs): return response response_headers = response.get("ResponseMetadata", {}).get("HTTPHeaders") or {} - bedrock_attrs = { - "request_id": response_headers.get("x-amzn-requestid"), - "model": model, - "span_id": span_id, - "trace_id": trace_id, - } - - run_bedrock_request_extractor(request_extractor, request_body, bedrock_attrs) - try: - if response_streaming: - # Wrap EventStream object here to intercept __iter__ method instead of instrumenting class. - # This class is used in numerous other services in botocore, and would cause conflicts. - response["body"] = body = AsyncEventStreamWrapper(response["body"]) - body._nr_ft = ft or None - body._nr_bedrock_attrs = bedrock_attrs or {} - body._nr_model_extractor = stream_extractor or None - return response - - # Read and replace response streaming bodies - response_body = await response["body"].read() + if is_converse: + response_body = {} + bedrock_attrs = extract_bedrock_converse_attrs( + args[1], response, response_headers, model, span_id, trace_id + ) + else: + bedrock_attrs = { + "request_id": response_headers.get("x-amzn-requestid"), + "model": model, + "span_id": span_id, + "trace_id": trace_id, + } + # We only need to run the request extractor if invoke_model was called since the request formats are different + # across models + run_bedrock_request_extractor(request_extractor, request_body, bedrock_attrs) + + if response_streaming: + # Wrap EventStream object here to intercept __iter__ method instead of instrumenting class. + # This class is used in numerous other services in botocore, and would cause conflicts. 
+ response["body"] = body = AsyncEventStreamWrapper(response["body"]) + body._nr_ft = ft or None + body._nr_bedrock_attrs = bedrock_attrs or {} + body._nr_model_extractor = stream_extractor or None + return response + + # Read and replace response streaming bodies + response_body = await response["body"].read() + response["body"] = StreamingBody(AsyncBytesIO(response_body), len(response_body)) if ft: ft.__exit__(None, None, None) bedrock_attrs["duration"] = ft.duration * 1000 - response["body"] = StreamingBody(AsyncBytesIO(response_body), len(response_body)) run_bedrock_response_extractor(response_extractor, response_body, bedrock_attrs, is_embedding, transaction) except Exception: - _logger.warning(RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) + _logger.warning(RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE, exc_info=True) return response diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py index a49c5e1f8..4596f4c8e 100644 --- a/newrelic/hooks/external_botocore.py +++ b/newrelic/hooks/external_botocore.py @@ -42,7 +42,7 @@ _logger = logging.getLogger(__name__) EXCEPTION_HANDLING_FAILURE_LOG_MESSAGE = "Exception occurred in botocore instrumentation for AWS Bedrock: While reporting an exception in botocore, another exception occurred. Report this issue to New Relic Support.\n%s" -REQUEST_EXTACTOR_FAILURE_LOG_MESSAGE = "Exception occurred in botocore instrumentation for AWS Bedrock: Failed to extract request information. Report this issue to New Relic Support.\n%s" +REQUEST_EXTRACTOR_FAILURE_LOG_MESSAGE = "Exception occurred in botocore instrumentation for AWS Bedrock: Failed to extract request information. Report this issue to New Relic Support.\n%s" RESPONSE_EXTRACTOR_FAILURE_LOG_MESSAGE = "Exception occurred in botocore instrumentation for AWS Bedrock: Failed to extract response information. 
If the issue persists, report this issue to New Relic support.\n%s" RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE = "Exception occurred in botocore instrumentation for AWS Bedrock: Failed to report response data. Report this issue to New Relic Support.\n%s" EMBEDDING_STREAMING_UNSUPPORTED_LOG_MESSAGE = "Response streaming with embedding models is unsupported in botocore instrumentation for AWS Bedrock. If this feature is now supported by AWS and botocore, report this issue to New Relic Support." @@ -137,9 +137,9 @@ def extract_firehose_agent_attrs(instance, *args, **kwargs): region = instance._client_config.region_name if account_id and region: agent_attrs["cloud.platform"] = "aws_kinesis_delivery_streams" - agent_attrs["cloud.resource_id"] = ( - f"arn:aws:firehose:{region}:{account_id}:deliverystream/{stream_name}" - ) + agent_attrs[ + "cloud.resource_id" + ] = f"arn:aws:firehose:{region}:{account_id}:deliverystream/{stream_name}" except Exception: _logger.debug("Failed to capture AWS Kinesis Delivery Stream (Firehose) info.", exc_info=True) return agent_attrs @@ -163,6 +163,8 @@ def extractor_string(*args, **kwargs): def bedrock_error_attributes(exception, bedrock_attrs): + # In some cases, such as a botocore.exceptions.ParamValidationError, the exception may not have a response attr + # We still want to record the error, so we add `error: True` to bedrock_attrs immediately response = getattr(exception, "response", None) if not response: return bedrock_attrs @@ -534,18 +536,33 @@ def extract_bedrock_cohere_model_streaming_response(response_body, bedrock_attrs def handle_bedrock_exception( - exc, is_embedding, model, span_id, trace_id, request_extractor, request_body, ft, transaction + exc, is_embedding, model, span_id, trace_id, request_extractor, request_body, ft, transaction, kwargs, is_converse ): try: bedrock_attrs = {"model": model, "span_id": span_id, "trace_id": trace_id} + if is_converse: + try: + input_message_list = [ + {"role": "user", "content": 
result["text"]} for result in kwargs["messages"][-1].get("content", []) + ] + if "system" in kwargs.keys(): + input_message_list.append({"role": "system", "content": kwargs.get("system")[0].get("text")}) + except Exception: + input_message_list = [] + + bedrock_attrs["input_message_list"] = input_message_list + bedrock_attrs["request.max_tokens"] = kwargs.get("inferenceConfig", {}).get("maxTokens", None) + bedrock_attrs["request.temperature"] = kwargs.get("inferenceConfig", {}).get("temperature", None) + try: request_extractor(request_body, bedrock_attrs) except json.decoder.JSONDecodeError: pass except Exception: - _logger.warning(REQUEST_EXTACTOR_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) + _logger.warning(REQUEST_EXTRACTOR_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) error_attributes = bedrock_error_attributes(exc, bedrock_attrs) + notice_error_attributes = { "http.statusCode": error_attributes.get("http.statusCode"), "error.message": error_attributes.get("error.message"), @@ -590,7 +607,7 @@ def run_bedrock_request_extractor(request_extractor, request_body, bedrock_attrs except json.decoder.JSONDecodeError: pass except Exception: - _logger.warning(REQUEST_EXTACTOR_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) + _logger.warning(REQUEST_EXTRACTOR_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) def wrap_bedrock_runtime_invoke_model(response_streaming=False): @@ -660,6 +677,7 @@ def _wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs): instance._nr_ft = ft instance._nr_response_streaming = response_streaming instance._nr_settings = settings + instance._nr_is_converse = False # Add a bedrock flag to instance so we can determine when make_api_call instrumentation is hit from non-Bedrock paths and bypass it if so instance._nr_is_bedrock = True @@ -669,7 +687,17 @@ def _wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs): response = wrapped(*args, **kwargs) 
except Exception as exc:
         handle_bedrock_exception(
-            exc, is_embedding, model, span_id, trace_id, request_extractor, request_body, ft, transaction
+            exc,
+            is_embedding,
+            model,
+            span_id,
+            trace_id,
+            request_extractor,
+            request_body,
+            ft,
+            transaction,
+            kwargs,
+            is_converse=False,
         )
         raise
 
@@ -725,6 +753,117 @@ def _wrap_bedrock_runtime_invoke_model(wrapped, instance, args, kwargs):
     return _wrap_bedrock_runtime_invoke_model
 
 
+def wrap_bedrock_runtime_converse(response_streaming=False):
+    @function_wrapper
+    def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs):
+        # Wrapped function only takes keyword arguments, no need for binding
+        transaction = current_transaction()
+        if not transaction:
+            return wrapped(*args, **kwargs)
+
+        settings = transaction.settings or global_settings()
+        if not settings.ai_monitoring.enabled:
+            return wrapped(*args, **kwargs)
+
+        transaction.add_ml_model_info("Bedrock", BOTOCORE_VERSION)
+        transaction._add_agent_attribute("llm", True)
+
+        model = kwargs.get("modelId")
+        if not model:
+            return wrapped(*args, **kwargs)
+
+        # Extractors are not needed for Converse API since the request and response formats are consistent across models
+        request_extractor = response_extractor = stream_extractor = NULL_EXTRACTOR
+
+        function_name = wrapped.__name__
+        # Function trace may not be exited in this function in the case of streaming, so start manually
+        ft = FunctionTrace(name=function_name, group="Llm/completion/Bedrock")
+        ft.__enter__()
+
+        # Get trace information
+        available_metadata = get_trace_linking_metadata()
+        span_id = available_metadata.get("span.id")
+        trace_id = available_metadata.get("trace.id")
+
+        # Store data on instance to pass context to async instrumentation in aiobotocore
+        instance._nr_trace_id = trace_id
+        instance._nr_span_id = span_id
+        instance._nr_request_extractor = request_extractor
+        instance._nr_response_extractor = response_extractor
+        instance._nr_stream_extractor = stream_extractor
+        
instance._nr_txn = transaction + instance._nr_ft = ft + instance._nr_response_streaming = response_streaming + instance._nr_settings = settings + instance._nr_is_converse = True + + # Add a bedrock flag to instance so we can determine when make_api_call instrumentation is hit from non-Bedrock paths and bypass it if so + instance._nr_is_bedrock = True + + try: + # For aioboto3 clients, this will call make_api_call instrumentation in external_aiobotocore + response = wrapped(*args, **kwargs) + except Exception as exc: + handle_bedrock_exception( + exc, False, model, span_id, trace_id, request_extractor, {}, ft, transaction, kwargs, is_converse=True + ) + raise + + if not response or response_streaming and not settings.ai_monitoring.streaming.enabled: + ft.__exit__(None, None, None) + return response + + # Let the instrumentation of make_api_call in the aioboto3 client handle it if we have an async case + if inspect.iscoroutine(response): + return response + + response_headers = response.get("ResponseMetadata", {}).get("HTTPHeaders") or {} + bedrock_attrs = extract_bedrock_converse_attrs(kwargs, response, response_headers, model, span_id, trace_id) + + try: + ft.__exit__(None, None, None) + bedrock_attrs["duration"] = ft.duration * 1000 + run_bedrock_response_extractor(response_extractor, {}, bedrock_attrs, False, transaction) + + except Exception: + _logger.warning(RESPONSE_PROCESSING_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) + + return response + + return _wrap_bedrock_runtime_converse + + +def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, span_id, trace_id): + input_message_list = [] + # If a system message is supplied, it is under its own key in kwargs rather than with the other input messages + if "system" in kwargs.keys(): + input_message_list.extend({"role": "system", "content": result["text"]} for result in kwargs.get("system", [])) + + # kwargs["messages"] can hold multiple requests and responses to maintain 
conversation history + # We grab the last message (the newest request) in the list each time, so we don't duplicate recorded data + input_message_list.extend( + [{"role": "user", "content": result["text"]} for result in kwargs["messages"][-1].get("content", [])] + ) + + output_message_list = [ + {"role": "assistant", "content": result["text"]} + for result in response.get("output").get("message").get("content", []) + ] + + bedrock_attrs = { + "request_id": response_headers.get("x-amzn-requestid"), + "model": model, + "span_id": span_id, + "trace_id": trace_id, + "response.choices.finish_reason": response.get("stopReason"), + "output_message_list": output_message_list, + "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens", None), + "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature", None), + "input_message_list": input_message_list, + } + return bedrock_attrs + + class EventStreamWrapper(ObjectProxy): def __iter__(self): g = GeneratorProxy(self.__wrapped__.__iter__()) @@ -905,7 +1044,6 @@ def handle_embedding_event(transaction, bedrock_attrs): def handle_chat_completion_event(transaction, bedrock_attrs): chat_completion_id = str(uuid.uuid4()) - # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events custom_attrs_dict = transaction._custom_params llm_metadata_dict = {key: value for key, value in custom_attrs_dict.items() if key.startswith("llm.")} @@ -944,7 +1082,6 @@ def handle_chat_completion_event(transaction, bedrock_attrs): } chat_completion_summary_dict.update(llm_metadata_dict) chat_completion_summary_dict = {k: v for k, v in chat_completion_summary_dict.items() if v is not None} - transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict) create_chat_completion_message_event( @@ -1049,9 +1186,9 @@ def _nr_dynamodb_datastore_trace_wrapper_(wrapped, instance, args, kwargs): partition = "aws-us-gov" if partition and region and account_id and _target: 
- agent_attrs["cloud.resource_id"] = ( - f"arn:{partition}:dynamodb:{region}:{account_id:012d}:table/{_target}" - ) + agent_attrs[ + "cloud.resource_id" + ] = f"arn:{partition}:dynamodb:{region}:{account_id:012d}:table/{_target}" except Exception: _logger.debug("Failed to capture AWS DynamoDB info.", exc_info=True) @@ -1390,6 +1527,7 @@ def wrap_serialize_to_request(wrapped, instance, args, kwargs): ("bedrock-runtime", "invoke_model_with_response_stream"): wrap_bedrock_runtime_invoke_model( response_streaming=True ), + ("bedrock-runtime", "converse"): wrap_bedrock_runtime_converse(response_streaming=False), } @@ -1399,8 +1537,8 @@ def bind__create_api_method(py_operation_name, operation_name, service_model, *a def _nr_clientcreator__create_api_method_(wrapped, instance, args, kwargs): (py_operation_name, service_model) = bind__create_api_method(*args, **kwargs) - service_name = service_model.service_name.lower() + tracer = CUSTOM_TRACE_POINTS.get((service_name, py_operation_name)) wrapped = wrapped(*args, **kwargs) diff --git a/tests/external_aiobotocore/conftest.py b/tests/external_aiobotocore/conftest.py index b7fddfc4d..41d2adf8d 100644 --- a/tests/external_aiobotocore/conftest.py +++ b/tests/external_aiobotocore/conftest.py @@ -20,7 +20,8 @@ import moto.server import pytest import werkzeug.serving -from external_botocore._mock_external_bedrock_server import MockExternalBedrockServer +from external_botocore._mock_external_bedrock_server_invoke_model import MockExternalBedrockServer +from external_botocore._mock_external_bedrock_server_converse import MockExternalBedrockConverseServer from testing_support.fixture.event_loop import event_loop as loop from testing_support.fixtures import collector_agent_registration_fixture, collector_available_fixture @@ -183,3 +184,37 @@ def bedrock_server(loop): yield client loop.run_until_complete(client.__aexit__(None, None, None)) + + +# Bedrock Fixtures +@pytest.fixture(scope="session") +def bedrock_converse_server(loop): + 
""" + This fixture will create a mocked backend for testing purposes. + """ + import aiobotocore + + from newrelic.core.config import _environ_as_bool + + if get_package_version_tuple("botocore") < (1, 31, 57): + pytest.skip(reason="Bedrock Runtime not available.") + + if _environ_as_bool("NEW_RELIC_TESTING_RECORD_BEDROCK_RESPONSES", False): + raise NotImplementedError("To record test responses, use botocore instead.") + + # Use mocked Bedrock backend and prerecorded responses + with MockExternalBedrockConverseServer() as server: + session = aiobotocore.session.get_session() + client = loop.run_until_complete( + session.create_client( + "bedrock-runtime", + "us-east-1", + endpoint_url=f"http://localhost:{server.port}", + aws_access_key_id="NOT-A-REAL-SECRET", + aws_secret_access_key="NOT-A-REAL-SECRET", + ).__aenter__() + ) + + yield client + + loop.run_until_complete(client.__aexit__(None, None, None)) diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py new file mode 100644 index 000000000..85f96fe8c --- /dev/null +++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py @@ -0,0 +1,522 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import botocore.exceptions +import pytest +from conftest import BOTOCORE_VERSION +from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes +from testing_support.ml_testing_utils import ( + add_token_count_to_events, + disabled_ai_monitoring_record_content_settings, + disabled_ai_monitoring_settings, + events_sans_content, + events_sans_llm_metadata, + events_with_context_attrs, + llm_token_count_callback, + set_trace_info, +) +from testing_support.validators.validate_custom_event import validate_custom_event_count +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_error_trace_attributes import validate_error_trace_attributes +from testing_support.validators.validate_transaction_metrics import validate_transaction_metrics + +from newrelic.api.background_task import background_task +from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes +from newrelic.api.transaction import add_custom_attribute +from newrelic.common.object_names import callable_name + + +chat_completion_expected_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", + "duration": None, # Response time varies each test run + "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "max_tokens", + "vendor": "bedrock", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": 
"c20d345e-6878-4778-b674-6b187bae8ecf", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", + "span_id": None, + "trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", + "span_id": None, + "trace_id": "trace-id", + "content": "To convert 212°F to Celsius, we can use the formula:\n\nC = (F - 32) × 5/9\n\nWhere:\nC is the temperature in Celsius\nF is the temperature in Fahrenheit\n\nPlugging in 212°F, we get:\n\nC = (212 - 32) × 5/9\nC = 180 × 5/9\nC = 100\n\nTherefore, 212°", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + "is_response": True, + }, + ), +] + + +@pytest.fixture(scope="module") +def exercise_model(loop, bedrock_converse_server): + def _exercise_model(message): + async def coro(): + inference_config = {"temperature": 0.7, "maxTokens": 100} + + response = await bedrock_converse_server.converse( + modelId="anthropic.claude-3-sonnet-20240229-v1:0", + messages=message, + system=[{"text": "You are a scientist."}], + inferenceConfig=inference_config, + ) + assert response + + 
return loop.run_until_complete(coro()) + + return _exercise_model + + +@reset_core_stats_engine() +def test_bedrock_chat_completion_in_txn_with_llm_metadata(set_trace_info, exercise_model): + @validate_custom_events(events_with_context_attrs(chat_completion_expected_events)) + # One summary event, one user message, and one response message from the assistant + @validate_custom_event_count(count=4) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_in_txn_with_llm_metadata", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @validate_attributes("agent", ["llm"]) + @background_task(name="test_bedrock_chat_completion_in_txn_with_llm_metadata") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + with WithLlmCustomAttributes({"context": "attr"}): + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + _test() + + +@disabled_ai_monitoring_record_content_settings +@reset_core_stats_engine() +def test_bedrock_chat_completion_no_content(set_trace_info, exercise_model): + @validate_custom_events(events_sans_content(chat_completion_expected_events)) + # One summary event, one user message, and one response message from the assistant + @validate_custom_event_count(count=4) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_no_content", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @validate_attributes("agent", ["llm"]) + 
@background_task(name="test_bedrock_chat_completion_no_content") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + _test() + + +@reset_core_stats_engine() +@override_llm_token_callback_settings(llm_token_count_callback) +def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): + @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) + # One summary event, one user message, and one response message from the assistant + @validate_custom_event_count(count=4) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_with_token_count", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @validate_attributes("agent", ["llm"]) + @background_task(name="test_bedrock_chat_completion_with_token_count") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + _test() + + +@reset_core_stats_engine() +def test_bedrock_chat_completion_no_llm_metadata(set_trace_info, exercise_model): + @validate_custom_events(events_sans_llm_metadata(chat_completion_expected_events)) + @validate_custom_event_count(count=4) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_in_txn_no_llm_metadata", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + 
rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @background_task(name="test_bedrock_chat_completion_in_txn_no_llm_metadata") + def _test(): + set_trace_info() + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + _test() + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_bedrock_chat_completion_outside_txn(exercise_model): + add_custom_attribute("llm.conversation_id", "my-awesome-id") + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + +@disabled_ai_monitoring_settings +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +@background_task(name="test_bedrock_chat_completion_disabled_ai_monitoring_settings") +def test_bedrock_chat_completion_disabled_ai_monitoring_settings(set_trace_info, exercise_model): + set_trace_info() + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + +chat_completion_invalid_access_key_error_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", + "duration": None, # Response time varies each test run + "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "request.temperature": 0.7, + "request.max_tokens": 100, + "vendor": "bedrock", + "ingest_source": "Python", + "response.number_of_messages": 1, + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": 
"my-awesome-id", + "llm.foo": "bar", + "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", + "span_id": None, + "trace_id": "trace-id", + "content": "Invalid Token", + "role": "user", + "completion_id": None, + "sequence": 0, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), +] + +_client_error = botocore.exceptions.ClientError +_client_error_name = callable_name(_client_error) + + +@reset_core_stats_engine() +def test_bedrock_chat_completion_error_incorrect_access_key( + loop, monkeypatch, bedrock_converse_server, exercise_model, set_trace_info +): + """ + A request is made to the server with invalid credentials. botocore will reach out to the server and receive an + UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer + events. The error response can also be parsed, and will be included as attributes on the recorded exception. + """ + + @validate_custom_events(chat_completion_invalid_access_key_error_events) + @validate_error_trace_attributes( + _client_error_name, + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 403, + "error.message": "The security token included in the request is invalid.", + "error.code": "UnrecognizedClientException", + }, + }, + ) + @validate_transaction_metrics( + name="test_bedrock_chat_completion", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @background_task(name="test_bedrock_chat_completion") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + + converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch) + + _test() + + +def 
converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch): + async def _coro(): + monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") + + with pytest.raises(_client_error): + message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] + response = await bedrock_converse_server.converse( + modelId="anthropic.claude-3-sonnet-20240229-v1:0", + messages=message, + inferenceConfig={"temperature": 0.7, "maxTokens": 100}, + ) + assert response + + loop.run_until_complete(_coro()) + + +chat_completion_invalid_model_error_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", + "span_id": None, + "trace_id": "trace-id", + "duration": None, # Response time varies each test run + "request.model": "does-not-exist", + "response.model": "does-not-exist", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "bedrock", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", + "content": "Model does not exist.", + "role": "user", + "completion_id": None, + "response.model": "does-not-exist", + "sequence": 0, + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), +] + + +@reset_core_stats_engine() +def test_bedrock_chat_completion_error_invalid_model(loop, bedrock_converse_server, set_trace_info): + @validate_custom_events(chat_completion_invalid_model_error_events) + @validate_error_trace_attributes( + "botocore.errorfactory:ValidationException", + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 400, + "error.message": 
"The provided model identifier is invalid.", + "error.code": "ValidationException", + }, + }, + ) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_error_invalid_model", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @background_task(name="test_bedrock_chat_completion_error_invalid_model") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + + converse_invalid_model(loop, bedrock_converse_server) + + _test() + + +def converse_invalid_model(loop, bedrock_converse_server): + async def _coro(): + with pytest.raises(_client_error): + message = [{"role": "user", "content": [{"text": "Model does not exist."}]}] + + response = await bedrock_converse_server.converse( + modelId="does-not-exist", messages=message, inferenceConfig={"temperature": 0.7, "maxTokens": 100} + ) + + assert response + + loop.run_until_complete(_coro()) + + +@reset_core_stats_engine() +@disabled_ai_monitoring_record_content_settings +def test_bedrock_chat_completion_error_invalid_model_no_content(loop, bedrock_converse_server, set_trace_info): + @validate_custom_events(events_sans_content(chat_completion_invalid_model_error_events)) + @validate_error_trace_attributes( + "botocore.errorfactory:ValidationException", + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 400, + "error.message": "The provided model identifier is invalid.", + "error.code": "ValidationException", + }, + }, + ) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_error_invalid_model_no_content", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + 
custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @background_task(name="test_bedrock_chat_completion_error_invalid_model_no_content") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + + converse_invalid_model(loop, bedrock_converse_server) + + _test() + + +@reset_core_stats_engine() +@override_llm_token_callback_settings(llm_token_count_callback) +def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( + monkeypatch, bedrock_converse_server, loop, set_trace_info +): + """ + A request is made to the server with invalid credentials. botocore will reach out to the server and receive an + UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer + events. The error response can also be parsed, and will be included as attributes on the recorded exception. 
+ """ + + @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) + @validate_error_trace_attributes( + _client_error_name, + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 403, + "error.message": "The security token included in the request is invalid.", + "error.code": "UnrecognizedClientException", + }, + }, + ) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + + converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch) + + _test() diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion.py b/tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py similarity index 100% rename from tests/external_aiobotocore/test_bedrock_chat_completion.py rename to tests/external_aiobotocore/test_bedrock_chat_completion_invoke_model.py diff --git a/tests/external_botocore/_mock_external_bedrock_server_converse.py b/tests/external_botocore/_mock_external_bedrock_server_converse.py new file mode 100644 index 000000000..beba15691 --- /dev/null +++ b/tests/external_botocore/_mock_external_bedrock_server_converse.py @@ -0,0 +1,120 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json + +from testing_support.mock_external_http_server import MockExternalHTTPServer + +RESPONSES = { + "What is 212 degrees Fahrenheit converted to Celsius?": [ + {"Content-Type": "application/json", "x-amzn-RequestId": "c20d345e-6878-4778-b674-6b187bae8ecf"}, + 200, + { + "metrics": {"latencyMs": 1866}, + "output": { + "message": { + "content": [ + { + "text": "To convert 212°F to Celsius, we can use the formula:\n\nC = (F - 32) × 5/9\n\nWhere:\nC is the temperature in Celsius\nF is the temperature in Fahrenheit\n\nPlugging in 212°F, we get:\n\nC = (212 - 32) × 5/9\nC = 180 × 5/9\nC = 100\n\nTherefore, 212°" + } + ], + "role": "assistant", + } + }, + "stopReason": "max_tokens", + "usage": {"inputTokens": 26, "outputTokens": 100, "totalTokens": 126}, + }, + ], + "Invalid Token": [ + { + "Content-Type": "application/json", + "x-amzn-RequestId": "e1206e19-2318-4a9d-be98-017c73f06118", + "x-amzn-ErrorType": "UnrecognizedClientException:http://internal.amazon.com/coral/com.amazon.coral.service/", + }, + 403, + {"message": "The security token included in the request is invalid."}, + ], + "Model does not exist.": [ + { + "Content-Type": "application/json", + "x-amzn-RequestId": "f4908827-3db9-4742-9103-2bbc34578b03", + "x-amzn-ErrorType": "ValidationException:http://internal.amazon.com/coral/com.amazon.bedrock/", + }, + 400, + {"message": "The provided model identifier is invalid."}, + ], +} + + +def simple_get(self): + content_len = int(self.headers.get("content-length")) + body = self.rfile.read(content_len).decode("utf-8") + try: + content = 
json.loads(body) + except Exception: + content = body + + prompt = extract_shortened_prompt_converse(content) + if not prompt: + self.send_response(500) + self.end_headers() + self.wfile.write(b"Could not parse prompt.") + return + + headers, status_code, response = ({}, 0, "") + + for k, v in RESPONSES.items(): + if prompt.startswith(k): + headers, status_code, response = v + break + + if not response: + # If no matches found + self.send_response(500) + self.end_headers() + self.wfile.write(f"Unknown Prompt:\n{prompt}".encode()) + return + + # Send response code + self.send_response(status_code) + + # Send headers + for k, v in headers.items(): + self.send_header(k, v) + self.end_headers() + + # Send response body + response_body = json.dumps(response).encode("utf-8") + + self.wfile.write(response_body) + return + + +def extract_shortened_prompt_converse(content): + try: + prompt = content["messages"][0].get("content")[0].get("text", None) + # Sometimes there are leading whitespaces in the prompt. + prompt = prompt.lstrip().split("\n")[0] + except Exception: + prompt = "" + return prompt + + +class MockExternalBedrockConverseServer(MockExternalHTTPServer): + # To use this class in a test one needs to start and stop this server + # before and after making requests to the test app that makes the external + # calls. 
+ + def __init__(self, handler=simple_get, port=None, *args, **kwargs): + super().__init__(handler=handler, port=port, *args, **kwargs) # noqa: B026 diff --git a/tests/external_botocore/_mock_external_bedrock_server.py b/tests/external_botocore/_mock_external_bedrock_server_invoke_model.py similarity index 100% rename from tests/external_botocore/_mock_external_bedrock_server.py rename to tests/external_botocore/_mock_external_bedrock_server_invoke_model.py diff --git a/tests/external_botocore/conftest.py b/tests/external_botocore/conftest.py index b5e6b7b32..b561dd9a7 100644 --- a/tests/external_botocore/conftest.py +++ b/tests/external_botocore/conftest.py @@ -19,7 +19,8 @@ from pathlib import Path import pytest -from _mock_external_bedrock_server import MockExternalBedrockServer, extract_shortened_prompt +from _mock_external_bedrock_server_invoke_model import MockExternalBedrockServer, extract_shortened_prompt +from _mock_external_bedrock_server_converse import MockExternalBedrockConverseServer, extract_shortened_prompt_converse from botocore.response import StreamingBody from testing_support.fixtures import ( collector_agent_registration_fixture, @@ -34,7 +35,7 @@ BOTOCORE_VERSION = get_package_version("botocore") _default_settings = { - "package_reporting.enabled": False, # Turn off package reporting for testing as it causes slow downs. + "package_reporting.enabled": False, # Turn off package reporting for testing as it causes slowdowns. 
"transaction_tracer.explain_threshold": 0.0, "transaction_tracer.transaction_threshold": 0.0, "transaction_tracer.stack_trace_threshold": 0.0, @@ -54,6 +55,9 @@ BEDROCK_AUDIT_LOG_FILE = Path(__file__).parent / "bedrock_audit.log" BEDROCK_AUDIT_LOG_CONTENTS = {} +BEDROCK_CONVERSE_AUDIT_LOG_FILE = Path(__file__).parent / "bedrock_audit_converse.log" +BEDROCK_CONVERSE_AUDIT_LOG_CONTENTS = {} + @pytest.fixture(scope="session") def bedrock_server(): @@ -103,6 +107,58 @@ def bedrock_server(): json.dump(bedrock_audit_log_contents, fp=audit_log_fp, indent=4) +@pytest.fixture(scope="session") +def bedrock_converse_server(): + """ + This fixture will either create a mocked backend for testing purposes, or will + set up an audit log file to log responses of the real Bedrock backend to a file. + The behavior can be controlled by setting NEW_RELIC_TESTING_RECORD_BEDROCK_RESPONSES=1 as + an environment variable to run using the real Bedrock backend. (Default: mocking) + """ + import boto3 + + from newrelic.core.config import _environ_as_bool + + if get_package_version_tuple("botocore") < (1, 31, 57): + pytest.skip(reason="Bedrock Runtime not available.") + + if not _environ_as_bool("NEW_RELIC_TESTING_RECORD_BEDROCK_RESPONSES", False): + # Use mocked Bedrock backend and prerecorded responses + with MockExternalBedrockConverseServer() as server: + client = boto3.client( + "bedrock-runtime", + "us-east-1", + endpoint_url=f"http://localhost:{server.port}", + aws_access_key_id="NOT-A-REAL-SECRET", + aws_secret_access_key="NOT-A-REAL-SECRET", + ) + + yield client + else: + # Use real Bedrock backend and record responses + assert os.environ["AWS_ACCESS_KEY_ID"], "AWS_ACCESS_KEY_ID is required." + assert os.environ["AWS_SECRET_ACCESS_KEY"], "AWS_SECRET_ACCESS_KEY is required." 
+ + # Construct real client + client = boto3.client("bedrock-runtime", "us-east-1") + + # Apply function wrappers to record data + wrap_function_wrapper( + "botocore.endpoint", "Endpoint._do_get_response", wrap_botocore_endpoint_Endpoint__do_get_response_converse + ) + wrap_function_wrapper( + "botocore.eventstream", "EventStreamBuffer.add_data", wrap_botocore_eventstream_add_data_converse + ) + yield client # Run tests + + # Write responses to audit log + bedrock_audit_log_contents = dict( + sorted(BEDROCK_CONVERSE_AUDIT_LOG_CONTENTS.items(), key=lambda i: (i[1][1], i[0])) + ) + with open(BEDROCK_CONVERSE_AUDIT_LOG_FILE, "w") as audit_log_fp: + json.dump(bedrock_audit_log_contents, fp=audit_log_fp, indent=4) + + # Intercept outgoing requests and log to file for mocking RECORDED_HEADERS = {"x-amzn-requestid", "x-amzn-errortype", "content-type"} @@ -153,6 +209,45 @@ def wrap_botocore_endpoint_Endpoint__do_get_response(wrapped, instance, args, kw return result +def wrap_botocore_endpoint_Endpoint__do_get_response_converse(wrapped, instance, args, kwargs): + request = bind__do_get_response(*args, **kwargs) + + if not request: + return wrapped(*args, **kwargs) + + # Send request + result = wrapped(*args, **kwargs) + # Unpack response + success, exception = result + response = (success or exception)[0] + + body = request.body + + try: + content = json.loads(body) + except Exception: + content = body.decode("utf-8") + + prompt = extract_shortened_prompt_converse(content) + headers = dict(response.headers.items()) + headers = dict( + filter(lambda k: k[0].lower() in RECORDED_HEADERS or k[0].startswith("x-ratelimit"), headers.items()) + ) + status_code = response.status_code + + # Log response + if response.raw.chunked: + # Log response + BEDROCK_CONVERSE_AUDIT_LOG_CONTENTS[prompt] = headers, status_code, [] # Append response data to audit log + else: + # Clean up data + response_content = response.content + data = json.loads(response_content.decode("utf-8")) + 
BEDROCK_CONVERSE_AUDIT_LOG_CONTENTS[prompt] = headers, status_code, data # Append response data to audit log + + return result + + def bind__do_get_response(request, operation_model, context): return request @@ -163,3 +258,11 @@ def wrap_botocore_eventstream_add_data(wrapped, instance, args, kwargs): prompt = list(BEDROCK_AUDIT_LOG_CONTENTS.keys())[-1] BEDROCK_AUDIT_LOG_CONTENTS[prompt][2].append(data) return wrapped(*args, **kwargs) + + +def wrap_botocore_eventstream_add_data_converse(wrapped, instance, args, kwargs): + bound_args = bind_args(wrapped, args, kwargs) + data = bound_args["data"].hex() # convert bytes to hex for storage + prompt = list(BEDROCK_CONVERSE_AUDIT_LOG_CONTENTS.keys())[-1] + BEDROCK_CONVERSE_AUDIT_LOG_CONTENTS[prompt][2].append(data) + return wrapped(*args, **kwargs) diff --git a/tests/external_botocore/test_bedrock_chat_completion.py b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py similarity index 100% rename from tests/external_botocore/test_bedrock_chat_completion.py rename to tests/external_botocore/test_bedrock_chat_completion_invoke_model.py diff --git a/tests/external_botocore/test_chat_completion_converse.py b/tests/external_botocore/test_chat_completion_converse.py new file mode 100644 index 000000000..6e7025a56 --- /dev/null +++ b/tests/external_botocore/test_chat_completion_converse.py @@ -0,0 +1,525 @@ +# Copyright 2010 New Relic, Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import botocore.exceptions +import pytest +from conftest import BOTOCORE_VERSION +from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes +from testing_support.ml_testing_utils import ( + add_token_count_to_events, + disabled_ai_monitoring_record_content_settings, + disabled_ai_monitoring_settings, + events_sans_content, + events_sans_llm_metadata, + events_with_context_attrs, + llm_token_count_callback, + set_trace_info, +) +from testing_support.validators.validate_custom_event import validate_custom_event_count +from testing_support.validators.validate_custom_events import validate_custom_events +from testing_support.validators.validate_error_trace_attributes import validate_error_trace_attributes +from testing_support.validators.validate_transaction_metrics import validate_transaction_metrics + +from newrelic.api.background_task import background_task +from newrelic.api.llm_custom_attributes import WithLlmCustomAttributes +from newrelic.api.transaction import add_custom_attribute +from newrelic.common.object_names import callable_name + + +chat_completion_expected_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", + "duration": None, # Response time varies each test run + "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "max_tokens", + "vendor": "bedrock", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": 
"c20d345e-6878-4778-b674-6b187bae8ecf", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", + "span_id": None, + "trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "c20d345e-6878-4778-b674-6b187bae8ecf", + "span_id": None, + "trace_id": "trace-id", + "content": "To convert 212°F to Celsius, we can use the formula:\n\nC = (F - 32) × 5/9\n\nWhere:\nC is the temperature in Celsius\nF is the temperature in Fahrenheit\n\nPlugging in 212°F, we get:\n\nC = (212 - 32) × 5/9\nC = 180 × 5/9\nC = 100\n\nTherefore, 212°", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + "is_response": True, + }, + ), +] + + +@pytest.fixture(scope="module") +def exercise_model(bedrock_converse_server): + def _exercise_model(message): + inference_config = {"temperature": 0.7, "maxTokens": 100} + + response = bedrock_converse_server.converse( + modelId="anthropic.claude-3-sonnet-20240229-v1:0", + messages=message, + system=[{"text": "You are a scientist."}], + inferenceConfig=inference_config, + ) + + return _exercise_model + + 
+@reset_core_stats_engine() +def test_bedrock_chat_completion_in_txn_with_llm_metadata(set_trace_info, exercise_model): + @validate_custom_events(events_with_context_attrs(chat_completion_expected_events)) + # One summary event, one user message, and one response message from the assistant + @validate_custom_event_count(count=4) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_in_txn_with_llm_metadata", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @validate_attributes("agent", ["llm"]) + @background_task(name="test_bedrock_chat_completion_in_txn_with_llm_metadata") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + with WithLlmCustomAttributes({"context": "attr"}): + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + _test() + + +@disabled_ai_monitoring_record_content_settings +@reset_core_stats_engine() +def test_bedrock_chat_completion_no_content(set_trace_info, exercise_model): + @validate_custom_events(events_sans_content(chat_completion_expected_events)) + # One summary event, one user message, and one response message from the assistant + @validate_custom_event_count(count=4) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_no_content", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @validate_attributes("agent", ["llm"]) + @background_task(name="test_bedrock_chat_completion_no_content") + def _test(): + set_trace_info() + 
add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + _test() + + +@reset_core_stats_engine() +@override_llm_token_callback_settings(llm_token_count_callback) +def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model): + @validate_custom_events(add_token_count_to_events(chat_completion_expected_events)) + # One summary event, one user message, and one response message from the assistant + @validate_custom_event_count(count=4) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_with_token_count", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @validate_attributes("agent", ["llm"]) + @background_task(name="test_bedrock_chat_completion_with_token_count") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + _test() + + +@reset_core_stats_engine() +def test_bedrock_chat_completion_no_llm_metadata(set_trace_info, exercise_model): + @validate_custom_events(events_sans_llm_metadata(chat_completion_expected_events)) + @validate_custom_event_count(count=4) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_in_txn_no_llm_metadata", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], 
+ background_task=True, + ) + @background_task(name="test_bedrock_chat_completion_in_txn_no_llm_metadata") + def _test(): + set_trace_info() + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + _test() + + +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +def test_bedrock_chat_completion_outside_txn(exercise_model): + add_custom_attribute("llm.conversation_id", "my-awesome-id") + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + +@disabled_ai_monitoring_settings +@reset_core_stats_engine() +@validate_custom_event_count(count=0) +@background_task(name="test_bedrock_chat_completion_disabled_ai_monitoring_settings") +def test_bedrock_chat_completion_disabled_ai_monitoring_settings(set_trace_info, exercise_model): + set_trace_info() + message = [{"role": "user", "content": [{"text": "What is 212 degrees Fahrenheit converted to Celsius?"}]}] + exercise_model(message) + + +chat_completion_invalid_access_key_error_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", + "duration": None, # Response time varies each test run + "request.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "request.temperature": 0.7, + "request.max_tokens": 100, + "vendor": "bedrock", + "ingest_source": "Python", + "response.number_of_messages": 1, + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "e1206e19-2318-4a9d-be98-017c73f06118", + "span_id": None, + "trace_id": "trace-id", + 
"content": "Invalid Token", + "role": "user", + "completion_id": None, + "sequence": 0, + "response.model": "anthropic.claude-3-sonnet-20240229-v1:0", + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), +] + +_client_error = botocore.exceptions.ClientError +_client_error_name = callable_name(_client_error) + + +@reset_core_stats_engine() +def test_bedrock_chat_completion_error_incorrect_access_key( + monkeypatch, bedrock_converse_server, exercise_model, set_trace_info +): + """ + A request is made to the server with invalid credentials. botocore will reach out to the server and receive an + UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer + events. The error response can also be parsed, and will be included as attributes on the recorded exception. + """ + + @validate_custom_events(chat_completion_invalid_access_key_error_events) + @validate_error_trace_attributes( + _client_error_name, + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 403, + "error.message": "The security token included in the request is invalid.", + "error.code": "UnrecognizedClientException", + }, + }, + ) + @validate_transaction_metrics( + name="test_bedrock_chat_completion", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @background_task(name="test_bedrock_chat_completion") + def _test(): + monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") + + with pytest.raises(_client_error): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + + message = [{"role": "user", "content": [{"text": "Invalid Token"}]}] + + response = 
bedrock_converse_server.converse( + modelId="anthropic.claude-3-sonnet-20240229-v1:0", + messages=message, + inferenceConfig={"temperature": 0.7, "maxTokens": 100}, + ) + + assert response + + _test() + + +chat_completion_invalid_model_error_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", + "span_id": None, + "trace_id": "trace-id", + "duration": None, # Response time varies each test run + "request.model": "does-not-exist", + "response.model": "does-not-exist", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.number_of_messages": 1, + "vendor": "bedrock", + "ingest_source": "Python", + "error": True, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": None, + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "f4908827-3db9-4742-9103-2bbc34578b03", + "content": "Model does not exist.", + "role": "user", + "completion_id": None, + "response.model": "does-not-exist", + "sequence": 0, + "vendor": "bedrock", + "ingest_source": "Python", + }, + ), +] + + +@reset_core_stats_engine() +def test_bedrock_chat_completion_error_invalid_model(bedrock_converse_server, set_trace_info): + @validate_custom_events(events_with_context_attrs(chat_completion_invalid_model_error_events)) + @validate_error_trace_attributes( + "botocore.errorfactory:ValidationException", + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 400, + "error.message": "The provided model identifier is invalid.", + "error.code": "ValidationException", + }, + }, + ) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_error_invalid_model", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + 
custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @background_task(name="test_bedrock_chat_completion_error_invalid_model") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + + with pytest.raises(_client_error): + with WithLlmCustomAttributes({"context": "attr"}): + message = [{"role": "user", "content": [{"text": "Model does not exist."}]}] + + response = bedrock_converse_server.converse( + modelId="does-not-exist", messages=message, inferenceConfig={"temperature": 0.7, "maxTokens": 100} + ) + + assert response + + _test() + + +@reset_core_stats_engine() +@disabled_ai_monitoring_record_content_settings +def test_bedrock_chat_completion_error_invalid_model_no_content(bedrock_converse_server, set_trace_info): + @validate_custom_events(events_sans_content(chat_completion_invalid_model_error_events)) + @validate_error_trace_attributes( + "botocore.errorfactory:ValidationException", + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 400, + "error.message": "The provided model identifier is invalid.", + "error.code": "ValidationException", + }, + }, + ) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_error_invalid_model_no_content", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @background_task(name="test_bedrock_chat_completion_error_invalid_model_no_content") + def _test(): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + + with pytest.raises(_client_error): + message = [{"role": "user", "content": [{"text": 
"Model does not exist."}]}] + + response = bedrock_converse_server.converse( + modelId="does-not-exist", messages=message, inferenceConfig={"temperature": 0.7, "maxTokens": 100} + ) + + assert response + + _test() + + +@reset_core_stats_engine() +@override_llm_token_callback_settings(llm_token_count_callback) +def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count( + monkeypatch, bedrock_converse_server, exercise_model, set_trace_info +): + """ + A request is made to the server with invalid credentials. botocore will reach out to the server and receive an + UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer + events. The error response can also be parsed, and will be included as attributes on the recorded exception. + """ + + @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events)) + @validate_error_trace_attributes( + _client_error_name, + exact_attrs={ + "agent": {}, + "intrinsic": {}, + "user": { + "http.statusCode": 403, + "error.message": "The security token included in the request is invalid.", + "error.code": "UnrecognizedClientException", + }, + }, + ) + @validate_transaction_metrics( + name="test_bedrock_chat_completion_incorrect_access_key_with_token_count", + scoped_metrics=[("Llm/completion/Bedrock/converse", 1)], + rollup_metrics=[("Llm/completion/Bedrock/converse", 1)], + custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], + background_task=True, + ) + @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count") + def _test(): + monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") + + with pytest.raises(_client_error): + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + add_custom_attribute("non_llm_attr", "python-agent") + + message = 
[{"role": "user", "content": [{"text": "Invalid Token"}]}] + + response = bedrock_converse_server.converse( + modelId="anthropic.claude-3-sonnet-20240229-v1:0", + messages=message, + inferenceConfig={"temperature": 0.7, "maxTokens": 100}, + ) + + assert response + + _test()