diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index d8c18b49db..a11064ac36 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -192,6 +192,7 @@ def create_chat_completion_message_event(
     request_model,
     request_id,
     llm_metadata_dict,
+    all_token_counts,
     response_id=None,
 ):
     if not transaction:
@@ -224,6 +225,8 @@ def create_chat_completion_message_event(
             "vendor": "bedrock",
             "ingest_source": "Python",
         }
+        if all_token_counts:
+            chat_completion_message_dict["token_count"] = 0
         if settings.ai_monitoring.record_content.enabled:
             chat_completion_message_dict["content"] = content
@@ -263,6 +266,8 @@ def create_chat_completion_message_event(
             "ingest_source": "Python",
             "is_response": True,
         }
+        if all_token_counts:
+            chat_completion_message_dict["token_count"] = 0
         if settings.ai_monitoring.record_content.enabled:
             chat_completion_message_dict["content"] = content
@@ -272,24 +277,21 @@ def create_chat_completion_message_event(
         transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict)
 
 
-def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs):
+def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs):
     request_body = json.loads(request_body)
-    request_config = request_body.get("textGenerationConfig", {})
 
-    input_message_list = [{"role": "user", "content": request_body.get("inputText")}]
-
-    bedrock_attrs["input_message_list"] = input_message_list
-    bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount")
-    bedrock_attrs["request.temperature"] = request_config.get("temperature")
+    bedrock_attrs["input"] = request_body.get("inputText")
 
     return bedrock_attrs
 
 
-def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs):
-    request_body = json.loads(request_body)
-    bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}]
-    bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens")
-    bedrock_attrs["request.temperature"] = request_body.get("temperature")
+def extract_bedrock_titan_embedding_model_response(response_body, bedrock_attrs):
+    if response_body:
+        response_body = json.loads(response_body)
+
+        input_tokens = response_body.get("inputTextTokenCount", 0)
+        bedrock_attrs["response.usage.total_tokens"] = input_tokens
 
     return bedrock_attrs
 
@@ -297,16 +299,31 @@ def extract_bedrock_titan_text_model_response(response_body, bedrock_attrs):
     if response_body:
         response_body = json.loads(response_body)
 
+        input_tokens = response_body.get("inputTextTokenCount", 0)
+        completion_tokens = sum(result.get("tokenCount", 0) for result in response_body.get("results", []))
+        total_tokens = input_tokens + completion_tokens
+
         output_message_list = [
-            {"role": "assistant", "content": result["outputText"]} for result in response_body.get("results", [])
+            {"role": "assistant", "content": result.get("outputText")} for result in response_body.get("results", [])
         ]
 
         bedrock_attrs["response.choices.finish_reason"] = response_body["results"][0]["completionReason"]
+        bedrock_attrs["response.usage.completion_tokens"] = completion_tokens
+        bedrock_attrs["response.usage.prompt_tokens"] = input_tokens
+        bedrock_attrs["response.usage.total_tokens"] = total_tokens
         bedrock_attrs["output_message_list"] = output_message_list
 
     return bedrock_attrs
 
 
+def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs):
+    request_body = json.loads(request_body)
+    bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}]
+    bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens")
+    bedrock_attrs["request.temperature"] = request_body.get("temperature")
+    return bedrock_attrs
+
+
 def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs):
     if response_body:
         response_body = json.loads(response_body)
@@ -319,17 +336,6 @@ def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs):
     return bedrock_attrs
 
 
-def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs):
-    if response_body:
-        if "outputText" in response_body:
-            bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", [])
-            messages.append({"role": "assistant", "content": response_body["outputText"]})
-
-        bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason", None)
-
-    return bedrock_attrs
-
-
 def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock_attrs):
     if response_body:
         outputs = response_body.get("outputs")
@@ -342,10 +348,42 @@ def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock_attrs):
     return bedrock_attrs
 
 
-def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs):
+def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs):
     request_body = json.loads(request_body)
+    request_config = request_body.get("textGenerationConfig", {})
 
-    bedrock_attrs["input"] = request_body.get("inputText")
+    input_message_list = [{"role": "user", "content": request_body.get("inputText")}]
+
+    bedrock_attrs["input_message_list"] = input_message_list
+    bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount")
+    bedrock_attrs["request.temperature"] = request_config.get("temperature")
+
+    return bedrock_attrs
+
+
+def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs):
+    if response_body:
+        if "outputText" in response_body:
+            bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", [])
+            messages.append({"role": "assistant", "content": response_body["outputText"]})
+
+        bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason", None)
+
+        # Extract token information
+        invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {})
+        prompt_tokens = invocation_metrics.get("inputTokenCount", 0)
+        completion_tokens = invocation_metrics.get("outputTokenCount", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        bedrock_attrs["response.usage.completion_tokens"] = (
+            bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens
+        )
+        bedrock_attrs["response.usage.prompt_tokens"] = (
+            bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens
+        )
+        bedrock_attrs["response.usage.total_tokens"] = (
+            bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens
+        )
 
     return bedrock_attrs
 
@@ -409,6 +447,17 @@ def extract_bedrock_claude_model_response(response_body, bedrock_attrs):
         output_message_list = [{"role": role, "content": content}]
         bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason")
         bedrock_attrs["output_message_list"] = output_message_list
+        bedrock_attrs["response_id"] = str(response_body.get("id"))
+
+        # Extract token information
+        token_usage = response_body.get("usage", {})
+        if token_usage:
+            prompt_tokens = token_usage.get("input_tokens", 0)
+            completion_tokens = token_usage.get("output_tokens", 0)
+            total_tokens = prompt_tokens + completion_tokens
+            bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens
+            bedrock_attrs["response.usage.completion_tokens"] = completion_tokens
+            bedrock_attrs["response.usage.total_tokens"] = total_tokens
 
     return bedrock_attrs
 
@@ -420,6 +469,23 @@ def extract_bedrock_claude_model_streaming_response(response_body, bedrock_attrs):
             bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}]
         bedrock_attrs["output_message_list"][0]["content"] += content
         bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason")
+
+        # Extract token information
+        invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {})
+        prompt_tokens = invocation_metrics.get("inputTokenCount", 0)
+        completion_tokens = invocation_metrics.get("outputTokenCount", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        bedrock_attrs["response.usage.completion_tokens"] = (
+            bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens
+        )
+        bedrock_attrs["response.usage.prompt_tokens"] = (
+            bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens
+        )
+        bedrock_attrs["response.usage.total_tokens"] = (
+            bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens
+        )
+
     return bedrock_attrs
 
@@ -440,6 +506,13 @@ def extract_bedrock_llama_model_response(response_body, bedrock_attrs):
         response_body = json.loads(response_body)
 
         output_message_list = [{"role": "assistant", "content": response_body.get("generation")}]
+        prompt_tokens = response_body.get("prompt_token_count", 0)
+        completion_tokens = response_body.get("generation_token_count", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        bedrock_attrs["response.usage.completion_tokens"] = completion_tokens
+        bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens
+        bedrock_attrs["response.usage.total_tokens"] = total_tokens
         bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason")
         bedrock_attrs["output_message_list"] = output_message_list
 
@@ -453,6 +526,22 @@ def extract_bedrock_llama_model_streaming_response(response_body, bedrock_attrs):
             bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}]
         bedrock_attrs["output_message_list"][0]["content"] += content
         bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason")
+
+        # Extract token information
+        invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {})
+        prompt_tokens = invocation_metrics.get("inputTokenCount", 0)
+        completion_tokens = invocation_metrics.get("outputTokenCount", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        bedrock_attrs["response.usage.completion_tokens"] = (
+            bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens
+        )
+        bedrock_attrs["response.usage.prompt_tokens"] = (
+            bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens
+        )
+        bedrock_attrs["response.usage.total_tokens"] = (
+            bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens
+        )
 
     return bedrock_attrs
 
@@ -493,12 +582,33 @@ def extract_bedrock_cohere_model_streaming_response(response_body, bedrock_attrs):
         bedrock_attrs["response.choices.finish_reason"] = response_body["generations"][0]["finish_reason"]
         bedrock_attrs["response_id"] = str(response_body.get("id"))
 
+        # Extract token information
+        invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {})
+        prompt_tokens = invocation_metrics.get("inputTokenCount", 0)
+        completion_tokens = invocation_metrics.get("outputTokenCount", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        bedrock_attrs["response.usage.completion_tokens"] = (
+            bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens
+        )
+        bedrock_attrs["response.usage.prompt_tokens"] = (
+            bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens
+        )
+        bedrock_attrs["response.usage.total_tokens"] = (
+            bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens
+        )
+
     return bedrock_attrs
 
 
 NULL_EXTRACTOR = lambda *args: {}  # noqa: E731  # Empty extractor that returns nothing
 MODEL_EXTRACTORS = [  # Order is important here, avoiding dictionaries
-    ("amazon.titan-embed", extract_bedrock_titan_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR),
+    (
+        "amazon.titan-embed",
+        extract_bedrock_titan_embedding_model_request,
+        extract_bedrock_titan_embedding_model_response,
+        NULL_EXTRACTOR,
+    ),
     ("cohere.embed", extract_bedrock_cohere_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR),
     (
         "amazon.titan",
@@ -801,6 +911,7 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs):
     try:
         # For aioboto3 clients, this will call make_api_call instrumentation in external_aiobotocore
         response = wrapped(*args, **kwargs)
+
     except Exception as exc:
         handle_bedrock_exception(
             exc, False, model, span_id, trace_id, request_extractor, {}, ft, transaction, kwargs, is_converse=True
@@ -848,6 +959,10 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, span_id, trace_id):
         for result in response.get("output").get("message").get("content", [])
     ]
 
+    response_prompt_tokens = response.get("usage", {}).get("inputTokens") if response else None
+    response_completion_tokens = response.get("usage", {}).get("outputTokens") if response else None
+    response_total_tokens = response.get("usage", {}).get("totalTokens") if response else None
+
     bedrock_attrs = {
         "request_id": response_headers.get("x-amzn-requestid"),
         "model": model,
@@ -858,6 +973,9 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, span_id, trace_id):
         "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens", None),
         "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature", None),
         "input_message_list": input_message_list,
+        "response.usage.prompt_tokens": response_prompt_tokens,
+        "response.usage.completion_tokens": response_completion_tokens,
+        "response.usage.total_tokens": response_total_tokens,
     }
 
     return bedrock_attrs
@@ -1014,23 +1132,28 @@ def handle_embedding_event(transaction, bedrock_attrs):
     model = bedrock_attrs.get("model", None)
     input_ = bedrock_attrs.get("input")
 
+    response_total_tokens = bedrock_attrs.get("response.usage.total_tokens", None)
+
+    total_tokens = (
+        settings.ai_monitoring.llm_token_count_callback(model, input_)
+        if settings.ai_monitoring.llm_token_count_callback and input_
+        else response_total_tokens
+    )
+
     embedding_dict = {
         "vendor": "bedrock",
         "ingest_source": "Python",
         "id": embedding_id,
         "span_id": span_id,
         "trace_id": trace_id,
-        "token_count": (
-            settings.ai_monitoring.llm_token_count_callback(model, input_)
-            if settings.ai_monitoring.llm_token_count_callback
-            else None
-        ),
         "request_id": request_id,
         "duration": bedrock_attrs.get("duration", None),
         "request.model": model,
         "response.model": model,
+        "response.usage.total_tokens": total_tokens,
         "error": bedrock_attrs.get("error", None),
     }
+
     embedding_dict.update(llm_metadata_dict)
 
     if settings.ai_monitoring.record_content.enabled:
@@ -1041,6 +1164,7 @@ def handle_embedding_event(transaction, bedrock_attrs):
 
 
 def handle_chat_completion_event(transaction, bedrock_attrs):
+    settings = transaction.settings or global_settings()
     chat_completion_id = str(uuid.uuid4())
     # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events
     custom_attrs_dict = transaction._custom_params
@@ -1055,12 +1179,35 @@ def handle_chat_completion_event(transaction, bedrock_attrs):
     response_id = bedrock_attrs.get("response_id", None)
     model = bedrock_attrs.get("model", None)
 
+    response_prompt_tokens = bedrock_attrs.get("response.usage.prompt_tokens", None)
+    response_completion_tokens = bedrock_attrs.get("response.usage.completion_tokens", None)
+    response_total_tokens = bedrock_attrs.get("response.usage.total_tokens", None)
+
     input_message_list = bedrock_attrs.get("input_message_list", [])
     output_message_list = bedrock_attrs.get("output_message_list", [])
     number_of_messages = (
         len(input_message_list) + len(output_message_list)
     ) or None  # If 0, attribute will be set to None and removed
 
+    input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")])
+    prompt_tokens = (
+        settings.ai_monitoring.llm_token_count_callback(model, input_message_content)
+        if settings.ai_monitoring.llm_token_count_callback and input_message_content
+        else response_prompt_tokens
+    )
+
+    output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")])
+    completion_tokens = (
+        settings.ai_monitoring.llm_token_count_callback(model, output_message_content)
+        if settings.ai_monitoring.llm_token_count_callback and output_message_content
+        else response_completion_tokens
+    )
+    total_tokens = (
+        prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens
+    )
+
+    all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens)
+
     chat_completion_summary_dict = {
         "vendor": "bedrock",
         "ingest_source": "Python",
@@ -1078,6 +1225,12 @@ def handle_chat_completion_event(transaction, bedrock_attrs):
         "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason", None),
         "error": bedrock_attrs.get("error", None),
     }
+
+    if all_token_counts:
+        chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens
+        chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens
+        chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens
+
     chat_completion_summary_dict.update(llm_metadata_dict)
     chat_completion_summary_dict = {k: v for k, v in chat_completion_summary_dict.items() if v is not None}
     transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict)
@@ -1092,6 +1245,7 @@ def handle_chat_completion_event(transaction, bedrock_attrs):
         request_model=model,
         request_id=request_id,
         llm_metadata_dict=llm_metadata_dict,
+        all_token_counts=all_token_counts,
         response_id=response_id,
     )
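Note on the precedence implemented above: `handle_chat_completion_event` and `handle_embedding_event` now prefer a user-registered token-count callback and fall back to the usage numbers parsed off the Bedrock response. A minimal application-side sketch using the agent's public `set_llm_token_count_callback` API; the counting heuristic is invented for illustration, and a real callback would typically delegate to a tokenizer:

```python
import newrelic.agent

def token_count_callback(model, content):
    # Toy approximation -- roughly four characters per token.
    return max(1, len(content) // 4)

# Once registered, the agent passes (model, content) to this callback and
# uses its return value in place of the response.usage.* values parsed above.
newrelic.agent.set_llm_token_count_callback(token_count_callback)
```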
diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py
index da9c5818e7..87dfa1f1b6 100644
--- a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py
+++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py
@@ -17,7 +17,7 @@
 from conftest import BOTOCORE_VERSION
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     events_sans_content,
@@ -49,6 +49,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
+            "response.usage.prompt_tokens": 26,
+            "response.usage.completion_tokens": 100,
+            "response.usage.total_tokens": 126,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "max_tokens",
@@ -70,6 +73,7 @@
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -88,6 +92,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -106,6 +111,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 2,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -189,7 +195,7 @@ def _test():
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
 def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model):
-    @validate_custom_events(add_token_count_to_events(chat_completion_expected_events))
+    @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events))
     # One summary event, one user message, and one response message from the assistant
     @validate_custom_event_count(count=4)
     @validate_transaction_metrics(
@@ -476,46 +482,3 @@ def _test():
     converse_invalid_model(loop, bedrock_converse_server)
 
     _test()
-
-
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count(
-    monkeypatch, bedrock_converse_server, loop, set_trace_info
-):
-    """
-    A request is made to the server with invalid credentials. botocore will reach out to the server and receive an
-    UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer
-    events. The error response can also be parsed, and will be included as attributes on the recorded exception.
-    """
-
-    @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events))
-    @validate_error_trace_attributes(
-        _client_error_name,
-        exact_attrs={
-            "agent": {},
-            "intrinsic": {},
-            "user": {
-                "http.statusCode": 403,
-                "error.message": "The security token included in the request is invalid.",
-                "error.code": "UnrecognizedClientException",
-            },
-        },
-    )
-    @validate_transaction_metrics(
-        name="test_bedrock_chat_completion_incorrect_access_key_with_token_count",
-        scoped_metrics=[("Llm/completion/Bedrock/converse", 1)],
-        rollup_metrics=[("Llm/completion/Bedrock/converse", 1)],
-        custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)],
-        background_task=True,
-    )
-    @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count")
-    def _test():
-        set_trace_info()
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-        add_custom_attribute("llm.foo", "bar")
-        add_custom_attribute("non_llm_attr", "python-agent")
-
-        converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch)
-
-    _test()
"access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() - - def invoke_model_malformed_request_body(loop, bedrock_server, response_streaming): async def _coro(): with pytest.raises(_client_error): @@ -798,58 +754,6 @@ async def _test(): loop.run_until_complete(_test()) -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) -@validate_custom_event_count(count=2) -@validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, -) -@validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, -) -@background_task(name="test_bedrock_chat_completion") -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(loop, bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. 
- """ - - async def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = await bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - - body = response.get("body") - async for resp in body: - assert resp - - loop.run_until_complete(_test()) - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_aiobotocore/test_bedrock_embeddings.py b/tests/external_aiobotocore/test_bedrock_embeddings.py index 96b930feb5..dacfbb4eed 100644 --- a/tests/external_aiobotocore/test_bedrock_embeddings.py +++ b/tests/external_aiobotocore/test_bedrock_embeddings.py @@ -27,7 +27,7 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -164,7 +164,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -289,45 +289,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() @validate_custom_events(embedding_expected_malformed_request_body_events) @validate_custom_event_count(count=1) diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py 
diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py b/tests/external_botocore/_test_bedrock_chat_completion.py
index 155b6c993c..6b65af8cb2 100644
--- a/tests/external_botocore/_test_bedrock_chat_completion.py
+++ b/tests/external_botocore/_test_bedrock_chat_completion.py
@@ -97,6 +97,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "amazon.titan-text-express-v1",
             "response.model": "amazon.titan-text-express-v1",
+            "response.usage.completion_tokens": 32,
+            "response.usage.total_tokens": 44,
+            "response.usage.prompt_tokens": 12,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "FINISH",
@@ -118,6 +121,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "amazon.titan-text-express-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -136,6 +140,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "amazon.titan-text-express-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -335,6 +340,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "meta.llama2-13b-chat-v1",
             "response.model": "meta.llama2-13b-chat-v1",
+            "response.usage.prompt_tokens": 17,
+            "response.usage.completion_tokens": 69,
+            "response.usage.total_tokens": 86,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "stop",
@@ -356,6 +364,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "meta.llama2-13b-chat-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -374,6 +383,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "meta.llama2-13b-chat-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -919,6 +929,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "amazon.titan-text-express-v1",
             "response.model": "amazon.titan-text-express-v1",
+            "response.usage.completion_tokens": 35,
+            "response.usage.total_tokens": 47,
+            "response.usage.prompt_tokens": 12,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "FINISH",
@@ -940,6 +953,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "amazon.titan-text-express-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -958,6 +972,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "amazon.titan-text-express-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -978,6 +993,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "anthropic.claude-instant-v1",
             "response.model": "anthropic.claude-instant-v1",
+            "response.usage.completion_tokens": 99,
+            "response.usage.prompt_tokens": 19,
+            "response.usage.total_tokens": 118,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "stop_sequence",
@@ -999,6 +1017,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "anthropic.claude-instant-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -1017,6 +1036,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "anthropic.claude-instant-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -1038,6 +1058,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "cohere.command-text-v14",
             "response.model": "cohere.command-text-v14",
+            "response.usage.completion_tokens": 91,
+            "response.usage.total_tokens": 100,
+            "response.usage.prompt_tokens": 9,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "COMPLETE",
@@ -1059,6 +1082,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "cohere.command-text-v14",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -1077,6 +1101,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "cohere.command-text-v14",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -1097,6 +1122,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "meta.llama2-13b-chat-v1",
             "response.model": "meta.llama2-13b-chat-v1",
+            "response.usage.prompt_tokens": 17,
+            "response.usage.completion_tokens": 100,
+            "response.usage.total_tokens": 117,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "length",
@@ -1118,6 +1146,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "meta.llama2-13b-chat-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -1136,6 +1165,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "meta.llama2-13b-chat-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
diff --git a/tests/external_botocore/_test_bedrock_embeddings.py b/tests/external_botocore/_test_bedrock_embeddings.py
index f5c227b9c3..af544af001 100644
--- a/tests/external_botocore/_test_bedrock_embeddings.py
+++ b/tests/external_botocore/_test_bedrock_embeddings.py
@@ -33,6 +33,7 @@
         "response.model": "amazon.titan-embed-text-v1",
         "request.model": "amazon.titan-embed-text-v1",
         "request_id": "11233989-07e8-4ecb-9ba6-79601ba6d8cc",
+        "response.usage.total_tokens": 6,
         "vendor": "bedrock",
         "ingest_source": "Python",
     },
@@ -52,6 +53,7 @@
         "response.model": "amazon.titan-embed-g1-text-02",
         "request.model": "amazon.titan-embed-g1-text-02",
         "request_id": "b10ac895-eae3-4f07-b926-10b2866c55ed",
+        "response.usage.total_tokens": 6,
         "vendor": "bedrock",
         "ingest_source": "Python",
     },
diff --git a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py
index 94a88e7a56..7a471b950e 100644
--- a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py
+++ b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import json
 import os
 from io import BytesIO
@@ -35,7 +36,8 @@
 from conftest import BOTOCORE_VERSION
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_streaming_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     disabled_ai_monitoring_streaming_settings,
@@ -200,7 +210,7 @@ def _test():
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
 def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics):
-    @validate_custom_events(add_token_count_to_events(expected_events))
+    @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events)))
     # One summary event, one user message, and one response message from the assistant
     @validate_custom_event_count(count=3)
     @validate_transaction_metrics(
@@ -438,49 +448,6 @@ def _test():
     _test()
 
 
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-def test_bedrock_chat_completion_error_incorrect_access_key_with_token(
-    monkeypatch,
-    bedrock_server,
-    exercise_model,
-    set_trace_info,
-    expected_invalid_access_key_error_events,
-    expected_metrics,
-):
-    @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events))
-    @validate_error_trace_attributes(
-        _client_error_name,
-        exact_attrs={
-            "agent": {},
-            "intrinsic": {},
-            "user": {
-                "http.statusCode": 403,
-                "error.message": "The security token included in the request is invalid.",
-                "error.code": "UnrecognizedClientException",
-            },
-        },
-    )
-    @validate_transaction_metrics(
-        name="test_bedrock_chat_completion",
-        scoped_metrics=expected_metrics,
-        rollup_metrics=expected_metrics,
-        custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)],
-        background_task=True,
-    )
-    @background_task(name="test_bedrock_chat_completion")
-    def _test():
-        monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY")
-
-        with pytest.raises(_client_error):  # not sure where this exception actually comes from
-            set_trace_info()
-            add_custom_attribute("llm.conversation_id", "my-awesome-id")
-            add_custom_attribute("llm.foo", "bar")
-            add_custom_attribute("non_llm_attr", "python-agent")
-
-            exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100)
-
-    _test()
 
 
 @reset_core_stats_engine()
@@ -762,55 +773,6 @@ def _test():
     _test()
 
 
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-def test_bedrock_chat_completion_error_streaming_exception_with_token_count(bedrock_server, set_trace_info):
-    """
-    Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set.
-
-    See the original test for a description of the error case.
-    """
-
-    @validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events))
-    @validate_custom_event_count(count=2)
-    @validate_error_trace_attributes(
-        _event_stream_error_name,
-        exact_attrs={
-            "agent": {},
-            "intrinsic": {},
-            "user": {
-                "error.message": "Malformed input request, please reformat your input and try again.",
-                "error.code": "ValidationException",
-            },
-        },
-        forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")},
-    )
-    @validate_transaction_metrics(
-        name="test_bedrock_chat_completion",
-        scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)],
-        rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)],
-        custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)],
-        background_task=True,
-    )
-    @background_task(name="test_bedrock_chat_completion")
-    def _test():
-        with pytest.raises(_event_stream_error):
-            model = "amazon.titan-text-express-v1"
-            body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8")
-
-            set_trace_info()
-            add_custom_attribute("llm.conversation_id", "my-awesome-id")
-            add_custom_attribute("llm.foo", "bar")
-            add_custom_attribute("non_llm_attr", "python-agent")
-
-            response = bedrock_server.invoke_model_with_response_stream(
-                body=body, modelId=model, accept="application/json", contentType="application/json"
-            )
-            list(response["body"])  # Iterate
-
-    _test()
 
 
 def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server):
     assert bedrock_server._nr_wrapped
diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py
index 417e24b2d9..de2cb201e7 100644
--- a/tests/external_botocore/test_bedrock_embeddings.py
+++ b/tests/external_botocore/test_bedrock_embeddings.py
@@ -28,7 +28,7 @@
 from conftest import BOTOCORE_VERSION
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_to_embedding_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     events_sans_content,
@@ -161,7 +161,7 @@ def _test():
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
 def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events):
-    @validate_custom_events(add_token_count_to_events(expected_events))
+    @validate_custom_events(add_token_count_to_embedding_events(expected_events))
     @validate_custom_event_count(count=1)
     @validate_transaction_metrics(
         name="test_bedrock_embedding",
@@ -286,45 +286,6 @@ def _test():
     _test()
 
 
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-def test_bedrock_embedding_error_incorrect_access_key_with_token_count(
-    monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events
-):
-    @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events))
-    @validate_error_trace_attributes(
-        _client_error_name,
-        exact_attrs={
-            "agent": {},
-            "intrinsic": {},
-            "user": {
-                "http.statusCode": 403,
-                "error.message": "The security token included in the request is invalid.",
-                "error.code": "UnrecognizedClientException",
-            },
-        },
-    )
-    @validate_transaction_metrics(
-        name="test_bedrock_embedding",
-        scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)],
-        rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)],
-        background_task=True,
-    )
-    @background_task(name="test_bedrock_embedding")
-    def _test():
-        monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY")
-
-        with pytest.raises(_client_error):  # not sure where this exception actually comes from
-            set_trace_info()
-            add_custom_attribute("llm.conversation_id", "my-awesome-id")
-            add_custom_attribute("llm.foo", "bar")
-            add_custom_attribute("non_llm_attr", "python-agent")
-
-            exercise_model(prompt="Invalid Token")
-
-    _test()
-
-
 @reset_core_stats_engine()
 def test_bedrock_embedding_error_malformed_request_body(bedrock_server, set_trace_info):
     """
diff --git a/tests/external_botocore/test_chat_completion_converse.py b/tests/external_botocore/test_chat_completion_converse.py
index 96ead41dd7..2d38d6b4a4 100644
--- a/tests/external_botocore/test_chat_completion_converse.py
+++ b/tests/external_botocore/test_chat_completion_converse.py
@@ -17,7 +17,7 @@
 from conftest import BOTOCORE_VERSION
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     events_sans_content,
@@ -49,6 +49,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
+            "response.usage.prompt_tokens": 26,
+            "response.usage.completion_tokens": 100,
+            "response.usage.total_tokens": 126,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "max_tokens",
@@ -70,6 +73,7 @@
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -88,6 +92,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -106,6 +111,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 2,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -185,7 +191,7 @@ def _test():
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
 def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model):
-    @validate_custom_events(add_token_count_to_events(chat_completion_expected_events))
+    @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events))
     # One summary event, one user message, and one response message from the assistant
     @validate_custom_event_count(count=4)
     @validate_transaction_metrics(
@@ -468,57 +474,3 @@ def _test():
         assert response
 
     _test()
-
-
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count(
-    monkeypatch, bedrock_converse_server, exercise_model, set_trace_info
-):
-    """
-    A request is made to the server with invalid credentials. botocore will reach out to the server and receive an
-    UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer
-    events. The error response can also be parsed, and will be included as attributes on the recorded exception.
-    """
-
-    @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events))
-    @validate_error_trace_attributes(
-        _client_error_name,
-        exact_attrs={
-            "agent": {},
-            "intrinsic": {},
-            "user": {
-                "http.statusCode": 403,
-                "error.message": "The security token included in the request is invalid.",
-                "error.code": "UnrecognizedClientException",
-            },
-        },
-    )
-    @validate_transaction_metrics(
-        name="test_bedrock_chat_completion_incorrect_access_key_with_token_count",
-        scoped_metrics=[("Llm/completion/Bedrock/converse", 1)],
-        rollup_metrics=[("Llm/completion/Bedrock/converse", 1)],
-        custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)],
-        background_task=True,
-    )
-    @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count")
-    def _test():
-        monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY")
-
-        with pytest.raises(_client_error):
-            set_trace_info()
-            add_custom_attribute("llm.conversation_id", "my-awesome-id")
-            add_custom_attribute("llm.foo", "bar")
-            add_custom_attribute("non_llm_attr", "python-agent")
-
-            message = [{"role": "user", "content": [{"text": "Invalid Token"}]}]
-
-            response = bedrock_converse_server.converse(
-                modelId="anthropic.claude-3-sonnet-20240229-v1:0",
-                messages=message,
-                inferenceConfig={"temperature": 0.7, "maxTokens": 100},
-            )
-
-            assert response
-
-    _test()
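The Converse API reports usage directly on the response object, so `extract_bedrock_converse_attrs` can map it without a per-model body extractor. An illustrative response matching the expected events above (output text elided):

```python
# Converse responses carry usage alongside the output message; the three keys
# below map onto the response.usage.* attributes asserted in the fixtures.
response = {
    "output": {"message": {"role": "assistant", "content": [{"text": "..."}]}},
    "stopReason": "max_tokens",
    "usage": {"inputTokens": 26, "outputTokens": 100, "totalTokens": 126},
}
```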
diff --git a/tests/mlmodel_openai/test_embeddings_error.py b/tests/mlmodel_openai/test_embeddings_error.py
index a8e46bf23a..f80e6ff41d 100644
--- a/tests/mlmodel_openai/test_embeddings_error.py
+++ b/tests/mlmodel_openai/test_embeddings_error.py
@@ -14,12 +14,10 @@
 
 import openai
 import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
     disabled_ai_monitoring_record_content_settings,
     events_sans_content,
-    llm_token_count_callback,
     set_trace_info,
 )
 from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -128,35 +126,6 @@
 ]
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}},
-)
-@validate_span_events(
-    exact_agents={
-        "error.message": "The model `does-not-exist` does not exist"
-        # "http.statusCode": 404,
-    }
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        openai.Embedding.create(input="Model does not exist.", model="does-not-exist")
-
-
 # Invalid model provided
 @dt_enabled
 @reset_core_stats_engine()
@@ -348,30 +317,6 @@
 )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
-    name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count_async",
-    scoped_metrics=[("Llm/embedding/OpenAI/acreate", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/acreate", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count_async(set_trace_info, loop):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        loop.run_until_complete(openai.Embedding.acreate(input="Model does not exist.", model="does-not-exist"))
-
-
 # Invalid model provided
 @dt_enabled
 @reset_core_stats_engine()