diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index c3f7960b6e..3484762951 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -129,11 +129,11 @@ def create_chat_completion_message_event(
     span_id,
     trace_id,
     response_model,
-    request_model,
     response_id,
     request_id,
     llm_metadata,
     output_message_list,
+    all_token_counts,
 ):
     settings = transaction.settings if transaction.settings is not None else global_settings()
@@ -153,11 +153,6 @@
             "request_id": request_id,
             "span_id": span_id,
             "trace_id": trace_id,
-            "token_count": (
-                settings.ai_monitoring.llm_token_count_callback(request_model, message_content)
-                if settings.ai_monitoring.llm_token_count_callback
-                else None
-            ),
             "role": message.get("role"),
             "completion_id": chat_completion_id,
             "sequence": index,
@@ -166,6 +161,9 @@
             "ingest_source": "Python",
         }

+        if all_token_counts:
+            chat_completion_input_message_dict["token_count"] = 0
+
         if settings.ai_monitoring.record_content.enabled:
             chat_completion_input_message_dict["content"] = message_content

@@ -193,11 +191,6 @@
                 "request_id": request_id,
                 "span_id": span_id,
                 "trace_id": trace_id,
-                "token_count": (
-                    settings.ai_monitoring.llm_token_count_callback(response_model, message_content)
-                    if settings.ai_monitoring.llm_token_count_callback
-                    else None
-                ),
                 "role": message.get("role"),
                 "completion_id": chat_completion_id,
                 "sequence": index,
@@ -207,6 +200,9 @@
                 "is_response": True,
             }

+            if all_token_counts:
+                chat_completion_output_message_dict["token_count"] = 0
+
             if settings.ai_monitoring.record_content.enabled:
                 chat_completion_output_message_dict["content"] = message_content

@@ -280,15 +276,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
         else getattr(attribute_response, "organization", None)
     )

+    response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None
+
+    total_tokens = (
+        settings.ai_monitoring.llm_token_count_callback(response_model, input_)
+        if settings.ai_monitoring.llm_token_count_callback and input_
+        else response_total_tokens
+    )
+
     full_embedding_response_dict = {
         "id": embedding_id,
         "span_id": span_id,
         "trace_id": trace_id,
-        "token_count": (
-            settings.ai_monitoring.llm_token_count_callback(response_model, input_)
-            if settings.ai_monitoring.llm_token_count_callback
-            else None
-        ),
         "request.model": kwargs.get("model") or kwargs.get("engine"),
         "request_id": request_id,
         "duration": ft.duration * 1000,
@@ -313,6 +312,7 @@
         "response.headers.ratelimitRemainingRequests": check_rate_limit_header(
             response_headers, "x-ratelimit-remaining-requests", True
         ),
+        "response.usage.total_tokens": total_tokens,
         "vendor": "openai",
         "ingest_source": "Python",
     }
@@ -475,12 +475,15 @@ def _handle_completion_success(transaction, linking_metadata, completion_id, kwa


 def _record_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response):
+    settings = transaction.settings if transaction.settings is not None else global_settings()
     span_id = linking_metadata.get("span.id")
     trace_id = linking_metadata.get("trace.id")
+
     try:
         if response:
             response_model = response.get("model")
             response_id = response.get("id")
+            token_usage = response.get("usage") or {}
             output_message_list = []
             finish_reason = None
             choices = response.get("choices") or []
@@ -494,6 +497,7 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa
         else:
             response_model = kwargs.get("response.model")
             response_id = kwargs.get("id")
+            token_usage = {}
             output_message_list = []
             finish_reason = kwargs.get("finish_reason")
             if "content" in kwargs:
@@ -505,10 +509,44 @@
                 output_message_list = []

         request_model = kwargs.get("model") or kwargs.get("engine")
-        request_id = response_headers.get("x-request-id")
-        organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
         messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}]
         input_message_list = list(messages)
+
+        # Extract token counts from response object
+        if token_usage:
+            response_prompt_tokens = token_usage.get("prompt_tokens")
+            response_completion_tokens = token_usage.get("completion_tokens")
+            response_total_tokens = token_usage.get("total_tokens")
+
+        else:
+            response_prompt_tokens = None
+            response_completion_tokens = None
+            response_total_tokens = None
+
+        # Calculate token counts by checking if a callback is registered and if we have the necessary content to pass
+        # to it. If not, then we use the token counts provided in the response object
+        input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")])
+        prompt_tokens = (
+            settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content)
+            if settings.ai_monitoring.llm_token_count_callback and input_message_content
+            else response_prompt_tokens
+        )
+        output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")])
+        completion_tokens = (
+            settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content)
+            if settings.ai_monitoring.llm_token_count_callback and output_message_content
+            else response_completion_tokens
+        )
+
+        total_tokens = (
+            prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens
+        )
+
+        all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens)
+
+        request_id = response_headers.get("x-request-id")
+        organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
+
         full_chat_completion_summary_dict = {
             "id": completion_id,
             "span_id": span_id,
@@ -553,6 +591,12 @@
             ),
             "response.number_of_messages": len(input_message_list) + len(output_message_list),
         }
+
+        if all_token_counts:
+            full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens
+            full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens
+            full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens
+
         llm_metadata = _get_llm_attributes(transaction)
         full_chat_completion_summary_dict.update(llm_metadata)
         transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict)
@@ -564,11 +608,11 @@
             span_id,
             trace_id,
             response_model,
-            request_model,
             response_id,
             request_id,
             llm_metadata,
             output_message_list,
+            all_token_counts,
         )
     except Exception:
         _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info()))
@@ -579,6 +623,7
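For readers skimming the hunks above, here is a condensed, standalone sketch of the precedence rule `_record_completion_success` now applies. The function and variable names below are illustrative only, not part of the agent's API: a registered token-count callback wins whenever there is message content to pass to it, the response's `usage` block is the fallback, and usage attributes are only attached to events when all three counts resolve.

```python
def resolve_token_counts(callback, request_model, response_model, input_text, output_text, usage):
    """Mirror of the precedence logic above: callback first, response usage second."""
    usage = usage or {}
    prompt = (
        callback(request_model, input_text)
        if callback and input_text
        else usage.get("prompt_tokens")
    )
    completion = (
        callback(response_model, output_text)
        if callback and output_text
        else usage.get("completion_tokens")
    )
    # Total is derived from the callback counts when both are present,
    # otherwise it falls back to the total reported by the response.
    total = prompt + completion if prompt and completion else usage.get("total_tokens")
    # The final flag plays the role of all_token_counts: it gates whether
    # token_count/usage attributes are attached to the recorded events.
    return prompt, completion, total, bool(prompt and completion and total)


if __name__ == "__main__":
    # No callback registered: fall back entirely to the response usage block.
    print(resolve_token_counts(
        None, "gpt-4o", "gpt-4o", "Hi", "Hello!",
        {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12},
    ))
    # Callback registered: its counts win and the total is derived from them.
    print(resolve_token_counts(
        lambda model, text: len(text.split()), "gpt-4o", "gpt-4o", "Hi there", "Hello!", {},
    ))
```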
@@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg trace_id = linking_metadata.get("trace.id") request_message_list = kwargs.get("messages", None) or [] notice_error_attributes = {} + try: if OPENAI_V1: response = getattr(exc, "response", None) @@ -643,6 +688,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg output_message_list = [] if "content" in kwargs: output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}] + create_chat_completion_message_event( transaction, request_message_list, @@ -650,11 +696,12 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg span_id, trace_id, kwargs.get("response.model"), - request_model, response_id, request_id, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + all_token_counts=True, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py index 1a3cbbfd76..abf52efe09 100644 --- a/tests/mlmodel_langchain/test_chain.py +++ b/tests/mlmodel_langchain/test_chain.py @@ -359,6 +359,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999992, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 8, "vendor": "openai", "ingest_source": "Python", "input": "[[3923, 374, 220, 17, 489, 220, 19, 30]]", @@ -382,6 +383,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999998, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 1, "vendor": "openai", "ingest_source": "Python", "input": "[[10590]]", @@ -452,6 +454,9 @@ "response.headers.ratelimitResetRequests": "8.64s", "response.headers.ratelimitRemainingTokens": 199912, "response.headers.ratelimitRemainingRequests": 9999, + "response.usage.prompt_tokens": 73, + "response.usage.completion_tokens": 375, + "response.usage.total_tokens": 448, "response.number_of_messages": 3, }, ], @@ -467,6 +472,7 @@ "sequence": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4?", }, @@ -483,6 +489,7 @@ "sequence": 1, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "math", }, @@ -499,6 +506,7 @@ "sequence": 2, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "is_response": True, "content": "```html\n\n\n\n Math Quiz\n\n\n

Math Quiz Questions

\n
    \n
  1. What is the result of 5 + 3?
  2. \n \n
  3. What is the product of 6 x 7?
  4. \n \n
  5. What is the square root of 64?
  6. \n \n
  7. What is the result of 12 / 4?
  8. \n \n
  9. What is the sum of 15 + 9?
  10. \n \n
\n\n\n```", diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index 1f8cf1cb74..5e4d209ed7 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -55,6 +55,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 11, + "response.usage.total_tokens": 64, + "response.usage.prompt_tokens": 53, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 200, @@ -81,6 +84,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -99,6 +103,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -117,6 +122,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "is_response": True, @@ -172,7 +178,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -343,7 +349,7 @@ def test_openai_chat_completion_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py index bfb2267a33..97a4dd8793 100644 --- a/tests/mlmodel_openai/test_chat_completion_error.py +++ b/tests/mlmodel_openai/test_chat_completion_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +66,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": 
"Python", }, @@ -83,6 +82,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -186,6 +186,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -193,36 +194,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() @@ -281,6 +252,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -296,6 +268,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -360,6 +333,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -471,37 +445,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with 
pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py index 9be9fcab9c..5af1598847 100644 --- a/tests/mlmodel_openai/test_chat_completion_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py @@ -14,13 +14,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -67,6 +65,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -82,6 +81,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -229,6 +229,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -266,37 +267,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -329,41 +299,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", 
"http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - expected_events_on_wrong_api_key_error = [ ( {"type": "LlmChatCompletionSummary"}, @@ -391,6 +326,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -610,39 +546,6 @@ def test_chat_completion_invalid_request_error_invalid_model_with_raw_response(s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -677,41 +580,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async_with_raw_resp ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - 
"test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py index ad89d6f260..8019c0b6a9 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream.py +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -15,7 +15,8 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -184,9 +185,101 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): assert resp +chat_completion_recorded_token_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openai", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + 
"trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -378,7 +471,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_chat_completion_stream:test_openai_chat_completion_async_with_token_count", diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index eebb5ee8fb..e8e55426e9 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +66,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -83,6 +82,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -191,6 +191,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -198,38 +199,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 
404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -290,6 +259,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -305,6 +275,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -374,6 +345,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -488,38 +460,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -649,6 +589,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py 
b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py index 5f769ea0e6..64798300fc 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py @@ -12,16 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. - import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +65,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -83,6 +81,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -243,6 +242,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -281,77 +281,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn assert resp -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - generator = sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) 
-@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_async_with_token_count( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - async def consumer(): - generator = await async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - async for resp in generator: - assert resp - - loop.run_until_complete(consumer()) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -414,6 +343,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py index 796404012b..c88e8b1df6 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py @@ -17,7 +17,8 @@ from conftest import get_openai_version from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -300,7 +301,9 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -622,7 +625,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant # @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py index 817db35d8e..007effcb17 100644 --- a/tests/mlmodel_openai/test_chat_completion_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -54,6 +54,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 75, + 
"response.usage.total_tokens": 101, + "response.usage.prompt_tokens": 26, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 10000, @@ -80,6 +83,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "ingest_source": "Python", @@ -98,6 +102,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "ingest_source": "Python", @@ -116,6 +121,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "is_response": True, @@ -193,7 +199,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -389,7 +395,7 @@ def test_openai_chat_completion_async_with_llm_metadata_no_content(loop, set_tra @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py index c3c3e7c429..935db04fe0 100644 --- a/tests/mlmodel_openai/test_embeddings.py +++ b/tests/mlmodel_openai/test_embeddings.py @@ -19,7 +19,7 @@ validate_attributes, ) from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -55,6 +55,7 @@ "response.headers.ratelimitResetRequests": "19m45.394s", "response.headers.ratelimitRemainingTokens": 149994, "response.headers.ratelimitRemainingRequests": 197, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -107,7 +108,7 @@ def test_openai_embedding_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_sync_with_token_count", @@ -191,7 +192,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_async_with_token_count", diff --git 
a/tests/mlmodel_openai/test_embeddings_error_v1.py b/tests/mlmodel_openai/test_embeddings_error_v1.py index fd29236122..499f96893b 100644 --- a/tests/mlmodel_openai/test_embeddings_error_v1.py +++ b/tests/mlmodel_openai/test_embeddings_error_v1.py @@ -16,12 +16,10 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -149,32 +147,6 @@ def test_embeddings_invalid_request_error_no_model_async(set_trace_info, async_o ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -255,36 +227,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content(set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - ) - - embedding_invalid_key_error_events = [ ( {"type": "LlmEmbedding"}, @@ 
-449,34 +391,6 @@ def test_embeddings_invalid_request_error_no_model_async_with_raw_response(set_t ) # no model provided -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.with_raw_response.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -566,38 +480,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content_with_ra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.with_raw_response.create( - input="Model does not exist.", model="does-not-exist" - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py index 405a2a9e5f..3801d3639c 100644 --- a/tests/mlmodel_openai/test_embeddings_v1.py +++ b/tests/mlmodel_openai/test_embeddings_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -48,6 +48,7 @@ 
"response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999994, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -111,7 +112,7 @@ def test_openai_embedding_sync_no_content(set_trace_info, sync_openai_client): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_sync_with_token_count", @@ -206,7 +207,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info, async_openai_cl @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_async_with_token_count", diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 55dbd08105..8c2c0444f0 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -46,6 +46,14 @@ def add_token_count_to_embedding_events(expected_events): return events +def add_token_count_streaming_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionMessage": + event[1]["token_count"] = 0 + return events + + def add_token_counts_to_chat_events(expected_events): events = copy.deepcopy(expected_events) for event in events: