diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index c3f7960b6e..3484762951 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -129,11 +129,11 @@ def create_chat_completion_message_event(
span_id,
trace_id,
response_model,
- request_model,
response_id,
request_id,
llm_metadata,
output_message_list,
+ all_token_counts,
):
settings = transaction.settings if transaction.settings is not None else global_settings()
@@ -153,11 +153,6 @@ def create_chat_completion_message_event(
"request_id": request_id,
"span_id": span_id,
"trace_id": trace_id,
- "token_count": (
- settings.ai_monitoring.llm_token_count_callback(request_model, message_content)
- if settings.ai_monitoring.llm_token_count_callback
- else None
- ),
"role": message.get("role"),
"completion_id": chat_completion_id,
"sequence": index,
@@ -166,6 +161,9 @@ def create_chat_completion_message_event(
"ingest_source": "Python",
}
+ if all_token_counts:
+ chat_completion_input_message_dict["token_count"] = 0
+
if settings.ai_monitoring.record_content.enabled:
chat_completion_input_message_dict["content"] = message_content
@@ -193,11 +191,6 @@ def create_chat_completion_message_event(
"request_id": request_id,
"span_id": span_id,
"trace_id": trace_id,
- "token_count": (
- settings.ai_monitoring.llm_token_count_callback(response_model, message_content)
- if settings.ai_monitoring.llm_token_count_callback
- else None
- ),
"role": message.get("role"),
"completion_id": chat_completion_id,
"sequence": index,
@@ -207,6 +200,9 @@ def create_chat_completion_message_event(
"is_response": True,
}
+ if all_token_counts:
+ chat_completion_output_message_dict["token_count"] = 0
+
if settings.ai_monitoring.record_content.enabled:
chat_completion_output_message_dict["content"] = message_content
@@ -280,15 +276,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
else getattr(attribute_response, "organization", None)
)
+ response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None
+
+ total_tokens = (
+ settings.ai_monitoring.llm_token_count_callback(response_model, input_)
+ if settings.ai_monitoring.llm_token_count_callback and input_
+ else response_total_tokens
+ )
+
full_embedding_response_dict = {
"id": embedding_id,
"span_id": span_id,
"trace_id": trace_id,
- "token_count": (
- settings.ai_monitoring.llm_token_count_callback(response_model, input_)
- if settings.ai_monitoring.llm_token_count_callback
- else None
- ),
"request.model": kwargs.get("model") or kwargs.get("engine"),
"request_id": request_id,
"duration": ft.duration * 1000,
@@ -313,6 +312,7 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
"response.headers.ratelimitRemainingRequests": check_rate_limit_header(
response_headers, "x-ratelimit-remaining-requests", True
),
+ "response.usage.total_tokens": total_tokens,
"vendor": "openai",
"ingest_source": "Python",
}
@@ -475,12 +475,15 @@ def _handle_completion_success(transaction, linking_metadata, completion_id, kwa
def _record_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response):
+ settings = transaction.settings if transaction.settings is not None else global_settings()
span_id = linking_metadata.get("span.id")
trace_id = linking_metadata.get("trace.id")
+
try:
if response:
response_model = response.get("model")
response_id = response.get("id")
+ token_usage = response.get("usage") or {}
output_message_list = []
finish_reason = None
choices = response.get("choices") or []
@@ -494,6 +497,7 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa
else:
response_model = kwargs.get("response.model")
response_id = kwargs.get("id")
+ token_usage = {}
output_message_list = []
finish_reason = kwargs.get("finish_reason")
if "content" in kwargs:
@@ -505,10 +509,44 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa
output_message_list = []
request_model = kwargs.get("model") or kwargs.get("engine")
- request_id = response_headers.get("x-request-id")
- organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}]
input_message_list = list(messages)
+
+    # Extract token counts from the response object
+ if token_usage:
+ response_prompt_tokens = token_usage.get("prompt_tokens")
+ response_completion_tokens = token_usage.get("completion_tokens")
+ response_total_tokens = token_usage.get("total_tokens")
+
+ else:
+ response_prompt_tokens = None
+ response_completion_tokens = None
+ response_total_tokens = None
+
+    # Determine token counts: prefer a registered callback when there is content to pass to it;
+    # otherwise fall back to the token counts reported in the response object.
+ input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")])
+ prompt_tokens = (
+ settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content)
+ if settings.ai_monitoring.llm_token_count_callback and input_message_content
+ else response_prompt_tokens
+ )
+ output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")])
+ completion_tokens = (
+ settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content)
+ if settings.ai_monitoring.llm_token_count_callback and output_message_content
+ else response_completion_tokens
+ )
+
+ total_tokens = (
+ prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens
+ )
+
+ all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens)
+
+ request_id = response_headers.get("x-request-id")
+ organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
+
full_chat_completion_summary_dict = {
"id": completion_id,
"span_id": span_id,
@@ -553,6 +591,12 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa
),
"response.number_of_messages": len(input_message_list) + len(output_message_list),
}
+
+ if all_token_counts:
+ full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens
+ full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens
+ full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens
+
llm_metadata = _get_llm_attributes(transaction)
full_chat_completion_summary_dict.update(llm_metadata)
transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict)
@@ -564,11 +608,11 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa
span_id,
trace_id,
response_model,
- request_model,
response_id,
request_id,
llm_metadata,
output_message_list,
+ all_token_counts,
)
except Exception:
_logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info()))
@@ -579,6 +623,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
trace_id = linking_metadata.get("trace.id")
request_message_list = kwargs.get("messages", None) or []
notice_error_attributes = {}
+
try:
if OPENAI_V1:
response = getattr(exc, "response", None)
@@ -643,6 +688,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
output_message_list = []
if "content" in kwargs:
output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}]
+
create_chat_completion_message_event(
transaction,
request_message_list,
@@ -650,11 +696,12 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
span_id,
trace_id,
kwargs.get("response.model"),
- request_model,
response_id,
request_id,
llm_metadata,
output_message_list,
+        # Token counts are not recorded in error cases; set all_token_counts to True so the pipeline tokenizer does not run
+ all_token_counts=True,
)
except Exception:
_logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info()))
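
The precedence implemented in _record_completion_success above is: use a registered ai_monitoring.llm_token_count_callback whenever there is message content to hand it, otherwise fall back to the counts reported in the response's usage object, and only derive a total when both sides are available. The standalone sketch below restates that precedence outside the hook; resolve_token_counts and its plain-dict usage argument are illustrative assumptions, not agent API.

# Illustrative sketch only -- resolve_token_counts and the plain-dict "usage"
# argument are assumptions for demonstration, not part of the agent's API.
def resolve_token_counts(callback, request_model, response_model, prompt_text, completion_text, usage):
    # A registered callback wins whenever there is content to count.
    prompt_tokens = (
        callback(request_model, prompt_text) if callback and prompt_text else usage.get("prompt_tokens")
    )
    completion_tokens = (
        callback(response_model, completion_text) if callback and completion_text else usage.get("completion_tokens")
    )
    # Derive the total only when both counts are available; otherwise trust the response.
    total_tokens = (
        prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else usage.get("total_tokens")
    )
    return prompt_tokens, completion_tokens, total_tokens


# With no callback registered, the response-reported usage is returned as-is.
print(resolve_token_counts(
    None, "gpt-3.5-turbo", "gpt-3.5-turbo-0613", "What is 212F in C?", "100C.",
    {"prompt_tokens": 53, "completion_tokens": 11, "total_tokens": 64},
))  # -> (53, 11, 64)
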
diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py
index 1a3cbbfd76..abf52efe09 100644
--- a/tests/mlmodel_langchain/test_chain.py
+++ b/tests/mlmodel_langchain/test_chain.py
@@ -359,6 +359,7 @@
"response.headers.ratelimitResetRequests": "20ms",
"response.headers.ratelimitRemainingTokens": 999992,
"response.headers.ratelimitRemainingRequests": 2999,
+ "response.usage.total_tokens": 8,
"vendor": "openai",
"ingest_source": "Python",
"input": "[[3923, 374, 220, 17, 489, 220, 19, 30]]",
@@ -382,6 +383,7 @@
"response.headers.ratelimitResetRequests": "20ms",
"response.headers.ratelimitRemainingTokens": 999998,
"response.headers.ratelimitRemainingRequests": 2999,
+ "response.usage.total_tokens": 1,
"vendor": "openai",
"ingest_source": "Python",
"input": "[[10590]]",
@@ -452,6 +454,9 @@
"response.headers.ratelimitResetRequests": "8.64s",
"response.headers.ratelimitRemainingTokens": 199912,
"response.headers.ratelimitRemainingRequests": 9999,
+ "response.usage.prompt_tokens": 73,
+ "response.usage.completion_tokens": 375,
+ "response.usage.total_tokens": 448,
"response.number_of_messages": 3,
},
],
@@ -467,6 +472,7 @@
"sequence": 0,
"response.model": "gpt-3.5-turbo-0125",
"vendor": "openai",
+ "token_count": 0,
"ingest_source": "Python",
"content": "You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4?",
},
@@ -483,6 +489,7 @@
"sequence": 1,
"response.model": "gpt-3.5-turbo-0125",
"vendor": "openai",
+ "token_count": 0,
"ingest_source": "Python",
"content": "math",
},
@@ -499,6 +506,7 @@
"sequence": 2,
"response.model": "gpt-3.5-turbo-0125",
"vendor": "openai",
+ "token_count": 0,
"ingest_source": "Python",
"is_response": True,
"content": "```html\n\n\n
\n Math Quiz\n\n\n Math Quiz Questions
\n \n - What is the result of 5 + 3?
\n \n - A) 7
\n - B) 8
\n - C) 9
\n - D) 10
\n
\n - What is the product of 6 x 7?
\n \n - A) 36
\n - B) 42
\n - C) 48
\n - D) 56
\n
\n - What is the square root of 64?
\n \n - A) 6
\n - B) 7
\n - C) 8
\n - D) 9
\n
\n - What is the result of 12 / 4?
\n \n - A) 2
\n - B) 3
\n - C) 4
\n - D) 5
\n
\n - What is the sum of 15 + 9?
\n \n - A) 22
\n - B) 23
\n - C) 24
\n - D) 25
\n
\n
\n\n\n```",
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index 1f8cf1cb74..5e4d209ed7 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -15,7 +15,7 @@
import openai
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_counts_to_chat_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
disabled_ai_monitoring_streaming_settings,
@@ -55,6 +55,9 @@
"response.organization": "new-relic-nkmd8b",
"request.temperature": 0.7,
"request.max_tokens": 100,
+ "response.usage.completion_tokens": 11,
+ "response.usage.total_tokens": 64,
+ "response.usage.prompt_tokens": 53,
"response.choices.finish_reason": "stop",
"response.headers.llmVersion": "2020-10-01",
"response.headers.ratelimitLimitRequests": 200,
@@ -81,6 +84,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"response.model": "gpt-3.5-turbo-0613",
"vendor": "openai",
"ingest_source": "Python",
@@ -99,6 +103,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"response.model": "gpt-3.5-turbo-0613",
"vendor": "openai",
"ingest_source": "Python",
@@ -117,6 +122,7 @@
"role": "assistant",
"completion_id": None,
"sequence": 2,
+ "token_count": 0,
"response.model": "gpt-3.5-turbo-0613",
"vendor": "openai",
"is_response": True,
@@ -172,7 +178,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info):
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
@@ -343,7 +349,7 @@ def test_openai_chat_completion_async_no_content(loop, set_trace_info):
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
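
The token-count tests above install the shared llm_token_count_callback helper through the override_llm_token_callback_settings fixture. Outside the test suite, a user-supplied callback only needs to accept (model, content) and return an integer; the sketch below is a hypothetical example of such a callback, not code from this change.

# Hypothetical user-side callback: the (model, content) -> int shape mirrors how
# settings.ai_monitoring.llm_token_count_callback is invoked in the hook changes above.
def my_token_count_callback(model, content):
    # Crude stand-in tokenizer: roughly one token per whitespace-separated word.
    return len(content.split())

# Registration is typically done through the agent API, for example:
#   import newrelic.agent
#   newrelic.agent.set_llm_token_count_callback(my_token_count_callback)
# (the tests above install the callback via the override_llm_token_callback_settings fixture instead).
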
diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py
index bfb2267a33..97a4dd8793 100644
--- a/tests/mlmodel_openai/test_chat_completion_error.py
+++ b/tests/mlmodel_openai/test_chat_completion_error.py
@@ -15,13 +15,11 @@
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
events_with_context_attrs,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -68,6 +66,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -83,6 +82,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -186,6 +186,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -193,36 +194,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
]
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.InvalidRequestError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info):
- set_trace_info()
- with pytest.raises(openai.InvalidRequestError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
-
- openai.ChatCompletion.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- )
-
-
# Invalid model provided
@dt_enabled
@reset_core_stats_engine()
@@ -281,6 +252,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -296,6 +268,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -360,6 +333,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info):
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -471,37 +445,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.InvalidRequestError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async",
- scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info):
- set_trace_info()
- with pytest.raises(openai.InvalidRequestError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- loop.run_until_complete(
- openai.ChatCompletion.acreate(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- )
- )
-
-
# Invalid model provided
@dt_enabled
@reset_core_stats_engine()
diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py
index 9be9fcab9c..5af1598847 100644
--- a/tests/mlmodel_openai/test_chat_completion_error_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py
@@ -14,13 +14,11 @@
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
events_with_context_attrs,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -67,6 +65,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -82,6 +81,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -229,6 +229,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -266,37 +267,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- sync_openai_client.chat.completions.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- )
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -329,41 +299,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(
- loop, set_trace_info, async_openai_client
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- loop.run_until_complete(
- async_openai_client.chat.completions.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- )
- )
-
-
expected_events_on_wrong_api_key_error = [
(
{"type": "LlmChatCompletionSummary"},
@@ -391,6 +326,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -610,39 +546,6 @@ def test_chat_completion_invalid_request_error_invalid_model_with_raw_response(s
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response(
- set_trace_info, sync_openai_client
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- sync_openai_client.chat.completions.with_raw_response.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- )
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -677,41 +580,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async_with_raw_resp
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response(
- loop, set_trace_info, async_openai_client
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- loop.run_until_complete(
- async_openai_client.chat.completions.with_raw_response.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- )
- )
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py
index ad89d6f260..8019c0b6a9 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream.py
@@ -15,7 +15,8 @@
import openai
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_count_streaming_events,
+ add_token_counts_to_chat_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
disabled_ai_monitoring_streaming_settings,
@@ -184,9 +185,101 @@ def test_openai_chat_completion_sync_no_content(set_trace_info):
assert resp
+chat_completion_recorded_token_events = [
+ (
+ {"type": "LlmChatCompletionSummary"},
+ {
+ "id": None, # UUID that varies with each run
+ "llm.conversation_id": "my-awesome-id",
+ "llm.foo": "bar",
+ "span_id": None,
+ "trace_id": "trace-id",
+ "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+ "duration": None, # Response time varies each test run
+ "request.model": "gpt-3.5-turbo",
+ "response.model": "gpt-3.5-turbo-0613",
+ "response.organization": "new-relic-nkmd8b",
+ "request.temperature": 0.7,
+ "request.max_tokens": 100,
+ "response.choices.finish_reason": "stop",
+ "response.headers.llmVersion": "2020-10-01",
+ "response.headers.ratelimitLimitRequests": 200,
+ "response.headers.ratelimitLimitTokens": 40000,
+ "response.headers.ratelimitResetTokens": "90ms",
+ "response.headers.ratelimitResetRequests": "7m12s",
+ "response.headers.ratelimitRemainingTokens": 39940,
+ "response.headers.ratelimitRemainingRequests": 199,
+ "vendor": "openai",
+ "ingest_source": "Python",
+ "response.number_of_messages": 3,
+ },
+ ),
+ (
+ {"type": "LlmChatCompletionMessage"},
+ {
+ "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0",
+ "llm.conversation_id": "my-awesome-id",
+ "llm.foo": "bar",
+ "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+ "span_id": None,
+ "trace_id": "trace-id",
+ "content": "You are a scientist.",
+ "role": "system",
+ "completion_id": None,
+ "sequence": 0,
+ "token_count": 0,
+ "response.model": "gpt-3.5-turbo-0613",
+ "vendor": "openai",
+ "ingest_source": "Python",
+ },
+ ),
+ (
+ {"type": "LlmChatCompletionMessage"},
+ {
+ "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1",
+ "llm.conversation_id": "my-awesome-id",
+ "llm.foo": "bar",
+ "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+ "span_id": None,
+ "trace_id": "trace-id",
+ "content": "What is 212 degrees Fahrenheit converted to Celsius?",
+ "role": "user",
+ "completion_id": None,
+ "sequence": 1,
+ "token_count": 0,
+ "response.model": "gpt-3.5-turbo-0613",
+ "vendor": "openai",
+ "ingest_source": "Python",
+ },
+ ),
+ (
+ {"type": "LlmChatCompletionMessage"},
+ {
+ "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2",
+ "llm.conversation_id": "my-awesome-id",
+ "llm.foo": "bar",
+ "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+ "span_id": None,
+ "trace_id": "trace-id",
+ "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+ "role": "assistant",
+ "completion_id": None,
+ "sequence": 2,
+ "token_count": 0,
+ "response.model": "gpt-3.5-turbo-0613",
+ "vendor": "openai",
+ "is_response": True,
+ "ingest_source": "Python",
+ },
+ ),
+]
+
+
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+ add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
@@ -378,7 +471,9 @@ async def consumer():
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+ add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
name="test_chat_completion_stream:test_openai_chat_completion_async_with_token_count",
diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py
index eebb5ee8fb..e8e55426e9 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream_error.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py
@@ -15,13 +15,11 @@
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
events_with_context_attrs,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -68,6 +66,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -83,6 +82,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -191,6 +191,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -198,38 +199,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
]
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.InvalidRequestError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info):
- set_trace_info()
- with pytest.raises(openai.InvalidRequestError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- generator = openai.ChatCompletion.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- stream=True,
- )
- for resp in generator:
- assert resp
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -290,6 +259,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -305,6 +275,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -374,6 +345,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info):
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -488,38 +460,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.InvalidRequestError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async",
- scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info):
- set_trace_info()
- with pytest.raises(openai.InvalidRequestError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- loop.run_until_complete(
- openai.ChatCompletion.acreate(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- stream=True,
- )
- )
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -649,6 +589,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py
index 5f769ea0e6..64798300fc 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py
@@ -12,16 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
events_with_context_attrs,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -68,6 +65,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -83,6 +81,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -243,6 +242,7 @@ async def consumer():
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -281,77 +281,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn
assert resp
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
-
- generator = sync_openai_client.chat.completions.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- stream=True,
- )
- for resp in generator:
- assert resp
-
-
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_async_with_token_count",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_async_with_token_count(
- loop, set_trace_info, async_openai_client
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
-
- async def consumer():
- generator = await async_openai_client.chat.completions.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- stream=True,
- )
- async for resp in generator:
- assert resp
-
- loop.run_until_complete(consumer())
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -414,6 +343,7 @@ async def consumer():
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py
index 796404012b..c88e8b1df6 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py
@@ -17,7 +17,8 @@
from conftest import get_openai_version
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_count_streaming_events,
+ add_token_counts_to_chat_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
disabled_ai_monitoring_streaming_settings,
@@ -300,7 +301,9 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+ add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
@@ -622,7 +625,9 @@ async def consumer():
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+ add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
# One summary event, one system message, one user message, and one response message from the assistant
# @validate_custom_event_count(count=4)
@validate_transaction_metrics(
diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py
index 817db35d8e..007effcb17 100644
--- a/tests/mlmodel_openai/test_chat_completion_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_v1.py
@@ -15,7 +15,7 @@
import openai
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_counts_to_chat_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
disabled_ai_monitoring_streaming_settings,
@@ -54,6 +54,9 @@
"response.organization": "new-relic-nkmd8b",
"request.temperature": 0.7,
"request.max_tokens": 100,
+ "response.usage.completion_tokens": 75,
+ "response.usage.total_tokens": 101,
+ "response.usage.prompt_tokens": 26,
"response.choices.finish_reason": "stop",
"response.headers.llmVersion": "2020-10-01",
"response.headers.ratelimitLimitRequests": 10000,
@@ -80,6 +83,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"response.model": "gpt-3.5-turbo-0125",
"vendor": "openai",
"ingest_source": "Python",
@@ -98,6 +102,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"response.model": "gpt-3.5-turbo-0125",
"vendor": "openai",
"ingest_source": "Python",
@@ -116,6 +121,7 @@
"role": "assistant",
"completion_id": None,
"sequence": 2,
+ "token_count": 0,
"response.model": "gpt-3.5-turbo-0125",
"vendor": "openai",
"is_response": True,
@@ -193,7 +199,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
@@ -389,7 +395,7 @@ def test_openai_chat_completion_async_with_llm_metadata_no_content(loop, set_tra
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py
index c3c3e7c429..935db04fe0 100644
--- a/tests/mlmodel_openai/test_embeddings.py
+++ b/tests/mlmodel_openai/test_embeddings.py
@@ -19,7 +19,7 @@
validate_attributes,
)
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_count_to_embedding_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
events_sans_content,
@@ -55,6 +55,7 @@
"response.headers.ratelimitResetRequests": "19m45.394s",
"response.headers.ratelimitRemainingTokens": 149994,
"response.headers.ratelimitRemainingRequests": 197,
+ "response.usage.total_tokens": 6,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -107,7 +108,7 @@ def test_openai_embedding_sync_no_content(set_trace_info):
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
@validate_custom_event_count(count=1)
@validate_transaction_metrics(
name="test_embeddings:test_openai_embedding_sync_with_token_count",
@@ -191,7 +192,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info):
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
@validate_custom_event_count(count=1)
@validate_transaction_metrics(
name="test_embeddings:test_openai_embedding_async_with_token_count",
diff --git a/tests/mlmodel_openai/test_embeddings_error_v1.py b/tests/mlmodel_openai/test_embeddings_error_v1.py
index fd29236122..499f96893b 100644
--- a/tests/mlmodel_openai/test_embeddings_error_v1.py
+++ b/tests/mlmodel_openai/test_embeddings_error_v1.py
@@ -16,12 +16,10 @@
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -149,32 +147,6 @@ def test_embeddings_invalid_request_error_no_model_async(set_trace_info, async_o
]
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
- rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
- custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- sync_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist")
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -255,36 +227,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content(set_tra
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count",
- scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
- rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
- custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_async_with_token_count(
- set_trace_info, async_openai_client, loop
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- loop.run_until_complete(
- async_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist")
- )
-
-
embedding_invalid_key_error_events = [
(
{"type": "LlmEmbedding"},
@@ -449,34 +391,6 @@ def test_embeddings_invalid_request_error_no_model_async_with_raw_response(set_t
) # no model provided
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response",
- scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
- rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
- custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response(
- set_trace_info, sync_openai_client
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- sync_openai_client.embeddings.with_raw_response.create(input="Model does not exist.", model="does-not-exist")
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -566,38 +480,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content_with_ra
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response",
- scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
- rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
- custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response(
- set_trace_info, async_openai_client, loop
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- loop.run_until_complete(
- async_openai_client.embeddings.with_raw_response.create(
- input="Model does not exist.", model="does-not-exist"
- )
- )
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py
index 405a2a9e5f..3801d3639c 100644
--- a/tests/mlmodel_openai/test_embeddings_v1.py
+++ b/tests/mlmodel_openai/test_embeddings_v1.py
@@ -15,7 +15,7 @@
import openai
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_count_to_embedding_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
events_sans_content,
@@ -48,6 +48,7 @@
"response.headers.ratelimitResetRequests": "20ms",
"response.headers.ratelimitRemainingTokens": 999994,
"response.headers.ratelimitRemainingRequests": 2999,
+ "response.usage.total_tokens": 6,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -111,7 +112,7 @@ def test_openai_embedding_sync_no_content(set_trace_info, sync_openai_client):
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
@validate_custom_event_count(count=1)
@validate_transaction_metrics(
name="test_embeddings_v1:test_openai_embedding_sync_with_token_count",
@@ -206,7 +207,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info, async_openai_cl
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
@validate_custom_event_count(count=1)
@validate_transaction_metrics(
name="test_embeddings_v1:test_openai_embedding_async_with_token_count",
diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py
index 55dbd08105..8c2c0444f0 100644
--- a/tests/testing_support/ml_testing_utils.py
+++ b/tests/testing_support/ml_testing_utils.py
@@ -46,6 +46,14 @@ def add_token_count_to_embedding_events(expected_events):
return events
+def add_token_count_streaming_events(expected_events):
+ events = copy.deepcopy(expected_events)
+ for event in events:
+ if event[0]["type"] == "LlmChatCompletionMessage":
+ event[1]["token_count"] = 0
+ return events
+
+
def add_token_counts_to_chat_events(expected_events):
events = copy.deepcopy(expected_events)
for event in events:
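
For reference, the new add_token_count_streaming_events helper above only touches LlmChatCompletionMessage entries, pinning token_count to 0 and leaving summary events alone. The self-contained demonstration below copies that helper verbatim and runs it over a made-up two-event list; the demo data is illustrative only.

import copy

# Copy of the helper added above, repeated here so the demo is self-contained.
def add_token_count_streaming_events(expected_events):
    events = copy.deepcopy(expected_events)
    for event in events:
        if event[0]["type"] == "LlmChatCompletionMessage":
            event[1]["token_count"] = 0
    return events

# Made-up expected-events list for illustration.
demo_events = [
    ({"type": "LlmChatCompletionSummary"}, {"response.number_of_messages": 1}),
    ({"type": "LlmChatCompletionMessage"}, {"sequence": 0}),
]
print(add_token_count_streaming_events(demo_events))
# The message event gains "token_count": 0; the summary event is unchanged.
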