diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index c3f7960b6e..3484762951 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -129,11 +129,11 @@ def create_chat_completion_message_event(
     span_id,
     trace_id,
     response_model,
-    request_model,
     response_id,
     request_id,
     llm_metadata,
     output_message_list,
+    all_token_counts,
 ):
     settings = transaction.settings if transaction.settings is not None else global_settings()
@@ -153,11 +153,6 @@
             "request_id": request_id,
             "span_id": span_id,
             "trace_id": trace_id,
-            "token_count": (
-                settings.ai_monitoring.llm_token_count_callback(request_model, message_content)
-                if settings.ai_monitoring.llm_token_count_callback
-                else None
-            ),
             "role": message.get("role"),
             "completion_id": chat_completion_id,
             "sequence": index,
@@ -166,6 +161,9 @@
             "ingest_source": "Python",
         }

+        if all_token_counts:
+            chat_completion_input_message_dict["token_count"] = 0
+
         if settings.ai_monitoring.record_content.enabled:
             chat_completion_input_message_dict["content"] = message_content

@@ -193,11 +191,6 @@
                 "request_id": request_id,
                 "span_id": span_id,
                 "trace_id": trace_id,
-                "token_count": (
-                    settings.ai_monitoring.llm_token_count_callback(response_model, message_content)
-                    if settings.ai_monitoring.llm_token_count_callback
-                    else None
-                ),
                 "role": message.get("role"),
                 "completion_id": chat_completion_id,
                 "sequence": index,
@@ -207,6 +200,9 @@
                 "is_response": True,
             }

+            if all_token_counts:
+                chat_completion_output_message_dict["token_count"] = 0
+
             if settings.ai_monitoring.record_content.enabled:
                 chat_completion_output_message_dict["content"] = message_content

@@ -280,15 +276,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
         else getattr(attribute_response, "organization", None)
     )

+    response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None
+
+    total_tokens = (
+        settings.ai_monitoring.llm_token_count_callback(response_model, input_)
+        if settings.ai_monitoring.llm_token_count_callback and input_
+        else response_total_tokens
+    )
+
     full_embedding_response_dict = {
         "id": embedding_id,
         "span_id": span_id,
         "trace_id": trace_id,
-        "token_count": (
-            settings.ai_monitoring.llm_token_count_callback(response_model, input_)
-            if settings.ai_monitoring.llm_token_count_callback
-            else None
-        ),
         "request.model": kwargs.get("model") or kwargs.get("engine"),
         "request_id": request_id,
         "duration": ft.duration * 1000,
@@ -313,6 +312,7 @@
         "response.headers.ratelimitRemainingRequests": check_rate_limit_header(
             response_headers, "x-ratelimit-remaining-requests", True
         ),
+        "response.usage.total_tokens": total_tokens,
         "vendor": "openai",
         "ingest_source": "Python",
     }
@@ -475,12 +475,15 @@ def _handle_completion_success(transaction, linking_metadata, completion_id, kwa


 def _record_completion_success(transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response):
+    settings = transaction.settings if transaction.settings is not None else global_settings()
     span_id = linking_metadata.get("span.id")
     trace_id = linking_metadata.get("trace.id")
+
     try:
         if response:
             response_model = response.get("model")
             response_id = response.get("id")
+            token_usage = response.get("usage") or {}
             output_message_list = []
             finish_reason = None
             choices = response.get("choices") or []
@@ -494,6 +497,7 @@ def _record_completion_success(transaction, linking_metadata, completion_id, kwa
         else:
             response_model = kwargs.get("response.model")
             response_id = kwargs.get("id")
+            token_usage = {}
             output_message_list = []
             finish_reason = kwargs.get("finish_reason")
             if "content" in kwargs:
@@ -505,10 +509,44 @@
                 output_message_list = []

         request_model = kwargs.get("model") or kwargs.get("engine")
-        request_id = response_headers.get("x-request-id")
-        organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
         messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}]
         input_message_list = list(messages)
+
+        # Extract token counts from response object
+        if token_usage:
+            response_prompt_tokens = token_usage.get("prompt_tokens")
+            response_completion_tokens = token_usage.get("completion_tokens")
+            response_total_tokens = token_usage.get("total_tokens")
+
+        else:
+            response_prompt_tokens = None
+            response_completion_tokens = None
+            response_total_tokens = None
+
+        # Calculate token counts by checking if a callback is registered and if we have the necessary content to pass
+        # to it. If not, then we use the token counts provided in the response object
+        input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")])
+        prompt_tokens = (
+            settings.ai_monitoring.llm_token_count_callback(request_model, input_message_content)
+            if settings.ai_monitoring.llm_token_count_callback and input_message_content
+            else response_prompt_tokens
+        )
+        output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")])
+        completion_tokens = (
+            settings.ai_monitoring.llm_token_count_callback(response_model, output_message_content)
+            if settings.ai_monitoring.llm_token_count_callback and output_message_content
+            else response_completion_tokens
+        )
+
+        total_tokens = (
+            prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens
+        )
+
+        all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens)
+
+        request_id = response_headers.get("x-request-id")
+        organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
+
         full_chat_completion_summary_dict = {
             "id": completion_id,
             "span_id": span_id,
@@ -553,6 +591,12 @@
             ),
             "response.number_of_messages": len(input_message_list) + len(output_message_list),
         }
+
+        if all_token_counts:
+            full_chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens
+            full_chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens
+            full_chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens
+
         llm_metadata = _get_llm_attributes(transaction)
         full_chat_completion_summary_dict.update(llm_metadata)
         transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict)
@@ -564,11 +608,11 @@
             span_id,
             trace_id,
             response_model,
-            request_model,
             response_id,
             request_id,
             llm_metadata,
             output_message_list,
+            all_token_counts,
         )
     except Exception:
         _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info()))
@@ -579,6 +623,7
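For readers skimming the hunks above, here is a condensed, standalone sketch of the precedence rule `_record_completion_success` now applies. The function and variable names below are illustrative only, not part of the agent's API: a registered token-count callback wins whenever there is message content to pass to it, the response's `usage` block is the fallback, and usage attributes are only attached to events when all three counts resolve.

```python
def resolve_token_counts(callback, request_model, response_model, input_text, output_text, usage):
    """Mirror of the precedence logic above: callback first, response usage second."""
    usage = usage or {}
    prompt = (
        callback(request_model, input_text)
        if callback and input_text
        else usage.get("prompt_tokens")
    )
    completion = (
        callback(response_model, output_text)
        if callback and output_text
        else usage.get("completion_tokens")
    )
    # Total is derived from the callback counts when both are present,
    # otherwise it falls back to the total reported by the response.
    total = prompt + completion if prompt and completion else usage.get("total_tokens")
    # The final flag plays the role of all_token_counts: it gates whether
    # token_count/usage attributes are attached to the recorded events.
    return prompt, completion, total, bool(prompt and completion and total)


if __name__ == "__main__":
    # No callback registered: fall back entirely to the response usage block.
    print(resolve_token_counts(
        None, "gpt-4o", "gpt-4o", "Hi", "Hello!",
        {"prompt_tokens": 5, "completion_tokens": 7, "total_tokens": 12},
    ))
    # Callback registered: its counts win and the total is derived from them.
    print(resolve_token_counts(
        lambda model, text: len(text.split()), "gpt-4o", "gpt-4o", "Hi there", "Hello!", {},
    ))
```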
@@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg trace_id = linking_metadata.get("trace.id") request_message_list = kwargs.get("messages", None) or [] notice_error_attributes = {} + try: if OPENAI_V1: response = getattr(exc, "response", None) @@ -643,6 +688,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg output_message_list = [] if "content" in kwargs: output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}] + create_chat_completion_message_event( transaction, request_message_list, @@ -650,11 +696,12 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg span_id, trace_id, kwargs.get("response.model"), - request_model, response_id, request_id, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + all_token_counts=True, ) except Exception: _logger.warning(RECORD_EVENTS_FAILURE_LOG_MESSAGE, traceback.format_exception(*sys.exc_info())) diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py index 1a3cbbfd76..abf52efe09 100644 --- a/tests/mlmodel_langchain/test_chain.py +++ b/tests/mlmodel_langchain/test_chain.py @@ -359,6 +359,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999992, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 8, "vendor": "openai", "ingest_source": "Python", "input": "[[3923, 374, 220, 17, 489, 220, 19, 30]]", @@ -382,6 +383,7 @@ "response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999998, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 1, "vendor": "openai", "ingest_source": "Python", "input": "[[10590]]", @@ -452,6 +454,9 @@ "response.headers.ratelimitResetRequests": "8.64s", "response.headers.ratelimitRemainingTokens": 199912, "response.headers.ratelimitRemainingRequests": 9999, + "response.usage.prompt_tokens": 73, + "response.usage.completion_tokens": 375, + "response.usage.total_tokens": 448, "response.number_of_messages": 3, }, ], @@ -467,6 +472,7 @@ "sequence": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4?", }, @@ -483,6 +489,7 @@ "sequence": 1, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "math", }, @@ -499,6 +506,7 @@ "sequence": 2, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "is_response": True, "content": "```html\n\n\n\n Math Quiz\n\n\n

Math Quiz Questions

\n
    \n
  1. What is the result of 5 + 3?
  2. \n \n
  3. What is the product of 6 x 7?
  4. \n \n
  5. What is the square root of 64?
  6. \n \n
  7. What is the result of 12 / 4?
  8. \n \n
  9. What is the sum of 15 + 9?
  10. \n \n
\n\n\n```", diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index 1f8cf1cb74..5e4d209ed7 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -55,6 +55,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 11, + "response.usage.total_tokens": 64, + "response.usage.prompt_tokens": 53, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 200, @@ -81,6 +84,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -99,6 +103,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -117,6 +122,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "is_response": True, @@ -172,7 +178,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -343,7 +349,7 @@ def test_openai_chat_completion_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py index bfb2267a33..97a4dd8793 100644 --- a/tests/mlmodel_openai/test_chat_completion_error.py +++ b/tests/mlmodel_openai/test_chat_completion_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +66,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": 
"Python", }, @@ -83,6 +82,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -186,6 +186,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -193,36 +194,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() @@ -281,6 +252,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -296,6 +268,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -360,6 +333,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -471,37 +445,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with 
pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py index 9be9fcab9c..5af1598847 100644 --- a/tests/mlmodel_openai/test_chat_completion_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py @@ -14,13 +14,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -67,6 +65,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -82,6 +81,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -229,6 +229,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -266,37 +267,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -329,41 +299,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", 
"http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - expected_events_on_wrong_api_key_error = [ ( {"type": "LlmChatCompletionSummary"}, @@ -391,6 +326,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -610,39 +546,6 @@ def test_chat_completion_invalid_request_error_invalid_model_with_raw_response(s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -677,41 +580,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async_with_raw_resp ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - 
"test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py index ad89d6f260..8019c0b6a9 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream.py +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -15,7 +15,8 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -184,9 +185,101 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): assert resp +chat_completion_recorded_token_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openai", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + 
"trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -378,7 +471,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_chat_completion_stream:test_openai_chat_completion_async_with_token_count", diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index eebb5ee8fb..e8e55426e9 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +66,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -83,6 +82,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -191,6 +191,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -198,38 +199,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 
404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -290,6 +259,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -305,6 +275,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -374,6 +345,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -488,38 +460,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -649,6 +589,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py 
b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py index 5f769ea0e6..64798300fc 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py @@ -12,16 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. - import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -68,6 +65,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -83,6 +81,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -243,6 +242,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -281,77 +281,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn assert resp -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - generator = sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) 
-@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_async_with_token_count( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - async def consumer(): - generator = await async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - async for resp in generator: - assert resp - - loop.run_until_complete(consumer()) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -414,6 +343,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py index 796404012b..c88e8b1df6 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py @@ -17,7 +17,8 @@ from conftest import get_openai_version from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -300,7 +301,9 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -622,7 +625,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant # @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py index 817db35d8e..007effcb17 100644 --- a/tests/mlmodel_openai/test_chat_completion_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -54,6 +54,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 75, + 
"response.usage.total_tokens": 101, + "response.usage.prompt_tokens": 26, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 10000, @@ -80,6 +83,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "ingest_source": "Python", @@ -98,6 +102,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "ingest_source": "Python", @@ -116,6 +121,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", "is_response": True, @@ -193,7 +199,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -389,7 +395,7 @@ def test_openai_chat_completion_async_with_llm_metadata_no_content(loop, set_tra @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py index c3c3e7c429..935db04fe0 100644 --- a/tests/mlmodel_openai/test_embeddings.py +++ b/tests/mlmodel_openai/test_embeddings.py @@ -19,7 +19,7 @@ validate_attributes, ) from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -55,6 +55,7 @@ "response.headers.ratelimitResetRequests": "19m45.394s", "response.headers.ratelimitRemainingTokens": 149994, "response.headers.ratelimitRemainingRequests": 197, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -107,7 +108,7 @@ def test_openai_embedding_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_sync_with_token_count", @@ -191,7 +192,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_async_with_token_count", diff --git 
a/tests/mlmodel_openai/test_embeddings_error_v1.py b/tests/mlmodel_openai/test_embeddings_error_v1.py index fd29236122..499f96893b 100644 --- a/tests/mlmodel_openai/test_embeddings_error_v1.py +++ b/tests/mlmodel_openai/test_embeddings_error_v1.py @@ -16,12 +16,10 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -149,32 +147,6 @@ def test_embeddings_invalid_request_error_no_model_async(set_trace_info, async_o ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -255,36 +227,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content(set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - ) - - embedding_invalid_key_error_events = [ ( {"type": "LlmEmbedding"}, @@ 
-449,34 +391,6 @@ def test_embeddings_invalid_request_error_no_model_async_with_raw_response(set_t ) # no model provided -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.with_raw_response.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -566,38 +480,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content_with_ra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.with_raw_response.create( - input="Model does not exist.", model="does-not-exist" - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py index 405a2a9e5f..3801d3639c 100644 --- a/tests/mlmodel_openai/test_embeddings_v1.py +++ b/tests/mlmodel_openai/test_embeddings_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -48,6 +48,7 @@ 
"response.headers.ratelimitResetRequests": "20ms", "response.headers.ratelimitRemainingTokens": 999994, "response.headers.ratelimitRemainingRequests": 2999, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -111,7 +112,7 @@ def test_openai_embedding_sync_no_content(set_trace_info, sync_openai_client): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_sync_with_token_count", @@ -206,7 +207,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info, async_openai_cl @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_async_with_token_count", diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 55dbd08105..8c2c0444f0 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -46,6 +46,14 @@ def add_token_count_to_embedding_events(expected_events): return events +def add_token_count_streaming_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionMessage": + event[1]["token_count"] = 0 + return events + + def add_token_counts_to_chat_events(expected_events): events = copy.deepcopy(expected_events) for event in events: