diff --git a/newrelic/hooks/external_botocore.py b/newrelic/hooks/external_botocore.py
index d8c18b49db..a11064ac36 100644
--- a/newrelic/hooks/external_botocore.py
+++ b/newrelic/hooks/external_botocore.py
@@ -192,6 +192,7 @@ def create_chat_completion_message_event(
     request_model,
     request_id,
     llm_metadata_dict,
+    all_token_counts,
     response_id=None,
 ):
     if not transaction:
@@ -224,6 +225,8 @@ def create_chat_completion_message_event(
             "vendor": "bedrock",
             "ingest_source": "Python",
         }
+        if all_token_counts:
+            chat_completion_message_dict["token_count"] = 0
         if settings.ai_monitoring.record_content.enabled:
             chat_completion_message_dict["content"] = content
@@ -263,6 +266,8 @@ def create_chat_completion_message_event(
             "ingest_source": "Python",
             "is_response": True,
         }
+        if all_token_counts:
+            chat_completion_message_dict["token_count"] = 0
         if settings.ai_monitoring.record_content.enabled:
             chat_completion_message_dict["content"] = content
@@ -272,24 +277,21 @@ def create_chat_completion_message_event(
         transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_message_dict)
 
 
-def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs):
+def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs):
     request_body = json.loads(request_body)
-    request_config = request_body.get("textGenerationConfig", {})
 
-    input_message_list = [{"role": "user", "content": request_body.get("inputText")}]
-
-    bedrock_attrs["input_message_list"] = input_message_list
-    bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount")
-    bedrock_attrs["request.temperature"] = request_config.get("temperature")
+    bedrock_attrs["input"] = request_body.get("inputText")
 
     return bedrock_attrs
 
 
-def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs):
-    request_body = json.loads(request_body)
-    bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}]
-    bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens")
-    bedrock_attrs["request.temperature"] = request_body.get("temperature")
+def extract_bedrock_titan_embedding_model_response(response_body, bedrock_attrs):
+    if response_body:
+        response_body = json.loads(response_body)
+
+        input_tokens = response_body.get("inputTextTokenCount", 0)
+        bedrock_attrs["response.usage.total_tokens"] = input_tokens
 
     return bedrock_attrs
 
@@ -297,16 +299,31 @@ def extract_bedrock_titan_text_model_response(response_body, bedrock_attrs):
     if response_body:
         response_body = json.loads(response_body)
 
+        input_tokens = response_body.get("inputTextTokenCount", 0)
+        completion_tokens = sum(result.get("tokenCount", 0) for result in response_body.get("results", []))
+        total_tokens = input_tokens + completion_tokens
+
         output_message_list = [
-            {"role": "assistant", "content": result["outputText"]} for result in response_body.get("results", [])
+            {"role": "assistant", "content": result.get("outputText")} for result in response_body.get("results", [])
         ]
 
         bedrock_attrs["response.choices.finish_reason"] = response_body["results"][0]["completionReason"]
+        bedrock_attrs["response.usage.completion_tokens"] = completion_tokens
+        bedrock_attrs["response.usage.prompt_tokens"] = input_tokens
+        bedrock_attrs["response.usage.total_tokens"] = total_tokens
         bedrock_attrs["output_message_list"] = output_message_list
 
     return bedrock_attrs
 
 
+def extract_bedrock_mistral_text_model_request(request_body, bedrock_attrs):
+    request_body = json.loads(request_body)
+    bedrock_attrs["input_message_list"] = [{"role": "user", "content": request_body.get("prompt")}]
+    bedrock_attrs["request.max_tokens"] = request_body.get("max_tokens")
+    bedrock_attrs["request.temperature"] = request_body.get("temperature")
+    return bedrock_attrs
+
+
 def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs):
     if response_body:
         response_body = json.loads(response_body)
@@ -319,17 +336,6 @@ def extract_bedrock_mistral_text_model_response(response_body, bedrock_attrs):
     return bedrock_attrs
 
 
-def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs):
-    if response_body:
-        if "outputText" in response_body:
-            bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", [])
-            messages.append({"role": "assistant", "content": response_body["outputText"]})
-
-        bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason", None)
-
-    return bedrock_attrs
-
-
 def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock_attrs):
     if response_body:
         outputs = response_body.get("outputs")
@@ -342,10 +348,42 @@ def extract_bedrock_mistral_text_model_streaming_response(response_body, bedrock_attrs):
     return bedrock_attrs
 
 
-def extract_bedrock_titan_embedding_model_request(request_body, bedrock_attrs):
+def extract_bedrock_titan_text_model_request(request_body, bedrock_attrs):
     request_body = json.loads(request_body)
+    request_config = request_body.get("textGenerationConfig", {})
 
-    bedrock_attrs["input"] = request_body.get("inputText")
+    input_message_list = [{"role": "user", "content": request_body.get("inputText")}]
+
+    bedrock_attrs["input_message_list"] = input_message_list
+    bedrock_attrs["request.max_tokens"] = request_config.get("maxTokenCount")
+    bedrock_attrs["request.temperature"] = request_config.get("temperature")
+
+    return bedrock_attrs
+
+
+def extract_bedrock_titan_text_model_streaming_response(response_body, bedrock_attrs):
+    if response_body:
+        if "outputText" in response_body:
+            bedrock_attrs["output_message_list"] = messages = bedrock_attrs.get("output_message_list", [])
+            messages.append({"role": "assistant", "content": response_body["outputText"]})
+
+        bedrock_attrs["response.choices.finish_reason"] = response_body.get("completionReason", None)
+
+        # Extract token information
+        invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {})
+        prompt_tokens = invocation_metrics.get("inputTokenCount", 0)
+        completion_tokens = invocation_metrics.get("outputTokenCount", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        bedrock_attrs["response.usage.completion_tokens"] = (
+            bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens
+        )
+        bedrock_attrs["response.usage.prompt_tokens"] = (
+            bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens
+        )
+        bedrock_attrs["response.usage.total_tokens"] = (
+            bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens
+        )
 
     return bedrock_attrs
 
@@ -409,6 +447,17 @@ def extract_bedrock_claude_model_response(response_body, bedrock_attrs):
         output_message_list = [{"role": role, "content": content}]
         bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason")
         bedrock_attrs["output_message_list"] = output_message_list
+        bedrock_attrs["response_id"] = str(response_body.get("id"))
+
+        # Extract token information
+        token_usage = response_body.get("usage", {})
+        if token_usage:
+            prompt_tokens = token_usage.get("input_tokens", 0)
+            completion_tokens = token_usage.get("output_tokens", 0)
+            total_tokens = prompt_tokens + completion_tokens
+            bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens
+            bedrock_attrs["response.usage.completion_tokens"] = completion_tokens
+            bedrock_attrs["response.usage.total_tokens"] = total_tokens
 
     return bedrock_attrs
 
@@ -420,6 +469,23 @@ def extract_bedrock_claude_model_streaming_response(response_body, bedrock_attrs):
             bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}]
         bedrock_attrs["output_message_list"][0]["content"] += content
         bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason")
+
+        # Extract token information
+        invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {})
+        prompt_tokens = invocation_metrics.get("inputTokenCount", 0)
+        completion_tokens = invocation_metrics.get("outputTokenCount", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        bedrock_attrs["response.usage.completion_tokens"] = (
+            bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens
+        )
+        bedrock_attrs["response.usage.prompt_tokens"] = (
+            bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens
+        )
+        bedrock_attrs["response.usage.total_tokens"] = (
+            bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens
+        )
+
     return bedrock_attrs
 
@@ -440,6 +506,13 @@ def extract_bedrock_llama_model_response(response_body, bedrock_attrs):
         response_body = json.loads(response_body)
 
         output_message_list = [{"role": "assistant", "content": response_body.get("generation")}]
+        prompt_tokens = response_body.get("prompt_token_count", 0)
+        completion_tokens = response_body.get("generation_token_count", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        bedrock_attrs["response.usage.completion_tokens"] = completion_tokens
+        bedrock_attrs["response.usage.prompt_tokens"] = prompt_tokens
+        bedrock_attrs["response.usage.total_tokens"] = total_tokens
         bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason")
         bedrock_attrs["output_message_list"] = output_message_list
 
@@ -453,6 +526,22 @@ def extract_bedrock_llama_model_streaming_response(response_body, bedrock_attrs):
             bedrock_attrs["output_message_list"] = [{"role": "assistant", "content": ""}]
         bedrock_attrs["output_message_list"][0]["content"] += content
         bedrock_attrs["response.choices.finish_reason"] = response_body.get("stop_reason")
+
+        # Extract token information
+        invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {})
+        prompt_tokens = invocation_metrics.get("inputTokenCount", 0)
+        completion_tokens = invocation_metrics.get("outputTokenCount", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        bedrock_attrs["response.usage.completion_tokens"] = (
+            bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens
+        )
+        bedrock_attrs["response.usage.prompt_tokens"] = (
+            bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens
+        )
+        bedrock_attrs["response.usage.total_tokens"] = (
+            bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens
+        )
 
     return bedrock_attrs
 
@@ -493,12 +582,33 @@ def extract_bedrock_cohere_model_streaming_response(response_body, bedrock_attrs):
         bedrock_attrs["response.choices.finish_reason"] = response_body["generations"][0]["finish_reason"]
         bedrock_attrs["response_id"] = str(response_body.get("id"))
 
+        # Extract token information
+        invocation_metrics = response_body.get("amazon-bedrock-invocationMetrics", {})
+        prompt_tokens = invocation_metrics.get("inputTokenCount", 0)
+        completion_tokens = invocation_metrics.get("outputTokenCount", 0)
+        total_tokens = prompt_tokens + completion_tokens
+
+        bedrock_attrs["response.usage.completion_tokens"] = (
+            bedrock_attrs.get("response.usage.completion_tokens", 0) + completion_tokens
+        )
+        bedrock_attrs["response.usage.prompt_tokens"] = (
+            bedrock_attrs.get("response.usage.prompt_tokens", 0) + prompt_tokens
+        )
+        bedrock_attrs["response.usage.total_tokens"] = (
+            bedrock_attrs.get("response.usage.total_tokens", 0) + total_tokens
+        )
+
     return bedrock_attrs
 
 
 NULL_EXTRACTOR = lambda *args: {}  # noqa: E731  # Empty extractor that returns nothing
 MODEL_EXTRACTORS = [  # Order is important here, avoiding dictionaries
-    ("amazon.titan-embed", extract_bedrock_titan_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR),
+    (
+        "amazon.titan-embed",
+        extract_bedrock_titan_embedding_model_request,
+        extract_bedrock_titan_embedding_model_response,
+        NULL_EXTRACTOR,
+    ),
     ("cohere.embed", extract_bedrock_cohere_embedding_model_request, NULL_EXTRACTOR, NULL_EXTRACTOR),
     (
         "amazon.titan",
@@ -801,6 +911,7 @@ def _wrap_bedrock_runtime_converse(wrapped, instance, args, kwargs):
     try:
         # For aioboto3 clients, this will call make_api_call instrumentation in external_aiobotocore
         response = wrapped(*args, **kwargs)
+
     except Exception as exc:
         handle_bedrock_exception(
             exc, False, model, span_id, trace_id, request_extractor, {}, ft, transaction, kwargs, is_converse=True
@@ -848,6 +959,10 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, span_id, trace_id):
         for result in response.get("output").get("message").get("content", [])
     ]
 
+    response_prompt_tokens = response.get("usage", {}).get("inputTokens") if response else None
+    response_completion_tokens = response.get("usage", {}).get("outputTokens") if response else None
+    response_total_tokens = response.get("usage", {}).get("totalTokens") if response else None
+
     bedrock_attrs = {
         "request_id": response_headers.get("x-amzn-requestid"),
         "model": model,
@@ -858,6 +973,9 @@ def extract_bedrock_converse_attrs(kwargs, response, response_headers, model, span_id, trace_id):
         "request.max_tokens": kwargs.get("inferenceConfig", {}).get("maxTokens", None),
         "request.temperature": kwargs.get("inferenceConfig", {}).get("temperature", None),
         "input_message_list": input_message_list,
+        "response.usage.prompt_tokens": response_prompt_tokens,
+        "response.usage.completion_tokens": response_completion_tokens,
+        "response.usage.total_tokens": response_total_tokens,
     }
 
     return bedrock_attrs
@@ -1014,23 +1132,28 @@ def handle_embedding_event(transaction, bedrock_attrs):
     model = bedrock_attrs.get("model", None)
     input_ = bedrock_attrs.get("input")
 
+    response_total_tokens = bedrock_attrs.get("response.usage.total_tokens", None)
+
+    total_tokens = (
+        settings.ai_monitoring.llm_token_count_callback(model, input_)
+        if settings.ai_monitoring.llm_token_count_callback and input_
+        else response_total_tokens
+    )
+
     embedding_dict = {
         "vendor": "bedrock",
         "ingest_source": "Python",
         "id": embedding_id,
         "span_id": span_id,
         "trace_id": trace_id,
-        "token_count": (
-            settings.ai_monitoring.llm_token_count_callback(model, input_)
-            if settings.ai_monitoring.llm_token_count_callback
-            else None
-        ),
         "request_id": request_id,
         "duration": bedrock_attrs.get("duration", None),
         "request.model": model,
         "response.model": model,
+        "response.usage.total_tokens": total_tokens,
         "error": bedrock_attrs.get("error", None),
     }
+
     embedding_dict.update(llm_metadata_dict)
 
     if settings.ai_monitoring.record_content.enabled:
@@ -1041,6 +1164,7 @@ def handle_embedding_event(transaction, bedrock_attrs):
 
 
 def handle_chat_completion_event(transaction, bedrock_attrs):
+    settings = transaction.settings or global_settings()
     chat_completion_id = str(uuid.uuid4())
     # Grab LLM-related custom attributes off of the transaction to store as metadata on LLM events
     custom_attrs_dict = transaction._custom_params
@@ -1055,12 +1179,35 @@ def handle_chat_completion_event(transaction, bedrock_attrs):
     response_id = bedrock_attrs.get("response_id", None)
     model = bedrock_attrs.get("model", None)
 
+    response_prompt_tokens = bedrock_attrs.get("response.usage.prompt_tokens", None)
+    response_completion_tokens = bedrock_attrs.get("response.usage.completion_tokens", None)
+    response_total_tokens = bedrock_attrs.get("response.usage.total_tokens", None)
+
     input_message_list = bedrock_attrs.get("input_message_list", [])
     output_message_list = bedrock_attrs.get("output_message_list", [])
     number_of_messages = (
         len(input_message_list) + len(output_message_list)
     ) or None  # If 0, attribute will be set to None and removed
 
+    input_message_content = " ".join([msg.get("content", "") for msg in input_message_list if msg.get("content")])
+    prompt_tokens = (
+        settings.ai_monitoring.llm_token_count_callback(model, input_message_content)
+        if settings.ai_monitoring.llm_token_count_callback and input_message_content
+        else response_prompt_tokens
+    )
+
+    output_message_content = " ".join([msg.get("content", "") for msg in output_message_list if msg.get("content")])
+    completion_tokens = (
+        settings.ai_monitoring.llm_token_count_callback(model, output_message_content)
+        if settings.ai_monitoring.llm_token_count_callback and output_message_content
+        else response_completion_tokens
+    )
+    total_tokens = (
+        prompt_tokens + completion_tokens if all([prompt_tokens, completion_tokens]) else response_total_tokens
+    )
+
+    all_token_counts = bool(prompt_tokens and completion_tokens and total_tokens)
+
     chat_completion_summary_dict = {
         "vendor": "bedrock",
         "ingest_source": "Python",
@@ -1078,6 +1225,12 @@ def handle_chat_completion_event(transaction, bedrock_attrs):
         "response.choices.finish_reason": bedrock_attrs.get("response.choices.finish_reason", None),
         "error": bedrock_attrs.get("error", None),
     }
+
+    if all_token_counts:
+        chat_completion_summary_dict["response.usage.prompt_tokens"] = prompt_tokens
+        chat_completion_summary_dict["response.usage.completion_tokens"] = completion_tokens
+        chat_completion_summary_dict["response.usage.total_tokens"] = total_tokens
+
     chat_completion_summary_dict.update(llm_metadata_dict)
     chat_completion_summary_dict = {k: v for k, v in chat_completion_summary_dict.items() if v is not None}
     transaction.record_custom_event("LlmChatCompletionSummary", chat_completion_summary_dict)
@@ -1092,6 +1245,7 @@ def handle_chat_completion_event(transaction, bedrock_attrs):
         request_model=model,
         request_id=request_id,
         llm_metadata_dict=llm_metadata_dict,
+        all_token_counts=all_token_counts,
         response_id=response_id,
     )
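Note on the precedence implemented above: `handle_chat_completion_event` and `handle_embedding_event` now prefer a user-registered token-count callback and fall back to the usage numbers parsed off the Bedrock response. A minimal application-side sketch using the agent's public `set_llm_token_count_callback` API; the counting heuristic is invented for illustration, and a real callback would typically delegate to a tokenizer:

```python
import newrelic.agent

def token_count_callback(model, content):
    # Toy approximation -- roughly four characters per token.
    return max(1, len(content) // 4)

# Once registered, the agent passes (model, content) to this callback and
# uses its return value in place of the response.usage.* values parsed above.
newrelic.agent.set_llm_token_count_callback(token_count_callback)
```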
diff --git a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py
index da9c5818e7..87dfa1f1b6 100644
--- a/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py
+++ b/tests/external_aiobotocore/test_bedrock_chat_completion_converse.py
@@ -17,7 +17,7 @@
 from conftest import BOTOCORE_VERSION
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     events_sans_content,
@@ -49,6 +49,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
+            "response.usage.prompt_tokens": 26,
+            "response.usage.completion_tokens": 100,
+            "response.usage.total_tokens": 126,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "max_tokens",
@@ -70,6 +73,7 @@
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -88,6 +92,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -106,6 +111,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 2,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -189,7 +195,7 @@ def _test():
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
 def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model):
-    @validate_custom_events(add_token_count_to_events(chat_completion_expected_events))
+    @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events))
     # One summary event, one user message, and one response message from the assistant
     @validate_custom_event_count(count=4)
     @validate_transaction_metrics(
@@ -476,46 +482,3 @@ def _test():
     converse_invalid_model(loop, bedrock_converse_server)
 
     _test()
-
-
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count(
-    monkeypatch, bedrock_converse_server, loop, set_trace_info
-):
-    """
-    A request is made to the server with invalid credentials. botocore will reach out to the server and receive an
-    UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer
-    events. The error response can also be parsed, and will be included as attributes on the recorded exception.
-    """
-
-    @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events))
-    @validate_error_trace_attributes(
-        _client_error_name,
-        exact_attrs={
-            "agent": {},
-            "intrinsic": {},
-            "user": {
-                "http.statusCode": 403,
-                "error.message": "The security token included in the request is invalid.",
-                "error.code": "UnrecognizedClientException",
-            },
-        },
-    )
-    @validate_transaction_metrics(
-        name="test_bedrock_chat_completion_incorrect_access_key_with_token_count",
-        scoped_metrics=[("Llm/completion/Bedrock/converse", 1)],
-        rollup_metrics=[("Llm/completion/Bedrock/converse", 1)],
-        custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)],
-        background_task=True,
-    )
-    @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count")
-    def _test():
-        set_trace_info()
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-        add_custom_attribute("llm.foo", "bar")
-        add_custom_attribute("non_llm_attr", "python-agent")
-
-        converse_incorrect_access_key(loop, bedrock_converse_server, monkeypatch)
-
-    _test()
"access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100) - - _test() - - def invoke_model_malformed_request_body(loop, bedrock_server, response_streaming): async def _coro(): with pytest.raises(_client_error): @@ -798,58 +754,6 @@ async def _test(): loop.run_until_complete(_test()) -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events)) -@validate_custom_event_count(count=2) -@validate_error_trace_attributes( - _event_stream_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "error.message": "Malformed input request, please reformat your input and try again.", - "error.code": "ValidationException", - }, - }, - forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")}, -) -@validate_transaction_metrics( - name="test_bedrock_chat_completion", - scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)], - custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)], - background_task=True, -) -@background_task(name="test_bedrock_chat_completion") -def test_bedrock_chat_completion_error_streaming_exception_with_token_count(loop, bedrock_server, set_trace_info): - """ - Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set. - - See the original test for a description of the error case. 
- """ - - async def _test(): - with pytest.raises(_event_stream_error): - model = "amazon.titan-text-express-v1" - body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8") - - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - response = await bedrock_server.invoke_model_with_response_stream( - body=body, modelId=model, accept="application/json", contentType="application/json" - ) - - body = response.get("body") - async for resp in body: - assert resp - - loop.run_until_complete(_test()) - - def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server): assert bedrock_server._nr_wrapped diff --git a/tests/external_aiobotocore/test_bedrock_embeddings.py b/tests/external_aiobotocore/test_bedrock_embeddings.py index 96b930feb5..dacfbb4eed 100644 --- a/tests/external_aiobotocore/test_bedrock_embeddings.py +++ b/tests/external_aiobotocore/test_bedrock_embeddings.py @@ -27,7 +27,7 @@ ) from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -164,7 +164,7 @@ def _test(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events): - @validate_custom_events(add_token_count_to_events(expected_events)) + @validate_custom_events(add_token_count_to_embedding_events(expected_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_bedrock_embedding", @@ -289,45 +289,6 @@ def _test(): _test() -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -def test_bedrock_embedding_error_incorrect_access_key_with_token_count( - monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events -): - @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events)) - @validate_error_trace_attributes( - _client_error_name, - exact_attrs={ - "agent": {}, - "intrinsic": {}, - "user": { - "http.statusCode": 403, - "error.message": "The security token included in the request is invalid.", - "error.code": "UnrecognizedClientException", - }, - }, - ) - @validate_transaction_metrics( - name="test_bedrock_embedding", - scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)], - background_task=True, - ) - @background_task(name="test_bedrock_embedding") - def _test(): - monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY") - - with pytest.raises(_client_error): # not sure where this exception actually comes from - set_trace_info() - add_custom_attribute("llm.conversation_id", "my-awesome-id") - add_custom_attribute("llm.foo", "bar") - add_custom_attribute("non_llm_attr", "python-agent") - - exercise_model(prompt="Invalid Token") - - _test() - - @reset_core_stats_engine() @validate_custom_events(embedding_expected_malformed_request_body_events) @validate_custom_event_count(count=1) diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py 
diff --git a/tests/external_botocore/_test_bedrock_chat_completion.py b/tests/external_botocore/_test_bedrock_chat_completion.py
index 155b6c993c..6b65af8cb2 100644
--- a/tests/external_botocore/_test_bedrock_chat_completion.py
+++ b/tests/external_botocore/_test_bedrock_chat_completion.py
@@ -97,6 +97,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "amazon.titan-text-express-v1",
             "response.model": "amazon.titan-text-express-v1",
+            "response.usage.completion_tokens": 32,
+            "response.usage.total_tokens": 44,
+            "response.usage.prompt_tokens": 12,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "FINISH",
@@ -118,6 +121,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "amazon.titan-text-express-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -136,6 +140,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "amazon.titan-text-express-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -335,6 +340,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "meta.llama2-13b-chat-v1",
             "response.model": "meta.llama2-13b-chat-v1",
+            "response.usage.prompt_tokens": 17,
+            "response.usage.completion_tokens": 69,
+            "response.usage.total_tokens": 86,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "stop",
@@ -356,6 +364,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "meta.llama2-13b-chat-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -374,6 +383,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "meta.llama2-13b-chat-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -919,6 +929,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "amazon.titan-text-express-v1",
             "response.model": "amazon.titan-text-express-v1",
+            "response.usage.completion_tokens": 35,
+            "response.usage.total_tokens": 47,
+            "response.usage.prompt_tokens": 12,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "FINISH",
@@ -940,6 +953,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "amazon.titan-text-express-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -958,6 +972,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "amazon.titan-text-express-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -978,6 +993,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "anthropic.claude-instant-v1",
             "response.model": "anthropic.claude-instant-v1",
+            "response.usage.completion_tokens": 99,
+            "response.usage.prompt_tokens": 19,
+            "response.usage.total_tokens": 118,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "stop_sequence",
@@ -999,6 +1017,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "anthropic.claude-instant-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -1017,6 +1036,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "anthropic.claude-instant-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -1038,6 +1058,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "cohere.command-text-v14",
             "response.model": "cohere.command-text-v14",
+            "response.usage.completion_tokens": 91,
+            "response.usage.total_tokens": 100,
+            "response.usage.prompt_tokens": 9,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "COMPLETE",
@@ -1059,6 +1082,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "cohere.command-text-v14",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -1077,6 +1101,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "cohere.command-text-v14",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -1097,6 +1122,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "meta.llama2-13b-chat-v1",
             "response.model": "meta.llama2-13b-chat-v1",
+            "response.usage.prompt_tokens": 17,
+            "response.usage.completion_tokens": 100,
+            "response.usage.total_tokens": 117,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "length",
@@ -1118,6 +1146,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "meta.llama2-13b-chat-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -1136,6 +1165,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "meta.llama2-13b-chat-v1",
             "vendor": "bedrock",
             "ingest_source": "Python",
diff --git a/tests/external_botocore/_test_bedrock_embeddings.py b/tests/external_botocore/_test_bedrock_embeddings.py
index f5c227b9c3..af544af001 100644
--- a/tests/external_botocore/_test_bedrock_embeddings.py
+++ b/tests/external_botocore/_test_bedrock_embeddings.py
@@ -33,6 +33,7 @@
         "response.model": "amazon.titan-embed-text-v1",
         "request.model": "amazon.titan-embed-text-v1",
         "request_id": "11233989-07e8-4ecb-9ba6-79601ba6d8cc",
+        "response.usage.total_tokens": 6,
         "vendor": "bedrock",
         "ingest_source": "Python",
     },
@@ -52,6 +53,7 @@
         "response.model": "amazon.titan-embed-g1-text-02",
         "request.model": "amazon.titan-embed-g1-text-02",
         "request_id": "b10ac895-eae3-4f07-b926-10b2866c55ed",
+        "response.usage.total_tokens": 6,
         "vendor": "bedrock",
         "ingest_source": "Python",
     },
diff --git a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py
index 94a88e7a56..7a471b950e 100644
--- a/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py
+++ b/tests/external_botocore/test_bedrock_chat_completion_invoke_model.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 import json
 import os
 from io import BytesIO
@@ -35,7 +36,8 @@
 from conftest import BOTOCORE_VERSION
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_streaming_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     disabled_ai_monitoring_streaming_settings,
@@ -200,7 +210,7 @@ def _test():
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
 def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model, expected_events, expected_metrics):
-    @validate_custom_events(add_token_count_to_events(expected_events))
+    @validate_custom_events(add_token_counts_to_chat_events(add_token_count_streaming_events(expected_events)))
     # One summary event, one user message, and one response message from the assistant
     @validate_custom_event_count(count=3)
     @validate_transaction_metrics(
@@ -438,49 +448,6 @@ def _test():
     _test()
 
 
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-def test_bedrock_chat_completion_error_incorrect_access_key_with_token(
-    monkeypatch,
-    bedrock_server,
-    exercise_model,
-    set_trace_info,
-    expected_invalid_access_key_error_events,
-    expected_metrics,
-):
-    @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events))
-    @validate_error_trace_attributes(
-        _client_error_name,
-        exact_attrs={
-            "agent": {},
-            "intrinsic": {},
-            "user": {
-                "http.statusCode": 403,
-                "error.message": "The security token included in the request is invalid.",
-                "error.code": "UnrecognizedClientException",
-            },
-        },
-    )
-    @validate_transaction_metrics(
-        name="test_bedrock_chat_completion",
-        scoped_metrics=expected_metrics,
-        rollup_metrics=expected_metrics,
-        custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)],
-        background_task=True,
-    )
-    @background_task(name="test_bedrock_chat_completion")
-    def _test():
-        monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY")
-
-        with pytest.raises(_client_error):  # not sure where this exception actually comes from
-            set_trace_info()
-            add_custom_attribute("llm.conversation_id", "my-awesome-id")
-            add_custom_attribute("llm.foo", "bar")
-            add_custom_attribute("non_llm_attr", "python-agent")
-
-            exercise_model(prompt="Invalid Token", temperature=0.7, max_tokens=100)
-
-    _test()
 
 
 @reset_core_stats_engine()
@@ -762,55 +773,6 @@ def _test():
     _test()
 
 
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-def test_bedrock_chat_completion_error_streaming_exception_with_token_count(bedrock_server, set_trace_info):
-    """
-    Duplicate of test_bedrock_chat_completion_error_streaming_exception, but with token callback being set.
-
-    See the original test for a description of the error case.
-    """
-
-    @validate_custom_events(add_token_count_to_events(chat_completion_expected_streaming_error_events))
-    @validate_custom_event_count(count=2)
-    @validate_error_trace_attributes(
-        _event_stream_error_name,
-        exact_attrs={
-            "agent": {},
-            "intrinsic": {},
-            "user": {
-                "error.message": "Malformed input request, please reformat your input and try again.",
-                "error.code": "ValidationException",
-            },
-        },
-        forgone_params={"agent": (), "intrinsic": (), "user": ("http.statusCode")},
-    )
-    @validate_transaction_metrics(
-        name="test_bedrock_chat_completion",
-        scoped_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)],
-        rollup_metrics=[("Llm/completion/Bedrock/invoke_model_with_response_stream", 1)],
-        custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)],
-        background_task=True,
-    )
-    @background_task(name="test_bedrock_chat_completion")
-    def _test():
-        with pytest.raises(_event_stream_error):
-            model = "amazon.titan-text-express-v1"
-            body = (chat_completion_payload_templates[model] % ("Streaming Exception", 0.7, 100)).encode("utf-8")
-
-            set_trace_info()
-            add_custom_attribute("llm.conversation_id", "my-awesome-id")
-            add_custom_attribute("llm.foo", "bar")
-            add_custom_attribute("non_llm_attr", "python-agent")
-
-            response = bedrock_server.invoke_model_with_response_stream(
-                body=body, modelId=model, accept="application/json", contentType="application/json"
-            )
-            list(response["body"])  # Iterate
-
-    _test()
 
 
 def test_bedrock_chat_completion_functions_marked_as_wrapped_for_sdk_compatibility(bedrock_server):
     assert bedrock_server._nr_wrapped
diff --git a/tests/external_botocore/test_bedrock_embeddings.py b/tests/external_botocore/test_bedrock_embeddings.py
index 417e24b2d9..de2cb201e7 100644
--- a/tests/external_botocore/test_bedrock_embeddings.py
+++ b/tests/external_botocore/test_bedrock_embeddings.py
@@ -28,7 +28,7 @@
 from conftest import BOTOCORE_VERSION
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_to_embedding_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     events_sans_content,
@@ -161,7 +161,7 @@ def _test():
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
 def test_bedrock_embedding_with_token_count(set_trace_info, exercise_model, expected_events):
-    @validate_custom_events(add_token_count_to_events(expected_events))
+    @validate_custom_events(add_token_count_to_embedding_events(expected_events))
     @validate_custom_event_count(count=1)
     @validate_transaction_metrics(
         name="test_bedrock_embedding",
@@ -286,45 +286,6 @@ def _test():
     _test()
 
 
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-def test_bedrock_embedding_error_incorrect_access_key_with_token_count(
-    monkeypatch, bedrock_server, exercise_model, set_trace_info, expected_invalid_access_key_error_events
-):
-    @validate_custom_events(add_token_count_to_events(expected_invalid_access_key_error_events))
-    @validate_error_trace_attributes(
-        _client_error_name,
-        exact_attrs={
-            "agent": {},
-            "intrinsic": {},
-            "user": {
-                "http.statusCode": 403,
-                "error.message": "The security token included in the request is invalid.",
-                "error.code": "UnrecognizedClientException",
-            },
-        },
-    )
-    @validate_transaction_metrics(
-        name="test_bedrock_embedding",
-        scoped_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)],
-        rollup_metrics=[("Llm/embedding/Bedrock/invoke_model", 1)],
-        background_task=True,
-    )
-    @background_task(name="test_bedrock_embedding")
-    def _test():
-        monkeypatch.setattr(bedrock_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY")
-
-        with pytest.raises(_client_error):  # not sure where this exception actually comes from
-            set_trace_info()
-            add_custom_attribute("llm.conversation_id", "my-awesome-id")
-            add_custom_attribute("llm.foo", "bar")
-            add_custom_attribute("non_llm_attr", "python-agent")
-
-            exercise_model(prompt="Invalid Token")
-
-    _test()
-
-
 @reset_core_stats_engine()
 def test_bedrock_embedding_error_malformed_request_body(bedrock_server, set_trace_info):
     """
diff --git a/tests/external_botocore/test_chat_completion_converse.py b/tests/external_botocore/test_chat_completion_converse.py
index 96ead41dd7..2d38d6b4a4 100644
--- a/tests/external_botocore/test_chat_completion_converse.py
+++ b/tests/external_botocore/test_chat_completion_converse.py
@@ -17,7 +17,7 @@
 from conftest import BOTOCORE_VERSION
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     events_sans_content,
@@ -49,6 +49,9 @@
             "duration": None,  # Response time varies each test run
             "request.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
+            "response.usage.prompt_tokens": 26,
+            "response.usage.completion_tokens": 100,
+            "response.usage.total_tokens": 126,
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "max_tokens",
@@ -70,6 +73,7 @@
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -88,6 +92,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -106,6 +111,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 2,
+            "token_count": 0,
             "response.model": "anthropic.claude-3-sonnet-20240229-v1:0",
             "vendor": "bedrock",
             "ingest_source": "Python",
@@ -185,7 +191,7 @@ def _test():
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
 def test_bedrock_chat_completion_with_token_count(set_trace_info, exercise_model):
-    @validate_custom_events(add_token_count_to_events(chat_completion_expected_events))
+    @validate_custom_events(add_token_counts_to_chat_events(chat_completion_expected_events))
     # One summary event, one user message, and one response message from the assistant
     @validate_custom_event_count(count=4)
     @validate_transaction_metrics(
@@ -468,57 +474,3 @@ def _test():
         assert response
 
     _test()
-
-
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-def test_bedrock_chat_completion_error_incorrect_access_key_with_token_count(
-    monkeypatch, bedrock_converse_server, exercise_model, set_trace_info
-):
-    """
-    A request is made to the server with invalid credentials. botocore will reach out to the server and receive an
-    UnrecognizedClientException as a response. Information from the request will be parsed and reported in customer
-    events. The error response can also be parsed, and will be included as attributes on the recorded exception.
-    """
-
-    @validate_custom_events(add_token_count_to_events(chat_completion_invalid_access_key_error_events))
-    @validate_error_trace_attributes(
-        _client_error_name,
-        exact_attrs={
-            "agent": {},
-            "intrinsic": {},
-            "user": {
-                "http.statusCode": 403,
-                "error.message": "The security token included in the request is invalid.",
-                "error.code": "UnrecognizedClientException",
-            },
-        },
-    )
-    @validate_transaction_metrics(
-        name="test_bedrock_chat_completion_incorrect_access_key_with_token_count",
-        scoped_metrics=[("Llm/completion/Bedrock/converse", 1)],
-        rollup_metrics=[("Llm/completion/Bedrock/converse", 1)],
-        custom_metrics=[(f"Supportability/Python/ML/Bedrock/{BOTOCORE_VERSION}", 1)],
-        background_task=True,
-    )
-    @background_task(name="test_bedrock_chat_completion_incorrect_access_key_with_token_count")
-    def _test():
-        monkeypatch.setattr(bedrock_converse_server._request_signer._credentials, "access_key", "INVALID-ACCESS-KEY")
-
-        with pytest.raises(_client_error):
-            set_trace_info()
-            add_custom_attribute("llm.conversation_id", "my-awesome-id")
-            add_custom_attribute("llm.foo", "bar")
-            add_custom_attribute("non_llm_attr", "python-agent")
-
-            message = [{"role": "user", "content": [{"text": "Invalid Token"}]}]
-
-            response = bedrock_converse_server.converse(
-                modelId="anthropic.claude-3-sonnet-20240229-v1:0",
-                messages=message,
-                inferenceConfig={"temperature": 0.7, "maxTokens": 100},
-            )
-
-            assert response
-
-    _test()
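The Converse API reports usage directly on the response object, so `extract_bedrock_converse_attrs` can map it without a per-model body extractor. An illustrative response matching the expected events above (output text elided):

```python
# Converse responses carry usage alongside the output message; the three keys
# below map onto the response.usage.* attributes asserted in the fixtures.
response = {
    "output": {"message": {"role": "assistant", "content": [{"text": "..."}]}},
    "stopReason": "max_tokens",
    "usage": {"inputTokens": 26, "outputTokens": 100, "totalTokens": 126},
}
```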
diff --git a/tests/mlmodel_openai/test_embeddings_error.py b/tests/mlmodel_openai/test_embeddings_error.py
index a8e46bf23a..f80e6ff41d 100644
--- a/tests/mlmodel_openai/test_embeddings_error.py
+++ b/tests/mlmodel_openai/test_embeddings_error.py
@@ -14,12 +14,10 @@
 
 import openai
 import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
     disabled_ai_monitoring_record_content_settings,
     events_sans_content,
-    llm_token_count_callback,
     set_trace_info,
 )
 from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -128,35 +126,6 @@
 ]
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}},
-)
-@validate_span_events(
-    exact_agents={
-        "error.message": "The model `does-not-exist` does not exist"
-        # "http.statusCode": 404,
-    }
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        openai.Embedding.create(input="Model does not exist.", model="does-not-exist")
-
-
 # Invalid model provided
 @dt_enabled
 @reset_core_stats_engine()
@@ -348,30 +317,6 @@
 )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
-    name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count_async",
-    scoped_metrics=[("Llm/embedding/OpenAI/acreate", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/acreate", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count_async(set_trace_info, loop):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        loop.run_until_complete(openai.Embedding.acreate(input="Model does not exist.", model="does-not-exist"))
-
-
 # Invalid model provided
 @dt_enabled
 @reset_core_stats_engine()