diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index 564069ef75..0c626683e5 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -133,11 +133,11 @@ def create_chat_completion_message_event(
     span_id,
     trace_id,
     response_model,
-    request_model,
     response_id,
     request_id,
     llm_metadata,
     output_message_list,
+    all_token_counts,
     request_timestamp=None,
 ):
     settings = transaction.settings if transaction.settings is not None else global_settings()
@@ -158,11 +158,6 @@ def create_chat_completion_message_event(
             "request_id": request_id,
             "span_id": span_id,
             "trace_id": trace_id,
-            "token_count": (
-                settings.ai_monitoring.llm_token_count_callback(request_model, message_content)
-                if settings.ai_monitoring.llm_token_count_callback
-                else None
-            ),
             "role": message.get("role"),
             "completion_id": chat_completion_id,
             "sequence": index,
@@ -170,8 +165,13 @@ def create_chat_completion_message_event(
             "vendor": "openai",
             "ingest_source": "Python",
         }
+
         if settings.ai_monitoring.record_content.enabled and message_content:
             chat_completion_input_message_dict["content"] = message_content
+
+        if all_token_counts:
+            chat_completion_input_message_dict["token_count"] = 0
+
         if request_timestamp:
             chat_completion_input_message_dict["timestamp"] = request_timestamp
 
@@ -199,11 +199,6 @@ def create_chat_completion_message_event(
                 "request_id": request_id,
                 "span_id": span_id,
                 "trace_id": trace_id,
-                "token_count": (
-                    settings.ai_monitoring.llm_token_count_callback(response_model, message_content)
-                    if settings.ai_monitoring.llm_token_count_callback
-                    else None
-                ),
                 "role": message.get("role"),
                 "completion_id": chat_completion_id,
                 "sequence": index,
@@ -216,6 +211,9 @@ def create_chat_completion_message_event(
             if settings.ai_monitoring.record_content.enabled and message_content:
                 chat_completion_output_message_dict["content"] = message_content
 
+            if all_token_counts:
+                chat_completion_output_message_dict["token_count"] = 0
+
             chat_completion_output_message_dict.update(llm_metadata)
 
             transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_output_message_dict)
@@ -286,15 +284,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
             else getattr(attribute_response, "organization", None)
         )
 
+        response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None
+
+        total_tokens = (
+            settings.ai_monitoring.llm_token_count_callback(response_model, input_)
+            if settings.ai_monitoring.llm_token_count_callback and input_
+            else response_total_tokens
+        )
+
         full_embedding_response_dict = {
             "id": embedding_id,
             "span_id": span_id,
             "trace_id": trace_id,
-            "token_count": (
-                settings.ai_monitoring.llm_token_count_callback(response_model, input_)
-                if settings.ai_monitoring.llm_token_count_callback
-                else None
-            ),
             "request.model": kwargs.get("model") or kwargs.get("engine"),
             "request_id": request_id,
             "duration": ft.duration * 1000,
@@ -319,6 +320,7 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
             "response.headers.ratelimitRemainingRequests": check_rate_limit_header(
                 response_headers, "x-ratelimit-remaining-requests", True
             ),
+            "response.usage.total_tokens": total_tokens,
             "vendor": "openai",
             "ingest_source": "Python",
         }
@@ -489,6 +491,7 @@ def _handle_completion_success(
 def _record_completion_success(
     transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response, request_timestamp=None
 ):
+    settings = transaction.settings if transaction.settings is not None else global_settings()
     span_id = linking_metadata.get("span.id")
     trace_id = linking_metadata.get("trace.id")
 
@@ -496,6 +499,7 @@ def _record_completion_success(
         if response:
             response_model = response.get("model")
             response_id = response.get("id")
+            token_usage = response.get("usage") or {}
             output_message_list = []
             finish_reason = None
             choices = response.get("choices") or []
@@ -509,6 +513,7 @@ def _record_completion_success(
         else:
             response_model = kwargs.get("response.model")
             response_id = kwargs.get("id")
+            token_usage = kwargs.get("response.usage") or {}
             finish_reason = kwargs.get("finish_reason")
             content = kwargs.get("content")
             # Tool-call responses may carry an empty content string; in that case the
@@ -519,12 +524,39 @@ def _record_completion_success(
                 output_message_list = [{"content": content, "role": kwargs.get("role")}]
             else:
                 output_message_list = []
+
         request_model = kwargs.get("model") or kwargs.get("engine")
 
-        request_id = response_headers.get("x-request-id")
-        organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
         messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}]
         input_message_list = list(messages)
+
+        # Token counts default to those reported in the response object if available,
+        # but the user-registered callback below may override them.
+        response_prompt_tokens = token_usage.get("prompt_tokens")
+        response_completion_tokens = token_usage.get("completion_tokens")
+        response_total_tokens = token_usage.get("total_tokens")
+
+        # A user-registered token count callback takes precedence whenever there is message content to count.
+        token_count_callback = settings.ai_monitoring.llm_token_count_callback
+        if token_count_callback:
+            input_message_content = " ".join(content for msg in input_message_list if (content := msg.get("content")))
+            if input_message_content:
+                response_prompt_tokens = token_count_callback(request_model, input_message_content)
+            output_message_content = " ".join(content for msg in output_message_list if (content := msg.get("content")))
+            if output_message_content:
+                response_completion_tokens = token_count_callback(response_model, output_message_content)
+
+        # Prefer the sum of individual counts as the total whenever both are available.
+        # This ensures consistency in the event that the token counting callback has reported
+        # different values for prompt or completion tokens.
+        if response_prompt_tokens and response_completion_tokens:
+            response_total_tokens = response_prompt_tokens + response_completion_tokens
+
+        all_token_counts = bool(response_prompt_tokens and response_completion_tokens and response_total_tokens)
+
+        request_id = response_headers.get("x-request-id")
+        organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
+
         full_chat_completion_summary_dict = {
             "id": completion_id,
             "span_id": span_id,
@@ -571,6 +603,12 @@ def _record_completion_success(
             "response.number_of_messages": len(input_message_list) + len(output_message_list),
             "timestamp": request_timestamp,
         }
+
+        if all_token_counts:
+            full_chat_completion_summary_dict["response.usage.prompt_tokens"] = response_prompt_tokens
+            full_chat_completion_summary_dict["response.usage.completion_tokens"] = response_completion_tokens
+            full_chat_completion_summary_dict["response.usage.total_tokens"] = response_total_tokens
+
         llm_metadata = _get_llm_attributes(transaction)
 
         if "time_to_first_token" in kwargs:
@@ -586,11 +624,11 @@ def _record_completion_success(
             span_id,
             trace_id,
             response_model,
-            request_model,
             response_id,
             request_id,
             llm_metadata,
             output_message_list,
+            all_token_counts,
             request_timestamp,
         )
     except Exception:
@@ -602,6 +640,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
     trace_id = linking_metadata.get("trace.id")
     request_message_list = kwargs.get("messages", None) or []
     notice_error_attributes = {}
+
     try:
         if OPENAI_V1:
             response = getattr(exc, "response", None)
@@ -667,6 +706,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
         output_message_list = []
         if "content" in kwargs:
             output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}]
+
         create_chat_completion_message_event(
             transaction,
             request_message_list,
@@ -674,11 +714,12 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
             span_id,
             trace_id,
             kwargs.get("response.model"),
-            request_model,
             response_id,
             request_id,
             llm_metadata,
             output_message_list,
+            # Error cases record no token counts; all_token_counts=True keeps the pipeline tokenizer from running.
+            True,
             request_timestamp,
         )
     except Exception:
@@ -787,6 +828,7 @@ def _record_stream_chunk(self, return_val):
             self._nr_openai_attrs["response.model"] = return_val.get("model")
             self._nr_openai_attrs["id"] = return_val.get("id")
             self._nr_openai_attrs["response.organization"] = return_val.get("organization")
+            self._nr_openai_attrs["response.usage"] = return_val.get("usage")
             if choices:
                 delta = choices[0].get("delta") or {}
                 if delta:
diff --git a/tests/mlmodel_langchain/_mock_external_openai_server.py b/tests/mlmodel_langchain/_mock_external_openai_server.py
index 74740ba520..fd8cc1231b 100644
--- a/tests/mlmodel_langchain/_mock_external_openai_server.py
+++ b/tests/mlmodel_langchain/_mock_external_openai_server.py
@@ -33,7 +33,7 @@
     "system: You are a world class algorithm for extracting information in structured formats. | user: Use the given format to extract information from the following input: Hello, world | user: Tip: Make sure to answer in the correct format": [
         {
             "content-type": "text/event-stream; charset=utf-8",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "440",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -125,7 +125,7 @@
     'user: Use a tool to add an exclamation to the word "Hello"': [
         {
             "content-type": "text/event-stream; charset=utf-8",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "134",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -334,7 +334,7 @@
     'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "Hello"': [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "324",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -391,7 +391,7 @@
     'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "Hello" | assistant: None | tool: Hello!': [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "751",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -441,7 +441,7 @@
     'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "exc"': [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "767",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -498,7 +498,7 @@
     "system: You are a helpful assistant who generates a random first name. A user will pass in a first letter, and you should generate a name that starts with that first letter. | user: M": [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "236",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -543,7 +543,7 @@
     "system: You are a helpful assistant who generates comma separated lists.\n    A user will pass in a category, and you should generate 5 objects in that category in a comma separated list.\n    ONLY return a comma separated list, and nothing more. | user: colors": [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "289",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -593,7 +593,7 @@
     "system: You are a world class algorithm for extracting information in structured formats. | user: Use the given format to extract information from the following input: Sally is 13 | user: Tip: Make sure to answer in the correct format": [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "201",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -656,7 +656,7 @@
     "system: You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4? | user: math": [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "2029",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -708,7 +708,7 @@
         {
             "content-type": "application/json",
             "openai-model": "text-embedding-ada-002-v2",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "42",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -738,7 +738,7 @@
         {
             "content-type": "application/json",
             "openai-model": "text-embedding-ada-002-v2",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "82",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -768,7 +768,7 @@
         {
             "content-type": "application/json",
             "openai-model": "text-embedding-ada-002-v2",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "158",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -798,7 +798,7 @@
         {
             "content-type": "application/json",
             "openai-model": "text-embedding-ada-002-v2",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "116",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -827,7 +827,7 @@
     'user: Use a tool to add an exclamation to the word "Hello"': [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "238",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py
index 89757675d7..156a2c8d4b 100644
--- a/tests/mlmodel_langchain/test_chain.py
+++ b/tests/mlmodel_langchain/test_chain.py
@@ -377,7 +377,7 @@
             "request_id": None,
             "duration": None,
             "response.model": "text-embedding-ada-002-v2",
-            "response.organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "response.organization": "nr-test-org",
             "response.headers.llmVersion": "2020-10-01",
             "response.headers.ratelimitLimitRequests": 10000,
             "response.headers.ratelimitLimitTokens": 10000000,
@@ -385,6 +385,7 @@
             "response.headers.ratelimitRemainingTokens": 9999992,
             "response.headers.ratelimitResetRequests": "6ms",
             "response.headers.ratelimitResetTokens": "0s",
+            "response.usage.total_tokens": 8,
             "vendor": "openai",
             "ingest_source": "Python",
             "input": "[[3923, 374, 220, 17, 489, 220, 19, 30]]",
@@ -400,7 +401,7 @@
             "request_id": None,
             "duration": None,
             "response.model": "text-embedding-ada-002-v2",
-            "response.organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "response.organization": "nr-test-org",
             "response.headers.llmVersion": "2020-10-01",
             "response.headers.ratelimitLimitRequests": 10000,
             "response.headers.ratelimitLimitTokens": 10000000,
@@ -408,6 +409,7 @@
             "response.headers.ratelimitRemainingTokens": 9999998,
             "response.headers.ratelimitResetRequests": "6ms",
             "response.headers.ratelimitResetTokens": "0s",
+            "response.usage.total_tokens": 1,
             "vendor": "openai",
             "ingest_source": "Python",
             "input": "[[10590]]",
@@ -471,7 +473,7 @@
             "request_id": None,
             "duration": None,
             "response.model": "gpt-3.5-turbo-0125",
-            "response.organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "response.organization": "nr-test-org",
             "response.choices.finish_reason": "stop",
             "response.headers.llmVersion": "2020-10-01",
             "response.headers.ratelimitLimitRequests": 10000,
@@ -480,6 +482,9 @@
             "response.headers.ratelimitRemainingTokens": 49999927,
             "response.headers.ratelimitResetRequests": "6ms",
             "response.headers.ratelimitResetTokens": "0s",
+            "response.usage.prompt_tokens": 73,
+            "response.usage.completion_tokens": 337,
+            "response.usage.total_tokens": 410,
             "response.number_of_messages": 3,
         },
     ],
@@ -496,6 +501,7 @@
             "sequence": 0,
             "response.model": "gpt-3.5-turbo-0125",
             "vendor": "openai",
+            "token_count": 0,
             "ingest_source": "Python",
             "content": "You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4?",
         },
@@ -513,6 +519,7 @@
             "sequence": 1,
             "response.model": "gpt-3.5-turbo-0125",
             "vendor": "openai",
+            "token_count": 0,
             "ingest_source": "Python",
             "content": "math",
         },
@@ -529,6 +536,7 @@
             "sequence": 2,
             "response.model": "gpt-3.5-turbo-0125",
             "vendor": "openai",
+            "token_count": 0,
             "ingest_source": "Python",
             "is_response": True,
             "content": "```html\n<!DOCTYPE html>\n<html>\n<head>\n  <title>Math Quiz</title>\n</head>\n<body>\n  <h2>Math Quiz Questions</h2>\n  <ol>\n    <li>What is the result of 5 + 3?</li>\n      <ul>\n        <li>A) 7</li>\n        <li>B) 8</li>\n        <li>C) 9</li>\n        <li>D) 10</li>\n      </ul>\n    <li>What is the product of 6 x 7?</li>\n      <ul>\n        <li>A) 36</li>\n        <li>B) 42</li>\n        <li>C) 48</li>\n        <li>D) 56</li>\n      </ul>\n    <li>What is the square root of 64?</li>\n      <ul>\n        <li>A) 6</li>\n        <li>B) 7</li>\n        <li>C) 8</li>\n        <li>D) 9</li>\n      </ul>\n    <li>What is the result of 12 / 4?</li>\n      <ul>\n        <li>A) 2</li>\n        <li>B) 3</li>\n        <li>C) 4</li>\n        <li>D) 5</li>\n      </ul>\n    <li>What is the sum of 15 + 9?</li>\n      <ul>\n        <li>A) 22</li>\n        <li>B) 23</li>\n        <li>C) 24</li>\n        <li>D) 25</li>\n      </ul>\n  </ol>\n</body>\n</html>\n```",
diff --git a/tests/mlmodel_langchain/test_state_graph.py b/tests/mlmodel_langchain/test_state_graph.py
index a47ad5f3d6..799124bb0a 100644
--- a/tests/mlmodel_langchain/test_state_graph.py
+++ b/tests/mlmodel_langchain/test_state_graph.py
@@ -43,7 +43,10 @@
             "response.headers.ratelimitResetTokens": "0s",
             "response.model": "gpt-3.5-turbo-0125",
             "response.number_of_messages": 2,
-            "response.organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "response.organization": "nr-test-org",
+            "response.usage.completion_tokens": 2,
+            "response.usage.prompt_tokens": 21,
+            "response.usage.total_tokens": 23,
             "span_id": None,
             "timestamp": None,
             "trace_id": None,
@@ -63,6 +66,7 @@
             "sequence": 0,
             "span_id": None,
             "timestamp": None,
+            "token_count": 0,
             "trace_id": None,
             "vendor": "openai",
         },
@@ -80,6 +84,7 @@
             "role": "assistant",
             "sequence": 1,
             "span_id": None,
+            "token_count": 0,
             "trace_id": None,
             "vendor": "openai",
         },
@@ -106,7 +111,12 @@
             "response.headers.ratelimitResetTokens": "0s",
             "response.model": "gpt-3.5-turbo-0125",
             "response.number_of_messages": 2,
-            "response.organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "response.organization": "nr-test-org",
+            # langchain's ChatOpenAI.stream() passes stream_options={"include_usage": True}
+            # by default, so the final usage chunk is captured and these are populated.
+            "response.usage.completion_tokens": 2,
+            "response.usage.prompt_tokens": 21,
+            "response.usage.total_tokens": 23,
             "span_id": None,
             "time_to_first_token": None,
             "timestamp": None,
@@ -127,6 +137,7 @@
             "sequence": 0,
             "span_id": None,
             "timestamp": None,
+            "token_count": 0,
             "trace_id": None,
             "vendor": "openai",
         },
@@ -144,6 +155,7 @@
             "role": "assistant",
             "sequence": 1,
             "span_id": None,
+            "token_count": 0,
             "trace_id": None,
             "vendor": "openai",
         },
diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py
index e218b4939a..73de8e202c 100644
--- a/tests/mlmodel_openai/_mock_external_openai_server.py
+++ b/tests/mlmodel_openai/_mock_external_openai_server.py
@@ -30,7 +30,7 @@
 #    created by an external call.
 # 3) This app runs on a separate thread meaning it won't block the test app.
 
-STREAMED_RESPONSES = {
+STREAMED_RESPONSES_V0 = {
     "Stream parsing error.": [
         {
             "Content-Type": "text/event-stream",
@@ -506,7 +506,156 @@
         ],
     ],
 }
-RESPONSES = {
+
+# Streaming responses returned when the request includes `stream_options={"include_usage": True}`.
+# OpenAI emits one extra trailing chunk with `choices: []` and a populated `usage` block before
+# sending [DONE].
+STREAMED_RESPONSES_V1_WITH_USAGE = {
+    "You are a scientist.": [
+        {
+            "content-type": "text/event-stream; charset=utf-8",
+            "openai-organization": "nr-test-org",
+            "openai-processing-ms": "334",
+            "openai-project": "proj_id",
+            "openai-version": "2020-10-01",
+            "x-ratelimit-limit-requests": "15000",
+            "x-ratelimit-limit-tokens": "40000",
+            "x-ratelimit-remaining-requests": "14999",
+            "x-ratelimit-remaining-tokens": "39999978",
+            "x-ratelimit-reset-requests": "4ms",
+            "x-ratelimit-reset-tokens": "0s",
+            "x-request-id": "req_f821c73df45f4e30821a81a2d751fe64",
+        },
+        200,
+        [
+            {
+                "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+                "object": "chat.completion.chunk",
+                "created": 1779406074,
+                "model": "gpt-5.1-2025-11-13",
+                "service_tier": "default",
+                "system_fingerprint": None,
+                "choices": [
+                    {"index": 0, "delta": {"role": "assistant", "content": "", "refusal": None}, "finish_reason": None}
+                ],
+                "usage": None,
+                "obfuscation": "n8bcfdX5",
+            },
+            {
+                "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+                "object": "chat.completion.chunk",
+                "created": 1779406074,
+                "model": "gpt-5.1-2025-11-13",
+                "service_tier": "default",
+                "system_fingerprint": None,
+                "choices": [{"index": 0, "delta": {"content": "212"}, "finish_reason": None}],
+                "usage": None,
+                "obfuscation": "t2ASUcp",
+            },
+            {
+                "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+                "object": "chat.completion.chunk",
+                "created": 1779406074,
+                "model": "gpt-5.1-2025-11-13",
+                "service_tier": "default",
+                "system_fingerprint": None,
+                "choices": [{"index": 0, "delta": {"content": "\u00b0F"}, "finish_reason": None}],
+                "usage": None,
+                "obfuscation": "QehtIgXV",
+            },
+            {
+                "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+                "object": "chat.completion.chunk",
+                "created": 1779406074,
+                "model": "gpt-5.1-2025-11-13",
+                "service_tier": "default",
+                "system_fingerprint": None,
+                "choices": [{"index": 0, "delta": {"content": " is"}, "finish_reason": None}],
+                "usage": None,
+                "obfuscation": "lEH0ats",
+            },
+            {
+                "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+                "object": "chat.completion.chunk",
+                "created": 1779406074,
+                "model": "gpt-5.1-2025-11-13",
+                "service_tier": "default",
+                "system_fingerprint": None,
+                "choices": [{"index": 0, "delta": {"content": " "}, "finish_reason": None}],
+                "usage": None,
+                "obfuscation": "SzPHsL8tM",
+            },
+            {
+                "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+                "object": "chat.completion.chunk",
+                "created": 1779406074,
+                "model": "gpt-5.1-2025-11-13",
+                "service_tier": "default",
+                "system_fingerprint": None,
+                "choices": [{"index": 0, "delta": {"content": "100"}, "finish_reason": None}],
+                "usage": None,
+                "obfuscation": "Z0vwQZq",
+            },
+            {
+                "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+                "object": "chat.completion.chunk",
+                "created": 1779406074,
+                "model": "gpt-5.1-2025-11-13",
+                "service_tier": "default",
+                "system_fingerprint": None,
+                "choices": [{"index": 0, "delta": {"content": "\u00b0C"}, "finish_reason": None}],
+                "usage": None,
+                "obfuscation": "HYe0zapB",
+            },
+            {
+                "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+                "object": "chat.completion.chunk",
+                "created": 1779406074,
+                "model": "gpt-5.1-2025-11-13",
+                "service_tier": "default",
+                "system_fingerprint": None,
+                "choices": [{"index": 0, "delta": {"content": "."}, "finish_reason": None}],
+                "usage": None,
+                "obfuscation": "0XyvpKQ0L",
+            },
+            {
+                "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+                "object": "chat.completion.chunk",
+                "created": 1779406074,
+                "model": "gpt-5.1-2025-11-13",
+                "service_tier": "default",
+                "system_fingerprint": None,
+                "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+                "usage": None,
+                "obfuscation": "EE9z",
+            },
+            {
+                "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+                "object": "chat.completion.chunk",
+                "created": 1779406074,
+                "model": "gpt-5.1-2025-11-13",
+                "service_tier": "default",
+                "system_fingerprint": None,
+                "choices": [],
+                "usage": {
+                    "prompt_tokens": 25,
+                    "completion_tokens": 16,
+                    "total_tokens": 41,
+                    "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0},
+                    "completion_tokens_details": {
+                        "reasoning_tokens": 0,
+                        "audio_tokens": 0,
+                        "accepted_prediction_tokens": 0,
+                        "rejected_prediction_tokens": 0,
+                    },
+                },
+                "obfuscation": "kV6540YmG",
+            },
+        ],
+    ]
+}
+
+RESPONSES_V0 = {
     "Invalid API key.": (
         {"Content-Type": "application/json; charset=utf-8", "x-request-id": "4f8f61a7d0401e42a6760ea2ca2049f6"},
         401,
@@ -678,7 +827,10 @@ def simple_get(openai_version, extract_shortened_prompt):
     def _simple_get(self):
         content_len = int(self.headers.get("content-length"))
         content = json.loads(self.rfile.read(content_len).decode("utf-8"))
-        stream = content.get("stream", False)
+        is_openai_v0 = openai_version < (1, 0)
+        is_streaming = content.get("stream", False)
+        # Streaming responses include a final usage chunk only when stream_options.include_usage is True.
+        include_usage = bool(is_streaming and (content.get("stream_options") or {}).get("include_usage"))
         prompt = extract_shortened_prompt(content)
         if not prompt:
             self.send_response(500)
@@ -688,14 +840,18 @@ def _simple_get(self):
 
         headers, response = ({}, "")
 
-        if openai_version < (1, 0):
-            mocked_responses = RESPONSES
-            if stream:
-                mocked_responses = STREAMED_RESPONSES
+        if is_openai_v0:
+            if is_streaming:
+                mocked_responses = STREAMED_RESPONSES_V0
+            else:
+                mocked_responses = RESPONSES_V0
         else:
-            mocked_responses = RESPONSES_V1
-            if stream:
+            if is_streaming and include_usage:
+                mocked_responses = STREAMED_RESPONSES_V1_WITH_USAGE
+            elif is_streaming:
                 mocked_responses = STREAMED_RESPONSES_V1
+            else:
+                mocked_responses = RESPONSES_V1
 
         for k, v in mocked_responses.items():
             if prompt.startswith(k):
@@ -704,7 +860,7 @@ def _simple_get(self):
         else:  # If no matches found
             self.send_response(500)
             self.end_headers()
-            self.wfile.write(f"Unknown Prompt ({'Streaming' if stream else 'Non-Streaming'}):\n{prompt}".encode())
+            self.wfile.write(f"Unknown Prompt ({'Streaming' if is_streaming else 'Non-Streaming'}):\n{prompt}".encode())
             return
 
         # Send response code
@@ -716,7 +872,7 @@ def _simple_get(self):
         self.end_headers()
 
         # Send response body
-        if stream and status_code < 400:
+        if is_streaming and status_code < 400:
             for resp in response:
                 data = json.dumps(resp).encode("utf-8")
                 if prompt == "Stream parsing error.":
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index fc7f7f3852..243d3d7f18 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -15,7 +15,7 @@
 import openai
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     disabled_ai_monitoring_streaming_settings,
@@ -56,6 +56,9 @@
             "response.organization": "new-relic-nkmd8b",
             "request.temperature": 0.7,
             "request.max_tokens": 100,
+            "response.usage.completion_tokens": 11,
+            "response.usage.total_tokens": 64,
+            "response.usage.prompt_tokens": 53,
             "response.choices.finish_reason": "stop",
             "response.headers.llmVersion": "2020-10-01",
             "response.headers.ratelimitLimitRequests": 200,
@@ -83,6 +86,7 @@
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "response.model": "gpt-3.5-turbo-0613",
             "vendor": "openai",
             "ingest_source": "Python",
@@ -102,6 +106,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "response.model": "gpt-3.5-turbo-0613",
             "vendor": "openai",
             "ingest_source": "Python",
@@ -120,6 +125,7 @@
             "role": "assistant",
             "completion_id": None,
             "sequence": 2,
+            "token_count": 0,
             "response.model": "gpt-3.5-turbo-0613",
             "vendor": "openai",
             "is_response": True,
@@ -175,7 +181,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info):
 
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
 # One summary event, one system message, one user message, and one response message from the assistant
 @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
@@ -346,7 +352,7 @@ def test_openai_chat_completion_async_no_content(loop, set_trace_info):
 
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
 # One summary event, one system message, one user message, and one response message from the assistant
 @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py
index 79cc79d6db..042cdef31a 100644
--- a/tests/mlmodel_openai/test_chat_completion_error.py
+++ b/tests/mlmodel_openai/test_chat_completion_error.py
@@ -15,13 +15,11 @@
 
 import openai
 import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
     disabled_ai_monitoring_record_content_settings,
     events_sans_content,
     events_with_context_attrs,
-    llm_token_count_callback,
     set_trace_info,
 )
 from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -70,6 +68,7 @@
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -86,6 +85,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -191,6 +191,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -198,36 +199,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
 ]
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
-    "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
-    scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-
-        openai.ChatCompletion.create(
-            model="does-not-exist",
-            messages=({"role": "user", "content": "Model does not exist."},),
-            temperature=0.7,
-            max_tokens=100,
-        )
-
-
 # Invalid model provided
 @dt_enabled
 @reset_core_stats_engine()
@@ -288,6 +259,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -304,6 +276,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -370,6 +343,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info):
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -481,37 +455,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s
         )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
-    "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async",
-    scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)],
-    rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-        loop.run_until_complete(
-            openai.ChatCompletion.acreate(
-                model="does-not-exist",
-                messages=({"role": "user", "content": "Model does not exist."},),
-                temperature=0.7,
-                max_tokens=100,
-            )
-        )
-
-
 # Invalid model provided
 @dt_enabled
 @reset_core_stats_engine()
diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py
index 555001a702..5e83b415e6 100644
--- a/tests/mlmodel_openai/test_chat_completion_error_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py
@@ -14,13 +14,11 @@
 
 import openai
 import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
     disabled_ai_monitoring_record_content_settings,
     events_sans_content,
     events_with_context_attrs,
-    llm_token_count_callback,
     set_trace_info,
 )
 from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -69,6 +67,7 @@
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -85,6 +84,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -234,6 +234,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -271,37 +272,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn
         )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
-    scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-        sync_openai_client.chat.completions.create(
-            model="does-not-exist",
-            messages=({"role": "user", "content": "Model does not exist."},),
-            temperature=0.7,
-            max_completion_tokens=100,
-        )
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
@@ -334,41 +304,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra
         )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async",
-    scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(
-    loop, set_trace_info, async_openai_client
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-        loop.run_until_complete(
-            async_openai_client.chat.completions.create(
-                model="does-not-exist",
-                messages=({"role": "user", "content": "Model does not exist."},),
-                temperature=0.7,
-                max_completion_tokens=100,
-            )
-        )
-
-
 expected_events_on_wrong_api_key_error = [
     (
         {"type": "LlmChatCompletionSummary"},
@@ -398,6 +333,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -617,39 +553,6 @@ def test_chat_completion_invalid_request_error_invalid_model_with_raw_response(s
         )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response",
-    scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response(
-    set_trace_info, sync_openai_client
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-        sync_openai_client.chat.completions.with_raw_response.create(
-            model="does-not-exist",
-            messages=({"role": "user", "content": "Model does not exist."},),
-            temperature=0.7,
-            max_completion_tokens=100,
-        )
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
@@ -684,41 +587,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async_with_raw_resp
         )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response",
-    scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response(
-    loop, set_trace_info, async_openai_client
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-        loop.run_until_complete(
-            async_openai_client.chat.completions.with_raw_response.create(
-                model="does-not-exist",
-                messages=({"role": "user", "content": "Model does not exist."},),
-                temperature=0.7,
-                max_completion_tokens=100,
-            )
-        )
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py
index 6858da8107..b1679ba4d2 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream.py
@@ -15,7 +15,8 @@
 import openai
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_streaming_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     disabled_ai_monitoring_streaming_settings,
@@ -188,9 +189,101 @@ def test_openai_chat_completion_sync_no_content(set_trace_info):
         assert resp
 
 
+chat_completion_recorded_token_events = [  # expected (event type, attributes) pairs: 1 summary + 3 messages
+    (
+        {"type": "LlmChatCompletionSummary"},
+        {
+            "id": None,  # UUID that varies with each run
+            "llm.conversation_id": "my-awesome-id",
+            "llm.foo": "bar",
+            "span_id": None,
+            "trace_id": "trace-id",
+            "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+            "duration": None,  # Response time varies each test run
+            "request.model": "gpt-3.5-turbo",
+            "response.model": "gpt-3.5-turbo-0613",
+            "response.organization": "new-relic-nkmd8b",
+            "request.temperature": 0.7,
+            "request.max_tokens": 100,
+            "response.choices.finish_reason": "stop",
+            "response.headers.llmVersion": "2020-10-01",
+            "response.headers.ratelimitLimitRequests": 200,
+            "response.headers.ratelimitLimitTokens": 40000,
+            "response.headers.ratelimitResetTokens": "90ms",
+            "response.headers.ratelimitResetRequests": "7m12s",
+            "response.headers.ratelimitRemainingTokens": 39940,
+            "response.headers.ratelimitRemainingRequests": 199,
+            "vendor": "openai",
+            "ingest_source": "Python",
+            "response.number_of_messages": 3,
+        },
+    ),
+    (
+        {"type": "LlmChatCompletionMessage"},
+        {
+            "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0",
+            "llm.conversation_id": "my-awesome-id",
+            "llm.foo": "bar",
+            "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+            "span_id": None,
+            "trace_id": "trace-id",
+            "content": "You are a scientist.",
+            "role": "system",
+            "completion_id": None,
+            "sequence": 0,
+            "token_count": 0,  # all three message entries in this list expect a zero token_count
+            "response.model": "gpt-3.5-turbo-0613",
+            "vendor": "openai",
+            "ingest_source": "Python",
+        },
+    ),
+    (
+        {"type": "LlmChatCompletionMessage"},
+        {
+            "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1",
+            "llm.conversation_id": "my-awesome-id",
+            "llm.foo": "bar",
+            "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+            "span_id": None,
+            "trace_id": "trace-id",
+            "content": "What is 212 degrees Fahrenheit converted to Celsius?",
+            "role": "user",
+            "completion_id": None,
+            "sequence": 1,
+            "token_count": 0,
+            "response.model": "gpt-3.5-turbo-0613",
+            "vendor": "openai",
+            "ingest_source": "Python",
+        },
+    ),
+    (
+        {"type": "LlmChatCompletionMessage"},
+        {
+            "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2",
+            "llm.conversation_id": "my-awesome-id",
+            "llm.foo": "bar",
+            "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+            "span_id": None,
+            "trace_id": "trace-id",
+            "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+            "role": "assistant",
+            "completion_id": None,
+            "sequence": 2,
+            "token_count": 0,
+            "response.model": "gpt-3.5-turbo-0613",
+            "vendor": "openai",
+            "is_response": True,  # only the assistant entry carries is_response
+            "ingest_source": "Python",
+        },
+    ),
+]  # NOTE(review): presumably consumed by a validate_custom_events decorator; TODO confirm this list is referenced
+
+
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+    add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
 # One summary event, one system message, one user message, and one response message from the assistant
 @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
@@ -253,25 +346,29 @@ def test_openai_chat_completion_sync_no_llm_metadata(set_trace_info):
 )
 @background_task()
 def test_openai_chat_completion_sync_ai_monitoring_streaming_disabled():
-    openai.ChatCompletion.create(
+    generator = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=_test_openai_chat_completion_messages,
         temperature=0.7,
         max_tokens=100,
         stream=True,
     )
+    for resp in generator:
+        assert resp
 
 
 @reset_core_stats_engine()
 @validate_custom_event_count(count=0)
 def test_openai_chat_completion_sync_outside_txn():
-    openai.ChatCompletion.create(
+    generator = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=_test_openai_chat_completion_messages,
         temperature=0.7,
         max_tokens=100,
         stream=True,
     )
+    for resp in generator:  # iterate the full stream; each yielded item must be truthy
+        assert resp
 
 
 @disabled_ai_monitoring_settings
@@ -279,13 +376,15 @@ def test_openai_chat_completion_sync_outside_txn():
 @validate_custom_event_count(count=0)
 @background_task()
 def test_openai_chat_completion_sync_ai_monitoring_disabled():
-    openai.ChatCompletion.create(
+    generator = openai.ChatCompletion.create(
         model="gpt-3.5-turbo",
         messages=_test_openai_chat_completion_messages,
         temperature=0.7,
         max_tokens=100,
         stream=True,
     )
+    for resp in generator:
+        assert resp
 
 
 @reset_core_stats_engine()
@@ -382,7 +481,9 @@ async def consumer():
 
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+    add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
 @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
     name="test_chat_completion_stream:test_openai_chat_completion_async_with_token_count",
diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py
index 0fb0d06867..2b01813d9f 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream_error.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py
@@ -15,13 +15,11 @@
 
 import openai
 import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
     disabled_ai_monitoring_record_content_settings,
     events_sans_content,
     events_with_context_attrs,
-    llm_token_count_callback,
     set_trace_info,
 )
 from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -70,6 +68,7 @@
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -86,6 +85,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -196,6 +196,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -203,38 +204,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
 ]
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
-    "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
-    scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-        generator = openai.ChatCompletion.create(
-            model="does-not-exist",
-            messages=({"role": "user", "content": "Model does not exist."},),
-            temperature=0.7,
-            max_tokens=100,
-            stream=True,
-        )
-        for resp in generator:
-            assert resp
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
@@ -297,6 +266,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -313,6 +283,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -384,6 +355,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info):
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -498,38 +470,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s
         )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
-    "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async",
-    scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)],
-    rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-        loop.run_until_complete(
-            openai.ChatCompletion.acreate(
-                model="does-not-exist",
-                messages=({"role": "user", "content": "Model does not exist."},),
-                temperature=0.7,
-                max_tokens=100,
-                stream=True,
-            )
-        )
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
@@ -661,6 +601,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py
index ce3ce8061e..074a482669 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py
@@ -12,16 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 import openai
 import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
     disabled_ai_monitoring_record_content_settings,
     events_sans_content,
     events_with_context_attrs,
-    llm_token_count_callback,
     set_trace_info,
 )
 from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -70,6 +67,7 @@
             "role": "system",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -86,6 +84,7 @@
             "role": "user",
             "completion_id": None,
             "sequence": 1,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -251,6 +250,7 @@ async def consumer():
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -291,81 +291,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn
             assert resp
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
-    scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-
-        generator = sync_openai_client.chat.completions.create(
-            model="does-not-exist",
-            messages=({"role": "user", "content": "Model does not exist."},),
-            temperature=0.7,
-            max_completion_tokens=100,
-            stream=True,
-        )
-        for resp in generator:
-            assert resp
-
-
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_async_with_token_count",
-    scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_async_with_token_count(
-    loop, set_trace_info, async_openai_client
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        add_custom_attribute("llm.conversation_id", "my-awesome-id")
-
-        async def consumer():
-            generator = await async_openai_client.chat.completions.create(
-                model="does-not-exist",
-                messages=({"role": "user", "content": "Model does not exist."},),
-                temperature=0.7,
-                max_completion_tokens=100,
-                stream=True,
-            )
-            async for resp in generator:
-                assert resp
-
-        loop.run_until_complete(consumer())
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
@@ -432,6 +357,7 @@ async def consumer():
             "role": "user",
             "completion_id": None,
             "sequence": 0,
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py
index 1343c208ac..25dbba8cea 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py
@@ -17,7 +17,8 @@
 from conftest import get_openai_version
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_streaming_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     disabled_ai_monitoring_streaming_settings,
@@ -64,7 +65,8 @@
             "request.model": "gpt-5.1",
             "response.model": "gpt-5.1-2025-11-13",
             "response.organization": "nr-test-org",
-            # Usage tokens aren't available when streaming.
+            # Usage tokens are only emitted by OpenAI when stream_options={"include_usage": True}.
+            # See test_openai_chat_completion_sync_with_stream_options_include_usage for that path.
             "request.temperature": 0.7,
             "request.max_tokens": 100,
             "response.choices.finish_reason": "stop",
@@ -141,6 +143,14 @@
 ]
 
 
+# When stream_options={"include_usage": True} is set, the final stream chunk carries usage data,
+# and the agent populates response.usage.* on the summary event and sets token_count to 0 on each message event.
+chat_completion_recorded_events_include_usage = add_token_count_streaming_events(chat_completion_recorded_events)
+chat_completion_recorded_events_include_usage[0][1].update(
+    {"response.usage.prompt_tokens": 25, "response.usage.completion_tokens": 16, "response.usage.total_tokens": 41}
+)
+
+
 @reset_core_stats_engine()
 @validate_custom_events(events_with_context_attrs(chat_completion_recorded_events))
 # One summary event, one system message, one user message, and one response message from the assistant
@@ -302,9 +312,39 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie
         assert resp
 
 
+@reset_core_stats_engine()
+@validate_custom_events(chat_completion_recorded_events_include_usage)
+@validate_custom_event_count(count=4)
+@validate_transaction_metrics(
+    name="test_chat_completion_stream_v1:test_openai_chat_completion_sync_with_stream_options_include_usage",
+    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
+    background_task=True,
+)
+@validate_attributes("agent", ["llm"])
+@background_task()
+def test_openai_chat_completion_sync_with_stream_options_include_usage(set_trace_info, sync_openai_client):
+    """Streaming with stream_options={"include_usage": True} populates response.usage.* on the summary."""
+    set_trace_info()
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.foo", "bar")
+
+    generator = sync_openai_client.chat.completions.create(
+        model="gpt-5.1",
+        messages=_test_openai_chat_completion_messages,
+        temperature=0.7,
+        max_completion_tokens=100,
+        stream=True,
+        stream_options={"include_usage": True},
+    )
+    for resp in generator:
+        assert resp
+
+
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+    add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
 # One summary event, one system message, one user message, and one response message from the assistant
 @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
@@ -624,9 +664,42 @@ async def consumer():
     loop.run_until_complete(consumer())
 
 
+@reset_core_stats_engine()
+@validate_custom_events(chat_completion_recorded_events_include_usage)
+@validate_custom_event_count(count=4)
+@validate_transaction_metrics(
+    name="test_chat_completion_stream_v1:test_openai_chat_completion_async_with_stream_options_include_usage",
+    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
+    background_task=True,
+)
+@validate_attributes("agent", ["llm"])
+@background_task()
+def test_openai_chat_completion_async_with_stream_options_include_usage(set_trace_info, loop, async_openai_client):
+    """Streaming with stream_options={"include_usage": True} populates response.usage.* on the summary."""
+    set_trace_info()
+    add_custom_attribute("llm.conversation_id", "my-awesome-id")
+    add_custom_attribute("llm.foo", "bar")
+
+    async def consumer():
+        generator = await async_openai_client.chat.completions.create(
+            model="gpt-5.1",
+            messages=_test_openai_chat_completion_messages,
+            temperature=0.7,
+            max_completion_tokens=100,
+            stream=True,
+            stream_options={"include_usage": True},
+        )
+        async for resp in generator:
+            assert resp
+
+    loop.run_until_complete(consumer())
+
+
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+    add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
 # One summary event, one system message, one user message, and one response message from the assistant
 # @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py
index ec636ca7d2..ae87f1757b 100644
--- a/tests/mlmodel_openai/test_chat_completion_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_v1.py
@@ -15,7 +15,7 @@
 import openai
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_counts_to_chat_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     disabled_ai_monitoring_streaming_settings,
@@ -55,6 +55,9 @@
             "response.organization": "nr-test-org",
             "request.temperature": 0.7,
             "request.max_tokens": 100,
+            "response.usage.completion_tokens": 16,
+            "response.usage.prompt_tokens": 25,
+            "response.usage.total_tokens": 41,
             "response.choices.finish_reason": "stop",
             "response.headers.llmVersion": "2020-10-01",
             "response.headers.ratelimitLimitRequests": 15000,
@@ -83,6 +86,7 @@
             "completion_id": None,
             "sequence": 0,
             "response.model": "gpt-5.1-2025-11-13",
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -102,6 +106,7 @@
             "completion_id": None,
             "sequence": 1,
             "response.model": "gpt-5.1-2025-11-13",
+            "token_count": 0,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -120,6 +125,7 @@
             "completion_id": None,
             "sequence": 2,
             "response.model": "gpt-5.1-2025-11-13",
+            "token_count": 0,
             "vendor": "openai",
             "is_response": True,
             "ingest_source": "Python",
@@ -196,7 +202,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie
 
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
 # One summary event, one system message, one user message, and one response message from the assistant
 @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
@@ -395,7 +401,7 @@ def test_openai_chat_completion_async_with_llm_metadata_no_content(loop, set_tra
 
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
 # One summary event, one system message, one user message, and one response message from the assistant
 @validate_custom_event_count(count=4)
 @validate_transaction_metrics(
diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py
index c3c3e7c429..935db04fe0 100644
--- a/tests/mlmodel_openai/test_embeddings.py
+++ b/tests/mlmodel_openai/test_embeddings.py
@@ -19,7 +19,7 @@
     validate_attributes,
 )
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_to_embedding_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     events_sans_content,
@@ -55,6 +55,7 @@
             "response.headers.ratelimitResetRequests": "19m45.394s",
             "response.headers.ratelimitRemainingTokens": 149994,
             "response.headers.ratelimitRemainingRequests": 197,
+            "response.usage.total_tokens": 6,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -107,7 +108,7 @@ def test_openai_embedding_sync_no_content(set_trace_info):
 
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
 @validate_custom_event_count(count=1)
 @validate_transaction_metrics(
     name="test_embeddings:test_openai_embedding_sync_with_token_count",
@@ -191,7 +192,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info):
 
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
 @validate_custom_event_count(count=1)
 @validate_transaction_metrics(
     name="test_embeddings:test_openai_embedding_async_with_token_count",
diff --git a/tests/mlmodel_openai/test_embeddings_error.py b/tests/mlmodel_openai/test_embeddings_error.py
index a8e46bf23a..f80e6ff41d 100644
--- a/tests/mlmodel_openai/test_embeddings_error.py
+++ b/tests/mlmodel_openai/test_embeddings_error.py
@@ -14,12 +14,10 @@
 
 import openai
 import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
     disabled_ai_monitoring_record_content_settings,
     events_sans_content,
-    llm_token_count_callback,
     set_trace_info,
 )
 from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -128,35 +126,6 @@ def test_embeddings_invalid_request_error_no_model_no_content(set_trace_info):
 ]
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}},
-)
-@validate_span_events(
-    exact_agents={
-        "error.message": "The model `does-not-exist` does not exist"
-        # "http.statusCode": 404,
-    }
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        openai.Embedding.create(input="Model does not exist.", model="does-not-exist")
-
-
 # Invalid model provided
 @dt_enabled
 @reset_core_stats_engine()
@@ -348,30 +317,6 @@ def test_embeddings_invalid_request_error_no_model_async_no_content(loop, set_tr
         )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.InvalidRequestError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
-    name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count_async",
-    scoped_metrics=[("Llm/embedding/OpenAI/acreate", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/acreate", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count_async(set_trace_info, loop):
-    set_trace_info()
-    with pytest.raises(openai.InvalidRequestError):
-        loop.run_until_complete(openai.Embedding.acreate(input="Model does not exist.", model="does-not-exist"))
-
-
 # Invalid model provided
 @dt_enabled
 @reset_core_stats_engine()
diff --git a/tests/mlmodel_openai/test_embeddings_error_v1.py b/tests/mlmodel_openai/test_embeddings_error_v1.py
index fd29236122..499f96893b 100644
--- a/tests/mlmodel_openai/test_embeddings_error_v1.py
+++ b/tests/mlmodel_openai/test_embeddings_error_v1.py
@@ -16,12 +16,10 @@
 
 import openai
 import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
     disabled_ai_monitoring_record_content_settings,
     events_sans_content,
-    llm_token_count_callback,
     set_trace_info,
 )
 from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -149,32 +147,6 @@ def test_embeddings_invalid_request_error_no_model_async(set_trace_info, async_o
 ]
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        sync_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist")
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
@@ -255,36 +227,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content(set_tra
         )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_async_with_token_count(
-    set_trace_info, async_openai_client, loop
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        loop.run_until_complete(
-            async_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist")
-        )
-
-
 embedding_invalid_key_error_events = [
     (
         {"type": "LlmEmbedding"},
@@ -449,34 +391,6 @@ def test_embeddings_invalid_request_error_no_model_async_with_raw_response(set_t
         )  # no model provided
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response(
-    set_trace_info, sync_openai_client
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        sync_openai_client.embeddings.with_raw_response.create(input="Model does not exist.", model="does-not-exist")
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
@@ -566,38 +480,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content_with_ra
         )
 
 
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
-    callable_name(openai.NotFoundError),
-    exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
-    exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
-    name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response",
-    scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
-    custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
-    background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response(
-    set_trace_info, async_openai_client, loop
-):
-    set_trace_info()
-    with pytest.raises(openai.NotFoundError):
-        loop.run_until_complete(
-            async_openai_client.embeddings.with_raw_response.create(
-                input="Model does not exist.", model="does-not-exist"
-            )
-        )
-
-
 @dt_enabled
 @reset_core_stats_engine()
 @validate_error_trace_attributes(
diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py
index 9dd10262a5..004f8ba729 100644
--- a/tests/mlmodel_openai/test_embeddings_v1.py
+++ b/tests/mlmodel_openai/test_embeddings_v1.py
@@ -15,7 +15,7 @@
 import openai
 from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
 from testing_support.ml_testing_utils import (
-    add_token_count_to_events,
+    add_token_count_to_embedding_events,
     disabled_ai_monitoring_record_content_settings,
     disabled_ai_monitoring_settings,
     events_sans_content,
@@ -48,6 +48,7 @@
             "response.headers.ratelimitResetRequests": "6ms",
             "response.headers.ratelimitRemainingTokens": 9999994,
             "response.headers.ratelimitRemainingRequests": 9999,
+            "response.usage.total_tokens": 6,
             "vendor": "openai",
             "ingest_source": "Python",
         },
@@ -111,7 +112,7 @@ def test_openai_embedding_sync_no_content(set_trace_info, sync_openai_client):
 
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
 @validate_custom_event_count(count=1)
 @validate_transaction_metrics(
     name="test_embeddings_v1:test_openai_embedding_sync_with_token_count",
@@ -206,7 +207,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info, async_openai_cl
 
 @reset_core_stats_engine()
 @override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
 @validate_custom_event_count(count=1)
 @validate_transaction_metrics(
     name="test_embeddings_v1:test_openai_embedding_async_with_token_count",
diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py
index 4ff70c7ed4..8c2c0444f0 100644
--- a/tests/testing_support/ml_testing_utils.py
+++ b/tests/testing_support/ml_testing_utils.py
@@ -29,6 +29,7 @@ def llm_token_count_callback(model, content):
     return 105
 
 
+# This will be removed once all LLM instrumentations have been converted to use the new token count design.
 def add_token_count_to_events(expected_events):
     events = copy.deepcopy(expected_events)
     for event in events:
@@ -37,6 +38,32 @@ def add_token_count_to_events(expected_events):
     return events
 
 
+def add_token_count_to_embedding_events(expected_events):
+    events = copy.deepcopy(expected_events)
+    for event in events:
+        if event[0]["type"] == "LlmEmbedding":
+            event[1]["response.usage.total_tokens"] = 105
+    return events
+
+
+def add_token_count_streaming_events(expected_events):
+    events = copy.deepcopy(expected_events)
+    for event in events:
+        if event[0]["type"] == "LlmChatCompletionMessage":
+            event[1]["token_count"] = 0
+    return events
+
+
+def add_token_counts_to_chat_events(expected_events):
+    events = copy.deepcopy(expected_events)
+    for event in events:
+        if event[0]["type"] == "LlmChatCompletionSummary":
+            event[1]["response.usage.prompt_tokens"] = 105
+            event[1]["response.usage.completion_tokens"] = 105
+            event[1]["response.usage.total_tokens"] = 210
+    return events
+
+
 def events_sans_content(event):
     new_event = copy.deepcopy(event)
     for _event in new_event: