newrelic · TimPansino · Jun 1, 2026 · Jun 1, 2026 · Jun 2, 2026 · Jun 3, 2026
@@ -133,11 +133,11 @@ def create_chat_completion_message_event(
     span_id,
     trace_id,
     response_model,
-    request_model,
     response_id,
     request_id,
     llm_metadata,
     output_message_list,
+    all_token_counts,
     request_timestamp=None,
 ):
     settings = transaction.settings if transaction.settings is not None else global_settings()
@@ -158,20 +158,20 @@ def create_chat_completion_message_event(
             "request_id": request_id,
             "span_id": span_id,
             "trace_id": trace_id,
-            "token_count": (
-                settings.ai_monitoring.llm_token_count_callback(request_model, message_content)
-                if settings.ai_monitoring.llm_token_count_callback
-                else None
-            ),
             "role": message.get("role"),
             "completion_id": chat_completion_id,
             "sequence": index,
             "response.model": response_model,
             "vendor": "openai",
             "ingest_source": "Python",
         }
+
         if settings.ai_monitoring.record_content.enabled and message_content:
             chat_completion_input_message_dict["content"] = message_content
+
+        if all_token_counts:
+            chat_completion_input_message_dict["token_count"] = 0
+
         if request_timestamp:
             chat_completion_input_message_dict["timestamp"] = request_timestamp
 
@@ -199,11 +199,6 @@ def create_chat_completion_message_event(
                 "request_id": request_id,
                 "span_id": span_id,
                 "trace_id": trace_id,
-                "token_count": (
-                    settings.ai_monitoring.llm_token_count_callback(response_model, message_content)
-                    if settings.ai_monitoring.llm_token_count_callback
-                    else None
-                ),
                 "role": message.get("role"),
                 "completion_id": chat_completion_id,
                 "sequence": index,
@@ -216,6 +211,9 @@ def create_chat_completion_message_event(
             if settings.ai_monitoring.record_content.enabled and message_content:
                 chat_completion_output_message_dict["content"] = message_content
 
+            if all_token_counts:
+                chat_completion_output_message_dict["token_count"] = 0
+
             chat_completion_output_message_dict.update(llm_metadata)
 
             transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_output_message_dict)
@@ -286,15 +284,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
             else getattr(attribute_response, "organization", None)
         )
 
+        response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None
+
+        total_tokens = (
+            settings.ai_monitoring.llm_token_count_callback(response_model, input_)
+            if settings.ai_monitoring.llm_token_count_callback and input_
+            else response_total_tokens
+        )
+
         full_embedding_response_dict = {
             "id": embedding_id,
             "span_id": span_id,
             "trace_id": trace_id,
-            "token_count": (
-                settings.ai_monitoring.llm_token_count_callback(response_model, input_)
-                if settings.ai_monitoring.llm_token_count_callback
-                else None
-            ),
             "request.model": kwargs.get("model") or kwargs.get("engine"),
             "request_id": request_id,
             "duration": ft.duration * 1000,
@@ -319,6 +320,7 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
             "response.headers.ratelimitRemainingRequests": check_rate_limit_header(
                 response_headers, "x-ratelimit-remaining-requests", True
             ),
+            "response.usage.total_tokens": total_tokens,
             "vendor": "openai",
             "ingest_source": "Python",
         }
@@ -489,13 +491,15 @@ def _handle_completion_success(
 def _record_completion_success(
     transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response, request_timestamp=None
 ):
+    settings = transaction.settings if transaction.settings is not None else global_settings()
     span_id = linking_metadata.get("span.id")
     trace_id = linking_metadata.get("trace.id")
 
     try:
         if response:
             response_model = response.get("model")
             response_id = response.get("id")
+            token_usage = response.get("usage") or {}
             output_message_list = []
             finish_reason = None
             choices = response.get("choices") or []
@@ -509,6 +513,7 @@ def _record_completion_success(
         else:
             response_model = kwargs.get("response.model")
             response_id = kwargs.get("id")
+            token_usage = kwargs.get("response.usage") or {}
             finish_reason = kwargs.get("finish_reason")
             content = kwargs.get("content")
             # Tool-call responses may carry an empty content string; in that case the
@@ -519,12 +524,39 @@ def _record_completion_success(
                 output_message_list = [{"content": content, "role": kwargs.get("role")}]
             else:
                 output_message_list = []
+
         request_model = kwargs.get("model") or kwargs.get("engine")
 
-        request_id = response_headers.get("x-request-id")
-        organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
         messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}]
         input_message_list = list(messages)
+
+        # Token counts default to those reported in the response object, if available,
+        # but the user-registered callback below may override them.
+        response_prompt_tokens = token_usage.get("prompt_tokens")
+        response_completion_tokens = token_usage.get("completion_tokens")
+        response_total_tokens = token_usage.get("total_tokens")
+
+        # If the user has registered a callback to compute token counts, it should always be preferred.
+        token_count_callback = settings.ai_monitoring.llm_token_count_callback
+        if token_count_callback:
+            input_message_content = " ".join(content for msg in input_message_list if (content := msg.get("content")))
+            if input_message_content:
+                response_prompt_tokens = token_count_callback(request_model, input_message_content)
+            output_message_content = " ".join(content for msg in output_message_list if (content := msg.get("content")))
+            if output_message_content:
+                response_completion_tokens = token_count_callback(response_model, output_message_content)
+
+        # Prefer the sum of individual counts as the total whenever both are available.
+        # This ensures consistency in the event that the token counting callback has reported
+        # different values for prompt or completion tokens.
+        if response_prompt_tokens and response_completion_tokens:
+            response_total_tokens = response_prompt_tokens + response_completion_tokens
+
+        all_token_counts = bool(response_prompt_tokens and response_completion_tokens and response_total_tokens)
+
+        request_id = response_headers.get("x-request-id")
+        organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
+
         full_chat_completion_summary_dict = {
             "id": completion_id,
             "span_id": span_id,
@@ -571,6 +603,12 @@ def _record_completion_success(
             "response.number_of_messages": len(input_message_list) + len(output_message_list),
             "timestamp": request_timestamp,
         }
+
+        if all_token_counts:
+            full_chat_completion_summary_dict["response.usage.prompt_tokens"] = response_prompt_tokens
+            full_chat_completion_summary_dict["response.usage.completion_tokens"] = response_completion_tokens
+            full_chat_completion_summary_dict["response.usage.total_tokens"] = response_total_tokens
+
         llm_metadata = _get_llm_attributes(transaction)
 
         if "time_to_first_token" in kwargs:
@@ -586,11 +624,11 @@ def _record_completion_success(
             span_id,
             trace_id,
             response_model,
-            request_model,
             response_id,
             request_id,
             llm_metadata,
             output_message_list,
+            all_token_counts,
             request_timestamp,
         )
     except Exception:
@@ -602,6 +640,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
     trace_id = linking_metadata.get("trace.id")
     request_message_list = kwargs.get("messages", None) or []
     notice_error_attributes = {}
+
     try:
         if OPENAI_V1:
             response = getattr(exc, "response", None)
@@ -667,18 +706,20 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
         output_message_list = []
         if "content" in kwargs:
             output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}]
+
         create_chat_completion_message_event(
             transaction,
             request_message_list,
             completion_id,
             span_id,
             trace_id,
             kwargs.get("response.model"),
-            request_model,
             response_id,
             request_id,
             llm_metadata,
             output_message_list,
+            # Token counts are not recorded in error cases; pass all_token_counts=True so the pipeline tokenizer does not run
+            True,
             request_timestamp,
         )
     except Exception:
@@ -787,6 +828,7 @@ def _record_stream_chunk(self, return_val):
             self._nr_openai_attrs["response.model"] = return_val.get("model")
             self._nr_openai_attrs["id"] = return_val.get("id")
             self._nr_openai_attrs["response.organization"] = return_val.get("organization")
+            self._nr_openai_attrs["response.usage"] = return_val.get("usage")
             if choices:
                 delta = choices[0].get("delta") or {}
                 if delta:

@@ -33,7 +33,7 @@
     "system: You are a world class algorithm for extracting information in structured formats. | user: Use the given format to extract information from the following input: Hello, world | user: Tip: Make sure to answer in the correct format": [
         {
             "content-type": "text/event-stream; charset=utf-8",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "440",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -125,7 +125,7 @@
     'user: Use a tool to add an exclamation to the word "Hello"': [
         {
             "content-type": "text/event-stream; charset=utf-8",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "134",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -334,7 +334,7 @@
     'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "Hello"': [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "324",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -391,7 +391,7 @@
     'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "Hello" | assistant: None | tool: Hello!': [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "751",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -441,7 +441,7 @@
     'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "exc"': [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "767",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -498,7 +498,7 @@
     "system: You are a helpful assistant who generates a random first name. A user will pass in a first letter, and you should generate a name that starts with that first letter. | user: M": [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "236",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -543,7 +543,7 @@
     "system: You are a helpful assistant who generates comma separated lists.\n    A user will pass in a category, and you should generate 5 objects in that category in a comma separated list.\n    ONLY return a comma separated list, and nothing more. | user: colors": [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "289",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -593,7 +593,7 @@
     "system: You are a world class algorithm for extracting information in structured formats. | user: Use the given format to extract information from the following input: Sally is 13 | user: Tip: Make sure to answer in the correct format": [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "201",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -656,7 +656,7 @@
     "system: You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4? | user: math": [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "2029",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -708,7 +708,7 @@
         {
             "content-type": "application/json",
             "openai-model": "text-embedding-ada-002-v2",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "42",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -738,7 +738,7 @@
         {
             "content-type": "application/json",
             "openai-model": "text-embedding-ada-002-v2",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "82",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -768,7 +768,7 @@
         {
             "content-type": "application/json",
             "openai-model": "text-embedding-ada-002-v2",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "158",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -798,7 +798,7 @@
         {
             "content-type": "application/json",
             "openai-model": "text-embedding-ada-002-v2",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "116",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",
@@ -827,7 +827,7 @@
     'user: Use a tool to add an exclamation to the word "Hello"': [
         {
             "content-type": "application/json",
-            "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+            "openai-organization": "nr-test-org",
             "openai-processing-ms": "238",
             "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
             "openai-version": "2020-10-01",