diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py
index 564069ef75..0c626683e5 100644
--- a/newrelic/hooks/mlmodel_openai.py
+++ b/newrelic/hooks/mlmodel_openai.py
@@ -133,11 +133,11 @@ def create_chat_completion_message_event(
span_id,
trace_id,
response_model,
- request_model,
response_id,
request_id,
llm_metadata,
output_message_list,
+ all_token_counts,
request_timestamp=None,
):
settings = transaction.settings if transaction.settings is not None else global_settings()
@@ -158,11 +158,6 @@ def create_chat_completion_message_event(
"request_id": request_id,
"span_id": span_id,
"trace_id": trace_id,
- "token_count": (
- settings.ai_monitoring.llm_token_count_callback(request_model, message_content)
- if settings.ai_monitoring.llm_token_count_callback
- else None
- ),
"role": message.get("role"),
"completion_id": chat_completion_id,
"sequence": index,
@@ -170,8 +165,13 @@ def create_chat_completion_message_event(
"vendor": "openai",
"ingest_source": "Python",
}
+
if settings.ai_monitoring.record_content.enabled and message_content:
chat_completion_input_message_dict["content"] = message_content
+
+ if all_token_counts:
+ chat_completion_input_message_dict["token_count"] = 0
+
if request_timestamp:
chat_completion_input_message_dict["timestamp"] = request_timestamp
@@ -199,11 +199,6 @@ def create_chat_completion_message_event(
"request_id": request_id,
"span_id": span_id,
"trace_id": trace_id,
- "token_count": (
- settings.ai_monitoring.llm_token_count_callback(response_model, message_content)
- if settings.ai_monitoring.llm_token_count_callback
- else None
- ),
"role": message.get("role"),
"completion_id": chat_completion_id,
"sequence": index,
@@ -216,6 +211,9 @@ def create_chat_completion_message_event(
if settings.ai_monitoring.record_content.enabled and message_content:
chat_completion_output_message_dict["content"] = message_content
+ if all_token_counts:
+ chat_completion_output_message_dict["token_count"] = 0
+
chat_completion_output_message_dict.update(llm_metadata)
transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_output_message_dict)
@@ -286,15 +284,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
else getattr(attribute_response, "organization", None)
)
+ response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None
+
+ total_tokens = (
+ settings.ai_monitoring.llm_token_count_callback(response_model, input_)
+ if settings.ai_monitoring.llm_token_count_callback and input_
+ else response_total_tokens
+ )
+
full_embedding_response_dict = {
"id": embedding_id,
"span_id": span_id,
"trace_id": trace_id,
- "token_count": (
- settings.ai_monitoring.llm_token_count_callback(response_model, input_)
- if settings.ai_monitoring.llm_token_count_callback
- else None
- ),
"request.model": kwargs.get("model") or kwargs.get("engine"),
"request_id": request_id,
"duration": ft.duration * 1000,
@@ -319,6 +320,7 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
"response.headers.ratelimitRemainingRequests": check_rate_limit_header(
response_headers, "x-ratelimit-remaining-requests", True
),
+ "response.usage.total_tokens": total_tokens,
"vendor": "openai",
"ingest_source": "Python",
}
@@ -489,6 +491,7 @@ def _handle_completion_success(
def _record_completion_success(
transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response, request_timestamp=None
):
+ settings = transaction.settings if transaction.settings is not None else global_settings()
span_id = linking_metadata.get("span.id")
trace_id = linking_metadata.get("trace.id")
@@ -496,6 +499,7 @@ def _record_completion_success(
if response:
response_model = response.get("model")
response_id = response.get("id")
+ token_usage = response.get("usage") or {}
output_message_list = []
finish_reason = None
choices = response.get("choices") or []
@@ -509,6 +513,7 @@ def _record_completion_success(
else:
response_model = kwargs.get("response.model")
response_id = kwargs.get("id")
+ token_usage = kwargs.get("response.usage") or {}
finish_reason = kwargs.get("finish_reason")
content = kwargs.get("content")
# Tool-call responses may carry an empty content string; in that case the
@@ -519,12 +524,39 @@ def _record_completion_success(
output_message_list = [{"content": content, "role": kwargs.get("role")}]
else:
output_message_list = []
+
request_model = kwargs.get("model") or kwargs.get("engine")
- request_id = response_headers.get("x-request-id")
- organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}]
input_message_list = list(messages)
+
+ # Token counts default to those reported in the response object, if available,
+ # but the user-registered callback below may override them.
+ response_prompt_tokens = token_usage.get("prompt_tokens")
+ response_completion_tokens = token_usage.get("completion_tokens")
+ response_total_tokens = token_usage.get("total_tokens")
+
+ # If the user has registered a callback to compute token counts it should always be preferred.
+ token_count_callback = settings.ai_monitoring.llm_token_count_callback
+ if token_count_callback:
+ input_message_content = " ".join(content for msg in input_message_list if (content := msg.get("content")))
+ if input_message_content:
+ response_prompt_tokens = token_count_callback(request_model, input_message_content)
+ output_message_content = " ".join(content for msg in output_message_list if (content := msg.get("content")))
+ if output_message_content:
+ response_completion_tokens = token_count_callback(response_model, output_message_content)
+
+ # Prefer the sum of individual counts as the total whenever both are available.
+ # This ensures consistency in the event that the token counting callback has reported
+ # different values for prompt or completion tokens.
+ if response_prompt_tokens and response_completion_tokens:
+ response_total_tokens = response_prompt_tokens + response_completion_tokens
+
+ all_token_counts = bool(response_prompt_tokens and response_completion_tokens and response_total_tokens)
+
+ request_id = response_headers.get("x-request-id")
+ organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
+
full_chat_completion_summary_dict = {
"id": completion_id,
"span_id": span_id,
@@ -571,6 +603,12 @@ def _record_completion_success(
"response.number_of_messages": len(input_message_list) + len(output_message_list),
"timestamp": request_timestamp,
}
+
+ if all_token_counts:
+ full_chat_completion_summary_dict["response.usage.prompt_tokens"] = response_prompt_tokens
+ full_chat_completion_summary_dict["response.usage.completion_tokens"] = response_completion_tokens
+ full_chat_completion_summary_dict["response.usage.total_tokens"] = response_total_tokens
+
llm_metadata = _get_llm_attributes(transaction)
if "time_to_first_token" in kwargs:
@@ -586,11 +624,11 @@ def _record_completion_success(
span_id,
trace_id,
response_model,
- request_model,
response_id,
request_id,
llm_metadata,
output_message_list,
+ all_token_counts,
request_timestamp,
)
except Exception:
@@ -602,6 +640,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
trace_id = linking_metadata.get("trace.id")
request_message_list = kwargs.get("messages", None) or []
notice_error_attributes = {}
+
try:
if OPENAI_V1:
response = getattr(exc, "response", None)
@@ -667,6 +706,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
output_message_list = []
if "content" in kwargs:
output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}]
+
create_chat_completion_message_event(
transaction,
request_message_list,
@@ -674,11 +714,12 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
span_id,
trace_id,
kwargs.get("response.model"),
- request_model,
response_id,
request_id,
llm_metadata,
output_message_list,
+ # Token counts are not recorded for error cases; pass all_token_counts=True so each message event gets token_count 0 and the pipeline tokenizer does not run
+ True,
request_timestamp,
)
except Exception:
@@ -787,6 +828,7 @@ def _record_stream_chunk(self, return_val):
self._nr_openai_attrs["response.model"] = return_val.get("model")
self._nr_openai_attrs["id"] = return_val.get("id")
self._nr_openai_attrs["response.organization"] = return_val.get("organization")
+ self._nr_openai_attrs["response.usage"] = return_val.get("usage")
if choices:
delta = choices[0].get("delta") or {}
if delta:
diff --git a/tests/mlmodel_langchain/_mock_external_openai_server.py b/tests/mlmodel_langchain/_mock_external_openai_server.py
index 74740ba520..fd8cc1231b 100644
--- a/tests/mlmodel_langchain/_mock_external_openai_server.py
+++ b/tests/mlmodel_langchain/_mock_external_openai_server.py
@@ -33,7 +33,7 @@
"system: You are a world class algorithm for extracting information in structured formats. | user: Use the given format to extract information from the following input: Hello, world | user: Tip: Make sure to answer in the correct format": [
{
"content-type": "text/event-stream; charset=utf-8",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "440",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -125,7 +125,7 @@
'user: Use a tool to add an exclamation to the word "Hello"': [
{
"content-type": "text/event-stream; charset=utf-8",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "134",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -334,7 +334,7 @@
'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "Hello"': [
{
"content-type": "application/json",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "324",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -391,7 +391,7 @@
'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "Hello" | assistant: None | tool: Hello!': [
{
"content-type": "application/json",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "751",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -441,7 +441,7 @@
'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "exc"': [
{
"content-type": "application/json",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "767",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -498,7 +498,7 @@
"system: You are a helpful assistant who generates a random first name. A user will pass in a first letter, and you should generate a name that starts with that first letter. | user: M": [
{
"content-type": "application/json",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "236",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -543,7 +543,7 @@
"system: You are a helpful assistant who generates comma separated lists.\n A user will pass in a category, and you should generate 5 objects in that category in a comma separated list.\n ONLY return a comma separated list, and nothing more. | user: colors": [
{
"content-type": "application/json",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "289",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -593,7 +593,7 @@
"system: You are a world class algorithm for extracting information in structured formats. | user: Use the given format to extract information from the following input: Sally is 13 | user: Tip: Make sure to answer in the correct format": [
{
"content-type": "application/json",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "201",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -656,7 +656,7 @@
"system: You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4? | user: math": [
{
"content-type": "application/json",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "2029",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -708,7 +708,7 @@
{
"content-type": "application/json",
"openai-model": "text-embedding-ada-002-v2",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "42",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -738,7 +738,7 @@
{
"content-type": "application/json",
"openai-model": "text-embedding-ada-002-v2",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "82",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -768,7 +768,7 @@
{
"content-type": "application/json",
"openai-model": "text-embedding-ada-002-v2",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "158",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -798,7 +798,7 @@
{
"content-type": "application/json",
"openai-model": "text-embedding-ada-002-v2",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "116",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
@@ -827,7 +827,7 @@
'user: Use a tool to add an exclamation to the word "Hello"': [
{
"content-type": "application/json",
- "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "openai-organization": "nr-test-org",
"openai-processing-ms": "238",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py
index 89757675d7..156a2c8d4b 100644
--- a/tests/mlmodel_langchain/test_chain.py
+++ b/tests/mlmodel_langchain/test_chain.py
@@ -377,7 +377,7 @@
"request_id": None,
"duration": None,
"response.model": "text-embedding-ada-002-v2",
- "response.organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "response.organization": "nr-test-org",
"response.headers.llmVersion": "2020-10-01",
"response.headers.ratelimitLimitRequests": 10000,
"response.headers.ratelimitLimitTokens": 10000000,
@@ -385,6 +385,7 @@
"response.headers.ratelimitRemainingTokens": 9999992,
"response.headers.ratelimitResetRequests": "6ms",
"response.headers.ratelimitResetTokens": "0s",
+ "response.usage.total_tokens": 8,
"vendor": "openai",
"ingest_source": "Python",
"input": "[[3923, 374, 220, 17, 489, 220, 19, 30]]",
@@ -400,7 +401,7 @@
"request_id": None,
"duration": None,
"response.model": "text-embedding-ada-002-v2",
- "response.organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "response.organization": "nr-test-org",
"response.headers.llmVersion": "2020-10-01",
"response.headers.ratelimitLimitRequests": 10000,
"response.headers.ratelimitLimitTokens": 10000000,
@@ -408,6 +409,7 @@
"response.headers.ratelimitRemainingTokens": 9999998,
"response.headers.ratelimitResetRequests": "6ms",
"response.headers.ratelimitResetTokens": "0s",
+ "response.usage.total_tokens": 1,
"vendor": "openai",
"ingest_source": "Python",
"input": "[[10590]]",
@@ -471,7 +473,7 @@
"request_id": None,
"duration": None,
"response.model": "gpt-3.5-turbo-0125",
- "response.organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "response.organization": "nr-test-org",
"response.choices.finish_reason": "stop",
"response.headers.llmVersion": "2020-10-01",
"response.headers.ratelimitLimitRequests": 10000,
@@ -480,6 +482,9 @@
"response.headers.ratelimitRemainingTokens": 49999927,
"response.headers.ratelimitResetRequests": "6ms",
"response.headers.ratelimitResetTokens": "0s",
+ "response.usage.prompt_tokens": 73,
+ "response.usage.completion_tokens": 337,
+ "response.usage.total_tokens": 410,
"response.number_of_messages": 3,
},
],
@@ -496,6 +501,7 @@
"sequence": 0,
"response.model": "gpt-3.5-turbo-0125",
"vendor": "openai",
+ "token_count": 0,
"ingest_source": "Python",
"content": "You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4?",
},
@@ -513,6 +519,7 @@
"sequence": 1,
"response.model": "gpt-3.5-turbo-0125",
"vendor": "openai",
+ "token_count": 0,
"ingest_source": "Python",
"content": "math",
},
@@ -529,6 +536,7 @@
"sequence": 2,
"response.model": "gpt-3.5-turbo-0125",
"vendor": "openai",
+ "token_count": 0,
"ingest_source": "Python",
"is_response": True,
"content": "```html\n\n\n
\n Math Quiz\n\n\n Math Quiz Questions
\n \n - What is the result of 5 + 3?
\n \n - A) 7
\n - B) 8
\n - C) 9
\n - D) 10
\n
\n - What is the product of 6 x 7?
\n \n - A) 36
\n - B) 42
\n - C) 48
\n - D) 56
\n
\n - What is the square root of 64?
\n \n - A) 6
\n - B) 7
\n - C) 8
\n - D) 9
\n
\n - What is the result of 12 / 4?
\n \n - A) 2
\n - B) 3
\n - C) 4
\n - D) 5
\n
\n - What is the sum of 15 + 9?
\n \n - A) 22
\n - B) 23
\n - C) 24
\n - D) 25
\n
\n
\n\n\n```",
diff --git a/tests/mlmodel_langchain/test_state_graph.py b/tests/mlmodel_langchain/test_state_graph.py
index a47ad5f3d6..799124bb0a 100644
--- a/tests/mlmodel_langchain/test_state_graph.py
+++ b/tests/mlmodel_langchain/test_state_graph.py
@@ -43,7 +43,10 @@
"response.headers.ratelimitResetTokens": "0s",
"response.model": "gpt-3.5-turbo-0125",
"response.number_of_messages": 2,
- "response.organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "response.organization": "nr-test-org",
+ "response.usage.completion_tokens": 2,
+ "response.usage.prompt_tokens": 21,
+ "response.usage.total_tokens": 23,
"span_id": None,
"timestamp": None,
"trace_id": None,
@@ -63,6 +66,7 @@
"sequence": 0,
"span_id": None,
"timestamp": None,
+ "token_count": 0,
"trace_id": None,
"vendor": "openai",
},
@@ -80,6 +84,7 @@
"role": "assistant",
"sequence": 1,
"span_id": None,
+ "token_count": 0,
"trace_id": None,
"vendor": "openai",
},
@@ -106,7 +111,12 @@
"response.headers.ratelimitResetTokens": "0s",
"response.model": "gpt-3.5-turbo-0125",
"response.number_of_messages": 2,
- "response.organization": "user-rk8wq9voijy9sejrncvgi0iw",
+ "response.organization": "nr-test-org",
+ # langchain's ChatOpenAI.stream() passes stream_options={"include_usage": True}
+ # by default, so the final usage chunk is captured and these are populated.
+ "response.usage.completion_tokens": 2,
+ "response.usage.prompt_tokens": 21,
+ "response.usage.total_tokens": 23,
"span_id": None,
"time_to_first_token": None,
"timestamp": None,
@@ -127,6 +137,7 @@
"sequence": 0,
"span_id": None,
"timestamp": None,
+ "token_count": 0,
"trace_id": None,
"vendor": "openai",
},
@@ -144,6 +155,7 @@
"role": "assistant",
"sequence": 1,
"span_id": None,
+ "token_count": 0,
"trace_id": None,
"vendor": "openai",
},
diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py
index e218b4939a..73de8e202c 100644
--- a/tests/mlmodel_openai/_mock_external_openai_server.py
+++ b/tests/mlmodel_openai/_mock_external_openai_server.py
@@ -30,7 +30,7 @@
# created by an external call.
# 3) This app runs on a separate thread meaning it won't block the test app.
-STREAMED_RESPONSES = {
+STREAMED_RESPONSES_V0 = {
"Stream parsing error.": [
{
"Content-Type": "text/event-stream",
@@ -506,7 +506,156 @@
],
],
}
-RESPONSES = {
+
+# Streaming responses returned when the request includes `stream_options={"include_usage": True}`.
+# OpenAI emits one extra trailing chunk with `choices: []` and a populated `usage` block before
+# sending [DONE].
+STREAMED_RESPONSES_V1_WITH_USAGE = {
+ "You are a scientist.": [
+ {
+ "content-type": "text/event-stream; charset=utf-8",
+ "openai-organization": "nr-test-org",
+ "openai-processing-ms": "334",
+ "openai-project": "proj_id",
+ "openai-version": "2020-10-01",
+ "x-ratelimit-limit-requests": "15000",
+ "x-ratelimit-limit-tokens": "40000",
+ "x-ratelimit-remaining-requests": "14999",
+ "x-ratelimit-remaining-tokens": "39999978",
+ "x-ratelimit-reset-requests": "4ms",
+ "x-ratelimit-reset-tokens": "0s",
+ "x-request-id": "req_f821c73df45f4e30821a81a2d751fe64",
+ },
+ 200,
+ [
+ {
+ "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+ "object": "chat.completion.chunk",
+ "created": 1779406074,
+ "model": "gpt-5.1-2025-11-13",
+ "service_tier": "default",
+ "system_fingerprint": None,
+ "choices": [
+ {"index": 0, "delta": {"role": "assistant", "content": "", "refusal": None}, "finish_reason": None}
+ ],
+ "usage": None,
+ "obfuscation": "n8bcfdX5",
+ },
+ {
+ "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+ "object": "chat.completion.chunk",
+ "created": 1779406074,
+ "model": "gpt-5.1-2025-11-13",
+ "service_tier": "default",
+ "system_fingerprint": None,
+ "choices": [{"index": 0, "delta": {"content": "212"}, "finish_reason": None}],
+ "usage": None,
+ "obfuscation": "t2ASUcp",
+ },
+ {
+ "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+ "object": "chat.completion.chunk",
+ "created": 1779406074,
+ "model": "gpt-5.1-2025-11-13",
+ "service_tier": "default",
+ "system_fingerprint": None,
+ "choices": [{"index": 0, "delta": {"content": "\u00b0F"}, "finish_reason": None}],
+ "usage": None,
+ "obfuscation": "QehtIgXV",
+ },
+ {
+ "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+ "object": "chat.completion.chunk",
+ "created": 1779406074,
+ "model": "gpt-5.1-2025-11-13",
+ "service_tier": "default",
+ "system_fingerprint": None,
+ "choices": [{"index": 0, "delta": {"content": " is"}, "finish_reason": None}],
+ "usage": None,
+ "obfuscation": "lEH0ats",
+ },
+ {
+ "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+ "object": "chat.completion.chunk",
+ "created": 1779406074,
+ "model": "gpt-5.1-2025-11-13",
+ "service_tier": "default",
+ "system_fingerprint": None,
+ "choices": [{"index": 0, "delta": {"content": " "}, "finish_reason": None}],
+ "usage": None,
+ "obfuscation": "SzPHsL8tM",
+ },
+ {
+ "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+ "object": "chat.completion.chunk",
+ "created": 1779406074,
+ "model": "gpt-5.1-2025-11-13",
+ "service_tier": "default",
+ "system_fingerprint": None,
+ "choices": [{"index": 0, "delta": {"content": "100"}, "finish_reason": None}],
+ "usage": None,
+ "obfuscation": "Z0vwQZq",
+ },
+ {
+ "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+ "object": "chat.completion.chunk",
+ "created": 1779406074,
+ "model": "gpt-5.1-2025-11-13",
+ "service_tier": "default",
+ "system_fingerprint": None,
+ "choices": [{"index": 0, "delta": {"content": "\u00b0C"}, "finish_reason": None}],
+ "usage": None,
+ "obfuscation": "HYe0zapB",
+ },
+ {
+ "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+ "object": "chat.completion.chunk",
+ "created": 1779406074,
+ "model": "gpt-5.1-2025-11-13",
+ "service_tier": "default",
+ "system_fingerprint": None,
+ "choices": [{"index": 0, "delta": {"content": "."}, "finish_reason": None}],
+ "usage": None,
+ "obfuscation": "0XyvpKQ0L",
+ },
+ {
+ "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+ "object": "chat.completion.chunk",
+ "created": 1779406074,
+ "model": "gpt-5.1-2025-11-13",
+ "service_tier": "default",
+ "system_fingerprint": None,
+ "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
+ "usage": None,
+ "obfuscation": "EE9z",
+ },
+ {
+ "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco",
+ "object": "chat.completion.chunk",
+ "created": 1779406074,
+ "model": "gpt-5.1-2025-11-13",
+ "service_tier": "default",
+ "system_fingerprint": None,
+ "choices": [],
+ "usage": {
+ "prompt_tokens": 25,
+ "completion_tokens": 16,
+ "total_tokens": 41,
+ "prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0},
+ "completion_tokens_details": {
+ "reasoning_tokens": 0,
+ "audio_tokens": 0,
+ "accepted_prediction_tokens": 0,
+ "rejected_prediction_tokens": 0,
+ },
+ },
+ "obfuscation": "kV6540YmG",
+ },
+ ],
+ ]
+}
+
+RESPONSES_V0 = {
"Invalid API key.": (
{"Content-Type": "application/json; charset=utf-8", "x-request-id": "4f8f61a7d0401e42a6760ea2ca2049f6"},
401,
@@ -678,7 +827,10 @@ def simple_get(openai_version, extract_shortened_prompt):
def _simple_get(self):
content_len = int(self.headers.get("content-length"))
content = json.loads(self.rfile.read(content_len).decode("utf-8"))
- stream = content.get("stream", False)
+ is_openai_v0 = openai_version < (1, 0)
+ is_streaming = content.get("stream", False)
+ # Streaming responses include a final usage chunk only when stream_options.include_usage is True.
+ include_usage = bool(is_streaming and (content.get("stream_options") or {}).get("include_usage"))
prompt = extract_shortened_prompt(content)
if not prompt:
self.send_response(500)
@@ -688,14 +840,18 @@ def _simple_get(self):
headers, response = ({}, "")
- if openai_version < (1, 0):
- mocked_responses = RESPONSES
- if stream:
- mocked_responses = STREAMED_RESPONSES
+ if is_openai_v0:
+ if is_streaming:
+ mocked_responses = STREAMED_RESPONSES_V0
+ else:
+ mocked_responses = RESPONSES_V0
else:
- mocked_responses = RESPONSES_V1
- if stream:
+ if is_streaming and include_usage:
+ mocked_responses = STREAMED_RESPONSES_V1_WITH_USAGE
+ elif is_streaming:
mocked_responses = STREAMED_RESPONSES_V1
+ else:
+ mocked_responses = RESPONSES_V1
for k, v in mocked_responses.items():
if prompt.startswith(k):
@@ -704,7 +860,7 @@ def _simple_get(self):
else: # If no matches found
self.send_response(500)
self.end_headers()
- self.wfile.write(f"Unknown Prompt ({'Streaming' if stream else 'Non-Streaming'}):\n{prompt}".encode())
+ self.wfile.write(f"Unknown Prompt ({'Streaming' if is_streaming else 'Non-Streaming'}):\n{prompt}".encode())
return
# Send response code
@@ -716,7 +872,7 @@ def _simple_get(self):
self.end_headers()
# Send response body
- if stream and status_code < 400:
+ if is_streaming and status_code < 400:
for resp in response:
data = json.dumps(resp).encode("utf-8")
if prompt == "Stream parsing error.":
diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py
index fc7f7f3852..243d3d7f18 100644
--- a/tests/mlmodel_openai/test_chat_completion.py
+++ b/tests/mlmodel_openai/test_chat_completion.py
@@ -15,7 +15,7 @@
import openai
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_counts_to_chat_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
disabled_ai_monitoring_streaming_settings,
@@ -56,6 +56,9 @@
"response.organization": "new-relic-nkmd8b",
"request.temperature": 0.7,
"request.max_tokens": 100,
+ "response.usage.completion_tokens": 11,
+ "response.usage.total_tokens": 64,
+ "response.usage.prompt_tokens": 53,
"response.choices.finish_reason": "stop",
"response.headers.llmVersion": "2020-10-01",
"response.headers.ratelimitLimitRequests": 200,
@@ -83,6 +86,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"response.model": "gpt-3.5-turbo-0613",
"vendor": "openai",
"ingest_source": "Python",
@@ -102,6 +106,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"response.model": "gpt-3.5-turbo-0613",
"vendor": "openai",
"ingest_source": "Python",
@@ -120,6 +125,7 @@
"role": "assistant",
"completion_id": None,
"sequence": 2,
+ "token_count": 0,
"response.model": "gpt-3.5-turbo-0613",
"vendor": "openai",
"is_response": True,
@@ -175,7 +181,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info):
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
@@ -346,7 +352,7 @@ def test_openai_chat_completion_async_no_content(loop, set_trace_info):
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py
index 79cc79d6db..042cdef31a 100644
--- a/tests/mlmodel_openai/test_chat_completion_error.py
+++ b/tests/mlmodel_openai/test_chat_completion_error.py
@@ -15,13 +15,11 @@
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
events_with_context_attrs,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -70,6 +68,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -86,6 +85,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -191,6 +191,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -198,36 +199,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
]
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.InvalidRequestError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info):
- set_trace_info()
- with pytest.raises(openai.InvalidRequestError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
-
- openai.ChatCompletion.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- )
-
-
# Invalid model provided
@dt_enabled
@reset_core_stats_engine()
@@ -288,6 +259,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -304,6 +276,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -370,6 +343,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info):
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -481,37 +455,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.InvalidRequestError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async",
- scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info):
- set_trace_info()
- with pytest.raises(openai.InvalidRequestError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- loop.run_until_complete(
- openai.ChatCompletion.acreate(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- )
- )
-
-
# Invalid model provided
@dt_enabled
@reset_core_stats_engine()
diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py
index 555001a702..5e83b415e6 100644
--- a/tests/mlmodel_openai/test_chat_completion_error_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py
@@ -14,13 +14,11 @@
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
events_with_context_attrs,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -69,6 +67,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -85,6 +84,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -234,6 +234,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -271,37 +272,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- sync_openai_client.chat.completions.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_completion_tokens=100,
- )
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -334,41 +304,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(
- loop, set_trace_info, async_openai_client
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- loop.run_until_complete(
- async_openai_client.chat.completions.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_completion_tokens=100,
- )
- )
-
-
expected_events_on_wrong_api_key_error = [
(
{"type": "LlmChatCompletionSummary"},
@@ -398,6 +333,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -617,39 +553,6 @@ def test_chat_completion_invalid_request_error_invalid_model_with_raw_response(s
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response(
- set_trace_info, sync_openai_client
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- sync_openai_client.chat.completions.with_raw_response.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_completion_tokens=100,
- )
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -684,41 +587,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async_with_raw_resp
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response(
- loop, set_trace_info, async_openai_client
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- loop.run_until_complete(
- async_openai_client.chat.completions.with_raw_response.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_completion_tokens=100,
- )
- )
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py
index 6858da8107..b1679ba4d2 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream.py
@@ -15,7 +15,8 @@
import openai
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_count_streaming_events,
+ add_token_counts_to_chat_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
disabled_ai_monitoring_streaming_settings,
@@ -188,9 +189,101 @@ def test_openai_chat_completion_sync_no_content(set_trace_info):
assert resp
+chat_completion_recorded_token_events = [
+ (
+ {"type": "LlmChatCompletionSummary"},
+ {
+ "id": None, # UUID that varies with each run
+ "llm.conversation_id": "my-awesome-id",
+ "llm.foo": "bar",
+ "span_id": None,
+ "trace_id": "trace-id",
+ "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+ "duration": None, # Response time varies each test run
+ "request.model": "gpt-3.5-turbo",
+ "response.model": "gpt-3.5-turbo-0613",
+ "response.organization": "new-relic-nkmd8b",
+ "request.temperature": 0.7,
+ "request.max_tokens": 100,
+ "response.choices.finish_reason": "stop",
+ "response.headers.llmVersion": "2020-10-01",
+ "response.headers.ratelimitLimitRequests": 200,
+ "response.headers.ratelimitLimitTokens": 40000,
+ "response.headers.ratelimitResetTokens": "90ms",
+ "response.headers.ratelimitResetRequests": "7m12s",
+ "response.headers.ratelimitRemainingTokens": 39940,
+ "response.headers.ratelimitRemainingRequests": 199,
+ "vendor": "openai",
+ "ingest_source": "Python",
+ "response.number_of_messages": 3,
+ },
+ ),
+ (
+ {"type": "LlmChatCompletionMessage"},
+ {
+ "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0",
+ "llm.conversation_id": "my-awesome-id",
+ "llm.foo": "bar",
+ "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+ "span_id": None,
+ "trace_id": "trace-id",
+ "content": "You are a scientist.",
+ "role": "system",
+ "completion_id": None,
+ "sequence": 0,
+ "token_count": 0,
+ "response.model": "gpt-3.5-turbo-0613",
+ "vendor": "openai",
+ "ingest_source": "Python",
+ },
+ ),
+ (
+ {"type": "LlmChatCompletionMessage"},
+ {
+ "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1",
+ "llm.conversation_id": "my-awesome-id",
+ "llm.foo": "bar",
+ "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+ "span_id": None,
+ "trace_id": "trace-id",
+ "content": "What is 212 degrees Fahrenheit converted to Celsius?",
+ "role": "user",
+ "completion_id": None,
+ "sequence": 1,
+ "token_count": 0,
+ "response.model": "gpt-3.5-turbo-0613",
+ "vendor": "openai",
+ "ingest_source": "Python",
+ },
+ ),
+ (
+ {"type": "LlmChatCompletionMessage"},
+ {
+ "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2",
+ "llm.conversation_id": "my-awesome-id",
+ "llm.foo": "bar",
+ "request_id": "49dbbffbd3c3f4612aa48def69059ccd",
+ "span_id": None,
+ "trace_id": "trace-id",
+ "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.",
+ "role": "assistant",
+ "completion_id": None,
+ "sequence": 2,
+ "token_count": 0,
+ "response.model": "gpt-3.5-turbo-0613",
+ "vendor": "openai",
+ "is_response": True,
+ "ingest_source": "Python",
+ },
+ ),
+]
+
+
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+ add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
@@ -253,25 +346,29 @@ def test_openai_chat_completion_sync_no_llm_metadata(set_trace_info):
)
@background_task()
def test_openai_chat_completion_sync_ai_monitoring_streaming_disabled():
- openai.ChatCompletion.create(
+ generator = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=_test_openai_chat_completion_messages,
temperature=0.7,
max_tokens=100,
stream=True,
)
+ for resp in generator:
+ assert resp
@reset_core_stats_engine()
@validate_custom_event_count(count=0)
def test_openai_chat_completion_sync_outside_txn():
- openai.ChatCompletion.create(
+ generator = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=_test_openai_chat_completion_messages,
temperature=0.7,
max_tokens=100,
stream=True,
)
+ for resp in generator:
+ assert resp
@disabled_ai_monitoring_settings
@@ -279,13 +376,15 @@ def test_openai_chat_completion_sync_outside_txn():
@validate_custom_event_count(count=0)
@background_task()
def test_openai_chat_completion_sync_ai_monitoring_disabled():
- openai.ChatCompletion.create(
+ generator = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=_test_openai_chat_completion_messages,
temperature=0.7,
max_tokens=100,
stream=True,
)
+ for resp in generator:
+ assert resp
@reset_core_stats_engine()
@@ -382,7 +481,9 @@ async def consumer():
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+ add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
name="test_chat_completion_stream:test_openai_chat_completion_async_with_token_count",
diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py
index 0fb0d06867..2b01813d9f 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream_error.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py
@@ -15,13 +15,11 @@
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
events_with_context_attrs,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -70,6 +68,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -86,6 +85,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -196,6 +196,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -203,38 +204,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf
]
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.InvalidRequestError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info):
- set_trace_info()
- with pytest.raises(openai.InvalidRequestError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- generator = openai.ChatCompletion.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- stream=True,
- )
- for resp in generator:
- assert resp
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -297,6 +266,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -313,6 +283,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info):
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -384,6 +355,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info):
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -498,38 +470,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.InvalidRequestError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async",
- scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info):
- set_trace_info()
- with pytest.raises(openai.InvalidRequestError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
- loop.run_until_complete(
- openai.ChatCompletion.acreate(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_tokens=100,
- stream=True,
- )
- )
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -661,6 +601,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py
index ce3ce8061e..074a482669 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py
@@ -12,16 +12,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
events_with_context_attrs,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -70,6 +67,7 @@
"role": "system",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -86,6 +84,7 @@
"role": "user",
"completion_id": None,
"sequence": 1,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -251,6 +250,7 @@ async def consumer():
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -291,81 +291,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn
assert resp
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
-
- generator = sync_openai_client.chat.completions.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_completion_tokens=100,
- stream=True,
- )
- for resp in generator:
- assert resp
-
-
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_async_with_token_count",
- scoped_metrics=[("Llm/completion/OpenAI/create", 1)],
- rollup_metrics=[("Llm/completion/OpenAI/create", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
-@validate_custom_event_count(count=2)
-@background_task()
-def test_chat_completion_invalid_request_error_invalid_model_async_with_token_count(
- loop, set_trace_info, async_openai_client
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- add_custom_attribute("llm.conversation_id", "my-awesome-id")
-
- async def consumer():
- generator = await async_openai_client.chat.completions.create(
- model="does-not-exist",
- messages=({"role": "user", "content": "Model does not exist."},),
- temperature=0.7,
- max_completion_tokens=100,
- stream=True,
- )
- async for resp in generator:
- assert resp
-
- loop.run_until_complete(consumer())
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -432,6 +357,7 @@ async def consumer():
"role": "user",
"completion_id": None,
"sequence": 0,
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py
index 1343c208ac..25dbba8cea 100644
--- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py
@@ -17,7 +17,8 @@
from conftest import get_openai_version
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_count_streaming_events,
+ add_token_counts_to_chat_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
disabled_ai_monitoring_streaming_settings,
@@ -64,7 +65,8 @@
"request.model": "gpt-5.1",
"response.model": "gpt-5.1-2025-11-13",
"response.organization": "nr-test-org",
- # Usage tokens aren't available when streaming.
+ # Usage tokens are only emitted by OpenAI when stream_options={"include_usage": True}.
+ # See test_openai_chat_completion_sync_with_stream_options_include_usage for that path.
"request.temperature": 0.7,
"request.max_tokens": 100,
"response.choices.finish_reason": "stop",
@@ -141,6 +143,14 @@
]
+# When stream_options={"include_usage": True} is set, the final stream chunk carries usage data,
+# so the agent records response.usage.* on the summary event and token_count: 0 on each message event.
+chat_completion_recorded_events_include_usage = add_token_count_streaming_events(chat_completion_recorded_events)
+chat_completion_recorded_events_include_usage[0][1].update(
+ {"response.usage.prompt_tokens": 25, "response.usage.completion_tokens": 16, "response.usage.total_tokens": 41}
+)
+
+
@reset_core_stats_engine()
@validate_custom_events(events_with_context_attrs(chat_completion_recorded_events))
# One summary event, one system message, one user message, and one response message from the assistant
@@ -302,9 +312,39 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie
assert resp
+@reset_core_stats_engine()
+@validate_custom_events(chat_completion_recorded_events_include_usage)
+@validate_custom_event_count(count=4)
+@validate_transaction_metrics(
+ name="test_chat_completion_stream_v1:test_openai_chat_completion_sync_with_stream_options_include_usage",
+ custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
+ background_task=True,
+)
+@validate_attributes("agent", ["llm"])
+@background_task()
+def test_openai_chat_completion_sync_with_stream_options_include_usage(set_trace_info, sync_openai_client):
+ """Streaming with stream_options={"include_usage": True} populates response.usage.* on the summary."""
+ set_trace_info()
+ add_custom_attribute("llm.conversation_id", "my-awesome-id")
+ add_custom_attribute("llm.foo", "bar")
+
+ generator = sync_openai_client.chat.completions.create(
+ model="gpt-5.1",
+ messages=_test_openai_chat_completion_messages,
+ temperature=0.7,
+ max_completion_tokens=100,
+ stream=True,
+ stream_options={"include_usage": True},
+ )
+ for resp in generator:
+ assert resp
+
+
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+ add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
@@ -624,9 +664,42 @@ async def consumer():
loop.run_until_complete(consumer())
+@reset_core_stats_engine()
+@validate_custom_events(chat_completion_recorded_events_include_usage)
+@validate_custom_event_count(count=4)
+@validate_transaction_metrics(
+ name="test_chat_completion_stream_v1:test_openai_chat_completion_async_with_stream_options_include_usage",
+ custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
+ background_task=True,
+)
+@validate_attributes("agent", ["llm"])
+@background_task()
+def test_openai_chat_completion_async_with_stream_options_include_usage(set_trace_info, loop, async_openai_client):
+ """Streaming with stream_options={"include_usage": True} populates response.usage.* on the summary."""
+ set_trace_info()
+ add_custom_attribute("llm.conversation_id", "my-awesome-id")
+ add_custom_attribute("llm.foo", "bar")
+
+ async def consumer():
+ generator = await async_openai_client.chat.completions.create(
+ model="gpt-5.1",
+ messages=_test_openai_chat_completion_messages,
+ temperature=0.7,
+ max_completion_tokens=100,
+ stream=True,
+ stream_options={"include_usage": True},
+ )
+ async for resp in generator:
+ assert resp
+
+ loop.run_until_complete(consumer())
+
+
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(
+ add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events))
+)
# One summary event, one system message, one user message, and one response message from the assistant
# @validate_custom_event_count(count=4)
@validate_transaction_metrics(
diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py
index ec636ca7d2..ae87f1757b 100644
--- a/tests/mlmodel_openai/test_chat_completion_v1.py
+++ b/tests/mlmodel_openai/test_chat_completion_v1.py
@@ -15,7 +15,7 @@
import openai
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_counts_to_chat_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
disabled_ai_monitoring_streaming_settings,
@@ -55,6 +55,9 @@
"response.organization": "nr-test-org",
"request.temperature": 0.7,
"request.max_tokens": 100,
+ "response.usage.completion_tokens": 16,
+ "response.usage.prompt_tokens": 25,
+ "response.usage.total_tokens": 41,
"response.choices.finish_reason": "stop",
"response.headers.llmVersion": "2020-10-01",
"response.headers.ratelimitLimitRequests": 15000,
@@ -83,6 +86,7 @@
"completion_id": None,
"sequence": 0,
"response.model": "gpt-5.1-2025-11-13",
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -102,6 +106,7 @@
"completion_id": None,
"sequence": 1,
"response.model": "gpt-5.1-2025-11-13",
+ "token_count": 0,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -120,6 +125,7 @@
"completion_id": None,
"sequence": 2,
"response.model": "gpt-5.1-2025-11-13",
+ "token_count": 0,
"vendor": "openai",
"is_response": True,
"ingest_source": "Python",
@@ -196,7 +202,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
@@ -395,7 +401,7 @@ def test_openai_chat_completion_async_with_llm_metadata_no_content(loop, set_tra
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events))
+@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events))
# One summary event, one system message, one user message, and one response message from the assistant
@validate_custom_event_count(count=4)
@validate_transaction_metrics(
diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py
index c3c3e7c429..935db04fe0 100644
--- a/tests/mlmodel_openai/test_embeddings.py
+++ b/tests/mlmodel_openai/test_embeddings.py
@@ -19,7 +19,7 @@
validate_attributes,
)
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_count_to_embedding_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
events_sans_content,
@@ -55,6 +55,7 @@
"response.headers.ratelimitResetRequests": "19m45.394s",
"response.headers.ratelimitRemainingTokens": 149994,
"response.headers.ratelimitRemainingRequests": 197,
+ "response.usage.total_tokens": 6,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -107,7 +108,7 @@ def test_openai_embedding_sync_no_content(set_trace_info):
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
@validate_custom_event_count(count=1)
@validate_transaction_metrics(
name="test_embeddings:test_openai_embedding_sync_with_token_count",
@@ -191,7 +192,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info):
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
@validate_custom_event_count(count=1)
@validate_transaction_metrics(
name="test_embeddings:test_openai_embedding_async_with_token_count",
diff --git a/tests/mlmodel_openai/test_embeddings_error.py b/tests/mlmodel_openai/test_embeddings_error.py
index a8e46bf23a..f80e6ff41d 100644
--- a/tests/mlmodel_openai/test_embeddings_error.py
+++ b/tests/mlmodel_openai/test_embeddings_error.py
@@ -14,12 +14,10 @@
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -128,35 +126,6 @@ def test_embeddings_invalid_request_error_no_model_no_content(set_trace_info):
]
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.InvalidRequestError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}},
-)
-@validate_span_events(
- exact_agents={
- "error.message": "The model `does-not-exist` does not exist"
- # "http.statusCode": 404,
- }
-)
-@validate_transaction_metrics(
- name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
- rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
- custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info):
- set_trace_info()
- with pytest.raises(openai.InvalidRequestError):
- openai.Embedding.create(input="Model does not exist.", model="does-not-exist")
-
-
# Invalid model provided
@dt_enabled
@reset_core_stats_engine()
@@ -348,30 +317,6 @@ def test_embeddings_invalid_request_error_no_model_async_no_content(loop, set_tr
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.InvalidRequestError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}},
-)
-@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"})
-@validate_transaction_metrics(
- name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count_async",
- scoped_metrics=[("Llm/embedding/OpenAI/acreate", 1)],
- rollup_metrics=[("Llm/embedding/OpenAI/acreate", 1)],
- custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count_async(set_trace_info, loop):
- set_trace_info()
- with pytest.raises(openai.InvalidRequestError):
- loop.run_until_complete(openai.Embedding.acreate(input="Model does not exist.", model="does-not-exist"))
-
-
# Invalid model provided
@dt_enabled
@reset_core_stats_engine()
diff --git a/tests/mlmodel_openai/test_embeddings_error_v1.py b/tests/mlmodel_openai/test_embeddings_error_v1.py
index fd29236122..499f96893b 100644
--- a/tests/mlmodel_openai/test_embeddings_error_v1.py
+++ b/tests/mlmodel_openai/test_embeddings_error_v1.py
@@ -16,12 +16,10 @@
import openai
import pytest
-from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
+from testing_support.fixtures import dt_enabled, reset_core_stats_engine
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
- llm_token_count_callback,
set_trace_info,
)
from testing_support.validators.validate_custom_event import validate_custom_event_count
@@ -149,32 +147,6 @@ def test_embeddings_invalid_request_error_no_model_async(set_trace_info, async_o
]
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count",
- scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
- rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
- custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- sync_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist")
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -255,36 +227,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content(set_tra
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count",
- scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
- rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
- custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_async_with_token_count(
- set_trace_info, async_openai_client, loop
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- loop.run_until_complete(
- async_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist")
- )
-
-
embedding_invalid_key_error_events = [
(
{"type": "LlmEmbedding"},
@@ -449,34 +391,6 @@ def test_embeddings_invalid_request_error_no_model_async_with_raw_response(set_t
) # no model provided
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response",
- scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
- rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
- custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response(
- set_trace_info, sync_openai_client
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- sync_openai_client.embeddings.with_raw_response.create(input="Model does not exist.", model="does-not-exist")
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
@@ -566,38 +480,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content_with_ra
)
-@dt_enabled
-@reset_core_stats_engine()
-@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_error_trace_attributes(
- callable_name(openai.NotFoundError),
- exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}},
-)
-@validate_span_events(
- exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."}
-)
-@validate_transaction_metrics(
- name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response",
- scoped_metrics=[("Llm/embedding/OpenAI/create", 1)],
- rollup_metrics=[("Llm/embedding/OpenAI/create", 1)],
- custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)],
- background_task=True,
-)
-@validate_custom_events(add_token_count_to_events(invalid_model_events))
-@validate_custom_event_count(count=1)
-@background_task()
-def test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response(
- set_trace_info, async_openai_client, loop
-):
- set_trace_info()
- with pytest.raises(openai.NotFoundError):
- loop.run_until_complete(
- async_openai_client.embeddings.with_raw_response.create(
- input="Model does not exist.", model="does-not-exist"
- )
- )
-
-
@dt_enabled
@reset_core_stats_engine()
@validate_error_trace_attributes(
diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py
index 9dd10262a5..004f8ba729 100644
--- a/tests/mlmodel_openai/test_embeddings_v1.py
+++ b/tests/mlmodel_openai/test_embeddings_v1.py
@@ -15,7 +15,7 @@
import openai
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
- add_token_count_to_events,
+ add_token_count_to_embedding_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
events_sans_content,
@@ -48,6 +48,7 @@
"response.headers.ratelimitResetRequests": "6ms",
"response.headers.ratelimitRemainingTokens": 9999994,
"response.headers.ratelimitRemainingRequests": 9999,
+ "response.usage.total_tokens": 6,
"vendor": "openai",
"ingest_source": "Python",
},
@@ -111,7 +112,7 @@ def test_openai_embedding_sync_no_content(set_trace_info, sync_openai_client):
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
@validate_custom_event_count(count=1)
@validate_transaction_metrics(
name="test_embeddings_v1:test_openai_embedding_sync_with_token_count",
@@ -206,7 +207,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info, async_openai_cl
@reset_core_stats_engine()
@override_llm_token_callback_settings(llm_token_count_callback)
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events))
+@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events))
@validate_custom_event_count(count=1)
@validate_transaction_metrics(
name="test_embeddings_v1:test_openai_embedding_async_with_token_count",
diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py
index 4ff70c7ed4..8c2c0444f0 100644
--- a/tests/testing_support/ml_testing_utils.py
+++ b/tests/testing_support/ml_testing_utils.py
@@ -29,6 +29,7 @@ def llm_token_count_callback(model, content):
return 105
+# TODO: Remove this helper once all LLM instrumentations have been converted to use the new token count design.
def add_token_count_to_events(expected_events):
events = copy.deepcopy(expected_events)
for event in events:
@@ -37,6 +38,32 @@ def add_token_count_to_events(expected_events):
return events
+def add_token_count_to_embedding_events(expected_events):
+    """Return a deep copy of ``expected_events`` with embedding token totals filled in.
+
+    Each event is indexed as ``(intrinsics, attributes)``. For every event whose
+    intrinsics have ``type == "LlmEmbedding"``, ``response.usage.total_tokens`` is
+    set to 105 in its attributes. The input is never mutated.
+    """
+    patched_events = copy.deepcopy(expected_events)
+    for entry in patched_events:
+        # Leave every non-embedding event exactly as it was copied.
+        if entry[0]["type"] != "LlmEmbedding":
+            continue
+        entry[1]["response.usage.total_tokens"] = 105
+    return patched_events
+
+
+def add_token_count_streaming_events(expected_events):
+    """Return a deep copy of ``expected_events`` with message token counts zeroed.
+
+    Each event is indexed as ``(intrinsics, attributes)``. For every event whose
+    intrinsics have ``type == "LlmChatCompletionMessage"``, ``token_count`` is set
+    to 0 in its attributes. The input is never mutated.
+    """
+    patched_events = copy.deepcopy(expected_events)
+    for entry in patched_events:
+        # Only per-message events carry ``token_count``; skip everything else.
+        if entry[0]["type"] != "LlmChatCompletionMessage":
+            continue
+        entry[1]["token_count"] = 0
+    return patched_events
+
+
+def add_token_counts_to_chat_events(expected_events):
+    """Return a deep copy of ``expected_events`` with summary usage counts filled in.
+
+    Each event is indexed as ``(intrinsics, attributes)``. For every event whose
+    intrinsics have ``type == "LlmChatCompletionSummary"``, the attributes get
+    ``response.usage.prompt_tokens`` and ``response.usage.completion_tokens`` set
+    to 105 and ``response.usage.total_tokens`` set to 210. The input is never
+    mutated.
+    """
+    patched_events = copy.deepcopy(expected_events)
+    for entry in patched_events:
+        # Usage totals live on the summary event only; skip message events.
+        if entry[0]["type"] != "LlmChatCompletionSummary":
+            continue
+        summary_attrs = entry[1]
+        summary_attrs["response.usage.prompt_tokens"] = 105
+        summary_attrs["response.usage.completion_tokens"] = 105
+        summary_attrs["response.usage.total_tokens"] = 210
+    return patched_events
+
+
def events_sans_content(event):
new_event = copy.deepcopy(event)
for _event in new_event: