diff --git a/newrelic/hooks/mlmodel_openai.py b/newrelic/hooks/mlmodel_openai.py index 564069ef75..0c626683e5 100644 --- a/newrelic/hooks/mlmodel_openai.py +++ b/newrelic/hooks/mlmodel_openai.py @@ -133,11 +133,11 @@ def create_chat_completion_message_event( span_id, trace_id, response_model, - request_model, response_id, request_id, llm_metadata, output_message_list, + all_token_counts, request_timestamp=None, ): settings = transaction.settings if transaction.settings is not None else global_settings() @@ -158,11 +158,6 @@ def create_chat_completion_message_event( "request_id": request_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(request_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -170,8 +165,13 @@ def create_chat_completion_message_event( "vendor": "openai", "ingest_source": "Python", } + if settings.ai_monitoring.record_content.enabled and message_content: chat_completion_input_message_dict["content"] = message_content + + if all_token_counts: + chat_completion_input_message_dict["token_count"] = 0 + if request_timestamp: chat_completion_input_message_dict["timestamp"] = request_timestamp @@ -199,11 +199,6 @@ def create_chat_completion_message_event( "request_id": request_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, message_content) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "role": message.get("role"), "completion_id": chat_completion_id, "sequence": index, @@ -216,6 +211,9 @@ def create_chat_completion_message_event( if settings.ai_monitoring.record_content.enabled and message_content: chat_completion_output_message_dict["content"] = message_content + if all_token_counts: + chat_completion_output_message_dict["token_count"] = 0 + 
chat_completion_output_message_dict.update(llm_metadata) transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_output_message_dict) @@ -286,15 +284,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg else getattr(attribute_response, "organization", None) ) + response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None + + total_tokens = ( + settings.ai_monitoring.llm_token_count_callback(response_model, input_) + if settings.ai_monitoring.llm_token_count_callback and input_ + else response_total_tokens + ) + full_embedding_response_dict = { "id": embedding_id, "span_id": span_id, "trace_id": trace_id, - "token_count": ( - settings.ai_monitoring.llm_token_count_callback(response_model, input_) - if settings.ai_monitoring.llm_token_count_callback - else None - ), "request.model": kwargs.get("model") or kwargs.get("engine"), "request_id": request_id, "duration": ft.duration * 1000, @@ -319,6 +320,7 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg "response.headers.ratelimitRemainingRequests": check_rate_limit_header( response_headers, "x-ratelimit-remaining-requests", True ), + "response.usage.total_tokens": total_tokens, "vendor": "openai", "ingest_source": "Python", } @@ -489,6 +491,7 @@ def _handle_completion_success( def _record_completion_success( transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response, request_timestamp=None ): + settings = transaction.settings if transaction.settings is not None else global_settings() span_id = linking_metadata.get("span.id") trace_id = linking_metadata.get("trace.id") @@ -496,6 +499,7 @@ def _record_completion_success( if response: response_model = response.get("model") response_id = response.get("id") + token_usage = response.get("usage") or {} output_message_list = [] finish_reason = None choices = response.get("choices") or [] @@ -509,6 +513,7 @@ def 
_record_completion_success( else: response_model = kwargs.get("response.model") response_id = kwargs.get("id") + token_usage = kwargs.get("response.usage") or {} finish_reason = kwargs.get("finish_reason") content = kwargs.get("content") # Tool-call responses may carry an empty content string; in that case the @@ -519,12 +524,39 @@ def _record_completion_success( output_message_list = [{"content": content, "role": kwargs.get("role")}] else: output_message_list = [] + request_model = kwargs.get("model") or kwargs.get("engine") - request_id = response_headers.get("x-request-id") - organization = response_headers.get("openai-organization") or getattr(response, "organization", None) messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}] input_message_list = list(messages) + + # Token counts default to those reported in the response object if available, + # but the user registered callback below may override them. + response_prompt_tokens = token_usage.get("prompt_tokens") + response_completion_tokens = token_usage.get("completion_tokens") + response_total_tokens = token_usage.get("total_tokens") + + # If the user has registered a callback to compute token counts it should always be preferred. + token_count_callback = settings.ai_monitoring.llm_token_count_callback + if token_count_callback: + input_message_content = " ".join(content for msg in input_message_list if (content := msg.get("content"))) + if input_message_content: + response_prompt_tokens = token_count_callback(request_model, input_message_content) + output_message_content = " ".join(content for msg in output_message_list if (content := msg.get("content"))) + if output_message_content: + response_completion_tokens = token_count_callback(response_model, output_message_content) + + # Prefer the sum of individual counts as the total whenever both are available. 
+ # This ensures consistency in the event that the token counting callback has reported + # different values for prompt or completion tokens. + if response_prompt_tokens and response_completion_tokens: + response_total_tokens = response_prompt_tokens + response_completion_tokens + + all_token_counts = bool(response_prompt_tokens and response_completion_tokens and response_total_tokens) + + request_id = response_headers.get("x-request-id") + organization = response_headers.get("openai-organization") or getattr(response, "organization", None) + full_chat_completion_summary_dict = { "id": completion_id, "span_id": span_id, @@ -571,6 +603,12 @@ def _record_completion_success( "response.number_of_messages": len(input_message_list) + len(output_message_list), "timestamp": request_timestamp, } + + if all_token_counts: + full_chat_completion_summary_dict["response.usage.prompt_tokens"] = response_prompt_tokens + full_chat_completion_summary_dict["response.usage.completion_tokens"] = response_completion_tokens + full_chat_completion_summary_dict["response.usage.total_tokens"] = response_total_tokens + llm_metadata = _get_llm_attributes(transaction) if "time_to_first_token" in kwargs: @@ -586,11 +624,11 @@ def _record_completion_success( span_id, trace_id, response_model, - request_model, response_id, request_id, llm_metadata, output_message_list, + all_token_counts, request_timestamp, ) except Exception: @@ -602,6 +640,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg trace_id = linking_metadata.get("trace.id") request_message_list = kwargs.get("messages", None) or [] notice_error_attributes = {} + try: if OPENAI_V1: response = getattr(exc, "response", None) @@ -667,6 +706,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg output_message_list = [] if "content" in kwargs: output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}] + create_chat_completion_message_event( 
transaction, request_message_list, @@ -674,11 +714,12 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg span_id, trace_id, kwargs.get("response.model"), - request_model, response_id, request_id, llm_metadata, output_message_list, + # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run + True, request_timestamp, ) except Exception: @@ -787,6 +828,7 @@ def _record_stream_chunk(self, return_val): self._nr_openai_attrs["response.model"] = return_val.get("model") self._nr_openai_attrs["id"] = return_val.get("id") self._nr_openai_attrs["response.organization"] = return_val.get("organization") + self._nr_openai_attrs["response.usage"] = return_val.get("usage") if choices: delta = choices[0].get("delta") or {} if delta: diff --git a/tests/mlmodel_langchain/_mock_external_openai_server.py b/tests/mlmodel_langchain/_mock_external_openai_server.py index 74740ba520..fd8cc1231b 100644 --- a/tests/mlmodel_langchain/_mock_external_openai_server.py +++ b/tests/mlmodel_langchain/_mock_external_openai_server.py @@ -33,7 +33,7 @@ "system: You are a world class algorithm for extracting information in structured formats. 
| user: Use the given format to extract information from the following input: Hello, world | user: Tip: Make sure to answer in the correct format": [ { "content-type": "text/event-stream; charset=utf-8", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "440", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -125,7 +125,7 @@ 'user: Use a tool to add an exclamation to the word "Hello"': [ { "content-type": "text/event-stream; charset=utf-8", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "134", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -334,7 +334,7 @@ 'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "Hello"': [ { "content-type": "application/json", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "324", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -391,7 +391,7 @@ 'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "Hello" | assistant: None | tool: Hello!': [ { "content-type": "application/json", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "751", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -441,7 +441,7 @@ 'system: You are a text manipulation algorithm. 
| user: Use a tool to add an exclamation to the word "exc"': [ { "content-type": "application/json", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "767", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -498,7 +498,7 @@ "system: You are a helpful assistant who generates a random first name. A user will pass in a first letter, and you should generate a name that starts with that first letter. | user: M": [ { "content-type": "application/json", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "236", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -543,7 +543,7 @@ "system: You are a helpful assistant who generates comma separated lists.\n A user will pass in a category, and you should generate 5 objects in that category in a comma separated list.\n ONLY return a comma separated list, and nothing more. | user: colors": [ { "content-type": "application/json", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "289", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -593,7 +593,7 @@ "system: You are a world class algorithm for extracting information in structured formats. | user: Use the given format to extract information from the following input: Sally is 13 | user: Tip: Make sure to answer in the correct format": [ { "content-type": "application/json", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "201", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -656,7 +656,7 @@ "system: You are a generator of quiz questions for a seminar. 
Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4? | user: math": [ { "content-type": "application/json", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "2029", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -708,7 +708,7 @@ { "content-type": "application/json", "openai-model": "text-embedding-ada-002-v2", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "42", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -738,7 +738,7 @@ { "content-type": "application/json", "openai-model": "text-embedding-ada-002-v2", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "82", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -768,7 +768,7 @@ { "content-type": "application/json", "openai-model": "text-embedding-ada-002-v2", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "158", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -798,7 +798,7 @@ { "content-type": "application/json", "openai-model": "text-embedding-ada-002-v2", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "116", "openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", @@ -827,7 +827,7 @@ 'user: Use a tool to add an exclamation to the word "Hello"': [ { "content-type": "application/json", - "openai-organization": "user-rk8wq9voijy9sejrncvgi0iw", + "openai-organization": "nr-test-org", "openai-processing-ms": "238", 
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3", "openai-version": "2020-10-01", diff --git a/tests/mlmodel_langchain/test_chain.py b/tests/mlmodel_langchain/test_chain.py index 89757675d7..156a2c8d4b 100644 --- a/tests/mlmodel_langchain/test_chain.py +++ b/tests/mlmodel_langchain/test_chain.py @@ -377,7 +377,7 @@ "request_id": None, "duration": None, "response.model": "text-embedding-ada-002-v2", - "response.organization": "user-rk8wq9voijy9sejrncvgi0iw", + "response.organization": "nr-test-org", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 10000, "response.headers.ratelimitLimitTokens": 10000000, @@ -385,6 +385,7 @@ "response.headers.ratelimitRemainingTokens": 9999992, "response.headers.ratelimitResetRequests": "6ms", "response.headers.ratelimitResetTokens": "0s", + "response.usage.total_tokens": 8, "vendor": "openai", "ingest_source": "Python", "input": "[[3923, 374, 220, 17, 489, 220, 19, 30]]", @@ -400,7 +401,7 @@ "request_id": None, "duration": None, "response.model": "text-embedding-ada-002-v2", - "response.organization": "user-rk8wq9voijy9sejrncvgi0iw", + "response.organization": "nr-test-org", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 10000, "response.headers.ratelimitLimitTokens": 10000000, @@ -408,6 +409,7 @@ "response.headers.ratelimitRemainingTokens": 9999998, "response.headers.ratelimitResetRequests": "6ms", "response.headers.ratelimitResetTokens": "0s", + "response.usage.total_tokens": 1, "vendor": "openai", "ingest_source": "Python", "input": "[[10590]]", @@ -471,7 +473,7 @@ "request_id": None, "duration": None, "response.model": "gpt-3.5-turbo-0125", - "response.organization": "user-rk8wq9voijy9sejrncvgi0iw", + "response.organization": "nr-test-org", "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 10000, @@ -480,6 +482,9 @@ "response.headers.ratelimitRemainingTokens": 49999927, 
"response.headers.ratelimitResetRequests": "6ms", "response.headers.ratelimitResetTokens": "0s", + "response.usage.prompt_tokens": 73, + "response.usage.completion_tokens": 337, + "response.usage.total_tokens": 410, "response.number_of_messages": 3, }, ], @@ -496,6 +501,7 @@ "sequence": 0, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4?", }, @@ -513,6 +519,7 @@ "sequence": 1, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "content": "math", }, @@ -529,6 +536,7 @@ "sequence": 2, "response.model": "gpt-3.5-turbo-0125", "vendor": "openai", + "token_count": 0, "ingest_source": "Python", "is_response": True, "content": "```html\n\n\n\n Math Quiz\n\n\n

Math Quiz Questions

\n
    \n
  1. What is the result of 5 + 3?
  2. \n \n
  3. What is the product of 6 x 7?
  4. \n \n
  5. What is the square root of 64?
  6. \n \n
  7. What is the result of 12 / 4?
  8. \n \n
  9. What is the sum of 15 + 9?
  10. \n \n
\n\n\n```", diff --git a/tests/mlmodel_langchain/test_state_graph.py b/tests/mlmodel_langchain/test_state_graph.py index a47ad5f3d6..799124bb0a 100644 --- a/tests/mlmodel_langchain/test_state_graph.py +++ b/tests/mlmodel_langchain/test_state_graph.py @@ -43,7 +43,10 @@ "response.headers.ratelimitResetTokens": "0s", "response.model": "gpt-3.5-turbo-0125", "response.number_of_messages": 2, - "response.organization": "user-rk8wq9voijy9sejrncvgi0iw", + "response.organization": "nr-test-org", + "response.usage.completion_tokens": 2, + "response.usage.prompt_tokens": 21, + "response.usage.total_tokens": 23, "span_id": None, "timestamp": None, "trace_id": None, @@ -63,6 +66,7 @@ "sequence": 0, "span_id": None, "timestamp": None, + "token_count": 0, "trace_id": None, "vendor": "openai", }, @@ -80,6 +84,7 @@ "role": "assistant", "sequence": 1, "span_id": None, + "token_count": 0, "trace_id": None, "vendor": "openai", }, @@ -106,7 +111,12 @@ "response.headers.ratelimitResetTokens": "0s", "response.model": "gpt-3.5-turbo-0125", "response.number_of_messages": 2, - "response.organization": "user-rk8wq9voijy9sejrncvgi0iw", + "response.organization": "nr-test-org", + # langchain's ChatOpenAI.stream() passes stream_options={"include_usage": True} + # by default, so the final usage chunk is captured and these are populated. 
+ "response.usage.completion_tokens": 2, + "response.usage.prompt_tokens": 21, + "response.usage.total_tokens": 23, "span_id": None, "time_to_first_token": None, "timestamp": None, @@ -127,6 +137,7 @@ "sequence": 0, "span_id": None, "timestamp": None, + "token_count": 0, "trace_id": None, "vendor": "openai", }, @@ -144,6 +155,7 @@ "role": "assistant", "sequence": 1, "span_id": None, + "token_count": 0, "trace_id": None, "vendor": "openai", }, diff --git a/tests/mlmodel_openai/_mock_external_openai_server.py b/tests/mlmodel_openai/_mock_external_openai_server.py index e218b4939a..73de8e202c 100644 --- a/tests/mlmodel_openai/_mock_external_openai_server.py +++ b/tests/mlmodel_openai/_mock_external_openai_server.py @@ -30,7 +30,7 @@ # created by an external call. # 3) This app runs on a separate thread meaning it won't block the test app. -STREAMED_RESPONSES = { +STREAMED_RESPONSES_V0 = { "Stream parsing error.": [ { "Content-Type": "text/event-stream", @@ -506,7 +506,156 @@ ], ], } -RESPONSES = { + +# Streaming responses returned when the request includes `stream_options={"include_usage": True}`. +# OpenAI emits one extra trailing chunk with `choices: []` and a populated `usage` block before +# sending [DONE]. 
+STREAMED_RESPONSES_V1_WITH_USAGE = { + "You are a scientist.": [ + { + "content-type": "text/event-stream; charset=utf-8", + "openai-organization": "nr-test-org", + "openai-processing-ms": "334", + "openai-project": "proj_id", + "openai-version": "2020-10-01", + "x-ratelimit-limit-requests": "15000", + "x-ratelimit-limit-tokens": "40000", + "x-ratelimit-remaining-requests": "14999", + "x-ratelimit-remaining-tokens": "39999978", + "x-ratelimit-reset-requests": "4ms", + "x-ratelimit-reset-tokens": "0s", + "x-request-id": "req_f821c73df45f4e30821a81a2d751fe64", + }, + 200, + [ + { + "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco", + "object": "chat.completion.chunk", + "created": 1779406074, + "model": "gpt-5.1-2025-11-13", + "service_tier": "default", + "system_fingerprint": None, + "choices": [ + {"index": 0, "delta": {"role": "assistant", "content": "", "refusal": None}, "finish_reason": None} + ], + "usage": None, + "obfuscation": "n8bcfdX5", + }, + { + "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco", + "object": "chat.completion.chunk", + "created": 1779406074, + "model": "gpt-5.1-2025-11-13", + "service_tier": "default", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "212"}, "finish_reason": None}], + "usage": None, + "obfuscation": "t2ASUcp", + }, + { + "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco", + "object": "chat.completion.chunk", + "created": 1779406074, + "model": "gpt-5.1-2025-11-13", + "service_tier": "default", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "\u00b0F"}, "finish_reason": None}], + "usage": None, + "obfuscation": "QehtIgXV", + }, + { + "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco", + "object": "chat.completion.chunk", + "created": 1779406074, + "model": "gpt-5.1-2025-11-13", + "service_tier": "default", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " is"}, "finish_reason": None}], + "usage": None, + "obfuscation": "lEH0ats", + }, + { + "id": 
"chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco", + "object": "chat.completion.chunk", + "created": 1779406074, + "model": "gpt-5.1-2025-11-13", + "service_tier": "default", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": " "}, "finish_reason": None}], + "usage": None, + "obfuscation": "SzPHsL8tM", + }, + { + "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco", + "object": "chat.completion.chunk", + "created": 1779406074, + "model": "gpt-5.1-2025-11-13", + "service_tier": "default", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "100"}, "finish_reason": None}], + "usage": None, + "obfuscation": "Z0vwQZq", + }, + { + "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco", + "object": "chat.completion.chunk", + "created": 1779406074, + "model": "gpt-5.1-2025-11-13", + "service_tier": "default", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "\u00b0C"}, "finish_reason": None}], + "usage": None, + "obfuscation": "HYe0zapB", + }, + { + "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco", + "object": "chat.completion.chunk", + "created": 1779406074, + "model": "gpt-5.1-2025-11-13", + "service_tier": "default", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {"content": "."}, "finish_reason": None}], + "usage": None, + "obfuscation": "0XyvpKQ0L", + }, + { + "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco", + "object": "chat.completion.chunk", + "created": 1779406074, + "model": "gpt-5.1-2025-11-13", + "service_tier": "default", + "system_fingerprint": None, + "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}], + "usage": None, + "obfuscation": "EE9z", + }, + { + "id": "chatcmpl-CocmvmDih6DGKIgPUbrzKFxGnMyco", + "object": "chat.completion.chunk", + "created": 1779406074, + "model": "gpt-5.1-2025-11-13", + "service_tier": "default", + "system_fingerprint": None, + "choices": [], + "usage": { + "prompt_tokens": 25, + "completion_tokens": 16, + "total_tokens": 41, + 
"prompt_tokens_details": {"cached_tokens": 0, "audio_tokens": 0}, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0, + }, + }, + "obfuscation": "kV6540YmG", + }, + ], + ] +} + +RESPONSES_V0 = { "Invalid API key.": ( {"Content-Type": "application/json; charset=utf-8", "x-request-id": "4f8f61a7d0401e42a6760ea2ca2049f6"}, 401, @@ -678,7 +827,10 @@ def simple_get(openai_version, extract_shortened_prompt): def _simple_get(self): content_len = int(self.headers.get("content-length")) content = json.loads(self.rfile.read(content_len).decode("utf-8")) - stream = content.get("stream", False) + is_openai_v0 = openai_version < (1, 0) + is_streaming = content.get("stream", False) + # Streaming responses include a final usage chunk only when stream_options.include_usage is True. + include_usage = bool(is_streaming and (content.get("stream_options") or {}).get("include_usage")) prompt = extract_shortened_prompt(content) if not prompt: self.send_response(500) @@ -688,14 +840,18 @@ def _simple_get(self): headers, response = ({}, "") - if openai_version < (1, 0): - mocked_responses = RESPONSES - if stream: - mocked_responses = STREAMED_RESPONSES + if is_openai_v0: + if is_streaming: + mocked_responses = STREAMED_RESPONSES_V0 + else: + mocked_responses = RESPONSES_V0 else: - mocked_responses = RESPONSES_V1 - if stream: + if is_streaming and include_usage: + mocked_responses = STREAMED_RESPONSES_V1_WITH_USAGE + elif is_streaming: mocked_responses = STREAMED_RESPONSES_V1 + else: + mocked_responses = RESPONSES_V1 for k, v in mocked_responses.items(): if prompt.startswith(k): @@ -704,7 +860,7 @@ def _simple_get(self): else: # If no matches found self.send_response(500) self.end_headers() - self.wfile.write(f"Unknown Prompt ({'Streaming' if stream else 'Non-Streaming'}):\n{prompt}".encode()) + self.wfile.write(f"Unknown Prompt ({'Streaming' if is_streaming else 
'Non-Streaming'}):\n{prompt}".encode()) return # Send response code @@ -716,7 +872,7 @@ def _simple_get(self): self.end_headers() # Send response body - if stream and status_code < 400: + if is_streaming and status_code < 400: for resp in response: data = json.dumps(resp).encode("utf-8") if prompt == "Stream parsing error.": diff --git a/tests/mlmodel_openai/test_chat_completion.py b/tests/mlmodel_openai/test_chat_completion.py index fc7f7f3852..243d3d7f18 100644 --- a/tests/mlmodel_openai/test_chat_completion.py +++ b/tests/mlmodel_openai/test_chat_completion.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -56,6 +56,9 @@ "response.organization": "new-relic-nkmd8b", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 11, + "response.usage.total_tokens": 64, + "response.usage.prompt_tokens": 53, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 200, @@ -83,6 +86,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -102,6 +106,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "ingest_source": "Python", @@ -120,6 +125,7 @@ "role": "assistant", "completion_id": None, "sequence": 2, + "token_count": 0, "response.model": "gpt-3.5-turbo-0613", "vendor": "openai", "is_response": True, @@ -175,7 +181,7 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): @reset_core_stats_engine() 
@override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -346,7 +352,7 @@ def test_openai_chat_completion_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_error.py b/tests/mlmodel_openai/test_chat_completion_error.py index 79cc79d6db..042cdef31a 100644 --- a/tests/mlmodel_openai/test_chat_completion_error.py +++ b/tests/mlmodel_openai/test_chat_completion_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -70,6 +68,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -86,6 +85,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": 
"openai", "ingest_source": "Python", }, @@ -191,6 +191,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -198,36 +199,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() @@ -288,6 +259,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -304,6 +276,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": 
"user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -370,6 +343,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -481,37 +455,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - ) - ) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_openai/test_chat_completion_error_v1.py b/tests/mlmodel_openai/test_chat_completion_error_v1.py index 555001a702..5e83b415e6 100644 --- 
a/tests/mlmodel_openai/test_chat_completion_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_error_v1.py @@ -14,13 +14,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -69,6 +67,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -85,6 +84,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -234,6 +234,7 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -271,37 +272,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) 
-@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_completion_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -334,41 +304,6 @@ def test_chat_completion_invalid_request_error_invalid_model_async(loop, set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.create( - model="does-not-exist", - 
messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_completion_tokens=100, - ) - ) - - expected_events_on_wrong_api_key_error = [ ( {"type": "LlmChatCompletionSummary"}, @@ -398,6 +333,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count_as "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -617,39 +553,6 @@ def test_chat_completion_invalid_request_error_invalid_model_with_raw_response(s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - sync_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_completion_tokens=100, - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -684,41 +587,6 @@ def 
test_chat_completion_invalid_request_error_invalid_model_async_with_raw_resp ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async_with_raw_response( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - async_openai_client.chat.completions.with_raw_response.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_completion_tokens=100, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_chat_completion_stream.py b/tests/mlmodel_openai/test_chat_completion_stream.py index 6858da8107..b1679ba4d2 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream.py +++ b/tests/mlmodel_openai/test_chat_completion_stream.py @@ -15,7 +15,8 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from 
testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -188,9 +189,101 @@ def test_openai_chat_completion_sync_no_content(set_trace_info): assert resp +chat_completion_recorded_token_events = [ + ( + {"type": "LlmChatCompletionSummary"}, + { + "id": None, # UUID that varies with each run + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "span_id": None, + "trace_id": "trace-id", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "duration": None, # Response time varies each test run + "request.model": "gpt-3.5-turbo", + "response.model": "gpt-3.5-turbo-0613", + "response.organization": "new-relic-nkmd8b", + "request.temperature": 0.7, + "request.max_tokens": 100, + "response.choices.finish_reason": "stop", + "response.headers.llmVersion": "2020-10-01", + "response.headers.ratelimitLimitRequests": 200, + "response.headers.ratelimitLimitTokens": 40000, + "response.headers.ratelimitResetTokens": "90ms", + "response.headers.ratelimitResetRequests": "7m12s", + "response.headers.ratelimitRemainingTokens": 39940, + "response.headers.ratelimitRemainingRequests": 199, + "vendor": "openai", + "ingest_source": "Python", + "response.number_of_messages": 3, + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-0", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "You are a scientist.", + "role": "system", + "completion_id": None, + "sequence": 0, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-1", + 
"llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "What is 212 degrees Fahrenheit converted to Celsius?", + "role": "user", + "completion_id": None, + "sequence": 1, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "ingest_source": "Python", + }, + ), + ( + {"type": "LlmChatCompletionMessage"}, + { + "id": "chatcmpl-87sb95K4EF2nuJRcTs43Tm9ntTemv-2", + "llm.conversation_id": "my-awesome-id", + "llm.foo": "bar", + "request_id": "49dbbffbd3c3f4612aa48def69059ccd", + "span_id": None, + "trace_id": "trace-id", + "content": "212 degrees Fahrenheit is equal to 100 degrees Celsius.", + "role": "assistant", + "completion_id": None, + "sequence": 2, + "token_count": 0, + "response.model": "gpt-3.5-turbo-0613", + "vendor": "openai", + "is_response": True, + "ingest_source": "Python", + }, + ), +] + + @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -253,25 +346,29 @@ def test_openai_chat_completion_sync_no_llm_metadata(set_trace_info): ) @background_task() def test_openai_chat_completion_sync_ai_monitoring_streaming_disabled(): - openai.ChatCompletion.create( + generator = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100, stream=True, ) + for resp in generator: + assert resp @reset_core_stats_engine() @validate_custom_event_count(count=0) def test_openai_chat_completion_sync_outside_txn(): - 
openai.ChatCompletion.create( + generator = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100, stream=True, ) + for resp in generator: + assert resp @disabled_ai_monitoring_settings @@ -279,13 +376,15 @@ def test_openai_chat_completion_sync_outside_txn(): @validate_custom_event_count(count=0) @background_task() def test_openai_chat_completion_sync_ai_monitoring_disabled(): - openai.ChatCompletion.create( + generator = openai.ChatCompletion.create( model="gpt-3.5-turbo", messages=_test_openai_chat_completion_messages, temperature=0.7, max_tokens=100, stream=True, ) + for resp in generator: + assert resp @reset_core_stats_engine() @@ -382,7 +481,9 @@ async def consumer(): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) @validate_custom_event_count(count=4) @validate_transaction_metrics( name="test_chat_completion_stream:test_openai_chat_completion_async_with_token_count", diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error.py b/tests/mlmodel_openai/test_chat_completion_stream_error.py index 0fb0d06867..2b01813d9f 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error.py @@ -15,13 +15,11 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from 
testing_support.validators.validate_custom_event import validate_custom_event_count @@ -70,6 +68,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -86,6 +85,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -196,6 +196,7 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -203,38 +204,6 @@ def test_chat_completion_invalid_request_error_no_model_no_content(set_trace_inf ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - generator = openai.ChatCompletion.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - 
- @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -297,6 +266,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -313,6 +283,7 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info): "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -384,6 +355,7 @@ def test_chat_completion_authentication_error(monkeypatch, set_trace_info): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -498,38 +470,6 @@ def test_chat_completion_invalid_request_error_no_model_async_no_content(loop, s ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - "test_chat_completion_stream_error:test_chat_completion_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/completion/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/completion/OpenAI/acreate", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count_async(loop, set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - loop.run_until_complete( - openai.ChatCompletion.acreate( - 
model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_tokens=100, - stream=True, - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -661,6 +601,7 @@ def test_chat_completion_wrong_api_key_error_async(loop, monkeypatch, set_trace_ "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py index ce3ce8061e..074a482669 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_error_v1.py @@ -12,16 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. - import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, events_with_context_attrs, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -70,6 +67,7 @@ "role": "system", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -86,6 +84,7 @@ "role": "user", "completion_id": None, "sequence": 1, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -251,6 +250,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -291,81 +291,6 @@ def test_chat_completion_invalid_request_error_invalid_model(set_trace_info, syn assert resp -@dt_enabled 
-@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - generator = sync_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_completion_tokens=100, - stream=True, - ) - for resp in generator: - assert resp - - -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"error.code": "model_not_found", "http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - "test_chat_completion_stream_error_v1:test_chat_completion_invalid_request_error_invalid_model_async_with_token_count", - 
scoped_metrics=[("Llm/completion/OpenAI/create", 1)], - rollup_metrics=[("Llm/completion/OpenAI/create", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error)) -@validate_custom_event_count(count=2) -@background_task() -def test_chat_completion_invalid_request_error_invalid_model_async_with_token_count( - loop, set_trace_info, async_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - add_custom_attribute("llm.conversation_id", "my-awesome-id") - - async def consumer(): - generator = await async_openai_client.chat.completions.create( - model="does-not-exist", - messages=({"role": "user", "content": "Model does not exist."},), - temperature=0.7, - max_completion_tokens=100, - stream=True, - ) - async for resp in generator: - assert resp - - loop.run_until_complete(consumer()) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -432,6 +357,7 @@ async def consumer(): "role": "user", "completion_id": None, "sequence": 0, + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, diff --git a/tests/mlmodel_openai/test_chat_completion_stream_v1.py b/tests/mlmodel_openai/test_chat_completion_stream_v1.py index 1343c208ac..25dbba8cea 100644 --- a/tests/mlmodel_openai/test_chat_completion_stream_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_stream_v1.py @@ -17,7 +17,8 @@ from conftest import get_openai_version from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_streaming_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -64,7 +65,8 @@ "request.model": "gpt-5.1", "response.model": "gpt-5.1-2025-11-13", "response.organization": "nr-test-org", - # Usage 
tokens aren't available when streaming. + # Usage tokens are only emitted by OpenAI when stream_options={"include_usage": True}. + # See test_openai_chat_completion_sync_with_stream_options_include_usage for that path. "request.temperature": 0.7, "request.max_tokens": 100, "response.choices.finish_reason": "stop", @@ -141,6 +143,14 @@ ] +# When stream_options={"include_usage": True} is set, the final stream chunk carries usage data +# and the agent populates response.usage.* on the summary + token_count: 0 on each message. +chat_completion_recorded_events_include_usage = add_token_count_streaming_events(chat_completion_recorded_events) +chat_completion_recorded_events_include_usage[0][1].update( + {"response.usage.prompt_tokens": 25, "response.usage.completion_tokens": 16, "response.usage.total_tokens": 41} +) + + @reset_core_stats_engine() @validate_custom_events(events_with_context_attrs(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @@ -302,9 +312,39 @@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie assert resp +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events_include_usage) +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + name="test_chat_completion_stream_v1:test_openai_chat_completion_sync_with_stream_options_include_usage", + custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_openai_chat_completion_sync_with_stream_options_include_usage(set_trace_info, sync_openai_client): + """Streaming with stream_options={"include_usage": True} populates response.usage.* on the summary.""" + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + + generator = sync_openai_client.chat.completions.create( + 
model="gpt-5.1", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_completion_tokens=100, + stream=True, + stream_options={"include_usage": True}, + ) + for resp in generator: + assert resp + + @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -624,9 +664,42 @@ async def consumer(): loop.run_until_complete(consumer()) +@reset_core_stats_engine() +@validate_custom_events(chat_completion_recorded_events_include_usage) +@validate_custom_event_count(count=4) +@validate_transaction_metrics( + name="test_chat_completion_stream_v1:test_openai_chat_completion_async_with_stream_options_include_usage", + custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], + background_task=True, +) +@validate_attributes("agent", ["llm"]) +@background_task() +def test_openai_chat_completion_async_with_stream_options_include_usage(set_trace_info, loop, async_openai_client): + """Streaming with stream_options={"include_usage": True} populates response.usage.* on the summary.""" + set_trace_info() + add_custom_attribute("llm.conversation_id", "my-awesome-id") + add_custom_attribute("llm.foo", "bar") + + async def consumer(): + generator = await async_openai_client.chat.completions.create( + model="gpt-5.1", + messages=_test_openai_chat_completion_messages, + temperature=0.7, + max_completion_tokens=100, + stream=True, + stream_options={"include_usage": True}, + ) + async for resp in generator: + assert resp + + loop.run_until_complete(consumer()) + + @reset_core_stats_engine() 
@override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events( + add_token_counts_to_chat_events(add_token_count_streaming_events(chat_completion_recorded_events)) +) # One summary event, one system message, one user message, and one response message from the assistant # @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_chat_completion_v1.py b/tests/mlmodel_openai/test_chat_completion_v1.py index ec636ca7d2..ae87f1757b 100644 --- a/tests/mlmodel_openai/test_chat_completion_v1.py +++ b/tests/mlmodel_openai/test_chat_completion_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_counts_to_chat_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, disabled_ai_monitoring_streaming_settings, @@ -55,6 +55,9 @@ "response.organization": "nr-test-org", "request.temperature": 0.7, "request.max_tokens": 100, + "response.usage.completion_tokens": 16, + "response.usage.prompt_tokens": 25, + "response.usage.total_tokens": 41, "response.choices.finish_reason": "stop", "response.headers.llmVersion": "2020-10-01", "response.headers.ratelimitLimitRequests": 15000, @@ -83,6 +86,7 @@ "completion_id": None, "sequence": 0, "response.model": "gpt-5.1-2025-11-13", + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -102,6 +106,7 @@ "completion_id": None, "sequence": 1, "response.model": "gpt-5.1-2025-11-13", + "token_count": 0, "vendor": "openai", "ingest_source": "Python", }, @@ -120,6 +125,7 @@ "completion_id": None, "sequence": 2, "response.model": "gpt-5.1-2025-11-13", + "token_count": 0, "vendor": "openai", "is_response": True, "ingest_source": "Python", @@ -196,7 +202,7 
@@ def test_openai_chat_completion_sync_no_content(set_trace_info, sync_openai_clie @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( @@ -395,7 +401,7 @@ def test_openai_chat_completion_async_with_llm_metadata_no_content(loop, set_tra @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(chat_completion_recorded_events)) +@validate_custom_events(add_token_counts_to_chat_events(chat_completion_recorded_events)) # One summary event, one system message, one user message, and one response message from the assistant @validate_custom_event_count(count=4) @validate_transaction_metrics( diff --git a/tests/mlmodel_openai/test_embeddings.py b/tests/mlmodel_openai/test_embeddings.py index c3c3e7c429..935db04fe0 100644 --- a/tests/mlmodel_openai/test_embeddings.py +++ b/tests/mlmodel_openai/test_embeddings.py @@ -19,7 +19,7 @@ validate_attributes, ) from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -55,6 +55,7 @@ "response.headers.ratelimitResetRequests": "19m45.394s", "response.headers.ratelimitRemainingTokens": 149994, "response.headers.ratelimitRemainingRequests": 197, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -107,7 +108,7 @@ def test_openai_embedding_sync_no_content(set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) 
-@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_sync_with_token_count", @@ -191,7 +192,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings:test_openai_embedding_async_with_token_count", diff --git a/tests/mlmodel_openai/test_embeddings_error.py b/tests/mlmodel_openai/test_embeddings_error.py index a8e46bf23a..f80e6ff41d 100644 --- a/tests/mlmodel_openai/test_embeddings_error.py +++ b/tests/mlmodel_openai/test_embeddings_error.py @@ -14,12 +14,10 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -128,35 +126,6 @@ def test_embeddings_invalid_request_error_no_model_no_content(set_trace_info): ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}}, -) -@validate_span_events( - exact_agents={ - "error.message": "The model 
`does-not-exist` does not exist" - # "http.statusCode": 404, - } -) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - openai.Embedding.create(input="Model does not exist.", model="does-not-exist") - - # Invalid model provided @dt_enabled @reset_core_stats_engine() @@ -348,30 +317,6 @@ def test_embeddings_invalid_request_error_no_model_async_no_content(loop, set_tr ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.InvalidRequestError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404}}, -) -@validate_span_events(exact_agents={"error.message": "The model `does-not-exist` does not exist"}) -@validate_transaction_metrics( - name="test_embeddings_error:test_embeddings_invalid_request_error_invalid_model_with_token_count_async", - scoped_metrics=[("Llm/embedding/OpenAI/acreate", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/acreate", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count_async(set_trace_info, loop): - set_trace_info() - with pytest.raises(openai.InvalidRequestError): - 
loop.run_until_complete(openai.Embedding.acreate(input="Model does not exist.", model="does-not-exist")) - - # Invalid model provided @dt_enabled @reset_core_stats_engine() diff --git a/tests/mlmodel_openai/test_embeddings_error_v1.py b/tests/mlmodel_openai/test_embeddings_error_v1.py index fd29236122..499f96893b 100644 --- a/tests/mlmodel_openai/test_embeddings_error_v1.py +++ b/tests/mlmodel_openai/test_embeddings_error_v1.py @@ -16,12 +16,10 @@ import openai import pytest -from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine +from testing_support.fixtures import dt_enabled, reset_core_stats_engine from testing_support.ml_testing_utils import ( - add_token_count_to_events, disabled_ai_monitoring_record_content_settings, events_sans_content, - llm_token_count_callback, set_trace_info, ) from testing_support.validators.validate_custom_event import validate_custom_event_count @@ -149,32 +147,6 @@ def test_embeddings_invalid_request_error_no_model_async(set_trace_info, async_o ] -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def 
test_embeddings_invalid_request_error_invalid_model_with_token_count(set_trace_info, sync_openai_client): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -255,36 +227,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content(set_tra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.create(input="Model does not exist.", model="does-not-exist") - ) - - embedding_invalid_key_error_events = [ ( {"type": "LlmEmbedding"}, @@ -449,34 +391,6 @@ def test_embeddings_invalid_request_error_no_model_async_with_raw_response(set_t ) # no model provided -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) 
-@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_with_token_count_with_raw_response( - set_trace_info, sync_openai_client -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - sync_openai_client.embeddings.with_raw_response.create(input="Model does not exist.", model="does-not-exist") - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( @@ -566,38 +480,6 @@ def test_embeddings_invalid_request_error_invalid_model_async_no_content_with_ra ) -@dt_enabled -@reset_core_stats_engine() -@override_llm_token_callback_settings(llm_token_count_callback) -@validate_error_trace_attributes( - callable_name(openai.NotFoundError), - exact_attrs={"agent": {}, "intrinsic": {}, "user": {"http.statusCode": 404, "error.code": "model_not_found"}}, -) -@validate_span_events( - exact_agents={"error.message": "The model `does-not-exist` does not exist or you do not have access to it."} -) -@validate_transaction_metrics( - name="test_embeddings_error_v1:test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response", - scoped_metrics=[("Llm/embedding/OpenAI/create", 1)], - 
rollup_metrics=[("Llm/embedding/OpenAI/create", 1)], - custom_metrics=[(f"Supportability/Python/ML/OpenAI/{openai.__version__}", 1)], - background_task=True, -) -@validate_custom_events(add_token_count_to_events(invalid_model_events)) -@validate_custom_event_count(count=1) -@background_task() -def test_embeddings_invalid_request_error_invalid_model_async_with_token_count_with_raw_response( - set_trace_info, async_openai_client, loop -): - set_trace_info() - with pytest.raises(openai.NotFoundError): - loop.run_until_complete( - async_openai_client.embeddings.with_raw_response.create( - input="Model does not exist.", model="does-not-exist" - ) - ) - - @dt_enabled @reset_core_stats_engine() @validate_error_trace_attributes( diff --git a/tests/mlmodel_openai/test_embeddings_v1.py b/tests/mlmodel_openai/test_embeddings_v1.py index 9dd10262a5..004f8ba729 100644 --- a/tests/mlmodel_openai/test_embeddings_v1.py +++ b/tests/mlmodel_openai/test_embeddings_v1.py @@ -15,7 +15,7 @@ import openai from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes from testing_support.ml_testing_utils import ( - add_token_count_to_events, + add_token_count_to_embedding_events, disabled_ai_monitoring_record_content_settings, disabled_ai_monitoring_settings, events_sans_content, @@ -48,6 +48,7 @@ "response.headers.ratelimitResetRequests": "6ms", "response.headers.ratelimitRemainingTokens": 9999994, "response.headers.ratelimitRemainingRequests": 9999, + "response.usage.total_tokens": 6, "vendor": "openai", "ingest_source": "Python", }, @@ -111,7 +112,7 @@ def test_openai_embedding_sync_no_content(set_trace_info, sync_openai_client): @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) 
@validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_sync_with_token_count", @@ -206,7 +207,7 @@ def test_openai_embedding_async_no_content(loop, set_trace_info, async_openai_cl @reset_core_stats_engine() @override_llm_token_callback_settings(llm_token_count_callback) -@validate_custom_events(add_token_count_to_events(embedding_recorded_events)) +@validate_custom_events(add_token_count_to_embedding_events(embedding_recorded_events)) @validate_custom_event_count(count=1) @validate_transaction_metrics( name="test_embeddings_v1:test_openai_embedding_async_with_token_count", diff --git a/tests/testing_support/ml_testing_utils.py b/tests/testing_support/ml_testing_utils.py index 4ff70c7ed4..8c2c0444f0 100644 --- a/tests/testing_support/ml_testing_utils.py +++ b/tests/testing_support/ml_testing_utils.py @@ -29,6 +29,7 @@ def llm_token_count_callback(model, content): return 105 +# This will be removed once all LLM instrumentations have been converted to use the new token count design def add_token_count_to_events(expected_events): events = copy.deepcopy(expected_events) for event in events: @@ -37,6 +38,32 @@ def add_token_count_to_events(expected_events): return events +def add_token_count_to_embedding_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmEmbedding": + event[1]["response.usage.total_tokens"] = 105 + return events + + +def add_token_count_streaming_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionMessage": + event[1]["token_count"] = 0 + return events + + +def add_token_counts_to_chat_events(expected_events): + events = copy.deepcopy(expected_events) + for event in events: + if event[0]["type"] == "LlmChatCompletionSummary": + event[1]["response.usage.prompt_tokens"] = 105 + event[1]["response.usage.completion_tokens"] = 105 + event[1]["response.usage.total_tokens"] = 210 + return events 
+ + def events_sans_content(event): new_event = copy.deepcopy(event) for _event in new_event: