Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 62 additions & 20 deletions newrelic/hooks/mlmodel_openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,11 @@ def create_chat_completion_message_event(
span_id,
trace_id,
response_model,
request_model,
response_id,
request_id,
llm_metadata,
output_message_list,
all_token_counts,
request_timestamp=None,
):
settings = transaction.settings if transaction.settings is not None else global_settings()
Expand All @@ -158,20 +158,20 @@ def create_chat_completion_message_event(
"request_id": request_id,
"span_id": span_id,
"trace_id": trace_id,
"token_count": (
settings.ai_monitoring.llm_token_count_callback(request_model, message_content)
if settings.ai_monitoring.llm_token_count_callback
else None
),
"role": message.get("role"),
"completion_id": chat_completion_id,
"sequence": index,
"response.model": response_model,
"vendor": "openai",
"ingest_source": "Python",
}

if settings.ai_monitoring.record_content.enabled and message_content:
chat_completion_input_message_dict["content"] = message_content

if all_token_counts:
chat_completion_input_message_dict["token_count"] = 0

if request_timestamp:
chat_completion_input_message_dict["timestamp"] = request_timestamp

Expand Down Expand Up @@ -199,11 +199,6 @@ def create_chat_completion_message_event(
"request_id": request_id,
"span_id": span_id,
"trace_id": trace_id,
"token_count": (
settings.ai_monitoring.llm_token_count_callback(response_model, message_content)
if settings.ai_monitoring.llm_token_count_callback
else None
),
"role": message.get("role"),
"completion_id": chat_completion_id,
"sequence": index,
Expand All @@ -216,6 +211,9 @@ def create_chat_completion_message_event(
if settings.ai_monitoring.record_content.enabled and message_content:
chat_completion_output_message_dict["content"] = message_content

if all_token_counts:
chat_completion_output_message_dict["token_count"] = 0

chat_completion_output_message_dict.update(llm_metadata)

transaction.record_custom_event("LlmChatCompletionMessage", chat_completion_output_message_dict)
Expand Down Expand Up @@ -286,15 +284,18 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
else getattr(attribute_response, "organization", None)
)

response_total_tokens = attribute_response.get("usage", {}).get("total_tokens") if response else None

total_tokens = (
settings.ai_monitoring.llm_token_count_callback(response_model, input_)
if settings.ai_monitoring.llm_token_count_callback and input_
else response_total_tokens
)

full_embedding_response_dict = {
"id": embedding_id,
"span_id": span_id,
"trace_id": trace_id,
"token_count": (
settings.ai_monitoring.llm_token_count_callback(response_model, input_)
if settings.ai_monitoring.llm_token_count_callback
else None
),
"request.model": kwargs.get("model") or kwargs.get("engine"),
"request_id": request_id,
"duration": ft.duration * 1000,
Expand All @@ -319,6 +320,7 @@ def _record_embedding_success(transaction, embedding_id, linking_metadata, kwarg
"response.headers.ratelimitRemainingRequests": check_rate_limit_header(
response_headers, "x-ratelimit-remaining-requests", True
),
"response.usage.total_tokens": total_tokens,
"vendor": "openai",
"ingest_source": "Python",
}
Expand Down Expand Up @@ -489,13 +491,15 @@ def _handle_completion_success(
def _record_completion_success(
transaction, linking_metadata, completion_id, kwargs, ft, response_headers, response, request_timestamp=None
):
settings = transaction.settings if transaction.settings is not None else global_settings()
span_id = linking_metadata.get("span.id")
trace_id = linking_metadata.get("trace.id")

try:
if response:
response_model = response.get("model")
response_id = response.get("id")
token_usage = response.get("usage") or {}
output_message_list = []
finish_reason = None
choices = response.get("choices") or []
Expand All @@ -509,6 +513,7 @@ def _record_completion_success(
else:
response_model = kwargs.get("response.model")
response_id = kwargs.get("id")
token_usage = kwargs.get("response.usage") or {}
finish_reason = kwargs.get("finish_reason")
content = kwargs.get("content")
# Tool-call responses may carry an empty content string; in that case the
Expand All @@ -519,12 +524,39 @@ def _record_completion_success(
output_message_list = [{"content": content, "role": kwargs.get("role")}]
else:
output_message_list = []

request_model = kwargs.get("model") or kwargs.get("engine")

request_id = response_headers.get("x-request-id")
organization = response_headers.get("openai-organization") or getattr(response, "organization", None)
messages = kwargs.get("messages") or [{"content": kwargs.get("prompt"), "role": "user"}]
input_message_list = list(messages)

# Token counts default to those reported in the response object if available,
# but the user registered callback below may override them.
response_prompt_tokens = token_usage.get("prompt_tokens")
response_completion_tokens = token_usage.get("completion_tokens")
response_total_tokens = token_usage.get("total_tokens")

# If the user has registered a callback to compute token counts it should always be preferred.
token_count_callback = settings.ai_monitoring.llm_token_count_callback
if token_count_callback:
input_message_content = " ".join(content for msg in input_message_list if (content := msg.get("content")))
if input_message_content:
response_prompt_tokens = token_count_callback(request_model, input_message_content)
output_message_content = " ".join(content for msg in output_message_list if (content := msg.get("content")))
if output_message_content:
response_completion_tokens = token_count_callback(response_model, output_message_content)

# Prefer the sum of individual counts as the total whenever both are available.
# This ensures consistency in the event that the token counting callback has reported
# different values for prompt or completion tokens.
if response_prompt_tokens and response_completion_tokens:
response_total_tokens = response_prompt_tokens + response_completion_tokens

all_token_counts = bool(response_prompt_tokens and response_completion_tokens and response_total_tokens)

request_id = response_headers.get("x-request-id")
organization = response_headers.get("openai-organization") or getattr(response, "organization", None)

full_chat_completion_summary_dict = {
"id": completion_id,
"span_id": span_id,
Expand Down Expand Up @@ -571,6 +603,12 @@ def _record_completion_success(
"response.number_of_messages": len(input_message_list) + len(output_message_list),
"timestamp": request_timestamp,
}

if all_token_counts:
full_chat_completion_summary_dict["response.usage.prompt_tokens"] = response_prompt_tokens
full_chat_completion_summary_dict["response.usage.completion_tokens"] = response_completion_tokens
full_chat_completion_summary_dict["response.usage.total_tokens"] = response_total_tokens

llm_metadata = _get_llm_attributes(transaction)

if "time_to_first_token" in kwargs:
Expand All @@ -586,11 +624,11 @@ def _record_completion_success(
span_id,
trace_id,
response_model,
request_model,
response_id,
request_id,
llm_metadata,
output_message_list,
all_token_counts,
request_timestamp,
)
except Exception:
Expand All @@ -602,6 +640,7 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
trace_id = linking_metadata.get("trace.id")
request_message_list = kwargs.get("messages", None) or []
notice_error_attributes = {}

try:
if OPENAI_V1:
response = getattr(exc, "response", None)
Expand Down Expand Up @@ -667,18 +706,20 @@ def _record_completion_error(transaction, linking_metadata, completion_id, kwarg
output_message_list = []
if "content" in kwargs:
output_message_list = [{"content": kwargs.get("content"), "role": kwargs.get("role")}]

create_chat_completion_message_event(
transaction,
request_message_list,
completion_id,
span_id,
trace_id,
kwargs.get("response.model"),
request_model,
response_id,
request_id,
llm_metadata,
output_message_list,
# We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run
True,
request_timestamp,
)
except Exception:
Expand Down Expand Up @@ -787,6 +828,7 @@ def _record_stream_chunk(self, return_val):
self._nr_openai_attrs["response.model"] = return_val.get("model")
self._nr_openai_attrs["id"] = return_val.get("id")
self._nr_openai_attrs["response.organization"] = return_val.get("organization")
self._nr_openai_attrs["response.usage"] = return_val.get("usage")
if choices:
delta = choices[0].get("delta") or {}
if delta:
Expand Down
28 changes: 14 additions & 14 deletions tests/mlmodel_langchain/_mock_external_openai_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"system: You are a world class algorithm for extracting information in structured formats. | user: Use the given format to extract information from the following input: Hello, world | user: Tip: Make sure to answer in the correct format": [
{
"content-type": "text/event-stream; charset=utf-8",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "440",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -125,7 +125,7 @@
'user: Use a tool to add an exclamation to the word "Hello"': [
{
"content-type": "text/event-stream; charset=utf-8",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "134",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -334,7 +334,7 @@
'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "Hello"': [
{
"content-type": "application/json",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "324",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -391,7 +391,7 @@
'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "Hello" | assistant: None | tool: Hello!': [
{
"content-type": "application/json",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "751",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -441,7 +441,7 @@
'system: You are a text manipulation algorithm. | user: Use a tool to add an exclamation to the word "exc"': [
{
"content-type": "application/json",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "767",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -498,7 +498,7 @@
"system: You are a helpful assistant who generates a random first name. A user will pass in a first letter, and you should generate a name that starts with that first letter. | user: M": [
{
"content-type": "application/json",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "236",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -543,7 +543,7 @@
"system: You are a helpful assistant who generates comma separated lists.\n A user will pass in a category, and you should generate 5 objects in that category in a comma separated list.\n ONLY return a comma separated list, and nothing more. | user: colors": [
{
"content-type": "application/json",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "289",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -593,7 +593,7 @@
"system: You are a world class algorithm for extracting information in structured formats. | user: Use the given format to extract information from the following input: Sally is 13 | user: Tip: Make sure to answer in the correct format": [
{
"content-type": "application/json",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "201",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -656,7 +656,7 @@
"system: You are a generator of quiz questions for a seminar. Use the following pieces of retrieved context to generate 5 multiple choice questions (A,B,C,D) on the subject matter. Use a three sentence maximum and keep the answer concise. Render the output as HTML\n\nWhat is 2 + 4? | user: math": [
{
"content-type": "application/json",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "2029",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -708,7 +708,7 @@
{
"content-type": "application/json",
"openai-model": "text-embedding-ada-002-v2",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "42",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -738,7 +738,7 @@
{
"content-type": "application/json",
"openai-model": "text-embedding-ada-002-v2",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "82",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -768,7 +768,7 @@
{
"content-type": "application/json",
"openai-model": "text-embedding-ada-002-v2",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "158",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -798,7 +798,7 @@
{
"content-type": "application/json",
"openai-model": "text-embedding-ada-002-v2",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "116",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down Expand Up @@ -827,7 +827,7 @@
'user: Use a tool to add an exclamation to the word "Hello"': [
{
"content-type": "application/json",
"openai-organization": "user-rk8wq9voijy9sejrncvgi0iw",
"openai-organization": "nr-test-org",
"openai-processing-ms": "238",
"openai-project": "proj_0Wv6taeZjWf793P67JMswYY3",
"openai-version": "2020-10-01",
Expand Down
Loading
Loading