Skip to content
75 changes: 49 additions & 26 deletions newrelic/hooks/mlmodel_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,6 +425,8 @@ def _record_completion_error(*, transaction, linking_metadata, completion_id, kw
request_model=request_model,
llm_metadata=llm_metadata,
response_content=None,
# Token counts are not recorded in error cases; set all_token_counts to True so that the pipeline tokenizer does not run.
all_token_counts=True,
request_timestamp=request_timestamp,
)
except Exception:
Expand All @@ -447,6 +449,7 @@ def _record_completion_success(
request_timestamp=None,
time_to_first_token=None,
):
settings = transaction.settings or global_settings()
span_id = linking_metadata.get("span.id")
trace_id = linking_metadata.get("trace.id")
try:
Expand All @@ -455,10 +458,39 @@ def _record_completion_success(
request_temperature = kwargs.get("temperature")
request_max_tokens = kwargs.get("max_tokens")

# TODO: Complete token counting
# total_tokens = (
# (input_tokens + output_tokens) if (input_tokens is not None and output_tokens is not None) else None
# )
# Token counts default to those reported in the response object if available,
# but the user registered callback below may override them.
# Anthropic does not include a total in usage, so it is always recomputed from the parts below.
response_prompt_tokens = input_tokens
response_completion_tokens = output_tokens
response_total_tokens = None

# If the user has registered a callback to compute token counts it should always be preferred.
token_count_callback = settings.ai_monitoring.llm_token_count_callback
if token_count_callback:
input_message_content = " ".join(
content
for msg in messages
if (
content := _extract_message_content(
msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
)
)
)
if input_message_content:
response_prompt_tokens = token_count_callback(request_model, input_message_content)
response_text = _extract_message_content(response_content)
if response_text:
response_completion_tokens = token_count_callback(response_model, response_text)

# Prefer the sum of individual counts as the total whenever both are available.
# This ensures consistency in the event that the token counting callback has reported
# different values for prompt or completion tokens.
if response_prompt_tokens and response_completion_tokens:
response_total_tokens = response_prompt_tokens + response_completion_tokens

all_token_counts = bool(response_prompt_tokens and response_completion_tokens and response_total_tokens)

number_of_messages = len(messages) + (1 if response_content else 0)

full_chat_completion_summary_dict = {
Expand All @@ -474,13 +506,15 @@ def _record_completion_success(
"response.model": response_model,
"response.choices.finish_reason": stop_reason,
"response.number_of_messages": number_of_messages,
# "response.usage.total_tokens": total_tokens,
# "response.usage.prompt_tokens": input_tokens,
# "response.usage.completion_tokens": output_tokens,
"timestamp": request_timestamp,
"time_to_first_token": time_to_first_token,
}

if all_token_counts:
full_chat_completion_summary_dict["response.usage.prompt_tokens"] = response_prompt_tokens
full_chat_completion_summary_dict["response.usage.completion_tokens"] = response_completion_tokens
full_chat_completion_summary_dict["response.usage.total_tokens"] = response_total_tokens

llm_metadata = _get_llm_attributes(transaction)
full_chat_completion_summary_dict.update(llm_metadata)
transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict)
Expand All @@ -496,6 +530,7 @@ def _record_completion_success(
request_model=request_model,
llm_metadata=llm_metadata,
response_content=response_content,
all_token_counts=all_token_counts,
request_timestamp=request_timestamp,
)
except Exception:
Expand All @@ -514,6 +549,7 @@ def create_chat_completion_message_event(
request_model,
llm_metadata,
response_content,
all_token_counts,
request_timestamp=None,
):
try:
Expand All @@ -530,18 +566,15 @@ def create_chat_completion_message_event(
"id": message_id,
"span_id": span_id,
"trace_id": trace_id,
"token_count": (
settings.ai_monitoring.llm_token_count_callback(request_model, message_content)
if settings.ai_monitoring.llm_token_count_callback and message_content
else None
),
"role": role,
"completion_id": completion_id,
"sequence": sequence,
"response.model": response_model,
"vendor": "anthropic",
"ingest_source": "Python",
}
if all_token_counts:
input_message_dict["token_count"] = 0
if settings.ai_monitoring.record_content.enabled and message_content is not None:
input_message_dict["content"] = message_content
if request_timestamp:
Expand All @@ -551,26 +584,14 @@ def create_chat_completion_message_event(
transaction.record_custom_event("LlmChatCompletionMessage", input_message_dict)

# Record one event for the response
if response_content:
response_text = _extract_message_content(response_content)
if response_text:
response_sequence = len(messages)
# response_content may be a plain string (streaming path) or a list of content blocks (non-streaming).
if isinstance(response_content, str):
response_text = response_content
else:
response_text = " ".join(
block.text for block in response_content if getattr(block, "type", None) == "text"
)

response_message_id = f"{response_id}-{response_sequence}" if response_id else str(uuid.uuid4())
output_message_dict = {
"id": response_message_id,
"span_id": span_id,
"trace_id": trace_id,
"token_count": (
settings.ai_monitoring.llm_token_count_callback(response_model, response_text)
if settings.ai_monitoring.llm_token_count_callback and response_text
else None
),
"role": "assistant",
"completion_id": completion_id,
"sequence": response_sequence,
Expand All @@ -579,6 +600,8 @@ def create_chat_completion_message_event(
"ingest_source": "Python",
"is_response": True,
}
if all_token_counts:
output_message_dict["token_count"] = 0
if settings.ai_monitoring.record_content.enabled and response_text:
output_message_dict["content"] = response_text

Expand Down
9 changes: 7 additions & 2 deletions tests/mlmodel_anthropic/test_chat_completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from conftest import ANTHROPIC_VERSION_METRIC
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
from testing_support.ml_testing_utils import (
add_token_count_to_events,
add_token_counts_to_chat_events,
disabled_ai_monitoring_record_content_settings,
disabled_ai_monitoring_settings,
events_sans_content,
Expand Down Expand Up @@ -51,6 +51,9 @@ def chat_completion_events(is_streaming):
"response.model": "claude-sonnet-4-5-20250929",
"request.temperature": 0.7,
"request.max_tokens": 100,
"response.usage.prompt_tokens": 16,
"response.usage.completion_tokens": 26,
"response.usage.total_tokens": 42,
"response.choices.finish_reason": "end_turn",
"vendor": "anthropic",
"ingest_source": "Python",
Expand All @@ -71,6 +74,7 @@ def chat_completion_events(is_streaming):
"completion_id": None,
"sequence": 0,
"response.model": "claude-sonnet-4-5-20250929",
"token_count": 0,
"vendor": "anthropic",
"ingest_source": "Python",
},
Expand All @@ -88,6 +92,7 @@ def chat_completion_events(is_streaming):
"completion_id": None,
"sequence": 1,
"response.model": "claude-sonnet-4-5-20250929",
"token_count": 0,
"vendor": "anthropic",
"is_response": True,
"ingest_source": "Python",
Expand Down Expand Up @@ -238,7 +243,7 @@ def _test():
def test_anthropic_chat_completion_with_token_count(
exercise_model, chat_completion_metrics, set_trace_info, chat_completion_events
):
@validate_custom_events(add_token_count_to_events(chat_completion_events))
@validate_custom_events(add_token_counts_to_chat_events(chat_completion_events))
@validate_custom_event_count(count=3)
@validate_transaction_metrics(
name="test_anthropic_chat_completion_with_token_count",
Expand Down
6 changes: 4 additions & 2 deletions tests/mlmodel_anthropic/test_chat_completion_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
from conftest import ANTHROPIC_VERSION_METRIC
from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
from testing_support.ml_testing_utils import (
add_token_count_to_events,
disabled_ai_monitoring_record_content_settings,
events_sans_content,
events_with_context_attrs,
Expand Down Expand Up @@ -69,6 +68,7 @@
"role": "user",
"completion_id": None,
"sequence": 0,
"token_count": 0,
"vendor": "anthropic",
"ingest_source": "Python",
},
Expand Down Expand Up @@ -190,6 +190,7 @@ def _test():
"completion_id": None,
"response.model": "does-not-exist",
"sequence": 0,
"token_count": 0,
"vendor": "anthropic",
"ingest_source": "Python",
},
Expand Down Expand Up @@ -230,7 +231,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count(
custom_metrics=[(ANTHROPIC_VERSION_METRIC, 1)],
background_task=True,
)
@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
@validate_custom_events(expected_events_on_invalid_model_error)
@validate_custom_event_count(count=2)
@background_task(name="test_chat_completion_invalid_request_error_invalid_model_with_token_count")
def _test():
Expand Down Expand Up @@ -277,6 +278,7 @@ def _test():
"response.model": "claude-4-5-sonnet",
"completion_id": None,
"sequence": 0,
"token_count": 0,
"vendor": "anthropic",
"ingest_source": "Python",
},
Expand Down
27 changes: 27 additions & 0 deletions tests/testing_support/ml_testing_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def llm_token_count_callback(model, content):
return 105


# This will be removed once all LLM instrumentations have been converted to use the new token count design.
def add_token_count_to_events(expected_events):
events = copy.deepcopy(expected_events)
for event in events:
Expand All @@ -37,6 +38,32 @@ def add_token_count_to_events(expected_events):
return events


def add_token_count_to_embedding_events(expected_events):
    """Return a copy of ``expected_events`` with a total token count on embedding events.

    The input is deep-copied, so the caller's events are never mutated. Each
    event is indexed as ``event[0]`` (a mapping holding the ``"type"`` key) and
    ``event[1]`` (the mapping that receives the token attribute). Only events
    whose type is ``"LlmEmbedding"`` are touched; all others are returned
    unchanged.
    """
    patched = copy.deepcopy(expected_events)
    # Lazily pick out the attribute mappings of embedding events only.
    embedding_attributes = (entry[1] for entry in patched if entry[0]["type"] == "LlmEmbedding")
    for attributes in embedding_attributes:
        attributes["response.usage.total_tokens"] = 105
    return patched


def add_token_count_streaming_events(expected_events):
    """Return a copy of ``expected_events`` with ``token_count`` zeroed on message events.

    The input is deep-copied before any change is made. For every event whose
    ``event[0]["type"]`` equals ``"LlmChatCompletionMessage"``, the
    ``"token_count"`` key of ``event[1]`` is set to ``0`` (added if absent,
    overwritten if present). Events of any other type are left as they are.
    """
    result = copy.deepcopy(expected_events)
    for entry in result:
        header, attributes = entry[0], entry[1]
        if header["type"] != "LlmChatCompletionMessage":
            continue
        attributes["token_count"] = 0
    return result


def add_token_counts_to_chat_events(expected_events):
    """Return a copy of ``expected_events`` with usage token counts on summary events.

    The input is deep-copied so the original expectations stay untouched. Every
    event whose ``event[0]["type"]`` is ``"LlmChatCompletionSummary"`` gets
    prompt and completion token counts of 105 each and a total of 210 written
    into ``event[1]``. Other event types are not modified.
    """
    usage_attributes = {
        "response.usage.prompt_tokens": 105,
        "response.usage.completion_tokens": 105,
        "response.usage.total_tokens": 210,
    }
    updated = copy.deepcopy(expected_events)
    for entry in updated:
        if entry[0]["type"] == "LlmChatCompletionSummary":
            entry[1].update(usage_attributes)
    return updated


def events_sans_content(event):
new_event = copy.deepcopy(event)
for _event in new_event:
Expand Down
Loading