Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 90 additions & 0 deletions backend/tests/unit/test_prompt_cache_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,96 @@ def test_page_context_in_dynamic_section():
assert "Meeting with team" in dynamic_suffix


# ---------------------------------------------------------------------------
# Tests: Current datetime is kept out of the cached system prefix
# ---------------------------------------------------------------------------


class _FixedDatetime:
    """Stand-in for the ``datetime`` class whose ``now()`` is pinned to one instant.

    An instance is assigned over a module's ``datetime`` name for the duration of a
    test. ``now()`` reports the pinned instant; every other attribute, and direct
    construction via ``datetime(...)``, is forwarded to the real ``datetime`` class.
    """

    def __init__(self, fixed):
        # The instant every now() call reports.
        self._fixed = fixed

    def now(self, tz=None):
        """Return the pinned instant, converted to ``tz`` when one is given."""
        if tz is not None:
            return self._fixed.astimezone(tz)
        return self._fixed

    def __call__(self, *args, **kwargs):
        """Construct a real datetime so ``datetime(...)`` keeps working while patched.

        Implicit special-method lookup bypasses ``__getattr__``, so without this the
        stand-in would raise ``TypeError: object is not callable``.
        """
        from datetime import datetime as _real_datetime

        return _real_datetime(*args, **kwargs)

    def __getattr__(self, name):
        # Only reached for names not found on the instance: delegate to the real class
        # (fromisoformat, strptime, min, max, ...).
        from datetime import datetime as _real_datetime

        return getattr(_real_datetime, name)


def test_system_prompt_is_time_invariant():
    """
    Build the agentic system prompt at two different pinned instants and require the
    results to be identical. The prompt sits behind a single cache_control breakpoint,
    so a live timestamp leaking into it would defeat prompt caching; the datetime
    belongs in the user turn instead.
    """
    from datetime import datetime as _dt

    chat_mod = _get_chat_module()
    build_prompt = chat_mod._get_agentic_qa_prompt
    _set_user(chat_mod, "Alice", "America/New_York")

    # Two instants months apart, each with a distinctive microsecond field that is easy
    # to spot if it ever leaks into the prompt text.
    pinned_instants = (
        _dt(2024, 1, 19, 14, 23, 45, 123456, tzinfo=timezone.utc),
        _dt(2024, 6, 1, 9, 0, 0, 654321, tzinfo=timezone.utc),
    )

    original_datetime = chat_mod.datetime
    rendered = []
    try:
        for instant in pinned_instants:
            chat_mod.datetime = _FixedDatetime(instant)
            rendered.append(build_prompt("uid_alice"))
    finally:
        # Always restore the real datetime so later tests see the unpatched module.
        chat_mod.datetime = original_datetime
    prompt_early, prompt_late = rendered

    assert prompt_early == prompt_late, (
        "System prompt changed as time advanced — it must be time-invariant for cache hits.\n"
        f"First diff at: {_find_first_diff(prompt_early, prompt_late)}"
    )
    # The microsecond-precision live timestamp must not appear anywhere in the prompt.
    assert "123456" not in prompt_early, "Live timestamp leaked into the cached system prompt"
    assert "654321" not in prompt_late, "Live timestamp leaked into the cached system prompt"


def test_current_datetime_block_carries_live_time():
    """The block built for the user turn must reflect the (pinned) current time."""
    from datetime import datetime as _dt

    chat_mod = _get_chat_module()
    _set_user(chat_mod, "Alice", "America/New_York")

    original_datetime = chat_mod.datetime
    try:
        # Pin the module clock, then build the block while the patch is active.
        chat_mod.datetime = _FixedDatetime(_dt(2024, 1, 19, 14, 23, 45, 123456, tzinfo=timezone.utc))
        block = chat_mod.get_current_datetime_block("uid_alice")
    finally:
        # Restore the real datetime regardless of what the call did.
        chat_mod.datetime = original_datetime

    assert "<current_datetime>" in block
    assert "2024-01-19" in block, "Datetime block should contain the live date"


def test_datetime_injected_into_user_turn_not_system():
    """
    _inject_current_datetime must attach the datetime block to the latest user turn so the
    model still sees the current time without touching the cached system prefix.

    Beyond checking the prefix, this also verifies that the user's own text survives the
    injection and that no other turn is added, removed, or rewritten.
    """
    agentic_mod = _get_agentic_module()

    first_question = "what did I do yesterday?"
    assistant_reply = "let me check"
    latest_question = "thanks, and today?"
    messages = [
        {"role": "user", "content": first_question},
        {"role": "assistant", "content": assistant_reply},
        {"role": "user", "content": latest_question},
    ]
    block = "<current_datetime>\nCurrent date time in UTC: 2024-01-19 14:23:45\n</current_datetime>"
    result = agentic_mod._inject_current_datetime(list(messages), block)

    # The conversation shape must be unchanged: no extra or missing turns.
    assert [m["role"] for m in result] == ["user", "assistant", "user"], "Turn structure must be preserved"

    # The block must be attached to the LAST user message, not the earlier one.
    latest_content = result[-1]["content"]
    assert latest_content.startswith(block), "Datetime block should prepend the latest user turn"
    # startswith alone would still pass if the question were dropped, so check it survived.
    assert latest_content.endswith(latest_question), "The user's question must survive the injection"

    assert result[1]["content"] == assistant_reply, "Assistant turns must be untouched"
    assert result[0]["content"] == first_question, "Earlier user turns must be untouched"


# ---------------------------------------------------------------------------
# Utility
# ---------------------------------------------------------------------------
Expand Down
78 changes: 56 additions & 22 deletions backend/utils/llm/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,47 @@ def _get_qa_rag_prompt(
""".replace(' ', '').replace('\n\n\n', '\n\n').strip()


# Prompt-cache stability: the agentic system prompt is wrapped in a single Anthropic
# cache_control breakpoint, so any byte that changes per request invalidates the whole
# cached prefix. The current datetime is the only such value (microsecond ISO), so it is
# kept OUT of the system prompt and injected into the user turn instead
# (see get_current_datetime_block / agentic.py).
# The system prompt carries this stable placeholder in place of the live datetime so its
# datetime instructions still make sense.
CURRENT_DATETIME_PLACEHOLDER = "(see <current_datetime> in the latest user message)"


def get_user_timezone(uid: str) -> str:
    """Return the user's stored timezone name, or "UTC" when it cannot be loaded.

    Args:
        uid: User ID whose timezone setting is looked up.

    Returns:
        The stored timezone name if ``ZoneInfo`` accepts it, otherwise ``"UTC"``.
    """
    stored_tz = notification_db.get_user_time_zone(uid)
    try:
        # Constructing the zone is purely a validity probe; the object is discarded.
        ZoneInfo(stored_tz)
    except Exception:
        return "UTC"
    else:
        return stored_tz


def get_current_datetime_block(uid: str, tz: Optional[str] = None) -> str:
    """Build the current-datetime block injected into the user turn.

    Kept out of the cached system prefix so the cached bytes stay stable across requests
    while the model still receives the live time.

    Args:
        uid: User ID; used to look up the timezone when ``tz`` is not supplied.
        tz: Optional timezone name that the caller has already resolved. Passing it
            avoids a second timezone lookup when the caller resolved it for the system
            prompt in the same request. When omitted, it is resolved via
            get_user_timezone(uid).

    Returns:
        A ``<current_datetime>`` block holding the current time in the chosen timezone,
        both as ``YYYY-MM-DD HH:MM:SS`` and as an ISO-8601 string. If the timezone
        cannot be loaded, the block reports UTC instead.
    """
    if tz is None:
        tz = get_user_timezone(uid)
    try:
        current_datetime_user = datetime.now(ZoneInfo(tz))
    except Exception:
        # A caller-supplied name may be invalid, and ZoneInfo's failure types vary with
        # the input, so fall back to UTC rather than failing the request.
        current_datetime_user = datetime.now(timezone.utc)
        tz = "UTC"
    current_datetime_str = current_datetime_user.strftime('%Y-%m-%d %H:%M:%S')
    current_datetime_iso = current_datetime_user.isoformat()
    return (
        "<current_datetime>\n"
        f"Current date time in {tz}: {current_datetime_str}\n"
        f"Current date time ISO format: {current_datetime_iso}\n"
        "</current_datetime>"
    )
Comment on lines +399 to +429

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Double notification_db.get_user_time_zone call per request

Both _get_agentic_qa_prompt (via get_user_timezone) and get_current_datetime_block (via get_user_timezone) call notification_db.get_user_time_zone(uid) independently on every request. Since execute_agentic_chat_stream calls both in sequence, every agentic request makes two round-trips to the notification database for the same immutable-per-request value. Passing the resolved timezone string into get_current_datetime_block (e.g., get_current_datetime_block(uid, tz=tz)) would eliminate the redundant lookup without changing any observable behaviour.

Note: If this suggestion doesn't match your team's coding style, reply to this and let me know. I'll remember it for next time!



def _get_agentic_qa_prompt(
uid: str, app: Optional[App] = None, messages: List[Message] = None, context: Optional[PageContext] = None
) -> str:
Expand All @@ -397,6 +438,10 @@ def _get_agentic_qa_prompt(
Uses LangSmith-controlled prompt template with dynamic variable injection.
Falls back to hardcoded prompt if LangSmith is unavailable.

The current datetime is intentionally NOT embedded here — it changes every request and
would invalidate the cache_control prefix. It is injected into the user turn instead
(see get_current_datetime_block); the prompt only carries a stable placeholder.

Args:
uid: User ID
app: Optional app/plugin for personalized behavior
Expand All @@ -408,23 +453,12 @@ def _get_agentic_qa_prompt(
"""
user_name = get_user_name(uid)

# Get timezone and current datetime in user's timezone
tz = notification_db.get_user_time_zone(uid)
try:
user_tz = ZoneInfo(tz)
current_datetime_user = datetime.now(user_tz)
current_datetime_str = current_datetime_user.strftime('%Y-%m-%d %H:%M:%S')
current_datetime_iso = current_datetime_user.isoformat()
logger.info(f"🌍 _get_agentic_qa_prompt - User timezone: {tz}, Current time: {current_datetime_str}")
except Exception:
# Fallback to UTC if timezone is invalid
current_datetime_user = datetime.now(timezone.utc)
current_datetime_str = current_datetime_user.strftime('%Y-%m-%d %H:%M:%S')
current_datetime_iso = current_datetime_user.isoformat()
tz = "UTC"
logger.warning(
f"🌍 _get_agentic_qa_prompt - User timezone: UTC (fallback), Current time: {current_datetime_str}"
)
# Resolve timezone only — the live datetime is injected into the user turn, not here,
# so the cached system prefix stays byte-identical across requests.
tz = get_user_timezone(uid)
current_datetime_str = CURRENT_DATETIME_PLACEHOLDER
current_datetime_iso = CURRENT_DATETIME_PLACEHOLDER
logger.info(f"🌍 _get_agentic_qa_prompt - User timezone: {tz}")

# Handle persona apps - they override the entire system prompt
if app and app.is_a_persona():
Expand Down Expand Up @@ -686,19 +720,19 @@ def _get_agentic_qa_prompt(
- Format: YYYY-MM-DDTHH:MM:SS+HH:MM (e.g., "2024-01-19T15:00:00-08:00" for PST)
- NEVER use datetime without timezone (e.g., "2024-01-19T07:15:00" is WRONG)
- The timezone offset must match {user_name}'s timezone ({tz})
- Current time reference: {current_datetime_iso}
- Use the current time from the <current_datetime> block in the latest user message as your reference

2. **For "X hours ago" or "X minutes ago" queries:**
- Work in {user_name}'s timezone: {tz}
- Identify the specific hour that was X hours/minutes ago
- start_date: Beginning of that hour (HH:00:00)
- end_date: End of that hour (HH:59:59)
- Example: User asks "3 hours ago", current time in {tz} is {current_datetime_iso}
* Calculate: {current_datetime_iso} minus 3 hours
* Get the hour boundary: if result is 2024-01-19T14:23:45-08:00, use hour 14
- Example (illustrative): if the current time were "2024-01-19T17:23:45-08:00" and the user asks "3 hours ago"
* Calculate: 17:23:45 minus 3 hours
* Get the hour boundary: result is 2024-01-19T14:23:45-08:00, so use hour 14
* start_date = "2024-01-19T14:00:00-08:00"
* end_date = "2024-01-19T14:59:59-08:00"
- Format both with the timezone offset for {tz}
- Always use the actual current time from the <current_datetime> block, formatted with the timezone offset for {tz}

3. **For "today" queries:**
- start_date: Start of today in {tz} (00:00:00)
Expand Down
2 changes: 1 addition & 1 deletion backend/utils/observability/langsmith_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ def _get_fallback_agentic_prompt_template() -> str:
- Format: YYYY-MM-DDTHH:MM:SS+HH:MM (e.g., "2024-01-19T15:00:00-08:00" for PST)
- NEVER use datetime without timezone (e.g., "2024-01-19T07:15:00" is WRONG)
- The timezone offset must match {user_name}'s timezone ({tz})
- Current time reference: {current_datetime_iso}
- Use the current time from the <current_datetime> block in the latest user message as your reference

2. **For "X hours ago" or "X minutes ago" queries:**
- Work in {user_name}'s timezone: {tz}
Expand Down
24 changes: 22 additions & 2 deletions backend/utils/retrieval/agentic.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
from utils.retrieval.tools.app_tools import load_app_tools, get_tool_status_message
from utils.retrieval.safety import AgentSafetyGuard, SafetyGuardError
from utils.llm.clients import anthropic_client, ANTHROPIC_AGENT_MODEL
from utils.llm.chat import _get_agentic_qa_prompt
from utils.llm.chat import _get_agentic_qa_prompt, get_current_datetime_block
from utils.other.endpoints import timeit
from utils.observability.langsmith import is_langsmith_enabled
import logging
Expand Down Expand Up @@ -340,6 +340,24 @@ def _messages_to_anthropic(messages: List[Message]) -> list:
return anthropic_messages


def _inject_current_datetime(anthropic_messages: list, datetime_block: str) -> list:
    """Prepend the current-datetime block to the latest user turn.

    The datetime changes every request, so it is kept out of the cache_control system
    prefix (which must stay byte-identical for prompt-cache hits) and delivered here in the
    user turn instead.

    Both content shapes of a user turn are handled:
      * ``str`` content: the block is prepended, separated by a blank line.
      * ``list`` content (content blocks): a text block is inserted ahead of the existing
        blocks, but after any leading ``tool_result`` blocks, which must stay first.

    Args:
        anthropic_messages: Conversation in Anthropic message format. Modified in place.
        datetime_block: Text to inject. An empty value leaves the messages untouched.

    Returns:
        The same ``anthropic_messages`` list. If no user turn with str or list content
        exists, a new user message containing only the block is appended.
    """
    if not datetime_block:
        return anthropic_messages

    for msg in reversed(anthropic_messages):
        if msg.get("role") != "user":
            continue
        content = msg.get("content")

        if isinstance(content, str):
            msg["content"] = f"{datetime_block}\n\n{content}"
            return anthropic_messages

        if isinstance(content, list):
            # Skip past leading tool_result blocks (dicts or SDK objects) so they remain
            # at the front of the turn.
            insert_at = 0
            for item in content:
                item_type = item.get("type") if isinstance(item, dict) else getattr(item, "type", None)
                if item_type != "tool_result":
                    break
                insert_at += 1
            # Build a new list instead of mutating one the caller may still hold.
            msg["content"] = [
                *content[:insert_at],
                {"type": "text", "text": datetime_block},
                *content[insert_at:],
            ]
            return anthropic_messages
        # Any other content shape: keep looking at earlier user turns.

    # No usable user turn at all: deliver the block as its own user message.
    anthropic_messages.append({"role": "user", "content": datetime_block})
    return anthropic_messages


# ---------------------------------------------------------------------------
# Core Anthropic agent streaming loop
# ---------------------------------------------------------------------------
Expand Down Expand Up @@ -575,8 +593,10 @@ async def execute_agentic_chat_stream(
# Convert tools to Anthropic format (core = visible, app = defer_loading)
tool_schemas, tool_registry = _convert_tools(core_tools, app_tools)

# Convert messages to Anthropic format
# Convert messages to Anthropic format. The current datetime is injected into the user
# turn (not the system prompt) so the cache_control system prefix stays byte-stable.
anthropic_messages = _messages_to_anthropic(messages)
anthropic_messages = _inject_current_datetime(anthropic_messages, get_current_datetime_block(uid))

callback = AsyncStreamingCallback()

Expand Down