class _FixedDatetime:
    """Stand-in for the ``datetime`` class whose ``now()`` returns a fixed instant.

    Tests assign an instance of this over a module's ``datetime`` name so the code
    under test sees a frozen clock. Everything other than ``now`` is forwarded to the
    real ``datetime`` class, including constructor-style calls.
    """

    def __init__(self, fixed):
        # The instant reported by every now() call.
        self._fixed = fixed

    def now(self, tz=None):
        """Return the fixed instant, converted to ``tz`` when one is supplied."""
        if tz is None:
            return self._fixed
        return self._fixed.astimezone(tz)

    def __call__(self, *args, **kwargs):
        """Build a real ``datetime`` from the given arguments.

        Calling an object resolves ``__call__`` on its type and never consults
        ``__getattr__``, so without this method ``datetime(2024, 1, 1)`` evaluated
        against the stand-in would raise TypeError instead of passing through.
        """
        from datetime import datetime as _real_datetime

        return _real_datetime(*args, **kwargs)

    def __getattr__(self, name):
        """Forward any attribute not defined here to the real ``datetime`` class."""
        from datetime import datetime as _real_datetime

        return getattr(_real_datetime, name)
def test_current_datetime_block_carries_live_time():
    """get_current_datetime_block must produce the live time for injection into the user turn.

    The module's ``datetime`` is swapped for a frozen clock so the expected values are
    deterministic, and is always restored afterwards.
    """
    from datetime import datetime as _dt

    chat_mod = _get_chat_module()
    _set_user(chat_mod, "Alice", "America/New_York")

    frozen = _dt(2024, 1, 19, 14, 23, 45, 123456, tzinfo=timezone.utc)
    real_datetime = chat_mod.datetime
    chat_mod.datetime = _FixedDatetime(frozen)
    try:
        block = chat_mod.get_current_datetime_block("uid_alice")
    finally:
        chat_mod.datetime = real_datetime

    # An empty-string membership check is always true, so assert on content the
    # block is actually required to carry: both labelled lines and the live value.
    assert "Current date time in " in block, "Datetime block should label the human-readable time"
    assert "Current date time ISO format: " in block, "Datetime block should label the ISO time"
    assert "2024-01-19" in block, "Datetime block should contain the live date"
    # isoformat() keeps non-zero microseconds, whichever timezone was resolved.
    assert "123456" in block, "Datetime block should contain the live ISO timestamp"
# The agentic system prompt is wrapped in a single Anthropic cache_control breakpoint,
# so any byte that changes per request invalidates the whole cached prefix. The current
# datetime is the only such value (microsecond ISO), so it is kept OUT of the system prompt
# and injected into the user turn instead (see get_current_datetime_block / agentic.py).
# The system prompt references this placeholder so the datetime instructions still make sense.
# NOTE(review): the literal reads as though a block/tag name is missing after "see" —
# confirm it matches the wrapper emitted by get_current_datetime_block.
CURRENT_DATETIME_PLACEHOLDER = "(see in the latest user message)"


def get_user_timezone(uid: str) -> str:
    """Resolve the user's timezone name, falling back to UTC when missing/invalid.

    Args:
        uid: User ID whose stored timezone is looked up.

    Returns:
        The stored timezone name if ``ZoneInfo`` accepts it, otherwise ``"UTC"``.
    """
    tz = notification_db.get_user_time_zone(uid)
    try:
        # Constructed only to validate the name; the instance itself is discarded.
        ZoneInfo(tz)
    except Exception:
        # Deliberately broad: a missing value (None) and an unknown or malformed key
        # raise different exception types, and all of them mean "use UTC". The
        # fallback is logged so bad timezone data does not go unnoticed.
        logger.warning("🌍 get_user_timezone - invalid or missing timezone %r, falling back to UTC", tz)
        return "UTC"
    return tz


def get_current_datetime_block(uid: str) -> str:
    """Build the current-datetime block injected into the user turn.

    Kept out of the cached system prefix so the cached bytes stay stable across requests
    while the model still receives the live time. Uses the same timezone resolution
    (get_user_timezone) as _get_agentic_qa_prompt.

    Args:
        uid: User ID whose timezone determines how the time is rendered.

    Returns:
        A multi-line string holding the current time as ``YYYY-MM-DD HH:MM:SS`` and in
        ISO 8601 form, both in the resolved timezone.
    """
    tz = get_user_timezone(uid)
    try:
        current_datetime_user = datetime.now(ZoneInfo(tz))
    except Exception:
        # get_user_timezone has already validated the name, so this is purely
        # defensive; never let a clock lookup break the chat request.
        logger.warning("🌍 get_current_datetime_block - could not use timezone %r, falling back to UTC", tz)
        current_datetime_user = datetime.now(timezone.utc)
        tz = "UTC"
    current_datetime_str = current_datetime_user.strftime('%Y-%m-%d %H:%M:%S')
    current_datetime_iso = current_datetime_user.isoformat()
    return (
        "\n"
        f"Current date time in {tz}: {current_datetime_str}\n"
        f"Current date time ISO format: {current_datetime_iso}\n"
        ""
    )
+ Args: uid: User ID app: Optional app/plugin for personalized behavior @@ -408,23 +453,12 @@ def _get_agentic_qa_prompt( """ user_name = get_user_name(uid) - # Get timezone and current datetime in user's timezone - tz = notification_db.get_user_time_zone(uid) - try: - user_tz = ZoneInfo(tz) - current_datetime_user = datetime.now(user_tz) - current_datetime_str = current_datetime_user.strftime('%Y-%m-%d %H:%M:%S') - current_datetime_iso = current_datetime_user.isoformat() - logger.info(f"🌍 _get_agentic_qa_prompt - User timezone: {tz}, Current time: {current_datetime_str}") - except Exception: - # Fallback to UTC if timezone is invalid - current_datetime_user = datetime.now(timezone.utc) - current_datetime_str = current_datetime_user.strftime('%Y-%m-%d %H:%M:%S') - current_datetime_iso = current_datetime_user.isoformat() - tz = "UTC" - logger.warning( - f"🌍 _get_agentic_qa_prompt - User timezone: UTC (fallback), Current time: {current_datetime_str}" - ) + # Resolve timezone only — the live datetime is injected into the user turn, not here, + # so the cached system prefix stays byte-identical across requests. + tz = get_user_timezone(uid) + current_datetime_str = CURRENT_DATETIME_PLACEHOLDER + current_datetime_iso = CURRENT_DATETIME_PLACEHOLDER + logger.info(f"🌍 _get_agentic_qa_prompt - User timezone: {tz}") # Handle persona apps - they override the entire system prompt if app and app.is_a_persona(): @@ -686,19 +720,19 @@ def _get_agentic_qa_prompt( - Format: YYYY-MM-DDTHH:MM:SS+HH:MM (e.g., "2024-01-19T15:00:00-08:00" for PST) - NEVER use datetime without timezone (e.g., "2024-01-19T07:15:00" is WRONG) - The timezone offset must match {user_name}'s timezone ({tz}) - - Current time reference: {current_datetime_iso} + - Use the current time from the block in the latest user message as your reference 2. 
**For "X hours ago" or "X minutes ago" queries:** - Work in {user_name}'s timezone: {tz} - Identify the specific hour that was X hours/minutes ago - start_date: Beginning of that hour (HH:00:00) - end_date: End of that hour (HH:59:59) - - Example: User asks "3 hours ago", current time in {tz} is {current_datetime_iso} - * Calculate: {current_datetime_iso} minus 3 hours - * Get the hour boundary: if result is 2024-01-19T14:23:45-08:00, use hour 14 + - Example (illustrative): if the current time were "2024-01-19T17:23:45-08:00" and the user asks "3 hours ago" + * Calculate: 17:23:45 minus 3 hours + * Get the hour boundary: result is 2024-01-19T14:23:45-08:00, so use hour 14 * start_date = "2024-01-19T14:00:00-08:00" * end_date = "2024-01-19T14:59:59-08:00" - - Format both with the timezone offset for {tz} + - Always use the actual current time from the block, formatted with the timezone offset for {tz} 3. **For "today" queries:** - start_date: Start of today in {tz} (00:00:00) diff --git a/backend/utils/observability/langsmith_prompts.py b/backend/utils/observability/langsmith_prompts.py index 15e020e5a9a..2a696004a5a 100644 --- a/backend/utils/observability/langsmith_prompts.py +++ b/backend/utils/observability/langsmith_prompts.py @@ -278,7 +278,7 @@ def _get_fallback_agentic_prompt_template() -> str: - Format: YYYY-MM-DDTHH:MM:SS+HH:MM (e.g., "2024-01-19T15:00:00-08:00" for PST) - NEVER use datetime without timezone (e.g., "2024-01-19T07:15:00" is WRONG) - The timezone offset must match {user_name}'s timezone ({tz}) - - Current time reference: {current_datetime_iso} + - Use the current time from the block in the latest user message as your reference 2. 
def _inject_current_datetime(anthropic_messages: list, datetime_block: str) -> list:
    """Attach the current-datetime block to the latest user turn.

    The datetime changes every request, so it is kept out of the cache_control system
    prefix (which must stay byte-identical for prompt-cache hits) and delivered here in
    the user turn instead.

    Only the most recent user message is ever touched, so the live time is never
    stamped onto an older turn:

    * string content: the block is prepended, separated by a blank line;
    * list content (content blocks): the block is inserted as a text block right
      before the first existing text block, or appended when there is none;
    * no user message at all, or content of an unexpected type: a new user message
      carrying only the block is appended.

    Args:
        anthropic_messages: Anthropic-format message dicts. The list is updated in
            place; the message dicts themselves are replaced rather than mutated.
        datetime_block: Text to inject. An empty value makes this a no-op.

    Returns:
        The same ``anthropic_messages`` list object.
    """
    if not datetime_block:
        return anthropic_messages

    latest_user_index = next(
        (
            index
            for index in reversed(range(len(anthropic_messages)))
            if anthropic_messages[index].get("role") == "user"
        ),
        None,
    )

    if latest_user_index is not None:
        message = anthropic_messages[latest_user_index]
        content = message.get("content")
        new_content = None
        if isinstance(content, str):
            new_content = f"{datetime_block}\n\n{content}"
        elif isinstance(content, list):
            # Insert ahead of the first text block rather than at index 0, so any
            # leading non-text blocks (e.g. tool results, images) keep their position
            # at the front of the turn.
            insert_at = next(
                (
                    position
                    for position, part in enumerate(content)
                    if isinstance(part, dict) and part.get("type") == "text"
                ),
                len(content),
            )
            new_content = [
                *content[:insert_at],
                {"type": "text", "text": datetime_block},
                *content[insert_at:],
            ]
        if new_content is not None:
            # Swap in a new dict so message objects shared with the caller's original
            # history are not modified behind its back.
            anthropic_messages[latest_user_index] = {**message, "content": new_content}
            return anthropic_messages

    anthropic_messages.append({"role": "user", "content": datetime_block})
    return anthropic_messages