diff --git a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py index 04ea2427b..c8bf007cc 100644 --- a/hindsight-api-slim/hindsight_api/engine/reflect/agent.py +++ b/hindsight-api-slim/hindsight_api/engine/reflect/agent.py @@ -109,6 +109,72 @@ def _clean_answer_text(text: str) -> str: return cleaned if cleaned else text +# Raw, unparsed tool-call scaffolding that some providers leak as answer text +# when they fail to turn a model's tool-call turn into structured tool_calls +# (notably gpt-oss / harmony-format models served via litellm Vertex MaaS). +# See issue #2222. +# +# Harmony frame tokens delimit a real harmony turn. A frame token must be present +# before anything is flagged, so prose that merely quotes the routing namespace +# (``to=functions.x``) or a lone control token while explaining the format is not +# mistaken for a leak. +_HARMONY_FRAME_TOKEN = re.compile(r"<\|(?:start|channel|message)\|>") +# Evidence of an actual tool call inside that frame: the ``<|call|>`` commit token +# that terminates a call, or the tool-routing namespace (e.g. ``to=functions.recall``). +_HARMONY_CALL_TOKEN = re.compile(r"<\|call\|>") +_HARMONY_TOOL_ROUTING = re.compile(r"(?:^|[\s>])to=(?:functions|tool)\b", re.IGNORECASE) +# The done() tool's internal citation fields. They never appear in a legitimate +# free-text answer, so a JSON object carrying any of them is an unambiguous +# leaked done() payload (vs. a plain ``{"answer": ...}`` blob with no id fields, +# which is intentionally left alone since it is indistinguishable from a valid +# JSON answer). +_DONE_TOOL_ID_FIELDS = ("memory_ids", "mental_model_ids", "observation_ids") + + +def _looks_like_unparsed_tool_call(text: str) -> bool: + """Return True if ``text`` is raw, unparsed tool-call scaffolding. + + Some providers intermittently fail to parse a model's tool-call turn into + structured ``tool_calls`` (notably gpt-oss / harmony-format models served + through litellm Vertex MaaS). The raw harmony tool-call structure + (``<|channel|>commentary to=functions.recall<|message|>{...}<|call|>``), or a + done() payload emitted as bare JSON that carries the tool's internal citation + id fields, then land in ``message.content`` with no tool executed. Surfacing + that verbatim returns corrupt, unsynthesized output to the user (issue + #2222), so it must be treated as a failed parse and routed to final synthesis + instead of accepted as an answer. + + Detection is intentionally narrow to avoid rejecting valid answers. Harmony + scaffolding is only flagged when a harmony frame token is present together + with call evidence (a ``<|call|>`` commit token or the ``to=functions/tool`` + routing namespace); an answer that merely quotes one of those tokens while + explaining the format has no such pairing and is left untouched. A bare + ``{"answer": ...}`` object with no id fields is likewise left alone, being + indistinguishable from a valid JSON answer. (An answer that embeds a full + harmony tool-call example verbatim would be re-synthesized rather than echoed + — an accepted, non-destructive trade-off given how rare that is for a + memory-synthesis answer and that the fallback reuses the same evidence.) + """ + if not text: + return False + stripped = text.strip() + if _HARMONY_FRAME_TOKEN.search(stripped) and ( + _HARMONY_CALL_TOKEN.search(stripped) or _HARMONY_TOOL_ROUTING.search(stripped) + ): + return True + # done() emitted as bare JSON text instead of a structured tool call. Key on + # the internal id fields (which never appear in a real answer); the ``answer`` + # field is optional so a leaked payload that omits it is still caught. + if stripped.startswith("{") and stripped.endswith("}"): + try: + obj = json.loads(stripped) + except (ValueError, TypeError): + return False + if isinstance(obj, dict) and any(f in obj for f in _DONE_TOOL_ID_FIELDS): + return True + return False + + def _clean_done_answer(text: str) -> str: """Clean up the answer field from a done() tool call. @@ -750,7 +816,19 @@ def _log_completion(answer: str, iterations: int, forced: bool = False): bool(available_memory_ids) or bool(available_mental_model_ids) or bool(available_observation_ids) ) directive_leak_risk = directives and not has_gathered_evidence - if result.content and not directive_leak_risk: + # Some providers (e.g. gpt-oss / harmony models via litellm Vertex + # MaaS) intermittently fail to parse a tool-call turn into structured + # ``tool_calls`` and leave raw harmony scaffolding (or a done() payload + # carrying internal citation ids) in ``content`` with ``tool_calls`` + # empty. Accepting that as the answer surfaces corrupt, unsynthesized + # text to the user (issue #2222), so route it to final synthesis. + unparsed_tool_call = bool(result.content) and _looks_like_unparsed_tool_call(result.content) + if unparsed_tool_call: + logger.warning( + f"[REFLECT {reflect_id}] Discarding unparsed tool-call scaffolding in answer " + f"text on iteration {iteration + 1}; forcing final synthesis." + ) + if result.content and not directive_leak_risk and not unparsed_tool_call: answer = _clean_answer_text(result.content.strip()) # The call_with_tools call above is intentionally uncapped so the diff --git a/hindsight-api-slim/tests/test_reflect_agent.py b/hindsight-api-slim/tests/test_reflect_agent.py index 5eb86ddb6..5a355f5a7 100644 --- a/hindsight-api-slim/tests/test_reflect_agent.py +++ b/hindsight-api-slim/tests/test_reflect_agent.py @@ -20,6 +20,7 @@ _count_messages_tokens, _is_context_overflow_error, _is_done_tool, + _looks_like_unparsed_tool_call, _normalize_tool_name, run_reflect_agent, ) @@ -142,6 +143,53 @@ def test_clean_answer_multiline_with_markdown(self): assert "mental_model_ids" not in cleaned +class TestUnparsedToolCallDetection: + """Detect raw harmony / tool-call scaffolding leaking as answer text (issue #2222).""" + + @pytest.mark.parametrize( + "text", + [ + # Full leaked harmony tool call: frame tokens + routing + commit. + "<|start|>assistant<|channel|>commentary to=functions.recall<|message|>{\"query\": \"x\"}<|call|>", + # Frame token + <|call|> commit (no routing namespace visible). + "<|channel|>commentary<|message|>{\"name\": \"recall\"}<|call|>", + # Frame token + routing, truncated before the commit token. + "<|channel|>commentary to=functions.recall<|message|>{\"query\": \"x\"}", + # done() emitted as bare JSON carrying the tool's internal id fields. + '{"answer": "Confirmed.", "memory_ids": ["mem-1"]}', + ' {"answer": "no data", "observation_ids": [], "mental_model_ids": []} ', + # Leaked done payload that omits "answer" but keeps the id fields. + '{"memory_ids": ["mem-1"], "mental_model_ids": []}', + ], + ) + def test_detects_scaffolding(self, text): + assert _looks_like_unparsed_tool_call(text) is True + + @pytest.mark.parametrize( + "text", + [ + "", + None, + "The team meets every Tuesday to review the roadmap.", + "Here is the answer: the launch is on Friday.", + "I found that the user prefers concise answers and dislikes long emails.", + "The function to=string mapping was discussed, but no tool was called.", + # Legitimate answers that merely *explain* harmony syntax must not be + # flagged: a frame token alone, or the routing namespace alone, is not + # a leaked call. + "The `<|channel|>` token marks a section boundary in the harmony format.", + "Use `<|start|>` and `<|message|>` to frame a turn; they are control tokens.", + "Use `to=functions.recall` for the recall tool and `to=tool.foo` for foo.", + # A bare answer object with no internal id fields is indistinguishable + # from a valid JSON answer, so it is deliberately NOT flagged. + '{"answer": "I\'m sorry, but I don\'t have that information."}', + '{"result": 42, "status": "ok"}', + ], + ) + def test_passes_clean_prose(self, text): + assert _looks_like_unparsed_tool_call(text) is False + + class TestToolNameNormalization: """Test tool name normalization for various LLM output formats.""" @@ -750,6 +798,65 @@ async def test_short_circuit_answer_under_cap_is_not_rewritten(self, mock_llm, m assert result.text == short_answer mock_llm.call.assert_not_called() + @pytest.mark.asyncio + async def test_unparsed_harmony_scaffolding_is_not_returned_as_answer(self, mock_llm, mock_functions): + """If a provider leaves raw harmony tool-call scaffolding in ``content`` with empty + ``tool_calls`` (litellm failing to parse a gpt-oss tool turn, issue #2222), the agent + must NOT surface that scaffolding; it routes to final synthesis instead. + """ + leaked = ( + "<|start|>assistant<|channel|>commentary " + 'to=functions.recall<|message|>{"query": "recent activity"}<|call|>' + ) + # Every agent turn leaks scaffolding instead of a parsed tool call. + mock_llm.call_with_tools.return_value = LLMToolCallResult( + tool_calls=[], + content=leaked, + finish_reason="stop", + input_tokens=10, + output_tokens=40, + ) + + result = await run_reflect_agent( + llm_config=mock_llm, + bank_id="test-bank", + query="summarize recent activity", + bank_profile={"name": "Test", "mission": "Testing"}, + **mock_functions, + ) + + # The corrupt scaffolding must never reach the user-visible answer. + assert "<|" not in result.text + assert "to=functions" not in result.text + # Instead the agent fell through to the forced final-synthesis path. + assert result.text == "Fallback answer from final iteration" + mock_llm.call.assert_called() + + @pytest.mark.asyncio + async def test_leaked_done_payload_with_ids_is_not_returned_as_answer(self, mock_llm, mock_functions): + """A done() payload emitted as bare JSON carrying the tool's internal id fields + (no real tool call) must be rejected and routed to final synthesis, not surfaced. + """ + mock_llm.call_with_tools.return_value = LLMToolCallResult( + tool_calls=[], + content='{"answer": "I\'m sorry, but I don\'t have that information.", "memory_ids": []}', + finish_reason="stop", + input_tokens=10, + output_tokens=15, + ) + + result = await run_reflect_agent( + llm_config=mock_llm, + bank_id="test-bank", + query="what is the status?", + bank_profile={"name": "Test", "mission": "Testing"}, + **mock_functions, + ) + + assert "memory_ids" not in result.text + assert result.text == "Fallback answer from final iteration" + mock_llm.call.assert_called() + @pytest.mark.asyncio async def test_max_iterations_reached(self, mock_llm, mock_functions): """Test that agent stops after max iterations even with errors."""