From edb81275be86330702c324dc99a3e5c353f2d945 Mon Sep 17 00:00:00 2001
From: sanil-23 <sanil@alphahuman.xyz>
Date: Thu, 2 Jul 2026 18:22:58 +0530
Subject: [PATCH] fix(tinyagents): keep the attempted tool name in the timeline
 for unavailable tools
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The tinyagents harness (#4249) rewrites a call for a tool the agent can't see
to UNKNOWN_TOOL_SENTINEL before the progress event fires. The observability
bridge then emitted ToolCallStarted{tool_name: sentinel} with the same call_id,
and the frontend (keying tool-timeline rows by call_id) overwrote the real
streamed name (e.g. web_fetch) with the sentinel — dropping the attempted tool
from the UI timeline. The pre-tinyagents engine emitted the real name first, so
this regressed the Playwright chat-tool-call specs on every PR.

Skip forwarding the sentinel ToolStarted; the streamed tool_args_delta row keeps
the attempted name. The sentinel ToolCompleted only updates status by call_id.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 src/openhuman/tinyagents/observability.rs | 48 ++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/src/openhuman/tinyagents/observability.rs b/src/openhuman/tinyagents/observability.rs
index 49df8652d0..6b29049a08 100644
--- a/src/openhuman/tinyagents/observability.rs
+++ b/src/openhuman/tinyagents/observability.rs
@@ -259,7 +259,19 @@ impl EventListener for OpenhumanEventBridge {
             // exactly once per model call; prefer it over `ModelCompleted`'s
             // optional usage to avoid double counting.
             AgentEvent::UsageRecorded { usage } => self.record_usage(usage),
-            AgentEvent::ToolStarted { call_id, tool_name } => {
+            AgentEvent::ToolStarted { call_id, tool_name }
+                if tool_name.as_str() != super::tools::UNKNOWN_TOOL_SENTINEL =>
+            {
+                // The guard above skips the sentinel Started event, so this arm only runs for
+                // real tool names. When the model calls a tool the agent can't see,
+                // `UnknownToolRewriteMiddleware` rewrites the name to `UNKNOWN_TOOL_SENTINEL`
+                // *before* this event fires. The frontend keys tool-timeline rows by `call_id`
+                // and overwrites the name on Started, so forwarding the sentinel would clobber
+                // a real streamed `web_fetch` row — dropping the attempted tool name from the
+                // timeline (regression vs. the pre-tinyagents engine, which emitted the real
+                // name before the availability block). The streamed `tool_args_delta` row
+                // (carrying the attempted name) survives, and the sentinel `ToolCompleted`
+                // only updates status by `call_id`.
                 let iteration = self.iteration();
                 match &self.scope {
                     None => self.send(AgentProgress::ToolCallStarted {
@@ -360,6 +372,40 @@ mod tests {
         let (input, output, _) = bridge.totals();
         assert_eq!((input, output), (100, 40));
     }
+
+    #[tokio::test]
+    async fn sentinel_tool_started_is_not_forwarded() {
+        // #4249 regression guard. Emits two `ToolStarted` events through the bridge
+        // — one named `UNKNOWN_TOOL_SENTINEL`, one named `web_fetch` — and asserts
+        // that only `web_fetch` arrives as `AgentProgress::ToolCallStarted`. The
+        // frontend keys tool-timeline rows by `call_id` and overwrites the name on
+        // Started, so a forwarded sentinel would clobber the real streamed row.
+        let (tx, mut rx) = tokio::sync::mpsc::channel(64);
+        let bridge = OpenhumanEventBridge::new(Some(tx), "mock-model", 10);
+        let sink = EventSink::new();
+        sink.subscribe(bridge.clone());
+
+        sink.emit(AgentEvent::ToolStarted {
+            call_id: "c1".into(), // sentinel call: expected to be skipped
+            tool_name: crate::openhuman::tinyagents::tools::UNKNOWN_TOOL_SENTINEL.to_string(),
+        });
+        sink.emit(AgentEvent::ToolStarted {
+            call_id: "c2".into(), // real tool call: expected to be forwarded
+            tool_name: "web_fetch".to_string(),
+        });
+
+        let mut started_names = Vec::new(); // names seen in `ToolCallStarted`, in arrival order
+        while let Ok(p) = rx.try_recv() {
+            if let AgentProgress::ToolCallStarted { tool_name, .. } = p {
+                started_names.push(tool_name);
+            }
+        }
+        assert_eq!(
+            started_names,
+            vec!["web_fetch".to_string()],
+            "sentinel Started must be skipped; the real tool name must still forward"
+        );
+    }
 }
 
 /// A [`GraphEventSink`] that mirrors the `tinyagents` graph executor's lifecycle