From edb81275be86330702c324dc99a3e5c353f2d945 Mon Sep 17 00:00:00 2001 From: sanil-23 Date: Thu, 2 Jul 2026 18:22:58 +0530 Subject: [PATCH] fix(tinyagents): keep the attempted tool name in the timeline for unavailable tools MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tinyagents harness (#4249) rewrites a call for a tool the agent can't see to UNKNOWN_TOOL_SENTINEL before the progress event fires. The observability bridge then emitted ToolCallStarted{tool_name: sentinel} with the same call_id, and the frontend (keying tool-timeline rows by call_id) overwrote the real streamed name (e.g. web_fetch) with the sentinel — dropping the attempted tool from the UI timeline. The pre-tinyagents engine emitted the real name first, so this regressed the Playwright chat-tool-call specs on every PR. Skip forwarding the sentinel ToolStarted; the streamed tool_args_delta row keeps the attempted name. The sentinel ToolCompleted only updates status by call_id. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/openhuman/tinyagents/observability.rs | 48 ++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/src/openhuman/tinyagents/observability.rs b/src/openhuman/tinyagents/observability.rs index 49df8652d0..6b29049a08 100644 --- a/src/openhuman/tinyagents/observability.rs +++ b/src/openhuman/tinyagents/observability.rs @@ -259,7 +259,19 @@ impl EventListener for OpenhumanEventBridge { // exactly once per model call; prefer it over `ModelCompleted`'s // optional usage to avoid double counting. AgentEvent::UsageRecorded { usage } => self.record_usage(usage), - AgentEvent::ToolStarted { call_id, tool_name } => { + AgentEvent::ToolStarted { call_id, tool_name } + if tool_name.as_str() != super::tools::UNKNOWN_TOOL_SENTINEL => + { + // Skip the sentinel Started event. When the model calls a tool the + // agent can't see, `UnknownToolRewriteMiddleware` rewrites the name + // to `UNKNOWN_TOOL_SENTINEL` *before* this event fires. The frontend + // keys tool-timeline rows by `call_id` and overwrites the name on + // Started, so a real streamed `web_fetch` row would be clobbered to + // the sentinel — dropping the attempted tool name from the timeline + // (regression vs. the pre-tinyagents engine, which emitted the real + // name before the availability block). The streamed + // `tool_args_delta` row (carrying the attempted name) survives, and + // the sentinel `ToolCompleted` only updates status by `call_id`. let iteration = self.iteration(); match &self.scope { None => self.send(AgentProgress::ToolCallStarted { @@ -360,6 +372,40 @@ mod tests { let (input, output, _) = bridge.totals(); assert_eq!((input, output), (100, 40)); } + + #[tokio::test] + async fn sentinel_tool_started_is_not_forwarded() { + // #4249 regression guard: a `ToolStarted` for the unknown-tool sentinel + // must NOT emit a `ToolCallStarted`. The frontend keys tool-timeline rows + // by `call_id` and overwrites the name on Started, so forwarding the + // sentinel would clobber the real streamed row (e.g. `web_fetch`) and drop + // the attempted tool from the UI timeline. A real tool name still forwards. + let (tx, mut rx) = tokio::sync::mpsc::channel(64); + let bridge = OpenhumanEventBridge::new(Some(tx), "mock-model", 10); + let sink = EventSink::new(); + sink.subscribe(bridge.clone()); + + sink.emit(AgentEvent::ToolStarted { + call_id: "c1".into(), + tool_name: crate::openhuman::tinyagents::tools::UNKNOWN_TOOL_SENTINEL.to_string(), + }); + sink.emit(AgentEvent::ToolStarted { + call_id: "c2".into(), + tool_name: "web_fetch".to_string(), + }); + + let mut started_names = Vec::new(); + while let Ok(p) = rx.try_recv() { + if let AgentProgress::ToolCallStarted { tool_name, .. } = p { + started_names.push(tool_name); + } + } + assert_eq!( + started_names, + vec!["web_fetch".to_string()], + "sentinel Started must be skipped; the real tool name must still forward" + ); + } } /// A [`GraphEventSink`] that mirrors the `tinyagents` graph executor's lifecycle