Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/bin/harness_subagent_audit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,6 +419,7 @@ async fn drain_progress(
output_chars,
elapsed_ms,
iteration,
..
} => {
eprintln!(
"[harness_subagent_audit] progress turn={} parent_tool_completed tool={} call_id={} success={} output_chars={} elapsed_ms={} iteration={}",
Expand Down
6 changes: 6 additions & 0 deletions src/core/socketio.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,12 @@ pub struct WebChannelEvent {
/// `tool_result` events.
#[serde(skip_serializing_if = "Option::is_none")]
pub tool_call_id: Option<String>,
/// Structured, user-facing classification of a failed tool call (class,
/// category, plain-language cause + next action). Present on `tool_result`
/// events when the tool failed; the chat "View processing" timeline renders
/// the "why / what to do next" pair. `None` on success.
#[serde(skip_serializing_if = "Option::is_none")]
pub failure: Option<serde_json::Value>,
/// Optional citations attached to `chat_done` payloads.
#[serde(skip_serializing_if = "Option::is_none")]
pub citations: Option<serde_json::Value>,
Expand Down
1 change: 1 addition & 0 deletions src/openhuman/agent/harness/session/tool_progress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ impl ProgressReporter for TurnProgress {
output_chars: output.chars().count(),
elapsed_ms,
iteration,
failure: None,
},
);
}
Expand Down
5 changes: 5 additions & 0 deletions src/openhuman/agent/progress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ pub enum AgentProgress {
elapsed_ms: u64,
/// 1-based iteration index.
iteration: u32,
/// Optional user-facing classification of the failure (class, category,
/// plain-language cause + next action) that the chat "View processing"
/// timeline renders. Only ever set when `success` is false; `None` on
/// success, on legacy snapshots, and from emitters that do not classify
/// failures. See `crate::openhuman::tool_status`.
failure: Option<crate::openhuman::tool_status::ClassifiedFailure>,
},

/// A sub-agent was spawned during tool execution.
Expand Down
1 change: 1 addition & 0 deletions src/openhuman/agent/progress_tracing/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ fn tool_completed(
output_chars: chars,
elapsed_ms: elapsed,
iteration: 1,
failure: None,
}
}

Expand Down
1 change: 1 addition & 0 deletions src/openhuman/channels/proactive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ impl EventHandler for ProactiveMessageSubscriber {
delta: None,
delta_kind: None,
tool_call_id: None,
failure: None,
citations: None,
subagent: None,
task_board: None,
Expand Down
3 changes: 3 additions & 0 deletions src/openhuman/channels/providers/presentation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ pub(crate) async fn deliver_response(
delta: None,
delta_kind: None,
tool_call_id: None,
failure: None,
subagent: None,
task_board: None,
tool_display_label: None,
Expand Down Expand Up @@ -148,6 +149,7 @@ pub(crate) async fn deliver_response(
delta: None,
delta_kind: None,
tool_call_id: None,
failure: None,
subagent: None,
task_board: None,
tool_display_label: None,
Expand Down Expand Up @@ -188,6 +190,7 @@ pub(crate) async fn deliver_response(
delta: None,
delta_kind: None,
tool_call_id: None,
failure: None,
subagent: None,
task_board: None,
tool_display_label: None,
Expand Down
7 changes: 6 additions & 1 deletion src/openhuman/channels/providers/web/progress_bridge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -410,7 +410,10 @@ pub(crate) fn spawn_progress_bridge(
output_chars,
elapsed_ms,
iteration,
failure,
} => {
// Serialize the classified failure (if any) for the UI + ledger.
let failure_json = failure.as_ref().and_then(|f| serde_json::to_value(f).ok());
ledger_append_event(
&config,
RunEventAppend {
Expand All @@ -422,7 +425,8 @@ pub(crate) fn spawn_progress_bridge(
"success": success,
"outputChars": output_chars,
"elapsedMs": elapsed_ms,
"iteration": iteration
"iteration": iteration,
"failure": failure_json,
}),
},
);
Expand All @@ -440,6 +444,7 @@ pub(crate) fn spawn_progress_bridge(
success: Some(success),
round: Some(iteration),
tool_call_id: Some(call_id),
failure: failure_json,
..Default::default()
});
}
Expand Down
1 change: 1 addition & 0 deletions src/openhuman/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ pub mod tls;
pub mod todos;
pub mod tokenjuice;
pub mod tool_registry;
pub mod tool_status;
pub mod tool_timeout;
pub mod tools;
pub mod update;
Expand Down
2 changes: 2 additions & 0 deletions src/openhuman/threads/turn_state/mirror_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ fn tool_call_start_and_complete_track_timeline() {
output_chars: 12,
elapsed_ms: 50,
iteration: 1,
failure: None,
});
let s = m.snapshot();
assert_eq!(s.tool_timeline[0].status, ToolTimelineStatus::Success);
Expand Down Expand Up @@ -203,6 +204,7 @@ fn tool_call_started_reuses_args_delta_placeholder_for_same_call_id() {
output_chars: 1,
elapsed_ms: 5,
iteration: 1,
failure: None,
});
assert_eq!(m.snapshot().tool_timeline.len(), 1);
assert_eq!(
Expand Down
37 changes: 34 additions & 3 deletions src/openhuman/tinyagents/middleware.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1360,11 +1360,18 @@ impl ToolMiddleware<()> for ToolPolicyMiddleware {
/// chain so it records the final (summarized/capped) content the transcript keeps.
pub(crate) struct ToolOutcomeCaptureMiddleware {
/// Shared per-call outcome log: `after_tool` appends one `ToolCallOutcome`
/// (call id, tool name, success flag, content) per tool result. The push is
/// skipped if the lock cannot be taken.
sink: super::ToolOutcomeSink,
/// `call_id → (success, classified failure)` side-channel read by the event
/// bridge when projecting `ToolCallCompleted` (the crate event lacks the
/// success/error the failure UI needs). `after_tool` inserts one entry per
/// tool result; the failure half is `None` when the call succeeded.
failure_map: super::observability::ToolFailureMap,
}

impl ToolOutcomeCaptureMiddleware {
pub(crate) fn new(sink: super::ToolOutcomeSink) -> Self {
Self { sink }
pub(crate) fn new(
sink: super::ToolOutcomeSink,
failure_map: super::observability::ToolFailureMap,
) -> Self {
Self { sink, failure_map }
}
}

Expand All @@ -1380,11 +1387,35 @@ impl Middleware<()> for ToolOutcomeCaptureMiddleware {
_state: &(),
result: &mut TaToolResult,
) -> TaResult<()> {
let success = result.error.is_none();
// Classify the failure so the live `ToolCallCompleted` event and the
// persisted timeline can explain it in plain language. A hard
// policy/permission denial is its own class; otherwise heuristics over
// the error text (`timed_out` detected from the timeout branch's phrase).
let failure = if success {
None
} else {
let text = result.error.as_deref().unwrap_or(result.content.as_str());
if result
.content
.contains(crate::openhuman::security::POLICY_BLOCKED_MARKER)
{
Some(crate::openhuman::tool_status::describe(
crate::openhuman::tool_status::ToolFailureClass::BlockedByPolicy,
))
} else {
let timed_out = result.content.contains("timed out");
Some(crate::openhuman::tool_status::classify(text, timed_out))
}
};
if let Ok(mut map) = self.failure_map.lock() {
map.insert(result.call_id.clone(), (success, failure));
}
if let Ok(mut sink) = self.sink.lock() {
sink.push(super::ToolCallOutcome {
call_id: result.call_id.clone(),
name: result.name.clone(),
success: result.error.is_none(),
success,
content: result.content.clone(),
});
}
Expand Down
13 changes: 12 additions & 1 deletion src/openhuman/tinyagents/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ pub(crate) use embeddings::ProviderEmbeddingModel;
pub(crate) use middleware::{HandoffConfig, SuperContextConfig, TurnContextMiddleware};
use model::ProviderModel;
pub(crate) use observability::SubagentScope;
use observability::{CapPauser, IterationCursor, OpenhumanEventBridge, ToolNameMap};
use observability::{
CapPauser, IterationCursor, OpenhumanEventBridge, ToolFailureMap, ToolNameMap,
};
pub(crate) use run_cancellation_context::{current_run_cancellation, with_run_cancellation};
#[cfg(test)]
use tools::ToolAdapter;
Expand Down Expand Up @@ -396,6 +398,7 @@ pub(crate) async fn run_turn_via_tinyagents_shared(
harness,
cursor,
tool_names,
failure_map,
error_slot,
halt_summary,
tool_outcome_sink,
Expand Down Expand Up @@ -540,6 +543,7 @@ pub(crate) async fn run_turn_via_tinyagents_shared(
subagent_scope.clone(),
cursor.clone(),
tool_names.clone(),
failure_map.clone(),
);
events.subscribe(bridge.clone());
Some(bridge)
Expand Down Expand Up @@ -858,6 +862,10 @@ struct AssembledTurnHarness {
/// writes it on tool-call start; the event bridge reads it to label the
/// tool-argument fragments it now projects off the crate stream.
tool_names: ToolNameMap,
/// Shared `call_id → (success, failure)` side-channel: the tool-outcome
/// capture middleware classifies each outcome; the event bridge reads it to
/// project real success + a user-facing failure onto `ToolCallCompleted`.
failure_map: ToolFailureMap,
/// Recovers the original (downcastable) provider error on run failure.
error_slot: crate::openhuman::tinyagents::model::ProviderErrorSlot,
/// Root-cause summary recorded by the repeated-tool-failure breaker.
Expand Down Expand Up @@ -1352,8 +1360,10 @@ fn assemble_turn_harness(
// result into a `Message::tool` that drops the failure flag, so the turn can
// build honest per-call `ToolCallRecord`s (post-turn hooks + cap checkpoint).
let tool_outcome_sink: ToolOutcomeSink = std::sync::Arc::new(std::sync::Mutex::new(Vec::new()));
let failure_map: ToolFailureMap = Arc::default();
harness.push_middleware(Arc::new(middleware::ToolOutcomeCaptureMiddleware::new(
tool_outcome_sink.clone(),
failure_map.clone(),
)));

// Builder-configured tool policy (`.tool_policy()`), enforced at the tool
Expand Down Expand Up @@ -1382,6 +1392,7 @@ fn assemble_turn_harness(
harness,
cursor,
tool_names,
failure_map,
error_slot,
halt_summary,
tool_outcome_sink,
Expand Down
57 changes: 48 additions & 9 deletions src/openhuman/tinyagents/observability.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,25 @@ pub(crate) type IterationCursor = Arc<AtomicU32>;
/// `tool_name` contract without the forwarder emitting those fragments itself.
pub(crate) type ToolNameMap = Arc<Mutex<std::collections::HashMap<String, String>>>;

/// Shared `call_id → (success, classified failure)` side-channel. The crate's
/// `AgentEvent::ToolCompleted` carries only `call_id` + `tool_name` (no
/// success/error), so `ToolOutcomeCaptureMiddleware::after_tool` — which does
/// see the `ToolResult` — classifies each outcome and writes it here; the bridge
/// reads it when projecting the live `ToolCallCompleted` event, so a failed tool
/// surfaces real `success: false` + a user-facing `failure`. Absent entry (event
/// projected before the middleware ran) falls back to `(true, None)`.
///
/// Tuple layout: `.0` is the success flag, `.1` is the classification (`None`
/// for a successful call). The bridge `remove`s the entry when it reads it, so
/// each recorded outcome is consumed at most once.
pub(crate) type ToolFailureMap = Arc<
Mutex<
std::collections::HashMap<
// Key: the tool call id.
String,
(
// `true` when the tool result carried no error.
bool,
// User-facing classification; `None` on success.
Option<crate::openhuman::tool_status::ClassifiedFailure>,
),
>,
>,
>;

/// An [`EventListener`] that pauses the run once `cap` model calls have
/// completed, so the loop stops gracefully at the iteration budget (returning
/// the partial transcript) instead of erroring with `LimitExceeded`. The harness
Expand Down Expand Up @@ -120,6 +139,9 @@ pub(crate) struct OpenhumanEventBridge {
/// `ThinkingForwarder` on tool-call start; read here to label the
/// incremental tool-argument fragments projected off the crate stream.
tool_names: ToolNameMap,
/// Shared `call_id → (success, failure)` side-channel written by
/// `ToolOutcomeCaptureMiddleware`; read when projecting `ToolCallCompleted`.
failure_map: ToolFailureMap,
state: Mutex<BridgeState>,
}

Expand All @@ -137,6 +159,7 @@ impl OpenhumanEventBridge {
None,
Arc::default(),
Arc::default(),
Arc::default(),
)
}

Expand All @@ -150,6 +173,7 @@ impl OpenhumanEventBridge {
scope: Option<SubagentScope>,
cursor: IterationCursor,
tool_names: ToolNameMap,
failure_map: ToolFailureMap,
) -> Arc<Self> {
Arc::new(Self {
on_progress,
Expand All @@ -158,6 +182,7 @@ impl OpenhumanEventBridge {
scope,
cursor,
tool_names,
failure_map,
state: Mutex::new(BridgeState::default()),
})
}
Expand Down Expand Up @@ -489,21 +514,35 @@ impl EventListener for OpenhumanEventBridge {
}
AgentEvent::ToolCompleted { call_id, tool_name } => {
let iteration = self.iteration();
// The crate event carries no success/error, so read what the
// outcome-capture middleware classified for this call. Absent →
// the event was projected before the middleware ran; assume
// success (never worse than the previous hardcoded `true`).
let outcome = self
.failure_map
.lock()
.ok()
.and_then(|mut m| m.remove(call_id.as_str()));
let success = outcome.as_ref().map(|(ok, _)| *ok).unwrap_or(true);
match &self.scope {
None => self.send(AgentProgress::ToolCallCompleted {
call_id: call_id.as_str().to_string(),
tool_name: tool_name.clone(),
success: true,
output_chars: 0,
elapsed_ms: 0,
iteration,
}),
None => {
let failure = outcome.and_then(|(_, f)| f);
self.send(AgentProgress::ToolCallCompleted {
call_id: call_id.as_str().to_string(),
tool_name: tool_name.clone(),
success,
output_chars: 0,
elapsed_ms: 0,
iteration,
failure,
})
}
Some(s) => self.send(AgentProgress::SubagentToolCallCompleted {
agent_id: s.agent_id.clone(),
task_id: s.task_id.clone(),
call_id: call_id.as_str().to_string(),
tool_name: tool_name.clone(),
success: true,
success,
output_chars: 0,
output: String::new(),
elapsed_ms: 0,
Expand Down
20 changes: 20 additions & 0 deletions src/openhuman/tool_status/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
//! Tool-call lifecycle state and human-readable failure classification.
//!
//! Foundation for the "Visible tool status, failure diagnosis, and safe
//! recovery flows" epic (#4254). This module owns the shared vocabulary that
//! later phases (status panel, bounded retry, in-app diagnostics) build on:
//!
//! - [`ToolLifecycleState`] — where a call is (queued/running/…/needs-input).
//! - [`ToolFailureClass`] / [`FailureCategory`] — what kind of failure it was.
//! - [`ClassifiedFailure`] — a class plus plain-language cause + next action.
//! - [`classify`] — the pure heuristic mapping raw tool error text → the above.
//!
//! It owns no agent tools, no persistence, and no event subscribers; it is a
//! pure data + logic module consumed by the agent tool executor and surfaced
//! over the `tool` event-bus domain.

mod ops;
mod types;

pub use ops::{classify, describe};
pub use types::{ClassifiedFailure, FailureCategory, ToolFailureClass, ToolLifecycleState};
Loading
Loading