diff --git a/hindsight-docs/docs-integrations/opencode.md b/hindsight-docs/docs-integrations/opencode.md index 26857561f..1b364be50 100644 --- a/hindsight-docs/docs-integrations/opencode.md +++ b/hindsight-docs/docs-integrations/opencode.md @@ -6,7 +6,7 @@ description: "Add long-term memory to OpenCode with Hindsight. Automatically cap # OpenCode -Persistent long-term memory plugin for [OpenCode](https://opencode.ai) using [Hindsight](https://vectorize.io/hindsight). Automatically captures conversations, recalls relevant context on session start, and provides retain/recall/reflect tools the agent can call directly. +Persistent long-term memory plugin for [OpenCode](https://opencode.ai) using [Hindsight](https://vectorize.io/hindsight). Automatically captures conversations, recalls relevant context on every turn, and provides retain/recall/reflect tools the agent can call directly. ## Quick Start @@ -66,11 +66,11 @@ The plugin registers three tools the agent can call explicitly: ### Auto-Retain -When the session goes idle (`session.idle` event), the plugin automatically retains the conversation transcript to Hindsight. Configurable via `retainEveryNTurns` to control frequency. +After each agent response (when the `session.idle` event fires), the plugin automatically retains the full conversation transcript to Hindsight as an upsert. This ensures even one-shot prompts are captured reliably. A pre-compaction retain serves as a backup before context is compressed. -### Session Recall +### Per-Turn Recall -When a new session starts, the plugin recalls relevant project context and injects it into the system prompt, giving the agent access to memories from prior sessions. +On every turn, the plugin recalls relevant memories keyed on the latest user message and injects them into the system prompt. This ensures injected memories are always contextually relevant to the current question, not stale from a previous turn. 
### Compaction Hook @@ -94,16 +94,22 @@ This ensures memories survive context window trimming. "autoRecall": true, "autoRetain": true, "recallBudget": "mid", + "recallMaxTokens": 1024, + "recallTypes": ["observation", "world", "experience"], + "recallContextTurns": 1, "recallTags": [], "recallTagsMatch": "any", + "retainContext": "conversation between OpenCode Agent and the User", "retainTags": [], - "retainEveryNTurns": 3, "debug": false }] ] } ``` +> **Note:** The plugin performs one recall API call per turn and one retain upsert per agent response. +> To reduce API load, disable `autoRecall` or `autoRetain`; to reduce the number of tokens injected into the system prompt on each turn, lower `recallMaxTokens`. + ### Config File Create `~/.hindsight/opencode.json` for persistent configuration that applies across all projects: @@ -120,17 +126,19 @@ Create `~/.hindsight/opencode.json` for persistent configuration that applies ac | Variable | Description | Default | |---|---|---| -| `HINDSIGHT_API_URL` | Hindsight API base URL | *(required)* | +| `HINDSIGHT_API_URL` | Hindsight API base URL | `https://api.hindsight.vectorize.io` | | `HINDSIGHT_API_TOKEN` | API key for authentication | | | `HINDSIGHT_BANK_ID` | Static memory bank ID | `opencode` | | `HINDSIGHT_AGENT_NAME` | Agent name for dynamic bank IDs | `opencode` | -| `HINDSIGHT_AUTO_RECALL` | Auto-recall on session start | `true` | +| `HINDSIGHT_AUTO_RECALL` | Auto-recall on every turn | `true` | | `HINDSIGHT_AUTO_RETAIN` | Auto-retain on session idle | `true` | -| `HINDSIGHT_RETAIN_MODE` | `full-session` or `last-turn` | `full-session` | | `HINDSIGHT_RECALL_BUDGET` | Recall budget: `low`, `mid`, `high` | `mid` | | `HINDSIGHT_RECALL_MAX_TOKENS` | Max tokens for recall results | `1024` | +| `HINDSIGHT_RECALL_MAX_QUERY_CHARS` | Max chars for recall query | `800` | +| `HINDSIGHT_RECALL_CONTEXT_TURNS` | Context turns for recall query | `1` | | `HINDSIGHT_RECALL_TAGS` | Comma-separated tags to filter recall results | | | `HINDSIGHT_RECALL_TAGS_MATCH` | Tag match mode: 
`any`, `all`, `any_strict`, `all_strict` | `any` | +| `HINDSIGHT_RETAIN_TAGS` | Comma-separated tags for retained documents | | | `HINDSIGHT_DYNAMIC_BANK_ID` | Enable dynamic bank ID derivation | `false` | | `HINDSIGHT_BANK_MISSION` | Bank mission/context for reflect | | @@ -168,8 +176,7 @@ export HINDSIGHT_USER_ID="user123" ## How It Works 1. **Plugin loads** when OpenCode starts — creates a `HindsightClient`, derives the bank ID, and registers tools + hooks -2. **Session starts** — `session.created` event triggers, plugin marks session for recall injection -3. **System transform** — on the first LLM call, recalled memories are injected into the system prompt -4. **Agent works** — can call `hindsight_recall` and `hindsight_retain` explicitly during the session -5. **Session idles** — `session.idle` event triggers auto-retain of the conversation -6. **Compaction** — if the context window fills up, memories are preserved through the compaction +2. **Every turn** — `system.transform` hook recalls relevant memories keyed on the latest user message and injects them into the system prompt +3. **Agent works** — can call `hindsight_recall` and `hindsight_retain` explicitly during the session +4. **Agent responds** — `session.idle` event fires after each agent response, triggering auto-retain (upsert) of the conversation +5. 
**Compaction** — if the context window fills up, memories are preserved through the compaction \ No newline at end of file diff --git a/hindsight-integrations/opencode/src/config.test.ts b/hindsight-integrations/opencode/src/config.test.ts index 2dff0f681..6ac0e7e9d 100644 --- a/hindsight-integrations/opencode/src/config.test.ts +++ b/hindsight-integrations/opencode/src/config.test.ts @@ -23,7 +23,8 @@ describe("loadConfig", () => { expect(config.autoRetain).toBe(true); expect(config.recallBudget).toBe("mid"); expect(config.recallMaxTokens).toBe(1024); - expect(config.retainContext).toBe("opencode"); + expect(config.recallContextTurns).toBe(1); + expect(config.retainContext).toBe("conversation between OpenCode Agent and the User"); expect(config.agentName).toBe("opencode"); expect(config.dynamicBankId).toBe(false); expect(config.debug).toBe(false); @@ -127,14 +128,6 @@ describe("loadConfig", () => { expect(config.debug).toBe(false); // stays default }); - it("invalid retainMode falls back to full-session with warning", () => { - const spy = vi.spyOn(console, "error").mockImplementation(() => {}); - const config = loadConfig({ retainMode: "full_session" }); - expect(config.retainMode).toBe("full-session"); - expect(spy).toHaveBeenCalledWith(expect.stringContaining("Unknown retainMode")); - spy.mockRestore(); - }); - it("invalid recallBudget falls back to mid with warning", () => { const spy = vi.spyOn(console, "error").mockImplementation(() => {}); const config = loadConfig({ recallBudget: "maximum" }); @@ -142,13 +135,4 @@ describe("loadConfig", () => { expect(spy).toHaveBeenCalledWith(expect.stringContaining("Unknown recallBudget")); spy.mockRestore(); }); - - it("valid retainMode and recallBudget pass without warning", () => { - const spy = vi.spyOn(console, "error").mockImplementation(() => {}); - const config = loadConfig({ retainMode: "last-turn", recallBudget: "high" }); - expect(config.retainMode).toBe("last-turn"); - 
expect(config.recallBudget).toBe("high"); - expect(spy).not.toHaveBeenCalled(); - spy.mockRestore(); - }); }); diff --git a/hindsight-integrations/opencode/src/config.ts b/hindsight-integrations/opencode/src/config.ts index b48eb2860..16cf21920 100644 --- a/hindsight-integrations/opencode/src/config.ts +++ b/hindsight-integrations/opencode/src/config.ts @@ -23,15 +23,11 @@ export interface HindsightConfig { recallTypes: string[]; recallContextTurns: number; recallMaxQueryChars: number; - recallPromptPreamble: string; recallTags: string[]; recallTagsMatch: "any" | "all" | "any_strict" | "all_strict"; // Retain autoRetain: boolean; - retainMode: string; - retainEveryNTurns: number; - retainOverlapTurns: number; retainContext: string; retainTags: string[]; retainMetadata: Record; @@ -53,27 +49,31 @@ export interface HindsightConfig { debug: boolean; } +// IMPORTANT: These defaults control per-turn recall and per-idle retain volume. +// Changing any of the following values has a direct impact on API load and +// injected token count: +// +// - autoRecall: true → one recall API call per turn (system.transform) +// - autoRetain: true → one retain upsert per session.idle event +// - recallMaxTokens → max tokens injected into the system prompt each turn +// - recallTypes → broader types = more recall results = more tokens +// +// If you reduce recallMaxTokens or disable autoRecall/autoRetain, do so +// deliberately — the defaults are tuned for balanced memory quality vs cost. const DEFAULTS: HindsightConfig = { // Recall autoRecall: true, recallBudget: "mid", recallMaxTokens: 1024, - recallTypes: ["world", "experience"], + recallTypes: ["observation", "world", "experience"], recallContextTurns: 1, recallMaxQueryChars: 800, recallTags: [], recallTagsMatch: "any", - recallPromptPreamble: - "Relevant memories from past conversations (prioritize recent when " + - "conflicting). 
Only use memories that are directly useful to continue " + - "this conversation; ignore the rest:", - // Retain + // Retain — upserts the full conversation on every session.idle event autoRetain: true, - retainMode: "full-session", - retainEveryNTurns: 3, - retainOverlapTurns: 2, - retainContext: "opencode", + retainContext: "conversation between OpenCode Agent and the User", retainTags: [], retainMetadata: {}, @@ -102,7 +102,6 @@ const ENV_OVERRIDES: Record): HindsightCo const result = config as unknown as HindsightConfig; // Validate enum-like fields to catch typos early - const VALID_RETAIN_MODES = ["full-session", "last-turn"]; - if (!VALID_RETAIN_MODES.includes(result.retainMode)) { - console.error( - `[Hindsight] Unknown retainMode "${result.retainMode}" — ` + - `valid: ${VALID_RETAIN_MODES.join(", ")}. Falling back to "full-session".` - ); - result.retainMode = "full-session"; - } - const VALID_TAGS_MATCH = ["any", "all", "any_strict", "all_strict"]; if (!VALID_TAGS_MATCH.includes(result.recallTagsMatch)) { console.error( diff --git a/hindsight-integrations/opencode/src/content.test.ts b/hindsight-integrations/opencode/src/content.test.ts index 2fe809600..de53f1edc 100644 --- a/hindsight-integrations/opencode/src/content.test.ts +++ b/hindsight-integrations/opencode/src/content.test.ts @@ -152,8 +152,7 @@ describe("prepareRetentionTranscript", () => { ]; it("retains last turn by default", () => { - const { transcript, messageCount } = prepareRetentionTranscript(messages); - expect(messageCount).toBe(2); + const transcript = prepareRetentionTranscript(messages); expect(transcript).toContain("[role: user]"); expect(transcript).toContain("How are you?"); expect(transcript).toContain("I am doing well"); @@ -161,16 +160,14 @@ describe("prepareRetentionTranscript", () => { }); it("retains full window when requested", () => { - const { transcript, messageCount } = prepareRetentionTranscript(messages, true); - expect(messageCount).toBe(4); + const transcript = 
prepareRetentionTranscript(messages, true); expect(transcript).toContain("Hello"); expect(transcript).toContain("How are you?"); }); it("returns null for empty messages", () => { - const { transcript, messageCount } = prepareRetentionTranscript([]); + const transcript = prepareRetentionTranscript([]); expect(transcript).toBeNull(); - expect(messageCount).toBe(0); }); it("strips memory tags from content", () => { @@ -178,7 +175,7 @@ describe("prepareRetentionTranscript", () => { { role: "user", content: "Query data" }, { role: "assistant", content: "Response" }, ]; - const { transcript } = prepareRetentionTranscript(msgs); + const transcript = prepareRetentionTranscript(msgs); expect(transcript).not.toContain("hindsight_memories"); expect(transcript).toContain("Query"); }); @@ -188,8 +185,7 @@ describe("prepareRetentionTranscript", () => { { role: "user", content: "only tags" }, { role: "assistant", content: "Response" }, ]; - const { transcript, messageCount } = prepareRetentionTranscript(msgs, true); - expect(messageCount).toBe(1); // only assistant message + const transcript = prepareRetentionTranscript(msgs, true); expect(transcript).toContain("Response"); }); }); diff --git a/hindsight-integrations/opencode/src/content.ts b/hindsight-integrations/opencode/src/content.ts index 2a8ae2c57..62281a8db 100644 --- a/hindsight-integrations/opencode/src/content.ts +++ b/hindsight-integrations/opencode/src/content.ts @@ -143,8 +143,8 @@ export function sliceLastTurnsByUserBoundary(messages: Message[], turns: number) export function prepareRetentionTranscript( messages: Message[], retainFullWindow: boolean = false -): { transcript: string | null; messageCount: number } { - if (!messages.length) return { transcript: null, messageCount: 0 }; +): string | null { + if (!messages.length) return null; let targetMessages: Message[]; if (retainFullWindow) { @@ -158,7 +158,7 @@ export function prepareRetentionTranscript( break; } } - if (lastUserIdx === -1) return { transcript: 
null, messageCount: 0 }; + if (lastUserIdx === -1) return null; targetMessages = messages.slice(lastUserIdx); } @@ -169,10 +169,10 @@ export function prepareRetentionTranscript( parts.push(`[role: ${msg.role}]\n${content}\n[${msg.role}:end]`); } - if (!parts.length) return { transcript: null, messageCount: 0 }; + if (!parts.length) return null; const transcript = parts.join("\n\n"); - if (transcript.trim().length < 10) return { transcript: null, messageCount: 0 }; + if (transcript.trim().length < 10) return null; - return { transcript, messageCount: parts.length }; + return transcript; } diff --git a/hindsight-integrations/opencode/src/hooks.test.ts b/hindsight-integrations/opencode/src/hooks.test.ts index 6e1082297..576eaca58 100644 --- a/hindsight-integrations/opencode/src/hooks.test.ts +++ b/hindsight-integrations/opencode/src/hooks.test.ts @@ -6,8 +6,7 @@ function makeState(): PluginState { return { turnCount: 0, missionsSet: new Set(), - recalledSessions: new Set(), - lastRetainedTurn: new Map(), + sessionTurnCount: new Map(), }; } @@ -54,13 +53,7 @@ describe("event hook — session.idle", () => { ]; const opencodeClient = makeOpencodeClient(messages); const state = makeState(); - const hooks = createHooks( - client, - "bank", - makeConfig({ retainEveryNTurns: 1 }), - state, - opencodeClient - ); + const hooks = createHooks(client, "bank", makeConfig(), state, opencodeClient); await hooks.event({ event: { type: "session.idle", properties: { sessionID: "sess-1" } }, @@ -68,7 +61,7 @@ describe("event hook — session.idle", () => { expect(client.retain).toHaveBeenCalledTimes(1); expect(client.retain.mock.calls[0][0]).toBe("bank"); - // Full-session mode uses session ID as document_id + // Always uses session ID as document_id (upsert) const opts = client.retain.mock.calls[0][2]; expect(opts.documentId).toBe("sess-1"); expect(opts.metadata.session_id).toBe("sess-1"); @@ -85,7 +78,7 @@ describe("event hook — session.idle", () => { const hooks = createHooks( client, 
"bank", - makeConfig({ retainTags: ["user:alice", "shared"], retainEveryNTurns: 1 }), + makeConfig({ retainTags: ["user:alice", "shared"] }), state, opencodeClient ); @@ -120,48 +113,25 @@ describe("event hook — session.idle", () => { expect(client.retain).not.toHaveBeenCalled(); }); - it("uses chunked document_id with overlap in last-turn mode", async () => { + it("retains on every idle (no deduplication — upsert wins)", async () => { const client = makeClient(); const messages = [ - { info: { role: "user" }, parts: [{ type: "text", text: "Turn 1" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "Reply 1" }] }, - { info: { role: "user" }, parts: [{ type: "text", text: "Turn 2" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "Reply 2" }] }, + { info: { role: "user" }, parts: [{ type: "text", text: "Hello" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Hi" }] }, ]; - const config = makeConfig({ - retainMode: "last-turn", - retainEveryNTurns: 1, - retainOverlapTurns: 1, - }); const state = makeState(); - const hooks = createHooks(client, "bank", config, state, makeOpencodeClient(messages)); + const hooks = createHooks(client, "bank", makeConfig(), state, makeOpencodeClient(messages)); + // First idle await hooks.event({ event: { type: "session.idle", properties: { sessionID: "sess-1" } }, }); - - expect(client.retain).toHaveBeenCalledTimes(1); - const opts = client.retain.mock.calls[0][2]; - // Chunked mode uses session-timestamp format - expect(opts.documentId).toMatch(/^sess-1-\d+$/); - }); - - it("respects retainEveryNTurns", async () => { - const client = makeClient(); - const messages = [ - { info: { role: "user" }, parts: [{ type: "text", text: "Hello" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "Hi" }] }, - ]; - const config = makeConfig({ retainEveryNTurns: 5 }); - const state = makeState(); - const hooks = createHooks(client, "bank", config, state, 
makeOpencodeClient(messages)); - + // Second idle — should also retain (upsert) await hooks.event({ event: { type: "session.idle", properties: { sessionID: "sess-1" } }, }); - // Only 1 user turn, needs 5 — should not retain - expect(client.retain).not.toHaveBeenCalled(); + expect(client.retain).toHaveBeenCalledTimes(2); }); it("does not throw on client error", async () => { @@ -174,7 +144,7 @@ describe("event hook — session.idle", () => { const hooks = createHooks( client, "bank", - makeConfig({ retainEveryNTurns: 1 }), + makeConfig(), makeState(), makeOpencodeClient(messages) ); @@ -187,40 +157,194 @@ describe("event hook — session.idle", () => { }); }); -describe("autoRecall is independent of session.created ordering (#1758)", () => { - it("injects recall on the first system.transform even if session.created never fired", async () => { +// Helper messages that the system transform hook needs (it fetches session +// messages to build a contextual recall query). +const CONVO_MESSAGES = [ + { info: { role: "user" }, parts: [{ type: "text", text: "Help me with React" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Sure, let me help" }] }, +]; + +describe("system transform hook — recalls every turn", () => { + it("injects memory instructions on the first turn", async () => { + const client = makeClient(); + client.recall.mockResolvedValue({ results: [] }); + const state = makeState(); + const output = { system: ["You are a helpful assistant."] as string[] }; + const hooks = createHooks( + client, + "bank", + makeConfig(), + state, + makeOpencodeClient(CONVO_MESSAGES) + ); + + await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output); + + // Should inject memory instructions on turn 1 + expect(output.system[0]).toContain("Hindsight Memory"); + expect(output.system[0]).toContain("WHEN TO SAVE"); + expect(output.system[0]).toContain("proactively"); + expect(output.system[0]).toContain("PRIORITY"); + 
expect(output.system[0]).toContain("hindsight_recall"); + expect(output.system[0]).toContain("hindsight_reflect"); + expect(output.system[0]).toContain("hindsight_retain"); + }); + + it("recalls on every turn (not just once per session)", async () => { + const client = makeClient(); + client.recall.mockResolvedValue({ + results: [{ text: "User is a developer", type: "world" }], + }); const state = makeState(); + const hooks = createHooks( + client, + "bank", + makeConfig(), + state, + makeOpencodeClient(CONVO_MESSAGES) + ); + + // Turn 1 — live recall fires + const output1 = { system: [] as string[] }; + await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output1); + expect(client.recall).toHaveBeenCalledTimes(1); + + // Turn 2 — should recall again + const output2 = { system: [] as string[] }; + await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output2); + expect(client.recall).toHaveBeenCalledTimes(2); + + // Turn 3 — still recalls + const output3 = { system: [] as string[] }; + await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output3); + expect(client.recall).toHaveBeenCalledTimes(3); + }); + + it("appends recall into the existing first system section, not a new one", async () => { const client = makeClient(); - client.recall.mockResolvedValue({ results: [{ text: "User is a developer", type: "world" }] }); - const hooks = createHooks(client, "bank", makeConfig(), state, makeOpencodeClient()); + client.recall.mockResolvedValue({ + results: [{ text: "User is a developer", type: "world" }], + }); + const state = makeState(); + const output = { system: ["You are a helpful coding assistant."] as string[] }; + const hooks = createHooks( + client, + "bank", + makeConfig(), + state, + makeOpencodeClient(CONVO_MESSAGES) + ); - // No session.created beforehand — this is the #1758 reproduction. 
- const output: { system: string[] } = { system: [] }; await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output); - expect(output.system.length).toBeGreaterThan(0); + // Still a single system section — appended, not pushed. + expect(output.system.length).toBe(1); + expect(output.system[0]).toContain("You are a helpful coding assistant."); expect(output.system[0]).toContain("hindsight_memories"); - // Marked as recalled so it won't repeat on the next message. - expect(state.recalledSessions.has("sess-1")).toBe(true); + expect(output.system[0]).toContain("User is a developer"); + // Preamble + expect(output.system[0]).toContain( + "Do not call tools to look up information that is already present here" + ); }); - it("injects recall even when system.transform fires BEFORE session.created", async () => { + it("tracks turn count per session", async () => { + const client = makeClient(); + client.recall.mockResolvedValue({ results: [] }); const state = makeState(); + const hooks = createHooks( + client, + "bank", + makeConfig({ autoRetain: false }), + state, + makeOpencodeClient(CONVO_MESSAGES) + ); + + await hooks["experimental.chat.system.transform"]( + { sessionID: "sess-1", model: {} }, + { system: [] } + ); + await hooks["experimental.chat.system.transform"]( + { sessionID: "sess-1", model: {} }, + { system: [] } + ); + await hooks["experimental.chat.system.transform"]( + { sessionID: "sess-1", model: {} }, + { system: [] } + ); + + // Each system.transform increments the turn counter exactly once + expect(state.sessionTurnCount.get("sess-1")).toBe(3); + }); + + it("does not inject memory instructions on subsequent turns", async () => { const client = makeClient(); - client.recall.mockResolvedValue({ results: [{ text: "User is a developer", type: "world" }] }); - const hooks = createHooks(client, "bank", makeConfig(), state, makeOpencodeClient()); + client.recall.mockResolvedValue({ results: [] }); + const state = makeState(); + 
const hooks = createHooks( + client, + "bank", + makeConfig(), + state, + makeOpencodeClient(CONVO_MESSAGES) + ); - const out1: { system: string[] } = { system: [] }; - await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, out1); - expect(out1.system.length).toBeGreaterThan(0); // recall happened on turn 1 + // Turn 1 — instructions injected + const output1 = { system: ["Base prompt."] as string[] }; + await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output1); + expect(output1.system[0]).toContain("Hindsight Memory"); - // session.created arriving late is a no-op and must not re-trigger recall. - await hooks.event({ - event: { type: "session.created", properties: { info: { id: "sess-1" } } }, + // Turn 2 — instructions NOT re-injected + const output2 = { system: ["Base prompt."] as string[] }; + await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output2); + expect(output2.system[0]).not.toContain("Hindsight Memory"); + }); + + it("skips when autoRecall is false", async () => { + const client = makeClient(); + const state = makeState(); + const output = { system: [] as string[] }; + const hooks = createHooks( + client, + "bank", + makeConfig({ autoRecall: false }), + state, + makeOpencodeClient() + ); + + await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output); + + expect(output.system.length).toBe(0); + expect(client.recall).not.toHaveBeenCalled(); + }); + + it("handles transient API failure gracefully", async () => { + const client = makeClient(); + // First call: API error + client.recall.mockRejectedValueOnce(new Error("Connection refused")); + // Second call: succeeds + client.recall.mockResolvedValueOnce({ + results: [{ text: "Found it", type: "world" }], }); - const out2: { system: string[] } = { system: [] }; - await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, out2); - 
expect(out2.system.length).toBe(0); // already recalled — deduped + const state = makeState(); + const hooks = createHooks( + client, + "bank", + makeConfig(), + state, + makeOpencodeClient(CONVO_MESSAGES) + ); + + // First attempt — API error, recall content not injected (but memory instructions ARE on turn 1) + const output1 = { system: [] as string[] }; + await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output1); + // system[0] has memory instructions but no hindsight_memories (recall failed) + expect(output1.system[0]).not.toContain("hindsight_memories"); + + // Second attempt — succeeds (different turn) + const output2 = { system: [] as string[] }; + await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output2); + expect(output2.system[0]).toContain("Found it"); }); }); @@ -300,24 +424,7 @@ describe("compacting hook", () => { expect(opts.tags).toEqual(["user:alice", "auto-tag"]); }); - it("pre-compaction retain uses chunked documentId in last-turn mode", async () => { - const client = makeClient(); - client.recall.mockResolvedValue({ results: [] }); - const messages = [ - { info: { role: "user" }, parts: [{ type: "text", text: "Hello" }] }, - { info: { role: "assistant" }, parts: [{ type: "text", text: "Hi" }] }, - ]; - const config = makeConfig({ retainMode: "last-turn", retainEveryNTurns: 1 }); - const output = { context: [] as string[] }; - const hooks = createHooks(client, "bank", config, makeState(), makeOpencodeClient(messages)); - - await hooks["experimental.session.compacting"]({ sessionID: "sess-1" }, output); - - const opts = client.retain.mock.calls[0][2]; - expect(opts.documentId).toMatch(/^sess-1-\d+$/); - }); - - it("resets lastRetainedTurn so idle-retain resumes after compaction", async () => { + it("resets sessionTurnCount after compaction", async () => { const client = makeClient(); client.recall.mockResolvedValue({ results: [] }); const messages = [ @@ -325,15 +432,15 @@ 
describe("compacting hook", () => { { info: { role: "assistant" }, parts: [{ type: "text", text: "Hi" }] }, ]; const state = makeState(); - // Simulate prior retain at turn 10 - state.lastRetainedTurn.set("sess-1", 10); + // Simulate prior turn count + state.sessionTurnCount.set("sess-1", 10); const output = { context: [] as string[] }; const hooks = createHooks(client, "bank", makeConfig(), state, makeOpencodeClient(messages)); await hooks["experimental.session.compacting"]({ sessionID: "sess-1" }, output); - // After compaction, lastRetainedTurn should be cleared so idle-retain works again - expect(state.lastRetainedTurn.has("sess-1")).toBe(false); + // After compaction, sessionTurnCount should be cleared + expect(state.sessionTurnCount.has("sess-1")).toBe(false); }); it("does not throw on error", async () => { @@ -355,111 +462,23 @@ describe("compacting hook", () => { }); }); -describe("system transform hook", () => { - it("injects memories on the first transform for a session", async () => { - const client = makeClient(); - client.recall.mockResolvedValue({ - results: [{ text: "User is a developer", type: "world" }], - }); - const state = makeState(); - const output = { system: [] as string[] }; - const hooks = createHooks(client, "bank", makeConfig(), state, makeOpencodeClient()); - - await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output); - - expect(output.system.length).toBeGreaterThan(0); - expect(output.system[0]).toContain("hindsight_memories"); - // Marked as recalled so it won't repeat. - expect(state.recalledSessions.has("sess-1")).toBe(true); - }); - - it("appends recall into the existing first system section, not a new one", async () => { - // OpenCode emits each system[] entry as a separate system message and some - // providers only honor the first; recall must fold into system[0]. 
+describe("system transform does not retain", () => { + it("does not call retain during system.transform (retain is handled by session.idle)", async () => { const client = makeClient(); - client.recall.mockResolvedValue({ - results: [{ text: "User is a developer", type: "world" }], - }); - const state = makeState(); - const output = { system: ["You are a helpful coding assistant."] as string[] }; - const hooks = createHooks(client, "bank", makeConfig(), state, makeOpencodeClient()); - - await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output); - - // Still a single system section — appended, not pushed. - expect(output.system.length).toBe(1); - expect(output.system[0]).toContain("You are a helpful coding assistant."); - expect(output.system[0]).toContain("hindsight_memories"); - expect(output.system[0]).toContain("User is a developer"); - }); - - it("deduplicates: does not recall again for an already-recalled session", async () => { - const client = makeClient(); - const state = makeState(); - state.recalledSessions.add("sess-1"); // already recalled - const output = { system: [] as string[] }; - const hooks = createHooks(client, "bank", makeConfig(), state, makeOpencodeClient()); - - await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output); - - expect(output.system.length).toBe(0); - expect(client.recall).not.toHaveBeenCalled(); - }); - - it("marks session recalled on empty recall (no repeated queries for empty banks)", async () => { - const client = makeClient(); - // No results — empty bank client.recall.mockResolvedValue({ results: [] }); const state = makeState(); - const output = { system: [] as string[] }; - const hooks = createHooks(client, "bank", makeConfig(), state, makeOpencodeClient()); - - await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output); - - // No injection, but session marked — won't re-query on next transform - 
expect(output.system.length).toBe(0); - expect(state.recalledSessions.has("sess-1")).toBe(true); - }); - - it("retries recall on next transform after transient API failure", async () => { - const client = makeClient(); - // First call: API error (transient) - client.recall.mockRejectedValueOnce(new Error("Connection refused")); - // Second call: succeeds - client.recall.mockResolvedValueOnce({ - results: [{ text: "Found it", type: "world" }], - }); - const state = makeState(); - const hooks = createHooks(client, "bank", makeConfig(), state, makeOpencodeClient()); - - // First attempt — API error, NOT marked, so it retries next time - const output1 = { system: [] as string[] }; - await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output1); - expect(output1.system.length).toBe(0); - expect(state.recalledSessions.has("sess-1")).toBe(false); - - // Second attempt — succeeds, injected and marked - const output2 = { system: [] as string[] }; - await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output2); - expect(output2.system.length).toBeGreaterThan(0); - expect(state.recalledSessions.has("sess-1")).toBe(true); - }); - - it("skips when autoRecall is false", async () => { - const client = makeClient(); - const state = makeState(); - const output = { system: [] as string[] }; const hooks = createHooks( client, "bank", - makeConfig({ autoRecall: false }), + makeConfig({ autoRetain: true }), state, - makeOpencodeClient() + makeOpencodeClient(CONVO_MESSAGES) ); + const output = { system: [] as string[] }; await hooks["experimental.chat.system.transform"]({ sessionID: "sess-1", model: {} }, output); - expect(output.system.length).toBe(0); - expect(client.recall).not.toHaveBeenCalled(); + // system.transform should NOT call retain — that's the job of session.idle + expect(client.retain).not.toHaveBeenCalled(); }); }); diff --git a/hindsight-integrations/opencode/src/hooks.ts 
b/hindsight-integrations/opencode/src/hooks.ts index 5b48df0fc..ff554b7f9 100644 --- a/hindsight-integrations/opencode/src/hooks.ts +++ b/hindsight-integrations/opencode/src/hooks.ts @@ -2,10 +2,26 @@ * Hook implementations for the Hindsight OpenCode plugin. * * Hooks: - * - experimental.chat.system.transform → recall memories once per session and - * inject them into the system prompt (order-independent; see #1758) - * - event (session.idle) → auto-retain conversation transcript - * - experimental.session.compacting → inject memories into compaction context + * - experimental.chat.system.transform → recall memories on **every turn** and + * inject them into the system prompt with contextual, query-based recall + * - event (session.idle) → auto-retain conversation transcript (fires after + * each agent response, upserting the same document so the latest state wins) + * - experimental.session.compacting → retain + inject memories into compaction context + * + * Key design decisions: + * + * 1. **Recall every turn** — we recall on every `system.transform`, keyed on + * the latest user message. This ensures injected memories are always relevant + * to the current question. + * + * 2. **Retain on session idle** — `session.idle` fires after each agent + * response. We always retain the full conversation (upsert with the same + * documentId), so the latest state is stored reliably, including for + * one-shot prompts. A pre-compaction retain serves as a backup before + * context is compressed. + * + * 3. **System prompt instructions** — we inject memory usage instructions + * into the system prompt so the LLM knows when and how to use memory tools. 
*/ import type { HindsightClient } from "@vectorize-io/hindsight-client"; @@ -17,7 +33,7 @@ import { composeRecallQuery, truncateRecallQuery, prepareRetentionTranscript, - sliceLastTurnsByUserBoundary, + stripMemoryTags, type Message, } from "./content.js"; import { ensureBankMission } from "./bank.js"; @@ -25,10 +41,8 @@ import { ensureBankMission } from "./bank.js"; export interface PluginState { turnCount: number; missionsSet: Set; - /** Track sessions we've already injected recall into */ - recalledSessions: Set; - /** Track last retained turn count per session to avoid duplicates */ - lastRetainedTurn: Map; + /** Per-session turn counter — incremented on each system.transform */ + sessionTurnCount: Map; } interface EventInput { @@ -82,6 +96,39 @@ export interface HindsightHooks { ) => Promise; } +/** + * System prompt instructions that teach the LLM to use memory tools + * proactively. Combines hermes-agent's memory tool description style + * (motivational, concrete examples) with Hindsight-specific tool names. + */ +function buildMemoryInstructions(bankId: string): string { + return [ + "# Hindsight Memory", + `Active. Bank: ${bankId}.`, + "Relevant memories are automatically injected into context.", + "", + "Save durable information to persistent memory that survives across sessions. 
" + + "Memory is injected into future turns, so keep it compact and focused on facts " + + "that will still matter later.", + "", + "WHEN TO SAVE (do this proactively, don't wait to be asked):", + "- User corrects you or says 'remember this' / 'don't do that again'", + "- User shares a preference, habit, or personal detail (name, role, timezone, coding style)", + "- You discover something about the environment (OS, installed tools, project structure)", + "- You learn a convention, API quirk, or workflow specific to this user's setup", + "- You identify a stable fact that will be useful again in future sessions", + "- After completing a task or making a decision", + "", + "PRIORITY: User preferences and corrections > environment facts > procedural knowledge. " + + "The most valuable memory prevents the user from having to repeat themselves.", + "", + "Use hindsight_recall to search for relevant memories before answering questions about " + + "past work, user preferences, or project context. When in doubt, recall first.", + "Use hindsight_reflect to synthesize a reasoned answer from all stored memories.", + "Use hindsight_retain to store facts, decisions, and preferences.", + ].join("\n"); +} + export function createHooks( hindsightClient: HindsightClient, bankId: string, @@ -114,7 +161,9 @@ export function createHooks( const formatted = formatMemories(results); const context = `\n` + - `${config.recallPromptPreamble}\n` + + `Hindsight Memory (persistent cross-session context)\n` + + `Use this to answer questions about the user and prior sessions. ` + + `Do not call tools to look up information that is already present here.\n` + `Current time: ${formatCurrentTime()} UTC\n\n` + `${formatted}\n` + ``; @@ -156,32 +205,16 @@ export function createHooks( } /** - * Retain messages for a session, respecting retainMode and documentId semantics. + * Retain the full conversation for a session as a single document (upsert). * Used by both idle-retain and pre-compaction retain. 
*/ async function retainSession(sessionId: string, messages: Message[]): Promise { - const retainFullWindow = config.retainMode === "full-session"; - let targetMessages: Message[]; - let documentId: string; - - if (retainFullWindow) { - targetMessages = messages; - // Full-session upserts the same document each time - documentId = sessionId; - } else { - // Sliding window: retainEveryNTurns + overlap - const windowTurns = config.retainEveryNTurns + config.retainOverlapTurns; - targetMessages = sliceLastTurnsByUserBoundary(messages, windowTurns); - // Chunked mode: unique document per chunk - documentId = `${sessionId}-${Date.now()}`; - } - - const { transcript } = prepareRetentionTranscript(targetMessages, true); + const transcript = prepareRetentionTranscript(messages, true); if (!transcript) return; await ensureBankMission(hindsightClient, bankId, config, state.missionsSet, logger); await hindsightClient.retain(bankId, transcript, { - documentId, + documentId: sessionId, context: config.retainContext, tags: config.retainTags.length ? 
config.retainTags : undefined, metadata: Object.keys(config.retainMetadata).length @@ -191,7 +224,7 @@ export function createHooks( }); } - /** Auto-retain conversation transcript */ + /** Auto-retain conversation transcript on session idle */ async function handleSessionIdle(sessionId: string): Promise { logger.debug(`handleSessionIdle called for session ${sessionId}`); if (!config.autoRetain) return; @@ -199,19 +232,8 @@ export function createHooks( const messages = await getSessionMessages(sessionId); if (!messages.length) return; - // Count user turns - const userTurns = messages.filter((m) => m.role === "user").length; - const lastRetained = state.lastRetainedTurn.get(sessionId) || 0; - logger.debug( - `handleSessionIdle: userTurns=${userTurns}, lastRetained=${lastRetained}, retainEveryNTurns=${config.retainEveryNTurns}` - ); - - // Only retain if enough new turns since last retain - if (userTurns - lastRetained < config.retainEveryNTurns) return; - try { await retainSession(sessionId, messages); - state.lastRetainedTurn.set(sessionId, userTurns); logger.info(`Auto-retained ${messages.length} messages`, { session: sessionId, bank: bankId, @@ -232,11 +254,6 @@ export function createHooks( await handleSessionIdle(sessionId); } } - // NOTE: autoRecall is driven entirely by `experimental.chat.system.transform` - // (see below). We deliberately do NOT key it off `session.created` — the - // relative firing order of `session.created` vs `system.transform` is an - // undocumented OpenCode implementation detail that has differed between - // versions, and relying on it silently disabled recall (see #1758). 
} catch (e) { logger.error("Event hook error", e); } @@ -244,14 +261,13 @@ export function createHooks( const compacting = async (input: CompactingInput, output: CompactingOutput): Promise => { try { - // First, retain what we have before compaction (using shared retention logic) + // First, retain what we have before compaction const messages = await getSessionMessages(input.sessionID); if (messages.length && config.autoRetain) { try { await retainSession(input.sessionID, messages); - // Reset turn tracking — after compaction the message list shrinks, - // so the old lastRetainedTurn value would block future idle retains. - state.lastRetainedTurn.delete(input.sessionID); + // Reset turn tracking — after compaction the message list shrinks. + state.sessionTurnCount.delete(input.sessionID); logger.debug("Pre-compaction retain completed"); } catch (e) { logger.error("Pre-compaction retain failed", e); @@ -288,41 +304,48 @@ export function createHooks( output: SystemTransformOutput ): Promise => { try { - if (!config.autoRecall) return; const sessionId = input.sessionID; if (!sessionId) return; - // Recall once per session, on the first system.transform we see for it. - // `recalledSessions` is a dedup marker for sessions we've ALREADY recalled - // into — not an event-ordering gate. This makes autoRecall independent of - // whether session.created fired first (see #1758). 
- if (state.recalledSessions.has(sessionId)) return; + if (!config.autoRecall) { + // Still count turns for state consistency, but skip all recall work + const turnNum = (state.sessionTurnCount.get(sessionId) || 0) + 1; + state.sessionTurnCount.set(sessionId, turnNum); + return; + } + + // Increment per-session turn counter + const turnNum = (state.sessionTurnCount.get(sessionId) || 0) + 1; + state.sessionTurnCount.set(sessionId, turnNum); + + // Inject memory instructions into system prompt on the first turn only + if (turnNum === 1) { + const instructions = buildMemoryInstructions(bankId); + output.system[0] = output.system[0] + ? `${output.system[0]}\n\n${instructions}` + : instructions; + logger.debug(`Injected memory instructions for session ${sessionId}`); + } await ensureBankMission(hindsightClient, bankId, config, state.missionsSet, logger); - // Use a generic project-context query for session start - const query = `project context and recent work`; - const { context, ok } = await recallForContext(query); - - // Mark as recalled only after a successful API round-trip (even with 0 - // results), so transient failures retry on the next message. 
- if (ok) { - state.recalledSessions.add(sessionId); - // Cap tracked sessions - if (state.recalledSessions.size > 1000) { - const first = state.recalledSessions.values().next().value; - if (first) state.recalledSessions.delete(first); - } - } + // Build a contextual recall query from the current conversation + const messages = await getSessionMessages(sessionId); + if (!messages.length) return; + + const lastUserMsg = [...messages].reverse().find((m) => m.role === "user"); + if (!lastUserMsg) return; + + // Strip injected memory tags from the query to avoid feedback loops + const cleanUserMsg = stripMemoryTags(lastUserMsg.content); + const query = composeRecallQuery(cleanUserMsg, messages, config.recallContextTurns); + const truncated = truncateRecallQuery(query, cleanUserMsg, config.recallMaxQueryChars); + // Always do a live recall + const { context } = await recallForContext(truncated); if (context) { - // Fold recall into the FIRST system section rather than pushing a new - // one. OpenCode emits each system[] entry as a separate system message, - // and some providers/LLMs only honor the first — a pushed section can be - // silently dropped. Appending to system[0] guarantees recall is seen. - // (Original approach from #1988 by @sdrobov.) output.system[0] = output.system[0] ? 
`${output.system[0]}\n\n${context}` : context; - logger.debug(`Injected recall context for session ${sessionId}`); + logger.debug(`Injected recall context for session ${sessionId} turn ${turnNum}`); } } catch (e) { logger.error("System transform hook error", e); diff --git a/hindsight-integrations/opencode/src/index.ts b/hindsight-integrations/opencode/src/index.ts index 507583762..23dd80a0a 100644 --- a/hindsight-integrations/opencode/src/index.ts +++ b/hindsight-integrations/opencode/src/index.ts @@ -30,8 +30,7 @@ import { Logger, type OpencodeLogClient } from "./logger.js"; const state: PluginState = { turnCount: 0, missionsSet: new Set(), - recalledSessions: new Set(), - lastRetainedTurn: new Map(), + sessionTurnCount: new Map(), }; const HindsightPlugin: Plugin = async (input, options) => { diff --git a/hindsight-integrations/opencode/src/plugin.test.ts b/hindsight-integrations/opencode/src/plugin.test.ts index 1c6d3189d..24d447aa7 100644 --- a/hindsight-integrations/opencode/src/plugin.test.ts +++ b/hindsight-integrations/opencode/src/plugin.test.ts @@ -15,10 +15,15 @@ import { HindsightPlugin } from "./index.js"; import { DEFAULT_HINDSIGHT_API_URL } from "./config.js"; import { HindsightClient } from "@vectorize-io/hindsight-client"; +const MOCK_MESSAGES = [ + { info: { role: "user" }, parts: [{ type: "text", text: "Help me with this" }] }, + { info: { role: "assistant" }, parts: [{ type: "text", text: "Sure, let me help" }] }, +]; + const mockPluginInput = { client: { session: { - messages: vi.fn().mockResolvedValue({ data: [] }), + messages: vi.fn().mockResolvedValue({ data: MOCK_MESSAGES }), }, }, project: { id: "test-project", worktree: "/tmp/test", vcs: "git" }, @@ -117,21 +122,22 @@ describe("HindsightPlugin state sharing", () => { const result1 = await HindsightPlugin(mockPluginInput as any); const result2 = await HindsightPlugin(mockPluginInput as any); - // Trigger session.created on session 1 — should track 'sess-A' - await result1.event!({ - 
event: { type: "session.created", properties: { info: { id: "sess-A" } } }, - }); + // Trigger system.transform on session 1 — should recall and track turn count + const output1 = { system: [] as string[] }; + await result1["experimental.chat.system.transform"]!( + { sessionID: "sess-A", model: {} }, + output1 + ); - // Session 2's system transform should see 'sess-A' because state is shared - const output = { system: [] as string[] }; + // Session 2's system transform should continue the turn count because state is shared + const output2 = { system: [] as string[] }; await result2["experimental.chat.system.transform"]!( { sessionID: "sess-A", model: {} }, - output + output2 ); - // The recall was attempted (state was shared — sess-A was found in recalledSessions). - // If state were per-instance, result2 would have an empty recalledSessions and skip recall. // result2 uses the second HindsightClient instance (index 1). + // Since we recall every turn (not just once per session), the client should have been called. 
const clientInstance = (HindsightClient as any).mock.instances[1]; expect(clientInstance.recall).toHaveBeenCalled(); }); diff --git a/hindsight-integrations/opencode/src/test-helpers.ts b/hindsight-integrations/opencode/src/test-helpers.ts index 9bb9b51d3..39f6f141e 100644 --- a/hindsight-integrations/opencode/src/test-helpers.ts +++ b/hindsight-integrations/opencode/src/test-helpers.ts @@ -5,17 +5,13 @@ export function makeConfig(overrides: Partial = {}): HindsightC autoRecall: true, recallBudget: "mid", recallMaxTokens: 1024, - recallTypes: ["world", "experience"], + recallTypes: ["observation", "world", "experience"], recallContextTurns: 1, recallMaxQueryChars: 800, - recallPromptPreamble: "", recallTags: [], recallTagsMatch: "any", autoRetain: true, - retainMode: "full-session", - retainEveryNTurns: 3, - retainOverlapTurns: 2, - retainContext: "opencode", + retainContext: "conversation between OpenCode Agent and the User", retainTags: [], retainMetadata: {}, hindsightApiUrl: "https://api.hindsight.vectorize.io", diff --git a/hindsight-integrations/opencode/src/tools.test.ts b/hindsight-integrations/opencode/src/tools.test.ts index 4cc6801d2..deb62afd7 100644 --- a/hindsight-integrations/opencode/src/tools.test.ts +++ b/hindsight-integrations/opencode/src/tools.test.ts @@ -48,7 +48,7 @@ describe("createTools", () => { ); expect(client.retain).toHaveBeenCalledWith("test-bank", "User likes TypeScript", { - context: "opencode", + context: "conversation between OpenCode Agent and the User", tags: undefined, metadata: undefined, }); @@ -90,7 +90,7 @@ describe("createTools", () => { await tools.hindsight_retain.execute({ content: "Fact" }, mockContext); expect(client.retain).toHaveBeenCalledWith("test-bank", "Fact", { - context: "opencode", + context: "conversation between OpenCode Agent and the User", tags: ["coding"], metadata: { source: "opencode" }, }); @@ -116,7 +116,9 @@ describe("createTools", () => { expect(client.recall).toHaveBeenCalledWith("test-bank", 
"user preferences", { budget: "mid", maxTokens: 1024, - types: ["world", "experience"], + tags: undefined, + tagsMatch: undefined, + types: ["observation", "world", "experience"], }); expect(result).toContain("User likes Python"); expect(result).toContain("[world]"); @@ -148,7 +150,9 @@ describe("createTools", () => { expect(client.recall).toHaveBeenCalledWith("test-bank", "test", { budget: "high", maxTokens: 4096, - types: ["world", "experience"], + tags: undefined, + tagsMatch: undefined, + types: ["observation", "world", "experience"], }); }); });