diff --git a/docs/autoresearch.md b/docs/autoresearch.md new file mode 100644 index 0000000..d80f34e --- /dev/null +++ b/docs/autoresearch.md @@ -0,0 +1,120 @@ +# AutoResearch — Knowledge Gap Detection & Active Learning + +Inspired by [Karpathy's autoresearch](https://github.com/karpathy/autoresearch), this feature extends the dreaming system with an active research loop that fills knowledge gaps detected during daily use. + +## How it works + +``` + Day (passive) Night (active) + ───────────── ────────────── + memory_search("X") Dream cycle starts + ↓ ↓ + 0 results / low score REM phase reads gaps + ↓ ↓ + GapTracker records ResearchEngine.runResearchLoop() + knowledge-gaps.json ├── broader keyword search + ├── partial-match search + └── cross-reference memory + ↓ + Confidence scoring + ├── ≥ 0.6 → KEEP → Deep phase candidate + └── < 0.6 → DISCARD → logged in DREAMS.md +``` + +## Gap detection + +Every `memory_search` call goes through `trackRecall()`, which now also calls `gapTracker.recordGap()`. A gap is classified when: + +| Result | Classification | +|---|---| +| 0 results | `no_results` | +| maxScore < 0.3 | `low_confidence` | +| ≤2 results, maxScore < 0.5 | `partial_match` | +| Otherwise | Not a gap | + +Duplicate queries increment `occurrences` — frequently-asked gaps are researched first. + +## Storage + +`memory/.dreams/knowledge-gaps.json` — array of `KnowledgeGap` objects: + +```json +[ + { + "query": "redis cache invalidation strategy", + "timestamp": "2026-04-15T...", + "resultCount": 0, + "maxScore": 0, + "gapType": "no_results", + "occurrences": 3, + "firstSeen": "2026-04-13T...", + "lastSeen": "2026-04-15T..." + } +] +``` + +## Research loop (REM phase) + +During `dream(action='run')`, if `dreaming.autoresearch.enabled` is true: + +1. Load top N gaps (sorted by occurrences). +2. For each gap: + - **Broader search**: split query into keywords, search each individually. + - **Partial match** (fallback, only when the keyword search found nothing): search the first half of the query's words. 
+ - **Deduplicate** sources by source type + path + snippet prefix. +3. Compute confidence: `avgScore * 0.65 + sourceCountFactor * 0.25 + diversityBonus * 0.10` (capped at 1.0). +4. If confidence ≥ threshold → **keep** (remove from gaps, promote to Deep phase). +5. If below → **discard** (log reason in DREAMS.md). + +## DREAMS.md output + +```markdown +## REM Research — 2026-04-15 03:00 + +### Investigated 2 knowledge gaps + +1. ✅ "redis cache invalidation strategy" (confidence: 0.89) + - Sources: memory (memory/2026-04-10.md) + - Learned: use pub/sub TTL-based invalidation with write-through pattern + - Validation: single-source + +2. ❌ "WebSocket reconnection backoff" (confidence: 0.45) — DISCARDED + - Reason: below confidence threshold +``` + +## MCP tools + +- `knowledge_gaps(action='list')` — view current gaps, sorted by frequency. +- `knowledge_gaps(action='add', query='...')` — manually register a gap. +- `dream(action='run')` — triggers the full sweep including autoresearch. + +## Configuration + +```json +{ + "dreaming": { + "autoresearch": { + "enabled": false, + "maxGapsPerNight": 5, + "confidenceThreshold": 0.6, + "sources": ["codebase", "memory"], + "maxResearchTimeMinutes": 10 + } + } +} +``` + +## Safety + +- **Read-only** — research never modifies code or external systems. +- **Confidence gating** — only verified knowledge passes the threshold. +- **Time budget** — capped at `maxResearchTimeMinutes` per cycle. +- **Opt-in** — disabled by default. +- **Transparent** — every investigation is logged in DREAMS.md. + +## References + +- [karpathy/autoresearch](https://github.com/karpathy/autoresearch) +- [Issue #2](https://github.com/crisandrews/ClawCode/issues/2) +- `lib/autoresearch.ts` — engine code +- `lib/dreaming.ts` — existing dreaming system diff --git a/lib/autoresearch.ts b/lib/autoresearch.ts new file mode 100644 index 0000000..923fabe --- /dev/null +++ b/lib/autoresearch.ts @@ -0,0 +1,449 @@ +/** + * AutoResearch — knowledge gap detection and research-loop engine. 
/**
 * AutoResearch — knowledge gap detection and research-loop engine.
 *
 * Inspired by Karpathy's autoresearch pattern (propose → test → keep/discard),
 * adapted for knowledge consolidation during the dreaming REM phase.
 *
 * Gap detection runs passively during the day: every memory_search that returns
 * zero or low-quality results is logged to memory/.dreams/knowledge-gaps.json.
 *
 * During REM, the research loop iterates over gaps and attempts to fill them
 * by re-querying the supplied search function with broader / partial queries.
 * Findings are validated with a confidence score and only promoted if they pass
 * the threshold.
 *
 * This module is deterministic except for the optional LLM synthesis step
 * (which is deferred to the caller — the MCP tool handler in server.ts).
 */

import fs from "fs";
import path from "path";

// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------

/** One recorded search that returned no or poor results. */
export interface KnowledgeGap {
  query: string;
  /** ISO timestamp of when the entry was created. */
  timestamp: string;
  resultCount: number;
  maxScore: number;
  gapType: "no_results" | "low_confidence" | "partial_match";
  /** How many times this query (case-insensitive) has been recorded. */
  occurrences: number;
  firstSeen: string;
  lastSeen: string;
}

/** A single search hit gathered while investigating a gap. */
export interface ResearchSource {
  type: "codebase" | "memory" | "web";
  path?: string;
  /** Hit text, truncated to 300 characters. */
  snippet: string;
  score: number;
}

/** Outcome of investigating one gap. */
export interface ResearchCandidate {
  query: string;
  /** De-duplicated sources, best score first. */
  sources: ResearchSource[];
  /** Up to two best snippets joined with " | ", truncated to 500 characters. */
  snippet: string;
  confidenceScore: number;
  validationMethod:
    | "code-crossref"
    | "web-confirm"
    | "multi-source"
    | "single-source";
  kept: boolean;
  /** Present only when `kept` is false. */
  discardReason?: string;
}

export interface AutoResearchConfig {
  enabled: boolean;
  maxGapsPerNight: number;
  confidenceThreshold: number;
  // NOTE(review): `sources` is carried in the config but nothing in this module
  // reads it — the engine only calls the single `searchFn` it is given.
  sources: Array<"codebase" | "memory" | "web">;
  maxResearchTimeMinutes: number;
}

export interface AutoResearchResult {
  /** Number of gaps actually investigated (kept + discarded). */
  gapsInvestigated: number;
  kept: ResearchCandidate[];
  discarded: ResearchCandidate[];
  /** The markdown block appended to DREAMS.md ("" when nothing was investigated). */
  dreamsMarkdown: string;
}

const DEFAULT_AUTORESEARCH_CONFIG: AutoResearchConfig = {
  enabled: false,
  maxGapsPerNight: 5,
  confidenceThreshold: 0.6,
  sources: ["codebase", "memory"],
  maxResearchTimeMinutes: 10,
};

/** Upper bound on the number of gaps persisted to knowledge-gaps.json. */
const MAX_STORED_GAPS = 100;

/** Runtime check for entries read back from knowledge-gaps.json. */
function isKnowledgeGap(value: unknown): value is KnowledgeGap {
  if (typeof value !== "object" || value === null) return false;
  const gap = value as Record<string, unknown>;
  return (
    typeof gap.query === "string" &&
    typeof gap.occurrences === "number" &&
    typeof gap.lastSeen === "string"
  );
}

/** Collapse all whitespace runs (including newlines) so text fits on one markdown line. */
function toSingleLine(text: string): string {
  return text.replace(/\s+/g, " ").trim();
}

// ---------------------------------------------------------------------------
// Gap tracker — called from trackRecall in server.ts
// ---------------------------------------------------------------------------

export class GapTracker {
  private readonly gapsFile: string;

  /**
   * @param dreamsDir Directory that holds `knowledge-gaps.json`; created on first save.
   */
  constructor(dreamsDir: string) {
    this.gapsFile = path.join(dreamsDir, "knowledge-gaps.json");
  }

  /**
   * Record a gap when a memory search returns poor results.
   * Called from trackRecall after every memory_search.
   *
   * Queries are matched case-insensitively; a repeat bumps `occurrences` and
   * `lastSeen` instead of adding a new entry.
   *
   * @returns The stored (new or updated) gap, or `null` when the result quality
   *          does not classify as a gap or the query is blank.
   */
  recordGap(
    query: string,
    resultCount: number,
    maxScore: number
  ): KnowledgeGap | null {
    const gapType = this.classifyGap(resultCount, maxScore);
    if (!gapType) return null;
    // A blank query can never be researched — do not pollute the gap list with it.
    if (query.trim() === "") return null;

    const gaps = this.loadGaps();
    const needle = query.toLowerCase();
    const now = new Date().toISOString();

    let entry = gaps.find((g) => g.query.toLowerCase() === needle);
    if (entry) {
      entry.occurrences++;
      entry.lastSeen = now;
      // Only update search context if this was an actual search (not a manual add).
      // A manual add arrives as (0, 0), so it must not erase context from a real
      // search that did return something.
      if (resultCount > 0 || entry.resultCount === 0) {
        entry.resultCount = resultCount;
        entry.maxScore = maxScore;
        entry.gapType = gapType;
      }
    } else {
      entry = {
        query,
        timestamp: now,
        resultCount,
        maxScore,
        gapType,
        occurrences: 1,
        firstSeen: now,
        lastSeen: now,
      };
      gaps.push(entry);
    }

    this.saveGaps(gaps);
    // Return the entry itself rather than indexing into `gaps`, so the result
    // does not depend on how saveGaps orders or trims the list.
    return entry;
  }

  /**
   * Read all stored gaps. A missing, unreadable, or malformed file yields `[]`;
   * entries that do not look like a `KnowledgeGap` are dropped.
   */
  loadGaps(): KnowledgeGap[] {
    let parsed: unknown;
    try {
      parsed = JSON.parse(fs.readFileSync(this.gapsFile, "utf-8"));
    } catch {
      return [];
    }
    return Array.isArray(parsed) ? parsed.filter(isKnowledgeGap) : [];
  }

  /**
   * Return gaps sorted by frequency (most-queried first), capped at `limit`.
   * A negative or NaN `limit` is treated as 0.
   */
  topGaps(limit: number): KnowledgeGap[] {
    const cap = Number.isNaN(limit) ? 0 : Math.max(0, Math.floor(limit));
    return this.loadGaps()
      .sort((a, b) => b.occurrences - a.occurrences)
      .slice(0, cap);
  }

  /**
   * Remove a gap (case-insensitive match) after it has been researched and kept.
   */
  removeGap(query: string): void {
    const needle = query.toLowerCase();
    this.saveGaps(
      this.loadGaps().filter((g) => g.query.toLowerCase() !== needle)
    );
  }

  /** Map raw search quality to a gap type, or `null` when results were good enough. */
  private classifyGap(
    resultCount: number,
    maxScore: number
  ): KnowledgeGap["gapType"] | null {
    if (resultCount === 0) return "no_results";
    if (maxScore < 0.3) return "low_confidence";
    if (resultCount <= 2 && maxScore < 0.5) return "partial_match";
    return null;
  }

  /**
   * Persist gaps, keeping at most MAX_STORED_GAPS entries. When trimming, the
   * most frequent gaps win and ties go to the most recently seen, so a full
   * file of stale one-off gaps cannot block new gaps from being stored.
   */
  private saveGaps(gaps: KnowledgeGap[]): void {
    const toSave =
      gaps.length > MAX_STORED_GAPS
        ? [...gaps] // copy: never reorder the caller's array
            .sort(
              (a, b) =>
                b.occurrences - a.occurrences ||
                // ISO-8601 timestamps order correctly as plain strings
                (a.lastSeen < b.lastSeen ? 1 : a.lastSeen > b.lastSeen ? -1 : 0)
            )
            .slice(0, MAX_STORED_GAPS)
        : gaps;
    fs.mkdirSync(path.dirname(this.gapsFile), { recursive: true });
    fs.writeFileSync(this.gapsFile, JSON.stringify(toSave, null, 2));
  }
}

// ---------------------------------------------------------------------------
// Research engine — runs during REM phase
// ---------------------------------------------------------------------------

/** Search backend supplied by the caller (server.ts passes its memory search). */
export type SearchFn = (
  query: string,
  maxResults?: number
) => Array<{ path: string; snippet: string; score: number }>;

/**
 * Classify a hit by its path: anything under a `memory/` directory is "memory",
 * everything else is "codebase". Both `/` and `\` separators are recognised.
 */
function inferSourceType(filePath: string): ResearchSource["type"] {
  const normalized = filePath.toLowerCase().replace(/\\/g, "/");
  if (normalized.startsWith("memory/") || normalized.includes("/memory/")) {
    return "memory";
  }
  return "codebase";
}

export class ResearchEngine {
  private readonly workspace: string;
  private readonly dreamsDir: string;
  private readonly config: AutoResearchConfig;
  private readonly gapTracker: GapTracker;

  /**
   * @param workspace Workspace root; gaps live in `<workspace>/memory/.dreams`
   *                  and the diary in `<workspace>/DREAMS.md`.
   * @param config    Overrides merged over the defaults (see mergeAutoResearchConfig).
   */
  constructor(workspace: string, config?: Partial<AutoResearchConfig>) {
    this.workspace = workspace;
    this.dreamsDir = path.join(workspace, "memory", ".dreams");
    this.config = mergeAutoResearchConfig(config);
    this.gapTracker = new GapTracker(this.dreamsDir);
  }

  /**
   * Run the research loop over top gaps. Uses `searchFn` (the existing
   * memory_search backend) as the only source of hits.
   *
   * Kept gaps are removed from the gap file, and a summary block is appended
   * to DREAMS.md. Promotion to MEMORY.md is handled by the caller (dreaming
   * Deep phase).
   *
   * The time budget is checked before each gap, so the loop may stop early;
   * `gapsInvestigated` counts only the gaps that were actually processed.
   */
  runResearchLoop(searchFn: SearchFn): AutoResearchResult {
    const gaps = this.gapTracker.topGaps(this.config.maxGapsPerNight);
    const kept: ResearchCandidate[] = [];
    const discarded: ResearchCandidate[] = [];
    const deadline = Date.now() + this.config.maxResearchTimeMinutes * 60_000;

    for (const gap of gaps) {
      if (Date.now() > deadline) break;

      const candidate = this.investigateGap(gap, searchFn);
      if (candidate.kept) {
        kept.push(candidate);
        this.gapTracker.removeGap(gap.query);
      } else {
        discarded.push(candidate);
      }
    }

    const dreamsMarkdown = this.formatDreamsBlock(kept, discarded);
    this.appendToDreams(dreamsMarkdown);

    return {
      gapsInvestigated: kept.length + discarded.length,
      kept,
      discarded,
      dreamsMarkdown,
    };
  }

  /** Run one search, treating a thrown error as "no results" (research is best-effort). */
  private safeSearch(searchFn: SearchFn, query: string): ResearchSource[] {
    try {
      return searchFn(query, 3).map((hit) => ({
        type: inferSourceType(hit.path),
        path: hit.path,
        snippet: hit.snippet.slice(0, 300),
        score: hit.score,
      }));
    } catch {
      return [];
    }
  }

  /** Gather sources for one gap, score them, and decide keep/discard. */
  private investigateGap(
    gap: KnowledgeGap,
    searchFn: SearchFn
  ): ResearchCandidate {
    // Words of 1–2 characters are treated as noise.
    const keywords = gap.query.split(/\s+/).filter((w) => w.length > 2);
    const sources: ResearchSource[] = [];

    // Strategy 1: broader search — first three keywords individually,
    // ignoring hits at or below a 0.2 score.
    for (const keyword of keywords.slice(0, 3)) {
      sources.push(
        ...this.safeSearch(searchFn, keyword).filter((s) => s.score > 0.2)
      );
    }

    // Strategy 2 (fallback): first half of the keywords as one query.
    // No score floor here — this only runs when strategy 1 found nothing.
    if (sources.length === 0 && keywords.length > 0) {
      const firstHalf = keywords.slice(
        0,
        Math.max(1, Math.ceil(keywords.length / 2))
      );
      sources.push(...this.safeSearch(searchFn, firstHalf.join(" ")));
    }

    // De-duplicate, then rank best-first (copy so ranking is explicit, not a
    // side effect of sorting a shared array).
    const ranked = [...this.deduplicateSources(sources)].sort(
      (a, b) => b.score - a.score
    );

    const confidence = this.computeConfidence(ranked);
    const validationMethod = this.classifyValidation(ranked);
    const kept = confidence >= this.config.confidenceThreshold;

    // Synthesize snippet from the two best sources.
    const snippet = ranked
      .slice(0, 2)
      .map((s) => s.snippet)
      .join(" | ")
      .slice(0, 500);

    let discardReason: string | undefined;
    if (!kept) {
      discardReason =
        ranked.length === 0 ? "no sources found" : "below confidence threshold";
    }

    return {
      query: gap.query,
      sources: ranked,
      snippet,
      confidenceScore: confidence,
      validationMethod,
      kept,
      discardReason,
    };
  }

  /** Drop repeats; two sources are the same when type, path, and first 50 snippet chars match. */
  private deduplicateSources(sources: ResearchSource[]): ResearchSource[] {
    const seen = new Set<string>();
    return sources.filter((s) => {
      const key = `${s.type}:${s.path || ""}:${s.snippet.slice(0, 50)}`;
      if (seen.has(key)) return false;
      seen.add(key);
      return true;
    });
  }

  /**
   * Confidence in [0, 1]: 0 when there are no sources, otherwise
   * `avgScore * 0.65 + sourceFactor * 0.25 + diversityBonus * 0.10`, capped at 1.
   */
  private computeConfidence(sources: ResearchSource[]): number {
    if (sources.length === 0) return 0;

    const avgScore =
      sources.reduce((sum, s) => sum + s.score, 0) / sources.length;
    // Source-count factor: 1 source gives 0.5, 2 gives 0.75, 3+ gives 1.0
    const sourceFactor = Math.min(0.25 + 0.25 * sources.length, 1.0);
    // Type diversity bonus: only applies when multiple distinct types are present
    const uniqueTypes = new Set(sources.map((s) => s.type)).size;
    const diversityBonus = uniqueTypes >= 2 ? 0.15 : 0;

    // A single high-quality source (score 0.8) → 0.8*0.65 + 0.5*0.25  = 0.645
    // Two sources avg 0.7                      → 0.7*0.65 + 0.75*0.25 = 0.643
    // Two diverse sources avg 0.7              → 0.643 + 0.15*0.10    = 0.658
    return Math.min(
      avgScore * 0.65 + sourceFactor * 0.25 + diversityBonus * 0.10,
      1.0
    );
  }

  /**
   * Label how a finding was validated, by the source types present:
   * two or more types → "multi-source"; only codebase → "code-crossref";
   * only web → "web-confirm"; otherwise (only memory, or none) → "single-source".
   */
  private classifyValidation(
    sources: ResearchSource[]
  ): ResearchCandidate["validationMethod"] {
    const types = new Set(sources.map((s) => s.type));
    if (types.size >= 2) return "multi-source";
    if (types.has("codebase")) return "code-crossref";
    if (types.has("web")) return "web-confirm";
    return "single-source";
  }

  /**
   * Render the DREAMS.md block: kept findings first, then discarded ones,
   * numbered continuously. Returns "" when nothing was investigated.
   */
  private formatDreamsBlock(
    kept: ResearchCandidate[],
    discarded: ResearchCandidate[]
  ): string {
    const total = kept.length + discarded.length;
    if (total === 0) return "";

    // UTC, "YYYY-MM-DD HH:MM:SS"
    const now = new Date().toISOString().slice(0, 19).replace("T", " ");
    const lines: string[] = [
      "",
      `## REM Research — ${now}`,
      "",
      `### Investigated ${total} knowledge gap${total !== 1 ? "s" : ""}`,
      "",
    ];

    let idx = 1;
    for (const c of kept) {
      const srcSummary = c.sources
        .map((s) => `${s.type}${s.path ? ` (${s.path})` : ""}`)
        .join(", ");
      lines.push(
        `${idx}. ✅ "${toSingleLine(c.query)}" (confidence: ${c.confidenceScore.toFixed(2)})`
      );
      lines.push(`   - Sources: ${srcSummary}`);
      // Snippets come from file contents and often span lines; a raw newline
      // would terminate the list item.
      lines.push(`   - Learned: ${toSingleLine(c.snippet).slice(0, 200)}`);
      lines.push(`   - Validation: ${c.validationMethod}`);
      lines.push("");
      idx++;
    }

    for (const c of discarded) {
      lines.push(
        `${idx}. ❌ "${toSingleLine(c.query)}" (confidence: ${c.confidenceScore.toFixed(2)}) — DISCARDED`
      );
      lines.push(`   - Reason: ${c.discardReason}`);
      lines.push("");
      idx++;
    }

    return lines.join("\n");
  }

  /**
   * Append a block to `<workspace>/DREAMS.md`, creating the file with a title
   * header if it does not exist. Write failures are reported on stderr and
   * otherwise ignored.
   */
  private appendToDreams(markdown: string): void {
    if (!markdown) return;
    const dreamsPath = path.join(this.workspace, "DREAMS.md");
    try {
      if (fs.existsSync(dreamsPath)) {
        fs.appendFileSync(dreamsPath, markdown);
      } else {
        fs.writeFileSync(
          dreamsPath,
          `# Dreams\n\n*Memory consolidation diary.*\n${markdown}`
        );
      }
    } catch (err) {
      // Log but don't crash — dreaming is best-effort
      process.stderr.write(
        `[clawcode/autoresearch] Failed to write DREAMS.md: ${err instanceof Error ? err.message : String(err)}\n`
      );
    }
  }

  /** Return a copy of the effective configuration. */
  getConfig(): AutoResearchConfig {
    return { ...this.config, sources: [...this.config.sources] };
  }
}

/**
 * Merge user overrides over the defaults.
 *
 * Keys whose value is `undefined` or `null` are ignored, so an optional field
 * that is present-but-empty in the loaded config cannot erase a default (an
 * undefined threshold would make every `confidence >= threshold` check false).
 */
export function mergeAutoResearchConfig(
  partial?: Partial<AutoResearchConfig>
): AutoResearchConfig {
  const overrides = Object.fromEntries(
    Object.entries(partial ?? {}).filter(
      ([, value]) => value !== undefined && value !== null
    )
  );
  const merged: AutoResearchConfig = {
    ...DEFAULT_AUTORESEARCH_CONFIG,
    ...overrides,
  };
  // Never hand out the shared default array.
  return { ...merged, sources: [...merged.sources] };
}
*/ + maxGapsPerNight?: number; + /** Minimum confidence to keep a research finding. Default: 0.6. */ + confidenceThreshold?: number; + /** Sources to consult. Default: ["codebase", "memory"]. */ + sources?: Array<"codebase" | "memory" | "web">; + /** Time budget per cycle in minutes. Default: 10. */ + maxResearchTimeMinutes?: number; + }; }; memory: { /** "builtin" = SQLite+FTS5 (default), "qmd" = QMD external tool */ diff --git a/server.ts b/server.ts index 8d68572..b23d8cc 100644 --- a/server.ts +++ b/server.ts @@ -49,6 +49,7 @@ import { import { extractKeywords } from "./lib/keywords.ts"; import { MemoryDB } from "./lib/memory-db.ts"; import { QmdManager } from "./lib/qmd-manager.ts"; +import { GapTracker, ResearchEngine, mergeAutoResearchConfig } from "./lib/autoresearch.ts"; import type { SearchResult } from "./lib/types.ts"; // --------------------------------------------------------------------------- @@ -106,6 +107,9 @@ try { // Dream engine (always available — uses recall data from .dreams/) const dreamEngine = new DreamEngine(WORKSPACE); +// Gap tracker (always active — records low-quality searches for autoresearch) +const gapTracker = new GapTracker(DREAMS_DIR); + // Initialize QMD if configured (non-blocking, with full error isolation) let qmdManager: QmdManager | null = null; if (config.memory.backend === "qmd") { @@ -471,6 +475,12 @@ function trackRecall( recall.updatedAt = now; fs.writeFileSync(recallPath, JSON.stringify(recall, null, 2)); + + // Track knowledge gaps — searches with poor results feed the autoresearch loop + const maxScore = results.length > 0 + ? Math.max(...results.map((r) => r.score)) + : 0; + gapTracker.recordGap(query, results.length, maxScore); } catch { // Dream tracking is best-effort } @@ -484,7 +494,8 @@ function trackRecall( const MCP_TOOL_DIRECTORY: Array<{ name: string; description: string }> = [ { name: "memory_search", description: "Search memory with BM25, temporal decay, MMR." 
}, { name: "memory_get", description: "Read specific lines from a memory file." }, - { name: "dream", description: "Run memory consolidation (status / run / dry-run)." }, + { name: "dream", description: "Run memory consolidation (status / run / dry-run). AutoResearch runs during REM if enabled." }, + { name: "knowledge_gaps", description: "List detected knowledge gaps from low-quality searches, or manually add a gap for autoresearch to investigate." }, { name: "agent_config", description: "View or update agent settings." }, { name: "agent_status", description: "Show identity, memory stats, dreaming state." }, { name: "memory_context", description: "Active-memory turn-start reflex — digest relevant context." }, @@ -628,6 +639,30 @@ server.setRequestHandler(ListToolsRequestSchema, async () => ({ required: ["action"], }, }, + { + name: "knowledge_gaps", + description: + "View or add knowledge gaps detected from low-quality memory searches. Gaps feed the autoresearch loop during dreaming. Use action='list' to see current gaps, action='add' to manually register a gap you want researched.", + inputSchema: { + type: "object" as const, + properties: { + action: { + type: "string", + enum: ["list", "add"], + description: "'list' to view gaps, 'add' to register a new gap for research", + }, + query: { + type: "string", + description: "For action='add': the query/topic you want researched", + }, + limit: { + type: "number", + description: "For action='list': max gaps to return (default: 10)", + }, + }, + required: ["action"], + }, + }, { name: "agent_config", description: @@ -1058,6 +1093,51 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { const dryRun = action === "dry-run"; const result = dreamEngine.runFullSweep({ dryRun }); + // AutoResearch — run during REM if enabled + const arConfig = mergeAutoResearchConfig( + (loadConfig(WORKSPACE) as any).dreaming?.autoresearch + ); + let arSection = ""; + if (arConfig.enabled && !dryRun) { + const engine = 
new ResearchEngine(WORKSPACE, arConfig); + const arResult = engine.runResearchLoop( + (q, max) => searchMemory(q, max || 6) + ); + if (arResult.gapsInvestigated > 0) { + arSection = [ + "", + "### AutoResearch (REM)", + `Gaps investigated: ${arResult.gapsInvestigated}`, + `Kept: ${arResult.kept.length} | Discarded: ${arResult.discarded.length}`, + arResult.kept.length > 0 + ? arResult.kept + .map( + (c) => + `- ✅ "${c.query}" (${c.confidenceScore.toFixed(2)}, ${c.validationMethod})` + ) + .join("\n") + : "", + arResult.discarded.length > 0 + ? arResult.discarded + .map( + (c) => + `- ❌ "${c.query}" — ${c.discardReason}` + ) + .join("\n") + : "", + ] + .filter(Boolean) + .join("\n"); + } + } else if (arConfig.enabled && dryRun) { + const gaps = gapTracker.topGaps(arConfig.maxGapsPerNight); + arSection = + gaps.length > 0 + ? `\n### AutoResearch (would investigate ${gaps.length} gaps)\n` + + gaps.map((g) => `- "${g.query}" (${g.occurrences}x, ${g.gapType})`).join("\n") + : ""; + } + return { content: [ { @@ -1072,6 +1152,7 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { result.themes.length > 0 ? `Themes found: ${result.themes.join(", ")}` : "No recurring themes yet.", + arSection, "", "### Deep Phase", `Total candidates: ${result.candidates.length}`, @@ -1117,6 +1198,68 @@ server.setRequestHandler(CallToolRequestSchema, async (request) => { } } + if (name === "knowledge_gaps") { + const action = String(params.action || "list"); + try { + if (action === "list") { + const limit = Number(params.limit) || 10; + const gaps = gapTracker.topGaps(limit); + if (gaps.length === 0) { + return { + content: [ + { + type: "text", + text: "No knowledge gaps detected yet. Gaps are recorded automatically when memory_search returns poor results.", + }, + ], + }; + } + const lines = [ + `## Knowledge Gaps (${gaps.length})`, + "", + ...gaps.map( + (g, i) => + `${i + 1}. 
"${g.query}" — ${g.gapType}, ${g.occurrences}x (last: ${g.lastSeen.slice(0, 10)})` + ), + "", + 'Enable `dreaming.autoresearch.enabled: true` in agent-config.json to research these during the dream cycle.', + ]; + return { content: [{ type: "text", text: lines.join("\n") }] }; + } + + if (action === "add") { + const query = String(params.query || "").trim(); + if (!query) { + return { + content: [{ type: "text", text: "Error: query is required for action='add'" }], + isError: true, + }; + } + gapTracker.recordGap(query, 0, 0); + return { + content: [ + { + type: "text", + text: `Knowledge gap registered: "${query}". It will be investigated during the next dream cycle (if autoresearch is enabled).`, + }, + ], + }; + } + + return { + content: [{ type: "text", text: 'Unknown action. Use: "list" or "add"' }], + isError: true, + }; + } catch (err) { + return { + content: [ + { type: "text", text: `Knowledge gaps error: ${err instanceof Error ? err.message : String(err)}` }, + ], + isError: true, + }; + } + } + if (name === "agent_config") { const action = String(params.action || "get");