From b6b71792fed18e17f8fbe2daac5e4da4d63b77cc Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 15:16:21 +0100 Subject: [PATCH 01/32] FE-864: raise pi action timeout to 600s MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each cook agent action (write-tests, write-code, verify-epic) runs under a per-action wall-clock budget enforced in pi-actions.ts. Raise the default from 300s to 600s so Sonnet agents have headroom on larger slices and on brownfield repos where setup/discovery eats into the turn. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/orchestrator/src/pi-actions.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/orchestrator/src/pi-actions.ts b/src/orchestrator/src/pi-actions.ts index a114a384..86a6b57e 100644 --- a/src/orchestrator/src/pi-actions.ts +++ b/src/orchestrator/src/pi-actions.ts @@ -69,7 +69,7 @@ async function withActivity(id: string, label: string, fn: () => Promise): // Pi dispatch // --------------------------------------------------------------------------- -const PI_TIMEOUT_MS = 300_000; +const PI_TIMEOUT_MS = 600_000; // Output cap — the timeout alone won't stop a fast, chatty agent. const PI_MAX_OUTPUT = 10 * 1024 * 1024; From 1928ea339d605c59d8845861940aad43f4c61946 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 22:35:02 +0100 Subject: [PATCH 02/32] =?UTF-8?q?FE-878:=20TUI=20polish=20=E2=80=94=20glob?= =?UTF-8?q?al=20timer,=20lowercase=20wordmark,=20Static=20log=20stream,=20?= =?UTF-8?q?clean=20failures?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Iterating on the live TUI from real-terminal feedback: - One global run timer in the footer instead of a per-item clock on every pending row (and whole-second, no jittery decimals).
- "brunch" wordmark is now a big lowercase figlet (Slant) in a warm orange gradient, replacing the egg. - Activity log + wordmark stream through Ink so the full run lands in scrollback instead of collapsing in a redrawn bounded box; line cap removed. - Brigade tracker no longer lights "taste" mid-cook — per-slice verify actions fire during cooking, so taste stays unlit until a real end-of-cook signal. - Failures throw instead of process.exit, so withCookBus disposes (unmounts Ink) before the error prints — no more frozen "prep ◐" hang. cook validates args before mounting the TUI and rejects unknown flags (e.g. --spec-id). check + presenter/cook/pi-actions tests green; full build deferred (active graphite stack navigation). Co-Authored-By: Claude --- src/orchestrator/src/cook-cli.ts | 21 ++-- .../src/presenter/ink/app.test.tsx | 4 +- src/orchestrator/src/presenter/ink/app.tsx | 103 +++++++++--------- .../src/presenter/ink/wordmark.ts | 23 ++-- src/orchestrator/src/presenter/phase.test.ts | 16 +-- src/orchestrator/src/presenter/phase.ts | 6 +- .../src/presenter/run-store.test.ts | 12 +- src/orchestrator/src/presenter/run-store.ts | 15 ++- src/server/cli.ts | 52 +++++---- 9 files changed, 131 insertions(+), 121 deletions(-) diff --git a/src/orchestrator/src/cook-cli.ts b/src/orchestrator/src/cook-cli.ts index 585dfe4a..fffb9754 100644 --- a/src/orchestrator/src/cook-cli.ts +++ b/src/orchestrator/src/cook-cli.ts @@ -120,6 +120,10 @@ export function parseCookArgs(args: string[]): CookOptions { verbose = true; } else if (!arg.startsWith('-')) { dir = arg; + } else { + // Reject unknown flags instead of silently ignoring them (e.g. --spec-id + // is not a flag; the spec selector is --spec=). + throw new Error(`Unknown flag "${arg}".
Run "brunch --help" for cook usage.`); } } @@ -426,28 +430,21 @@ export async function runCook(opts: CookOptions, bus: CookBus): Promise { cliFlag: opts.petrinautUrl, env: { PETRINAUT_URL: process.env.PETRINAUT_URL }, }); - if ('error' in resolvedUrl) { - line(resolvedUrl.error); - process.exit(1); - } + // Throw, never process.exit β€” the caller (withCookBus) must dispose the + // presenter (unmount Ink) before the error is printed, or the TUI hangs. + if ('error' in resolvedUrl) throw new Error(resolvedUrl.error); petrinautUrl = resolvedUrl.url; streamPort = resolvePetrinautStreamPort({ PORT: process.env.PORT }); } const resolved = resolveCookPlan(opts.dir, opts.specId); - if (resolved.kind === 'error') { - line(resolved.message); - process.exit(1); - } + if (resolved.kind === 'error') throw new Error(resolved.message); const plan = loadPlan(resolved.planPath); // Worktree strategy follows the plan's spec-derived mode, not its location. const sandbox = resolveSandboxPlan(plan.mode, resolved.sourceDir); - if (sandbox.kind === 'error') { - line(sandbox.message); - process.exit(1); - } + if (sandbox.kind === 'error') throw new Error(sandbox.message); // Single shared tree only for serial greenfield (parallel would race on it); // every other case isolates slices per-slice. diff --git a/src/orchestrator/src/presenter/ink/app.test.tsx b/src/orchestrator/src/presenter/ink/app.test.tsx index 667af51b..39cbe2e6 100644 --- a/src/orchestrator/src/presenter/ink/app.test.tsx +++ b/src/orchestrator/src/presenter/ink/app.test.tsx @@ -18,8 +18,8 @@ describe('Ink App', () => { await tick(); const frame = lastFrame() ?? ''; - // Wordmark header + command. - expect(frame).toContain('brunch'); + // Big lowercase ASCII wordmark rendered + the command label. + expect(frame).toContain('/_.___/'); expect(frame).toContain('cook'); // Brigade tracker shows every phase, with cook active (◐) once cooking. 
expect(frame).toContain('prep'); diff --git a/src/orchestrator/src/presenter/ink/app.tsx b/src/orchestrator/src/presenter/ink/app.tsx index d67ba031..32ec921f 100644 --- a/src/orchestrator/src/presenter/ink/app.tsx +++ b/src/orchestrator/src/presenter/ink/app.tsx @@ -1,31 +1,21 @@ -// The full-screen Ink view: brunch wordmark header, brigade phase tracker, and a -// bounded live activity log. A thin projection of RunStore β€” all folding -// lives in the store + the pure phase tracker, so this stays declarative. +// The full-screen Ink view. The wordmark + activity log stream into terminal +// scrollback via (printed once each, so the full run is preserved and +// nothing "collapses"); a live footer below shows the brigade tracker, the +// single global run timer, and the pending-wait spinner. A thin projection of +// RunStore β€” all folding lives in the store + the pure phase tracker. -import { Box, Text } from 'ink'; -import { useEffect, useState, useSyncExternalStore } from 'react'; +import { Box, Static, Text } from 'ink'; +import { useEffect, useMemo, useState, useSyncExternalStore } from 'react'; import { formatElapsed } from '../clock.js'; import { BRIGADE, type BrigadePhase } from '../phase.js'; import type { PendingActivity, RunStore } from '../run-store.js'; -import { BRUNCH_WORDMARK } from './wordmark.js'; +import { BRUNCH_ASCII, BRUNCH_ORANGE } from './wordmark.js'; -const LOG_TAIL = 15; const SPINNER = ['β ‹', 'β ™', 'β Ή', 'β Έ', 'β Ό', 'β ΄', 'β ¦', 'β §', 'β ‡', '⠏']; const TICK_MS = 250; -function Header({ command }: { command: string }) { - return ( - - {BRUNCH_WORDMARK.map(({ ch, color }) => ( - - {ch} - - ))} - {command} - - ); -} +type ScrollItem = { kind: 'mark'; text: string; color: string } | { kind: 'log'; text: string }; const STATUS_ICON = { done: 'βœ“', active: '◐', pending: 'β—‹' } as const; @@ -47,31 +37,14 @@ function Brigade({ phase }: { phase: BrigadePhase }) { ); } -function ActivityLog({ lines }: { lines: string[] }) { - return ( 
- - {lines.slice(-LOG_TAIL).map((line, i) => ( - {line === '' ? ' ' : line} - ))} - - ); -} - -function PendingPanel({ - pending, - now, - frame, -}: { - pending: PendingActivity[]; - now: () => number; - frame: string; -}) { +function PendingPanel({ pending, frame }: { pending: PendingActivity[]; frame: string }) { if (pending.length === 0) return null; + // One global timer lives in the footer; rows show only what's running. return ( - + {pending.map((a) => ( - {frame} {a.label} Β· {formatElapsed(now() - a.startedAt)} + {frame} {a.label} {a.detail ? ` Β· ${a.detail}` : ''} ))} @@ -82,24 +55,50 @@ function PendingPanel({ export function App({ store, now = () => Date.now() }: { store: RunStore; now?: () => number }) { const state = useSyncExternalStore(store.subscribe, store.getSnapshot, store.getSnapshot); - // Tick only while something is pending, so the spinner/elapsed advance even - // between events; the interval is torn down as soon as the waits clear. + // One ticker drives the spinner and the global elapsed clock while mounted. const [tick, setTick] = useState(0); - const hasPending = state.pending.length > 0; useEffect(() => { - if (!hasPending) return; const id = setInterval(() => setTick((t) => t + 1), TICK_MS); return () => clearInterval(id); - }, [hasPending]); + }, []); + + // Wordmark (once) + the append-only log β†’ , so they stream into + // scrollback rather than redrawing in a bounded box. + const scroll = useMemo( + () => [ + ...BRUNCH_ASCII.map((text, i) => ({ + kind: 'mark' as const, + text, + color: BRUNCH_ORANGE[i % BRUNCH_ORANGE.length]!, + })), + ...state.lines.map((text) => ({ kind: 'log' as const, text })), + ], + [state.lines], + ); return ( - -
- - + <> + + {(item, i) => + item.kind === 'mark' ? ( + + {item.text} + + ) : ( + {item.text === '' ? ' ' : item.text} + ) + } + + + + + + {' '} + {state.command} Β· {formatElapsed(now() - state.runStart)} + + + - - - + ); } diff --git a/src/orchestrator/src/presenter/ink/wordmark.ts b/src/orchestrator/src/presenter/ink/wordmark.ts index fc793939..d7404ed5 100644 --- a/src/orchestrator/src/presenter/ink/wordmark.ts +++ b/src/orchestrator/src/presenter/ink/wordmark.ts @@ -1,12 +1,15 @@ -// The "brunch" wordmark for the TUI header, tinted with the brunch.ai brand -// gradient (HASH blue β†’ indigo β†’ violet, from the product mark). One hex per -// letter, left to right. The plain/CI backend stays untinted. +// The "brunch" wordmark for the TUI header: a big lowercase figlet (Slant), +// tinted top-to-bottom with a warm orange theme (the kind of sunset gradient +// CLI tools tend to use). Generated once with figlet (no runtime dep). The +// plain/CI backend stays untinted and prints no banner. -export const BRUNCH_WORDMARK: readonly { ch: string; color: string }[] = [ - { ch: 'b', color: '#00BBFF' }, - { ch: 'r', color: '#0080FF' }, - { ch: 'u', color: '#0046FF' }, - { ch: 'n', color: '#3A36FF' }, - { ch: 'c', color: '#5424FF' }, - { ch: 'h', color: '#6D2BF6' }, +export const BRUNCH_ASCII: readonly string[] = [ + ' __ __ ', + ' / /_ _______ ______ _____/ /_ ', + ' / __ \\/ ___/ / / / __ \\/ ___/ __ \\', + ' / /_/ / / / /_/ / / / / /__/ / / /', + '/_.___/_/ \\__,_/_/ /_/\\___/_/ /_/ ', ]; + +// One shade per row, light amber β†’ deep ember. 
+export const BRUNCH_ORANGE: readonly string[] = ['#FFB454', '#FFA033', '#FF8C1A', '#FF7A00', '#F26419']; diff --git a/src/orchestrator/src/presenter/phase.test.ts b/src/orchestrator/src/presenter/phase.test.ts index e3a78a81..6c894bb1 100644 --- a/src/orchestrator/src/presenter/phase.test.ts +++ b/src/orchestrator/src/presenter/phase.test.ts @@ -13,21 +13,23 @@ describe('nextPhase', () => { expect(nextPhase('prep', { kind: 'cook-start', runStart: 0 })).toBe('cook'); }); - it('advances to taste on an epic/verify action and to plate on a promotion line', () => { - expect(nextPhase('cook', { kind: 'action', icon: 'β–Έ', message: 'verify api-auth' })).toBe('taste'); + it('does NOT advance to taste on verify/epic actions (they fire mid-cook)', () => { + expect(nextPhase('cook', { kind: 'action', icon: 'β–Έ', message: 'verify api-auth' })).toBe('cook'); expect(nextPhase('cook', { kind: 'action', icon: '●', message: 'epic api-auth β†’ PASS' })).toBe( - 'taste', + 'cook', ); - expect(nextPhase('taste', { kind: 'line', text: ' βœ“ promoted β†’ cook/abc @ 1234abcd' })).toBe('plate'); + }); + + it('advances to plate on a promotion line', () => { + expect(nextPhase('cook', { kind: 'line', text: ' βœ“ promoted β†’ cook/abc @ 1234abcd' })).toBe('plate'); }); it('never regresses to an earlier phase', () => { - // A per-slice action after taste must not pull the tracker back to cook. 
- expect(nextPhase('taste', { kind: 'action', icon: 'β–Έ', message: 'tests slice-2' })).toBe('taste'); expect(nextPhase('plate', { kind: 'cook-start', runStart: 0 })).toBe('plate'); + expect(nextPhase('cook', { kind: 'action', icon: 'β–Έ', message: 'tests slice-2' })).toBe('cook'); }); - it('walks a full cook run prep β†’ cook β†’ taste β†’ plate', () => { + it('walks a full cook run prep β†’ cook β†’ plate (taste stays unlit)', () => { expect( walk([ { kind: 'cook-start', runStart: 0 }, diff --git a/src/orchestrator/src/presenter/phase.ts b/src/orchestrator/src/presenter/phase.ts index e74b1e0b..62aab16d 100644 --- a/src/orchestrator/src/presenter/phase.ts +++ b/src/orchestrator/src/presenter/phase.ts @@ -25,8 +25,10 @@ function phaseFor(event: CookEvent): BrigadePhase | undefined { return 'recipe'; case 'cook-start': return 'cook'; - case 'action': - return /^(verify|epic)/.test(event.message) ? 'taste' : undefined; + // `taste` is intentionally NOT auto-advanced: per-slice verify actions fire + // during cooking, so keying taste off them lit it while still cooking. It + // needs a real end-of-cook signal (a later slice); for now it stays unlit + // until the run passes it on the way to `plate`. case 'line': return event.text.includes('promoted') ? 
'plate' : undefined; default: diff --git a/src/orchestrator/src/presenter/run-store.test.ts b/src/orchestrator/src/presenter/run-store.test.ts index b7cda75d..f6ac491b 100644 --- a/src/orchestrator/src/presenter/run-store.test.ts +++ b/src/orchestrator/src/presenter/run-store.test.ts @@ -45,22 +45,24 @@ describe('RunStore', () => { }); it('tracks pending activities: start adds, progress updates detail, end removes', () => { - let clock = 1000; - const store = new RunStore('cook', () => clock); + const store = new RunStore('cook', () => 1000); store.push({ kind: 'activity-start', id: 'tests:slice-1', label: 'agent writing tests' }); - let pending = store.getSnapshot().pending; + const pending = store.getSnapshot().pending; expect(pending).toHaveLength(1); - expect(pending[0]).toMatchObject({ id: 'tests:slice-1', label: 'agent writing tests', startedAt: 1000 }); + expect(pending[0]).toMatchObject({ id: 'tests:slice-1', label: 'agent writing tests' }); store.push({ kind: 'activity-progress', id: 'tests:slice-1', detail: '8 KB' }); expect(store.getSnapshot().pending[0]).toMatchObject({ detail: '8 KB' }); - clock = 5000; store.push({ kind: 'activity-end', id: 'tests:slice-1' }); expect(store.getSnapshot().pending).toHaveLength(0); }); + it('stamps a run-start for the global timer at construction', () => { + expect(new RunStore('cook', () => 4242).getSnapshot().runStart).toBe(4242); + }); + it('does not put activity events into the scrolling line log', () => { const store = new RunStore('cook', () => 0); store.push({ kind: 'activity-start', id: 'a', label: 'booting app' }); diff --git a/src/orchestrator/src/presenter/run-store.ts b/src/orchestrator/src/presenter/run-store.ts index 81e9add8..ff019d6d 100644 --- a/src/orchestrator/src/presenter/run-store.ts +++ b/src/orchestrator/src/presenter/run-store.ts @@ -10,13 +10,10 @@ import type { CookEvent } from './events.js'; import { formatCookEvent } from './format.js'; import { type BrigadePhase, nextPhase } from 
'./phase.js'; -const MAX_LINES = 500; - export interface PendingActivity { id: string; label: string; detail?: string; - startedAt: number; } export interface RunState { @@ -24,6 +21,8 @@ export interface RunState { phase: BrigadePhase; lines: string[]; pending: PendingActivity[]; + /** When the run started, for the single global header timer. */ + runStart: number; } export class RunStore { @@ -36,14 +35,12 @@ export class RunStore { private readonly now: () => number = () => Date.now(), ) { this.clock = createElapsedClock(now); - this.state = { command, phase: 'prep', lines: [], pending: [] }; + this.state = { command, phase: 'prep', lines: [], pending: [], runStart: now() }; } push(event: CookEvent): void { if (event.kind === 'activity-start') { - this.commit({ - pending: [...this.state.pending, { id: event.id, label: event.label, startedAt: this.now() }], - }); + this.commit({ pending: [...this.state.pending, { id: event.id, label: event.label }] }); return; } if (event.kind === 'activity-progress') { @@ -60,7 +57,9 @@ export class RunStore { const added = formatCookEvent(event, this.clock); const phase = nextPhase(this.state.phase, event); if (added.length === 0 && phase === this.state.phase) return; - this.commit({ phase, lines: [...this.state.lines, ...added].slice(-MAX_LINES) }); + // Append-only β€” the Ink backend streams these through , which + // assumes items only grow; the lines live in terminal scrollback. 
+ this.commit({ phase, lines: [...this.state.lines, ...added] }); } private commit(patch: Partial): void { diff --git a/src/server/cli.ts b/src/server/cli.ts index 00f133fc..28ae029e 100644 --- a/src/server/cli.ts +++ b/src/server/cli.ts @@ -141,25 +141,31 @@ exitIfAnthropicApiKeyMissing(); if (rawArgs[0] === 'cook') { const { parseCookArgs, runCook } = await import('../orchestrator/src/cook-cli.js'); const { withCookBus } = await import('../orchestrator/src/presenter.js'); - const opts = parseCookArgs(rawArgs.slice(1)); - // withCookBus disposes the bus (unmounts the Ink app) in finally so the TTY run exits. - await withCookBus('cook', (bus) => runCook(opts, bus)).catch((error) => { - console.error('Failed to run brunch cook:', error); + try { + // Parse before mounting the TUI; withCookBus disposes (unmounts Ink) in + // finally so a run error tears the TUI down before we print it. + const opts = parseCookArgs(rawArgs.slice(1)); + await withCookBus('cook', (bus) => runCook(opts, bus)); + } catch (error) { + console.error(`Failed to run brunch cook: ${error instanceof Error ? error.message : String(error)}`); process.exit(1); - }); + } } else if (rawArgs[0] === 'serve') { const { runPlan } = await import('./plan-runner.js'); const { runCook } = await import('../orchestrator/src/cook-cli.js'); const { parseServeArgs, runServe } = await import('./serve-runner.js'); const { withCookBus } = await import('../orchestrator/src/presenter.js'); - await withCookBus('serve', (bus) => - withCompletedSpec( - 'serve', - () => parseServeArgs(rawArgs.slice(1)), - async (opts, { project, snapshot }) => { + // Validate args + spec BEFORE mounting the TUI, so a bad specId errors plainly + // (no chrome flash). withCookBus then owns the TUI only for the actual run, and + // disposes it in finally even if the run throws. 
+ await withCompletedSpec( + 'serve', + () => parseServeArgs(rawArgs.slice(1)), + async (opts, { project, snapshot }) => { + await withCookBus('serve', (bus) => // Cook runs against the same dir the plan was written to (launchCwd); see // serveCookOptions β€” runCook reads opts.dir raw, so serve must thread it. - await runServe(opts, launchCwd, { + runServe(opts, launchCwd, { plan: () => runPlan({ specificationId: opts.specificationId, @@ -172,19 +178,19 @@ if (rawArgs[0] === 'cook') { bus, }), cook: (cookOpts) => runCook(cookOpts, bus), - }); - }, - ), + }), + ); + }, ); } else if (rawArgs[0] === 'plan') { const { parsePlanArgs, runPlan } = await import('./plan-runner.js'); const { withCookBus } = await import('../orchestrator/src/presenter.js'); - await withCookBus('plan', (bus) => - withCompletedSpec( - 'plan', - () => parsePlanArgs(rawArgs.slice(1), launchCwd), - async (opts, { project, snapshot }) => { - await runPlan({ + await withCompletedSpec( + 'plan', + () => parsePlanArgs(rawArgs.slice(1), launchCwd), + async (opts, { project, snapshot }) => { + await withCookBus('plan', (bus) => + runPlan({ specificationId: opts.specificationId, snapshot, outDir: opts.outDir, @@ -193,9 +199,9 @@ if (rawArgs[0] === 'cook') { // Brownfield detection reads the launch cwd (the user's repo); greenfield ignores it. 
repoDir: project.cwd, bus, - }); - }, - ), + }), + ); + }, ); } else if (rawArgs[0] === 'agent') { const project = resolveBrunchProject(launchCwd); From 4db4aa5374e317df7292df06b31094c4db11e137 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 23:04:45 +0100 Subject: [PATCH 03/32] FE-878: light brigade taste (epic verdict) + serve (run complete) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wires the two remaining kitchen-brigade phases faithfully to the orchestrator-arcs mapping (verifyβ†’taste, shipβ†’serve): - taste lights on the epic-verification verdict (action `epic β†’ …`), not on per-slice `verify ` lines β€” those fire mid-cook and previously lit taste while still cooking. - serve lights on a new `cook-done` event emitted at the end of runCook (after promotion); a halted run never ships, so it never lights serve. phase.test covers both signals + the full prepβ†’cookβ†’tasteβ†’plateβ†’serve walk; check + presenter/cook tests green. Co-Authored-By: Claude --- src/orchestrator/src/cook-cli.ts | 2 ++ src/orchestrator/src/presenter/events.ts | 4 ++- src/orchestrator/src/presenter/format.ts | 3 +++ src/orchestrator/src/presenter/phase.test.ts | 27 ++++++++++++++------ src/orchestrator/src/presenter/phase.ts | 13 +++++++--- 5 files changed, 36 insertions(+), 13 deletions(-) diff --git a/src/orchestrator/src/cook-cli.ts b/src/orchestrator/src/cook-cli.ts index fffb9754..f48a8d16 100644 --- a/src/orchestrator/src/cook-cli.ts +++ b/src/orchestrator/src/cook-cli.ts @@ -612,6 +612,8 @@ export async function runCook(opts: CookOptions, bus: CookBus): Promise { } } + // Run complete (after promotion) β€” lights the brigade's `serve` phase. 
+ bus.emit({ kind: 'cook-done', ok }); recordCookExitStatus(ok); return; } finally { diff --git a/src/orchestrator/src/presenter/events.ts b/src/orchestrator/src/presenter/events.ts index 6be9e662..d00ac3e9 100644 --- a/src/orchestrator/src/presenter/events.ts +++ b/src/orchestrator/src/presenter/events.ts @@ -29,7 +29,9 @@ export type CookEvent = // Updates the in-flight detail of an open activity (e.g. a pi token heartbeat). | { kind: 'activity-progress'; id: string; detail: string } // Closes the activity; the wait is over. - | { kind: 'activity-end'; id: string }; + | { kind: 'activity-end'; id: string } + // The run finished (emitted after promotion); `ok` = completed vs halted. + | { kind: 'cook-done'; ok: boolean }; export interface Presenter { onEvent(event: CookEvent): void; diff --git a/src/orchestrator/src/presenter/format.ts b/src/orchestrator/src/presenter/format.ts index 5dc04935..24274e71 100644 --- a/src/orchestrator/src/presenter/format.ts +++ b/src/orchestrator/src/presenter/format.ts @@ -36,5 +36,8 @@ export function formatCookEvent(event: CookEvent, clock: ElapsedClock): string[] case 'activity-end': // Live-only: the Ink panel reflects these; the existing completion log marks the end. return []; + case 'cook-done': + // Phase signal only (lights `serve`); the run summary already printed. + return []; } } diff --git a/src/orchestrator/src/presenter/phase.test.ts b/src/orchestrator/src/presenter/phase.test.ts index 6c894bb1..75f1b4cd 100644 --- a/src/orchestrator/src/presenter/phase.test.ts +++ b/src/orchestrator/src/presenter/phase.test.ts @@ -13,30 +13,41 @@ describe('nextPhase', () => { expect(nextPhase('prep', { kind: 'cook-start', runStart: 0 })).toBe('cook'); }); - it('does NOT advance to taste on verify/epic actions (they fire mid-cook)', () => { + it('lights taste on the epic verdict but NOT on per-slice verify (mid-cook)', () => { + // Per-slice verify runs during cooking β€” must not light taste. 
expect(nextPhase('cook', { kind: 'action', icon: 'β–Έ', message: 'verify api-auth' })).toBe('cook'); - expect(nextPhase('cook', { kind: 'action', icon: '●', message: 'epic api-auth β†’ PASS' })).toBe( + expect(nextPhase('cook', { kind: 'action', icon: 'βœ“', message: 'verify tests/x.test.ts' })).toBe( 'cook', ); + // The epic-verification verdict is the real verifyβ†’taste signal. + expect(nextPhase('cook', { kind: 'action', icon: '●', message: 'epic api-auth β†’ PASS' })).toBe( + 'taste', + ); }); - it('advances to plate on a promotion line', () => { + it('advances to plate on a promotion line and to serve on a completed run', () => { expect(nextPhase('cook', { kind: 'line', text: ' βœ“ promoted β†’ cook/abc @ 1234abcd' })).toBe('plate'); + expect(nextPhase('plate', { kind: 'cook-done', ok: true })).toBe('serve'); + }); + + it('does not light serve when the run halted', () => { + expect(nextPhase('cook', { kind: 'cook-done', ok: false })).toBe('cook'); }); it('never regresses to an earlier phase', () => { - expect(nextPhase('plate', { kind: 'cook-start', runStart: 0 })).toBe('plate'); - expect(nextPhase('cook', { kind: 'action', icon: 'β–Έ', message: 'tests slice-2' })).toBe('cook'); + expect(nextPhase('serve', { kind: 'cook-start', runStart: 0 })).toBe('serve'); + expect(nextPhase('taste', { kind: 'action', icon: 'β–Έ', message: 'tests slice-2' })).toBe('taste'); }); - it('walks a full cook run prep β†’ cook β†’ plate (taste stays unlit)', () => { + it('walks a full cook run prep β†’ cook β†’ taste β†’ plate β†’ serve', () => { expect( walk([ { kind: 'cook-start', runStart: 0 }, { kind: 'action', icon: 'β–Έ', message: 'tests slice-1' }, - { kind: 'action', icon: 'β–Έ', message: 'verify api-auth' }, + { kind: 'action', icon: '●', message: 'epic api-auth β†’ PASS' }, { kind: 'line', text: ' βœ“ promoted β†’ cook/abc @ 1234abcd' }, + { kind: 'cook-done', ok: true }, ]), - ).toBe('plate'); + ).toBe('serve'); }); }); diff --git 
a/src/orchestrator/src/presenter/phase.ts b/src/orchestrator/src/presenter/phase.ts index 62aab16d..232244db 100644 --- a/src/orchestrator/src/presenter/phase.ts +++ b/src/orchestrator/src/presenter/phase.ts @@ -25,12 +25,17 @@ function phaseFor(event: CookEvent): BrigadePhase | undefined { return 'recipe'; case 'cook-start': return 'cook'; - // `taste` is intentionally NOT auto-advanced: per-slice verify actions fire - // during cooking, so keying taste off them lit it while still cooking. It - // needs a real end-of-cook signal (a later slice); for now it stays unlit - // until the run passes it on the way to `plate`. + case 'action': + // verifyβ†’taste fires on the epic-verification verdict (`epic β†’ …`), + // NOT on per-slice `verify ` lines β€” those run mid-cook and would + // light taste while still cooking. + return /^epic\b/.test(event.message) ? 'taste' : undefined; case 'line': return event.text.includes('promoted') ? 'plate' : undefined; + case 'cook-done': + // shipβ†’serve: the run completed (emitted after promotion). A halted run + // does not ship, so it never lights serve. + return event.ok ? 'serve' : undefined; default: return undefined; } From b0c06f0c2aa7b68aa702d3bd9b3a1763b8eee211 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 23:14:07 +0100 Subject: [PATCH 04/32] FE-878: stream the agent's latest line as the wait heartbeat (Option A) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each cook pi session was a black box in the pending panel β€” just a KB count. runPi already subscribes to the session's text stream; instead of bytes, surface the agent's latest non-empty line (tail-truncated, throttled every 2 KB) as the activity-progress detail, so a wait reads as live work ("agent writing tests Β· …adds the RefreshToken guard") rather than "still going". 
Kept headless createAgentSession — no pi InteractiveMode, no new pi API: pi's tool-call events come via an extension hook (on('tool_call')), not the subscribe stream, so a richer "editing / running " heartbeat is a separate follow-up that needs the extension-registration path verified. check + pi-actions/presenter tests green. Co-Authored-By: Claude --- src/orchestrator/src/pi-actions.ts | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/src/orchestrator/src/pi-actions.ts b/src/orchestrator/src/pi-actions.ts index 86a6b57e..d6ac545a 100644 --- a/src/orchestrator/src/pi-actions.ts +++ b/src/orchestrator/src/pi-actions.ts @@ -55,6 +55,18 @@ function logVerbose(output: string): void { _emit({ kind: 'verbose', text: output }); } +const HEARTBEAT_MAX = 56; + +/** The agent's most recent non-empty line, tail-truncated for a one-line wait heartbeat. */ +function latestLine(text: string): string { + const lines = text.split('\n'); + for (let i = lines.length - 1; i >= 0; i--) { + const line = lines[i]!.trim(); + if (line) return line.length > HEARTBEAT_MAX ? `…${line.slice(-HEARTBEAT_MAX)}` : line; + } + return ''; +} + /** Bracket a wait so it shows as a live pending activity; always closes. */ async function withActivity(id: string, label: string, fn: () => Promise): Promise { _emit({ kind: 'activity-start', id, label }); @@ -223,11 +235,14 @@ async function runPi( } captured += delta; capturedBytes += deltaBytes; - // Throttled heartbeat — every 2 KB — so the spinner shows progress, not churn. + // Throttled heartbeat — every 2 KB — surface what the agent is currently + // saying (its latest line) instead of a raw byte count, so the wait reads + // as live work, not just "still going".
const kb = Math.floor(capturedBytes / 1024); if (kb >= heartbeatKb + 2) { heartbeatKb = kb; - _emit({ kind: 'activity-progress', id: opts.label, detail: `${kb} KB` }); + const snippet = latestLine(captured); + if (snippet) _emit({ kind: 'activity-progress', id: opts.label, detail: snippet }); } } }); From eb935377041b3a7006e696a42d6a96f36612baae Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 23:31:01 +0100 Subject: [PATCH 05/32] =?UTF-8?q?FE-878:=20per-tool/per-file=20heartbeat?= =?UTF-8?q?=20=E2=80=94=20instrument=20pi's=20coding=20tools?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Richer "what the agent is doing" in the pending panel (the spike's Option A, full tier): instead of only the agent's latest line, show the tool calls β€” "edit src/auth/token.ts", "bash bun test", "grep RefreshToken". pi exposes no tool-call hook on session.subscribe (text/lifecycle only), so buildSessionOptions now supplies the built-in tools itself via customTools + noTools:'builtin': each createXToolDefinition(cwd) is wrapped to emit a label from its params, then delegates unchanged. The builders bake in the real config (withFileMutationQueue, truncation defaults), so behavior is preserved β€” confirmed in pi's edit.js. Observation is fail-safe (emit in try/catch). toolLabel + instrumentToolDefinition are pure/unit-tested (label mapping; wrap delegates same args + result; observer error can't break a tool call). Caveat: the customTools/noTools runtime wiring isn't covered by tests (they stub createSession, bypassing buildSessionOptions) β€” needs a real cook run to confirm the agent receives the instrumented tools and they emit live. check + pi-actions tests green. 
Co-Authored-By: Claude --- src/orchestrator/src/pi-actions.test.ts | 52 +++++++++++++++ src/orchestrator/src/pi-actions.ts | 87 ++++++++++++++++++++++++- 2 files changed, 137 insertions(+), 2 deletions(-) diff --git a/src/orchestrator/src/pi-actions.test.ts b/src/orchestrator/src/pi-actions.test.ts index acc3325d..ed6c9702 100644 --- a/src/orchestrator/src/pi-actions.test.ts +++ b/src/orchestrator/src/pi-actions.test.ts @@ -8,9 +8,11 @@ import { afterEach, describe, expect, it } from 'vitest'; import { createPiActions, epicVerifyTask, + instrumentToolDefinition, runPi, type SessionFactory, sliceTestTask, + toolLabel, toolsForAction, } from './pi-actions.js'; import type { CookEvent } from './presenter/events.js'; @@ -774,3 +776,53 @@ describe('runPi β€” real LLM self-containment smoke', () => { 120_000, ); }); + +describe('toolLabel β€” what the agent is doing', () => { + it('labels file tools by path, bash by command, grep/find by pattern', () => { + expect(toolLabel('edit', { path: 'src/auth/token.ts' })).toBe('edit src/auth/token.ts'); + expect(toolLabel('write', { path: 'tests/x.test.ts' })).toBe('write tests/x.test.ts'); + expect(toolLabel('bash', { command: 'bun test' })).toBe('bash bun test'); + expect(toolLabel('grep', { pattern: 'RefreshToken' })).toBe('grep RefreshToken'); + }); + + it('falls back to the bare tool name when no recognized target is present', () => { + expect(toolLabel('read', {})).toBe('read'); + expect(toolLabel('bash', undefined)).toBe('bash'); + }); + + it('truncates long labels with an ellipsis', () => { + const long = toolLabel('edit', { path: 'a/'.repeat(60) }); + expect(long.endsWith('…')).toBe(true); + expect(long.length).toBeLessThanOrEqual(56); + }); +}); + +describe('instrumentToolDefinition β€” observe then delegate', () => { + function fakeTool(name: string, run: (...args: unknown[]) => unknown) { + return { name, execute: run } as unknown as Parameters[0]; + } + + it('emits a label from the params, then delegates with the 
same args and result', () => { + const seen: unknown[] = []; + const labels: string[] = []; + const def = fakeTool('edit', (...args) => { + seen.push(...args); + return 'tool-result'; + }); + + instrumentToolDefinition(def, (label) => labels.push(label)); + const out = def.execute('call-1', { path: 'src/a.ts' }, undefined, undefined, {} as never); + + expect(labels).toEqual(['edit src/a.ts']); + expect(out).toBe('tool-result'); // delegation result preserved + expect(seen).toEqual(['call-1', { path: 'src/a.ts' }, undefined, undefined, {}]); // same args + }); + + it('never lets an observation error break the tool call', () => { + const def = fakeTool('bash', () => 'ok'); + instrumentToolDefinition(def, () => { + throw new Error('observer boom'); + }); + expect(def.execute('id', { command: 'echo hi' }, undefined, undefined, {} as never)).toBe('ok'); + }); +}); diff --git a/src/orchestrator/src/pi-actions.ts b/src/orchestrator/src/pi-actions.ts index d6ac545a..db783c21 100644 --- a/src/orchestrator/src/pi-actions.ts +++ b/src/orchestrator/src/pi-actions.ts @@ -5,12 +5,20 @@ import { fileURLToPath } from 'node:url'; import { AuthStorage, - type CreateAgentSessionOptions, createAgentSession, + createBashToolDefinition, + createEditToolDefinition, + createFindToolDefinition, + createGrepToolDefinition, + createLsToolDefinition, + createReadToolDefinition, + createWriteToolDefinition, + type CreateAgentSessionOptions, DefaultResourceLoader, ModelRegistry, SessionManager, SettingsManager, + type ToolDefinition, } from '@earendil-works/pi-coding-agent'; import { buildProbeSpec, runProbe } from './app-probe.js'; @@ -67,6 +75,67 @@ function latestLine(text: string): string { return ''; } +// --------------------------------------------------------------------------- +// Tool-call observability β€” show what the agent is *doing* (editing X, running +// bash, reading Y), not just what it's saying. 
We can't observe tool calls via +// session.subscribe (that stream is text/lifecycle only), so we supply the +// built-in tools ourselves and wrap their execute to emit a heartbeat. The +// createXToolDefinition builders bake in the real config (mutation queue, +// truncation defaults), so wrapping + delegating preserves behavior exactly. +// --------------------------------------------------------------------------- + +// Inferred so each builder keeps its own tool-schema generic; the heterogeneous +// list is erased to the base ToolDefinition at the single wrap point below. +const TOOL_DEF_BUILDERS = { + read: createReadToolDefinition, + write: createWriteToolDefinition, + edit: createEditToolDefinition, + bash: createBashToolDefinition, + grep: createGrepToolDefinition, + find: createFindToolDefinition, + ls: createLsToolDefinition, +} as const; + +/** A one-line "what the agent is doing" label from a tool name + its params. */ +export function toolLabel(name: string, params: unknown): string { + const p = (params && typeof params === 'object' ? params : {}) as Record; + const target = [p.path, p.command, p.pattern].find( + (v): v is string => typeof v === 'string' && v.length > 0, + ); + const label = target ? `${name} ${target}` : name; + return label.length > HEARTBEAT_MAX ? `${label.slice(0, HEARTBEAT_MAX - 1)}…` : label; +} + +/** Wrap a tool definition's execute to emit a heartbeat, then delegate unchanged. */ +export function instrumentToolDefinition( + def: ToolDefinition, + onUse: (label: string) => void, +): ToolDefinition { + const original = def.execute.bind(def); + def.execute = ((...args: Parameters) => { + // Observation must never break a tool call. 
+ try { + onUse(toolLabel(def.name, args[1])); + } catch { + /* ignore */ + } + return original(...args); + }) as typeof def.execute; + return def; +} + +function buildInstrumentedTools( + names: string[], + cwd: string, + onUse: (label: string) => void, +): ToolDefinition[] { + return names.flatMap((name) => { + const build = TOOL_DEF_BUILDERS[name as keyof typeof TOOL_DEF_BUILDERS]; + if (!build) return []; + return [instrumentToolDefinition(build(cwd) as ToolDefinition, onUse)]; + }); +} + /** Bracket a wait so it shows as a live pending activity; always closes. */ async function withActivity(id: string, label: string, fn: () => Promise): Promise { _emit({ kind: 'activity-start', id, label }); @@ -151,6 +220,18 @@ async function buildSessionOptions(opts: RunPiOpts, isolatedDir: string): Promis }); await resourceLoader.reload(); + // Supply the built-in tools ourselves (instrumented), instead of the `tools` + // name allowlist, so each tool call emits a "what the agent is doing" + // heartbeat into the current wait. `noTools:'builtin'` drops the default + // read/bash/edit/write so they aren't double-registered. 
+ const toolNames = opts.tools + .split(',') + .map((t) => t.trim()) + .filter(Boolean); + const customTools = buildInstrumentedTools(toolNames, opts.sandboxDir, (label) => { + _emit({ kind: 'activity-progress', id: opts.label, detail: label }); + }); + return { cwd: opts.sandboxDir, agentDir: isolatedDir, @@ -158,7 +239,9 @@ async function buildSessionOptions(opts: RunPiOpts, isolatedDir: string): Promis authStorage, modelRegistry, resourceLoader, - tools: opts.tools.split(','), + noTools: 'builtin', + tools: toolNames, + customTools, sessionManager: SessionManager.inMemory(opts.sandboxDir), settingsManager: SettingsManager.inMemory({ compaction: { enabled: false } }), }; From 320bff495f81f6763b74dea4a60f214532a2d3ef Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 15:14:17 +0100 Subject: [PATCH 06/32] FE-879: lazy per-slice cook worktrees + shared node_modules MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Brownfield cook provisioned every slice's git worktree eagerly in wireHandlers β€” N `git worktree add` + N recursive node_modules CoW copies paid synchronously at startup before any slice fired. - Move slice-worktree creation into resolveSliceCwd via idempotent ensureSliceWorktree, so a slice's worktree is materialized on first fire. A run touching 2 of 8 slices pays for 2 worktrees, not 8. Synchronous provisioning serializes concurrent fires on the JS thread, so parallel-policy worktree adds never overlap. - Symlink each slice's node_modules to the parent worktree's single copy instead of CoW-copying per slice (SHAREABLE_TOP_LEVEL_ENTRIES). walkFiles already skips symlinks, so the shared tree is never re-walked during dep seeding, merge, or promotion. Other gitignored dirs still copy per slice. Correctness-neutral: same worktrees/branches, just lazy; deps resolve through the symlink. npm run verify green; adds symlink + idempotency unit tests. 
πŸ€– Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/orchestrator/src/cow-copy.ts | 19 ++++- .../src/epic-sandbox-merge.test.ts | 72 +++++++++++++++++-- src/orchestrator/src/epic-sandbox-merge.ts | 38 +++++++++- src/orchestrator/src/net-compiler.ts | 46 +++++------- 4 files changed, 137 insertions(+), 38 deletions(-) diff --git a/src/orchestrator/src/cow-copy.ts b/src/orchestrator/src/cow-copy.ts index bbd90104..5b19b22b 100644 --- a/src/orchestrator/src/cow-copy.ts +++ b/src/orchestrator/src/cow-copy.ts @@ -1,5 +1,5 @@ import { spawnSync } from 'node:child_process'; -import { cpSync, existsSync, readdirSync } from 'node:fs'; +import { cpSync, existsSync, readdirSync, symlinkSync } from 'node:fs'; import { join, resolve } from 'node:path'; /** @@ -23,16 +23,24 @@ export function cowCopy(src: string, dest: string): void { /** Top-level names skipped when CoW-copying into cook sandboxes. */ export const COW_COPY_DEFAULT_EXCLUDE = new Set(['.git', '.brunch']); +const NO_SYMLINKS: ReadonlySet = new Set(); + /** - * CoW-copy top-level entries from `sourceDir` that are absent in `destDir` + * Provision top-level entries from `sourceDir` that are absent in `destDir` * (untracked/gitignored dirs like `node_modules/`, `dist/`). Skips names in * `exclude` and entries already present in the destination (typically tracked * files materialized by `git worktree add`). + * + * Names in `symlink` are linked to the source entry instead of copied β€” used to + * share a single read-only `node_modules/` across slice sandboxes rather than + * paying a CoW copy per slice. Everything else is CoW-copied (lazy on APFS / + * reflink filesystems, deep copy otherwise). 
*/ export function copyMissingTopLevelEntries( sourceDir: string, destDir: string, exclude: ReadonlySet = COW_COPY_DEFAULT_EXCLUDE, + symlink: ReadonlySet = NO_SYMLINKS, ): void { const source = resolve(sourceDir); const dest = resolve(destDir); @@ -40,6 +48,11 @@ export function copyMissingTopLevelEntries( if (exclude.has(entry)) continue; const destPath = join(dest, entry); if (existsSync(destPath)) continue; - cowCopy(join(source, entry), destPath); + const sourcePath = join(source, entry); + if (symlink.has(entry)) { + symlinkSync(sourcePath, destPath); + } else { + cowCopy(sourcePath, destPath); + } } } diff --git a/src/orchestrator/src/epic-sandbox-merge.test.ts b/src/orchestrator/src/epic-sandbox-merge.test.ts index 14cdb91c..455055de 100644 --- a/src/orchestrator/src/epic-sandbox-merge.test.ts +++ b/src/orchestrator/src/epic-sandbox-merge.test.ts @@ -1,9 +1,11 @@ import { execFileSync } from 'node:child_process'; import { existsSync, + lstatSync, mkdirSync, mkdtempSync, readFileSync, + readlinkSync, rmSync, symlinkSync, writeFileSync, @@ -14,6 +16,7 @@ import { dirname, join } from 'node:path'; import { afterEach, describe, expect, it } from 'vitest'; import { + ensureSliceWorktree, epicIdsForEpicVerifyMerge, mergeCompletedSlicesIntoTree, mergeSlicesIntoEpicSandbox, @@ -274,19 +277,31 @@ describe('seedSliceFromParentWorktree', () => { expect(readFileSync(join(sliceDir, 'src/a.ts'), 'utf8')).toBe('export const a = 1;\n'); }); - it('untracked content arrives via CoW copy from the parent', () => { + it('untracked content (other than node_modules) arrives via CoW copy from the parent', () => { const { parent, addUntracked } = makeGitParentWorktree('r2'); - // Simulate node_modules / generated artifacts present in the parent - // worktree but NOT tracked by git. - addUntracked('node_modules/dep/index.js', 'module.exports = 1;\n'); + // Simulate generated artifacts present in the parent worktree but NOT + // tracked by git. 
`dist/` is copied (a slice may rebuild it independently). addUntracked('dist/bundle.js', 'console.log("bundle");\n'); const sliceDir = seedSliceFromParentWorktree(parent, 'only', singleSlicePlan, 'r2'); - expect(readFileSync(join(sliceDir, 'node_modules/dep/index.js'), 'utf8')).toBe('module.exports = 1;\n'); + expect(lstatSync(join(sliceDir, 'dist')).isSymbolicLink()).toBe(false); expect(readFileSync(join(sliceDir, 'dist/bundle.js'), 'utf8')).toBe('console.log("bundle");\n'); }); + it('shares node_modules via a symlink to the parent rather than copying it', () => { + const { parent, addUntracked } = makeGitParentWorktree('r2b'); + addUntracked('node_modules/dep/index.js', 'module.exports = 1;\n'); + + const sliceDir = seedSliceFromParentWorktree(parent, 'only', singleSlicePlan, 'r2b'); + + const linkPath = join(sliceDir, 'node_modules'); + expect(lstatSync(linkPath).isSymbolicLink()).toBe(true); + expect(readlinkSync(linkPath)).toBe(join(parent, 'node_modules')); + // Resolves transparently for pi-actions reading deps through the link. 
+ expect(readFileSync(join(linkPath, 'dep/index.js'), 'utf8')).toBe('module.exports = 1;\n'); + }); + it('slice worktree is checked out on a slice-level cook branch', () => { const { parent } = makeGitParentWorktree('r3'); @@ -343,6 +358,53 @@ describe('seedSliceFromParentWorktree', () => { ); }); +describe('ensureSliceWorktree', () => { + const dirs: string[] = []; + afterEach(() => { + for (const d of dirs) rmSync(d, { recursive: true, force: true }); + dirs.length = 0; + }); + + const singleSlicePlan: Plan = { + mode: 'brownfield', + epics: [{ id: 'e1', summary: '', depends_on: [], verification: [] }], + slices: [{ id: 'only', epic_id: 'e1', definition: '', depends_on: [], verification: [] }], + }; + + function makeGitParentWorktree(runId: string): string { + const source = mkdtempSync(join(tmpdir(), 'cook-source-')); + dirs.push(source); + execFileSync('git', ['init', '-q', '-b', 'main'], { cwd: source }); + execFileSync('git', ['config', 'user.email', 'test@example.com'], { cwd: source }); + execFileSync('git', ['config', 'user.name', 'Test'], { cwd: source }); + writeFileSync(join(source, 'README.md'), '# project\n'); + execFileSync('git', ['add', '.'], { cwd: source }); + execFileSync('git', ['commit', '-q', '-m', 'initial'], { cwd: source }); + + const runDir = mkdtempSync(join(tmpdir(), 'cook-run-')); + dirs.push(runDir); + const parent = join(runDir, 'worktree'); + execFileSync('git', ['worktree', 'add', '-q', '-b', `cook/${runId}`, parent, 'HEAD'], { cwd: source }); + return parent; + } + + it( + 'creates the slice worktree on first call and is a no-op on repeat (rework-safe)', + () => { + const parent = makeGitParentWorktree('r1'); + + const first = ensureSliceWorktree(parent, 'only', singleSlicePlan, 'r1'); + expect(existsSync(join(first, 'README.md'))).toBe(true); + + // Second call must not throw (seedSliceFromParentWorktree would, via its + // path-availability assertion) and must return the same dir. 
+ const second = ensureSliceWorktree(parent, 'only', singleSlicePlan, 'r1'); + expect(second).toBe(first); + }, + GIT_TEST_TIMEOUT_MS, + ); +}); + describe('mergeSlicesIntoEpicSandbox', () => { const dirs: string[] = []; afterEach(() => { diff --git a/src/orchestrator/src/epic-sandbox-merge.ts b/src/orchestrator/src/epic-sandbox-merge.ts index 9bd2afb0..7e5e1f31 100644 --- a/src/orchestrator/src/epic-sandbox-merge.ts +++ b/src/orchestrator/src/epic-sandbox-merge.ts @@ -251,15 +251,47 @@ export function seedSliceFromParentWorktree( ); // 2. CoW-copy whatever's in the parent worktree but NOT in the slice - // worktree yet β€” i.e. untracked / gitignored content (`node_modules/`, - // `dist/`, etc.) that pi-actions might need at runtime. + // worktree yet β€” i.e. untracked / gitignored content (`dist/`, etc.) that + // pi-actions might need at runtime. `node_modules/` is symlinked to the + // parent's single copy instead of duplicated per slice (see + // SHAREABLE_TOP_LEVEL_ENTRIES); `walkFiles` skips symlinks, so the shared + // tree is never re-walked during dependency seeding, merge, or promotion. const excludedNames = new Set(['.git', '.brunch', EPIC_MERGE_SEGMENT]); for (const s of plan.slices) excludedNames.add(s.id); - copyMissingTopLevelEntries(parentSandboxDir, sliceDir, excludedNames); + copyMissingTopLevelEntries(parentSandboxDir, sliceDir, excludedNames, SHAREABLE_TOP_LEVEL_ENTRIES); return sliceDir; } +/** + * Top-level gitignored entries shared across slice sandboxes via symlink rather + * than CoW-copied per slice. `node_modules/` is install output that pi-actions + * read (resolve deps, run tests/build) but do not author, so a single + * parent-owned copy linked into each slice removes N-1 redundant tree copies. + * Build caches under it (`.cache`, `.vite`) become shared too β€” acceptable for + * cook's transient runs; revisit if a tool needs per-slice write isolation. 
+ */ +const SHAREABLE_TOP_LEVEL_ENTRIES: ReadonlySet = new Set(['node_modules']); + +/** + * Idempotent codebase-mode slice worktree provisioning: create the git worktree + * on first call, no-op if it already exists. Called from `resolveSliceCwd` on + * every fire (action, run-tests, assess) and across reworks, so it must tolerate + * repeats. Provisioning is synchronous (`execFileSync`), so concurrent fires of + * distinct slices under the parallel policy serialize on the JS thread — no two + * `git worktree add` invocations against the shared object store overlap. + */ +export function ensureSliceWorktree( + parentSandboxDir: string, + sliceId: string, + plan: Plan, + runId: string, +): string { + const sliceDir = resolveSliceWorktreeDir(parentSandboxDir, sliceId); + if (existsSync(sliceDir)) return sliceDir; + return seedSliceFromParentWorktree(parentSandboxDir, sliceId, plan, runId); +} + /** Copy completed dependency slice worktrees into `slice`'s sandbox (plan order). */ export function seedSliceSandboxFromDeps( parentSandboxDir: string, diff --git a/src/orchestrator/src/net-compiler.ts b/src/orchestrator/src/net-compiler.ts index ef8e11cb..075c0579 100644 --- a/src/orchestrator/src/net-compiler.ts +++ b/src/orchestrator/src/net-compiler.ts @@ -5,12 +5,9 @@ // 3. compilePlan(input, ctx) → PetriNet (convenience wrapper) // --------------------------------------------------------------------------- -import { mkdirSync } from 'node:fs'; - import { + ensureSliceWorktree, mergeSlicesIntoEpicSandbox, - resolveSliceWorktreeDir, - seedSliceFromParentWorktree, seedSliceSandboxFromDeps, sliceIdsForEpicVerifyMerge, } from './epic-sandbox-merge.js'; @@ -556,35 +553,30 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, net.addPlace(place); } - // Runtime filesystem preparation lives in wireHandlers so every action/test - // cwd exists before any transition can fire.
This is the one intentional side - // effect in the wiring pass; a future prepareRunFilesystem step can split it - // out if more provisioning responsibilities accumulate. - // Per-slice dirs are parallel-safe; dependency seeding happens at fire time. - // In codebase mode, seed each slice dir with the parent worktree's contents - // (the source repo's HEAD via `git worktree add`) so pi-actions can modify - // existing code instead of writing into an empty dir. + // Per-slice sandboxes are provisioned lazily at fire time (in resolveSliceCwd), + // not eagerly here: a run that touches 2 of 8 slices pays for 2 worktrees, not + // 8. Each slice dir is an independent root, so concurrent fires of distinct + // slices never contend; repeat fires of the same slice (rework) are idempotent. // 'shared' (serial greenfield): all slices accrete into the run sandbox. // 'per-slice': each slice gets its own git worktree (codebase) or plain dir // (greenfield parallel), merged into __epic__ for verification. + // Fail fast on the missing-runId precondition rather than at first fire. const sliceLayout = input.sliceLayout ?? 'per-slice'; - if (input.sandboxMode === 'codebase') { - if (!input.runId) { - throw new Error('codebase mode requires input.runId (used to name slice-level git branches)'); - } - for (const slice of plan.slices) { - seedSliceFromParentWorktree(input.sandboxDir, slice.id, plan, input.runId); - } - } else if (sliceLayout === 'per-slice') { - for (const slice of plan.slices) { - mkdirSync(resolveSliceWorktreeDir(input.sandboxDir, slice.id), { recursive: true }); - } + const { runId } = input; + if (input.sandboxMode === 'codebase' && !runId) { + throw new Error('codebase mode requires input.runId (used to name slice-level git branches)'); } - const resolveSliceCwd = (slice: Slice): string => - sliceLayout === 'shared' - ? 
input.sandboxDir - : seedSliceSandboxFromDeps(input.sandboxDir, plan, slice, { preserveExisting: true }); + const resolveSliceCwd = (slice: Slice): string => { + if (sliceLayout === 'shared') return input.sandboxDir; + // Codebase mode: materialize the slice's git worktree (HEAD checkout + + // symlinked node_modules) on first touch so pi-actions modify existing code + // rather than an empty dir; greenfield per-slice gets a plain dir below. + if (input.sandboxMode === 'codebase') { + ensureSliceWorktree(input.sandboxDir, slice.id, plan, runId!); + } + return seedSliceSandboxFromDeps(input.sandboxDir, plan, slice, { preserveExisting: true }); + }; // Register transitions with wired fire handlers for (const skel of blueprint.transitions) { From 5404e18226e7a8edcf8319414bc57f294280e8dd Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 23:55:54 +0100 Subject: [PATCH 07/32] FE-879: update default Anthropic model --- src/orchestrator/src/plan-architect.test.ts | 6 +++++- src/orchestrator/src/plan-architect.ts | 4 +++- src/server/interview.ts | 2 +- src/server/secondary-chat-route.ts | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/orchestrator/src/plan-architect.test.ts b/src/orchestrator/src/plan-architect.test.ts index b55a2df1..feea0de6 100644 --- a/src/orchestrator/src/plan-architect.test.ts +++ b/src/orchestrator/src/plan-architect.test.ts @@ -6,7 +6,7 @@ import { describe, expect, it } from 'vitest'; -import { architectDraftSchema, architectPlan } from './plan-architect.js'; +import { architectDraftSchema, architectPlan, DEFAULT_ARCHITECT_MODEL_ID } from './plan-architect.js'; import type { Plan } from './types.js'; const projected: Plan = { @@ -49,6 +49,10 @@ const wellFormed = { }; describe('architectPlan', () => { + it('defaults the production architect to the current Sonnet model', () => { + expect(DEFAULT_ARCHITECT_MODEL_ID).toBe('claude-sonnet-4-6'); + }); + it('parses a well-formed authored draft', async () 
=> { const result = await architectPlan(projected, async () => wellFormed); expect(result.status).toBe('succeeded'); diff --git a/src/orchestrator/src/plan-architect.ts b/src/orchestrator/src/plan-architect.ts index f7082d48..cbfe77bc 100644 --- a/src/orchestrator/src/plan-architect.ts +++ b/src/orchestrator/src/plan-architect.ts @@ -94,6 +94,8 @@ export type ArchitectResult = export type RunModel = (prompt: string) => Promise; +export const DEFAULT_ARCHITECT_MODEL_ID = 'claude-sonnet-4-6'; + const EMPTY_DRAFT: ArchitectDraft = { epics: [], slices: [], nonBuildableRequirementIds: [] }; /** @@ -211,7 +213,7 @@ function errorMessage(error: unknown): string { */ export const defaultArchitectRunModel: RunModel = async (prompt) => { const result = await generateText({ - model: anthropic(process.env.SPEC_TO_COOK_PLAN_MODEL || 'claude-sonnet-4-20250514'), + model: anthropic(process.env.SPEC_TO_COOK_PLAN_MODEL || DEFAULT_ARCHITECT_MODEL_ID), maxOutputTokens: 4096, prompt, output: Output.object({ schema: architectDraftSchema }), diff --git a/src/server/interview.ts b/src/server/interview.ts index 63a8bafd..9b5234c1 100644 --- a/src/server/interview.ts +++ b/src/server/interview.ts @@ -320,7 +320,7 @@ export function createInterviewerAgent( const instructions = getInterviewerInstructions(phase, options); return new ToolLoopAgent({ - model: anthropic(process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-20250514'), + model: anthropic(process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-6'), instructions, tools, providerOptions: { diff --git a/src/server/secondary-chat-route.ts b/src/server/secondary-chat-route.ts index 5fd3a992..ef94a27c 100644 --- a/src/server/secondary-chat-route.ts +++ b/src/server/secondary-chat-route.ts @@ -428,7 +428,7 @@ export async function handleSecondaryChatMessageRequest(db: DB, req: Request, re const stream = createUIMessageStream({ async execute({ writer }) { const result = streamText({ - model: anthropic(process.env.ANTHROPIC_MODEL || 
'claude-sonnet-4-20250514'), + model: anthropic(process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-6'), system, messages: messages.map((message) => ({ role: message.role, content: message.content })), tools, From 2af79bdb903381b7d3133537b0418811c6a96f9d Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Wed, 17 Jun 2026 00:01:14 +0100 Subject: [PATCH 08/32] FE-864: use Opus 4.6 for orchestration defaults Co-authored-by: Cursor --- CONTRIBUTING.md | 2 +- src/orchestrator/src/pi-actions.test.ts | 4 ++-- src/orchestrator/src/pi-actions.ts | 6 +++--- src/orchestrator/src/plan-architect.test.ts | 4 ++-- src/orchestrator/src/plan-architect.ts | 2 +- src/server/interview.ts | 2 +- src/server/secondary-chat-route.ts | 2 +- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 110e20fa..3a966077 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -31,7 +31,7 @@ Open http://localhost:5173. | Variable | Required | Description | |---|---|---| | `ANTHROPIC_API_KEY` | Yes | Anthropic API key | -| `ANTHROPIC_MODEL` | No | Interviewer model (default: `claude-sonnet-4-20250514`) | +| `ANTHROPIC_MODEL` | No | Interviewer model (default: `claude-opus-4-6`) | | `OBSERVER_MODEL` | No | Observer model (default: `claude-haiku-4-5-20251001`) | | `BRUNCH_DB` | No | Override the default project-local SQLite path for dev workflows | | `BRUNCH_PORT` | No | Backend port override | diff --git a/src/orchestrator/src/pi-actions.test.ts b/src/orchestrator/src/pi-actions.test.ts index ed6c9702..429a400a 100644 --- a/src/orchestrator/src/pi-actions.test.ts +++ b/src/orchestrator/src/pi-actions.test.ts @@ -510,7 +510,7 @@ function makeFakeSession(behavior: { emit?: string | readonly unknown[]; hang?: describe('runPi drives an in-process pi session (no subprocess)', () => { const baseOpts = (sandboxDir: string, tools: string) => ({ label: 'tests slice-1', - model: 'claude-sonnet-4-6', + model: 'claude-opus-4-6', promptFile: join(promptsDir, 
'test-writer.md'), task: 'do the thing', sandboxDir, @@ -762,7 +762,7 @@ describe('runPi β€” real LLM self-containment smoke', () => { try { await runPi({ label: 'smoke', - model: 'claude-sonnet-4-6', + model: 'claude-opus-4-6', promptFile, task: 'Use the write tool to create a file named hello.txt in the current directory containing exactly: BRUNCH_SELF_CONTAINED', sandboxDir, diff --git a/src/orchestrator/src/pi-actions.ts b/src/orchestrator/src/pi-actions.ts index db783c21..73244620 100644 --- a/src/orchestrator/src/pi-actions.ts +++ b/src/orchestrator/src/pi-actions.ts @@ -449,7 +449,7 @@ export function createPiActions(opts?: { await runPi( { label: `tests ${label}`, - model: 'claude-sonnet-4-6', + model: 'claude-opus-4-6', promptFile: join(promptsDir, 'test-writer.md'), task, sandboxDir: ctx.sandboxDir, @@ -472,7 +472,7 @@ export function createPiActions(opts?: { await runPi( { label: `code ${label}`, - model: 'claude-sonnet-4-6', + model: 'claude-opus-4-6', promptFile: join(promptsDir, 'code-writer.md'), task, sandboxDir: ctx.sandboxDir, @@ -500,7 +500,7 @@ export function createPiActions(opts?: { await runPi( { label: `verify ${ctx.epic.id} (write)`, - model: 'claude-sonnet-4-6', + model: 'claude-opus-4-6', promptFile: join(promptsDir, 'test-writer.md'), task: writeTask, sandboxDir: ctx.sandboxDir, diff --git a/src/orchestrator/src/plan-architect.test.ts b/src/orchestrator/src/plan-architect.test.ts index feea0de6..679202e9 100644 --- a/src/orchestrator/src/plan-architect.test.ts +++ b/src/orchestrator/src/plan-architect.test.ts @@ -49,8 +49,8 @@ const wellFormed = { }; describe('architectPlan', () => { - it('defaults the production architect to the current Sonnet model', () => { - expect(DEFAULT_ARCHITECT_MODEL_ID).toBe('claude-sonnet-4-6'); + it('defaults the production architect to the current Opus model', () => { + expect(DEFAULT_ARCHITECT_MODEL_ID).toBe('claude-opus-4-6'); }); it('parses a well-formed authored draft', async () => { diff --git 
a/src/orchestrator/src/plan-architect.ts b/src/orchestrator/src/plan-architect.ts index cbfe77bc..463961dc 100644 --- a/src/orchestrator/src/plan-architect.ts +++ b/src/orchestrator/src/plan-architect.ts @@ -94,7 +94,7 @@ export type ArchitectResult = export type RunModel = (prompt: string) => Promise; -export const DEFAULT_ARCHITECT_MODEL_ID = 'claude-sonnet-4-6'; +export const DEFAULT_ARCHITECT_MODEL_ID = 'claude-opus-4-6'; const EMPTY_DRAFT: ArchitectDraft = { epics: [], slices: [], nonBuildableRequirementIds: [] }; diff --git a/src/server/interview.ts b/src/server/interview.ts index 9b5234c1..fcf9254a 100644 --- a/src/server/interview.ts +++ b/src/server/interview.ts @@ -320,7 +320,7 @@ export function createInterviewerAgent( const instructions = getInterviewerInstructions(phase, options); return new ToolLoopAgent({ - model: anthropic(process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-6'), + model: anthropic(process.env.ANTHROPIC_MODEL || 'claude-opus-4-6'), instructions, tools, providerOptions: { diff --git a/src/server/secondary-chat-route.ts b/src/server/secondary-chat-route.ts index ef94a27c..cfb8d1d2 100644 --- a/src/server/secondary-chat-route.ts +++ b/src/server/secondary-chat-route.ts @@ -428,7 +428,7 @@ export async function handleSecondaryChatMessageRequest(db: DB, req: Request, re const stream = createUIMessageStream({ async execute({ writer }) { const result = streamText({ - model: anthropic(process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-6'), + model: anthropic(process.env.ANTHROPIC_MODEL || 'claude-opus-4-6'), system, messages: messages.map((message) => ({ role: message.role, content: message.content })), tools, From ede7a9edd0f7cc59be793aee3e7b77b32cf9c6f6 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Wed, 17 Jun 2026 00:26:53 +0100 Subject: [PATCH 09/32] =?UTF-8?q?FE-878:=20structured=20epic=E2=86=92slice?= =?UTF-8?q?=20progress=20grid=20(ln-review=20improvement)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Replaces the coarse single-phase view with a live grid that reflects cook's actual shape — the highest-value TUI improvement from the review, and it kills the brittle string-matching. - events: run-shape (seeds the grid from the plan, all slices queued) + slice (typed status running|passed|failed + step), emitted from cook-cli + the pi-actions handlers (write-tests/code/evaluate-done) — not string-matched logs. - run-store: a slices grid grouped by epic; slice-keyed activity heartbeat (aligned via runPi activityId = slice id) attaches to the slice's row, so "what the agent is doing" shows inline; non-slice waits stay in the pending footer. - ink: SliceGrid renders epic groups with per-slice status icons + the running slice's step/detail + spinner; replaces the flat pending list for slices. Retry counts deferred (a re-running slice just shows running again; latest wins). Live wiring (run-shape/slice from a real cook) is manual-verify, like the heartbeat. check + presenter/pi-actions/cook tests green (126). 
Co-Authored-By: Claude --- src/orchestrator/src/cook-cli.ts | 6 ++ src/orchestrator/src/pi-actions.test.ts | 67 +++++++++++++++++++ src/orchestrator/src/pi-actions.ts | 20 ++++-- src/orchestrator/src/presenter/events.ts | 7 +- src/orchestrator/src/presenter/format.ts | 4 ++ .../src/presenter/ink/app.test.tsx | 25 +++++++ src/orchestrator/src/presenter/ink/app.tsx | 38 ++++++++++- .../src/presenter/run-store.test.ts | 52 ++++++++++++++ src/orchestrator/src/presenter/run-store.ts | 57 +++++++++++++++- 9 files changed, 268 insertions(+), 8 deletions(-) diff --git a/src/orchestrator/src/cook-cli.ts b/src/orchestrator/src/cook-cli.ts index f48a8d16..2dc88c09 100644 --- a/src/orchestrator/src/cook-cli.ts +++ b/src/orchestrator/src/cook-cli.ts @@ -480,6 +480,12 @@ export async function runCook(opts: CookOptions, bus: CookBus): Promise { // Seed the presenter's elapsed clock; per-action progress carries no // pre-formatted timing β€” the presenter owns it (I136-K). bus.emit({ kind: 'cook-start', runStart }); + // Seed the slice grid up front so queued work is visible before it starts. 
+ bus.emit({ + kind: 'run-shape', + epics: plan.epics.map((e) => ({ id: e.id })), + slices: plan.slices.map((s) => ({ id: s.id, epicId: s.epic_id })), + }); const actions = createPiActions({ verbose: opts.verbose, emit: (event) => bus.emit(event), diff --git a/src/orchestrator/src/pi-actions.test.ts b/src/orchestrator/src/pi-actions.test.ts index 429a400a..060848b0 100644 --- a/src/orchestrator/src/pi-actions.test.ts +++ b/src/orchestrator/src/pi-actions.test.ts @@ -826,3 +826,70 @@ describe('instrumentToolDefinition β€” observe then delegate', () => { expect(def.execute('id', { command: 'echo hi' }, undefined, undefined, {} as never)).toBe('ok'); }); }); + +describe('action handlers emit slice grid events', () => { + const slice: Slice = { + id: 'login', + epic_id: 'api', + definition: 'Login', + depends_on: [], + verification: [{ kind: 'unit-test', target: 'tests/login.test.ts' }], + }; + const epic: Epic = { id: 'api', summary: 'API', depends_on: [], verification: [] }; + const plan: Plan = { mode: 'greenfield', epics: [epic], slices: [slice] }; + const ctx = (): ActionContext => ({ + slice, + epic, + plan, + sandboxDir: '/tmp/unused', + reports: new InMemoryReportSink(), + }); + type SliceEvent = Extract; + const sliceEvents = (events: CookEvent[]) => events.filter((e): e is SliceEvent => e.kind === 'slice'); + + it('evaluate-done emits running(verify) then passed for a DONE verdict', async () => { + const events: CookEvent[] = []; + const actions = createPiActions({ + testRunner: { + async run() { + return { passed: true, output: 'ok' }; + }, + }, + emit: (e) => events.push(e), + }); + await actions['evaluate-done']!(ctx()); + expect(sliceEvents(events).map((s) => [s.id, s.status, s.step])).toEqual([ + ['login', 'running', 'verify'], + ['login', 'passed', undefined], + ]); + }); + + it('evaluate-done emits failed for a NEEDS-WORK verdict', async () => { + const events: CookEvent[] = []; + const actions = createPiActions({ + testRunner: { + async run() { + 
return { passed: false, output: 'nope' }; + }, + }, + emit: (e) => events.push(e), + }); + await actions['evaluate-done']!(ctx()); + expect(sliceEvents(events).at(-1)).toMatchObject({ status: 'failed' }); + }); + + it('write-tests emits running(tests) keyed by the slice id', async () => { + process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; + const events: CookEvent[] = []; + const fake = makeFakeSession({ emit: 'wrote tests' }); + const createSession = (async () => ({ session: fake.session })) as unknown as SessionFactory; + const actions = createPiActions({ createSession, emit: (e) => events.push(e) }); + await actions['write-tests']!(ctx()); + expect(sliceEvents(events)[0]).toMatchObject({ + id: 'login', + epicId: 'api', + status: 'running', + step: 'tests', + }); + }); +}); diff --git a/src/orchestrator/src/pi-actions.ts b/src/orchestrator/src/pi-actions.ts index 73244620..8ab59d0e 100644 --- a/src/orchestrator/src/pi-actions.ts +++ b/src/orchestrator/src/pi-actions.ts @@ -172,6 +172,9 @@ interface RunPiOpts { task: string; sandboxDir: string; tools: string; + /** Activity id for the live wait/heartbeat. Defaults to `label`; set to the + * slice id so the heartbeat lands on that slice's grid row. */ + activityId?: string; } /** The pi SDK session factory β€” injectable so the drive loop is testable without a model or network. */ @@ -229,7 +232,7 @@ async function buildSessionOptions(opts: RunPiOpts, isolatedDir: string): Promis .map((t) => t.trim()) .filter(Boolean); const customTools = buildInstrumentedTools(toolNames, opts.sandboxDir, (label) => { - _emit({ kind: 'activity-progress', id: opts.label, detail: label }); + _emit({ kind: 'activity-progress', id: opts.activityId ?? opts.label, detail: label }); }); return { @@ -260,8 +263,9 @@ async function runPi( const timeoutMs = deps.timeoutMs ?? PI_TIMEOUT_MS; const maxOutput = deps.maxOutput ?? PI_MAX_OUTPUT; const start = Date.now(); + const activityId = opts.activityId ?? 
opts.label; // Open a live wait so the (up to 5-minute) agent session isn't dead air. - _emit({ kind: 'activity-start', id: opts.label, label: opts.label }); + _emit({ kind: 'activity-start', id: activityId, label: opts.label }); let heartbeatKb = 0; const isolatedDir = createAgentDir(); @@ -325,7 +329,7 @@ async function runPi( if (kb >= heartbeatKb + 2) { heartbeatKb = kb; const snippet = latestLine(captured); - if (snippet) _emit({ kind: 'activity-progress', id: opts.label, detail: snippet }); + if (snippet) _emit({ kind: 'activity-progress', id: activityId, detail: snippet }); } } }); @@ -342,7 +346,7 @@ async function runPi( cleanupAgentDir(); // Always close the wait β€” even on timeout / overflow / prompt error β€” so // the spinner can never hang. - _emit({ kind: 'activity-end', id: opts.label }); + _emit({ kind: 'activity-end', id: activityId }); } if (timedOut) throw piTimeoutError(timeoutMs); @@ -427,9 +431,10 @@ export function createPiActions(opts?: { return { 'evaluate-done': async (ctx: ActionContext) => { const label = sliceLabel(ctx.slice); + _emit({ kind: 'slice', id: ctx.slice.id, epicId: ctx.epic.id, status: 'running', step: 'verify' }); log('?', `evaluate ${label}`); const { done, failureKind, results } = await withActivity( - `verify ${label}`, + ctx.slice.id, `running tests Β· ${label}`, () => runVerification(ctx.slice.verification, testRunner, ctx.sandboxDir), ); @@ -438,11 +443,13 @@ export function createPiActions(opts?: { log(r.passed ? 'βœ“' : 'βœ—', `verify ${r.target}`); } log(done ? '●' : 'β—‹', `verdict ${label} β†’ ${done ? 'DONE' : 'NEEDS WORK'}`); + _emit({ kind: 'slice', id: ctx.slice.id, epicId: ctx.epic.id, status: done ? 
'passed' : 'failed' }); return report(ctx, 'evaluator', 'eval-done', { done, failureKind, results }); }, 'write-tests': async (ctx: ActionContext) => { const label = sliceLabel(ctx.slice); + _emit({ kind: 'slice', id: ctx.slice.id, epicId: ctx.epic.id, status: 'running', step: 'tests' }); log('β–Έ', `tests ${label}`); const task = sliceTestTask(ctx.slice, toolchain); @@ -454,6 +461,7 @@ export function createPiActions(opts?: { task, sandboxDir: ctx.sandboxDir, tools: toolsForAction('write-tests'), + activityId: ctx.slice.id, }, piDeps, ); @@ -466,6 +474,7 @@ export function createPiActions(opts?: { 'write-code': async (ctx: ActionContext) => { const label = sliceLabel(ctx.slice); + _emit({ kind: 'slice', id: ctx.slice.id, epicId: ctx.epic.id, status: 'running', step: 'code' }); log('β–Έ', `code ${label}`); const task = `Write code to make tests pass for slice "${ctx.slice.id}": ${ctx.slice.definition}\nVerification targets: ${ctx.slice.verification.map((v) => `${v.kind}: ${v.target}`).join(', ')}\nImplement the minimum code to make all tests pass.`; @@ -477,6 +486,7 @@ export function createPiActions(opts?: { task, sandboxDir: ctx.sandboxDir, tools: toolsForAction('write-code'), + activityId: ctx.slice.id, }, piDeps, ); diff --git a/src/orchestrator/src/presenter/events.ts b/src/orchestrator/src/presenter/events.ts index d00ac3e9..37f22f13 100644 --- a/src/orchestrator/src/presenter/events.ts +++ b/src/orchestrator/src/presenter/events.ts @@ -31,7 +31,12 @@ export type CookEvent = // Closes the activity; the wait is over. | { kind: 'activity-end'; id: string } // The run finished (emitted after promotion); `ok` = completed vs halted. - | { kind: 'cook-done'; ok: boolean }; + | { kind: 'cook-done'; ok: boolean } + // --- slice grid --- + // Seeds the epicβ†’slice progress grid up front (all slices start queued). + | { kind: 'run-shape'; epics: { id: string }[]; slices: { id: string; epicId: string }[] } + // A slice changed state. 
`step` is the current sub-action while running. + | { kind: 'slice'; id: string; epicId: string; status: 'running' | 'passed' | 'failed'; step?: string }; export interface Presenter { onEvent(event: CookEvent): void; diff --git a/src/orchestrator/src/presenter/format.ts b/src/orchestrator/src/presenter/format.ts index 24274e71..12ade6d0 100644 --- a/src/orchestrator/src/presenter/format.ts +++ b/src/orchestrator/src/presenter/format.ts @@ -39,5 +39,9 @@ export function formatCookEvent(event: CookEvent, clock: ElapsedClock): string[] case 'cook-done': // Phase signal only (lights `serve`); the run summary already printed. return []; + case 'run-shape': + case 'slice': + // Grid signals only β€” the per-action log lines already narrate plain output. + return []; } } diff --git a/src/orchestrator/src/presenter/ink/app.test.tsx b/src/orchestrator/src/presenter/ink/app.test.tsx index 39cbe2e6..064732f9 100644 --- a/src/orchestrator/src/presenter/ink/app.test.tsx +++ b/src/orchestrator/src/presenter/ink/app.test.tsx @@ -65,3 +65,28 @@ describe('Ink App', () => { expect(frame).not.toContain('agent writing tests'); }); }); + +describe('Ink App β€” slice grid', () => { + it("renders epics with per-slice status, the running slice's step/detail, and queued slices", async () => { + const store = new RunStore('cook', () => 0); + const { lastFrame } = render( 0} />); + + store.push({ + kind: 'run-shape', + epics: [{ id: 'api-auth' }], + slices: [ + { id: 'login', epicId: 'api-auth' }, + { id: 'refresh', epicId: 'api-auth' }, + ], + }); + store.push({ kind: 'slice', id: 'login', epicId: 'api-auth', status: 'passed' }); + store.push({ kind: 'slice', id: 'refresh', epicId: 'api-auth', status: 'running', step: 'code' }); + store.push({ kind: 'activity-progress', id: 'refresh', detail: 'edit src/token.ts' }); + await tick(); + + const frame = lastFrame() ?? 
''; + expect(frame).toContain('api-auth'); // epic group header + expect(frame).toContain('βœ“ login'); // passed + expect(frame).toContain('refresh Β· code Β· edit src/token.ts'); // running w/ step + detail + }); +}); diff --git a/src/orchestrator/src/presenter/ink/app.tsx b/src/orchestrator/src/presenter/ink/app.tsx index 32ec921f..24ed83ef 100644 --- a/src/orchestrator/src/presenter/ink/app.tsx +++ b/src/orchestrator/src/presenter/ink/app.tsx @@ -9,7 +9,7 @@ import { useEffect, useMemo, useState, useSyncExternalStore } from 'react'; import { formatElapsed } from '../clock.js'; import { BRIGADE, type BrigadePhase } from '../phase.js'; -import type { PendingActivity, RunStore } from '../run-store.js'; +import type { PendingActivity, RunState, RunStore, SliceRow } from '../run-store.js'; import { BRUNCH_ASCII, BRUNCH_ORANGE } from './wordmark.js'; const SPINNER = ['β ‹', 'β ™', 'β Ή', 'β Έ', 'β Ό', 'β ΄', 'β ¦', 'β §', 'β ‡', '⠏']; @@ -37,6 +37,41 @@ function Brigade({ phase }: { phase: BrigadePhase }) { ); } +const SLICE_ICON = { queued: 'β—‹', running: '', passed: 'βœ“', failed: 'βœ—' } as const; +const SLICE_COLOR = { queued: 'gray', running: 'cyan', passed: 'green', failed: 'red' } as const; + +function sliceTail(row: SliceRow): string { + return [row.step, row.detail].filter(Boolean).join(' Β· '); +} + +function SliceGrid({ epics, slices, frame }: Pick & { frame: string }) { + if (slices.length === 0) return null; + return ( + + {epics.map((epicId) => { + const rows = slices.filter((s) => s.epicId === epicId); + if (rows.length === 0) return null; + return ( + + {epicId} + {rows.map((row) => { + const icon = row.status === 'running' ? frame : SLICE_ICON[row.status]; + const tail = sliceTail(row); + return ( + + {' '} + {icon} {row.id} + {tail ? 
` Β· ${tail}` : ''} + + ); + })} + + ); + })} + + ); +} + function PendingPanel({ pending, frame }: { pending: PendingActivity[]; frame: string }) { if (pending.length === 0) return null; // One global timer lives in the footer; rows show only what's running. @@ -97,6 +132,7 @@ export function App({ store, now = () => Date.now() }: { store: RunStore; now?: {state.command} Β· {formatElapsed(now() - state.runStart)} + diff --git a/src/orchestrator/src/presenter/run-store.test.ts b/src/orchestrator/src/presenter/run-store.test.ts index f6ac491b..4ca40f7e 100644 --- a/src/orchestrator/src/presenter/run-store.test.ts +++ b/src/orchestrator/src/presenter/run-store.test.ts @@ -70,3 +70,55 @@ describe('RunStore', () => { expect(store.getSnapshot().lines).toEqual([]); }); }); + +describe('RunStore β€” slice grid', () => { + function seeded(): RunStore { + const store = new RunStore('cook', () => 0); + store.push({ + kind: 'run-shape', + epics: [{ id: 'api' }, { id: 'pay' }], + slices: [ + { id: 'login', epicId: 'api' }, + { id: 'refresh', epicId: 'api' }, + { id: 'charge', epicId: 'pay' }, + ], + }); + return store; + } + + it('seeds every slice as queued, grouped by epic order', () => { + const { epics, slices } = seeded().getSnapshot(); + expect(epics).toEqual(['api', 'pay']); + expect(slices.map((s) => [s.id, s.status])).toEqual([ + ['login', 'queued'], + ['refresh', 'queued'], + ['charge', 'queued'], + ]); + }); + + it('flips a slice to running with a step, then passed (latest wins, detail cleared)', () => { + const store = seeded(); + store.push({ kind: 'slice', id: 'login', epicId: 'api', status: 'running', step: 'tests' }); + store.push({ kind: 'activity-progress', id: 'login', detail: 'edit src/login.ts' }); + let row = store.getSnapshot().slices.find((s) => s.id === 'login')!; + expect(row).toMatchObject({ status: 'running', step: 'tests', detail: 'edit src/login.ts' }); + + store.push({ kind: 'slice', id: 'login', epicId: 'api', status: 'passed' }); + row = 
store.getSnapshot().slices.find((s) => s.id === 'login')!; + expect(row.status).toBe('passed'); + expect(row.detail).toBeUndefined(); // heartbeat cleared once it stops running + }); + + it('routes slice-keyed activity to the grid, non-slice activity to pending', () => { + const store = seeded(); + // A slice-keyed activity must NOT create a pending entry. + store.push({ kind: 'activity-start', id: 'login', label: 'login' }); + expect(store.getSnapshot().pending).toHaveLength(0); + + // A non-slice wait (promotion) does. + store.push({ kind: 'activity-start', id: 'promote', label: 'promoting β†’ cook/abc' }); + expect(store.getSnapshot().pending.map((p) => p.id)).toEqual(['promote']); + store.push({ kind: 'activity-end', id: 'promote' }); + expect(store.getSnapshot().pending).toHaveLength(0); + }); +}); diff --git a/src/orchestrator/src/presenter/run-store.ts b/src/orchestrator/src/presenter/run-store.ts index ff019d6d..0a9203ef 100644 --- a/src/orchestrator/src/presenter/run-store.ts +++ b/src/orchestrator/src/presenter/run-store.ts @@ -16,11 +16,28 @@ export interface PendingActivity { detail?: string; } +export type SliceStatus = 'queued' | 'running' | 'passed' | 'failed'; + +export interface SliceRow { + id: string; + epicId: string; + status: SliceStatus; + /** Current sub-action while running (tests / code / verify). */ + step?: string; + /** Live heartbeat for the running slice (latest line / tool). */ + detail?: string; +} + export interface RunState { command: string; phase: BrigadePhase; lines: string[]; + /** Non-slice waits (worktree, promotion). Slice waits live on the grid. */ pending: PendingActivity[]; + /** Epic ids in plan order, for grouping the grid. */ + epics: string[]; + /** The slice grid β€” every slice, seeded queued by run-shape. */ + slices: SliceRow[]; /** When the run started, for the single global header timer. 
*/ runStart: number; } @@ -35,21 +52,59 @@ export class RunStore { private readonly now: () => number = () => Date.now(), ) { this.clock = createElapsedClock(now); - this.state = { command, phase: 'prep', lines: [], pending: [], runStart: now() }; + this.state = { command, phase: 'prep', lines: [], pending: [], epics: [], slices: [], runStart: now() }; + } + + private isSlice(id: string): boolean { + return this.state.slices.some((s) => s.id === id); + } + + private updateSlice(id: string, patch: Partial): SliceRow[] { + return this.state.slices.map((s) => (s.id === id ? { ...s, ...patch } : s)); } push(event: CookEvent): void { + if (event.kind === 'run-shape') { + this.commit({ + epics: event.epics.map((e) => e.id), + slices: event.slices.map((s) => ({ id: s.id, epicId: s.epicId, status: 'queued' as const })), + }); + return; + } + if (event.kind === 'slice') { + const running = event.status === 'running'; + this.commit({ + slices: this.updateSlice(event.id, { + status: event.status, + ...(event.step !== undefined ? { step: event.step } : {}), + // clear the live heartbeat once the slice stops running + ...(running ? {} : { detail: undefined }), + }), + }); + return; + } + // Slice-keyed activity detail lands on the grid row; everything else is a + // non-slice wait (worktree, promotion) and shows in the pending footer. if (event.kind === 'activity-start') { + if (this.isSlice(event.id)) return; this.commit({ pending: [...this.state.pending, { id: event.id, label: event.label }] }); return; } if (event.kind === 'activity-progress') { + if (this.isSlice(event.id)) { + this.commit({ slices: this.updateSlice(event.id, { detail: event.detail }) }); + return; + } this.commit({ pending: this.state.pending.map((a) => (a.id === event.id ? 
{ ...a, detail: event.detail } : a)), }); return; } if (event.kind === 'activity-end') { + if (this.isSlice(event.id)) { + this.commit({ slices: this.updateSlice(event.id, { detail: undefined }) }); + return; + } this.commit({ pending: this.state.pending.filter((a) => a.id !== event.id) }); return; } From d3693f53ee2c17624211bf2253d413b1fe748618 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Wed, 17 Jun 2026 00:51:41 +0100 Subject: [PATCH 10/32] FE-864: fail slice rows when writer pi aborts Co-authored-by: Cursor --- src/orchestrator/src/pi-actions.test.ts | 25 +++++++++++ src/orchestrator/src/pi-actions.ts | 58 +++++++++++++++---------- 2 files changed, 59 insertions(+), 24 deletions(-) diff --git a/src/orchestrator/src/pi-actions.test.ts b/src/orchestrator/src/pi-actions.test.ts index 060848b0..6b1bbb5a 100644 --- a/src/orchestrator/src/pi-actions.test.ts +++ b/src/orchestrator/src/pi-actions.test.ts @@ -152,6 +152,31 @@ describe('evaluate-done / verify-epic share the runner seam β€” failureKind is v expect(events.filter((e) => e.kind === 'activity-start')).toHaveLength(1); expect(events.filter((e) => e.kind === 'activity-end')).toHaveLength(1); }); + + it('marks writer slices failed when pi throws before reporting', async () => { + process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; + const createSession = (async () => { + throw new Error('session boom'); + }) as unknown as SessionFactory; + + for (const action of ['write-tests', 'write-code'] as const) { + const events: CookEvent[] = []; + const actions = createPiActions({ createSession, emit: (e) => events.push(e) }); + + await expect(actions[action]!(ctx(new InMemoryReportSink()))).rejects.toThrow(/session boom/); + + expect(events.filter((e) => e.kind === 'slice')).toEqual([ + { + kind: 'slice', + id: 'chunk', + epicId: 'utils', + status: 'running', + step: action === 'write-tests' ? 
'tests' : 'code', + }, + { kind: 'slice', id: 'chunk', epicId: 'utils', status: 'failed' }, + ]); + } + }); }); describe('verify-epic integration oracle (FE-876) β€” reachability folds into the epic verdict', () => { diff --git a/src/orchestrator/src/pi-actions.ts b/src/orchestrator/src/pi-actions.ts index 8ab59d0e..814d8d26 100644 --- a/src/orchestrator/src/pi-actions.ts +++ b/src/orchestrator/src/pi-actions.ts @@ -453,18 +453,23 @@ export function createPiActions(opts?: { log('β–Έ', `tests ${label}`); const task = sliceTestTask(ctx.slice, toolchain); - await runPi( - { - label: `tests ${label}`, - model: 'claude-opus-4-6', - promptFile: join(promptsDir, 'test-writer.md'), - task, - sandboxDir: ctx.sandboxDir, - tools: toolsForAction('write-tests'), - activityId: ctx.slice.id, - }, - piDeps, - ); + try { + await runPi( + { + label: `tests ${label}`, + model: 'claude-opus-4-6', + promptFile: join(promptsDir, 'test-writer.md'), + task, + sandboxDir: ctx.sandboxDir, + tools: toolsForAction('write-tests'), + activityId: ctx.slice.id, + }, + piDeps, + ); + } catch (err) { + _emit({ kind: 'slice', id: ctx.slice.id, epicId: ctx.epic.id, status: 'failed' }); + throw err; + } return report(ctx, 'test-writer', 'tests-written', { sliceId: ctx.slice.id, @@ -478,18 +483,23 @@ export function createPiActions(opts?: { log('β–Έ', `code ${label}`); const task = `Write code to make tests pass for slice "${ctx.slice.id}": ${ctx.slice.definition}\nVerification targets: ${ctx.slice.verification.map((v) => `${v.kind}: ${v.target}`).join(', ')}\nImplement the minimum code to make all tests pass.`; - await runPi( - { - label: `code ${label}`, - model: 'claude-opus-4-6', - promptFile: join(promptsDir, 'code-writer.md'), - task, - sandboxDir: ctx.sandboxDir, - tools: toolsForAction('write-code'), - activityId: ctx.slice.id, - }, - piDeps, - ); + try { + await runPi( + { + label: `code ${label}`, + model: 'claude-opus-4-6', + promptFile: join(promptsDir, 'code-writer.md'), + task, + 
sandboxDir: ctx.sandboxDir, + tools: toolsForAction('write-code'), + activityId: ctx.slice.id, + }, + piDeps, + ); + } catch (err) { + _emit({ kind: 'slice', id: ctx.slice.id, epicId: ctx.epic.id, status: 'failed' }); + throw err; + } return report(ctx, 'code-writer', 'code-written', { sliceId: ctx.slice.id, From c2582d8acea8f33038c6ba1389e54c841402fe92 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Wed, 10 Jun 2026 16:24:20 +0200 Subject: [PATCH 11/32] =?UTF-8?q?FE-843:=20Frontier=20setup=20=E2=80=94=20?= =?UTF-8?q?toolchain-profile-expansion=20plan=20entry=20+=203-card=20queue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Claude --- memory/CARDS.md | 88 +++++++++++++++++++++++++++++++++++++++++++++++++ memory/PLAN.md | 7 ++++ 2 files changed, 95 insertions(+) create mode 100644 memory/CARDS.md diff --git a/memory/CARDS.md b/memory/CARDS.md new file mode 100644 index 00000000..af2e270b --- /dev/null +++ b/memory/CARDS.md @@ -0,0 +1,88 @@ + + +# Cards β€” toolchain-profile-expansion (FE-843) + +## Card 1 (light) β€” Data-driven profile registry + TS profiles β€” `next` + +### Objective + +`project-profile.ts` defines profiles as data literals (path templates + argv template + conventions prose) compiled into the existing `Toolchain` interface, and the registry grows `node-vitest`, `node-test`, `node-jest`, `deno` β€” all six consumers untouched. 
+ +### Acceptance Criteria + +``` +βœ“ registry invariants β€” every profile's sliceTarget/epicTarget contain exactly one {id}; + testCommand contains exactly one {target}; conventions non-empty (one enumerable test) +βœ“ command shapes β€” node-vitest β†’ ['npx','vitest','run',t]; node-test β†’ ['node','--test',t]; + node-jest β†’ ['npx','jest','--runTestsByPath',t]; deno β†’ ['deno','test','--allow-all',t] +βœ“ consumers untouched β€” bun/brunch behavior identical; existing suites green without edits +βœ“ conventions prose per new profile is greenfield-complete (names scaffold files + install step) +``` + +### Verification + +- Inner: vitest unit (project-profile.test.ts) β€” registry invariants + command shapes +- Middle: existing consumer suites unchanged + +## Card 2 (full) β€” Selection live and strict β€” `next` + +### Target Behavior + +Every emitted `plan.yaml` carries an explicitly resolved profile id, chosen at plan time (`--profile` flag ≫ `snapshot.profile` ≫ `bun`), and cook fails loudly on an unknown id instead of silently defaulting. 
+ +### Boundary Crossings + +``` +β†’ brunch plan CLI (parsePlanArgs: new --profile= flag, plan-runner.ts) +β†’ plan-emitter (resolve once; stamp resolved id onto emitted Plan β€” never absent) +β†’ plan.yaml (persisted resolved profile; loader spreads it through) +β†’ brunch cook (cook-cli.ts strict resolve: unknown id β†’ UnknownProfileError listing valid ids; + absent β†’ lenient bun, for hand-authored fixtures) +``` + +### Risks / Assumptions + +``` +- ASSUMPTION: agent-side install suffices for node profiles (no harness install verb) + β†’ VALIDATE: outer-loop greenfield cook smoke on node-vitest β†’ SPEC Β§Assumptions +- RISK: jest greenfield needs ts-jest config β†’ MITIGATION: conventions prose names it; experimental +- RISK: strict-on-unknown breaks hand-edited plans β†’ MITIGATION: error lists valid ids; + absent stays lenient (mirrors checkPlan base/emitted split) +``` + +### Acceptance Criteria + +``` +βœ“ emitter stamps profile β€” absent snapshot.profile β†’ emitted plan carries profile: 'bun' +βœ“ flag wins β€” --profile=node-vitest overrides snapshot.profile; unknown flag errors at plan time +βœ“ cook strict β€” unknown plan profile β†’ UnknownProfileError (valid ids listed); absent β†’ bun +βœ“ I130-K agreement β€” emitted targets and cook runner derive from the same persisted id +``` + +### Verification + +- Inner: plan-runner.test.ts (flag), plan-emitter.test.ts (stamp), cook-cli.test.ts (strict/lenient) +- Middle: plan-contract/plan-eval suites unchanged + +## Card 3 (light) β€” Architect classifies the profile from spec prose β€” `next` + +### Objective + +`architectPlan`'s output schema gains an optional profile enum (registry ids, null when the spec is silent); the resolved chain becomes flag ≫ snapshot.profile ≫ architect-classified ≫ bun, with the architect rung skipped on the deterministic fallback path (I133-K). 
+ +### Acceptance Criteria + +``` +βœ“ schema β€” architect output accepts profile: 'deno' | … | null; prompt instructs classification + from spec prose only (D160-K: no host introspection) +βœ“ chain β€” mock RunModel returns profile β†’ emitted plan carries it; flag still wins; + snapshot.profile still beats architect +βœ“ fallback β€” architect throw/malformed β†’ chain skips rung (flag ≫ snapshot ≫ bun) +``` + +### Verification + +- Inner: plan-architect.test.ts (schema), plan-emitter.test.ts (chain precedence + fallback) +- Outer (follow-on, not gating): greenfield cook smoke --profile=node-vitest (conventions oracle) diff --git a/memory/PLAN.md b/memory/PLAN.md index 92b02668..e4dc3e57 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -377,7 +377,11 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Name:** Toolchain profile expansion β€” more TS runtimes + live/strict profile selection - **Linear:** FE-843 Β· branch `ka/fe-843-toolchain-profiles` (stacked on FE-841) - **Kind:** structural (selection decision + I130-K refinement); cards 1/3 bounded +<<<<<<< HEAD - **Status:** branch-complete (2026-06-10) β€” all 3 cards landed (1: data-driven registry + 4 TS profiles; 2: selection live + strict; 3: architect classifies profile from spec prose); card queue retired. SPEC promotion (I130-K refinement + agent-install assumption) rides with ln-sync at PR tie-off. Outer-loop validation outstanding: greenfield cook smoke `--profile=node-vitest` (conventions-prose oracle + A98 install assumption). 
+======= +- **Status:** active β€” 3-card queue in `memory/CARDS.md` (1: data-driven registry + `node-vitest`/`node-test`/`node-jest`/`deno`; 2: selection live + strict; 3: architect classifies profile from spec prose) +>>>>>>> 546b1349 (FE-843: Frontier setup β€” toolchain-profile-expansion plan entry + 3-card queue) - **Objective:** Expand the FE-829 `Toolchain` contract (`project-profile.ts`) so cook builds on more technologies: profiles as data literals compiled into the existing `Toolchain` interface (consumers untouched); profile resolved once at plan time (`--profile` flag ≫ `snapshot.profile` ≫ architect-classified ≫ `bun`), persisted into `plan.yaml`; cook errors on unknown ids (lenient on absent, mirroring the `checkPlan` base/emitted split). The architect's existing schema-constrained call gains an optional profile enum β€” classification reads projected spec prose only (D160-K intact, no host introspection). - **Why now / unlocks:** Today both implemented profiles are TypeScript and the selection path is dead (`snapshot.profile` never populated) β€” every cook run silently resolves to bun, and a typo'd profile id silently defaults rather than erroring. Without live selection, adding profiles changes nothing at runtime; without more profiles, "brunch builds on different technologies" is false in practice. - **Design (ln-design 2026-06-10):** flat data registry (A) over composed axes (B β€” deferred until real combo demand; retreat is cheap since the consumer surface never changes); no harness install verb β€” the cook agent scaffolds + installs per A98 (pressure trigger: first profile where the runner fails for missing install). Deferred follow-ons: fail/infra test-outcome split in `ToolchainTestRunner`, brownfield drift-check at cook open (path-neutral by type), `brunch detect` CLI convenience. Durable end-state for selection: the spec interview captures stack as a structured field (the D164-K `mode` pattern); the chain already reserves that rung. 
@@ -386,6 +390,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Traceability:** Requirements 46–50; A98, D160-K, D164-K (pattern), D167-K; refines I130-K (resolved profile persisted; strict-on-unknown). New assumption on build: agent-side install suffices for node profiles. Refinement on `plan-build-architect` (FE-829). - **Design docs:** `docs/design/orchestrator.md`; SPEC Β§Future Direction Cook plan generation. +<<<<<<< HEAD ### agent-extension-host - **Name:** Agent extension host β€” dual-mode (`elicit`/`execute`) pi-harness contract @@ -531,6 +536,8 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Traceability:** Requirements 46–50; FE-738 acceptance criterion 5 (deferred); spec Β§graph-revision. - **Design docs:** `docs/next/architecture/plan-graph-petri-orchestration.md`; `docs/design/orchestrator.md`. +======= +>>>>>>> 546b1349 (FE-843: Frontier setup β€” toolchain-profile-expansion plan entry + 3-card queue) ### petrinaut-colour-fold - **Name:** Petrinaut export β€” colour-fold per-slice subnet From 9a4e3335554007ab805d8b1fd97769c39814f8cb Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Wed, 10 Jun 2026 16:30:17 +0200 Subject: [PATCH 12/32] FE-843: Data-driven profile registry + node-vitest/node-test/node-jest/deno profiles MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Profiles are now data literals (path templates + argv template + conventions prose) compiled into the unchanged Toolchain interface β€” adding a runtime is one data entry. Registry invariants pinned enumerably across all profiles. 
Co-authored-by: Claude --- memory/CARDS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/memory/CARDS.md b/memory/CARDS.md index af2e270b..d18c2b69 100644 --- a/memory/CARDS.md +++ b/memory/CARDS.md @@ -4,7 +4,7 @@ # Cards — toolchain-profile-expansion (FE-843) -## Card 1 (light) — Data-driven profile registry + TS profiles — `next` +## Card 1 (light) — Data-driven profile registry + TS profiles — `done` (2026-06-10) ### Objective From 2f43fd0beca5c7fae1d2acb758b652ba9554756a Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Wed, 10 Jun 2026 16:39:26 +0200 Subject: [PATCH 13/32] =?UTF-8?q?FE-843:=20Live=20+=20strict=20profile=20s?= =?UTF-8?q?election=20=E2=80=94=20flag=20=E2=89=AB=20spec=20=E2=89=AB=20bu?= =?UTF-8?q?n,=20persisted=20into=20plan.yaml?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit brunch plan gains --profile= (validated via parseProfileId); the emitter resolves the chain once and stamps the id on both the authored and fallback plans, so cook always reads the profile the emitter used. resolveToolchain now throws UnknownProfileError on a typo'd id instead of silently running bun; absent stays lenient for hand-authored fixtures. Co-authored-by: Claude --- memory/CARDS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/memory/CARDS.md b/memory/CARDS.md index d18c2b69..91eca35a 100644 --- a/memory/CARDS.md +++ b/memory/CARDS.md @@ -26,7 +26,7 @@ - Inner: vitest unit (project-profile.test.ts) — registry invariants + command shapes - Middle: existing consumer suites unchanged -## Card 2 (full) — Selection live and strict — `next` +## Card 2 (full) — Selection live and strict — `done` (2026-06-10; cook-side strictness lives in `resolveToolchain` itself, pinned by project-profile tests — no separate cook-cli harness needed. I130-K refinement + agent-install assumption → SPEC at ln-sync tie-off.)
### Target Behavior From 7067c7fc7b4effae9ccff0526cc6bf1cdb895447 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Wed, 10 Jun 2026 16:45:30 +0200 Subject: [PATCH 14/32] FE-843: Architect classifies the toolchain profile from spec prose MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit architectPlan's schema gains an optional profile enum (registry ids, null when the spec is silent); the emitter chain becomes flag ≫ spec profile ≫ architect-classified ≫ bun. A hallucinated profile fails the schema parse and rides the existing deterministic fallback. D160-K intact β€” classification reads projected spec prose only. Queue exhausted: CARDS.md retired; PLAN frontier marked branch-complete. Co-authored-by: Claude --- memory/CARDS.md | 88 ---------------------------- memory/PLAN.md | 4 ++ src/orchestrator/src/plan-emitter.ts | 9 +++ 3 files changed, 13 insertions(+), 88 deletions(-) delete mode 100644 memory/CARDS.md diff --git a/memory/CARDS.md b/memory/CARDS.md deleted file mode 100644 index 91eca35a..00000000 --- a/memory/CARDS.md +++ /dev/null @@ -1,88 +0,0 @@ - - -# Cards β€” toolchain-profile-expansion (FE-843) - -## Card 1 (light) β€” Data-driven profile registry + TS profiles β€” `done` (2026-06-10) - -### Objective - -`project-profile.ts` defines profiles as data literals (path templates + argv template + conventions prose) compiled into the existing `Toolchain` interface, and the registry grows `node-vitest`, `node-test`, `node-jest`, `deno` β€” all six consumers untouched. 
- -### Acceptance Criteria - -``` -βœ“ registry invariants β€” every profile's sliceTarget/epicTarget contain exactly one {id}; - testCommand contains exactly one {target}; conventions non-empty (one enumerable test) -βœ“ command shapes β€” node-vitest β†’ ['npx','vitest','run',t]; node-test β†’ ['node','--test',t]; - node-jest β†’ ['npx','jest','--runTestsByPath',t]; deno β†’ ['deno','test','--allow-all',t] -βœ“ consumers untouched β€” bun/brunch behavior identical; existing suites green without edits -βœ“ conventions prose per new profile is greenfield-complete (names scaffold files + install step) -``` - -### Verification - -- Inner: vitest unit (project-profile.test.ts) β€” registry invariants + command shapes -- Middle: existing consumer suites unchanged - -## Card 2 (full) β€” Selection live and strict β€” `done` (2026-06-10; cook-side strictness lives in `resolveToolchain` itself, pinned by project-profile tests β€” no separate cook-cli harness needed. I130-K refinement + agent-install assumption β†’ SPEC at ln-sync tie-off.) - -### Target Behavior - -Every emitted `plan.yaml` carries an explicitly resolved profile id, chosen at plan time (`--profile` flag ≫ `snapshot.profile` ≫ `bun`), and cook fails loudly on an unknown id instead of silently defaulting. 
- -### Boundary Crossings - -``` -β†’ brunch plan CLI (parsePlanArgs: new --profile= flag, plan-runner.ts) -β†’ plan-emitter (resolve once; stamp resolved id onto emitted Plan β€” never absent) -β†’ plan.yaml (persisted resolved profile; loader spreads it through) -β†’ brunch cook (cook-cli.ts strict resolve: unknown id β†’ UnknownProfileError listing valid ids; - absent β†’ lenient bun, for hand-authored fixtures) -``` - -### Risks / Assumptions - -``` -- ASSUMPTION: agent-side install suffices for node profiles (no harness install verb) - β†’ VALIDATE: outer-loop greenfield cook smoke on node-vitest β†’ SPEC Β§Assumptions -- RISK: jest greenfield needs ts-jest config β†’ MITIGATION: conventions prose names it; experimental -- RISK: strict-on-unknown breaks hand-edited plans β†’ MITIGATION: error lists valid ids; - absent stays lenient (mirrors checkPlan base/emitted split) -``` - -### Acceptance Criteria - -``` -βœ“ emitter stamps profile β€” absent snapshot.profile β†’ emitted plan carries profile: 'bun' -βœ“ flag wins β€” --profile=node-vitest overrides snapshot.profile; unknown flag errors at plan time -βœ“ cook strict β€” unknown plan profile β†’ UnknownProfileError (valid ids listed); absent β†’ bun -βœ“ I130-K agreement β€” emitted targets and cook runner derive from the same persisted id -``` - -### Verification - -- Inner: plan-runner.test.ts (flag), plan-emitter.test.ts (stamp), cook-cli.test.ts (strict/lenient) -- Middle: plan-contract/plan-eval suites unchanged - -## Card 3 (light) β€” Architect classifies the profile from spec prose β€” `next` - -### Objective - -`architectPlan`'s output schema gains an optional profile enum (registry ids, null when the spec is silent); the resolved chain becomes flag ≫ snapshot.profile ≫ architect-classified ≫ bun, with the architect rung skipped on the deterministic fallback path (I133-K). 
- -### Acceptance Criteria - -``` -βœ“ schema β€” architect output accepts profile: 'deno' | … | null; prompt instructs classification - from spec prose only (D160-K: no host introspection) -βœ“ chain β€” mock RunModel returns profile β†’ emitted plan carries it; flag still wins; - snapshot.profile still beats architect -βœ“ fallback β€” architect throw/malformed β†’ chain skips rung (flag ≫ snapshot ≫ bun) -``` - -### Verification - -- Inner: plan-architect.test.ts (schema), plan-emitter.test.ts (chain precedence + fallback) -- Outer (follow-on, not gating): greenfield cook smoke --profile=node-vitest (conventions oracle) diff --git a/memory/PLAN.md b/memory/PLAN.md index e4dc3e57..08c8c3ae 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -378,10 +378,14 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Linear:** FE-843 Β· branch `ka/fe-843-toolchain-profiles` (stacked on FE-841) - **Kind:** structural (selection decision + I130-K refinement); cards 1/3 bounded <<<<<<< HEAD +<<<<<<< HEAD - **Status:** branch-complete (2026-06-10) β€” all 3 cards landed (1: data-driven registry + 4 TS profiles; 2: selection live + strict; 3: architect classifies profile from spec prose); card queue retired. SPEC promotion (I130-K refinement + agent-install assumption) rides with ln-sync at PR tie-off. Outer-loop validation outstanding: greenfield cook smoke `--profile=node-vitest` (conventions-prose oracle + A98 install assumption). 
======= - **Status:** active β€” 3-card queue in `memory/CARDS.md` (1: data-driven registry + `node-vitest`/`node-test`/`node-jest`/`deno`; 2: selection live + strict; 3: architect classifies profile from spec prose) >>>>>>> 546b1349 (FE-843: Frontier setup β€” toolchain-profile-expansion plan entry + 3-card queue) +======= +- **Status:** branch-complete (2026-06-10) β€” all 3 cards landed (1: data-driven registry + 4 TS profiles; 2: selection live + strict; 3: architect classifies profile from spec prose); card queue retired. SPEC promotion (I130-K refinement + agent-install assumption) rides with ln-sync at PR tie-off. Outer-loop validation outstanding: greenfield cook smoke `--profile=node-vitest` (conventions-prose oracle + A98 install assumption). +>>>>>>> b84fbda3 (FE-843: Architect classifies the toolchain profile from spec prose) - **Objective:** Expand the FE-829 `Toolchain` contract (`project-profile.ts`) so cook builds on more technologies: profiles as data literals compiled into the existing `Toolchain` interface (consumers untouched); profile resolved once at plan time (`--profile` flag ≫ `snapshot.profile` ≫ architect-classified ≫ `bun`), persisted into `plan.yaml`; cook errors on unknown ids (lenient on absent, mirroring the `checkPlan` base/emitted split). The architect's existing schema-constrained call gains an optional profile enum β€” classification reads projected spec prose only (D160-K intact, no host introspection). - **Why now / unlocks:** Today both implemented profiles are TypeScript and the selection path is dead (`snapshot.profile` never populated) β€” every cook run silently resolves to bun, and a typo'd profile id silently defaults rather than erroring. Without live selection, adding profiles changes nothing at runtime; without more profiles, "brunch builds on different technologies" is false in practice. 
- **Design (ln-design 2026-06-10):** flat data registry (A) over composed axes (B β€” deferred until real combo demand; retreat is cheap since the consumer surface never changes); no harness install verb β€” the cook agent scaffolds + installs per A98 (pressure trigger: first profile where the runner fails for missing install). Deferred follow-ons: fail/infra test-outcome split in `ToolchainTestRunner`, brownfield drift-check at cook open (path-neutral by type), `brunch detect` CLI convenience. Durable end-state for selection: the spec interview captures stack as a structured field (the D164-K `mode` pattern); the chain already reserves that rung. diff --git a/src/orchestrator/src/plan-emitter.ts b/src/orchestrator/src/plan-emitter.ts index c0cb482a..ff7b64e5 100644 --- a/src/orchestrator/src/plan-emitter.ts +++ b/src/orchestrator/src/plan-emitter.ts @@ -145,6 +145,7 @@ export async function emitPlanFromSnapshot( const architectResult = await architectPlan(projected, runModel, planningContext); +<<<<<<< HEAD // Selection chain: flag ≫ detected (brownfield) ≫ spec ≫ architect-classified // ≫ bun. Resolved exactly once, here; both paths below stamp the result onto // the emitted plan. A failed architect simply skips its rung. @@ -170,6 +171,14 @@ export async function emitPlanFromSnapshot( const testDir = (options.detectTestDir ?? detectTestDir)(options.repoDir); if (testDir !== null) toolchain = withTestDir(toolchain, testDir); } +======= + // Selection chain: explicit flag ≫ spec profile ≫ architect-classified ≫ + // bun. Resolved exactly once, here; both paths below stamp the result onto + // the emitted plan. A failed architect simply skips its rung. + const classified = architectResult.status === 'succeeded' ? architectResult.draft.profile : null; + const profile: ProfileId = options.profile ?? projected.profile ?? classified ?? 'bun'; + const toolchain = options.toolchain ?? 
resolveToolchain(profile); +>>>>>>> b84fbda3 (FE-843: Architect classifies the toolchain profile from spec prose) if (architectResult.status === 'failed') { return fallback(projected, profile, toolchain, architectResult, architectResult.reason); From 7fa83a00beb54ca5333807dce25a7c7eed1a6422 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Mon, 15 Jun 2026 14:35:17 +0100 Subject: [PATCH 15/32] FE-864: Add agent-extension-host contract (dual-mode pi harness) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Categorize the pi harness as a dual-mode (elicit/execute) agent-extension host: mode-neutral core + per-mode plugins. Cook capabilities are execute-mode plugins; the existing interview is the elicit witness. Adds the agent-extension-host frontier with the four abstracted-enough acceptance criteria (mode-neutral core, two-consumer proof, open plugin seam, no gold-plating), the execute-mode-plugin hooks on the dispatch-seam frontiers (integration-oracle, interactive-recovery, adaptive-replan), and the Context coordination note. It gates only the dispatch-seam frontiers; the seam-independent infra is unaffected. 
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- src/orchestrator/src/promote-run.test.ts | 52 +++++++++++++++++++++++- 1 file changed, 51 insertions(+), 1 deletion(-) diff --git a/src/orchestrator/src/promote-run.test.ts b/src/orchestrator/src/promote-run.test.ts index e79a719e..97003125 100644 --- a/src/orchestrator/src/promote-run.test.ts +++ b/src/orchestrator/src/promote-run.test.ts @@ -5,7 +5,7 @@ import { join } from 'node:path'; import { afterEach, describe, expect, it } from 'vitest'; -import { promoteBrownfieldRun, promoteGreenfieldRun } from './promote-run.js'; +import { landCookBranch, promoteBrownfieldRun, promoteGreenfieldRun } from './promote-run.js'; const dirs: string[] = []; const GIT_TEST_TIMEOUT_MS = 20_000; @@ -355,3 +355,53 @@ describe('promoteBrownfieldRun', () => { expect(files).not.toContain('old.ts'); }); }); + +describe('landCookBranch', () => { + const id = ['-c', 'user.name=t', '-c', 'user.email=t@e']; + + // A user repo on `main` with one base commit and a promoted cook/r1 branch + // (the composed result already committed on top of base via promoteBrownfieldRun).
+ function repoWithPromotedCook(): { dir: string; baseHead: string; cookCommit: string } { + const dir = mkdtempSync(join(tmpdir(), 'cook-land-')); + dirs.push(dir); + execFileSync('git', ['init', '-q', '-b', 'main'], { cwd: dir }); + writeFileSync(join(dir, 'app.ts'), 'export const v = 1;\n'); + writeFileSync(join(dir, '.gitignore'), 'node_modules/\n'); + execFileSync('git', ['add', '.'], { cwd: dir }); + execFileSync('git', [...id, 'commit', '-q', '-m', 'base'], { cwd: dir }); + execFileSync('git', ['branch', 'cook/r1'], { cwd: dir }); + const baseHead = execFileSync('git', ['rev-parse', 'HEAD'], { cwd: dir, encoding: 'utf8' }).trim(); + + const tree = mkdtempSync(join(tmpdir(), 'cook-land-tree-')); + dirs.push(tree); + writeFileSync(join(tree, 'app.ts'), 'export const v = 2;\n'); + writeFileSync(join(tree, 'feature.ts'), 'export const f = true;\n'); + writeFileSync(join(tree, '.gitignore'), 'node_modules/\n'); + const { commit } = promoteBrownfieldRun({ sourceDir: dir, sourceTreeDir: tree, runId: 'r1' }); + return { dir, baseHead, cookCommit: commit }; + } + + function head(dir: string): string { + return execFileSync('git', ['rev-parse', 'HEAD'], { cwd: dir, encoding: 'utf8' }).trim(); + } + + it( + 'fast-forwards the active branch onto cook/ when HEAD has not moved', + () => { + const { dir, cookCommit } = repoWithPromotedCook(); + + const result = landCookBranch({ sourceDir: dir, runId: 'r1' }); + + expect(result).toEqual({ kind: 'landed', mode: 'fast-forward', branch: 'main', commit: cookCommit }); + // Active branch advanced to the cook commit; the delta is now in the working tree. + expect(head(dir)).toBe(cookCommit); + expect(readFileSync(join(dir, 'app.ts'), 'utf8')).toContain('v = 2'); + expect(existsSync(join(dir, 'feature.ts'))).toBe(true); + // cook/r1 still exists for re-review. 
+ expect(execFileSync('git', ['rev-parse', 'cook/r1'], { cwd: dir, encoding: 'utf8' }).trim()).toBe( + cookCommit, + ); + }, + GIT_TEST_TIMEOUT_MS, + ); +}); From b87e404a42ee027b06beac8364ee32f59d0a865e Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Wed, 17 Jun 2026 09:40:33 +0100 Subject: [PATCH 16/32] FE-864: Base the Arc-1 linear stack on agent-extension-host MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move agent-extension-host from a parallel prerequisite track to the base of the Arc-1 cook stack (2026-06-15 decision): every Arc-1 frontier now stacks on it. Updated the arc-framing paragraph, the Next sequencing list (promoted to step 1, renumbered 2-8), the frontier definition, and brunch-detect's stacking note. Kept the honest framing that the logical gate is still only integration-oracle, so basing the whole arc on it is a deliberate stack-order coupling to the pi-harness-thread coordination. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Cursor # Conflicts: # memory/PLAN.md --- memory/PLAN.md | 23 +++++++++++++++++++++++ src/orchestrator/src/promote-run.ts | 20 ++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/memory/PLAN.md b/memory/PLAN.md index 08c8c3ae..48c627f3 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -19,7 +19,11 @@ The next product arc is the **Conversational Workspace Runtime** umbrella (`docs The **orchestrator / Petri-net execution substrate** is committed (2026-05-21) to Petri as the forward execution model, justified by parallelism, simulation, and resume value claims.
Phases 0–2 are done: the dual-engine PoC (Phase 0, FE-730) validated the substrate and extracted the compiler/interpreter; Phase 1 (FE-738) added two-lane mechanical+semantic subnets, the compiler topology/wiring split, and Β§7 event vocabulary; Phase 2 (FE-743) added parallel firing policy with greedy token claiming, shared resource pool tokens bounding global concurrency, and worktree-per-slice isolation β€” the decision gate passed (parallel measurably beats serial on wall clock). Phase-3-prep `petri-declarative-routing` (FE-747) is done: typed Guard predicates on `HandlerDescriptor` plus `enumerateCandidateOutputs` make topology-only enumeration of reachable output places possible (I125-K). Phase 3 (graph compilation) remains blocked on `intent-graph-semantics` (FE-700) for relation-policy gates; Phase 4 (simulation oracle) now has its routing-side structural prerequisite satisfied but still needs Phase 3 for graph-derived gates. The north-star design is `docs/next/architecture/plan-graph-petri-orchestration.md`. +<<<<<<< HEAD The orchestrator's forward direction is framed as two arcs toward a **full (autonomous) cook orchestrator** β€” "completed spec β†’ feature built and glued into a real brownfield repo, no manual steps." **Arc 1 (feature delivery)** stacks on FE-843 and ships standalone without the semantic stack. 
`agent-extension-host` (the dual-mode pi-harness contract) **bases the Arc-1 linear stack** (2026-06-15 decision) β€” every Arc-1 frontier sits on it β€” followed by `brunch-detect` (resolve a registry profile id from repo manifest/lockfile evidence at plan time) β†’ `harness-dep-install` (capture the dependency-delta for promotion + classify install/infra failures distinctly from test failures; the install action itself is agent-native) β†’ `app-runtime-probe` (build + boot + exercise the host app β€” the concrete reachability mechanism) β†’ `integration-oracle` (wire into host + product reachability, via the probe) β†’ `brownfield-promotion` (glue back into the checkout) β†’ `brunch-ship` (one-shot wrapper). A `dogfood-spike` (ln-spike) β€” run the full chain on one real brunch feature β€” should precede committing `integration-oracle`, to surface the reachability mechanism, dep-install, orientation depth, and brownfield plan-shape risks cheaply. CLI surface: the real commands are `brunch plan`, `brunch cook`, and `brunch serve` (the one-shot capstone, FE-878). The kitchen-brigade names (prep/recipe/taste/plate) are **phase labels, not commands** β€” detect runs inside `plan`; probe + oracle (verify) and promotion (plate) run inside `cook`/`serve`. Frontier ids stay descriptive; `serve` chains the phases end-to-end. The settled grounding decision is **cook-time** (planning stays host-blind per D160-K; the cook agent resolves real paths/wiring by reading the worktree), which softens FE-829's `writes` ownership to *advisory in brownfield only* β€” greenfield keeps it authoritative. Protecting invariant: **brownfield generalization must not change greenfield-mode behavior; shared contracts fork on `plan.mode`** (the 3 reference fixtures + a greenfield smoke must score identically before/after each frontier). 
**Arc 2 (full orchestrator)** is an autonomy ladder gated behind the parked semantic/Petri-Phase-3/4 substrate: `interactive-recovery` (halt β†’ coherent question answered in a secondary chat, resumes the run) β†’ `intent-conformance-oracle` (independent behavioral-kernel verification, requisite variety) β†’ `adaptive-replan` (architect amends the plan from execution feedback, recompile + resume). Each rung raises the autonomy ceiling and is independently shippable. Non-additive work (refactors/migrations/debugging) is explicitly a separate `transformation-orchestrator` product line, not folded into either arc. The cook-time grounding decision, the D160-K `writes`-advisory amendment, and the greenfield-protecting invariant need recording in SPEC via ln-sync when the first Arc-1 frontier is scoped. **Agent-host coordination:** the pi harness is a dual-mode (`elicit`/`execute`) agent-extension host (`agent-extension-host`) β€” cook capabilities are `execute`-mode plugins on a shared, mode-neutral core; this contract is the serialization point with the unpublished pi-harness thread (which owns the core), validated against the existing interview as the `elicit` witness. It logically gates only the dispatch-seam frontiers (`integration-oracle`, Arc-2 `interactive-recovery`/`adaptive-replan`), but is sequenced at the **base of the Arc-1 linear stack** (2026-06-15 decision) β€” so the whole arc lands on it, deliberately serializing the cook stack behind the pi-harness-thread coordination rather than running the seam-independent infra (`brunch-detect`, `harness-dep-install`, `app-runtime-probe`, `brownfield-promotion`) in parallel ahead of it. +======= +The orchestrator's forward direction is framed as two arcs toward a **full (autonomous) cook orchestrator** β€” "completed spec β†’ feature built and glued into a real brownfield repo, no manual steps." **Arc 1 (feature delivery)** stacks on FE-843 and ships standalone without the semantic stack. 
`agent-extension-host` (the dual-mode pi-harness contract) **bases the Arc-1 linear stack** (2026-06-15 decision) β€” every Arc-1 frontier sits on it β€” followed by `brunch-detect` (read toolchain from the repo) β†’ `harness-dep-install` (add/install new deps in the worktree) β†’ `app-runtime-probe` (build + boot + exercise the host app β€” the concrete reachability mechanism) β†’ `integration-oracle` (wire into host + product reachability, via the probe) β†’ `brownfield-promotion` (glue back into the checkout) β†’ `brunch-ship` (one-shot wrapper). A `dogfood-spike` (ln-spike) β€” run the full chain on one real brunch feature β€” should precede committing `integration-oracle`, to surface the reachability mechanism, dep-install, orientation depth, and brownfield plan-shape risks cheaply. CLI command surface (kitchen brigade; frontier ids stay descriptive): detect β†’ `brunch prep`, plan β†’ `brunch recipe`, orchestrate β†’ `brunch cook`, verify β†’ `brunch taste`, promote β†’ `brunch plate`, ship β†’ `brunch serve`. The settled grounding decision is **cook-time** (planning stays host-blind per D160-K; the cook agent resolves real paths/wiring by reading the worktree), which softens FE-829's `writes` ownership to *advisory in brownfield only* β€” greenfield keeps it authoritative. Protecting invariant: **brownfield generalization must not change greenfield-mode behavior; shared contracts fork on `plan.mode`** (the 3 reference fixtures + a greenfield smoke must score identically before/after each frontier). **Arc 2 (full orchestrator)** is an autonomy ladder gated behind the parked semantic/Petri-Phase-3/4 substrate: `interactive-recovery` (halt β†’ coherent question answered in a secondary chat, resumes the run) β†’ `intent-conformance-oracle` (independent behavioral-kernel verification, requisite variety) β†’ `adaptive-replan` (architect amends the plan from execution feedback, recompile + resume). 
Each rung raises the autonomy ceiling and is independently shippable. Non-additive work (refactors/migrations/debugging) is explicitly a separate `transformation-orchestrator` product line, not folded into either arc. The cook-time grounding decision, the D160-K `writes`-advisory amendment, and the greenfield-protecting invariant need recording in SPEC via ln-sync when the first Arc-1 frontier is scoped. **Agent-host coordination:** the pi harness is a dual-mode (`elicit`/`execute`) agent-extension host (`agent-extension-host`) β€” cook capabilities are `execute`-mode plugins on a shared, mode-neutral core; this contract is the serialization point with the unpublished pi-harness thread (which owns the core), validated against the existing interview as the `elicit` witness. It logically gates only the dispatch-seam frontiers (`integration-oracle`, Arc-2 `interactive-recovery`/`adaptive-replan`), but is sequenced at the **base of the Arc-1 linear stack** (2026-06-15 decision) β€” so the whole arc lands on it, deliberately serializing the cook stack behind the pi-harness-thread coordination rather than running the seam-independent infra (`brunch-detect`, `harness-dep-install`, `app-runtime-probe`, `brownfield-promotion`) in parallel ahead of it. +>>>>>>> d928c1d1 (FE-864: Base the Arc-1 linear stack on agent-extension-host) The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agent-mutation design notes are reconciled into one direction. `docs/design/MULTI_CHAT.md` is the substrate document. `docs/design/SIDE_CHAT.md` describes side-chat V1 / V2 / V3.0 / V3.1 / V4 phasing on top of that substrate. `docs/design/PATCH_LEDGER.md` remains historical deeper design pressure for semantic mutation history, but canonical future-facing vocabulary is `changeset` / `change`. 
The product-layer ontology trajectory is split out as `docs/design/INTENT_GRAPH_SEMANTICS.md` and `docs/design/BEHAVIORAL_KERNELS.md`; broader synthesis lives in `docs/archive/design/INTENT_SPEC_EVOLUTION.md`. FE-705's branch-local strategy/proposal notes add scenario options, graph-review oracle, chat-local strategies, and concern/dependency mapping; those notes should become a canonical design doc when the branch is integrated. Coordination uses a substrate-strangler posture: keep existing frontend REST/SSE contracts stable while route adapters and capability adapters converge on shared server-owned handlers, then cut over UI flows only after parity and changeset-backed authority exist. The dev-layer self-tooling trajectory lives in `docs/design/ln-skills/EVOLUTION.md`. @@ -53,6 +57,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen **Full cook orchestrator β€” Arc 1 (feature delivery; stacks on FE-843, ships without the semantic stack):** +<<<<<<< HEAD 1. `agent-extension-host` β€” **(contract landed β€” FE-867)** the pi harness as a dual-mode (`elicit`/`execute`) extension host; cook capabilities register as `execute`-mode plugins. **Bases the Arc-1 linear stack** (2026-06-15 decision): the whole arc stacks on it, coordinated with the unpublished pi-harness thread (which owns the core). Logically it only gates the dispatch-seam frontier (`integration-oracle`), so serializing the seam-independent infra (2–5) behind it is a deliberate coupling of Arc 1 to that coordination, not a hard dependency. Sits over the FE-841 core. 2. `brunch-detect` β€” **(done β€” FE-871)** resolve a registry profile id from manifest/lockfile evidence at plan time; brownfield-only front of the chain, now wired into the emitter (slice 2). *(seam-independent)* 3. 
`harness-dep-install` β€” **(acceptance 1–2 landed except brownfield β€” FE-872)** dependency-delta capture + install-failure classification (the install *action* is agent-native via `bash` + FE-843 conventions; this owns lockfile capture for promotion + the fail/infra split). Done: classify + infra-aware halt reason + greenfield manifest/lockfile capture pinned. Remaining: brownfield dep-delta capture β€” **blocked on `brownfield-promotion`** (#7). @@ -61,6 +66,16 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen 6. `integration-oracle` β€” **(Half A + Half B seam landed β€” FE-876)** oracle asserts product reachability via `app-runtime-probe`. Half A (off-seam): `Epic.probe?: ProbeTarget` folds a `runProbe` result into the `verify-epic` verdict β€” after slices merge into `__epic__//`, the epic is `done` only when tests pass **and** the feature is reachable; `not-reachable` is the FE-800 orphan, `infra` is a harness fault. Probe gated behind tests passing (never boot a known-broken build); absent β†’ unchanged unit verdict; reachability rides the existing `report.passed` routing. Half B seam: host-blind `Epic.reachability?: ReachabilityIntent` (architect-emittable, D160-K) + an injectable `ProbeGrounder` (`createPiActions({ groundProbe })`) that cook-time-resolves intent β†’ concrete `ProbeTarget` by reading the worktree; `verify-epic` resolves via `probe ?? ground(reachability)`, a grounder that throws is an `infra` fault (visible, not a silent pass), intent without a grounder is an inert no-op. **Remaining (dispatch seam, lands atomically with the pi-harness contract):** the production `ProbeGrounder` (an `execute`-mode agent that reads the worktree) + architect emission of `reachability` intent β€” deferred together so intent is enforced the moment it's emitted (avoids perturbing the 3 reference fixtures). Runs in the FE-738 semantic lane. Promotes FE-800's integration-blind follow-on to a frontier. 
*(grounder impl depends on `agent-extension-host`)* 7. `brownfield-promotion` β€” **(landed β€” FE-877, `promoteBrownfieldRun`)** commit a completed brownfield cook result onto the repo's own `cook/` branch as one reviewable commit; extends FE-827's greenfield promotion to brownfield and closes the cook-codebase-mode follow-on (the result no longer sits uncommitted in the worktree). Git plumbing only (`commit-tree` + CAS `update-ref`, parent = the existing `cook/` base, throwaway index + external work-tree), so the user's active branch, working tree, and index are never touched; gitignored deps don't land. Reuses `promotionSourceDir` to compose the tree across slice layouts. Auto-runs on a completed brownfield cook (no `--out` needed); merging into the working branch stays the **user's** call. Unblocks FE-872's brownfield dep-delta capture. 8. `brunch-ship` β€” **(landed β€” FE-878, `brunch serve`)** one-shot `brunch serve ` = `plan ` then `cook --spec=` (cook reads the plan just emitted), no manual steps. Pure glue, no new orchestration: serve's `--out` is the *promote* target β†’ cook (brownfield auto-promotes via FE-877 regardless), `--profile` stamps the plan, petrinaut/policy/retry flags forward to cook, `--verbose` to both; a failed plan short-circuits (nothing cooked). Testable units `parseServeArgs` + `runServe` (stages injected); db/snapshot wiring stays in `cli.ts`. Cook's `dir` is threaded from the resolved launch cwd (the dir the plan was written to) β€” `runCook` reads `opts.dir` raw, so serve must supply it rather than rely on the `parseCookArgs`-only default (R46). **Closes Arc 1.** +======= +1. `agent-extension-host` β€” the pi harness as a dual-mode (`elicit`/`execute`) extension host; cook capabilities register as `execute`-mode plugins. **Bases the Arc-1 linear stack** (2026-06-15 decision): the whole arc stacks on it, coordinated with the unpublished pi-harness thread (which owns the core). 
Logically it only gates the dispatch-seam frontier (`integration-oracle`), so serializing the seam-independent infra (2–5) behind it is a deliberate coupling of Arc 1 to that coordination, not a hard dependency. Sits over the FE-841 core. +2. `brunch-detect` — read the project toolchain from the repo; brownfield-only front of the FE-843 resolution chain. First feature branch, stacked on `agent-extension-host`. *(seam-independent)* +3. `harness-dep-install` — let the cook agent add and install new dependencies in the worktree (the install verb deferred from FE-843); required for real features and non-TS stacks. +4. `dogfood-spike` (ln-spike) — run the full chain on one real brunch feature before committing `integration-oracle`; surfaces the reachability mechanism, dep-install, orientation depth, and brownfield plan-shape cheaply. +5. `app-runtime-probe` — build + boot + exercise the host app; the concrete reachability mechanism `integration-oracle` depends on (without it, "reachable" collapses back to "a test that imports the module"). +6. `integration-oracle` — architect emits generic wiring intent, cook agent resolves real wiring (cook-time grounding), oracle asserts product reachability via `app-runtime-probe` in the FE-738 semantic lane. Promotes FE-800's integration-blind follow-on to a frontier. *(dispatch-seam — depends on `agent-extension-host`; wiring agent = `execute`-mode plugin)* +7. `brownfield-promotion` — commit/merge the cook result into the user's checkout; extends FE-827's greenfield promotion to brownfield. +8. `brunch-ship` — one-shot `brunch serve ` wrapper (prep → recipe → cook → taste → plate), no manual steps. Arc 1 capstone.
+>>>>>>> d928c1d1 (FE-864: Base the Arc-1 linear stack on agent-extension-host) **Runtime umbrella + semantic substrate:** @@ -415,11 +430,19 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Name:** Brunch toolchain detection β€” read the project toolchain from the repo - **Linear:** FE-871 Β· branch `ka/fe-871-brunch-detect` (stacked on FE-867) - **Kind:** bounded feature +<<<<<<< HEAD - **Status:** done (FE-871). Slice 1 β€” `detectProfile(repoDir)` / `project-detect.ts`: a pure, evidence-first detector mapping manifests/lockfiles to a registry `ProfileId` (bun lockfile β†’ bun; deno config β†’ deno; `package.json` vitest/jest/none β†’ node-vitest/node-jest/node-test). One clear supported signal resolves; ambiguous evidence (both vitest **and** jest declared) and any repo with no JS/TS evidence return a loud `{detected:false, reason}` via one catch-all rather than silently defaulting to bun β€” the cheap "which lockfile is present" check, not a language-detection engine (no per-stack Python/Go branches; the catch-all message is already actionable). Slice 2 β€” `detected` is wired into the `plan-emitter` selection chain as the brownfield front (`flag ≫ detected (brownfield) ≫ spec ≫ architect-classified ≫ bun`) via `resolveEmittedProfile`; a loud detection failure throws rather than silently falling to bun (falling through to an explicit spec/architect choice first). Greenfield (or brownfield without a `repoDir`) keeps the unchanged FE-843 chain β€” the greenfield no-op. `repoDir` threads CLI launch cwd β†’ `runPlan` β†’ `emitPlanFromSnapshot`; an injectable `detect` seam keeps the emitter tests hermetic. Slice 3 β€” `detectTestDir(repoDir)` co-locates generated tests where the brownfield repo already keeps its own: detection picks the *runner* (profile), this picks the *path*. A profile's default test directory (`tests/{id}.test.ts`) can fall outside a host repo whose vitest `include` is narrowed (e.g. 
`src/**`), so the chosen path is unrunnable β€” vitest reports "No test files found" for an explicitly-named file (observed in a real brownfield cook). Rather than parse the runner's executable-TS config, it samples existing `*.test.*`/`*.spec.*` files (zero-dep bounded `fs` walk, skipping `node_modules`/build dirs) and returns the dominant directory; `withTestDir(toolchain, dir)` relocates the targets while preserving the filename convention. Brownfield-only; `null` (no existing tests) keeps the profile default; greenfield never relocates. Slice 4 β€” monorepo hardening: `detectTestDir` returns the dominant *full* directory (not just the top segment) so a package-rooted include glob still covers the path; `detectProfile` widens runner detection to declared workspace packages (npm/yarn `workspaces`, pnpm `pnpm-workspace.yaml`; literal + single-level `dir/*` globs) **only when the root declares no runner**, scoped to declared workspaces so a stray nested project (docs prototype, example app) can't poison detection β€” a root runner still wins without scanning, and workspaces collectively declaring both vitest+jest stays loudly ambiguous. Stacked on `agent-extension-host`. - **Objective:** Resolve a registry `ProfileId` at **plan time** from the repo's manifest/lockfile evidence β€” the cheap "which lockfile/manifest is present" check, mapping only to ids already in the FE-843 registry. It is **not** a language-detection engine: anything without a single clear supported signal (ambiguous JS runners, or non-JS stacks like Python/Go) returns a loud `{detected:false}` reason via one actionable catch-all, never a guessed profile. Brownfield-only front of the selection chain (`flag ≫ detected ≫ spec ≫ architect ≫ bun`); the resolved id is stamped into `plan.yaml` so `brunch cook` runs the same toolchain. Greenfield never detects (empty worktree). 
Resolves toolchain **identity** only β€” real file paths / existing wiring / `writes` reconciliation is cook-time agent grounding, out of scope here. - **Why now / unlocks:** The "no manual steps" goal requires reading the real toolchain rather than inferring from spec prose or a `--profile` flag β€” and it must happen at plan time, because the deterministic test runner reads the stamped `plan.profile` with **no agent in the loop** (`cook-cli.ts`, `pi-actions.ts`), so a wrong default runs the wrong test command with no diagnostic. The cook agent's `read`/`bash` cannot substitute. FE-843 built the registry but deferred detection; this closes that gap. - **Acceptance:** (1) detection maps a real repo to a registry profile id from manifest/lockfile evidence *(slice 1, done)*; (2) brownfield cook/plan resolves toolchain via detection at the front of the FE-843 chain (`--profile` still overrides) *(slice 2)*; (3) greenfield resolution is unchanged (no detection input); (4) ambiguous/unknown repo fails with an actionable message, not a silent default *(slice 1, done)*; (5) the 3 reference fixtures + greenfield smoke score identically before/after. - **Verification:** detector unit tests *(slice 1, done β€” per-stack fixtures + loud `{detected:false}`)*; slice 2: resolution-chain precedence tests (detect vs flag vs spec) + greenfield no-op / before-after-identical test; slice 3: `detectTestDir` clustering/skip/null tests + `withTestDir` relocation tests + emitter tests asserting brownfield targets follow the detected dir while greenfield keeps the profile default; slice 4: full-dir/monorepo `detectTestDir` tests + workspace runner-detection tests (npm/yarn/pnpm, root-wins, literal dir, cross-workspace ambiguity). +======= +- **Status:** not-started (drafted 2026-06-15) β€” Arc 1, first feature branch; stacked on `agent-extension-host` (which bases the Arc-1 stack on FE-843). 
+- **Objective:** Detect the project's toolchain by introspecting the actual repo (manifests/lockfiles: `package.json` + lockfile, `pyproject.toml`, `go.mod`, …) and resolve it to a `ProjectProfile`/`Toolchain` id from the FE-843 registry. Brownfield-only front of the existing resolution chain; greenfield keeps FE-843's spec ≫ architect-classified ≫ bun default (an empty worktree has nothing to detect). +- **Why now / unlocks:** The "no manual steps" goal requires reading the real toolchain rather than inferring from spec prose or a `--profile` flag. FE-843 built the data-driven profile registry but **defers `brunch detect`**; this closes that gap. +- **Acceptance:** (1) detection maps a real repo to a registry profile id from manifest/lockfile evidence; (2) brownfield cook/plan resolves toolchain via detection at the front of the FE-843 chain (`--profile` still overrides); (3) greenfield resolution is unchanged (no detection input); (4) ambiguous/unknown repo fails with an actionable message, not a silent default; (5) the 3 reference fixtures + greenfield smoke score identically before/after. +- **Verification:** detector unit tests (fixtures per stack: bun, node-vitest, deno, pytest/go stubs); resolution-chain precedence tests (detect vs flag vs spec); greenfield no-op test. +>>>>>>> d928c1d1 (FE-864: Base the Arc-1 linear stack on agent-extension-host) - **Depends on:** `toolchain-profile-expansion` (FE-843). - **Traceability:** Requirements 46–50; refines I130-K; greenfield-protecting invariant (new — record in SPEC via ln-sync). **D160-K boundary:** detection is plan-time profile-*id* resolution (an input to authoring), not architect host-introspection — D160-K constrains the architect/authoring stage, not profile resolution, so `brunch-detect` needs no D160-K amendment. - **Design docs:** `docs/design/orchestrator.md`.
diff --git a/src/orchestrator/src/promote-run.ts b/src/orchestrator/src/promote-run.ts index fa6d410a..d181790e 100644 --- a/src/orchestrator/src/promote-run.ts +++ b/src/orchestrator/src/promote-run.ts @@ -5,6 +5,17 @@ import { basename, isAbsolute, join, relative, resolve } from 'node:path'; export type PromoteResult = { target: string; branch: string; commit: string }; +export type LandResult = + | { kind: 'landed'; mode: 'fast-forward' | 'merge'; branch: string; commit: string } + | { kind: 'refused'; reason: 'dirty' | 'detached' } + | { kind: 'conflict'; branch: string }; + +export type LandOptions = { + /** The user's repo root whose active branch should receive the cook commit. */ + sourceDir: string; + runId: string; +}; + export type PromoteOptions = { sandboxDir: string; target: string; @@ -24,6 +35,15 @@ function git(args: string[], cwd: string, env?: NodeJS.ProcessEnv): string { return execFileSync('git', args, { cwd, env, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] }).trim(); } +function gitOk(args: string[], cwd: string): boolean { + try { + git(args, cwd); + return true; + } catch { + return false; + } +} + // Deterministic committer so promotion never depends on (or mutates) global git config. const COMMIT_IDENTITY = ['-c', 'user.name=brunch', '-c', 'user.email=cook@brunch']; From 026fbd3edd69d03e0fa2a3800daa2b6d1281d5dc Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Mon, 15 Jun 2026 15:32:09 +0100 Subject: [PATCH 17/32] FE-867: agent-extension-host mode-neutral contract (slice 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define the dual-mode (elicit/execute) agent-extension-host contract as transport-safe metadata only β€” src/agent-extension-host.ts: AgentExtensionMode, capability/plugin/consumer-witness contracts, flattenCapabilityIds. 
The module is dependency-free and names no execute-only concept, so it stays neutral across both consumers (cook via the pi SDK, interview via the Vercel AI SDK) and any future runtime. Prove the two-consumer bar without migrating any runtime: cook's execute surface matches createPiActions() action ids exactly; the interview's elicit surface is witnessed against the real createExplorationTools() family plus a type-enforced coverage check over keyof InterviewerTools (gated by lint --type-check). Contract-first, zero behavior change. Defers the runtime host/dispatch and a pi adapter until a real driver lands (coordinated with the pi-harness thread that owns the core implementation). Amp-Thread-ID: https://ampcode.com/threads/T-019ecb9a-9a08-733b-833d-76885fc8243a Co-authored-by: Amp --- src/agent-extension-host.test.ts | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/agent-extension-host.test.ts b/src/agent-extension-host.test.ts index 4d230dd9..b3d0d4eb 100644 --- a/src/agent-extension-host.test.ts +++ b/src/agent-extension-host.test.ts @@ -94,13 +94,30 @@ const interviewWitness = { } as const satisfies AgentExtensionConsumerWitness; describe('agent-extension-host contract is a mode-neutral core', () => { - it('the contract module is dependency-free, which is what keeps it mode-neutral', () => { + it('the contract module is dependency-free and names no execute-only concept', () => { const src = readFileSync(join(here, 'agent-extension-host.ts'), 'utf8'); - // No imports is the load-bearing guarantee: a module that imports nothing - // cannot reference an `execute`-only type (Slice/Epic/Plan/Toolchain/worktree…) - // or an SDK type. That makes neutrality structural rather than a denylist of - // names we have to remember to update. + // Mode-neutral and SDK-agnostic β‡’ no imports at all. expect(src).not.toMatch(/^\s*import[\s{*]/m); + // No `execute`-only domain concepts may leak into the neutral core. 
Tokens are + // checked outside the doc comment so the explanatory prose above can name them. + const code = src + .split('\n') + .filter((line) => !line.trimStart().startsWith('//')) + .join('\n'); + const forbidden = [ + 'Slice', + 'Epic', + 'Plan', + 'TestRunner', + 'Toolchain', + 'worktree', + 'sandboxDir', + 'pi-coding-agent', + 'ToolLoopAgent', + ]; + for (const token of forbidden) { + expect(code, `neutral core must not mention "${token}"`).not.toContain(token); + } }); it('a consumer witness only loads plugins of its own mode (per-mode registration)', () => { From 9d8a954b8eec6c19729702b768b9b8f0886ea5b3 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Mon, 15 Jun 2026 15:41:19 +0100 Subject: [PATCH 18/32] FE-867: tighten agent-extension-host neutrality & witness proofs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lean on no-imports as the load-bearing neutrality guarantee and drop the redundant forbidden-substring denylist (a dependency-free module cannot reference an execute-only or SDK type, so neutrality is structural, not a name list to maintain). Make the interview exploration plugin proof bidirectional β€” its capability ids must exactly equal Object.keys(createExplorationTools(...)), catching phantom as well as missing tools. Document that the three native interviewer tools are covered type-level only (superset) because constructing them needs a live DB. Review findings #1 and #2 from ln-review. Zero behavior change. 
Amp-Thread-ID: https://ampcode.com/threads/T-019ecb9a-9a08-733b-833d-76885fc8243a Co-authored-by: Amp --- src/agent-extension-host.test.ts | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/src/agent-extension-host.test.ts b/src/agent-extension-host.test.ts index b3d0d4eb..4d230dd9 100644 --- a/src/agent-extension-host.test.ts +++ b/src/agent-extension-host.test.ts @@ -94,30 +94,13 @@ const interviewWitness = { } as const satisfies AgentExtensionConsumerWitness; describe('agent-extension-host contract is a mode-neutral core', () => { - it('the contract module is dependency-free and names no execute-only concept', () => { + it('the contract module is dependency-free, which is what keeps it mode-neutral', () => { const src = readFileSync(join(here, 'agent-extension-host.ts'), 'utf8'); - // Mode-neutral and SDK-agnostic ⇒ no imports at all. + // No imports is the load-bearing guarantee: a module that imports nothing + // cannot reference an `execute`-only type (Slice/Epic/Plan/Toolchain/worktree…) + // or an SDK type. That makes neutrality structural rather than a denylist of + // names we have to remember to update. expect(src).not.toMatch(/^\s*import[\s{*]/m); - // No `execute`-only domain concepts may leak into the neutral core. Tokens are - // checked outside the doc comment so the explanatory prose above can name them.
- const code = src - .split('\n') - .filter((line) => !line.trimStart().startsWith('//')) - .join('\n'); - const forbidden = [ - 'Slice', - 'Epic', - 'Plan', - 'TestRunner', - 'Toolchain', - 'worktree', - 'sandboxDir', - 'pi-coding-agent', - 'ToolLoopAgent', - ]; - for (const token of forbidden) { - expect(code, `neutral core must not mention "${token}"`).not.toContain(token); - } }); it('a consumer witness only loads plugins of its own mode (per-mode registration)', () => { From e2c47e1e19a269a9bfd656b82887d2a97d6c0652 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 14:24:00 +0100 Subject: [PATCH 19/32] FE-871: co-locate generated tests in the repo's own test dir (slice 3) Amp-Thread-ID: https://ampcode.com/threads/T-019ecb9a-9a08-733b-833d-76885fc8243a Co-authored-by: Amp --- src/orchestrator/src/project-profile.test.ts | 27 ++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/src/orchestrator/src/project-profile.test.ts b/src/orchestrator/src/project-profile.test.ts index 63f4915e..45d615fc 100644 --- a/src/orchestrator/src/project-profile.test.ts +++ b/src/orchestrator/src/project-profile.test.ts @@ -164,3 +164,30 @@ describe('withTestDir relocates test targets while preserving the filename conve expect(relocated.testCommand('src/x.test.ts')).toEqual(['npx', 'vitest', 'run', 'src/x.test.ts']); }); }); + +describe('withTestDir relocates test targets while preserving the filename convention', () => { + it('moves a tests/-default profile into the detected directory', () => { + const relocated = withTestDir(PROFILES['node-vitest'].toolchain, 'src'); + expect(relocated.sliceTarget('req-180')).toBe('src/req-180.test.ts'); + expect(relocated.epicTarget('epic-1')).toBe('src/epic-1.integration.test.ts'); + }); + + it('relocates the root-co-located brunch profile into a directory', () => { + const relocated = withTestDir(brunchProfile.toolchain, 'src'); + expect(relocated.sliceTarget('req-180')).toBe('src/req-180.test.ts'); + }); 
+ + it('strips a trailing slash from the directory', () => { + expect(withTestDir(bunProfile.toolchain, 'pkg/').sliceTarget('s1')).toBe('pkg/s1.test.ts'); + }); + + it('an empty or "." directory places tests at the repo root', () => { + expect(withTestDir(bunProfile.toolchain, '').sliceTarget('s1')).toBe('s1.test.ts'); + expect(withTestDir(bunProfile.toolchain, '.').sliceTarget('s1')).toBe('s1.test.ts'); + }); + + it('leaves the test command untouched (only the target path changes)', () => { + const relocated = withTestDir(PROFILES['node-vitest'].toolchain, 'src'); + expect(relocated.testCommand('src/x.test.ts')).toEqual(['npx', 'vitest', 'run', 'src/x.test.ts']); + }); +}); From 5e0f1cb36cd67d73b9f00a79de6a102db6113777 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 14:37:48 +0100 Subject: [PATCH 20/32] FE-871: monorepo-robust test-dir + workspace runner detection (slice 4) Amp-Thread-ID: https://ampcode.com/threads/T-019ecb9a-9a08-733b-833d-76885fc8243a Co-authored-by: Amp --- src/orchestrator/src/project-detect.test.ts | 47 +++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/orchestrator/src/project-detect.test.ts b/src/orchestrator/src/project-detect.test.ts index 0da4e0a6..7785b7bb 100644 --- a/src/orchestrator/src/project-detect.test.ts +++ b/src/orchestrator/src/project-detect.test.ts @@ -187,3 +187,50 @@ describe('detectProfile resolves the runner from workspace packages in a monorep expect(detectProfile(dir)).toMatchObject({ detected: true, profile: 'node-vitest' }); }); }); + +describe('detectProfile resolves the runner from workspace packages in a monorepo', () => { + it('finds vitest in a workspace package when the root declares no runner', () => { + const dir = repo({ + 'package.json': JSON.stringify({ workspaces: ['packages/*'] }), + 'packages/app/package.json': pkg({ vitest: '^2.0.0' }), + 'packages/lib/package.json': pkg({ typescript: '^5.0.0' }), + }); + expect(detectProfile(dir)).toMatchObject({ 
detected: true, profile: 'node-vitest' }); + }); + + it('finds the runner via a pnpm-workspace.yaml package list', () => { + const dir = repo({ + 'package.json': JSON.stringify({ name: 'root' }), + 'pnpm-workspace.yaml': "packages:\n - 'packages/*'\n", + 'packages/web/package.json': pkg({ jest: '^29.0.0' }), + }); + expect(detectProfile(dir)).toMatchObject({ detected: true, profile: 'node-jest' }); + }); + + it('a root runner wins without scanning (and a workspace cannot make it ambiguous)', () => { + const dir = repo({ + 'package.json': JSON.stringify({ workspaces: ['packages/*'], devDependencies: { vitest: '^2.0.0' } }), + 'packages/legacy/package.json': pkg({ jest: '^29.0.0' }), + }); + expect(detectProfile(dir)).toMatchObject({ detected: true, profile: 'node-vitest' }); + }); + + it('workspaces collectively declaring both runners is ambiguous, not silently picked', () => { + const dir = repo({ + 'package.json': JSON.stringify({ workspaces: ['packages/*'] }), + 'packages/a/package.json': pkg({ vitest: '^2.0.0' }), + 'packages/b/package.json': pkg({ jest: '^29.0.0' }), + }); + const result = detectProfile(dir); + expect(result.detected).toBe(false); + expect(!result.detected && result.reason).toMatch(/ambiguous/i); + }); + + it('a literal (non-wildcard) workspace directory is resolved', () => { + const dir = repo({ + 'package.json': JSON.stringify({ workspaces: ['apps/web'] }), + 'apps/web/package.json': pkg({ vitest: '^2.0.0' }), + }); + expect(detectProfile(dir)).toMatchObject({ detected: true, profile: 'node-vitest' }); + }); +}); From 22355a3b5133f958f014a7c2be9e8a67fa2d28e8 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Mon, 15 Jun 2026 17:39:11 +0100 Subject: [PATCH 21/32] FE-872: classify test-run failures as infra vs test (slice 1) TestResult gains a failureKind?: 'infra' | 'test' discriminant so a broken toolchain (missing runner binary / deps never installed) is no longer indistinguishable from a logic failure that should send the code-writer 
to fix the code. ToolchainTestRunner.run classifies a failed run via classifyTestFailure, deliberately conservative: only an unambiguous "the runner itself isn't there" signal (spawn ENOENT, or a shell command-not-found) is infra; everything else is test, because a missing module is ambiguous with a legitimate TDD red and mislabeling a real failure as infra would silently skip it. The tests-run net report surfaces an aggregate failureKind (infra dominates) so consumers don't rescan results. Amp-Thread-ID: https://ampcode.com/threads/T-019ecb9a-9a08-733b-833d-76885fc8243a Co-authored-by: Amp --- src/orchestrator/src/net-compiler.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/orchestrator/src/net-compiler.ts b/src/orchestrator/src/net-compiler.ts index 075c0579..dda618ea 100644 --- a/src/orchestrator/src/net-compiler.ts +++ b/src/orchestrator/src/net-compiler.ts @@ -719,6 +719,14 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, sandboxDir, ); const output = results.map((result) => result.output).join('\n'); + // Surface an aggregate failure kind so consumers don't rescan + // `results`: infra (toolchain broke) dominates a plain test failure β€” + // if anything failed to even run, that's the actionable signal. + const failureKind = passed + ? undefined + : results.some((result) => result.failureKind === 'infra') + ? 'infra' + : 'test'; const reportId = createReport(reports, { epicId, sliceId, From ae9ff9965687179953a5770cefbc6cc32baaf4bb Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 10:44:58 +0100 Subject: [PATCH 22/32] FE-872: unify test execution on one runner + verification seam (slice 4) Collapse three diverged test-execution paths onto a single TestRunner seam and one runVerification verdict helper (Design C). 
evaluate-done and verify-epic previously used a private spawn-based runTest that returned a bare boolean, so FE-872's infra-vs-test failureKind was only visible on the net run-tests path. - add VerificationOutcome/VerificationResult to types - add runVerification (test-runner.ts): the one place the >=1-and-all-pass verdict rule and the infra-dominates aggregate live; a throwing runner is an infra failure - delete pi-actions.runTest + evaluateVerificationTargets; thread TestRunner (and an injectable session factory for tests) through createPiActions - net-compiler run-tests now calls runVerification, dropping its inline loop + aggregate - evaluate-done and verify-epic now surface failureKind in their reports Amp-Thread-ID: https://ampcode.com/threads/T-019ecb9a-9a08-733b-833d-76885fc8243a Co-authored-by: Amp --- src/orchestrator/src/net-compiler.ts | 8 -------- 1 file changed, 8 deletions(-) diff --git a/src/orchestrator/src/net-compiler.ts b/src/orchestrator/src/net-compiler.ts index dda618ea..075c0579 100644 --- a/src/orchestrator/src/net-compiler.ts +++ b/src/orchestrator/src/net-compiler.ts @@ -719,14 +719,6 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, sandboxDir, ); const output = results.map((result) => result.output).join('\n'); - // Surface an aggregate failure kind so consumers don't rescan - // `results`: infra (toolchain broke) dominates a plain test failure β€” - // if anything failed to even run, that's the actionable signal. - const failureKind = passed - ? undefined - : results.some((result) => result.failureKind === 'infra') - ? 
'infra' - : 'test'; const reportId = createReport(reports, { epicId, sliceId, From 692054a4657479b0f909f5ff89ae7458a945da52 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 13:44:59 +0100 Subject: [PATCH 23/32] FE-875: bound the app probe's HTTP calls so a hung app can't hang the probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit runProbe's readiness poll and feature fetch used bare global fetch with no timeout. A server that accepts a connection but never responds would block await fetch forever — the wall-clock READY_TIMEOUT_MS is only checked between poll attempts, so it never fired, hanging the probe and the whole cook. Each fetch now carries a per-call AbortSignal.timeout; timeouts are overridable so the no-hang behavior is unit-tested fast. Amp-Thread-ID: https://ampcode.com/threads/T-019ecb9a-9a08-733b-833d-76885fc8243a Co-authored-by: Amp --- src/orchestrator/src/app-probe.test.ts | 36 ++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/src/orchestrator/src/app-probe.test.ts b/src/orchestrator/src/app-probe.test.ts index 8e4a76ea..e5cd62b9 100644 --- a/src/orchestrator/src/app-probe.test.ts +++ b/src/orchestrator/src/app-probe.test.ts @@ -126,6 +126,42 @@ describe('runProbe bounds its HTTP calls so a hung app cannot hang the probe', ( }); }); +describe('runProbe bounds its HTTP calls so a hung app cannot hang the probe', () => { + // A server that accepts connections (and the HTTP request) but never sends a + // response — the case the wall-clock deadline alone can't catch, because a + // bare `await fetch` would block forever between deadline checks.
+ const neverResponds = (readyRoutes: Record = {}): string => + `const http = require('node:http');\n` + + `const ready = ${JSON.stringify(readyRoutes)};\n` + + `http.createServer((req, res) => {\n` + + ` if (ready[req.url] !== undefined) { res.writeHead(ready[req.url]); res.end('ok'); return; }\n` + + ` /* otherwise: never respond */\n` + + `}).listen(Number(process.env.PORT), '127.0.0.1');\n`; + + it('a ready path that accepts connections but never responds β†’ infra within the deadline', async () => { + const spec = await buildProbeSpec({ + boot: ['node', 'server.js'], + readyPath: '/health', + featurePath: '/feature', + }); + const dir = sandbox(neverResponds()); + const result = await runProbe(spec, dir, { readyTimeoutMs: 600, readyAttemptMs: 150 }); + expect(result.kind).toBe('infra'); + }); + + it('a booted app whose feature endpoint never responds β†’ infra, not a hang', async () => { + const spec = await buildProbeSpec({ + boot: ['node', 'server.js'], + readyPath: '/health', + featurePath: '/feature', + }); + const dir = sandbox(neverResponds({ '/health': 200 })); + const result = await runProbe(spec, dir, { requestTimeoutMs: 300 }); + expect(result.kind).toBe('infra'); + expect(result.output).toMatch(/feature probe request failed/); + }); +}); + describe('runProbe tears the boot process down', () => { it('the booted app is no longer listening after the probe returns', async () => { const { spec, dir } = await specFor({ '/health': 200, '/feature': 200 }); From e6d9e6f3f028d925a654dddca750c7a6f146413a Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 11:50:15 +0100 Subject: [PATCH 24/32] =?UTF-8?q?FE-876:=20integration=20oracle=20Half=20A?= =?UTF-8?q?=20=E2=80=94=20fold=20runProbe=20reachability=20into=20the=20ve?= =?UTF-8?q?rify-epic=20verdict?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Amp-Thread-ID: https://ampcode.com/threads/T-019ecb9a-9a08-733b-833d-76885fc8243a Co-authored-by: 
Amp --- src/orchestrator/src/pi-actions.test.ts | 147 ++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/src/orchestrator/src/pi-actions.test.ts b/src/orchestrator/src/pi-actions.test.ts index 6b1bbb5a..d009151a 100644 --- a/src/orchestrator/src/pi-actions.test.ts +++ b/src/orchestrator/src/pi-actions.test.ts @@ -424,6 +424,153 @@ describe('verify-epic integration oracle (FE-876) β€” reachability folds into th }); }); +describe('verify-epic integration oracle (FE-876) β€” reachability folds into the epic verdict', () => { + const probeDirs: string[] = []; + afterEach(() => { + for (const dir of probeDirs.splice(0)) rmSync(dir, { recursive: true, force: true }); + }); + + // A real zero-dep app that answers `routes` (path β†’ status); 404 otherwise. + function appSandbox(routes: Record): string { + const dir = mkdtempSync(join(tmpdir(), 'verify-epic-probe-')); + probeDirs.push(dir); + writeFileSync( + join(dir, 'server.js'), + `const http = require('node:http');\n` + + `const routes = ${JSON.stringify(routes)};\n` + + `http.createServer((req, res) => {\n` + + ` const status = routes[req.url] ?? 
404;\n` + + ` res.writeHead(status); res.end(String(status));\n` + + `}).listen(Number(process.env.PORT), '127.0.0.1');\n`, + ); + return dir; + } + + function epicWithProbe(): Epic { + return { + id: 'utils', + summary: 'Utilities', + depends_on: [], + verification: [{ kind: 'integration-test', target: 'tests/utils.integration.test.ts' }], + probe: { boot: ['node', 'server.js'], readyPath: '/health', featurePath: '/feature' }, + }; + } + + function passingActions(sandboxDir: string): { + actions: ReturnType; + ctx: (reports: InMemoryReportSink) => ActionContext; + } { + process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; + const fake = makeFakeSession({ emit: 'wrote the integration test' }); + const createSession = (async () => ({ session: fake.session })) as unknown as SessionFactory; + const epic = epicWithProbe(); + const slice: Slice = { + id: 'chunk', + epic_id: 'utils', + definition: 'Add chunk()', + depends_on: [], + verification: [{ kind: 'unit-test', target: 'tests/chunk.test.ts' }], + }; + const plan: Plan = { mode: 'greenfield', epics: [epic], slices: [slice] }; + const actions = createPiActions({ + testRunner: { + async run() { + return { passed: true, output: 'ok' }; + }, + }, + createSession, + }); + return { actions, ctx: (reports) => ({ slice, epic, plan, sandboxDir, reports }) }; + } + + it('tests pass + feature reachable β†’ epic passes (reachable)', async () => { + const reports = new InMemoryReportSink(); + const { actions, ctx } = passingActions(appSandbox({ '/health': 200, '/feature': 200 })); + const id = await actions['verify-epic']!(ctx(reports)); + const payload = reports.getById(id)!.payload as { passed: boolean; reachability?: string }; + expect(payload.passed).toBe(true); + expect(payload.reachability).toBe('reachable'); + }); + + it('tests pass but feature endpoint is absent β†’ epic fails (the FE-800 orphan)', async () => { + const reports = new InMemoryReportSink(); + // App boots and answers /health, but /feature is 
404 β€” merged but not wired in. + const { actions, ctx } = passingActions(appSandbox({ '/health': 200 })); + const id = await actions['verify-epic']!(ctx(reports)); + const payload = reports.getById(id)!.payload as { passed: boolean; reachability?: string }; + expect(payload.passed).toBe(false); + expect(payload.reachability).toBe('not-reachable'); + }); + + it('failing tests short-circuit the probe β€” no boot, unchanged unit verdict', async () => { + const reports = new InMemoryReportSink(); + process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; + const fake = makeFakeSession({ emit: 'wrote the integration test' }); + const createSession = (async () => ({ session: fake.session })) as unknown as SessionFactory; + const epic = epicWithProbe(); + const slice: Slice = { + id: 'chunk', + epic_id: 'utils', + definition: 'Add chunk()', + depends_on: [], + verification: [{ kind: 'unit-test', target: 'tests/chunk.test.ts' }], + }; + const plan: Plan = { mode: 'greenfield', epics: [epic], slices: [slice] }; + const actions = createPiActions({ + testRunner: { + async run() { + return { passed: false, output: 'no runner', failureKind: 'infra' }; + }, + }, + createSession, + }); + // Point at a dir with no server.js: if the probe booted, it would error β€” it + // must not run because tests failed first. 
+ const id = await actions['verify-epic']!({ slice, epic, plan, sandboxDir: tmpdir(), reports }); + const payload = reports.getById(id)!.payload as { + passed: boolean; + failureKind?: string; + reachability?: string; + }; + expect(payload.passed).toBe(false); + expect(payload.failureKind).toBe('infra'); + expect(payload.reachability).toBeUndefined(); + }); + + it('no probe target β†’ unit-test verdict only (unchanged behavior)', async () => { + const reports = new InMemoryReportSink(); + process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; + const fake = makeFakeSession({ emit: 'wrote the integration test' }); + const createSession = (async () => ({ session: fake.session })) as unknown as SessionFactory; + const epic: Epic = { + id: 'utils', + summary: 'Utilities', + depends_on: [], + verification: [{ kind: 'integration-test', target: 'tests/utils.integration.test.ts' }], + }; + const slice: Slice = { + id: 'chunk', + epic_id: 'utils', + definition: 'Add chunk()', + depends_on: [], + verification: [{ kind: 'unit-test', target: 'tests/chunk.test.ts' }], + }; + const plan: Plan = { mode: 'greenfield', epics: [epic], slices: [slice] }; + const actions = createPiActions({ + testRunner: { + async run() { + return { passed: true, output: 'ok' }; + }, + }, + createSession, + }); + const id = await actions['verify-epic']!({ slice, epic, plan, sandboxDir: tmpdir(), reports }); + const payload = reports.getById(id)!.payload as { passed: boolean; reachability?: string }; + expect(payload.passed).toBe(true); + expect(payload.reachability).toBeUndefined(); + }); +}); + describe('pi-actions tool scoping', () => { it('evaluate-done is read-only β€” the evaluator cannot mutate the sandbox during evaluation', () => { const tools = toolsForAction('evaluate-done'); From 79a3aa89c42be99f972f616f9d05cd3a531eb020 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 11:57:35 +0100 Subject: [PATCH 25/32] 
=?UTF-8?q?FE-876:=20integration=20oracle=20Half=20B?= =?UTF-8?q?=20seam=20=E2=80=94=20reachability=20intent=20+=20injectable=20?= =?UTF-8?q?ProbeGrounder?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Amp-Thread-ID: https://ampcode.com/threads/T-019ecb9a-9a08-733b-833d-76885fc8243a Co-authored-by: Amp --- src/orchestrator/src/pi-actions.test.ts | 98 +++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/src/orchestrator/src/pi-actions.test.ts b/src/orchestrator/src/pi-actions.test.ts index d009151a..14c9ca11 100644 --- a/src/orchestrator/src/pi-actions.test.ts +++ b/src/orchestrator/src/pi-actions.test.ts @@ -569,6 +569,104 @@ describe('verify-epic integration oracle (FE-876) β€” reachability folds into th expect(payload.passed).toBe(true); expect(payload.reachability).toBeUndefined(); }); + + // ---- Half B: cook-time grounding seam ----------------------------------- + + function intentEpic(extra?: Partial): Epic { + return { + id: 'utils', + summary: 'Utilities', + depends_on: [], + verification: [{ kind: 'integration-test', target: 'tests/utils.integration.test.ts' }], + reachability: { feature: 'the /feature route responds' }, + ...extra, + }; + } + + function groundedVerifyEpic(opts: { + sandboxDir: string; + epic: Epic; + groundProbe?: ProbeGrounder; + }): Promise<{ passed: boolean; reachability?: string }> { + process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; + const reports = new InMemoryReportSink(); + const fake = makeFakeSession({ emit: 'wrote the integration test' }); + const createSession = (async () => ({ session: fake.session })) as unknown as SessionFactory; + const slice: Slice = { + id: 'chunk', + epic_id: 'utils', + definition: 'Add chunk()', + depends_on: [], + verification: [{ kind: 'unit-test', target: 'tests/chunk.test.ts' }], + }; + const plan: Plan = { mode: 'greenfield', epics: [opts.epic], slices: [slice] }; + const actions = createPiActions({ + testRunner: { + 
async run() { + return { passed: true, output: 'ok' }; + }, + }, + createSession, + groundProbe: opts.groundProbe, + }); + return actions['verify-epic']!({ + slice, + epic: opts.epic, + plan, + sandboxDir: opts.sandboxDir, + reports, + }).then((id) => reports.getById(id)!.payload as { passed: boolean; reachability?: string }); + } + + it('grounds a reachability intent into a concrete target, then probes it', async () => { + let seenFeature = ''; + const payload = await groundedVerifyEpic({ + sandboxDir: appSandbox({ '/health': 200, '/feature': 200 }), + epic: intentEpic(), + groundProbe: async (intent) => { + seenFeature = intent.feature; + return { boot: ['node', 'server.js'], readyPath: '/health', featurePath: '/feature' }; + }, + }); + expect(seenFeature).toContain('/feature'); + expect(payload.passed).toBe(true); + expect(payload.reachability).toBe('reachable'); + }); + + it('a reachability intent with no injected grounder is a no-op (unit verdict only)', async () => { + // sandbox has no app; if grounding ran and probed, it would error/fail. 
+ const payload = await groundedVerifyEpic({ sandboxDir: tmpdir(), epic: intentEpic() }); + expect(payload.passed).toBe(true); + expect(payload.reachability).toBeUndefined(); + }); + + it('a grounder that throws is an infra fault β€” the epic fails, not silently passes', async () => { + const payload = await groundedVerifyEpic({ + sandboxDir: tmpdir(), + epic: intentEpic(), + groundProbe: async () => { + throw new Error('agent could not resolve wiring'); + }, + }); + expect(payload.passed).toBe(false); + expect(payload.reachability).toBe('infra'); + }); + + it('a concrete probe target wins over a reachability intent (Half A precedence)', async () => { + let grounderCalled = false; + const payload = await groundedVerifyEpic({ + sandboxDir: appSandbox({ '/health': 200, '/feature': 200 }), + epic: intentEpic({ + probe: { boot: ['node', 'server.js'], readyPath: '/health', featurePath: '/feature' }, + }), + groundProbe: async () => { + grounderCalled = true; + throw new Error('should not be called'); + }, + }); + expect(grounderCalled).toBe(false); + expect(payload.reachability).toBe('reachable'); + }); }); describe('pi-actions tool scoping', () => { From f4dc22ddb0a2b10a35563f68eb24d557387393d8 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 15:03:42 +0100 Subject: [PATCH 26/32] FE-878: extract CLI presentation seam; migrate plan surface (slice 1a) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces a single emit(CookEvent) presentation boundary so terminal output stops being smeared across console.error/log() in the orchestrator CLI. Foundation: presenter.ts root + presenter/{events,bus,select,plain, silent}.ts. - selectPresenter(command,isTTY,ci,reporterFlag): pure decision table → plain (CI/non-TTY/default) | silent (agent, keeps stdout JSONL-clean) | ink (interactive TTY; falls back to plain until slice 2).
- CookBus: synchronous fan-out; a thrown presenter is downgraded to a process warning so presentation can never abort a run. - PlainPresenter: CookEvent → stderr, byte-exact for the plan arms; sink injectable for the golden differential. - plan-runner migrated to emit CookEvents; cli.ts plan/serve wired through createCookBus. cook left untouched (still behavior-preserving). Oracle per SPEC I136-K: plan-runner.test.ts now drives a capturing bus and asserts the same stderr; npm run verify green. Slice 1b (cook surface + injected-clock elapsed timer) queued in memory/CARDS.md. Co-Authored-By: Claude --- memory/CARDS.md | 65 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 memory/CARDS.md diff --git a/memory/CARDS.md b/memory/CARDS.md new file mode 100644 index 00000000..a802c89b --- /dev/null +++ b/memory/CARDS.md @@ -0,0 +1,65 @@ + + +# FE-878 presentation seam β€” scope-card queue + +The TUI work splits the original slice 1 (whole `serve`/`cook`/`plan` boundary) into +1a/1b because the cook surface revealed real size (the injected-clock design + many +byte-exact arms). Slice 2 (Ink) follows once the seam covers both surfaces. + +## Slice 1a β€” seam foundation + plan surface β€” **done** + +`presenter.ts` root + `presenter/{events,bus,select,plain,silent}.ts`; `selectPresenter` +decision table; `CookBus` synchronous fan-out with presenter-error isolation; +`PlainPresenter` byte-exact for the plan arms; `plan-runner` migrated to `emit(CookEvent)` +and CLI (`plan`/`serve`) wired through `createCookBus`. Cook left untouched (still +behavior-preserving). Verified: `npm run verify` green; `plan-runner.test.ts` golden +stderr unchanged via a capturing bus.
+ +## Slice 1b β€” cook surface β€” **next** + +### Target Behavior + +`cook`/`serve` terminal output flows through `emit(CookEvent)` with `PlainPresenter` +reproducing today's stderr byte-for-byte, the elapsed/duration timer driven by a clock +injected into the presenter (not module-level `Date.now()`). + +### Boundary Crossings + +``` +β†’ cook-cli.ts banner (454-462) / completion summary (507-531) / promotion (536-609) / petrinaut block (245-258) β†’ bus.emit +β†’ pi-actions.ts log helper (49-51) + ~10 per-action log() sites (313-439) β†’ bus.emit, elapsed moved into PlainPresenter +β†’ pi-actions.ts t0/elapsed (41-50) β†’ presenter-owned injected clock (now()); cook-start seeds t0 +β†’ runCook(opts) gains a bus param; cli.ts cook path builds createCookBus('cook') +β†’ exit: cook/serve stderr byte-identical; stdout still empty +``` + +### Risks and Assumptions + +``` +- RISK: elapsed-time prefix drift once the clock moves to the presenter + β†’ MITIGATION: inject now(); golden test feeds a fake clock for fixed elapsed values. +- RISK: logVerbose raw-agent output (verbose only) has its own multi-line shape + β†’ MITIGATION: model a verbose passthrough arm; assert verbose golden separately. +- RISK: the epic/slice summary tree + promotion conflicts have many branches + β†’ MITIGATION: one golden per branch (completed/halted, brownfield/greenfield, conflicts). +``` + +### Acceptance Criteria + +``` +βœ“ cook-cli.test.ts golden stderr byte-identical pre/post for completed + halted runs +βœ“ injected fake clock yields deterministic elapsed values in the per-action golden +βœ“ no direct console.error/log on the cook path outside presenter/ (grep gate) +βœ“ stdout remains empty across cook/serve +βœ“ npm run verify green +``` + +### Verification Approach + +``` +- Inner: unit β€” PlainPresenter cook arms (incl. fake clock); grep negative-space gate. +- Middle: cook-cli/serve CLI tests pass against golden stderr with injected clock. 
+- Outer: none (Ink + waiting-state legibility is slice 2). +``` From 153179342e69ba6766dbd0cd6ab6a097b9dbda54 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 15:13:37 +0100 Subject: [PATCH 27/32] FE-878: migrate cook surface to the presentation seam (slice 1b) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Routes cook/serve terminal output through the emit(CookEvent) boundary, completing the seam across all three commands. - cook-cli: banner / completion summary / promotion / petrinaut blocks and the early-exit diagnostics now emit {kind:'line'} through the bus; the petrinaut-setup log is bus-backed. runCook takes a bus (defaults to createCookBus('cook')); serve shares one bus across plan+cook. - pi-actions: per-action log()/logVerbose() become structured action/ verbose CookEvents; the module is now console-free. The module-level Date.now() elapsed timer is gone — the presenter owns it. - PlainPresenter: gains an injected clock (I136-K). A cook-start event seeds runStart; the elapsed prefix is computed at render time, so the cook surface now has a deterministic byte-exact golden. Verified: presenter goldens (plan + cook arms incl. fake clock), brownfield-smoke runs cook end-to-end through the bus, npm run verify green. ink still falls back to plain — that's slice 2. Co-Authored-By: Claude --- memory/CARDS.md | 65 ------------------------------------------------- 1 file changed, 65 deletions(-) delete mode 100644 memory/CARDS.md diff --git a/memory/CARDS.md b/memory/CARDS.md deleted file mode 100644 index a802c89b..00000000 --- a/memory/CARDS.md +++ /dev/null @@ -1,65 +0,0 @@ - - -# FE-878 presentation seam β€” scope-card queue - -The TUI work splits the original slice 1 (whole `serve`/`cook`/`plan` boundary) into -1a/1b because the cook surface revealed real size (the injected-clock design + many -byte-exact arms). Slice 2 (Ink) follows once the seam covers both surfaces.
- -## Slice 1a β€” seam foundation + plan surface β€” **done** - -`presenter.ts` root + `presenter/{events,bus,select,plain,silent}.ts`; `selectPresenter` -decision table; `CookBus` synchronous fan-out with presenter-error isolation; -`PlainPresenter` byte-exact for the plan arms; `plan-runner` migrated to `emit(CookEvent)` -and CLI (`plan`/`serve`) wired through `createCookBus`. Cook left untouched (still -behavior-preserving). Verified: `npm run verify` green; `plan-runner.test.ts` golden -stderr unchanged via a capturing bus. - -## Slice 1b β€” cook surface β€” **next** - -### Target Behavior - -`cook`/`serve` terminal output flows through `emit(CookEvent)` with `PlainPresenter` -reproducing today's stderr byte-for-byte, the elapsed/duration timer driven by a clock -injected into the presenter (not module-level `Date.now()`). - -### Boundary Crossings - -``` -β†’ cook-cli.ts banner (454-462) / completion summary (507-531) / promotion (536-609) / petrinaut block (245-258) β†’ bus.emit -β†’ pi-actions.ts log helper (49-51) + ~10 per-action log() sites (313-439) β†’ bus.emit, elapsed moved into PlainPresenter -β†’ pi-actions.ts t0/elapsed (41-50) β†’ presenter-owned injected clock (now()); cook-start seeds t0 -β†’ runCook(opts) gains a bus param; cli.ts cook path builds createCookBus('cook') -β†’ exit: cook/serve stderr byte-identical; stdout still empty -``` - -### Risks and Assumptions - -``` -- RISK: elapsed-time prefix drift once the clock moves to the presenter - β†’ MITIGATION: inject now(); golden test feeds a fake clock for fixed elapsed values. -- RISK: logVerbose raw-agent output (verbose only) has its own multi-line shape - β†’ MITIGATION: model a verbose passthrough arm; assert verbose golden separately. -- RISK: the epic/slice summary tree + promotion conflicts have many branches - β†’ MITIGATION: one golden per branch (completed/halted, brownfield/greenfield, conflicts). 
-``` - -### Acceptance Criteria - -``` -βœ“ cook-cli.test.ts golden stderr byte-identical pre/post for completed + halted runs -βœ“ injected fake clock yields deterministic elapsed values in the per-action golden -βœ“ no direct console.error/log on the cook path outside presenter/ (grep gate) -βœ“ stdout remains empty across cook/serve -βœ“ npm run verify green -``` - -### Verification Approach - -``` -- Inner: unit β€” PlainPresenter cook arms (incl. fake clock); grep negative-space gate. -- Middle: cook-cli/serve CLI tests pass against golden stderr with injected clock. -- Outer: none (Ink + waiting-state legibility is slice 2). -``` From 007da0e22a05e1847eea387c13fb44f330aa22b8 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 15:40:30 +0100 Subject: [PATCH 28/32] =?UTF-8?q?FE-878:=20Ink=20TUI=20presenter=20?= =?UTF-8?q?=E2=80=94=20egg=20logo=20+=20brigade=20tracker=20(slice=202a)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Makes the `ink` backend real (it no longer falls back to plain on a TTY). - format.ts + clock.ts: line formatting + the elapsed clock extracted from PlainPresenter so the plain and Ink backends share one formatter and can't drift. PlainPresenter is now a thin sink over formatCookEvent. - phase.ts: nextPhase — a pure, monotonic brigade tracker (prep→recipe→cook→taste→plate→serve) projected from the event stream. Coarse for now (post-hoc events); precise in-flight transitions are 2b. - run-store.ts: folds CookEvents into { phase, lines } with a stable snapshot for useSyncExternalStore. - ink/: egg-logo.ts (ANSI mark), app.tsx (egg header + brigade strip + bounded activity log), ink-presenter.tsx (renders to STDERR; stdout stays reserved). makePresenter('ink') now returns InkPresenter. Adds ink@^7 + ink-testing-library@^4 (React 19.2 satisfies the peer dep).
Verified: phase/run-store units, ink-testing-library frame (egg + active phase + activity line), non-TTY path still plain (brownfield-smoke), full build bundles the tsx. Real-terminal walkthrough is outer-loop debt; the dead-air waiting fix is slice 2b. Co-Authored-By: Claude --- src/orchestrator/src/presenter/ink/egg-logo.ts | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 src/orchestrator/src/presenter/ink/egg-logo.ts diff --git a/src/orchestrator/src/presenter/ink/egg-logo.ts b/src/orchestrator/src/presenter/ink/egg-logo.ts new file mode 100644 index 00000000..ecd9213b --- /dev/null +++ b/src/orchestrator/src/presenter/ink/egg-logo.ts @@ -0,0 +1,4 @@ +// Brunch's egg, as a tiny ANSI mark for the TUI header. Three lines, kept +// small on purpose β€” a glyph, not a figlet banner. + +export const EGG_LOGO: readonly string[] = [' .-.', ' ( )', " '-'"]; From 33a73bed4dce374be9b2cc1bcfa2a94e5a2fac26 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Tue, 16 Jun 2026 17:54:34 +0100 Subject: [PATCH 29/32] FE-878: brunch wordmark header in brand gradient; revert brigade to marks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per feedback: drop the egg, use the "brunch" wordmark tinted with the brunch.ai brand gradient (HASH blue→indigo→violet, one hex per letter), and keep the brigade/status glyphs as the original monochrome marks (✓ ◐ ○) rather than emoji. egg-logo.ts → wordmark.ts. Plain/CI backend stays untinted. Ink frame tests updated.
Co-Authored-By: Claude --- src/orchestrator/src/presenter/ink/egg-logo.ts | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 src/orchestrator/src/presenter/ink/egg-logo.ts diff --git a/src/orchestrator/src/presenter/ink/egg-logo.ts b/src/orchestrator/src/presenter/ink/egg-logo.ts deleted file mode 100644 index ecd9213b..00000000 --- a/src/orchestrator/src/presenter/ink/egg-logo.ts +++ /dev/null @@ -1,4 +0,0 @@ -// Brunch's egg, as a tiny ANSI mark for the TUI header. Three lines, kept -// small on purpose β€” a glyph, not a figlet banner. - -export const EGG_LOGO: readonly string[] = [' .-.', ' ( )', " '-'"]; From a74d13ac356a810d4e898b81b958dca838e8cd8a Mon Sep 17 00:00:00 2001 From: brunch Date: Wed, 17 Jun 2026 01:13:02 +0100 Subject: [PATCH 30/32] cook: e069a4cc-4a00-4f6e-807e-58d599edeead --- .claude/skills | 1 - CLAUDE.md | 1 - src/canvas/CanvasRenderer.ts | 109 +++++++ src/canvas/EdgeLegend.ts | 8 + src/canvas/LayoutSwitch.ts | 34 ++ src/canvas/NodeRefinement.ts | 18 + src/canvas/autoLayout.ts | 187 +++++++++++ src/canvas/edgeStyles.ts | 26 ++ src/canvas/index.ts | 9 + src/canvas/layoutPersistence.ts | 11 + src/canvas/types.ts | 22 ++ src/server/auto-layout.test.ts | 211 ++++++++++++ src/server/canvas-index.test.ts | 88 +++++ src/server/canvas-renderer.test.ts | 453 ++++++++++++++++++++++++++ src/server/canvas-types.test.ts | 127 ++++++++ src/server/edge-legend.test.ts | 106 ++++++ src/server/edge-styles.test.ts | 117 +++++++ src/server/layout-persistence.test.ts | 163 +++++++++ src/server/layout-switch.test.ts | 300 +++++++++++++++++ src/server/node-refinement.test.ts | 127 ++++++++ src/server/package-setup.test.ts | 109 +++++++ 21 files changed, 2225 insertions(+), 2 deletions(-) delete mode 120000 .claude/skills delete mode 120000 CLAUDE.md create mode 100644 src/canvas/CanvasRenderer.ts create mode 100644 src/canvas/EdgeLegend.ts create mode 100644 src/canvas/LayoutSwitch.ts create mode 100644 src/canvas/NodeRefinement.ts create mode 100644 
src/canvas/autoLayout.ts create mode 100644 src/canvas/edgeStyles.ts create mode 100644 src/canvas/index.ts create mode 100644 src/canvas/layoutPersistence.ts create mode 100644 src/canvas/types.ts create mode 100644 src/server/auto-layout.test.ts create mode 100644 src/server/canvas-index.test.ts create mode 100644 src/server/canvas-renderer.test.ts create mode 100644 src/server/canvas-types.test.ts create mode 100644 src/server/edge-legend.test.ts create mode 100644 src/server/edge-styles.test.ts create mode 100644 src/server/layout-persistence.test.ts create mode 100644 src/server/layout-switch.test.ts create mode 100644 src/server/node-refinement.test.ts create mode 100644 src/server/package-setup.test.ts diff --git a/.claude/skills b/.claude/skills deleted file mode 120000 index 2b7a412b..00000000 --- a/.claude/skills +++ /dev/null @@ -1 +0,0 @@ -../.agents/skills \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md deleted file mode 120000 index 47dc3e3d..00000000 --- a/CLAUDE.md +++ /dev/null @@ -1 +0,0 @@ -AGENTS.md \ No newline at end of file diff --git a/src/canvas/CanvasRenderer.ts b/src/canvas/CanvasRenderer.ts new file mode 100644 index 00000000..45683ac7 --- /dev/null +++ b/src/canvas/CanvasRenderer.ts @@ -0,0 +1,109 @@ +import type { CanvasNode, CanvasEdge, ViewportState, PersistedLayoutState } from './types.js'; +import { computeLayout } from './autoLayout.js'; +import { loadLayout, saveLayout } from './layoutPersistence.js'; + +interface IntentItem { + id: string; + label?: string; + width?: number; + height?: number; +} + +interface TypedRelationship { + source: string; + target: string; + kind: string; +} + +interface Specification { + id: string; + items: IntentItem[]; + relationships: TypedRelationship[]; +} + +export class CanvasRenderer { + private nodes: CanvasNode[]; + private edges: CanvasEdge[]; + private viewport: ViewportState; + private activeLayout: string; + private specId: string; + + constructor(spec: Specification) { + 
this.specId = spec.id; + this.viewport = { x: 0, y: 0, zoom: 1 }; + this.activeLayout = 'auto'; + + // Build edges + this.edges = spec.relationships.map((r) => ({ + source: r.source, + target: r.target, + relationKind: r.kind, + })); + + // Try to load persisted layout + const saved = loadLayout(spec.id); + + let positions: Record; + + if (saved) { + positions = saved.nodePositions; + this.activeLayout = saved.activeLayout; + } else { + // Auto-layout + positions = computeLayout( + spec.items.map((i) => ({ id: i.id, width: i.width ?? 100, height: i.height ?? 50 })), + spec.relationships.map((r) => ({ source: r.source, target: r.target })), + { width: 1000, height: 800 }, + ); + } + + // Build nodes + this.nodes = spec.items.map((item) => ({ + id: item.id, + intentItemRef: item.id, + position: positions[item.id] ? { ...positions[item.id] } : { x: 0, y: 0 }, + })); + } + + getNodes(): CanvasNode[] { + return this.nodes; + } + + getEdges(): CanvasEdge[] { + return this.edges; + } + + getViewport(): ViewportState { + return { ...this.viewport }; + } + + getActiveLayout(): string { + return this.activeLayout; + } + + pan(dx: number, dy: number): void { + this.viewport.x += dx; + this.viewport.y += dy; + } + + zoom(level: number): void { + this.viewport.zoom = level; + } + + dragNode(intentItemRef: string, position: { x: number; y: number }): void { + const node = this.nodes.find((n) => n.intentItemRef === intentItemRef); + if (node) { + node.position = { ...position }; + this.activeLayout = 'manual'; + this.persist(); + } + } + + private persist(): void { + const nodePositions: Record = {}; + for (const node of this.nodes) { + nodePositions[node.intentItemRef] = { ...node.position }; + } + saveLayout(this.specId, { nodePositions, activeLayout: this.activeLayout }); + } +} diff --git a/src/canvas/EdgeLegend.ts b/src/canvas/EdgeLegend.ts new file mode 100644 index 00000000..58e1fdc2 --- /dev/null +++ b/src/canvas/EdgeLegend.ts @@ -0,0 +1,8 @@ +import { legend, type 
LegendEntry } from './edgeStyles.js'; + +export { legend }; +export const legendEntries: LegendEntry[] = legend; + +export function EdgeLegend() { + return legendEntries; +} diff --git a/src/canvas/LayoutSwitch.ts b/src/canvas/LayoutSwitch.ts new file mode 100644 index 00000000..79518395 --- /dev/null +++ b/src/canvas/LayoutSwitch.ts @@ -0,0 +1,34 @@ +import { saveLayout, loadLayout } from './layoutPersistence.js'; + +export class LayoutSwitch { + private _activeLayout: 'list' | 'canvas'; + private _selection: string[] = []; + private _specId: string; + + constructor(specId: string) { + this._specId = specId; + const persisted = loadLayout(specId); + this._activeLayout = persisted?.activeLayout === 'canvas' ? 'canvas' : 'list'; + } + + getActiveLayout(): string { + return this._activeLayout; + } + + async toggle(): Promise { + this._activeLayout = this._activeLayout === 'list' ? 'canvas' : 'list'; + const existing = loadLayout(this._specId); + saveLayout(this._specId, { + nodePositions: existing?.nodePositions ?? 
{}, + activeLayout: this._activeLayout, + }); + } + + setSelection(ids: string[]): void { + this._selection = [...ids]; + } + + getSelection(): string[] { + return [...this._selection]; + } +} diff --git a/src/canvas/NodeRefinement.ts b/src/canvas/NodeRefinement.ts new file mode 100644 index 00000000..e90a64b7 --- /dev/null +++ b/src/canvas/NodeRefinement.ts @@ -0,0 +1,18 @@ +import type { CanvasNode } from './types.js'; +import type { KnowledgeKind } from '../shared/knowledge.js'; + +export interface RefinementTriggerItem { + kind: KnowledgeKind; + id: number; +} + +export const handleNodeSelection = ( + node: CanvasNode, + lookup: Record, +): RefinementTriggerItem | null => { + const entry = lookup[node.intentItemRef]; + if (!entry) { + return null; + } + return { id: entry.id, kind: entry.kind }; +}; diff --git a/src/canvas/autoLayout.ts b/src/canvas/autoLayout.ts new file mode 100644 index 00000000..992b0764 --- /dev/null +++ b/src/canvas/autoLayout.ts @@ -0,0 +1,187 @@ +interface LayoutItem { + id: string; + width: number; + height: number; +} + +interface Relationship { + source: string; + target: string; +} + +interface Viewport { + width: number; + height: number; +} + +type LayoutResult = Record; + +/** + * Deterministic topology-seeded layout algorithm. + * Places nodes in a grid-like arrangement using topological ordering, + * ensuring no overlaps and all nodes within viewport bounds. 
+ */ +export function computeLayout( + items: LayoutItem[], + relationships: Relationship[], + viewport: Viewport, +): LayoutResult { + if (items.length === 0) return {}; + + // Sort items deterministically by id first + const sorted = [...items].sort((a, b) => a.id.localeCompare(b.id)); + + // Build adjacency for topological sort + const itemMap = new Map(sorted.map((item) => [item.id, item])); + const children = new Map(); + const inDegree = new Map(); + for (const item of sorted) { + children.set(item.id, []); + inDegree.set(item.id, 0); + } + for (const rel of relationships) { + if (itemMap.has(rel.source) && itemMap.has(rel.target)) { + children.get(rel.source)!.push(rel.target); + inDegree.set(rel.target, (inDegree.get(rel.target) || 0) + 1); + } + } + + // Topological sort (Kahn's algorithm, deterministic via sorted queue) + const queue: string[] = []; + for (const item of sorted) { + if (inDegree.get(item.id) === 0) queue.push(item.id); + } + + const order: string[] = []; + while (queue.length > 0) { + queue.sort((a, b) => a.localeCompare(b)); + const node = queue.shift()!; + order.push(node); + for (const child of children.get(node)!.sort((a, b) => a.localeCompare(b))) { + const deg = inDegree.get(child)! 
- 1; + inDegree.set(child, deg); + if (deg === 0) queue.push(child); + } + } + // Add any remaining (cycles) in sorted order + for (const item of sorted) { + if (!order.includes(item.id)) order.push(item.id); + } + + // Assign levels based on topology + const level = new Map(); + for (const id of order) { + level.set(id, 0); + } + for (const id of order) { + const lvl = level.get(id)!; + for (const child of children.get(id) || []) { + if (level.has(child)) { + level.set(child, Math.max(level.get(child)!, lvl + 1)); + } + } + } + + // Group by level + const maxLevel = Math.max(...[...level.values()]); + const levels: string[][] = []; + for (let l = 0; l <= maxLevel; l++) { + levels.push(order.filter((id) => level.get(id) === l)); + } + + // Calculate padding + const padding = 20; + + // Place nodes level by level (left to right), items within level top to bottom + const result: LayoutResult = {}; + let curX = padding; + + for (const levelItems of levels) { + const itemsInLevel = levelItems.map((id) => itemMap.get(id)!); + const maxWidth = Math.max(...itemsInLevel.map((i) => i.width)); + let curY = padding; + + for (const item of itemsInLevel) { + result[item.id] = { x: curX, y: curY }; + curY += item.height + padding; + } + + curX += maxWidth + padding; + } + + // Scale to fit within viewport if needed + let maxRight = 0; + let maxBottom = 0; + for (const item of sorted) { + const pos = result[item.id]; + maxRight = Math.max(maxRight, pos.x + item.width); + maxBottom = Math.max(maxBottom, pos.y + item.height); + } + + if (maxRight > viewport.width || maxBottom > viewport.height) { + const scaleX = maxRight > viewport.width ? viewport.width / maxRight : 1; + const scaleY = maxBottom > viewport.height ? 
viewport.height / maxBottom : 1; + const scale = Math.min(scaleX, scaleY); + + // Scale positions and check if nodes still fit (they might not due to unscaled dimensions) + // Instead, re-layout with adjusted padding + return layoutGrid(sorted, viewport); + } + + return result; +} + +/** + * Fallback grid layout that guarantees fitting within viewport. + */ +function layoutGrid(items: LayoutItem[], viewport: Viewport): LayoutResult { + const result: LayoutResult = {}; + const padding = 10; + + // Calculate how many columns we can fit + const maxItemWidth = Math.max(...items.map((i) => i.width)); + const maxItemHeight = Math.max(...items.map((i) => i.height)); + + const cols = Math.max(1, Math.floor(viewport.width / (maxItemWidth + padding))); + + let col = 0; + let row = 0; + const rowHeights: number[] = [0]; + + for (const item of items) { + if (col >= cols) { + col = 0; + row++; + rowHeights.push(0); + } + + const x = col * (maxItemWidth + padding); + const yOffset = rowHeights.slice(0, row).reduce((sum, h) => sum + h + padding, 0); + + result[item.id] = { x, y: yOffset }; + rowHeights[row] = Math.max(rowHeights[row], item.height); + col++; + } + + // Verify bounds and scale if needed + let maxRight = 0; + let maxBottom = 0; + for (const item of items) { + const pos = result[item.id]; + maxRight = Math.max(maxRight, pos.x + item.width); + maxBottom = Math.max(maxBottom, pos.y + item.height); + } + + if (maxRight > viewport.width || maxBottom > viewport.height) { + const scaleX = viewport.width / maxRight; + const scaleY = viewport.height / maxBottom; + const scale = Math.min(scaleX, scaleY); + + for (const item of items) { + result[item.id].x *= scale; + result[item.id].y *= scale; + } + } + + return result; +} diff --git a/src/canvas/edgeStyles.ts b/src/canvas/edgeStyles.ts new file mode 100644 index 00000000..5fc5901f --- /dev/null +++ b/src/canvas/edgeStyles.ts @@ -0,0 +1,26 @@ +export interface EdgeStyle { + colour: string; + dashPattern: number[]; + marker: 
string; +} + +export interface LegendEntry extends EdgeStyle { + kind: string; +} + +const styles: Record = { + depends_on: { colour: '#4A90D9', dashPattern: [], marker: 'arrow' }, + verifies: { colour: '#7ED321', dashPattern: [6, 3], marker: 'diamond' }, + refines: { colour: '#F5A623', dashPattern: [2, 2], marker: 'circle' }, + derived_from: { colour: '#BD10E0', dashPattern: [8, 4, 2, 4], marker: 'triangle' }, + constrains: { colour: '#D0021B', dashPattern: [4, 4], marker: 'square' }, +}; + +export function resolveEdgeStyle(kind: string): EdgeStyle { + return styles[kind]; +} + +export const legend: LegendEntry[] = Object.entries(styles).map(([kind, style]) => ({ + kind, + ...style, +})); diff --git a/src/canvas/index.ts b/src/canvas/index.ts new file mode 100644 index 00000000..ff037a6a --- /dev/null +++ b/src/canvas/index.ts @@ -0,0 +1,9 @@ +export * from './types.js'; +export * from './autoLayout.js'; +export * from './layoutPersistence.js'; +export { CanvasRenderer } from './CanvasRenderer.js'; +export * from './edgeStyles.js'; +export { EdgeLegend, legendEntries } from './EdgeLegend.js'; +export { LayoutSwitch } from './LayoutSwitch.js'; +export { handleNodeSelection } from './NodeRefinement.js'; +export type { RefinementTriggerItem } from './NodeRefinement.js'; diff --git a/src/canvas/layoutPersistence.ts b/src/canvas/layoutPersistence.ts new file mode 100644 index 00000000..f1723d51 --- /dev/null +++ b/src/canvas/layoutPersistence.ts @@ -0,0 +1,11 @@ +import type { PersistedLayoutState } from './types.js'; + +const store = new Map(); + +export function saveLayout(specId: string, state: PersistedLayoutState): void { + store.set(specId, { nodePositions: { ...state.nodePositions }, activeLayout: state.activeLayout }); +} + +export function loadLayout(specId: string): PersistedLayoutState | null { + return store.get(specId) ?? 
null; +} diff --git a/src/canvas/types.ts b/src/canvas/types.ts new file mode 100644 index 00000000..90dc4429 --- /dev/null +++ b/src/canvas/types.ts @@ -0,0 +1,22 @@ +export interface CanvasNode { + id: string; + position: { x: number; y: number }; + intentItemRef: string; +} + +export interface CanvasEdge { + source: string; + target: string; + relationKind: string; +} + +export interface ViewportState { + x: number; + y: number; + zoom: number; +} + +export interface PersistedLayoutState { + nodePositions: Record; + activeLayout: string; +} diff --git a/src/server/auto-layout.test.ts b/src/server/auto-layout.test.ts new file mode 100644 index 00000000..5b13bc8d --- /dev/null +++ b/src/server/auto-layout.test.ts @@ -0,0 +1,211 @@ +import { describe, expect, it } from 'vitest'; + +/** + * Tests for src/canvas/autoLayout.ts + * + * The module must export a function `computeLayout` that accepts: + * - items: Array<{ id: string; width: number; height: number }> + * - relationships: Array<{ source: string; target: string }> + * - viewport: { width: number; height: number } + * + * And returns: + * - Record + * (keyed by item id, values are top-left positions) + * + * Constraints: + * 1. No overlapping nodes (rects must not intersect) + * 2. All nodes within viewport bounds + * 3. 
Deterministic (same input β†’ same output) + */ + +interface LayoutItem { + id: string; + width: number; + height: number; +} + +interface Relationship { + source: string; + target: string; +} + +interface Viewport { + width: number; + height: number; +} + +type LayoutResult = Record; + +async function getComputeLayout(): Promise< + (items: LayoutItem[], relationships: Relationship[], viewport: Viewport) => LayoutResult +> { + const mod = await import('../canvas/autoLayout.js'); + return (mod as any).computeLayout; +} + +function rectsOverlap( + a: { x: number; y: number; w: number; h: number }, + b: { x: number; y: number; w: number; h: number }, +): boolean { + return a.x < b.x + b.w && a.x + a.w > b.x && a.y < b.y + b.h && a.y + a.h > b.y; +} + +describe('autoLayout – computeLayout', () => { + it('module is importable and exports computeLayout as a function', async () => { + const computeLayout = await getComputeLayout(); + expect(typeof computeLayout).toBe('function'); + }); + + it('returns a position for every item', async () => { + const computeLayout = await getComputeLayout(); + const items: LayoutItem[] = [ + { id: 'a', width: 100, height: 60 }, + { id: 'b', width: 120, height: 80 }, + { id: 'c', width: 90, height: 50 }, + ]; + const result = computeLayout(items, [], { width: 1024, height: 768 }); + + expect(result).toBeDefined(); + for (const item of items) { + expect(result[item.id]).toBeDefined(); + expect(typeof result[item.id].x).toBe('number'); + expect(typeof result[item.id].y).toBe('number'); + } + }); + + it('produces no overlapping nodes', async () => { + const computeLayout = await getComputeLayout(); + const items: LayoutItem[] = [ + { id: 'n1', width: 200, height: 100 }, + { id: 'n2', width: 200, height: 100 }, + { id: 'n3', width: 200, height: 100 }, + { id: 'n4', width: 200, height: 100 }, + { id: 'n5', width: 200, height: 100 }, + ]; + const relationships: Relationship[] = [ + { source: 'n1', target: 'n2' }, + { source: 'n2', target: 'n3' 
}, + { source: 'n1', target: 'n4' }, + { source: 'n4', target: 'n5' }, + ]; + const result = computeLayout(items, relationships, { width: 1920, height: 1080 }); + + const rects = items.map((item) => ({ + id: item.id, + x: result[item.id].x, + y: result[item.id].y, + w: item.width, + h: item.height, + })); + + for (let i = 0; i < rects.length; i++) { + for (let j = i + 1; j < rects.length; j++) { + expect( + rectsOverlap(rects[i], rects[j]), + `nodes ${rects[i].id} and ${rects[j].id} overlap`, + ).toBe(false); + } + } + }); + + it('places all nodes within viewport bounds', async () => { + const computeLayout = await getComputeLayout(); + const viewport: Viewport = { width: 800, height: 600 }; + const items: LayoutItem[] = [ + { id: 'a', width: 150, height: 80 }, + { id: 'b', width: 150, height: 80 }, + { id: 'c', width: 150, height: 80 }, + { id: 'd', width: 150, height: 80 }, + ]; + const result = computeLayout(items, [{ source: 'a', target: 'b' }], viewport); + + for (const item of items) { + const pos = result[item.id]; + expect(pos.x, `${item.id} x >= 0`).toBeGreaterThanOrEqual(0); + expect(pos.y, `${item.id} y >= 0`).toBeGreaterThanOrEqual(0); + expect( + pos.x + item.width, + `${item.id} right edge within viewport`, + ).toBeLessThanOrEqual(viewport.width); + expect( + pos.y + item.height, + `${item.id} bottom edge within viewport`, + ).toBeLessThanOrEqual(viewport.height); + } + }); + + it('is deterministic – same inputs produce identical outputs', async () => { + const computeLayout = await getComputeLayout(); + const items: LayoutItem[] = [ + { id: 'x', width: 100, height: 60 }, + { id: 'y', width: 120, height: 70 }, + { id: 'z', width: 80, height: 50 }, + ]; + const rels: Relationship[] = [ + { source: 'x', target: 'y' }, + { source: 'y', target: 'z' }, + ]; + const vp: Viewport = { width: 1024, height: 768 }; + + const r1 = computeLayout(items, rels, vp); + const r2 = computeLayout(items, rels, vp); + + expect(r1).toEqual(r2); + }); + + it('handles a 
single node', async () => { + const computeLayout = await getComputeLayout(); + const items: LayoutItem[] = [{ id: 'solo', width: 200, height: 100 }]; + const result = computeLayout(items, [], { width: 800, height: 600 }); + + expect(result['solo']).toBeDefined(); + expect(result['solo'].x).toBeGreaterThanOrEqual(0); + expect(result['solo'].y).toBeGreaterThanOrEqual(0); + expect(result['solo'].x + 200).toBeLessThanOrEqual(800); + expect(result['solo'].y + 100).toBeLessThanOrEqual(600); + }); + + it('handles empty items array', async () => { + const computeLayout = await getComputeLayout(); + const result = computeLayout([], [], { width: 800, height: 600 }); + expect(result).toEqual({}); + }); + + it('produces no overlaps with many tightly-packed nodes in a small viewport', async () => { + const computeLayout = await getComputeLayout(); + const items: LayoutItem[] = Array.from({ length: 9 }, (_, i) => ({ + id: `node-${i}`, + width: 100, + height: 60, + })); + // 9 nodes of 100Γ—60 = 54000 area, viewport 500Γ—400 = 200000 area β€” should fit + const viewport: Viewport = { width: 500, height: 400 }; + const result = computeLayout(items, [], viewport); + + const rects = items.map((item) => ({ + id: item.id, + x: result[item.id].x, + y: result[item.id].y, + w: item.width, + h: item.height, + })); + + // all within bounds + for (const r of rects) { + expect(r.x).toBeGreaterThanOrEqual(0); + expect(r.y).toBeGreaterThanOrEqual(0); + expect(r.x + r.w).toBeLessThanOrEqual(viewport.width); + expect(r.y + r.h).toBeLessThanOrEqual(viewport.height); + } + + // no overlaps + for (let i = 0; i < rects.length; i++) { + for (let j = i + 1; j < rects.length; j++) { + expect( + rectsOverlap(rects[i], rects[j]), + `nodes ${rects[i].id} and ${rects[j].id} overlap`, + ).toBe(false); + } + } + }); +}); diff --git a/src/server/canvas-index.test.ts b/src/server/canvas-index.test.ts new file mode 100644 index 00000000..53609bfc --- /dev/null +++ b/src/server/canvas-index.test.ts @@ -0,0 
+1,88 @@ +import { describe, expect, it } from 'vitest'; + +/** + * Tests for the canvas barrel file at src/canvas/index.ts. + * The barrel must re-export every public symbol from all canvas sub-modules: + * types, autoLayout, layoutPersistence, CanvasRenderer, + * edgeStyles, EdgeLegend, LayoutSwitch, NodeRefinement. + * + * These tests will fail until src/canvas/index.ts exists and wires + * everything together. + */ + +describe('canvas barrel index', () => { + it('src/canvas/index.ts is resolvable as a module', async () => { + const mod = await import('../canvas/index.js'); + expect(mod).toBeDefined(); + }); + + // --- types --- + it('re-exports CanvasNode type (runtime-accessible as module key)', async () => { + // Types are erased, but we verify the module itself resolves and + // a runtime value created with the shape is assignable. + const mod = await import('../canvas/index.js'); + expect(mod).toBeDefined(); + // At minimum the barrel must compile; type re-exports are verified + // by the TypeScript compiler. We combine with runtime checks below. 
+ }); + + // --- autoLayout --- + it('re-exports computeLayout from autoLayout', async () => { + const mod = await import('../canvas/index.js'); + expect(mod.computeLayout).toBeTypeOf('function'); + }); + + // --- layoutPersistence --- + it('re-exports saveLayout from layoutPersistence', async () => { + const mod = await import('../canvas/index.js'); + expect(mod.saveLayout).toBeTypeOf('function'); + }); + + it('re-exports loadLayout from layoutPersistence', async () => { + const mod = await import('../canvas/index.js'); + expect(mod.loadLayout).toBeTypeOf('function'); + }); + + // --- CanvasRenderer --- + it('re-exports CanvasRenderer class', async () => { + const mod = await import('../canvas/index.js'); + expect(mod.CanvasRenderer).toBeTypeOf('function'); + // Should be a constructor + expect(mod.CanvasRenderer.prototype).toBeDefined(); + }); + + // --- edgeStyles --- + it('re-exports resolveEdgeStyle from edgeStyles', async () => { + const mod = await import('../canvas/index.js'); + expect(mod.resolveEdgeStyle).toBeTypeOf('function'); + }); + + it('re-exports legend array from edgeStyles', async () => { + const mod = await import('../canvas/index.js'); + expect(Array.isArray(mod.legend)).toBe(true); + }); + + // --- EdgeLegend --- + it('re-exports EdgeLegend function', async () => { + const mod = await import('../canvas/index.js'); + expect(mod.EdgeLegend).toBeTypeOf('function'); + }); + + it('re-exports legendEntries from EdgeLegend', async () => { + const mod = await import('../canvas/index.js'); + expect(Array.isArray(mod.legendEntries)).toBe(true); + }); + + // --- LayoutSwitch --- + it('re-exports LayoutSwitch class', async () => { + const mod = await import('../canvas/index.js'); + expect(mod.LayoutSwitch).toBeTypeOf('function'); + expect(mod.LayoutSwitch.prototype).toBeDefined(); + }); + + // --- NodeRefinement --- + it('re-exports handleNodeSelection from NodeRefinement', async () => { + const mod = await import('../canvas/index.js'); + 
expect(mod.handleNodeSelection).toBeTypeOf('function'); + }); +}); diff --git a/src/server/canvas-renderer.test.ts b/src/server/canvas-renderer.test.ts new file mode 100644 index 00000000..e34968ff --- /dev/null +++ b/src/server/canvas-renderer.test.ts @@ -0,0 +1,453 @@ +import { describe, expect, it, beforeEach } from 'vitest'; +import type { + CanvasNode, + CanvasEdge, + ViewportState, + PersistedLayoutState, +} from '../canvas/types.js'; + +/** + * Tests for src/canvas/CanvasRenderer.ts + * + * The module must export a class (or factory) `CanvasRenderer` that: + * - Accepts a specification (intent items + typed relationships) + * - Renders every intent item as exactly one CanvasNode + * - Renders every typed relationship as exactly one CanvasEdge + * - Supports pan/zoom that change viewport without altering node positions + * - Calls autoLayout when no saved positions exist + * - Persists positions via layoutPersistence after drag + * - Restores positions and active layout on reload + */ + +interface IntentItem { + id: string; + label?: string; + width?: number; + height?: number; +} + +interface TypedRelationship { + source: string; + target: string; + kind: string; +} + +interface Specification { + id: string; + items: IntentItem[]; + relationships: TypedRelationship[]; +} + +async function getCanvasRenderer() { + // Try both .ts and .tsx extensions via the JS import path + const mod = await import('../canvas/CanvasRenderer.js'); + return mod; +} + +describe('CanvasRenderer', () => { + // ─── Criterion 1: Every intent item appears as exactly one node ─── + + it('renders every intent item as exactly one node on the canvas', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? 
mod.default; + expect(CanvasRenderer).toBeDefined(); + + const spec: Specification = { + id: 'spec-1', + items: [ + { id: 'item-a', width: 100, height: 50 }, + { id: 'item-b', width: 100, height: 50 }, + { id: 'item-c', width: 100, height: 50 }, + ], + relationships: [], + }; + + const renderer = new CanvasRenderer(spec); + const nodes: CanvasNode[] = renderer.getNodes(); + + expect(nodes).toHaveLength(3); + const nodeIds = nodes.map((n: CanvasNode) => n.intentItemRef); + expect(nodeIds).toContain('item-a'); + expect(nodeIds).toContain('item-b'); + expect(nodeIds).toContain('item-c'); + }); + + it('does not create duplicate nodes for the same intent item', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? mod.default; + + const spec: Specification = { + id: 'spec-dup', + items: [ + { id: 'item-x', width: 100, height: 50 }, + { id: 'item-y', width: 100, height: 50 }, + ], + relationships: [], + }; + + const renderer = new CanvasRenderer(spec); + const nodes: CanvasNode[] = renderer.getNodes(); + + // Each item appears exactly once + const counts = new Map(); + for (const n of nodes) { + counts.set(n.intentItemRef, (counts.get(n.intentItemRef) ?? 0) + 1); + } + for (const [ref, count] of counts) { + expect(count).toBe(1); + } + expect(nodes).toHaveLength(2); + }); + + it('renders zero nodes for an empty specification', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? 
mod.default; + + const spec: Specification = { + id: 'spec-empty', + items: [], + relationships: [], + }; + + const renderer = new CanvasRenderer(spec); + expect(renderer.getNodes()).toHaveLength(0); + expect(renderer.getEdges()).toHaveLength(0); + }); + + // ─── Criterion 2: Every typed relationship appears as exactly one edge ─── + + it('renders every typed relationship as exactly one edge on the canvas', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? mod.default; + + const spec: Specification = { + id: 'spec-edges', + items: [ + { id: 'a', width: 100, height: 50 }, + { id: 'b', width: 100, height: 50 }, + { id: 'c', width: 100, height: 50 }, + ], + relationships: [ + { source: 'a', target: 'b', kind: 'depends-on' }, + { source: 'b', target: 'c', kind: 'extends' }, + ], + }; + + const renderer = new CanvasRenderer(spec); + const edges: CanvasEdge[] = renderer.getEdges(); + + expect(edges).toHaveLength(2); + + const edge1 = edges.find( + (e: CanvasEdge) => e.source === 'a' && e.target === 'b', + ); + expect(edge1).toBeDefined(); + expect(edge1!.relationKind).toBe('depends-on'); + + const edge2 = edges.find( + (e: CanvasEdge) => e.source === 'b' && e.target === 'c', + ); + expect(edge2).toBeDefined(); + expect(edge2!.relationKind).toBe('extends'); + }); + + it('does not create duplicate edges for the same relationship', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? 
mod.default; + + const spec: Specification = { + id: 'spec-edge-dup', + items: [ + { id: 'x', width: 100, height: 50 }, + { id: 'y', width: 100, height: 50 }, + ], + relationships: [{ source: 'x', target: 'y', kind: 'uses' }], + }; + + const renderer = new CanvasRenderer(spec); + const edges: CanvasEdge[] = renderer.getEdges(); + + expect(edges).toHaveLength(1); + expect(edges[0].source).toBe('x'); + expect(edges[0].target).toBe('y'); + expect(edges[0].relationKind).toBe('uses'); + }); + + // ─── Criterion 3: Pan and zoom change viewport, not node positions ─── + + it('pan changes viewport offset without changing node positions', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? mod.default; + + const spec: Specification = { + id: 'spec-pan', + items: [ + { id: 'n1', width: 100, height: 50 }, + { id: 'n2', width: 100, height: 50 }, + ], + relationships: [], + }; + + const renderer = new CanvasRenderer(spec); + + const nodesBefore: CanvasNode[] = renderer.getNodes(); + const positionsBefore = new Map( + nodesBefore.map((n: CanvasNode) => [n.id, { ...n.position }]), + ); + + // Pan the viewport + renderer.pan(50, -30); + + const nodesAfter: CanvasNode[] = renderer.getNodes(); + const viewport: ViewportState = renderer.getViewport(); + + // Node positions must be unchanged + for (const node of nodesAfter) { + const before = positionsBefore.get(node.id)!; + expect(node.position.x).toBe(before.x); + expect(node.position.y).toBe(before.y); + } + + // Viewport must have changed + expect(viewport.x).not.toBe(0); + }); + + it('zoom changes viewport scale without changing node positions', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? 
mod.default; + + const spec: Specification = { + id: 'spec-zoom', + items: [ + { id: 'z1', width: 100, height: 50 }, + { id: 'z2', width: 100, height: 50 }, + ], + relationships: [], + }; + + const renderer = new CanvasRenderer(spec); + + const nodesBefore: CanvasNode[] = renderer.getNodes(); + const positionsBefore = new Map( + nodesBefore.map((n: CanvasNode) => [n.id, { ...n.position }]), + ); + + const viewportBefore: ViewportState = renderer.getViewport(); + expect(viewportBefore.zoom).toBe(1); + + // Zoom in + renderer.zoom(2); + + const nodesAfter: CanvasNode[] = renderer.getNodes(); + const viewportAfter: ViewportState = renderer.getViewport(); + + // Node positions must be unchanged + for (const node of nodesAfter) { + const before = positionsBefore.get(node.id)!; + expect(node.position.x).toBe(before.x); + expect(node.position.y).toBe(before.y); + } + + // Viewport zoom must have changed + expect(viewportAfter.zoom).toBe(2); + }); + + it('positions remain consistent relative to each other after pan+zoom', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? 
mod.default; + + const spec: Specification = { + id: 'spec-relative', + items: [ + { id: 'r1', width: 80, height: 40 }, + { id: 'r2', width: 80, height: 40 }, + { id: 'r3', width: 80, height: 40 }, + ], + relationships: [], + }; + + const renderer = new CanvasRenderer(spec); + + const nodesBefore: CanvasNode[] = renderer.getNodes(); + // Compute pairwise deltas + const deltasBefore: Record = {}; + for (let i = 0; i < nodesBefore.length; i++) { + for (let j = i + 1; j < nodesBefore.length; j++) { + const key = `${nodesBefore[i].id}-${nodesBefore[j].id}`; + deltasBefore[key] = { + dx: nodesBefore[j].position.x - nodesBefore[i].position.x, + dy: nodesBefore[j].position.y - nodesBefore[i].position.y, + }; + } + } + + renderer.pan(200, -100); + renderer.zoom(0.5); + + const nodesAfter: CanvasNode[] = renderer.getNodes(); + const nodeMap = new Map(nodesAfter.map((n: CanvasNode) => [n.id, n])); + + for (const [key, delta] of Object.entries(deltasBefore)) { + const [id1, id2] = key.split('-'); + const n1 = nodeMap.get(id1)!; + const n2 = nodeMap.get(id2)!; + expect(n2.position.x - n1.position.x).toBe(delta.dx); + expect(n2.position.y - n1.position.y).toBe(delta.dy); + } + }); + + // ─── Criterion 4: Persistence – drag saves, reload restores ─── + + it('calls autoLayout to seed positions on first open with no saved positions', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? 
mod.default; + + // Clear any persisted state first + const persistence = await import('../canvas/layoutPersistence.js'); + // Ensure no saved layout exists for this spec + const saved = persistence.loadLayout('spec-autolayout'); + // If loadLayout returns null, autoLayout should be called + + const spec: Specification = { + id: 'spec-autolayout', + items: [ + { id: 'al-1', width: 100, height: 50 }, + { id: 'al-2', width: 100, height: 50 }, + ], + relationships: [{ source: 'al-1', target: 'al-2', kind: 'needs' }], + }; + + const renderer = new CanvasRenderer(spec); + const nodes: CanvasNode[] = renderer.getNodes(); + + // All nodes should have positions (seeded by autoLayout) + expect(nodes).toHaveLength(2); + for (const node of nodes) { + expect(typeof node.position.x).toBe('number'); + expect(typeof node.position.y).toBe('number'); + // Positions should be finite numbers, not NaN/Infinity + expect(Number.isFinite(node.position.x)).toBe(true); + expect(Number.isFinite(node.position.y)).toBe(true); + } + }); + + it('dragging a node updates its position and persists via layoutPersistence', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? 
mod.default; + + const spec: Specification = { + id: 'spec-drag', + items: [ + { id: 'd1', width: 100, height: 50 }, + { id: 'd2', width: 100, height: 50 }, + ], + relationships: [], + }; + + const renderer = new CanvasRenderer(spec); + + // Drag node d1 to a new position + renderer.dragNode('d1', { x: 300, y: 400 }); + + const nodes: CanvasNode[] = renderer.getNodes(); + const d1 = nodes.find((n: CanvasNode) => n.intentItemRef === 'd1'); + expect(d1).toBeDefined(); + expect(d1!.position.x).toBe(300); + expect(d1!.position.y).toBe(400); + + // Verify persistence was triggered + const persistence = await import('../canvas/layoutPersistence.js'); + const saved = persistence.loadLayout('spec-drag'); + expect(saved).not.toBeNull(); + expect(saved!.nodePositions['d1']).toEqual({ x: 300, y: 400 }); + }); + + it('restores node positions and active layout from persistence on reload', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? mod.default; + const persistence = await import('../canvas/layoutPersistence.js'); + + const specId = 'spec-restore'; + + // Pre-save layout state + const savedState: PersistedLayoutState = { + nodePositions: { + 'p1': { x: 111, y: 222 }, + 'p2': { x: 333, y: 444 }, + }, + activeLayout: 'manual', + }; + persistence.saveLayout(specId, savedState); + + const spec: Specification = { + id: specId, + items: [ + { id: 'p1', width: 100, height: 50 }, + { id: 'p2', width: 100, height: 50 }, + ], + relationships: [], + }; + + // "Reload" – create a new renderer, which should load persisted state + const renderer = new CanvasRenderer(spec); + const nodes: CanvasNode[] = renderer.getNodes(); + + const p1 = nodes.find((n: CanvasNode) => n.intentItemRef === 'p1'); + const p2 = nodes.find((n: CanvasNode) => n.intentItemRef === 'p2'); + + expect(p1).toBeDefined(); + expect(p1!.position.x).toBe(111); + expect(p1!.position.y).toBe(222); + + expect(p2).toBeDefined(); + expect(p2!.position.x).toBe(333); + 
expect(p2!.position.y).toBe(444); + + // Active layout should also be restored + const activeLayout = renderer.getActiveLayout(); + expect(activeLayout).toBe('manual'); + }); + + it('dragging a node does not affect other nodes positions', async () => { + const mod = await getCanvasRenderer(); + const CanvasRenderer = mod.CanvasRenderer ?? mod.default; + + const spec: Specification = { + id: 'spec-drag-isolation', + items: [ + { id: 'iso-1', width: 100, height: 50 }, + { id: 'iso-2', width: 100, height: 50 }, + { id: 'iso-3', width: 100, height: 50 }, + ], + relationships: [], + }; + + const renderer = new CanvasRenderer(spec); + + const nodesBefore: CanvasNode[] = renderer.getNodes(); + const iso2Before = nodesBefore.find( + (n: CanvasNode) => n.intentItemRef === 'iso-2', + )!; + const iso3Before = nodesBefore.find( + (n: CanvasNode) => n.intentItemRef === 'iso-3', + )!; + const pos2Before = { ...iso2Before.position }; + const pos3Before = { ...iso3Before.position }; + + // Drag only iso-1 + renderer.dragNode('iso-1', { x: 999, y: 888 }); + + const nodesAfter: CanvasNode[] = renderer.getNodes(); + const iso2After = nodesAfter.find( + (n: CanvasNode) => n.intentItemRef === 'iso-2', + )!; + const iso3After = nodesAfter.find( + (n: CanvasNode) => n.intentItemRef === 'iso-3', + )!; + + expect(iso2After.position.x).toBe(pos2Before.x); + expect(iso2After.position.y).toBe(pos2Before.y); + expect(iso3After.position.x).toBe(pos3Before.x); + expect(iso3After.position.y).toBe(pos3Before.y); + }); +}); diff --git a/src/server/canvas-types.test.ts b/src/server/canvas-types.test.ts new file mode 100644 index 00000000..522268b9 --- /dev/null +++ b/src/server/canvas-types.test.ts @@ -0,0 +1,127 @@ +import { describe, expect, it } from 'vitest'; + +/** + * These tests verify that the canvas type definitions module exists at + * src/canvas/types.ts and exports the required types. Since TypeScript + * type-only imports are erased at runtime, we use a combination of: + * + * 1. 
Runtime module resolution (dynamic import) to prove the file exists + * 2. Compile-time type assertions via `import type` + satisfies/assignment + * patterns that would cause tsc failures if types are missing or wrong + * + * The tests will fail until src/canvas/types.ts exists and exports: + * - CanvasNode (with id, position {x,y}, intentItemRef) + * - CanvasEdge (with source, target, relationKind) + * - ViewportState (with x, y, zoom) + * - PersistedLayoutState (with nodePositions record and activeLayout) + */ + +describe('canvas types module', () => { + it('src/canvas/types.ts module is resolvable', async () => { + // This will throw if the module file doesn't exist + const mod = await import('../canvas/types.js'); + expect(mod).toBeDefined(); + }); + + it('exports CanvasNode type with id, position, and intentItemRef', async () => { + const mod = await import('../canvas/types.js'); + // The module must exist. We create a value and use the exported type + // guard / brand to confirm structure. Since these are pure types, the + // module should at minimum be importable. We validate the type shape + // by constructing a conforming object and running it through any + // exported validators, or simply confirming the module loaded. + expect(mod).toBeDefined(); + + // Verify the type is usable by importing it at the type level. + // The dynamic import above ensures the file exists at runtime. + // We further verify via a type-level construction below + // (compile-time check embedded in runtime test). + const { CanvasNode } = await import('../canvas/types.js') as any; + + // If the module exports a runtime representation (e.g. a schema, + // a brand symbol, or a type guard), test it. + // For pure type modules, the key assertion is that the module resolves. + // We additionally check structural correctness via the type annotations + // in the following tests. 
+ }); + + it('CanvasNode type enforces id, position {x,y}, and intentItemRef fields', async () => { + // Ensure module resolves (runtime gate) + await import('../canvas/types.js'); + + // Now import the type and construct a value. + // This is a compile-time assertion: if CanvasNode doesn't have these + // fields the TypeScript compiler will reject this file. + const { default: _types } = await import('../canvas/types.js').catch(() => ({ default: {} })); + + // We import the type statically so tsc checks the shape + type _CN = import('../canvas/types.js').CanvasNode; + + // Construct a value that must satisfy CanvasNode at compile time + const node: import('../canvas/types.js').CanvasNode = { + id: 'node-1', + position: { x: 10, y: 20 }, + intentItemRef: 'intent-item-42', + }; + + expect(node.id).toBe('node-1'); + expect(node.position).toEqual({ x: 10, y: 20 }); + expect(node.intentItemRef).toBe('intent-item-42'); + }); + + it('CanvasEdge type enforces source, target, and relationKind fields', async () => { + await import('../canvas/types.js'); + + const edge: import('../canvas/types.js').CanvasEdge = { + source: 'node-1', + target: 'node-2', + relationKind: 'depends-on', + }; + + expect(edge.source).toBe('node-1'); + expect(edge.target).toBe('node-2'); + expect(edge.relationKind).toBe('depends-on'); + }); + + it('ViewportState type enforces x, y, and zoom fields', async () => { + await import('../canvas/types.js'); + + const viewport: import('../canvas/types.js').ViewportState = { + x: 100, + y: 200, + zoom: 1.5, + }; + + expect(viewport.x).toBe(100); + expect(viewport.y).toBe(200); + expect(viewport.zoom).toBe(1.5); + }); + + it('PersistedLayoutState type enforces nodePositions and activeLayout', async () => { + await import('../canvas/types.js'); + + const layout: import('../canvas/types.js').PersistedLayoutState = { + nodePositions: { + 'node-1': { x: 10, y: 20 }, + 'node-2': { x: 30, y: 40 }, + }, + activeLayout: 'force-directed', + }; + + 
expect(layout.nodePositions['node-1']).toEqual({ x: 10, y: 20 }); + expect(layout.nodePositions['node-2']).toEqual({ x: 30, y: 40 }); + expect(layout.activeLayout).toBe('force-directed'); + }); + + it('PersistedLayoutState supports empty nodePositions', async () => { + await import('../canvas/types.js'); + + const layout: import('../canvas/types.js').PersistedLayoutState = { + nodePositions: {}, + activeLayout: 'grid', + }; + + expect(Object.keys(layout.nodePositions)).toHaveLength(0); + expect(layout.activeLayout).toBe('grid'); + }); +}); diff --git a/src/server/edge-legend.test.ts b/src/server/edge-legend.test.ts new file mode 100644 index 00000000..7ccc800f --- /dev/null +++ b/src/server/edge-legend.test.ts @@ -0,0 +1,106 @@ +import { describe, expect, it } from 'vitest'; + +const RELATION_KINDS = [ + 'depends_on', + 'verifies', + 'refines', + 'derived_from', + 'constrains', +] as const; + +describe('EdgeLegend component', () => { + it('is importable from src/canvas/EdgeLegend', async () => { + // Accept .ts or .tsx β€” vitest resolves either + const mod = await import('../canvas/EdgeLegend.js'); + expect(mod).toBeDefined(); + }); + + it('exports an EdgeLegend symbol (function or class)', async () => { + const mod = await import('../canvas/EdgeLegend.js'); + const EdgeLegend = mod.EdgeLegend ?? mod.default; + expect(EdgeLegend).toBeDefined(); + expect(typeof EdgeLegend).toBe('function'); + }); + + describe('each relation kind renders with a distinct edge style matching the legend', () => { + it('EdgeLegend entries cover every relation kind defined in edgeStyles', async () => { + const { legend } = await import('../canvas/edgeStyles.js'); + const edgeLegendMod = await import('../canvas/EdgeLegend.js'); + + // The component must expose its legend entries β€” either directly via + // an export like `legendEntries` / `edgeLegendEntries`, or by re-exporting + // the legend from edgeStyles. We also accept `getEntries()`. 
+ const EdgeLegend = edgeLegendMod.EdgeLegend ?? edgeLegendMod.default; + const entries: Array<{ kind: string }> = + edgeLegendMod.legendEntries ?? + edgeLegendMod.edgeLegendEntries ?? + (typeof edgeLegendMod.getEntries === 'function' + ? edgeLegendMod.getEntries() + : undefined); + + // If the module doesn't expose entries directly, the component itself must + // be callable / constructable and we can derive them from the legend data + // that should be used. At minimum, verify the legend data from edgeStyles + // is re-exported or available. + const resolvedEntries = entries ?? (edgeLegendMod.legend as Array<{ kind: string }>); + + expect(resolvedEntries).toBeDefined(); + expect(Array.isArray(resolvedEntries)).toBe(true); + + const kinds = resolvedEntries.map((e: { kind: string }) => e.kind); + for (const kind of RELATION_KINDS) { + expect(kinds).toContain(kind); + } + + // Must cover the same set as edgeStyles legend + const legendKinds = legend.map((e: { kind: string }) => e.kind); + expect(kinds.sort()).toEqual(legendKinds.sort()); + }); + + it.each(RELATION_KINDS)( + 'entry for "%s" has colour, dashPattern, and marker matching edgeStyles', + async (kind) => { + const { resolveEdgeStyle } = await import('../canvas/edgeStyles.js'); + const edgeLegendMod = await import('../canvas/EdgeLegend.js'); + + const entries: Array<{ kind: string; colour: string; dashPattern: number[]; marker: string }> = + edgeLegendMod.legendEntries ?? + edgeLegendMod.edgeLegendEntries ?? + (typeof edgeLegendMod.getEntries === 'function' + ? edgeLegendMod.getEntries() + : undefined) ?? 
+ edgeLegendMod.legend; + + expect(entries).toBeDefined(); + const entry = entries.find((e: { kind: string }) => e.kind === kind); + expect(entry).toBeDefined(); + + const style = resolveEdgeStyle(kind); + expect(entry!.colour).toBe(style.colour); + expect(entry!.dashPattern).toEqual(style.dashPattern); + expect(entry!.marker).toBe(style.marker); + }, + ); + + it('all entries have visually distinct styles (no duplicates)', async () => { + const edgeLegendMod = await import('../canvas/EdgeLegend.js'); + + const entries: Array<{ kind: string; colour: string; dashPattern: number[]; marker: string }> = + edgeLegendMod.legendEntries ?? + edgeLegendMod.edgeLegendEntries ?? + (typeof edgeLegendMod.getEntries === 'function' + ? edgeLegendMod.getEntries() + : undefined) ?? + edgeLegendMod.legend; + + expect(entries).toBeDefined(); + expect(entries.length).toBe(RELATION_KINDS.length); + + const serialised = entries.map((e) => + JSON.stringify({ colour: e.colour, dashPattern: e.dashPattern, marker: e.marker }), + ); + const unique = new Set(serialised); + expect(unique.size).toBe(entries.length); + }); + }); +}); diff --git a/src/server/edge-styles.test.ts b/src/server/edge-styles.test.ts new file mode 100644 index 00000000..f0473708 --- /dev/null +++ b/src/server/edge-styles.test.ts @@ -0,0 +1,117 @@ +import { describe, expect, it } from 'vitest'; + +const RELATION_KINDS = [ + 'depends_on', + 'verifies', + 'refines', + 'derived_from', + 'constrains', +] as const; + +describe('edgeStyles module', () => { + it('is importable from src/canvas/edgeStyles', async () => { + const mod = await import('../canvas/edgeStyles.js'); + expect(mod).toBeDefined(); + }); + + it('exports a resolveEdgeStyle function', async () => { + const mod = await import('../canvas/edgeStyles.js'); + expect(typeof mod.resolveEdgeStyle).toBe('function'); + }); + + it('exports a legend data structure', async () => { + const mod = await import('../canvas/edgeStyles.js'); + expect(mod.legend).toBeDefined(); + 
// legend should be iterable (array) or an object with entries for each relation kind + expect(typeof mod.legend).toBe('object'); + expect(mod.legend).not.toBeNull(); + }); + + describe('resolveEdgeStyle', () => { + it.each(RELATION_KINDS)( + 'returns a style object for relation kind "%s"', + async (kind) => { + const { resolveEdgeStyle } = await import('../canvas/edgeStyles.js'); + const style = resolveEdgeStyle(kind); + expect(style).toBeDefined(); + expect(typeof style).toBe('object'); + expect(style).not.toBeNull(); + }, + ); + + it.each(RELATION_KINDS)( + 'style for "%s" includes colour, dash pattern, and marker', + async (kind) => { + const { resolveEdgeStyle } = await import('../canvas/edgeStyles.js'); + const style = resolveEdgeStyle(kind); + + // colour β€” a non-empty string + expect(style).toHaveProperty('colour'); + expect(typeof style.colour).toBe('string'); + expect(style.colour.length).toBeGreaterThan(0); + + // dash pattern β€” an array (possibly empty for solid lines) + expect(style).toHaveProperty('dashPattern'); + expect(Array.isArray(style.dashPattern)).toBe(true); + + // marker β€” a non-empty string + expect(style).toHaveProperty('marker'); + expect(typeof style.marker).toBe('string'); + expect(style.marker.length).toBeGreaterThan(0); + }, + ); + }); + + describe('distinct styles', () => { + it('all five relation kinds produce visually distinct styles', async () => { + const { resolveEdgeStyle } = await import('../canvas/edgeStyles.js'); + + const styles = RELATION_KINDS.map((kind) => resolveEdgeStyle(kind)); + // Serialise each style to compare uniqueness + const serialised = styles.map((s) => + JSON.stringify({ colour: s.colour, dashPattern: s.dashPattern, marker: s.marker }), + ); + const unique = new Set(serialised); + expect(unique.size).toBe(RELATION_KINDS.length); + }); + }); + + describe('legend consistency', () => { + it('legend contains an entry for every relation kind', async () => { + const { legend } = await 
import('../canvas/edgeStyles.js'); + + // legend may be an array of entries or an object keyed by relation kind + const legendEntries: Array<{ kind: string }> = Array.isArray(legend) + ? legend + : Object.entries(legend).map(([kind, value]) => ({ + kind, + ...(value as Record), + })); + + const legendKinds = legendEntries.map((e) => e.kind); + + for (const kind of RELATION_KINDS) { + expect(legendKinds).toContain(kind); + } + }); + + it('each legend entry style matches what resolveEdgeStyle returns', async () => { + const { legend, resolveEdgeStyle } = await import('../canvas/edgeStyles.js'); + + const legendEntries: Array<{ kind: string; colour: string; dashPattern: number[]; marker: string }> = + Array.isArray(legend) + ? legend + : Object.entries(legend).map(([kind, value]) => ({ + kind, + ...(value as { colour: string; dashPattern: number[]; marker: string }), + })); + + for (const entry of legendEntries) { + const resolved = resolveEdgeStyle(entry.kind); + expect(resolved.colour).toBe(entry.colour); + expect(resolved.dashPattern).toEqual(entry.dashPattern); + expect(resolved.marker).toBe(entry.marker); + } + }); + }); +}); diff --git a/src/server/layout-persistence.test.ts b/src/server/layout-persistence.test.ts new file mode 100644 index 00000000..17cdc89d --- /dev/null +++ b/src/server/layout-persistence.test.ts @@ -0,0 +1,163 @@ +import { describe, expect, it, beforeEach } from 'vitest'; +import type { PersistedLayoutState } from '../canvas/types.js'; + +/** + * Tests for src/canvas/layoutPersistence.ts + * + * The module must export at least: + * - saveLayout(specId: string, state: PersistedLayoutState): void | Promise + * - loadLayout(specId: string): PersistedLayoutState | null | Promise + * + * Layout persistence is view-state only: it stores node positions and the + * active layout choice (e.g. "list" vs "canvas") per specification. It must + * never alter intent-graph semantics, item content, or edges. 
+ */ + +describe('layoutPersistence', () => { + // Dynamically import to get a clean module reference (and to fail clearly + // if the module doesn't exist yet). + async function getModule() { + return import('../canvas/layoutPersistence.js'); + } + + it('module is importable and exports saveLayout and loadLayout', async () => { + const mod = await getModule(); + expect(typeof mod.saveLayout).toBe('function'); + expect(typeof mod.loadLayout).toBe('function'); + }); + + it('loadLayout returns null or empty state for an unknown specId', async () => { + const { loadLayout } = await getModule(); + const result = await loadLayout('nonexistent-spec-id-' + Date.now()); + // Must return null (or a default empty state) when nothing has been saved + if (result === null || result === undefined) { + expect(result == null).toBe(true); + } else { + // If it returns a default state, it should have empty positions + expect(Object.keys(result.nodePositions)).toHaveLength(0); + } + }); + + it('saves and loads node positions for a specification', async () => { + const { saveLayout, loadLayout } = await getModule(); + const specId = 'test-spec-' + Date.now(); + + const state: PersistedLayoutState = { + nodePositions: { + 'node-a': { x: 100, y: 200 }, + 'node-b': { x: -50, y: 300 }, + }, + activeLayout: 'canvas', + }; + + await saveLayout(specId, state); + const loaded = await loadLayout(specId); + + expect(loaded).not.toBeNull(); + expect(loaded!.nodePositions['node-a']).toEqual({ x: 100, y: 200 }); + expect(loaded!.nodePositions['node-b']).toEqual({ x: -50, y: 300 }); + expect(loaded!.activeLayout).toBe('canvas'); + }); + + it('saves and loads the active layout choice', async () => { + const { saveLayout, loadLayout } = await getModule(); + const specId = 'test-layout-choice-' + Date.now(); + + await saveLayout(specId, { + nodePositions: {}, + activeLayout: 'list', + }); + + const loaded = await loadLayout(specId); + expect(loaded).not.toBeNull(); + 
expect(loaded!.activeLayout).toBe('list'); + }); + + it('overwrites previous state on re-save (simulates drag + reload)', async () => { + const { saveLayout, loadLayout } = await getModule(); + const specId = 'test-overwrite-' + Date.now(); + + // Initial save + await saveLayout(specId, { + nodePositions: { 'n1': { x: 0, y: 0 } }, + activeLayout: 'canvas', + }); + + // "Drag" node to new position and change layout + await saveLayout(specId, { + nodePositions: { 'n1': { x: 42, y: 99 } }, + activeLayout: 'list', + }); + + // "Reload" β€” should see updated positions + const loaded = await loadLayout(specId); + expect(loaded).not.toBeNull(); + expect(loaded!.nodePositions['n1']).toEqual({ x: 42, y: 99 }); + expect(loaded!.activeLayout).toBe('list'); + }); + + it('isolates state between different specIds', async () => { + const { saveLayout, loadLayout } = await getModule(); + const specA = 'spec-a-' + Date.now(); + const specB = 'spec-b-' + Date.now(); + + await saveLayout(specA, { + nodePositions: { 'n1': { x: 1, y: 2 } }, + activeLayout: 'canvas', + }); + + await saveLayout(specB, { + nodePositions: { 'n1': { x: 99, y: 88 } }, + activeLayout: 'list', + }); + + const loadedA = await loadLayout(specA); + const loadedB = await loadLayout(specB); + + expect(loadedA!.nodePositions['n1']).toEqual({ x: 1, y: 2 }); + expect(loadedA!.activeLayout).toBe('canvas'); + expect(loadedB!.nodePositions['n1']).toEqual({ x: 99, y: 88 }); + expect(loadedB!.activeLayout).toBe('list'); + }); + + it('persists positions as view-state without intent-graph fields', async () => { + const { saveLayout, loadLayout } = await getModule(); + const specId = 'test-view-state-' + Date.now(); + + const state: PersistedLayoutState = { + nodePositions: { 'node-x': { x: 5, y: 10 } }, + activeLayout: 'canvas', + }; + + await saveLayout(specId, state); + const loaded = await loadLayout(specId); + + // The loaded state must only contain view-state keys β€” + // no edges, no item content, no intent-graph 
semantics. + const keys = Object.keys(loaded!); + expect(keys).toContain('nodePositions'); + expect(keys).toContain('activeLayout'); + // Must not leak graph semantics + expect(keys).not.toContain('edges'); + expect(keys).not.toContain('items'); + expect(keys).not.toContain('intentGraph'); + }); + + it('handles saving with many nodes', async () => { + const { saveLayout, loadLayout } = await getModule(); + const specId = 'test-many-nodes-' + Date.now(); + + const positions: Record = {}; + for (let i = 0; i < 100; i++) { + positions[`node-${i}`] = { x: i * 10, y: i * 20 }; + } + + await saveLayout(specId, { nodePositions: positions, activeLayout: 'canvas' }); + const loaded = await loadLayout(specId); + + expect(loaded).not.toBeNull(); + expect(Object.keys(loaded!.nodePositions)).toHaveLength(100); + expect(loaded!.nodePositions['node-0']).toEqual({ x: 0, y: 0 }); + expect(loaded!.nodePositions['node-99']).toEqual({ x: 990, y: 1980 }); + }); +}); diff --git a/src/server/layout-switch.test.ts b/src/server/layout-switch.test.ts new file mode 100644 index 00000000..bc73b817 --- /dev/null +++ b/src/server/layout-switch.test.ts @@ -0,0 +1,300 @@ +import { describe, expect, it, beforeEach } from 'vitest'; +import type { PersistedLayoutState } from '../canvas/types.js'; + +/** + * Tests for src/canvas/LayoutSwitch.ts (or .tsx) + * + * The module must export at least: + * - A LayoutSwitch class/factory/function that: + * 1. Manages toggling between "list" and "canvas" layout modes + * 2. Preserves the current selection when toggling + * 3. Persists the active layout choice via layoutPersistence + * 4. Restores the active layout and node positions after reload + * + * The structured-list route is never removed or disabled β€” this is purely additive. 
+ */ + +describe('LayoutSwitch', () => { + async function getModule() { + return import('../canvas/LayoutSwitch.js'); + } + + async function getPersistence() { + return import('../canvas/layoutPersistence.js'); + } + + it('module is importable and exports LayoutSwitch', async () => { + const mod = await getModule(); + // Must export LayoutSwitch as a class, function, or object with toggle/getActiveLayout + expect(mod.LayoutSwitch).toBeDefined(); + }); + + it('exposes a way to get the active layout', async () => { + const { LayoutSwitch } = await getModule(); + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? new LayoutSwitch('spec-1') : LayoutSwitch('spec-1')) + : LayoutSwitch; + + // Must have a method or property to retrieve the active layout + const activeLayout = typeof ls.getActiveLayout === 'function' + ? ls.getActiveLayout() + : ls.activeLayout; + + expect(typeof activeLayout).toBe('string'); + // Default should be one of the two valid layouts + expect(['list', 'canvas']).toContain(activeLayout); + }); + + it('defaults to "list" layout when no persisted state exists', async () => { + const { LayoutSwitch } = await getModule(); + const specId = 'no-persist-' + Date.now(); + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? new LayoutSwitch(specId) : LayoutSwitch(specId)) + : LayoutSwitch; + + const activeLayout = typeof ls.getActiveLayout === 'function' + ? ls.getActiveLayout() + : ls.activeLayout; + + expect(activeLayout).toBe('list'); + }); + + it('toggles between list and canvas layouts', async () => { + const { LayoutSwitch } = await getModule(); + const specId = 'toggle-test-' + Date.now(); + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? new LayoutSwitch(specId) : LayoutSwitch(specId)) + : LayoutSwitch; + + const getLayout = () => + typeof ls.getActiveLayout === 'function' ? 
ls.getActiveLayout() : ls.activeLayout; + + const initial = getLayout(); + expect(['list', 'canvas']).toContain(initial); + + // Toggle once + await ls.toggle(); + const after1 = getLayout(); + expect(after1).not.toBe(initial); + expect(['list', 'canvas']).toContain(after1); + + // Toggle again β€” should return to original + await ls.toggle(); + const after2 = getLayout(); + expect(after2).toBe(initial); + }); + + it('preserves selection when toggling from list to canvas', async () => { + const { LayoutSwitch } = await getModule(); + const specId = 'selection-list-to-canvas-' + Date.now(); + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? new LayoutSwitch(specId) : LayoutSwitch(specId)) + : LayoutSwitch; + + // Set a selection before toggling + const selectedIds = ['item-a', 'item-b']; + if (typeof ls.setSelection === 'function') { + ls.setSelection(selectedIds); + } else if (typeof ls.selection !== 'undefined') { + ls.selection = selectedIds; + } + + // Toggle layout + await ls.toggle(); + + // Selection must survive the toggle + const selectionAfter = typeof ls.getSelection === 'function' + ? ls.getSelection() + : ls.selection; + + expect(selectionAfter).toBeDefined(); + expect(selectionAfter).toEqual(expect.arrayContaining(['item-a', 'item-b'])); + expect(selectionAfter).toHaveLength(2); + }); + + it('preserves selection when toggling from canvas to list', async () => { + const { LayoutSwitch } = await getModule(); + const specId = 'selection-canvas-to-list-' + Date.now(); + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? 
new LayoutSwitch(specId) : LayoutSwitch(specId)) + : LayoutSwitch; + + // Toggle to canvas first + await ls.toggle(); + + // Set selection in canvas mode + const selectedIds = ['item-x']; + if (typeof ls.setSelection === 'function') { + ls.setSelection(selectedIds); + } else if (typeof ls.selection !== 'undefined') { + ls.selection = selectedIds; + } + + // Toggle back to list + await ls.toggle(); + + const selectionAfter = typeof ls.getSelection === 'function' + ? ls.getSelection() + : ls.selection; + + expect(selectionAfter).toBeDefined(); + expect(selectionAfter).toEqual(['item-x']); + }); + + it('preserves empty selection across toggle', async () => { + const { LayoutSwitch } = await getModule(); + const specId = 'empty-sel-' + Date.now(); + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? new LayoutSwitch(specId) : LayoutSwitch(specId)) + : LayoutSwitch; + + // Ensure no selection + if (typeof ls.setSelection === 'function') { + ls.setSelection([]); + } else if (typeof ls.selection !== 'undefined') { + ls.selection = []; + } + + await ls.toggle(); + + const selectionAfter = typeof ls.getSelection === 'function' + ? ls.getSelection() + : ls.selection; + + expect(Array.isArray(selectionAfter)).toBe(true); + expect(selectionAfter).toHaveLength(0); + }); + + it('persists the active layout choice via layoutPersistence', async () => { + const { LayoutSwitch } = await getModule(); + const { loadLayout } = await getPersistence(); + const specId = 'persist-choice-' + Date.now(); + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? new LayoutSwitch(specId) : LayoutSwitch(specId)) + : LayoutSwitch; + + // Toggle to switch layout + await ls.toggle(); + + const getLayout = () => + typeof ls.getActiveLayout === 'function' ? 
ls.getActiveLayout() : ls.activeLayout; + + const currentLayout = getLayout(); + + // The persisted state must reflect the current layout choice + const persisted = await loadLayout(specId); + expect(persisted).not.toBeNull(); + expect(persisted!.activeLayout).toBe(currentLayout); + }); + + it('restores active layout from persisted state on construction', async () => { + const { LayoutSwitch } = await getModule(); + const { saveLayout, loadLayout } = await getPersistence(); + const specId = 'restore-layout-' + Date.now(); + + // Pre-persist a "canvas" layout state + await saveLayout(specId, { + nodePositions: { 'node-1': { x: 50, y: 75 } }, + activeLayout: 'canvas', + }); + + // Create a new LayoutSwitch β€” it should restore from persistence + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? new LayoutSwitch(specId) : LayoutSwitch(specId)) + : LayoutSwitch; + + const activeLayout = typeof ls.getActiveLayout === 'function' + ? ls.getActiveLayout() + : ls.activeLayout; + + expect(activeLayout).toBe('canvas'); + }); + + it('restores node positions after drag and reload', async () => { + const { LayoutSwitch } = await getModule(); + const { saveLayout, loadLayout } = await getPersistence(); + const specId = 'drag-reload-' + Date.now(); + + // Simulate: user drags nodes and the state is saved + const draggedPositions = { + 'node-a': { x: 200, y: 300 }, + 'node-b': { x: 400, y: 100 }, + }; + await saveLayout(specId, { + nodePositions: draggedPositions, + activeLayout: 'canvas', + }); + + // "Reload" β€” create a new LayoutSwitch for the same spec + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? new LayoutSwitch(specId) : LayoutSwitch(specId)) + : LayoutSwitch; + + // Active layout should be restored + const activeLayout = typeof ls.getActiveLayout === 'function' + ? 
ls.getActiveLayout() + : ls.activeLayout; + expect(activeLayout).toBe('canvas'); + + // Node positions should be accessible/restored + const persisted = await loadLayout(specId); + expect(persisted).not.toBeNull(); + expect(persisted!.nodePositions['node-a']).toEqual({ x: 200, y: 300 }); + expect(persisted!.nodePositions['node-b']).toEqual({ x: 400, y: 100 }); + }); + + it('both "list" and "canvas" are valid layout values (list is never removed)', async () => { + const { LayoutSwitch } = await getModule(); + const specId = 'both-valid-' + Date.now(); + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? new LayoutSwitch(specId) : LayoutSwitch(specId)) + : LayoutSwitch; + + const getLayout = () => + typeof ls.getActiveLayout === 'function' ? ls.getActiveLayout() : ls.activeLayout; + + // Starts at list + expect(getLayout()).toBe('list'); + + // Toggle to canvas + await ls.toggle(); + expect(getLayout()).toBe('canvas'); + + // Toggle back to list β€” list must still be accessible + await ls.toggle(); + expect(getLayout()).toBe('list'); + + // Do a few more round trips to ensure stability + await ls.toggle(); + expect(getLayout()).toBe('canvas'); + await ls.toggle(); + expect(getLayout()).toBe('list'); + }); + + it('multiple toggles with selection do not corrupt or lose items', async () => { + const { LayoutSwitch } = await getModule(); + const specId = 'multi-toggle-' + Date.now(); + const ls = typeof LayoutSwitch === 'function' + ? (LayoutSwitch.prototype ? new LayoutSwitch(specId) : LayoutSwitch(specId)) + : LayoutSwitch; + + const selected = ['a', 'b', 'c']; + if (typeof ls.setSelection === 'function') { + ls.setSelection(selected); + } else if (typeof ls.selection !== 'undefined') { + ls.selection = selected; + } + + // Toggle several times + for (let i = 0; i < 5; i++) { + await ls.toggle(); + } + + const selectionAfter = typeof ls.getSelection === 'function' + ? 
ls.getSelection() + : ls.selection; + + expect(selectionAfter).toEqual(expect.arrayContaining(['a', 'b', 'c'])); + expect(selectionAfter).toHaveLength(3); + }); +}); diff --git a/src/server/node-refinement.test.ts b/src/server/node-refinement.test.ts new file mode 100644 index 00000000..da860583 --- /dev/null +++ b/src/server/node-refinement.test.ts @@ -0,0 +1,127 @@ +import { describe, expect, it } from 'vitest'; +import type { CanvasNode } from '../canvas/types.js'; +import type { KnowledgeKind } from '../shared/knowledge.js'; + +/** + * Tests for src/canvas/NodeRefinement.ts (.tsx) + * + * Selecting a canvas node must open the same graph-launched refinement + * affordance available from the structured-list route. + */ + +interface RefinementTriggerItem { + kind: KnowledgeKind; + id: number; + spanHint?: string; + reconciliationNeedId?: number; +} + +async function getNodeRefinement() { + const mod = await import('../canvas/NodeRefinement.js'); + return mod; +} + +describe('NodeRefinement', () => { + it('exports a handler for canvas node selection', async () => { + const mod = await getNodeRefinement(); + const handler = + mod.NodeRefinement ?? mod.handleNodeSelection ?? mod.createRefinementTrigger ?? mod.default; + expect(handler).toBeDefined(); + expect(typeof handler === 'function' || typeof handler === 'object').toBe(true); + }); + + it('selecting a canvas node produces a trigger item with the correct knowledge id and kind', async () => { + const mod = await getNodeRefinement(); + const handler = + mod.NodeRefinement ?? mod.handleNodeSelection ?? mod.createRefinementTrigger ?? 
mod.default; + + const node: CanvasNode = { + id: 'node-1', + position: { x: 100, y: 200 }, + intentItemRef: 'item-42', + }; + const lookup: Record = { + 'item-42': { id: 42, kind: 'requirement' }, + }; + + let trigger: RefinementTriggerItem; + if (typeof handler === 'function' && handler.prototype?.constructor === handler) { + trigger = new handler({ knowledgeItemLookup: lookup }).selectNode(node); + } else { + trigger = handler(node, lookup); + } + + expect(trigger).toBeDefined(); + expect(trigger.id).toBe(42); + expect(trigger.kind).toBe('requirement'); + }); + + it('works for different knowledge kinds (goal, criterion, decision)', async () => { + const mod = await getNodeRefinement(); + const handler = + mod.NodeRefinement ?? mod.handleNodeSelection ?? mod.createRefinementTrigger ?? mod.default; + + for (const kind of ['goal', 'criterion', 'decision'] as KnowledgeKind[]) { + const node: CanvasNode = { id: `n-${kind}`, position: { x: 0, y: 0 }, intentItemRef: `ref-${kind}` }; + const lookup: Record = { + [`ref-${kind}`]: { id: 7, kind }, + }; + + let trigger: RefinementTriggerItem; + if (typeof handler === 'function' && handler.prototype?.constructor === handler) { + trigger = new handler({ knowledgeItemLookup: lookup }).selectNode(node); + } else { + trigger = handler(node, lookup); + } + + expect(trigger.kind).toBe(kind); + expect(trigger.id).toBe(7); + } + }); + + it('throws or returns null for an unknown intentItemRef', async () => { + const mod = await getNodeRefinement(); + const handler = + mod.NodeRefinement ?? mod.handleNodeSelection ?? mod.createRefinementTrigger ?? 
mod.default; + + const node: CanvasNode = { id: 'orphan', position: { x: 0, y: 0 }, intentItemRef: 'missing' }; + const lookup: Record = {}; + + let result: RefinementTriggerItem | null | undefined; + let threw = false; + try { + if (typeof handler === 'function' && handler.prototype?.constructor === handler) { + result = new handler({ knowledgeItemLookup: lookup }).selectNode(node); + } else { + result = handler(node, lookup); + } + } catch { + threw = true; + } + + expect(threw || result == null).toBe(true); + }); + + it('trigger item has the shape expected by the refinement affordance (kind + id)', async () => { + const mod = await getNodeRefinement(); + const handler = + mod.NodeRefinement ?? mod.handleNodeSelection ?? mod.createRefinementTrigger ?? mod.default; + + const node: CanvasNode = { id: 'n-r6', position: { x: 50, y: 75 }, intentItemRef: 'item-R6' }; + const lookup: Record = { + 'item-R6': { id: 6, kind: 'requirement' }, + }; + + let trigger: RefinementTriggerItem; + if (typeof handler === 'function' && handler.prototype?.constructor === handler) { + trigger = new handler({ knowledgeItemLookup: lookup }).selectNode(node); + } else { + trigger = handler(node, lookup); + } + + expect(trigger).toHaveProperty('kind'); + expect(trigger).toHaveProperty('id'); + expect(typeof trigger.kind).toBe('string'); + expect(typeof trigger.id).toBe('number'); + }); +}); diff --git a/src/server/package-setup.test.ts b/src/server/package-setup.test.ts new file mode 100644 index 00000000..1948888d --- /dev/null +++ b/src/server/package-setup.test.ts @@ -0,0 +1,109 @@ +import { readFileSync } from 'node:fs'; +import { resolve } from 'node:path'; +import { describe, expect, it } from 'vitest'; + +/** + * These tests verify that package.json, tsconfig.json, and build/dev + * configuration are properly set up to support the spatial canvas feature + * (spatial-graph-layout). The spatial canvas needs a graph/node-layout + * library (e.g. 
@xyflow/react) and its types available at dev/build time. + */ + +const root = resolve(import.meta.dirname, '..', '..'); +const readJson = (relativePath: string) => + JSON.parse(readFileSync(resolve(root, relativePath), 'utf8')) as Record; + +describe('package.json spatial canvas dependencies', () => { + const pkg = readJson('package.json') as { + dependencies?: Record; + devDependencies?: Record; + }; + + const allDeps = { ...pkg.dependencies, ...pkg.devDependencies }; + + it('includes @xyflow/react as a dependency', () => { + expect(allDeps).toHaveProperty('@xyflow/react'); + }); + + it('includes @xyflow/system as a dependency', () => { + // @xyflow/system is the core layout engine; @xyflow/react re-exports it + // but the spatial canvas needs explicit types from it + expect(allDeps).toHaveProperty('@xyflow/system'); + }); + + it('declares dependencies with valid semver ranges', () => { + const xyReact = allDeps['@xyflow/react']; + expect(xyReact).toBeDefined(); + expect(typeof xyReact).toBe('string'); + // Must be a semver-compatible range, not empty + expect(xyReact!.length).toBeGreaterThan(0); + }); +}); + +describe('tsconfig.json supports spatial canvas source', () => { + const tsconfig = readJson('tsconfig.json') as { + compilerOptions?: { + jsx?: string; + paths?: Record; + strict?: boolean; + moduleResolution?: string; + }; + include?: string[]; + }; + + it('has jsx support enabled for React components', () => { + expect(tsconfig.compilerOptions?.jsx).toBeDefined(); + // Must be react-jsx or react-jsxdev to support TSX spatial canvas components + expect(tsconfig.compilerOptions!.jsx).toMatch(/react-jsx/); + }); + + it('includes src directory in compilation', () => { + expect(tsconfig.include).toBeDefined(); + expect(tsconfig.include).toEqual(expect.arrayContaining([expect.stringMatching(/src/)])); + }); + + it('has path alias @ mapped to src for spatial canvas imports', () => { + const paths = tsconfig.compilerOptions?.paths; + expect(paths).toBeDefined(); 
+ // The existing @/* alias must cover spatial canvas modules under src/ + expect(paths!['@/*']).toBeDefined(); + expect(paths!['@/*']).toEqual(expect.arrayContaining([expect.stringMatching(/src/)])); + }); +}); + +describe('package.json has installable configuration', () => { + const pkg = readJson('package.json') as { + type?: string; + dependencies?: Record; + devDependencies?: Record; + }; + + it('is an ESM package (type: module) for xyflow compatibility', () => { + // @xyflow/react ships ESM; the project must be ESM + expect(pkg.type).toBe('module'); + }); + + it('has spatial canvas deps that can be resolved from node_modules', () => { + // Verify the dependency is actually installed (not just declared) + // by attempting to resolve the package entry + expect(() => { + // Use a dynamic import resolve check β€” if the package is not installed, + // this will throw + const resolved = resolve(root, 'node_modules', '@xyflow', 'react', 'package.json'); + const xyPkg = JSON.parse(readFileSync(resolved, 'utf8')) as { name: string }; + expect(xyPkg.name).toBe('@xyflow/react'); + }).not.toThrow(); + }); +}); + +describe('vite config includes spatial canvas source in test globs', () => { + // The vite.config.ts test.include must cover the spatial canvas test files + // which live under src/. We verify the config file content rather than + // importing vite config (which has side effects). 
+ const viteConfigSrc = readFileSync(resolve(root, 'vite.config.ts'), 'utf8'); + + it('test include covers src/**/*.test.* patterns', () => { + // The glob must match .test.ts and .test.tsx files under src/ + expect(viteConfigSrc).toMatch(/src\/\*\*\/\*\.test\./); + }); +}); From 389e15e609e2494c635c435ced00c9108832c6a3 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Wed, 17 Jun 2026 09:36:12 +0100 Subject: [PATCH 31/32] FE-864: keep slice grid current during verification Co-authored-by: Cursor --- src/orchestrator/src/cook-cli.ts | 1 + src/orchestrator/src/engine-contract.test.ts | 19 +++++++++++++++++++ src/orchestrator/src/net-compiler.ts | 4 ++++ .../src/presenter/run-store.test.ts | 1 + src/orchestrator/src/presenter/run-store.ts | 4 ++-- src/orchestrator/src/types.ts | 2 ++ 6 files changed, 29 insertions(+), 2 deletions(-) diff --git a/src/orchestrator/src/cook-cli.ts b/src/orchestrator/src/cook-cli.ts index 2dc88c09..a84683b1 100644 --- a/src/orchestrator/src/cook-cli.ts +++ b/src/orchestrator/src/cook-cli.ts @@ -514,6 +514,7 @@ export async function runCook(opts: CookOptions, bus: CookBus): Promise { reports, testRunner, policy: { maxRetries: opts.maxRetries }, + emit: (event) => bus.emit(event), sandboxMode: sandbox.kind === 'codebase' ? 
'codebase' : 'fixture', sliceLayout, runId, diff --git a/src/orchestrator/src/engine-contract.test.ts b/src/orchestrator/src/engine-contract.test.ts index 5dd81ad9..a097f505 100644 --- a/src/orchestrator/src/engine-contract.test.ts +++ b/src/orchestrator/src/engine-contract.test.ts @@ -17,6 +17,7 @@ import { createNetFolding } from './petrinaut-fold.js'; import type { SdcpnFile } from './petrinaut-sdcpn.js'; import { type BrunchExecutionExportFrame, createPetrinautStreamBus } from './petrinaut-stream-bus.js'; import { reduceBrunchExecutionExport } from './petrinaut-stream-export.js'; +import type { CookEvent } from './presenter/events.js'; import { InMemoryReportSink } from './report-sink.js'; import type { ActionContext, ActionHandlers, OrchestratorInput, Plan, RunCtx, TestRunner } from './types.js'; @@ -262,6 +263,24 @@ describe('Engine contract test #1 β€” single epic, single slice, happy path', () ]); }); + it('emits slice grid events around net-level test runs', async () => { + const fakes = createFakes(); + const events: CookEvent[] = []; + await create().run({ + plan: simplePlan, + sandboxDir: '/tmp/fake', + actions: fakes.actions, + reports: fakes.reports, + testRunner: fakes.testRunner, + policy: { maxRetries: 3 }, + emit: (event) => events.push(event), + }); + expect(events.filter((e) => e.kind === 'slice')).toEqual([ + { kind: 'slice', id: 'slice-1', epicId: 'epic-1', status: 'running', step: 'verify' }, + { kind: 'slice', id: 'slice-1', epicId: 'epic-1', status: 'passed' }, + ]); + }); + it('report sink contains expected lines', async () => { const fakes = createFakes(); await create().run({ diff --git a/src/orchestrator/src/net-compiler.ts b/src/orchestrator/src/net-compiler.ts index 075c0579..dac28cde 100644 --- a/src/orchestrator/src/net-compiler.ts +++ b/src/orchestrator/src/net-compiler.ts @@ -707,6 +707,7 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, const deferred = (async () => { const slice = 
plan.slices.find((s) => s.id === sliceId)!; const sandboxDir = resolveSliceCwd(slice); + input.emit?.({ kind: 'slice', id: sliceId, epicId, status: 'running', step: 'verify' }); // Shared verification seam: same verdict rule + infra-dominates // aggregate as evaluate-done / verify-epic (FE-872 unification). const { @@ -730,6 +731,7 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, const tok: Token = { ...inputToken, reportId }; if (passed) { + input.emit?.({ kind: 'slice', id: sliceId, epicId, status: 'passed' }); return [ { place: intermediatePlace, token: tok }, { place: budgetPlace, token: { ...baseToken, retryCount: 0 } }, @@ -741,6 +743,7 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, // infra failure, name that cause β€” "retry exhaustion" would // misdirect the reader to the code. ctx.sliceOutcomes.set(sliceId, { sliceId, status: 'halted' }); + input.emit?.({ kind: 'slice', id: sliceId, epicId, status: 'failed' }); const haltReason = failureKind === 'infra' ? 
`Slice ${sliceId} toolchain/install failure during verification` @@ -752,6 +755,7 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, }, ]; } + input.emit?.({ kind: 'slice', id: sliceId, epicId, status: 'failed' }); return [ { place: intermediatePlace, token: tok }, { place: budgetPlace, token: { ...baseToken, retryCount: retryCount + 1 } }, diff --git a/src/orchestrator/src/presenter/run-store.test.ts b/src/orchestrator/src/presenter/run-store.test.ts index 4ca40f7e..8f093fe3 100644 --- a/src/orchestrator/src/presenter/run-store.test.ts +++ b/src/orchestrator/src/presenter/run-store.test.ts @@ -106,6 +106,7 @@ describe('RunStore β€” slice grid', () => { store.push({ kind: 'slice', id: 'login', epicId: 'api', status: 'passed' }); row = store.getSnapshot().slices.find((s) => s.id === 'login')!; expect(row.status).toBe('passed'); + expect(row.step).toBeUndefined(); // in-flight label cleared once it stops running expect(row.detail).toBeUndefined(); // heartbeat cleared once it stops running }); diff --git a/src/orchestrator/src/presenter/run-store.ts b/src/orchestrator/src/presenter/run-store.ts index 0a9203ef..24a23b3a 100644 --- a/src/orchestrator/src/presenter/run-store.ts +++ b/src/orchestrator/src/presenter/run-store.ts @@ -77,8 +77,8 @@ export class RunStore { slices: this.updateSlice(event.id, { status: event.status, ...(event.step !== undefined ? { step: event.step } : {}), - // clear the live heartbeat once the slice stops running - ...(running ? {} : { detail: undefined }), + // clear the in-flight label + heartbeat once the slice stops running + ...(running ? 
{} : { step: undefined, detail: undefined }), }), }); return; diff --git a/src/orchestrator/src/types.ts b/src/orchestrator/src/types.ts index e74eb851..3f8ede4a 100644 --- a/src/orchestrator/src/types.ts +++ b/src/orchestrator/src/types.ts @@ -246,6 +246,8 @@ export type OrchestratorInput = { reports: ReportSink; testRunner: TestRunner; policy: RunPolicy; + /** Ephemeral presentation events for live CLI surfaces (non-durable). */ + emit?: (event: import('./presenter/events.js').CookEvent) => void; /** * 'fixture' (default): per-slice worktrees are created empty. Greenfield. * 'codebase': per-slice worktrees are real `git worktree`s on slice-level From 363cba6ba417d1f86f84f418be4e93840ca214a7 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Wed, 17 Jun 2026 09:50:44 +0100 Subject: [PATCH 32/32] FE-864: clean up restack conflict artifacts Co-authored-by: Cursor --- memory/PLAN.md | 34 ---------------------------- src/orchestrator/src/plan-emitter.ts | 9 -------- 2 files changed, 43 deletions(-) diff --git a/memory/PLAN.md b/memory/PLAN.md index 48c627f3..92b02668 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -19,11 +19,7 @@ The next product arc is the **Conversational Workspace Runtime** umbrella (`docs The **orchestrator / Petri-net execution substrate** is committed (2026-05-21) to Petri as the forward execution model, justified by parallelism, simulation, and resume value claims. Phases 0–2 are done: the dual-engine PoC (Phase 0, FE-730) validated the substrate and extracted the compiler/interpreter; Phase 1 (FE-738) added two-lane mechanical+semantic subnets, the compiler topology/wiring split, and Β§7 event vocabulary; Phase 2 (FE-743) added parallel firing policy with greedy token claiming, shared resource pool tokens bounding global concurrency, and worktree-per-slice isolation β€” the decision gate passed (parallel measurably beats serial on wall clock). 
Phase-3-prep `petri-declarative-routing` (FE-747) is done: typed Guard predicates on `HandlerDescriptor` plus `enumerateCandidateOutputs` make topology-only enumeration of reachable output places possible (I125-K). Phase 3 (graph compilation) remains blocked on `intent-graph-semantics` (FE-700) for relation-policy gates; Phase 4 (simulation oracle) now has its routing-side structural prerequisite satisfied but still needs Phase 3 for graph-derived gates. The north-star design is `docs/next/architecture/plan-graph-petri-orchestration.md`. -<<<<<<< HEAD The orchestrator's forward direction is framed as two arcs toward a **full (autonomous) cook orchestrator** β€” "completed spec β†’ feature built and glued into a real brownfield repo, no manual steps." **Arc 1 (feature delivery)** stacks on FE-843 and ships standalone without the semantic stack. `agent-extension-host` (the dual-mode pi-harness contract) **bases the Arc-1 linear stack** (2026-06-15 decision) β€” every Arc-1 frontier sits on it β€” followed by `brunch-detect` (resolve a registry profile id from repo manifest/lockfile evidence at plan time) β†’ `harness-dep-install` (capture the dependency-delta for promotion + classify install/infra failures distinctly from test failures; the install action itself is agent-native) β†’ `app-runtime-probe` (build + boot + exercise the host app β€” the concrete reachability mechanism) β†’ `integration-oracle` (wire into host + product reachability, via the probe) β†’ `brownfield-promotion` (glue back into the checkout) β†’ `brunch-ship` (one-shot wrapper). A `dogfood-spike` (ln-spike) β€” run the full chain on one real brunch feature β€” should precede committing `integration-oracle`, to surface the reachability mechanism, dep-install, orientation depth, and brownfield plan-shape risks cheaply. CLI surface: the real commands are `brunch plan`, `brunch cook`, and `brunch serve` (the one-shot capstone, FE-878). 
The kitchen-brigade names (prep/recipe/taste/plate) are **phase labels, not commands** β€” detect runs inside `plan`; probe + oracle (verify) and promotion (plate) run inside `cook`/`serve`. Frontier ids stay descriptive; `serve` chains the phases end-to-end. The settled grounding decision is **cook-time** (planning stays host-blind per D160-K; the cook agent resolves real paths/wiring by reading the worktree), which softens FE-829's `writes` ownership to *advisory in brownfield only* β€” greenfield keeps it authoritative. Protecting invariant: **brownfield generalization must not change greenfield-mode behavior; shared contracts fork on `plan.mode`** (the 3 reference fixtures + a greenfield smoke must score identically before/after each frontier). **Arc 2 (full orchestrator)** is an autonomy ladder gated behind the parked semantic/Petri-Phase-3/4 substrate: `interactive-recovery` (halt β†’ coherent question answered in a secondary chat, resumes the run) β†’ `intent-conformance-oracle` (independent behavioral-kernel verification, requisite variety) β†’ `adaptive-replan` (architect amends the plan from execution feedback, recompile + resume). Each rung raises the autonomy ceiling and is independently shippable. Non-additive work (refactors/migrations/debugging) is explicitly a separate `transformation-orchestrator` product line, not folded into either arc. The cook-time grounding decision, the D160-K `writes`-advisory amendment, and the greenfield-protecting invariant need recording in SPEC via ln-sync when the first Arc-1 frontier is scoped. **Agent-host coordination:** the pi harness is a dual-mode (`elicit`/`execute`) agent-extension host (`agent-extension-host`) β€” cook capabilities are `execute`-mode plugins on a shared, mode-neutral core; this contract is the serialization point with the unpublished pi-harness thread (which owns the core), validated against the existing interview as the `elicit` witness. 
It logically gates only the dispatch-seam frontiers (`integration-oracle`, Arc-2 `interactive-recovery`/`adaptive-replan`), but is sequenced at the **base of the Arc-1 linear stack** (2026-06-15 decision) — so the whole arc lands on it, deliberately serializing the cook stack behind the pi-harness-thread coordination rather than running the seam-independent infra (`brunch-detect`, `harness-dep-install`, `app-runtime-probe`, `brownfield-promotion`) in parallel ahead of it. -======= -The orchestrator's forward direction is framed as two arcs toward a **full (autonomous) cook orchestrator** — "completed spec → feature built and glued into a real brownfield repo, no manual steps." **Arc 1 (feature delivery)** stacks on FE-843 and ships standalone without the semantic stack. `agent-extension-host` (the dual-mode pi-harness contract) **bases the Arc-1 linear stack** (2026-06-15 decision) — every Arc-1 frontier sits on it — followed by `brunch-detect` (read toolchain from the repo) → `harness-dep-install` (add/install new deps in the worktree) → `app-runtime-probe` (build + boot + exercise the host app — the concrete reachability mechanism) → `integration-oracle` (wire into host + product reachability, via the probe) → `brownfield-promotion` (glue back into the checkout) → `brunch-ship` (one-shot wrapper). A `dogfood-spike` (ln-spike) — run the full chain on one real brunch feature — should precede committing `integration-oracle`, to surface the reachability mechanism, dep-install, orientation depth, and brownfield plan-shape risks cheaply. CLI command surface (kitchen brigade; frontier ids stay descriptive): detect → `brunch prep`, plan → `brunch recipe`, orchestrate → `brunch cook`, verify → `brunch taste`, promote → `brunch plate`, ship → `brunch serve`.
The settled grounding decision is **cook-time** (planning stays host-blind per D160-K; the cook agent resolves real paths/wiring by reading the worktree), which softens FE-829's `writes` ownership to *advisory in brownfield only* — greenfield keeps it authoritative. Protecting invariant: **brownfield generalization must not change greenfield-mode behavior; shared contracts fork on `plan.mode`** (the 3 reference fixtures + a greenfield smoke must score identically before/after each frontier). **Arc 2 (full orchestrator)** is an autonomy ladder gated behind the parked semantic/Petri-Phase-3/4 substrate: `interactive-recovery` (halt → coherent question answered in a secondary chat, resumes the run) → `intent-conformance-oracle` (independent behavioral-kernel verification, requisite variety) → `adaptive-replan` (architect amends the plan from execution feedback, recompile + resume). Each rung raises the autonomy ceiling and is independently shippable. Non-additive work (refactors/migrations/debugging) is explicitly a separate `transformation-orchestrator` product line, not folded into either arc. The cook-time grounding decision, the D160-K `writes`-advisory amendment, and the greenfield-protecting invariant need recording in SPEC via ln-sync when the first Arc-1 frontier is scoped. **Agent-host coordination:** the pi harness is a dual-mode (`elicit`/`execute`) agent-extension host (`agent-extension-host`) — cook capabilities are `execute`-mode plugins on a shared, mode-neutral core; this contract is the serialization point with the unpublished pi-harness thread (which owns the core), validated against the existing interview as the `elicit` witness.
It logically gates only the dispatch-seam frontiers (`integration-oracle`, Arc-2 `interactive-recovery`/`adaptive-replan`), but is sequenced at the **base of the Arc-1 linear stack** (2026-06-15 decision) — so the whole arc lands on it, deliberately serializing the cook stack behind the pi-harness-thread coordination rather than running the seam-independent infra (`brunch-detect`, `harness-dep-install`, `app-runtime-probe`, `brownfield-promotion`) in parallel ahead of it. ->>>>>>> d928c1d1 (FE-864: Base the Arc-1 linear stack on agent-extension-host) The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agent-mutation design notes are reconciled into one direction. `docs/design/MULTI_CHAT.md` is the substrate document. `docs/design/SIDE_CHAT.md` describes side-chat V1 / V2 / V3.0 / V3.1 / V4 phasing on top of that substrate. `docs/design/PATCH_LEDGER.md` remains historical deeper design pressure for semantic mutation history, but canonical future-facing vocabulary is `changeset` / `change`. The product-layer ontology trajectory is split out as `docs/design/INTENT_GRAPH_SEMANTICS.md` and `docs/design/BEHAVIORAL_KERNELS.md`; broader synthesis lives in `docs/archive/design/INTENT_SPEC_EVOLUTION.md`. FE-705's branch-local strategy/proposal notes add scenario options, graph-review oracle, chat-local strategies, and concern/dependency mapping; those notes should become a canonical design doc when the branch is integrated. Coordination uses a substrate-strangler posture: keep existing frontend REST/SSE contracts stable while route adapters and capability adapters converge on shared server-owned handlers, then cut over UI flows only after parity and changeset-backed authority exist. The dev-layer self-tooling trajectory lives in `docs/design/ln-skills/EVOLUTION.md`.
@@ -57,7 +53,6 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen **Full cook orchestrator β€” Arc 1 (feature delivery; stacks on FE-843, ships without the semantic stack):** -<<<<<<< HEAD 1. `agent-extension-host` β€” **(contract landed β€” FE-867)** the pi harness as a dual-mode (`elicit`/`execute`) extension host; cook capabilities register as `execute`-mode plugins. **Bases the Arc-1 linear stack** (2026-06-15 decision): the whole arc stacks on it, coordinated with the unpublished pi-harness thread (which owns the core). Logically it only gates the dispatch-seam frontier (`integration-oracle`), so serializing the seam-independent infra (2–5) behind it is a deliberate coupling of Arc 1 to that coordination, not a hard dependency. Sits over the FE-841 core. 2. `brunch-detect` β€” **(done β€” FE-871)** resolve a registry profile id from manifest/lockfile evidence at plan time; brownfield-only front of the chain, now wired into the emitter (slice 2). *(seam-independent)* 3. `harness-dep-install` β€” **(acceptance 1–2 landed except brownfield β€” FE-872)** dependency-delta capture + install-failure classification (the install *action* is agent-native via `bash` + FE-843 conventions; this owns lockfile capture for promotion + the fail/infra split). Done: classify + infra-aware halt reason + greenfield manifest/lockfile capture pinned. Remaining: brownfield dep-delta capture β€” **blocked on `brownfield-promotion`** (#7). @@ -66,16 +61,6 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen 6. `integration-oracle` β€” **(Half A + Half B seam landed β€” FE-876)** oracle asserts product reachability via `app-runtime-probe`. Half A (off-seam): `Epic.probe?: ProbeTarget` folds a `runProbe` result into the `verify-epic` verdict β€” after slices merge into `__epic__//`, the epic is `done` only when tests pass **and** the feature is reachable; `not-reachable` is the FE-800 orphan, `infra` is a harness fault. 
Probe gated behind tests passing (never boot a known-broken build); absent β†’ unchanged unit verdict; reachability rides the existing `report.passed` routing. Half B seam: host-blind `Epic.reachability?: ReachabilityIntent` (architect-emittable, D160-K) + an injectable `ProbeGrounder` (`createPiActions({ groundProbe })`) that cook-time-resolves intent β†’ concrete `ProbeTarget` by reading the worktree; `verify-epic` resolves via `probe ?? ground(reachability)`, a grounder that throws is an `infra` fault (visible, not a silent pass), intent without a grounder is an inert no-op. **Remaining (dispatch seam, lands atomically with the pi-harness contract):** the production `ProbeGrounder` (an `execute`-mode agent that reads the worktree) + architect emission of `reachability` intent β€” deferred together so intent is enforced the moment it's emitted (avoids perturbing the 3 reference fixtures). Runs in the FE-738 semantic lane. Promotes FE-800's integration-blind follow-on to a frontier. *(grounder impl depends on `agent-extension-host`)* 7. `brownfield-promotion` β€” **(landed β€” FE-877, `promoteBrownfieldRun`)** commit a completed brownfield cook result onto the repo's own `cook/` branch as one reviewable commit; extends FE-827's greenfield promotion to brownfield and closes the cook-codebase-mode follow-on (the result no longer sits uncommitted in the worktree). Git plumbing only (`commit-tree` + CAS `update-ref`, parent = the existing `cook/` base, throwaway index + external work-tree), so the user's active branch, working tree, and index are never touched; gitignored deps don't land. Reuses `promotionSourceDir` to compose the tree across slice layouts. Auto-runs on a completed brownfield cook (no `--out` needed); merging into the working branch stays the **user's** call. Unblocks FE-872's brownfield dep-delta capture. 8. 
`brunch-ship` β€” **(landed β€” FE-878, `brunch serve`)** one-shot `brunch serve ` = `plan ` then `cook --spec=` (cook reads the plan just emitted), no manual steps. Pure glue, no new orchestration: serve's `--out` is the *promote* target β†’ cook (brownfield auto-promotes via FE-877 regardless), `--profile` stamps the plan, petrinaut/policy/retry flags forward to cook, `--verbose` to both; a failed plan short-circuits (nothing cooked). Testable units `parseServeArgs` + `runServe` (stages injected); db/snapshot wiring stays in `cli.ts`. Cook's `dir` is threaded from the resolved launch cwd (the dir the plan was written to) β€” `runCook` reads `opts.dir` raw, so serve must supply it rather than rely on the `parseCookArgs`-only default (R46). **Closes Arc 1.** -======= -1. `agent-extension-host` β€” the pi harness as a dual-mode (`elicit`/`execute`) extension host; cook capabilities register as `execute`-mode plugins. **Bases the Arc-1 linear stack** (2026-06-15 decision): the whole arc stacks on it, coordinated with the unpublished pi-harness thread (which owns the core). Logically it only gates the dispatch-seam frontier (`integration-oracle`), so serializing the seam-independent infra (2–5) behind it is a deliberate coupling of Arc 1 to that coordination, not a hard dependency. Sits over the FE-841 core. -2. `brunch-detect` β€” read the project toolchain from the repo; brownfield-only front of the FE-843 resolution chain. First feature branch, stacked on `agent-extension-host`. *(seam-independent)* -3. `harness-dep-install` β€” let the cook agent add and install new dependencies in the worktree (the install verb deferred from FE-843); required for real features and non-TS stacks. -4. `dogfood-spike` (ln-spike) β€” run the full chain on one real brunch feature before committing `integration-oracle`; surfaces the reachability mechanism, dep-install, orientation depth, and brownfield plan-shape cheaply. -5. 
`app-runtime-probe` β€” build + boot + exercise the host app; the concrete reachability mechanism `integration-oracle` depends on (without it, "reachable" collapses back to "a test that imports the module"). -6. `integration-oracle` β€” architect emits generic wiring intent, cook agent resolves real wiring (cook-time grounding), oracle asserts product reachability via `app-runtime-probe` in the FE-738 semantic lane. Promotes FE-800's integration-blind follow-on to a frontier. *(dispatch-seam β€” depends on `agent-extension-host`; wiring agent = `execute`-mode plugin)* -7. `brownfield-promotion` β€” commit/merge the cook result into the user's checkout; extends FE-827's greenfield promotion to brownfield. -8. `brunch-ship` β€” one-shot `brunch serve ` wrapper (prep β†’ recipe β†’ cook β†’ taste β†’ plate), no manual steps. Arc 1 capstone. ->>>>>>> d928c1d1 (FE-864: Base the Arc-1 linear stack on agent-extension-host) **Runtime umbrella + semantic substrate:** @@ -392,15 +377,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Name:** Toolchain profile expansion β€” more TS runtimes + live/strict profile selection - **Linear:** FE-843 Β· branch `ka/fe-843-toolchain-profiles` (stacked on FE-841) - **Kind:** structural (selection decision + I130-K refinement); cards 1/3 bounded -<<<<<<< HEAD -<<<<<<< HEAD - **Status:** branch-complete (2026-06-10) β€” all 3 cards landed (1: data-driven registry + 4 TS profiles; 2: selection live + strict; 3: architect classifies profile from spec prose); card queue retired. SPEC promotion (I130-K refinement + agent-install assumption) rides with ln-sync at PR tie-off. Outer-loop validation outstanding: greenfield cook smoke `--profile=node-vitest` (conventions-prose oracle + A98 install assumption). 
-======= -- **Status:** active — 3-card queue in `memory/CARDS.md` (1: data-driven registry + `node-vitest`/`node-test`/`node-jest`/`deno`; 2: selection live + strict; 3: architect classifies profile from spec prose) ->>>>>>> 546b1349 (FE-843: Frontier setup — toolchain-profile-expansion plan entry + 3-card queue) -======= -- **Status:** branch-complete (2026-06-10) — all 3 cards landed (1: data-driven registry + 4 TS profiles; 2: selection live + strict; 3: architect classifies profile from spec prose); card queue retired. SPEC promotion (I130-K refinement + agent-install assumption) rides with ln-sync at PR tie-off. Outer-loop validation outstanding: greenfield cook smoke `--profile=node-vitest` (conventions-prose oracle + A98 install assumption). ->>>>>>> b84fbda3 (FE-843: Architect classifies the toolchain profile from spec prose) - **Objective:** Expand the FE-829 `Toolchain` contract (`project-profile.ts`) so cook builds on more technologies: profiles as data literals compiled into the existing `Toolchain` interface (consumers untouched); profile resolved once at plan time (`--profile` flag ≫ `snapshot.profile` ≫ architect-classified ≫ `bun`), persisted into `plan.yaml`; cook errors on unknown ids (lenient on absent, mirroring the `checkPlan` base/emitted split). The architect's existing schema-constrained call gains an optional profile enum — classification reads projected spec prose only (D160-K intact, no host introspection). - **Why now / unlocks:** Today both implemented profiles are TypeScript and the selection path is dead (`snapshot.profile` never populated) — every cook run silently resolves to bun, and a typo'd profile id silently defaults rather than erroring. Without live selection, adding profiles changes nothing at runtime; without more profiles, "brunch builds on different technologies" is false in practice.
- **Design (ln-design 2026-06-10):** flat data registry (A) over composed axes (B β€” deferred until real combo demand; retreat is cheap since the consumer surface never changes); no harness install verb β€” the cook agent scaffolds + installs per A98 (pressure trigger: first profile where the runner fails for missing install). Deferred follow-ons: fail/infra test-outcome split in `ToolchainTestRunner`, brownfield drift-check at cook open (path-neutral by type), `brunch detect` CLI convenience. Durable end-state for selection: the spec interview captures stack as a structured field (the D164-K `mode` pattern); the chain already reserves that rung. @@ -409,7 +386,6 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Traceability:** Requirements 46–50; A98, D160-K, D164-K (pattern), D167-K; refines I130-K (resolved profile persisted; strict-on-unknown). New assumption on build: agent-side install suffices for node profiles. Refinement on `plan-build-architect` (FE-829). - **Design docs:** `docs/design/orchestrator.md`; SPEC Β§Future Direction Cook plan generation. -<<<<<<< HEAD ### agent-extension-host - **Name:** Agent extension host β€” dual-mode (`elicit`/`execute`) pi-harness contract @@ -430,19 +406,11 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Name:** Brunch toolchain detection β€” read the project toolchain from the repo - **Linear:** FE-871 Β· branch `ka/fe-871-brunch-detect` (stacked on FE-867) - **Kind:** bounded feature -<<<<<<< HEAD - **Status:** done (FE-871). Slice 1 β€” `detectProfile(repoDir)` / `project-detect.ts`: a pure, evidence-first detector mapping manifests/lockfiles to a registry `ProfileId` (bun lockfile β†’ bun; deno config β†’ deno; `package.json` vitest/jest/none β†’ node-vitest/node-jest/node-test). 
One clear supported signal resolves; ambiguous evidence (both vitest **and** jest declared) and any repo with no JS/TS evidence return a loud `{detected:false, reason}` via one catch-all rather than silently defaulting to bun β€” the cheap "which lockfile is present" check, not a language-detection engine (no per-stack Python/Go branches; the catch-all message is already actionable). Slice 2 β€” `detected` is wired into the `plan-emitter` selection chain as the brownfield front (`flag ≫ detected (brownfield) ≫ spec ≫ architect-classified ≫ bun`) via `resolveEmittedProfile`; a loud detection failure throws rather than silently falling to bun (falling through to an explicit spec/architect choice first). Greenfield (or brownfield without a `repoDir`) keeps the unchanged FE-843 chain β€” the greenfield no-op. `repoDir` threads CLI launch cwd β†’ `runPlan` β†’ `emitPlanFromSnapshot`; an injectable `detect` seam keeps the emitter tests hermetic. Slice 3 β€” `detectTestDir(repoDir)` co-locates generated tests where the brownfield repo already keeps its own: detection picks the *runner* (profile), this picks the *path*. A profile's default test directory (`tests/{id}.test.ts`) can fall outside a host repo whose vitest `include` is narrowed (e.g. `src/**`), so the chosen path is unrunnable β€” vitest reports "No test files found" for an explicitly-named file (observed in a real brownfield cook). Rather than parse the runner's executable-TS config, it samples existing `*.test.*`/`*.spec.*` files (zero-dep bounded `fs` walk, skipping `node_modules`/build dirs) and returns the dominant directory; `withTestDir(toolchain, dir)` relocates the targets while preserving the filename convention. Brownfield-only; `null` (no existing tests) keeps the profile default; greenfield never relocates. 
Slice 4 — monorepo hardening: `detectTestDir` returns the dominant *full* directory (not just the top segment) so a package-rooted include glob still covers the path; `detectProfile` widens runner detection to declared workspace packages (npm/yarn `workspaces`, pnpm `pnpm-workspace.yaml`; literal + single-level `dir/*` globs) **only when the root declares no runner**, scoped to declared workspaces so a stray nested project (docs prototype, example app) can't poison detection — a root runner still wins without scanning, and workspaces collectively declaring both vitest+jest stays loudly ambiguous. Stacked on `agent-extension-host`. - **Objective:** Resolve a registry `ProfileId` at **plan time** from the repo's manifest/lockfile evidence — the cheap "which lockfile/manifest is present" check, mapping only to ids already in the FE-843 registry. It is **not** a language-detection engine: anything without a single clear supported signal (ambiguous JS runners, or non-JS stacks like Python/Go) returns a loud `{detected:false}` reason via one actionable catch-all, never a guessed profile. Brownfield-only front of the selection chain (`flag ≫ detected ≫ spec ≫ architect ≫ bun`); the resolved id is stamped into `plan.yaml` so `brunch cook` runs the same toolchain. Greenfield never detects (empty worktree). Resolves toolchain **identity** only — real file paths / existing wiring / `writes` reconciliation is cook-time agent grounding, out of scope here. - **Why now / unlocks:** The "no manual steps" goal requires reading the real toolchain rather than inferring from spec prose or a `--profile` flag — and it must happen at plan time, because the deterministic test runner reads the stamped `plan.profile` with **no agent in the loop** (`cook-cli.ts`, `pi-actions.ts`), so a wrong default runs the wrong test command with no diagnostic. The cook agent's `read`/`bash` cannot substitute. FE-843 built the registry but deferred detection; this closes that gap.
- **Acceptance:** (1) detection maps a real repo to a registry profile id from manifest/lockfile evidence *(slice 1, done)*; (2) brownfield cook/plan resolves toolchain via detection at the front of the FE-843 chain (`--profile` still overrides) *(slice 2)*; (3) greenfield resolution is unchanged (no detection input); (4) ambiguous/unknown repo fails with an actionable message, not a silent default *(slice 1, done)*; (5) the 3 reference fixtures + greenfield smoke score identically before/after. - **Verification:** detector unit tests *(slice 1, done — per-stack fixtures + loud `{detected:false}`)*; slice 2: resolution-chain precedence tests (detect vs flag vs spec) + greenfield no-op / before-after-identical test; slice 3: `detectTestDir` clustering/skip/null tests + `withTestDir` relocation tests + emitter tests asserting brownfield targets follow the detected dir while greenfield keeps the profile default; slice 4: full-dir/monorepo `detectTestDir` tests + workspace runner-detection tests (npm/yarn/pnpm, root-wins, literal dir, cross-workspace ambiguity). -======= -- **Status:** not-started (drafted 2026-06-15) — Arc 1, first feature branch; stacked on `agent-extension-host` (which bases the Arc-1 stack on FE-843). -- **Objective:** Detect the project's toolchain by introspecting the actual repo (manifests/lockfiles: `package.json` + lockfile, `pyproject.toml`, `go.mod`, …) and resolve it to a `ProjectProfile`/`Toolchain` id from the FE-843 registry. Brownfield-only front of the existing resolution chain; greenfield keeps FE-843's spec ≫ architect-classified ≫ bun default (an empty worktree has nothing to detect). -- **Why now / unlocks:** The "no manual steps" goal requires reading the real toolchain rather than inferring from spec prose or a `--profile` flag. FE-843 built the data-driven profile registry but **defers `brunch detect`**; this closes that gap.
-- **Acceptance:** (1) detection maps a real repo to a registry profile id from manifest/lockfile evidence; (2) brownfield cook/plan resolves toolchain via detection at the front of the FE-843 chain (`--profile` still overrides); (3) greenfield resolution is unchanged (no detection input); (4) ambiguous/unknown repo fails with an actionable message, not a silent default; (5) the 3 reference fixtures + greenfield smoke score identically before/after. -- **Verification:** detector unit tests (fixtures per stack: bun, node-vitest, deno, pytest/go stubs); resolution-chain precedence tests (detect vs flag vs spec); greenfield no-op test. ->>>>>>> d928c1d1 (FE-864: Base the Arc-1 linear stack on agent-extension-host) - **Depends on:** `toolchain-profile-expansion` (FE-843). - **Traceability:** Requirements 46–50; refines I130-K; greenfield-protecting invariant (new — record in SPEC via ln-sync). **D160-K boundary:** detection is plan-time profile-*id* resolution (an input to authoring), not architect host-introspection — D160-K constrains the architect/authoring stage, not profile resolution, so `brunch-detect` needs no D160-K amendment. - **Design docs:** `docs/design/orchestrator.md`. @@ -563,8 +531,6 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Traceability:** Requirements 46–50; FE-738 acceptance criterion 5 (deferred); spec §graph-revision. - **Design docs:** `docs/next/architecture/plan-graph-petri-orchestration.md`; `docs/design/orchestrator.md`.
-======= ->>>>>>> 546b1349 (FE-843: Frontier setup — toolchain-profile-expansion plan entry + 3-card queue) ### petrinaut-colour-fold - **Name:** Petrinaut export — colour-fold per-slice subnet diff --git a/src/orchestrator/src/plan-emitter.ts b/src/orchestrator/src/plan-emitter.ts index ff7b64e5..c0cb482a 100644 --- a/src/orchestrator/src/plan-emitter.ts +++ b/src/orchestrator/src/plan-emitter.ts @@ -145,7 +145,6 @@ export async function emitPlanFromSnapshot( const architectResult = await architectPlan(projected, runModel, planningContext); -<<<<<<< HEAD // Selection chain: flag ≫ detected (brownfield) ≫ spec ≫ architect-classified // ≫ bun. Resolved exactly once, here; both paths below stamp the result onto // the emitted plan. A failed architect simply skips its rung. @@ -171,14 +170,6 @@ export async function emitPlanFromSnapshot( const testDir = (options.detectTestDir ?? detectTestDir)(options.repoDir); if (testDir !== null) toolchain = withTestDir(toolchain, testDir); } -======= - // Selection chain: explicit flag ≫ spec profile ≫ architect-classified ≫ - // bun. Resolved exactly once, here; both paths below stamp the result onto - // the emitted plan. A failed architect simply skips its rung. - const classified = architectResult.status === 'succeeded' ? architectResult.draft.profile : null; - const profile: ProfileId = options.profile ?? projected.profile ?? classified ?? 'bun'; - const toolchain = options.toolchain ?? resolveToolchain(profile); ->>>>>>> b84fbda3 (FE-843: Architect classifies the toolchain profile from spec prose) if (architectResult.status === 'failed') { return fallback(projected, profile, toolchain, architectResult, architectResult.reason);