From f1bfdeebb66a3a0038048170a9d25e5ffeac4761 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Thu, 18 Jun 2026 13:40:40 +0100 Subject: [PATCH 1/9] FE-884 (slice A): recoverable epic verification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A failed epic verification now dispatches a remediation code agent against the folded __epic__ tree and re-verifies, reaching the halt sink only after an epic-retry-budget is exhausted — mirroring the slice-level run-tests loop. The fail-sibling routes to a new epic-remediate dispatch/complete chain instead of straight to halt. Oracle integrity: a remediation that edits the epic integration test is rejected (detect-and-reject) and counts against budget. Dual re-verify: acceptance requires the epic integration test AND the slice suites to pass on the folded tree, with the combined verdict carried on the routed token. Round-trip: harvestCookRun folds only slice worktrees, so the folded-tree fix is diff-transferred and committed to the representative slice branch (transferFoldedFixToSlice) to reach the promoted artifact. Verified by topology goldens, run-artifact unit tests, and a seeded scripted-agent e2e (fixable / reject / exhaustion). Slices B (epic infra/timeout classification) and C (partial promotion) remain. 
--- memory/CARDS.md | 197 ++++++----- .../src/epic-recovery.integration.test.ts | 305 ++++++++++++++++++ src/orchestrator/src/net-blueprint.ts | 53 ++- src/orchestrator/src/net-compiler.ts | 223 ++++++++++++- src/orchestrator/src/run-artifact.test.ts | 100 ++++++ src/orchestrator/src/run-artifact.ts | 64 +++- src/orchestrator/src/topology.test.ts | 115 ++++++- 7 files changed, 946 insertions(+), 111 deletions(-) create mode 100644 src/orchestrator/src/epic-recovery.integration.test.ts diff --git a/memory/CARDS.md b/memory/CARDS.md index 744c78273..af64f142c 100644 --- a/memory/CARDS.md +++ b/memory/CARDS.md @@ -1,107 +1,140 @@ -# Scope cards — cook-artifact-lifecycle (FE-883) +# Scope cards — epic-verify-recovery (FE-884) + +Execution queue for `epic-verify-recovery` (FE-884, branch +`ka/fe-884-epic-verify-recovery`, stacked on FE-883's `ka/fe-883-worktree-gc`). + +**Core problem:** the orchestrator's two verification tiers are asymmetric. The +slice tier is recoverable (`failing-tests → code-agent → run-tests`, in-net +retry budget). The epic tier is terminal: `epic-verify::fail` routes +straight to `epicHaltedPlace` via `attach-halt-reason` (`net-compiler.ts` +~458–535). So the one place cross-slice defects surface — epic integration — is +the one place the harness cannot act on what it found. A failed epic halts the +whole run and promotes nothing, discarding the diagnosis, the folded worktree, +and every passing epic. + +**Worked example:** run `59100820-...` (spec 49, 3 epics / 11 slices, 60m26s). +11/11 slices + 2/3 epics passed; `route-integration` failed on a real bug (view +toggle wrote `?view=graph` but the sibling `useViewParam()` never resynced — +`pushState` doesn't emit `popstate`). The verify agent named the exact fix; the +run halted anyway. The fix (`brunch:viewparamchange` event, ~10 lines) was +applied by hand afterward — on a worktree the harness already had, from a +diagnosis it already produced. 
+ +**Builds on FE-883:** the epic verify already composes the folded +`__epic__//` tree (`materializeEpicVerifyTree`), and promotion folds +slice commits (`harvestCookRun`, idempotent `commitSliceWorktree`). FE-884 makes +a failed epic *recoverable* rather than *terminal* — substrate-free, distinct +from Arc-2 `interactive-recovery`/`adaptive-replan` (which need the parked +semantic substrate). + +**Owed reconciliation:** FE-884 is not yet a frontier in `memory/PLAN.md`, and +the oracle strategy below is not yet folded into SPEC §Verification Design. +Reconcile via ln-plan + ln-sync when FE-884 registers / lands. -Execution queue for `cook-artifact-lifecycle` (FE-883, branch -`ka/fe-883-orchestrator-improvements`, on FE-864). +--- -**Reality check (corrected after basing on FE-864, the current seam):** the -brownfield git-merge composer already exists — `run-artifact.ts` (commit -871ef087): `commitSliceWorktree` + `foldSliceBranches` do a real `git merge-tree` -3-way fold of per-slice branches in dependency order, fail-closed on conflicts, -pure plumbing (I135-K preserved). It was deliberately left **unwired** pending "a -live-run check of the dependency-seed interaction". So FE-883 is *wire the -existing composer*, not *build it*. +## Slice A — recoverable epic verification (the missing green step) -This matches the Slice-1 spike decision (2026-06-18): git-merge for brownfield -(common ancestor → real 3-way), file-copy union for greenfield (no common -ancestor), elevate collisions to a first-class outcome. +Status: **done** (2026-06-18). All 7 acceptance criteria proven (run-59100820 +by analog; real-agent dogfood is outer-loop, not run). Gate green: check 0 +errors, build pass, orchestrator + epic-recovery e2e 104 tests pass (full suite +2101 pass / 2 skip; the single `build-boundary` failure is the pre-existing +dev-worktree `node_modules` symlink artifact documented on FE-883's PR). 
---- +**Design finding (round-trip assumption — VALIDATED with caveat):** the naive +assumption was false — `harvestCookRun` folds only *slice* worktrees; the +`__epic__//` tree is detached and discarded. A remediation fix made in the +folded tree must be **diff-transferred and committed to the representative slice +branch** (`transferFoldedFixToSlice` in `run-artifact.ts`) to be folded into the +promoted artifact. → record as a SPEC decision + invariant on canonical +reconciliation (owed). + +Full scope card — structural (changes the epic-verify topology; establishes the +invariant *a failed epic is recoverable, not terminal*). -## Slice 1 — wire the run-artifact composer into the live path +### Target Behavior -Status: **in progress.** +A failed epic verification dispatches a remediation code agent against the folded +epic tree and re-verifies, reaching the halt sink only after the epic's +remediation budget is exhausted. -### Sub-steps +### Boundary Crossings ``` -✓ 1a (done, commit 2357f941) — composer correct under dependency-seeding. The - deferred "live-run check" failed: a dependent slice extending a dep-seeded file - false-conflicted because slice branches share no inter-slice ancestry. Fix: - commit each slice recording its dependency commits as parents, so the fold's - merge-base is the dependency. Regression test added; unfaithful happy-path test - corrected. (epic-sandbox-merge.ts file-copy untouched.) - -✓ mechanism (commits fadb1b52, 5e1d8d32) — proved + factored the fold so both - 1b and 1c can use it: foldToCommit (fold N slice commits onto a base, fail-closed, - no ref write) + materializeFoldedWorktree (fold + `git worktree add --detach`, - rework-safe). Tests pin: 3-way merge of different-hunk edits to one file keeps - both; the fold materializes on disk in a verify worktree. - -✓ 1c DECISION (2026-06-18): verify against the folded tree (option i). 
One - composition path → the tree verified == the tree shipped; no verify≠ship gap on - same-file edits. The worktree-checkout unknown is de-risked by materializeFoldedWorktree. - -✓ 1b/1c INTEGRATION (done, commit d92ce38b) — engine wired end-to-end: - - net-compiler verify-epic: brownfield uses materializeEpicVerifyTree (commit - slices dep-order → fold → detached worktree at __epic__// → relink - node_modules); fold conflict → fail the epic (passed:false report → fail sibling). - Greenfield keeps the file-copy union. - - cook-cli promotion: brownfield calls harvestCookRun; fold conflicts → fatal run - outcome. I135-K preserved (all plumbing). - - commitSliceWorktree made idempotent so promotion reuses the commits verify made. - - Stale epic-sandbox-merge.ts TODO updated; SPEC I124-K amended (plan.mode fork). - - Full orchestrator suite green (672). Single-slice brownfield-smoke exercises the - engine plumbing; a *multi-slice* end-to-end engine test is still a gap to add. - -○ 1d (remaining) — retire the now-dead promoteBrownfieldRun + BrownfieldPromoteOptions. - Blocked on rewriting the landCookBranch test fixture (repoWithPromotedCook uses - promoteBrownfieldRun to build a promoted branch — rebuild it via harvestCookRun or - a plain commit). mergeSlicesIntoEpicSandbox STAYS (it is the greenfield composer). 
+→ epic-verify::fail (report.passed falsy — today's dead-end sibling) +→ epic-remediate::dispatch → epic-remediate:running (new; mirrors the slice dispatch/running split) +→ code agent in __epic__// folded worktree (FE-883), fed the verify diagnosis +→ detect-and-reject guard: post-attempt git diff touches the epic integration test path → discard, count against budget +→ commit fix into the owning slice branch via idempotent commitSliceWorktree (FE-883) +→ epic-remediate::complete → back to verifyPlace (re-run verify-epic + slice suites on the folded tree) +→ epic-retry-budget place: decrement; on exhaustion → epicHaltedPlace (attach-halt-reason, honest cause) ``` -### Acceptance Criteria (slice-level) +### Risks and Assumptions ``` -✓ dep-seed — a dependent slice extending a dep-seeded file folds clean (done, 1a) -○ brownfield-3way — two brownfield slices editing different hunks of the same - pre-existing file both survive promotion (the file-copy union drops one) -○ brownfield-conflict — a true overlapping-hunk conflict surfaces as a fatal run - outcome, not a buried event field -○ checkout-untouched — promotion still never touches the user's branch / tree / - index (I135-K) -○ greenfield-unchanged — serial-greenfield shared-tree + parallel-greenfield - file-copy paths preserved +- RISK: a remediation agent greens the epic by editing the integration test, not product code + → MITIGATION: detect-and-reject (git diff touches the epic test path → discard + budget); dual re-verify (slice suites must also pass) +- RISK: a fix in the detached folded tree never reaches promotion + → MITIGATION: round-trip through commitSliceWorktree onto the owning slice branch so harvestCookRun folds it +- ASSUMPTION: an epic-level fix can be attributed to one slice's branch (vs a synthetic "integration slice" commit) + → VALIDATE: trace harvestCookRun's fold over an added commit on a representative slice → [→ memory/SPEC.md §Assumptions] +- ASSUMPTION: the slice-loop retry-budget 
machinery generalizes to the epic lane unchanged + → VALIDATE: epic-retry-budget place + dispatch/complete siblings reuse the existing in-net retry pattern ``` -### Verification Approach +### Acceptance Criteria ``` -- Inner: run-artifact.test.ts (done), promote-run.test.ts, epic-sandbox-merge.test.ts -- Middle: brownfield-smoke.integration.test.ts — seeded repo, overlapping slices -- Outer: dogfood a multi-slice brownfield cook with an intentional file overlap +✓ epic-remediation-fires — a falsy verify report routes to epic-remediate, not directly to halt +✓ re-verify-loop — remediate:complete returns to verifyPlace and re-runs verify-epic +✓ dual-re-verify — remediation is accepted only if the epic integration test AND the slice suites pass on the folded tree +✓ budget-exhaustion-halts — after N failed attempts the epic reaches epicHaltedPlace with an honest reason +✓ oracle-integrity — an attempt that modifies the epic integration test file is rejected and counts against budget +✓ fix-promotes — a remediation commit is folded by harvestCookRun (the fix survives into the promoted artifact) +✓ run-59100820-closes — replaying the example run, the route-integration epic self-heals within budget (outer) ``` ---- +### Verification Approach (oracle strategy) -## Slice 2 — worktree + branch GC / lifecycle (light) — `done` +``` +- Inner: + · topology golden/adapter — :fail routes → epic-remediate → verifyPlace; budget decrement; exhaustion → halt + · negative-space test-path guard — post-attempt git diff touching the epic test path → reject + budget + · engine contract suite stays green (runtime equivalence on the unchanged paths) +- Middle: + · scripted-agent integration (model-based) over the SYNTHETIC broken-then-fixable epic fixture: + (fail → edit product code → pass) reaches `done`; (fail → edit test) is rejected + · dual re-verify (invariant) — epic integration test + slice suites both green on the folded tree + · promotion round-trip (differential) — the 
remediation commit appears in the harvested tree +- Outer: + · real-agent dogfood replay of run 59100820 — epic self-heals unattended (one-shot confidence, human-observed) +``` -Branch `ka/fe-883-worktree-gc` (stacked on FE-883). `gcCookRun` (run-refs.ts, -commit bf43477f) reclaims the run's worktrees (run + nested slice/__epic__, -deepest-first) + the intermediate `brunch/slice//*` branches, keeping the -`brunch/run/` artifact branch and every other run untouched; realpath-safe -(macOS /var→/private/var). Wired into cook-cli: auto-GC on a **completed + -promoted** brownfield run, best-effort (never fails a good run); halted/conflicted -runs return earlier and keep their worktrees for inspection (keep-on-failure). -Decision: auto-GC (no flag) — "no leaks by default". Tests: run-refs.test.ts -(reclaim + unrelated-run-untouched). Gap: no end-to-end runCook test exercises the -auto-GC call (same gap as the promotion wiring). +### Acknowledged blind spots + +``` +- LLM remediation COMPETENCE is not oracle-able — only loop mechanics are. Mitigation: budget + honest halt. + Revisit: dogfood shows low fix-rate. +- detect-and-reject guards only the EPIC test path; an agent could weaken a SLICE test instead. + Mitigation: dual re-verify (slice suites must pass). Revisit: a remediation greens by editing a slice test → freeze all *.test.* under the epic. +- a flaky epic test (the original ETIMEDOUT) misread as a logic fail → deferred to Slice B. +- wall-clock cost of extra agent round-trips — no time budget gate. Accept for now. +``` + +--- -## Slice 3 — per-slice build-cache write isolation (candidate) +## Slice B — infra/timeout classification at the epic verdict — `next` after A (independent) -May instead be an FE-879 follow-on (FE-879 owns `SHAREABLE_TOP_LEVEL_ENTRIES`). -Decide ownership before scoping. +Extend FE-872's `failureKind: 'infra' | 'test'` from the slice `tests-run` report +to the `verify-epic` report; an infra/timeout failure (e.g. 
`spawnSync npx +ETIMEDOUT`) gets a bounded infra-retry, not an immediate halt; size the verify +subprocess timeout to the target's real cost (the `graph-route-wiring` test alone +ran 25s on code-split + React-Flow warmup). Independent of A — could land first. -## Out of scope (noted) +## Slice C — partial promotion / salvage — deferred (not pre-carded) -- Sync `git worktree add` serialization (`epic-sandbox-merge.ts:288`) — perf, not - correctness; FE-879 laziness already bounds worktree count. +Extend `harvestCookRun` to promote passing epics and hand back the folded +worktree + the failing epic's diagnosis instead of `nothing promoted`. Shape +depends on A's commit-round-trip topology and FE-883's GC ref-set, so do **not** +pre-card it until A lands. diff --git a/src/orchestrator/src/epic-recovery.integration.test.ts b/src/orchestrator/src/epic-recovery.integration.test.ts new file mode 100644 index 000000000..0b82c3a9e --- /dev/null +++ b/src/orchestrator/src/epic-recovery.integration.test.ts @@ -0,0 +1,305 @@ +// FE-884 Slice A — recoverable epic verification, end-to-end in codebase mode. +// +// Drives the full orchestrator over a seeded git repo whose epic carries an +// integration-test target, with a SCRIPTED stub remediation agent (no real pi). +// Proves the loop the topology tests can only show structurally: +// +// - broken → remediation edits product code → re-verify passes → epic done, +// and the fix round-trips onto the slice branch (so harvest folds it); +// - a remediation that edits the epic integration test is rejected +// (detect-and-reject) and burns a budget unit; +// - a no-op remediation exhausts the budget and halts with an honest reason; +// - dual re-verify: a verify that greens the epic integration test but breaks a +// slice suite on the folded tree is NOT accepted. +// +// Like brownfield-smoke, the "fixture" is a setup function (a real nested .git/ +// under the brunch repo would create submodule weirdness). 
+ +import { execFileSync } from 'node:child_process'; +import { existsSync, mkdirSync, mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { afterEach, describe, expect, it } from 'vitest'; + +import { resolveCookPlan, resolveSandboxPlan } from './cook-cli.js'; +import { createOrchestrator } from './engine.js'; +import { loadPlan } from './plan-loader.js'; +import { InMemoryReportSink } from './report-sink.js'; +import type { ActionContext, ActionHandlers, ReportLine, TestRunner } from './types.js'; +import { createSandbox } from './worktree.js'; + +const GIT_TEST_TIMEOUT_MS = 30_000; +const EPIC_TEST_TARGET = 'epic-itest.txt'; + +describe('FE-884 — recoverable epic verification (codebase mode)', () => { + const dirs: string[] = []; + afterEach(() => { + for (const d of dirs) rmSync(d, { recursive: true, force: true }); + dirs.length = 0; + }); + + // A 1-slice epic that carries an integration-test target, so the verify-epic + // transition (and the FE-884 remediation chain) is compiled. 
+ function makeSeededRepo(): string { + const dir = mkdtempSync(join(tmpdir(), 'epic-recovery-')); + dirs.push(dir); + execFileSync('git', ['init', '-q', '-b', 'main'], { cwd: dir }); + execFileSync('git', ['config', 'user.email', 'test@example.com'], { cwd: dir }); + execFileSync('git', ['config', 'user.name', 'Test'], { cwd: dir }); + writeFileSync(join(dir, '.gitignore'), '.brunch/\n'); + writeFileSync(join(dir, 'src.txt'), 'seed\n'); + execFileSync('git', ['add', '.'], { cwd: dir }); + execFileSync('git', ['commit', '-q', '-m', 'initial'], { cwd: dir }); + + mkdirSync(join(dir, '.brunch', 'cook'), { recursive: true }); + writeFileSync( + join(dir, '.brunch', 'cook', 'plan.yaml'), + [ + 'mode: brownfield', + 'epics:', + ' - id: ep', + ' summary: recoverable epic', + ' depends_on: []', + ' verification:', + ' - kind: integration-test', + ` target: ${EPIC_TEST_TARGET}`, + 'slices:', + ' - id: s1', + ' epic_id: ep', + ' definition: seed src.txt', + ' depends_on: []', + ' verification:', + ' - kind: unit-test', + ' target: src.txt', + '', + ].join('\n'), + ); + return dir; + } + + // Scripted actions. `remediation` selects what the stub remediation agent does + // to the folded epic tree; `epicPasses` lets a scenario force the integration + // test green to isolate dual re-verify. + function makeFakeActions( + reports: InMemoryReportSink, + opts: { remediation: 'fix' | 'touch-test' | 'noop'; epicPasses?: boolean }, + ): ActionHandlers { + const evalCalls = new Map(); + return { + 'evaluate-done': async (ctx: ActionContext) => { + const n = (evalCalls.get(ctx.slice.id) ?? 
0) + 1; + evalCalls.set(ctx.slice.id, n); + const done = n >= 2; // NO then YES + const id = `eval-${ctx.slice.id}-${n}`; + reports.append(line(id, ctx, 'evaluator', 'eval-done', { done })); + return id; + }, + 'write-tests': async (ctx: ActionContext) => { + const id = `wt-${ctx.slice.id}`; + reports.append(line(id, ctx, 'test-writer', 'tests-written', {})); + return id; + }, + 'write-code': async (ctx: ActionContext) => { + // Product code in the slice worktree: a known value WITHOUT the fix token. + writeFileSync(join(ctx.sandboxDir, 'src.txt'), 'v1\n'); + const id = `wc-${ctx.slice.id}`; + reports.append(line(id, ctx, 'code-writer', 'code-written', {})); + return id; + }, + 'assess-semantic': async (ctx: ActionContext) => { + const id = `sem-${ctx.slice.id}`; + reports.append(line(id, ctx, 'semantic-assessor', 'semantic-assessed', { satisfied: true })); + return id; + }, + // The epic integration test: passes iff the folded src.txt carries the fix + // token (or the scenario forces it green to isolate dual re-verify). + 'verify-epic': async (ctx: ActionContext) => { + const srcPath = join(ctx.sandboxDir, 'src.txt'); + const txt = existsSync(srcPath) ? readFileSync(srcPath, 'utf8') : ''; + const passed = opts.epicPasses ?? txt.includes('FIXED'); + const id = `ve-${ctx.epic.id}-${reports.getAll().length}`; + reports.append(line(id, ctx, 'orchestrator', 'epic-verified', { passed })); + return id; + }, + // The stub remediation agent acts on the FOLDED epic tree. + 'remediate-epic': async (ctx: ActionContext) => { + if (opts.remediation === 'fix') { + const srcPath = join(ctx.sandboxDir, 'src.txt'); + const before = existsSync(srcPath) ? readFileSync(srcPath, 'utf8') : ''; + writeFileSync(srcPath, `${before}FIXED\n`); // product code only + } else if (opts.remediation === 'touch-test') { + // Try to green the epic by editing its own oracle — must be rejected. + writeFileSync(join(ctx.sandboxDir, EPIC_TEST_TARGET), 'tampered\n'); + } + // 'noop': touch nothing. 
+ const id = `rem-${ctx.epic.id}-${reports.getAll().length}`; + reports.append(line(id, ctx, 'coding-agent', 'remediation-agent-done', {})); + return id; + }, + }; + } + + function line( + id: string, + ctx: ActionContext, + actor: ReportLine['actor'], + event: string, + payload: Record, + ): ReportLine { + return { + id, + ts: new Date().toISOString(), + epicId: ctx.epic.id, + sliceId: ctx.slice.id, + actor, + event, + payload, + }; + } + + function passingRunner(): TestRunner { + return { + async run() { + return { passed: true, output: 'ok' }; + }, + }; + } + + // Pass in the slice loop (slice worktree cwd), fail in dual re-verify (folded + // __epic__ cwd) — isolates the slice-suite-on-folded-tree signal. + function failOnFoldedRunner(): TestRunner { + return { + async run(_target: string, sandboxDir: string) { + if (sandboxDir.includes('__epic__')) + return { passed: false, output: 'slice regressed', failureKind: 'test' }; + return { passed: true, output: 'ok' }; + }, + }; + } + + async function runCook( + source: string, + actions: ActionHandlers, + testRunner: TestRunner, + maxRetries: number, + ) { + const resolved = resolveCookPlan(source); + if (resolved.kind !== 'resolved') throw new Error('plan not resolved'); + const plan = loadPlan(resolved.planPath); + const sandboxPlan = resolveSandboxPlan(plan.mode, resolved.sourceDir); + if (sandboxPlan.kind !== 'codebase') throw new Error('expected codebase sandbox'); + const sandbox = createSandbox(source, undefined, { mode: 'codebase', sourceDir: sandboxPlan.sourceDir }); + const reports = (actions as { __reports?: InMemoryReportSink }).__reports!; + const engine = createOrchestrator('serial'); + const result = await engine.run({ + plan, + sandboxDir: sandbox.sandboxDir, + actions, + reports, + testRunner, + policy: { maxRetries }, + sandboxMode: 'codebase', + runId: sandbox.runId, + }); + return { result, reports, sandbox }; + } + + function withReports(reports: InMemoryReportSink, actions: ActionHandlers): 
ActionHandlers { + (actions as { __reports?: InMemoryReportSink }).__reports = reports; + return actions; + } + + it( + 'broken epic self-heals: remediation edits product code, re-verify passes, fix round-trips to the slice branch', + async () => { + const source = makeSeededRepo(); + const reports = new InMemoryReportSink(); + const actions = withReports(reports, makeFakeActions(reports, { remediation: 'fix' })); + + const { result, sandbox } = await runCook(source, actions, passingRunner(), 3); + + // Epic recovered → run completes. + expect(result.status).toBe('completed'); + expect(result.epics).toContainEqual({ epicId: 'ep', status: 'completed' }); + + // verify-epic ran ≥2 times: at least one FAIL then a PASS. + const verdicts = reports + .getAll() + .filter((r) => r.event === 'epic-verified') + .map((r) => (r.payload as { passed: boolean }).passed); + expect(verdicts).toContain(false); + expect(verdicts).toContain(true); + + // Remediation was accepted (round-trip), not rejected. + const remediations = reports.getAll().filter((r) => r.event === 'epic-remediated'); + expect(remediations.some((r) => (r.payload as { accepted: boolean }).accepted)).toBe(true); + + // fix-promotes: the representative slice branch carries the fix. 
+ const sliceSrc = readFileSync(join(sandbox.sandboxDir, 's1', 'src.txt'), 'utf8'); + expect(sliceSrc).toContain('FIXED'); + }, + GIT_TEST_TIMEOUT_MS, + ); + + it( + 'oracle integrity: a remediation that edits the epic integration test is rejected and the epic halts', + async () => { + const source = makeSeededRepo(); + const reports = new InMemoryReportSink(); + const actions = withReports(reports, makeFakeActions(reports, { remediation: 'touch-test' })); + + const { result } = await runCook(source, actions, passingRunner(), 2); + + expect(result.status).toBe('halted'); + const remediations = reports.getAll().filter((r) => r.event === 'epic-remediated'); + expect(remediations.length).toBeGreaterThan(0); + expect(remediations.every((r) => (r.payload as { accepted: boolean }).accepted === false)).toBe(true); + expect(remediations.some((r) => (r.payload as { reason?: string }).reason === 'touched-test')).toBe( + true, + ); + }, + GIT_TEST_TIMEOUT_MS, + ); + + it( + 'budget exhaustion: a no-op remediation burns the budget and halts with an honest reason', + async () => { + const source = makeSeededRepo(); + const reports = new InMemoryReportSink(); + const actions = withReports(reports, makeFakeActions(reports, { remediation: 'noop' })); + + const { result } = await runCook(source, actions, passingRunner(), 2); + + expect(result.status).toBe('halted'); + expect(result.reason ?? '').toMatch(/remediation attempts/); + // Verify was attempted maxRetries+1 times (initial + one per budget unit). + const verifies = reports.getAll().filter((r) => r.event === 'epic-verified'); + expect(verifies.length).toBe(3); + }, + GIT_TEST_TIMEOUT_MS, + ); + + it( + 'dual re-verify: an epic that greens the integration test but breaks a slice suite is not accepted', + async () => { + const source = makeSeededRepo(); + const reports = new InMemoryReportSink(); + // Force the integration test green; the slice suite fails on the folded tree. 
+ const actions = withReports( + reports, + makeFakeActions(reports, { remediation: 'noop', epicPasses: true }), + ); + + const { result } = await runCook(source, actions, failOnFoldedRunner(), 1); + + // The integration test passed every time, yet the epic never completes: + // the slice-suite re-verify on the folded tree vetoes it. + expect(result.status).toBe('halted'); + const reverify = reports.getAll().filter((r) => r.event === 'epic-slice-reverify'); + expect(reverify.length).toBeGreaterThan(0); + expect(reverify.every((r) => (r.payload as { passed: boolean }).passed === false)).toBe(true); + }, + GIT_TEST_TIMEOUT_MS, + ); +}); diff --git a/src/orchestrator/src/net-blueprint.ts b/src/orchestrator/src/net-blueprint.ts index 129698c67..d5a38ae39 100644 --- a/src/orchestrator/src/net-blueprint.ts +++ b/src/orchestrator/src/net-blueprint.ts @@ -190,11 +190,14 @@ type CompleteEpicDescriptor = { }; /** - * Verify epic — producer. Runs verification synchronously against the - * merged epic sandbox, attaches the verify-epic report to the output - * token, and emits to a single intermediate place. Sibling-passthrough - * transitions downstream route by the report's `passed` field — pass - * marks the epic completed and emits done + dep-signals; fail halts. + * Verify epic — producer with a remediation budget (FE-884). Runs verification + * synchronously against the merged epic sandbox, attaches the verify-epic report + * to the output token, and emits to a single intermediate place plus the epic + * retry-budget place. Sibling-passthrough transitions downstream route by the + * report's `passed` field — pass marks the epic completed and emits done + + * dep-signals; fail (with budget remaining) routes to remediation. On budget + * exhaustion the producer instead emits a halt token (carrying its own + * `haltReason`) to `epic::halted` — mirroring the slice run-tests loop. 
*/ type VerifyEpicDescriptor = { kind: 'verify-epic'; @@ -204,6 +207,37 @@ type VerifyEpicDescriptor = { representativeSliceId: string; /** Single intermediate output place; siblings route from here. */ intermediatePlace: string; + /** Place to emit the (decremented or reset) epic retry-budget token to. */ + budgetPlace: string; + maxRetries: number; +}; + +/** + * Remediate epic — producer (FE-884). On a failed epic verification with budget + * remaining, a code agent is dispatched against the folded `__epic__//` + * tree (where the integration test actually runs), fed the verify diagnosis, to + * fix the cross-slice defect. Two guards on the result: + * - detect-and-reject: if the agent touched any epic integration test target, + * the attempt is reverted (the fix is discarded) so re-verify fails again and + * the budget burns — a remediation may only edit product code, never weaken + * its own oracle. + * - round-trip: an accepted product-code fix is transferred from the detached + * folded tree onto the representative slice's branch (commitSliceWorktree) so + * `harvestCookRun` folds it into the promoted artifact. + * Then loops back to the epic verify-ready place to re-verify. + */ +type RemediateEpicDescriptor = { + kind: 'remediate-epic'; + actionKey: string; + epicId: string; + /** Slice whose branch carries an accepted fix (round-trip target). */ + representativeSliceId: string; + /** Loop-back output set (the epic verify-ready place). */ + outputs: string[]; + /** Place to return the code-agent resource token to. */ + agentReturnPlace: string; + /** Epic integration test targets — touching any rejects the attempt. 
*/ + epicTestTargets: string[]; }; export type HandlerDescriptor = @@ -215,7 +249,8 @@ export type HandlerDescriptor = | AssessSemanticDescriptor | CompleteSliceDescriptor | CompleteEpicDescriptor - | VerifyEpicDescriptor; + | VerifyEpicDescriptor + | RemediateEpicDescriptor; // --------------------------------------------------------------------------- // Transition skeleton — topology + declarative handler recipe @@ -288,6 +323,12 @@ export function enumerateCandidateOutputs(transition: TransitionSkeleton): Set v.target), }, }); } @@ -853,9 +914,55 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, } case 'verify-epic': { - const { actionKey, epicId, representativeSliceId, intermediatePlace } = h; + const { actionKey, epicId, representativeSliceId, intermediatePlace, budgetPlace, maxRetries } = h; const epic = plan.epics.find((e) => e.id === epicId)!; const slice = plan.slices.find((s) => s.id === representativeSliceId)!; + const epicBaseToken: Token = { sliceId: '', epicId }; + // FE-884: route a verify report through the epic remediation budget. + // Brownfield (codebase) is recoverable: a failing verify with budget + // remaining loops to remediation via the intermediate place; exhaustion + // halts with an honest reason. Greenfield is unchanged — a failing verify + // halts immediately (the remediation round-trip is git-based, so it is a + // brownfield-only capability; the greenfield-protecting invariant holds). 
+ const routeVerdict = ( + inputToken: Token, + reportId: string, + passed: boolean, + ): { place: string; token: Token }[] => { + const tok: Token = { ...inputToken, reportId }; + if (passed) { + return [ + { place: intermediatePlace, token: tok }, + { place: budgetPlace, token: { ...epicBaseToken, retryCount: 0 } }, + ]; + } + if (input.sandboxMode !== 'codebase') { + ctx.epicOutcomes.set(epicId, { epicId, status: 'halted' }); + return [ + { + place: ep(epicId, 'halted'), + token: { ...tok, haltReason: `Epic ${epicId} verification failed` }, + }, + ]; + } + const retryCount = inputToken.retryCount ?? 0; + if (retryCount >= maxRetries) { + ctx.epicOutcomes.set(epicId, { epicId, status: 'halted' }); + return [ + { + place: ep(epicId, 'halted'), + token: { + ...tok, + haltReason: `Epic ${epicId} verification failed after ${maxRetries} remediation attempts`, + }, + }, + ]; + } + return [ + { place: intermediatePlace, token: tok }, + { place: budgetPlace, token: { ...epicBaseToken, retryCount: retryCount + 1 } }, + ]; + }; // Epic verification runs against a freshly-merged `__epic__//` // dir built from completed slice worktrees (cross-epic slice deps included). const sliceIdsInMergeOrder = sliceIdsForEpicVerifyMerge(plan, epicId); @@ -907,7 +1014,7 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, payload: { passed: false, reason: 'merge-conflict', conflicts: folded.conflicts }, }); ctx.reportIds.push(failId); - return [{ place: intermediatePlace, token: { ...inputToken, reportId: failId } }]; + return routeVerdict(inputToken, failId, false); } epicSandboxDir = folded.epicSandboxDir; } else { @@ -944,9 +1051,97 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, }; const reportId = await actions[actionKey]!(actCtx); ctx.reportIds.push(reportId); - // Producer emits to the intermediate place; pass/fail routing - // happens in sibling-passthrough transitions downstream. 
- return [{ place: intermediatePlace, token: { ...inputToken, reportId } }]; + const epicPassed = !!(reports.getById(reportId)?.payload as { passed?: boolean } | undefined) + ?.passed; + // FE-884 dual re-verify (codebase-only): a remediation must not green + // the epic integration test while breaking a slice. Re-run the slice + // suites on the SAME folded tree and AND them with the epic verdict — + // a fix that greens the seam but regresses a unit is rejected and burns + // a budget unit, never promoted. Greenfield is untouched (its verify + // halts immediately; re-running slice suites there would change behavior + // and violate the greenfield-protecting invariant). + let passed = epicPassed; + // The pass/fail siblings route on the token's report `passed` field, so + // the combined verdict must be carried by the report the token points at + // — not the raw integration-test report (which a slice veto contradicts). + let verdictReportId = reportId; + if (epicPassed && input.sandboxMode === 'codebase') { + const sliceTargets = plan.slices + .filter((s) => s.epic_id === epicId) + .flatMap((s) => s.verification.map((v) => ({ target: v.target }))); + if (sliceTargets.length > 0) { + const sliceVerify = await runVerification(sliceTargets, testRunner, epicSandboxDir); + if (!sliceVerify.done) { + passed = false; + verdictReportId = createReport(reports, { + epicId, + sliceId: '', + actor: 'orchestrator', + event: 'epic-slice-reverify', + payload: { + passed: false, + failureKind: sliceVerify.failureKind, + results: sliceVerify.results, + }, + }); + ctx.reportIds.push(verdictReportId); + } + } + } + // Route through the epic remediation budget. Pass → done (+ budget + // reset); fail with budget → re-loop via the intermediate place to the + // fail sibling → remediation; fail exhausted → halt. 
+ return routeVerdict(inputToken, verdictReportId, passed); + })(); + net.scheduleDeferred(skel.id, skel.contract, { places: skel.inputs, tokens: consumed }, deferred); + return []; + }; + break; + } + + case 'remediate-epic': { + const { + actionKey, + epicId, + representativeSliceId, + outputs: loopOutputs, + agentReturnPlace, + epicTestTargets, + } = h; + const epic = plan.epics.find((e) => e.id === epicId)!; + const slice = plan.slices.find((s) => s.id === representativeSliceId)!; + const baseToken: Token = { sliceId: '', epicId }; + + // FE-884: run a code agent against the folded epic tree (where the + // integration test actually runs), then detect-and-reject + round-trip + // the fix onto the representative slice's branch so harvestCookRun folds + // it. Always loops back to verify-ready — a rejected/no-op attempt simply + // re-fails verify, burning a budget unit. Brownfield-only by construction. + fire = async (consumed) => { + const deferred = (async () => { + const foldedDir = resolveEpicSandboxDir(input.sandboxDir, epicId); + const actCtx: ActionContext = { slice, epic, plan, sandboxDir: foldedDir, reports }; + await actions[actionKey]!(actCtx); + const outcome = transferFoldedFixToSlice({ + parentSandboxDir: input.sandboxDir, + foldedDir, + slice, + epicTestTargets, + }); + ctx.reportIds.push( + createReport(reports, { + epicId, + sliceId: representativeSliceId, + actor: 'coding-agent', + event: 'epic-remediated', + payload: { accepted: outcome.accepted, reason: outcome.reason, touched: outcome.touched }, + }), + ); + // Loop back to re-verify + return the code agent to its pool. 
+ return [ + ...loopOutputs.map((pl) => ({ place: pl, token: { ...baseToken } })), + { place: agentReturnPlace, token: { ...baseToken } }, + ]; })(); net.scheduleDeferred(skel.id, skel.contract, { places: skel.inputs, tokens: consumed }, deferred); return []; diff --git a/src/orchestrator/src/run-artifact.test.ts b/src/orchestrator/src/run-artifact.test.ts index 73f5a2779..7af4d1c5a 100644 --- a/src/orchestrator/src/run-artifact.test.ts +++ b/src/orchestrator/src/run-artifact.test.ts @@ -23,6 +23,7 @@ import { materializeEpicVerifyTree, materializeFoldedWorktree, type SliceCommit, + transferFoldedFixToSlice, } from './run-artifact.js'; import type { Plan, Slice } from './types.js'; @@ -310,3 +311,102 @@ describe('harvestCookRun (commit slice worktrees + fold)', () => { expect(readFileSync(join(link, 'added/index.js'), 'utf8')).toBe('added\n'); }); }); + +// FE-884: the remediation round-trip — the riskiest assumption. A fix made in the +// detached folded epic tree must reach a *slice branch* (harvest never folds the +// epic dir), and a fix that edits the epic's integration test must be rejected. +describe('transferFoldedFixToSlice (FE-884 remediation round-trip)', () => { + let source: string; + let parent: string; + let foldedDir: string; + const plan: Plan = { + mode: 'brownfield', + epics: [ + { + id: 'e', + summary: 'E', + depends_on: [], + verification: [{ kind: 'integration-test', target: 'it.test.ts' }], + }, + ], + slices: [slice('a')], + }; + + beforeEach(() => { + source = mkdtempSync(join(tmpdir(), 'brunch-remediate-')); + gitC(source, 'init', '-q', '-b', 'main'); + writeFileSync(join(source, 'base.txt'), 'base\n'); + gitC(source, 'add', '-A'); + gitC(source, 'commit', '-q', '-m', 'base'); + parent = join(source, 'sandbox'); + gitC(source, 'worktree', 'add', '-q', '-b', brunchRef.run('r1'), parent, 'HEAD'); + // Slice 'a' worktree carries the (buggy) product file the agent will fix. 
+ const sliceDir = join(parent, 'a'); + gitC(source, 'worktree', 'add', '-q', '-b', brunchRef.slice('r1', 'a'), sliceDir, brunchRef.run('r1')); + writeFileSync(join(sliceDir, 'lib.ts'), 'export const view = "broken";\n'); + // verify-epic composed the folded tree (commits slice 'a', folds it detached). + foldedDir = materializeEpicVerifyTree({ + parentSandboxDir: parent, + runId: 'r1', + plan, + sliceIds: ['a'], + epicId: 'e', + }).epicSandboxDir; + }); + afterEach(() => rmSync(source, { recursive: true, force: true })); + + it('round-trips a product-code fix onto the slice branch so harvest folds it', () => { + // The remediation agent fixes the bug in the folded tree (where the epic test runs). + writeFileSync(join(foldedDir, 'lib.ts'), 'export const view = "fixed";\n'); + + const outcome = transferFoldedFixToSlice({ + parentSandboxDir: parent, + foldedDir, + slice: slice('a'), + epicTestTargets: ['it.test.ts'], + }); + expect(outcome.accepted).toBe(true); + + // The fix is on the slice branch... + expect(gitC(source, 'show', `${brunchRef.slice('r1', 'a')}:lib.ts`)).toContain('fixed'); + // ...and therefore survives promotion (harvest folds slice branches, not the epic dir). + const artifact = harvestCookRun({ + sourceDir: source, + parentSandboxDir: parent, + runId: 'r1', + plan, + completedSliceIds: ['a'], + }); + expect(artifact.conflicts).toEqual([]); + expect(gitC(source, 'show', `${brunchRef.run('r1')}:lib.ts`)).toContain('fixed'); + }); + + it('rejects (detect-and-reject) an attempt that edits the epic integration test', () => { + // An agent tries to green the epic by gutting its own oracle. 
+ writeFileSync(join(foldedDir, 'it.test.ts'), 'it("passes", () => {});\n'); + writeFileSync(join(foldedDir, 'lib.ts'), 'export const view = "sneaky";\n'); + + const outcome = transferFoldedFixToSlice({ + parentSandboxDir: parent, + foldedDir, + slice: slice('a'), + epicTestTargets: ['it.test.ts'], + }); + + expect(outcome.accepted).toBe(false); + expect(outcome.reason).toBe('touched-test'); + // The whole attempt is discarded — nothing reaches the slice branch. + expect(gitC(source, 'show', `${brunchRef.slice('r1', 'a')}:lib.ts`)).toContain('broken'); + }); + + it('rejects a no-op attempt (agent changed nothing)', () => { + const outcome = transferFoldedFixToSlice({ + parentSandboxDir: parent, + foldedDir, + slice: slice('a'), + epicTestTargets: ['it.test.ts'], + }); + expect(outcome.accepted).toBe(false); + expect(outcome.reason).toBe('no-op'); + }); +}); diff --git a/src/orchestrator/src/run-artifact.ts b/src/orchestrator/src/run-artifact.ts index 300a851e4..0cbcee36f 100644 --- a/src/orchestrator/src/run-artifact.ts +++ b/src/orchestrator/src/run-artifact.ts @@ -11,7 +11,7 @@ import { execFileSync, spawnSync } from 'node:child_process'; import { existsSync, lstatSync, rmSync } from 'node:fs'; -import { join, resolve } from 'node:path'; +import { basename, join, resolve } from 'node:path'; import { linkSharedTopLevelEntries } from './cow-copy.js'; import { @@ -166,6 +166,68 @@ export function commitSliceWorktree(opts: { return { sliceId: opts.slice.id, commit, title }; } +/** Does a git-touched path correspond to one of the epic's integration test targets? */ +function touchesTestTarget(touched: string, targets: readonly string[]): boolean { + const base = basename(touched); + return targets.some((t) => touched === t || touched.endsWith(`/${t}`) || basename(t) === base); +} + +/** + * FE-884 remediation round-trip. 
After a remediation agent edits the *folded* + * `__epic__//` tree (the detached worktree where the integration test runs), + * carry the fix back so it can be verified and promoted: + * + * - detect-and-reject (oracle integrity): if the agent touched any epic + * integration test target, discard the whole attempt (`reset --hard` + + * `clean`) and reject — a remediation may only edit product code, never + * weaken its own oracle. The caller re-verifies (which fails again, burning a + * budget unit). + * - round-trip: otherwise apply the product-code diff onto the representative + * slice's worktree and commit it to `brunch/slice//`, so the + * next verify-epic re-fold *and* `harvestCookRun` both include the fix. The + * detached folded tree is never harvested directly — only slice branches are. + * + * Apply is `--3way` and failure is non-fatal (returns `apply-failed`): a fix that + * cannot be attributed to the representative slice's tree is rejected rather than + * thrown, so the loop degrades to a burned budget unit instead of crashing. + */ +export function transferFoldedFixToSlice(opts: { + parentSandboxDir: string; + foldedDir: string; + slice: Slice; + epicTestTargets: readonly string[]; +}): { accepted: boolean; reason?: 'no-op' | 'touched-test' | 'apply-failed'; touched: string[] } { + const { foldedDir } = opts; + const sliceDir = resolveSliceWorktreeDir(opts.parentSandboxDir, opts.slice.id); + // Stage everything so new files are enumerated too, then list touched paths. + git(['add', '-A'], foldedDir); + const touched = git(['diff', '--cached', '--name-only'], foldedDir).split('\n').filter(Boolean); + if (touched.length === 0) { + git(['reset'], foldedDir); + return { accepted: false, reason: 'no-op', touched: [] }; + } + if (touched.some((pth) => touchesTestTarget(pth, opts.epicTestTargets))) { + // Discard the whole attempt — the agent tried to edit its own oracle. 
+ git(['reset', '--hard'], foldedDir); + git(['clean', '-fd'], foldedDir); + return { accepted: false, reason: 'touched-test', touched }; + } + // Raw (untrimmed) capture: a patch must keep its trailing newline or `git + // apply` rejects it as corrupt — the trimming `git()` helper would break it. + const patch = execFileSync('git', ['diff', '--cached'], { cwd: foldedDir, encoding: 'utf8' }); + git(['reset'], foldedDir); // leave the folded tree unstaged; it is force-rebuilt on re-verify + const applied = spawnSync('git', ['apply', '--3way', '--whitespace=nowarn'], { + cwd: sliceDir, + input: patch, + encoding: 'utf8', + }); + if (applied.status !== 0) { + return { accepted: false, reason: 'apply-failed', touched }; + } + commitSliceWorktree({ parentSandboxDir: opts.parentSandboxDir, slice: opts.slice }); + return { accepted: true, touched }; +} + /** * Fold the given slice commits into the run branch in order via `merge-tree`, * producing one merge node per slice. Stops at the first real conflict and diff --git a/src/orchestrator/src/topology.test.ts b/src/orchestrator/src/topology.test.ts index 91f0c0bc6..93f0dcf61 100644 --- a/src/orchestrator/src/topology.test.ts +++ b/src/orchestrator/src/topology.test.ts @@ -338,7 +338,7 @@ describe('FE-761 Slice 1: sibling-transition decomposition', () => { } }); - it('verify-epic decomposes into producer + pass sibling + fail halt-sibling', () => { + it('verify-epic decomposes into producer + pass sibling + fail→remediate router (FE-884)', () => { // verifyPlan: epic-1 has verification, slice-1 inside it. const verifyPlan = { mode: 'greenfield' as const, @@ -366,13 +366,17 @@ describe('FE-761 Slice 1: sibling-transition decomposition', () => { expect(producer, 'expect verify-epic producer').toBeDefined(); expect(producer!.handler.kind).toBe('verify-epic'); - // Producer emits to single intermediate place (no direct done/halt routes). 
- expect(enumerateCandidateOutputs(producer!)).toEqual(new Set(['epic:epic-1:verify:reported'])); + // FE-884: the verify producer now carries the epic retry budget — it emits to + // the intermediate place, the budget place, and (on exhaustion) the halt place, + // mirroring the slice run-tests producer. + expect(enumerateCandidateOutputs(producer!)).toEqual( + new Set(['epic:epic-1:verify:reported', 'epic:epic-1:retry-budget', 'epic:epic-1:halted']), + ); const passSibling = blueprint.transitions.find((t) => t.id === 'epic-verify:epic-1:pass'); const failSibling = blueprint.transitions.find((t) => t.id === 'epic-verify:epic-1:fail'); expect(passSibling, 'expect epic-verify:pass sibling').toBeDefined(); - expect(failSibling, 'expect epic-verify:fail halt-sibling').toBeDefined(); + expect(failSibling, 'expect epic-verify:fail router').toBeDefined(); for (const sibling of [passSibling!, failSibling!]) { expect(sibling.inputs).toEqual(['epic:epic-1:verify:reported']); @@ -381,10 +385,9 @@ describe('FE-761 Slice 1: sibling-transition decomposition', () => { // Pass sibling emits to the epic done place (no depSignals here — epic-1 has no epic dependents). expect(enumerateCandidateOutputs(passSibling!)).toEqual(new Set(['epic:epic-1:done'])); - // Fail halt-sibling emits to the epic halted place (FE-761 Slice 2a: - // halted-as-place — halt is now a structural place-token, not a ctx side - // effect alone). - expect(enumerateCandidateOutputs(failSibling!)).toEqual(new Set(['epic:epic-1:halted'])); + // FE-884: the fail sibling is now a router to remediation, not a halt sink. + // Halt is the producer's job on budget exhaustion. + expect(enumerateCandidateOutputs(failSibling!)).toEqual(new Set(['epic:epic-1:remediate:ready'])); // Branching descriptor fields are gone from the producer. 
const producerHandler = producer!.handler; @@ -479,3 +482,99 @@ describe('FE-761 Slice 2a: halted-as-place', () => { expect(blueprint.places).not.toContain('epic:epic-1:halted'); }); }); + +// --------------------------------------------------------------------------- +// FE-884: recoverable epic verification +// +// A failed epic verification routes to a remediation code agent against the +// folded epic tree and re-verifies, mirroring the slice run-tests retry loop. +// The budget lives in the verify producer; the fail sibling is a pure router to +// remediation; halt happens only on budget exhaustion. +// --------------------------------------------------------------------------- + +describe('FE-884: recoverable epic verification', () => { + // The remediation topology is compiled for every plan; codebase-only behavior + // (the actual remediation round-trip) is gated at runtime via sandboxMode, so + // the topology fixture is a plain brownfield plan, not the (invalid) 'codebase' + // plan mode. 
+ const verifyPlan: Plan = { + mode: 'brownfield', + epics: [ + { + id: 'epic-1', + summary: 'E', + depends_on: [], + verification: [{ kind: 'integration-test', target: 'it.test.ts' }], + }, + ], + slices: [ + { + id: 'slice-1', + epic_id: 'epic-1', + definition: 'D', + depends_on: [], + verification: [{ kind: 'unit-test', target: 't' }], + }, + ], + }; + + it('declares the epic remediation places and seeds the retry budget', () => { + const blueprint = compileTopology(verifyPlan, { maxRetries: 3 }); + expect(blueprint.places).toContain('epic:epic-1:retry-budget'); + expect(blueprint.places).toContain('epic:epic-1:remediate:ready'); + expect(blueprint.places).toContain('epic:epic-1:remediate:running'); + + const budgetSeed = blueprint.initialTokens.find((t) => t.place === 'epic:epic-1:retry-budget'); + expect(budgetSeed, 'expect a seeded epic retry-budget token').toBeDefined(); + expect(budgetSeed!.token.retryCount).toBe(0); + }); + + it('verify dispatch consumes the epic retry-budget (budget checked out for the verify)', () => { + const blueprint = compileTopology(verifyPlan, { maxRetries: 3 }); + const dispatch = blueprint.transitions.find((t) => t.id === 'epic-verify:epic-1:dispatch'); + expect(dispatch).toBeDefined(); + expect(dispatch!.inputs).toEqual(['epic:epic-1:verify-ready', 'epic:epic-1:retry-budget']); + }); + + it('the fail sibling routes to remediation, not the halt place', () => { + const blueprint = compileTopology(verifyPlan, { maxRetries: 3 }); + const failSibling = blueprint.transitions.find((t) => t.id === 'epic-verify:epic-1:fail'); + expect(failSibling).toBeDefined(); + expect(enumerateCandidateOutputs(failSibling!)).toEqual(new Set(['epic:epic-1:remediate:ready'])); + }); + + it('remediation decomposes into dispatch (grabs code agent) + complete (loops back to verify)', () => { + const blueprint = compileTopology(verifyPlan, { maxRetries: 3 }); + + const dispatch = blueprint.transitions.find((t) => t.id === 'epic-remediate:epic-1:dispatch'); 
+ expect(dispatch, 'expect remediate dispatch').toBeDefined(); + expect(dispatch!.inputs).toEqual(['epic:epic-1:remediate:ready', 'pool:code-agent']); + expect(enumerateCandidateOutputs(dispatch!)).toEqual(new Set(['epic:epic-1:remediate:running'])); + + const complete = blueprint.transitions.find((t) => t.id === 'epic-remediate:epic-1:complete'); + expect(complete, 'expect remediate complete').toBeDefined(); + expect(complete!.handler.kind).toBe('remediate-epic'); + expect(complete!.inputs).toEqual(['epic:epic-1:remediate:running']); + // Loops back to re-verify and returns the code agent to its pool. + expect(enumerateCandidateOutputs(complete!)).toEqual( + new Set(['epic:epic-1:verify-ready', 'pool:code-agent']), + ); + + const handler = complete!.handler; + if (handler.kind === 'remediate-epic') { + // Detect-and-reject knows the epic's own integration test targets. + expect(handler.epicTestTargets).toEqual(['it.test.ts']); + } + }); + + it('the verify producer carries the epic retry budget and max-retries', () => { + const blueprint = compileTopology(verifyPlan, { maxRetries: 5 }); + const producer = blueprint.transitions.find((t) => t.id === 'epic-verify:epic-1:complete'); + const handler = producer!.handler; + expect(handler.kind).toBe('verify-epic'); + if (handler.kind === 'verify-epic') { + expect(handler.budgetPlace).toBe('epic:epic-1:retry-budget'); + expect(handler.maxRetries).toBe(5); + } + }); +}); From 34a1f44c6031fc14b09e4d001f6e2fc1c8fa7422 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Thu, 18 Jun 2026 13:44:45 +0100 Subject: [PATCH 2/9] FE-884: reconcile SPEC/PLAN for recoverable epic verification MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SPEC: add D170-K (a failed epic verification is recoverable, not terminal — remediation loop + detect-and-reject + dual re-verify + diff-transfer round-trip) and I138-K; the validated round-trip assumption retires into the decision rather than a 
standalone row. Fold the oracle strategy into §Verification Design (topology + scripted-agent recovery e2e) and add the LLM-remediation-competence blind spot. PLAN: register epic-verify-recovery (FE-884) as a frontier under Sequencing + Frontier Definitions — Slice A done, B (epic infra/timeout classification) and C (partial promotion) remaining. --- memory/PLAN.md | 16 ++++++++++++++++ memory/SPEC.md | 4 ++++ 2 files changed, 20 insertions(+) diff --git a/memory/PLAN.md b/memory/PLAN.md index 8a15dc7c9..cb7378353 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -76,6 +76,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen ### Parallel / Low-conflict - `cook-artifact-lifecycle` — **FE-883**, branch `ka/fe-883-orchestrator-improvements` (on FE-864). Wire the already-built `run-artifact.ts` git-merge composer (871ef087) into the live promotion/verify path, replacing the file-copy union; then worktree/branch GC. Slice 1a (composer correct under dep-seeding) landed; wiring + GC remain. Execution queue in `memory/CARDS.md`. +- `epic-verify-recovery` — **FE-884**, branch `ka/fe-884-epic-verify-recovery` (stacked on FE-883's `ka/fe-883-worktree-gc`). Make a failed epic verification recoverable instead of terminal: a remediation loop on the verify-epic fail-sibling (detect-and-reject + dual re-verify + diff-transfer round-trip), then epic infra/timeout classification (B) + partial promotion (C). Slice A landed (`580ed50f`); B + C remain. Per-branch queue in `memory/CARDS.md` (FE-884 on this branch). - `first-run-provider-setup` — provider/key UX and runtime seam can progress independently of semantic-stack work. - `workspace-gitignore-assist` — small workspace hygiene surface with low overlap. - `productized-web-research` — waits on prompt/context scenario substrate for probe quality, but can remain separate from semantic schema work. 
@@ -505,6 +506,21 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Traceability:** Requirement 49; I124-K (file-copy → fork on `plan.mode` on wiring), I135-K (promotion checkout-untouched, preserved); A49. Precursor to Horizon `parallel-merge-conflict-reconciliation`. - **Design docs:** `docs/design/orchestrator.md`; SPEC §A49. +### epic-verify-recovery + +- **Name:** Epic verification recovery — recoverable epic verify instead of terminal halt +- **Linear:** FE-884 +- **Kind:** structural (Slice A establishes I138-K, amends the verify-epic topology); hardening +- **Status:** in progress (2026-06-18) — branch `ka/fe-884-epic-verify-recovery` on FE-883's GC tip. Slice A landed (`580ed50f`); B + C remain. Queue in `memory/CARDS.md`. +- **Objective:** Close the orchestrator's verification asymmetry — the slice tier is recoverable (run-tests retry loop) but the epic tier routes `epic-verify:<epicId>:fail` straight to halt, so a diagnosed cross-slice defect discards the whole run and promotes nothing. Make a failed epic recoverable: a remediation code agent on the folded `__epic__` tree, bounded by an `epic-retry-budget`, with detect-and-reject (no editing the epic test) + dual re-verify (epic test AND slice suites) integrity guards, and a diff-transfer round-trip so the fix promotes. Substrate-free — distinct from Arc-2 `interactive-recovery`/`adaptive-replan`. +- **Why now / unlocks:** Builds directly on FE-883's folded-tree composition (`materializeEpicVerifyTree`, `harvestCookRun`); without it, the worked example (run `59100820`: a small, ~10-line fixable `useViewParam` defect) halts a 60-minute run and is fixed by hand. Raises cook's unattended completion rate before the Arc-2 autonomy ladder. +- **Slice A (done, `580ed50f`):** the remediation loop + detect-reject + dual re-verify + `transferFoldedFixToSlice` round-trip. 
Round-trip design finding: `harvestCookRun` folds only slice worktrees, so the folded-tree fix is committed to the representative slice branch. Proven by topology goldens, run-artifact units, and a scripted-agent e2e (fixable / reject / veto / exhaustion). Real-agent dogfood of run 59100820 is outer-loop, deferred. +- **Remaining:** B — extend FE-872's `failureKind: 'infra'|'test'` to the verify-epic verdict so an infra/timeout failure (e.g. `spawnSync npx ETIMEDOUT`) retries instead of halting (independent of A). C — partial promotion: `harvestCookRun` lands passing epics + returns the failing epic's diagnosis instead of `nothing promoted` (depends on A's round-trip + FE-883 GC ref-set; not yet carded). +- **Acceptance / verification:** see `memory/CARDS.md`; oracle strategy folded into SPEC §Verification Design. +- **Depends on:** `cook-artifact-lifecycle` FE-883 (folded tree + harvest + idempotent `commitSliceWorktree`); FE-872 (failureKind, for B). +- **Traceability:** Requirement 49; establishes I138-K, D170-K; builds on I124-K, D159-K. +- **Design docs:** `docs/design/orchestrator.md`. + ### brunch-ship - **Name:** Brunch ship — one-shot autonomous spec→feature wrapper diff --git a/memory/SPEC.md b/memory/SPEC.md index 93b327599..d3a4f0bab 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -221,6 +221,7 @@ Brunch operates inside a **workspace**: the cwd-backed software context whose lo 167. 
**The emitter guarantees cook-executability through a self-contained `PlanContract` + deterministic repair, separate from intent projection** (FE-829) — `brunch plan` gates its output on a producer-agnostic `PlanContract` that checks the schema-checkable executability invariants (I129-K), plus a deterministic repair loop that fixes the **mechanical class** (Kahn cycle-break; mint a missing verification target; **synthesize an `integration-test` seam on every multi-slice epic** so the per-epic merge runs and composition is proven) while surfacing the **design class** (uncovered requirement; shared file with no declared join owner) as typed warnings rather than silently inventing or dropping scope. This splits today's reconciliation ("always repair, never check") into detect-then-repair, makes "is this plan cook-executable?" one reusable predicate that also validates hand-authored fixtures, and directly closes the FE-800 integration-blind / "green checks, no assembled artifact" gap. Slice 1 (contract + repair; no LLM; no file/decomposition authoring) does **not** touch D160-K. **Slice-1 refinement (2026-06-09):** the reusable-predicate goal collided with the read-only reference fixtures — two of them carry intentionally bare multi-slice epics (their `core` and `pipeline` epics) — so the seam invariant is enforced through **two `checkPlan` profiles**: `base` (default, for authored/producer-input plans) reports the missing seam as a *warning*; `emitted` (for `brunch plan` output) reports it as an *error*. `repairPlan` always synthesizes the seam regardless of profile, so emitted plans pass `emitted` while fixtures pass `base` unmodified. Implemented as `plan-contract.ts` (`checkPlan`/`repairPlan`) + a shared `plan-graph.ts` Kahn helper (reused by `reconcilePlan` so the two cycle-break policies cannot drift) + a `project-profile.ts` `Toolchain` descriptor that *derives* verification targets (`sliceTarget`/`epicTarget`) instead of hardcoding `tests/.test.ts`. 
Depends on: Requirements 46–50; A97, D158-K, D161-K; establishes I129-K. See Future Direction §Cook plan generation for the build-architect arc and the deferred D160-K amendment. 168. **Brownfield promotion is automatic and plumbing-only; `brunch serve` is the one-shot capstone** (FE-877, FE-878) — a completed brownfield cook auto-commits its composed tree onto the repo's own `cook/` branch (the branch the CoW sandbox already created from `HEAD`) via git plumbing (`commit-tree` + CAS `update-ref`, throwaway index + external work-tree), so the user's active branch, working tree, and index are never touched; merging stays the user's call. `--out` is therefore greenfield-only — for brownfield it is ignored with a warning. `brunch serve ` = `plan ` then `cook --spec=` (cook reads the just-emitted plan; serve threads the resolved launch cwd as cook's `dir` because `runCook` reads `opts.dir` raw — the launch-cwd default lives only in `parseCookArgs`, R46); serve's `--out` is the greenfield promote target, petrinaut/policy/retry flags forward to cook, and a failed plan short-circuits (nothing cooked). Pure glue — no new orchestration; the testable units are `parseServeArgs` + `runServe` (stages injected) with db/snapshot wiring in `cli.ts`. **Closes Arc 1.** Depends on: Requirements 46, 49; decision 166; establishes I135-K. (FE-877, FE-878) 169. **Cook epic dep resolution links the richest completed install; deps are never copied** (cook-artifact-lifecycle follow-on, 2026-06-18) — `node_modules` (and any `SHAREABLE_TOP_LEVEL_ENTRIES`) is a derived artifact shared by symlink, never walked or copied during slice→epic merge or dependency seeding. `walkFiles` skips it by NAME, robust to an in-slice install clobbering the shared symlink into a real ~900M tree (the prior per-slice/per-epic deep copy filled the disk — ENOSPC). 
The epic verify tree then links its `node_modules` from a completed slice that materialized a real install (a superset of the parent's pre-run install) when one exists, else the parent; multiple divergent slice installs resolve last-in-plan-order with the divergence reported as a `node_modules` MergeConflict, not unioned. No harness install verb is added — install stays agent-native (A98). The correct union ("reconstruct derived from source": merge `package.json`/lockfile and reinstall into a shared store) is the deferred git-merge remodel; run-dir GC stays operator-owned. Depends on: Requirement 49; A98, A101-K; establishes I137-K; refines I123-K. +170. **A failed epic verification is recoverable, not terminal** (epic-verify-recovery, FE-884, 2026-06-18) — the `epic-verify:<epicId>:fail` sibling no longer routes straight to the epic halted place; it routes to a new `epic-remediate` dispatch/complete chain gated by an `epic-retry-budget`, mirroring the slice-level run-tests loop. A remediation code agent runs against the folded `__epic__/<epicId>/` tree (the same tree FE-883's `materializeEpicVerifyTree` verifies), fed the verify diagnosis; the epic reaches the halt sink only after the budget is exhausted, with an honest reason. Two integrity guards: **detect-and-reject** — an attempt whose folded-tree diff touches the epic integration test path is rejected and counts against budget (the agent may fix only product code); **dual re-verify** — acceptance requires the epic integration test AND the slice suites to pass on the folded tree, with the combined verdict carried on the routed token. Round-trip correction (the naive assumption was false): `harvestCookRun` folds only slice worktrees, so a folded-tree fix is diff-transferred and committed to the representative slice branch (`transferFoldedFixToSlice`) to reach the promoted artifact. Codebase-mode only; greenfield keeps immediate halt. Substrate-free — distinct from the Arc-2 `interactive-recovery`/`adaptive-replan` rungs. 
Slices B (epic infra/timeout classification) and C (partial promotion) remain. Depends on: Requirement 49; D159-K, I124-K (FE-883 folded tree); establishes I138-K. (FE-884 slice A) #### Provider, prompt/context, and agent substrate @@ -284,6 +285,7 @@ Each invariant is a formalization candidate: the property is stated in human lan | I132-K | `Slice.writes?: string[]` declares the repo-relative POSIX file paths a slice exclusively mutates (exact paths only — no globs/directories), and `checkPlan` enforces single-writer-per-file: a path declared by ≥2 slices is a `file-write-conflict` — a **design-class warning** (never an error, never auto-repaired), since resolving it changes decomposition/ownership. Duplicate paths within one slice are deduped first and never self-conflict. A "join slice" is the sole writer of a shared coordination file that `depends_on` the slices it joins — not a multi-writer exception. `repairPlan` preserves `writes` verbatim and never moves ownership or synthesizes a join slice; `loadPlan` round-trips the field (absent → undefined). Emitter/LLM authoring of `writes` + requirement decomposition + join synthesis is deferred (D160-K amendment + slice-5 eval). | plan-contract.test.ts (disjoint accepted, overlap warns, intra-slice dup no-false-positive, repair preserves), plan-loader.test.ts (writes round-trip) | Requirements 46–50; A98, A100-K; D160-K (amended), D167-K (FE-829 slice 4) | | I131-K | **Retired (FE-829 post-slice-5)** — `planExecutionOrdering` and its whole `plan-llm-planning.ts` module (+ test) are deleted, having been superseded on the mainline by the authoring architect (I133-K, slice 4B). The only load-bearing survivor, the `PlanningEnrichment` type (reconcile's deterministic-fallback input contract), now lives in `plan-reconciliation.ts` next to its consumer; the duplicate `RunModel` type consolidated onto `plan-architect.ts`. The Zod `planningEnrichmentSchema` and `defaultRunModel` went with the deleted function. 
Historical record (the enrichment-over-projected-slices stage): the slice-3 planner only classified/grouped/ordered the existing `req-*` slices — it never invented, split, merged, renamed, or removed them — and was prompt-enriched with per-slice criteria + `projectPlanningContext` relation edges + the inlined reference-fixture exemplars. That enrichment seam was never validated for model quality and is fully replaced by `architectPlan` (I133-K) + the slice-5 eval harness (I134-K). | plan-planning-context.test.ts (edge lifting/ownership/dedupe — the surviving context seam); architect + eval coverage per I133-K / I134-K | Requirements 46–50; A97; D167-K; superseded by I133-K, retired post-I134-K (FE-829 slice 3 → retired) | | I136-K | **FE-878 presentation seam.** All `serve`/`cook`/`plan` terminal output flows through one `emit(CookEvent)` boundary, never direct `console.*`/`log()` outside `presenter/`; the orchestrator never imports the renderer. A pure `selectPresenter({command,isTTY,ci,reporterFlag})` chooses the backend — `plain` (CI / non-TTY / default), `silent` (`agent` mode), `ink` (interactive TTY; falls back to plain until slice 2). `PlainPresenter` reproduces pre-refactor stderr **byte-identically**; for the cook surface this is made deterministic by an **injected clock** (the presenter owns the elapsed/duration timer) plus a redaction normalizer for absolute paths and `runId`. The bus fans out synchronously and **swallows a thrown presenter** (`emitWarning`) so presentation can never abort a run; **stdout stays empty / JSONL-only**. Behavior-preserving — no `*-started`/activity instrumentation and no live Ink rendering (slice 2). 
**Slices 1a + 1b (done):** seam foundation (`presenter.ts` root + `presenter/{events,bus,select,plain,silent}.ts`) + CLI wiring; **both surfaces migrated** — `plan` (`plan-runner`) and `cook` (`cook-cli` banner/summary/promotion/petrinaut via a `line` passthrough arm + `pi-actions` per-action progress as structured `action`/`verbose` arms). The elapsed timer moved off `pi-actions`' module-level `Date.now()` into the presenter's **injected clock**, seeded by a `cook-start` event. `pi-actions` is now console-free; `cook-cli`'s only residual `console.error` is the injectable Petrinaut-setup default, which the cook path overrides with a bus-backed `log`. **Slice 2a (done):** the `ink` backend is real (no longer a plain fallback) — formatting consolidated into a shared `format.ts` + `clock.ts` (used by both backends so log bodies can't drift), a `RunStore` folds the event stream into `{phase, lines}`, a pure monotonic `nextPhase` projects the brigade tracker (coarse, from post-hoc events; precise in-flight transitions are 2b), and the Ink `App` (brunch-wordmark header in the brand gradient + brigade strip with `✓/◐/○` marks + bounded activity log) renders to **stderr**. **Slice 2b (done):** the dead-air fix — `activity-start`/`activity-progress`/`activity-end` events; the four long waits are bracketed (the three agent sessions self-bracket inside `runPi` with a throttled KB heartbeat; the test-run + probe waits use a `withActivity` helper; promotion brackets in `cook-cli`), always closing via `finally`. `RunStore` tracks a `pending` map; the Ink `PendingPanel` shows a live spinner + label + elapsed + detail (a tick interval runs only while pending is non-empty). Plain/CI renders one `⋯` start line per wait. The seam is now complete across all three commands and both backends. 
**Lifecycle:** the bus creator owns disposal — entry points run through `withCookBus(command, fn)`, which builds the bus and `dispose()`s it (unmounting Ink) in `finally`, so the TUI can't be left mounted and hang the process (ln-review finding). | bus.test.ts (fan-out + error isolation), presenter.test.ts (withCookBus disposes on success + throw), select.test.ts (decision table), plain.test.ts (byte-exact plan + cook arms incl. injected-clock elapsed + activity start-line), plan-runner.test.ts (golden stderr via capturing bus), brownfield-smoke.integration.test.ts (cook end-to-end through the bus), phase.test.ts (monotonic brigade), run-store.test.ts (event fold + pending map + stable snapshot), ink/app.test.tsx (frame: egg + active phase + activity + pending panel), pi-actions.test.ts (balanced activity start/end incl. on session failure), cook-report.test.ts (banner + completion-summary golden — the cook line strings are pure-tested, ln-review #3) | Requirements 46–50; D156-K (reports.jsonl stays the durable medium; CookEvent is ephemeral presentation only) (FE-878) | +| I138-K | A failed epic verification is recoverable, not terminal: the verify-epic fail-sibling routes to an `epic-remediate` dispatch/complete chain bounded by an `epic-retry-budget` (mirroring the slice run-tests loop), reaching the epic halted place only on budget exhaustion. A remediation attempt is accepted only if it does NOT modify the epic integration test (detect-and-reject — counts against budget) AND both the epic integration test and the slice suites pass on the folded tree (dual re-verify); the combined verdict rides the routed token. An accepted fix is diff-transferred and committed to the representative slice branch (`transferFoldedFixToSlice`) so `harvestCookRun` folds it into the promoted artifact. Codebase-mode only; greenfield keeps immediate halt. 
| topology.test.ts (fail→remediate→verify routing, budget decrement, exhaustion→halt), run-artifact.test.ts (detect-reject + round-trip to slice branch), epic-recovery.integration.test.ts (scripted-agent e2e: fixable / reject / dual-re-verify veto / exhaustion) | Requirement 49; D170-K; builds on I124-K (FE-884 slice A) | | I137-K | The cook epic verify tree resolves deps from the richest completed install and never deep-copies them. `walkFiles` skips `SHAREABLE_TOP_LEVEL_ENTRIES` (`node_modules`) by NAME — not only when they are symlinks — so an in-slice `npm install` that clobbers the shared symlink into a real tree is never copied per-slice or per-epic (the prior copy of that ~900M tree caused ENOSPC). `mergeSlicesIntoEpicSandbox` links `node_modules` from a completed slice that materialized a real install (it holds the manifest-reconciled tree with slice-added deps) in preference to the parent's pre-run install; last installer in plan declaration order wins and divergent slice installs are reported as a `node_modules` `MergeConflict`. Whole-plan promotion (`mergeCompletedSlicesIntoTree`) never relinks deps (source-only). Full union of divergent installs is deferred to the lockfile-merge-and-reinstall remodel. | epic-sandbox-merge.test.ts (clobbered real node_modules linked not copied; links to slice install so added dep resolves; parent fallback when no slice installed; divergence reported + last-installer wins) | Requirement 49; D169-K; A101-K; refines I123-K (cook-artifact-lifecycle follow-on) | ## Future Direction Register @@ -458,6 +460,7 @@ Every meaningful code change should pass `npm run fix` in the inner loop and `np | Middle | Context-snapshot replay and handle-refresh oracles | Turn-level snapshots replay unchanged after graph edits; active handles re-snapshot only when changeset-backed item versions advance. 
| Requirement 45; A95; D154; I120 | | Middle | Structured context-builder assertions plus selected golden renderings | Item-list, neighborhood, and economic whole-graph snapshots contain required ids, sections, relation/provenance signals, and stable rendering boundaries without overfitting prose. | Requirements 40, 45; A84, A95; I112, I120 | | Middle | Differential / golden-master with injected clock + path/runId redaction | The `serve`/`cook`/`plan` presentation refactor preserves stderr byte-for-byte; output stays behind the `emit(CookEvent)` boundary and off stdout. | Requirements 46–50; I136-K | +| Inner/Middle | Topology golden + scripted-agent recovery e2e (negative-space test-path guard, dual re-verify invariant, diff-transfer round-trip) | A failed epic self-heals within budget; a remediation that edits the epic test or breaks a slice suite is rejected; an accepted fix promotes via the slice branch; exhaustion halts honestly. | Requirement 49; I138-K; D170-K | | Outer | Fixture-backed manual walkthroughs | Phase transitions, export, resume, graph view, and waiting states feel legible. | Requirements 5, 13–15, 33 | | Outer | Brownfield and scenario-quality review | Generated questions/bundles are useful, grounded, honest about tradeoffs, and not overconfident. | Requirements 3, 16, 20; A67, A68, A90, A91 | | Outer | Dense cascade/reconciliation walkthroughs | Users can understand and resolve downstream graph impact without skipping necessary judgment. | A48, A88, I113, I114 | @@ -480,6 +483,7 @@ Every meaningful code change should pass `npm run fix` in the inner loop and `np | Context-handle refresh before real item versions | Defer handle freshness semantics until `changeset-ledger` supplies real item versions rather than blessing a temporary content fingerprint. | `chat-context-provision` is pulled before changeset-backed item versions exist. 
| | Frozen spinner during a synchronous test run | Slice 2b brackets every wait, but `test-runner` uses blocking `spawnSync`, so the spinner can't animate (only the label + start-elapsed show) while a ≤60s test runs; the async pi session animates fine. | The test-run wait becomes a felt pain point — then move `test-runner` to an async spawn so the event loop can tick. | | Real-terminal Ink *visual* behavior (resize, Ctrl-C, escape codes) | Teardown is now wired + tested (`withCookBus` disposes the bus → unmounts Ink in `finally`; ln-review caught that nothing disposed it before). Frames are unit-tested via ink-testing-library and bundled in the build; what's left is purely visual — not yet walked through in a live terminal. | A manual `brunch cook`/`serve` run shows visual/resize glitches, or before relying on the TUI for a demo. | +| LLM epic-remediation competence (can it fix arbitrary integration defects?) | Loop mechanics are oracle-tested with a scripted agent; competence is bounded by the epic-retry-budget + an honest halt. Detect-and-reject guards only the epic test path — a slice test could be weakened, partially covered by dual re-verify (slice suites must pass). | Dogfood shows a low real-agent fix-rate, or a remediation greens the epic by editing a slice test → freeze all `*.test.*` under the epic. | ### Design Notes From 6c53695e49ff6055cf47d8f0de7a6fa5c9d3fc11 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Thu, 18 Jun 2026 14:02:59 +0100 Subject: [PATCH 3/9] FE-884 (slice B): classify infra/timeout failures at the epic verdict MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The verify-epic fail path now routes on failureKind: an infra/timeout failure re-verifies (bounded by a separate infraRetryCount / RunPolicy.maxInfraRetries), reaching the halt sink with an honest infra reason on exhaustion — never the remediation code agent. A test/logic failure still drives the Slice-A remediation loop. 
Correctness fix: spawnSync surfaces a verify timeout as ETIMEDOUT, but only ENOENT was classified infra, so a timeout was misclassified as `test` and (with slice A) would have fed the remediation agent a non-bug. isInfraSpawnError now treats ENOENT and ETIMEDOUT as infra, and the verify ceiling is raised from 60s to VERIFY_TIMEOUT_MS=180s (npx + code-split warmup). Distinct from FE-864's pi session deadline. --- memory/CARDS.md | 51 ++++++++++++-- .../src/epic-recovery.integration.test.ts | 67 ++++++++++++++++++- src/orchestrator/src/net-blueprint.ts | 8 +++ src/orchestrator/src/net-compiler.ts | 59 ++++++++++++++-- src/orchestrator/src/petri-net.ts | 5 ++ src/orchestrator/src/test-runner.test.ts | 21 ++++++ src/orchestrator/src/test-runner.ts | 34 ++++++++-- src/orchestrator/src/topology.test.ts | 24 +++++-- src/orchestrator/src/types.ts | 3 + 9 files changed, 248 insertions(+), 24 deletions(-) diff --git a/memory/CARDS.md b/memory/CARDS.md index af64f142c..bbb791985 100644 --- a/memory/CARDS.md +++ b/memory/CARDS.md @@ -124,13 +124,52 @@ remediation budget is exhausted. --- -## Slice B — infra/timeout classification at the epic verdict — `next` after A (independent) +## Slice B — infra/timeout classification at the epic verdict — `done` (2026-06-18) + +All 4 acceptance criteria met; gate green (check 0 errors, build ✓, full suite +2110 pass / 2 skip; the lone `build-boundary` failure is the pre-existing +dev-worktree symlink artifact). **Correctness finding:** the prior verify +subprocess timeout was `60_000`, and `spawnSync` timeout surfaces as +`error.code === 'ETIMEDOUT'` — but only `ENOENT` was classified infra, so a +**timeout was misclassified as `test`** and (with Slice A) would have wrongly +fed the remediation code agent a non-bug. Fixed: `ETIMEDOUT → infra` +(`isInfraSpawnError`) + raise `VERIFY_TIMEOUT_MS` to `180_000` (npx + code-split +warmup, ~25s observed). Distinct from FE-864's pi *session* deadline. 
Infra +re-verify is counted by a separate `Token.infraRetryCount` / +`RunPolicy.maxInfraRetries` (defaults to `maxRetries`), so blips don't consume +remediation attempts. + +Light-ish card (adds a small topology arm inside A's now-settled verify-epic seam). + +**Objective:** at the epic verdict, route on `failureKind` (already computed by +`runVerification`, FE-872) so an infra/timeout failure is retried as a toolchain +blip, not fed to the remediation code agent or silently halted. + +**Design decisions:** +- Split the verify-epic fail-sibling by `failureKind`: `infra` → a bounded + **infra-retry** chain (re-dispatch verify; **no** code agent — nothing for an + agent to fix) → `verifyPlace`; exhaustion → `epicHaltedPlace` with an honest + *infra* reason. `test`/logic → the Slice-A remediation loop (unchanged). +- A **separate `epic-infra-budget`** distinct from A's `epic-retry-budget`, so a + toolchain blip doesn't consume remediation attempts (and vice versa). +- **Timeout sizing:** size the verify subprocess (`spawnSync`) timeout to the + target's real cost so `npx` resolution + code-split warmup doesn't spuriously + `ETIMEDOUT` (the `graph-route-wiring` test alone ran 25s). Coordinate with + FE-864's pi-timeout work; do not regress it. + +**Acceptance:** +``` +✓ infra-retries — an infra/timeout verdict re-runs verify (bounded), not the code agent +✓ infra-exhaustion-halts-honestly — exhausted infra retries halt with an infra reason (not "tests failed"/"remediation attempts") +✓ logic-still-remediates — a test/logic failure still routes to the Slice-A remediation loop +✓ timeout-sized — the verify subprocess timeout accommodates code-split warmup (ETIMEDOUT-class regression) +``` + +**Verification:** topology goldens (fail-sibling splits on failureKind; infra-retry +chain + budget; exhaustion→halt reason); engine-contract green; e2e scenario where +verify returns `failureKind:'infra'` once then passes (retries, not remediated). 
-Extend FE-872's `failureKind: 'infra' | 'test'` from the slice `tests-run` report -to the `verify-epic` report; an infra/timeout failure (e.g. `spawnSync npx -ETIMEDOUT`) gets a bounded infra-retry, not an immediate halt; size the verify -subprocess timeout to the target's real cost (the `graph-route-wiring` test alone -ran 25s on code-split + React-Flow warmup). Independent of A — could land first. +Independent of A's logic path; lands on the same branch. ## Slice C — partial promotion / salvage — deferred (not pre-carded) diff --git a/src/orchestrator/src/epic-recovery.integration.test.ts b/src/orchestrator/src/epic-recovery.integration.test.ts index 0b82c3a9e..ae96cbe43 100644 --- a/src/orchestrator/src/epic-recovery.integration.test.ts +++ b/src/orchestrator/src/epic-recovery.integration.test.ts @@ -83,9 +83,16 @@ describe('FE-884 — recoverable epic verification (codebase mode)', () => { // test green to isolate dual re-verify. function makeFakeActions( reports: InMemoryReportSink, - opts: { remediation: 'fix' | 'touch-test' | 'noop'; epicPasses?: boolean }, + opts: { + remediation: 'fix' | 'touch-test' | 'noop'; + epicPasses?: boolean; + // FE-884 Slice B: force the verify-epic verdict to report an infra/timeout + // failure — 'always' (never recovers) or 'once' (infra then pass). + epicInfra?: 'always' | 'once'; + }, ): ActionHandlers { const evalCalls = new Map(); + let verifyCalls = 0; return { 'evaluate-done': async (ctx: ActionContext) => { const n = (evalCalls.get(ctx.slice.id) ?? 0) + 1; @@ -115,10 +122,19 @@ describe('FE-884 — recoverable epic verification (codebase mode)', () => { // The epic integration test: passes iff the folded src.txt carries the fix // token (or the scenario forces it green to isolate dual re-verify). 'verify-epic': async (ctx: ActionContext) => { + verifyCalls += 1; + const id = `ve-${ctx.epic.id}-${reports.getAll().length}`; + // FE-884 Slice B: infra/timeout verdicts re-run verify without remediation. 
+ if (opts.epicInfra === 'always' || (opts.epicInfra === 'once' && verifyCalls === 1)) { + reports.append( + line(id, ctx, 'orchestrator', 'epic-verified', { passed: false, failureKind: 'infra' }), + ); + return id; + } const srcPath = join(ctx.sandboxDir, 'src.txt'); const txt = existsSync(srcPath) ? readFileSync(srcPath, 'utf8') : ''; - const passed = opts.epicPasses ?? txt.includes('FIXED'); - const id = `ve-${ctx.epic.id}-${reports.getAll().length}`; + // 'once': the infra blip cleared on the re-run, so the epic now passes. + const passed = opts.epicInfra === 'once' ? true : (opts.epicPasses ?? txt.includes('FIXED')); reports.append(line(id, ctx, 'orchestrator', 'epic-verified', { passed })); return id; }, @@ -302,4 +318,49 @@ describe('FE-884 — recoverable epic verification (codebase mode)', () => { }, GIT_TEST_TIMEOUT_MS, ); + + it( + 'infra retry (Slice B): an infra/timeout verdict re-runs verify — not the remediation agent — then completes', + async () => { + const source = makeSeededRepo(); + const reports = new InMemoryReportSink(); + // verify reports infra once, then passes on the re-run. + const actions = withReports( + reports, + makeFakeActions(reports, { remediation: 'noop', epicInfra: 'once' }), + ); + + const { result } = await runCook(source, actions, passingRunner(), 3); + + expect(result.status).toBe('completed'); + // Verify ran twice (infra → re-verify → pass); the remediation agent was + // never invoked — an infra blip is a toolchain re-run, not a logic fix. 
+ const verifies = reports.getAll().filter((r) => r.event === 'epic-verified'); + expect(verifies.length).toBe(2); + expect(reports.getAll().filter((r) => r.event === 'epic-remediated')).toHaveLength(0); + }, + GIT_TEST_TIMEOUT_MS, + ); + + it( + 'infra exhaustion (Slice B): a persistent infra/timeout failure halts with an honest infra reason, never remediated', + async () => { + const source = makeSeededRepo(); + const reports = new InMemoryReportSink(); + const actions = withReports( + reports, + makeFakeActions(reports, { remediation: 'noop', epicInfra: 'always' }), + ); + + const { result } = await runCook(source, actions, passingRunner(), 2); + + expect(result.status).toBe('halted'); + // Honest cause — a toolchain/timeout failure, not "tests failed" / "remediation attempts". + expect(result.reason ?? '').toMatch(/infra retries \(toolchain\/timeout\)/); + expect(reports.getAll().filter((r) => r.event === 'epic-remediated')).toHaveLength(0); + // Verify was attempted maxInfraRetries+1 times (initial + one per budget unit). + expect(reports.getAll().filter((r) => r.event === 'epic-verified')).toHaveLength(3); + }, + GIT_TEST_TIMEOUT_MS, + ); }); diff --git a/src/orchestrator/src/net-blueprint.ts b/src/orchestrator/src/net-blueprint.ts index d5a38ae39..4a0260fc4 100644 --- a/src/orchestrator/src/net-blueprint.ts +++ b/src/orchestrator/src/net-blueprint.ts @@ -210,6 +210,11 @@ type VerifyEpicDescriptor = { /** Place to emit the (decremented or reset) epic retry-budget token to. */ budgetPlace: string; maxRetries: number; + /** FE-884 Slice B: the verify-ready place to re-route to on an infra/timeout + * failure (re-run verify without remediation). */ + reverifyPlace: string; + /** FE-884 Slice B: max infra/timeout re-verifies before halting. 
*/ + maxInfraRetries: number; }; /** @@ -325,6 +330,9 @@ export function enumerateCandidateOutputs(transition: TransitionSkeleton): Set e.id === epicId)!; const slice = plan.slices.find((s) => s.id === representativeSliceId)!; const epicBaseToken: Token = { sliceId: '', epicId }; @@ -928,6 +949,7 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, inputToken: Token, reportId: string, passed: boolean, + failureKind?: TestFailureKind, ): { place: string; token: Token }[] => { const tok: Token = { ...inputToken, reportId }; if (passed) { @@ -945,6 +967,28 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, }, ]; } + // FE-884 Slice B: an infra/timeout failure is a toolchain blip, not a + // logic red — re-run verify (bounded by a separate infra counter), + // never the remediation agent. Exhaustion halts with an honest cause. + if (failureKind === 'infra') { + const infraCount = inputToken.infraRetryCount ?? 0; + if (infraCount >= maxInfraRetries) { + ctx.epicOutcomes.set(epicId, { epicId, status: 'halted' }); + return [ + { + place: ep(epicId, 'halted'), + token: { + ...tok, + haltReason: `Epic ${epicId} verification could not run after ${maxInfraRetries} infra retries (toolchain/timeout)`, + }, + }, + ]; + } + return [ + { place: reverifyPlace, token: { ...inputToken, infraRetryCount: infraCount + 1 } }, + { place: budgetPlace, token: { ...epicBaseToken, retryCount: inputToken.retryCount ?? 0 } }, + ]; + } const retryCount = inputToken.retryCount ?? 0; if (retryCount >= maxRetries) { ctx.epicOutcomes.set(epicId, { epicId, status: 'halted' }); @@ -1088,10 +1132,13 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, } } } - // Route through the epic remediation budget. Pass → done (+ budget - // reset); fail with budget → re-loop via the intermediate place to the - // fail sibling → remediation; fail exhausted → halt. 
- return routeVerdict(inputToken, verdictReportId, passed); + // Route through the epic budgets. Pass → done (+ budget reset); infra + // fail → re-verify (bounded, no remediation); test fail with budget → + // re-loop to the fail sibling → remediation; exhausted → halt. + const verdictFailureKind = ( + reports.getById(verdictReportId)?.payload as { failureKind?: TestFailureKind } | undefined + )?.failureKind; + return routeVerdict(inputToken, verdictReportId, passed, verdictFailureKind); })(); net.scheduleDeferred(skel.id, skel.contract, { places: skel.inputs, tokens: consumed }, deferred); return []; diff --git a/src/orchestrator/src/petri-net.ts b/src/orchestrator/src/petri-net.ts index e4f40f278..ce4ae8d5e 100644 --- a/src/orchestrator/src/petri-net.ts +++ b/src/orchestrator/src/petri-net.ts @@ -11,6 +11,11 @@ export type Token = { /** Semantic rework counter — carried on semantic-budget tokens. * Prevents infinite rework loops when assess-semantic always rejects. */ reworkCount?: number; + /** FE-884 Slice B: epic infra/timeout re-verify counter — carried on the + * verify-ready work token. A toolchain/timeout failure re-runs verify + * (bounded) without invoking remediation, so it is counted separately from + * `retryCount` (remediation attempts). */ + infraRetryCount?: number; /** * FE-761 Slice 2b: halt reason carried on tokens emitted to `:halted` * places. Engine derives `result.reason` from this field. 
Replaces the diff --git a/src/orchestrator/src/test-runner.test.ts b/src/orchestrator/src/test-runner.test.ts index 3c6931af2..dad372484 100644 --- a/src/orchestrator/src/test-runner.test.ts +++ b/src/orchestrator/src/test-runner.test.ts @@ -16,14 +16,35 @@ import { afterEach, describe, expect, it } from 'vitest'; import { bunProfile, type Toolchain } from './project-profile.js'; import { classifyTestFailure, + isInfraSpawnError, runVerification, stripAgentTailLines, ToolchainTestRunner, + VERIFY_TIMEOUT_MS, } from './test-runner.js'; import type { TestResult, TestRunner } from './types.js'; const bun = bunProfile.toolchain; +describe('FE-884 Slice B: infra/timeout classification', () => { + it('classifies a timeout-kill (ETIMEDOUT) and a missing runner (ENOENT) as infra', () => { + expect(isInfraSpawnError({ code: 'ETIMEDOUT' })).toBe(true); + expect(isInfraSpawnError({ code: 'ENOENT' })).toBe(true); + }); + + it('does not over-classify: post-start errors and no error stay non-infra', () => { + expect(isInfraSpawnError({ code: 'ENOBUFS' })).toBe(false); + expect(isInfraSpawnError(null)).toBe(false); + expect(isInfraSpawnError(undefined)).toBe(false); + }); + + it('sizes the verify subprocess timeout above a real warmup+run (was 60s)', () => { + // The wait includes npx/runner resolution + framework warmup + code-split + // lazy loading (~25s observed for a single code-split route test). + expect(VERIFY_TIMEOUT_MS).toBeGreaterThan(60_000); + }); +}); + describe('ToolchainTestRunner output fidelity (bun)', () => { const dirs: string[] = []; diff --git a/src/orchestrator/src/test-runner.ts b/src/orchestrator/src/test-runner.ts index 71fd93eb5..3953dd69d 100644 --- a/src/orchestrator/src/test-runner.ts +++ b/src/orchestrator/src/test-runner.ts @@ -23,6 +23,29 @@ const RUNNER_MISSING_PATTERNS: readonly RegExp[] = [ // `test`, never `absent`. const NO_TESTS_PATTERNS: readonly RegExp[] = [/No test files found/i]; +/** + * FE-884 Slice B: the verify subprocess timeout. 
Sized well above a real test + * run because the wait includes `npx`/runner resolution + framework warmup + + * code-split lazy loading (a single code-split route test was observed at ~25s), + * so the prior 60s ceiling spuriously `ETIMEDOUT`-ed and the timeout was then + * misread as a logic red. A timeout is now classified `infra` (see + * `isInfraSpawnError`) and re-run, but the ceiling is also raised so a healthy + * run does not trip it. Distinct from FE-864's pi *session* idle deadline. + */ +export const VERIFY_TIMEOUT_MS = 180_000; + +/** + * FE-884 Slice B: a spawn error that means "the runner never delivered a + * verdict" — the binary is missing (`ENOENT`) or the run was killed by the + * timeout (`ETIMEDOUT`). Both are toolchain/infra faults, not a code assertion, + * so they must not be routed to the (logic-fix) remediation agent. ENOBUFS and + * other post-start errors stay `test` — output exists to classify. + */ +export function isInfraSpawnError(error: unknown): boolean { + const code = (error as NodeJS.ErrnoException | null)?.code; + return code === 'ENOENT' || code === 'ETIMEDOUT'; +} + /** * Classify a non-passing test run. Deliberately conservative ordering: * 1. `infra` — a spawn failure (missing binary) or shell "command not found"; @@ -59,7 +82,7 @@ export class ToolchainTestRunner implements TestRunner { const result = spawnSync(command!, args, { cwd: sandboxDir, encoding: 'utf8', - timeout: 60_000, + timeout: VERIFY_TIMEOUT_MS, stdio: ['ignore', 'pipe', 'pipe'], }); // Test runners vary in which stream carries diagnostics (e.g. `bun test` @@ -70,10 +93,11 @@ export class ToolchainTestRunner implements TestRunner { ); const passed = result.status === 0; if (passed) return { passed, output }; - // `spawnSync.error` also covers timeout / ENOBUFS after the runner started; - // only ENOENT proves the runner binary itself is missing. 
- const runnerMissing = result.error != null && (result.error as NodeJS.ErrnoException).code === 'ENOENT'; - return { passed, output, failureKind: classifyTestFailure(output, runnerMissing) }; + // A missing runner binary (`ENOENT`) or a timeout-kill (`ETIMEDOUT`) means + // the runner never delivered a verdict — an infra fault, not a code red + // (FE-884 Slice B). Other post-start errors stay `test`. + const runnerFailed = isInfraSpawnError(result.error); + return { passed, output, failureKind: classifyTestFailure(output, runnerFailed) }; } } diff --git a/src/orchestrator/src/topology.test.ts b/src/orchestrator/src/topology.test.ts index 93f0dcf61..423a780c7 100644 --- a/src/orchestrator/src/topology.test.ts +++ b/src/orchestrator/src/topology.test.ts @@ -366,13 +366,29 @@ describe('FE-761 Slice 1: sibling-transition decomposition', () => { expect(producer, 'expect verify-epic producer').toBeDefined(); expect(producer!.handler.kind).toBe('verify-epic'); - // FE-884: the verify producer now carries the epic retry budget — it emits to - // the intermediate place, the budget place, and (on exhaustion) the halt place, - // mirroring the slice run-tests producer. + // FE-884: the verify producer carries the epic retry budget — it emits to the + // intermediate place, the budget place, and (on exhaustion) the halt place, + // mirroring the slice run-tests producer. Slice B adds the verify-ready place: + // an infra/timeout verdict re-runs verify (bounded) without remediation. expect(enumerateCandidateOutputs(producer!)).toEqual( - new Set(['epic:epic-1:verify:reported', 'epic:epic-1:retry-budget', 'epic:epic-1:halted']), + new Set([ + 'epic:epic-1:verify:reported', + 'epic:epic-1:retry-budget', + 'epic:epic-1:halted', + 'epic:epic-1:verify-ready', + ]), ); + // FE-884 Slice B: the verify producer descriptor carries the infra-retry + // budget + reverify target distinct from the remediation retry budget. 
+ const verifyHandler = producer!.handler as { + kind: string; + maxInfraRetries: number; + reverifyPlace: string; + }; + expect(verifyHandler.maxInfraRetries).toBe(3); + expect(verifyHandler.reverifyPlace).toBe('epic:epic-1:verify-ready'); + const passSibling = blueprint.transitions.find((t) => t.id === 'epic-verify:epic-1:pass'); const failSibling = blueprint.transitions.find((t) => t.id === 'epic-verify:epic-1:fail'); expect(passSibling, 'expect epic-verify:pass sibling').toBeDefined(); diff --git a/src/orchestrator/src/types.ts b/src/orchestrator/src/types.ts index 43c7cfc4d..b111638d5 100644 --- a/src/orchestrator/src/types.ts +++ b/src/orchestrator/src/types.ts @@ -242,6 +242,9 @@ export type RunPolicy = { maxRetries: number; /** Maximum semantic rework cycles per slice before halting. Defaults to maxRetries. */ maxSemanticReworks?: number; + /** FE-884 Slice B: max epic verify re-runs on an infra/timeout failure before + * halting (toolchain blips are re-run, not remediated). Defaults to maxRetries. */ + maxInfraRetries?: number; /** Number of tokens per shared agent pool (test-agent, code-agent). * Defaults to slice count (unbounded — one token per slice). */ agentPoolSize?: number; From 7fd176f0ca6baad0138f7bc6593a285d7100ff7c Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Thu, 18 Jun 2026 14:04:03 +0100 Subject: [PATCH 4/9] FE-884: reconcile SPEC/PLAN for slice B (infra/timeout classification) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Update I138-K + D170-K in place (same verify-epic seam): the fail path routes on failureKind — infra/timeout re-verifies under a separate infraRetryCount/ maxInfraRetries budget, ETIMEDOUT classified infra under a 180s ceiling, while test/logic still remediates. Mark Slice B done in the epic-verify-recovery frontier; only Slice C (partial promotion) remains. 
--- memory/PLAN.md | 7 ++++--- memory/SPEC.md | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/memory/PLAN.md b/memory/PLAN.md index cb7378353..92fd6ac43 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -76,7 +76,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen ### Parallel / Low-conflict - `cook-artifact-lifecycle` — **FE-883**, branch `ka/fe-883-orchestrator-improvements` (on FE-864). Wire the already-built `run-artifact.ts` git-merge composer (871ef087) into the live promotion/verify path, replacing the file-copy union; then worktree/branch GC. Slice 1a (composer correct under dep-seeding) landed; wiring + GC remain. Execution queue in `memory/CARDS.md`. -- `epic-verify-recovery` — **FE-884**, branch `ka/fe-884-epic-verify-recovery` (stacked on FE-883's `ka/fe-883-worktree-gc`). Make a failed epic verification recoverable instead of terminal: a remediation loop on the verify-epic fail-sibling (detect-and-reject + dual re-verify + diff-transfer round-trip), then epic infra/timeout classification (B) + partial promotion (C). Slice A landed (`580ed50f`); B + C remain. Per-branch queue in `memory/CARDS.md` (FE-884 on this branch). +- `epic-verify-recovery` — **FE-884**, branch `ka/fe-884-epic-verify-recovery` (stacked on FE-883's `ka/fe-883-worktree-gc`). Make a failed epic verification recoverable instead of terminal: a remediation loop on the verify-epic fail-sibling (detect-and-reject + dual re-verify + diff-transfer round-trip), epic infra/timeout classification (B), then partial promotion (C). Slices A + B landed; C remains. Per-branch queue in `memory/CARDS.md` (FE-884 on this branch). - `first-run-provider-setup` — provider/key UX and runtime seam can progress independently of semantic-stack work. - `workspace-gitignore-assist` — small workspace hygiene surface with low overlap. 
- `productized-web-research` — waits on prompt/context scenario substrate for probe quality, but can remain separate from semantic schema work. @@ -511,11 +511,12 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Name:** Epic verification recovery — recoverable epic verify instead of terminal halt - **Linear:** FE-884 - **Kind:** structural (Slice A establishes I138-K, amends the verify-epic topology); hardening -- **Status:** in progress (2026-06-18) — branch `ka/fe-884-epic-verify-recovery` on FE-883's GC tip. Slice A landed (`580ed50f`); B + C remain. Queue in `memory/CARDS.md`. +- **Status:** in progress (2026-06-18) — branch `ka/fe-884-epic-verify-recovery` on FE-883's GC tip. Slices A + B landed; C remains. Queue in `memory/CARDS.md`. - **Objective:** Close the orchestrator's verification asymmetry — the slice tier is recoverable (run-tests retry loop) but the epic tier routes `epic-verify:<epicId>:fail` straight to halt, so a diagnosed cross-slice defect discards the whole run and promotes nothing. Make a failed epic recoverable: a remediation code agent on the folded `__epic__` tree, bounded by an `epic-retry-budget`, with detect-and-reject (no editing the epic test) + dual re-verify (epic test AND slice suites) integrity guards, and a diff-transfer round-trip so the fix promotes. Substrate-free — distinct from Arc-2 `interactive-recovery`/`adaptive-replan`. - **Why now / unlocks:** Builds directly on FE-883's folded-tree composition (`materializeEpicVerifyTree`, `harvestCookRun`); without it, the worked example (run `59100820`: a one-line fixable `useViewParam` defect) halts a 60-minute run and is fixed by hand. Raises cook's unattended completion rate before the Arc-2 autonomy ladder. - **Slice A (done, `580ed50f`):** the remediation loop + detect-reject + dual re-verify + `transferFoldedFixToSlice` round-trip. 
Round-trip design finding: `harvestCookRun` folds only slice worktrees, so the folded-tree fix is committed to the representative slice branch. Proven by topology goldens, run-artifact units, and a scripted-agent e2e (fixable / reject / veto / exhaustion). Real-agent dogfood of run 59100820 is outer-loop, deferred. -- **Remaining:** B — extend FE-872's `failureKind: 'infra'|'test'` to the verify-epic verdict so an infra/timeout failure (e.g. `spawnSync npx ETIMEDOUT`) retries instead of halting (independent of A). C — partial promotion: `harvestCookRun` lands passing epics + returns the failing epic's diagnosis instead of `nothing promoted` (depends on A's round-trip + FE-883 GC ref-set; not yet carded). +- **Slice B (done):** split the verify-epic fail path on `failureKind` (FE-872) — infra/timeout re-verifies under a separate `infraRetryCount`/`maxInfraRetries` budget (no agent), halting honestly on exhaustion; test/logic still remediates. Correctness fix: `spawnSync` timeout surfaces as `ETIMEDOUT`, previously misclassified `test` (would have fed the agent a non-bug) → now infra, ceiling raised 60s→180s. Distinct from FE-864's pi session deadline. +- **Remaining:** C — partial promotion: `harvestCookRun` lands passing epics + returns the failing epic's diagnosis instead of `nothing promoted` (depends on A's round-trip + FE-883 GC ref-set; not yet carded). - **Acceptance / verification:** see `memory/CARDS.md`; oracle strategy folded into SPEC §Verification Design. - **Depends on:** `cook-artifact-lifecycle` FE-883 (folded tree + harvest + idempotent `commitSliceWorktree`); FE-872 (failureKind, for B). - **Traceability:** Requirement 49; establishes I138-K, D170-K; builds on I124-K, D159-K. diff --git a/memory/SPEC.md b/memory/SPEC.md index d3a4f0bab..0a90285bb 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -221,7 +221,7 @@ Brunch operates inside a **workspace**: the cwd-backed software context whose lo 167. 
**The emitter guarantees cook-executability through a self-contained `PlanContract` + deterministic repair, separate from intent projection** (FE-829) — `brunch plan` gates its output on a producer-agnostic `PlanContract` that checks the schema-checkable executability invariants (I129-K), plus a deterministic repair loop that fixes the **mechanical class** (Kahn cycle-break; mint a missing verification target; **synthesize an `integration-test` seam on every multi-slice epic** so the per-epic merge runs and composition is proven) while surfacing the **design class** (uncovered requirement; shared file with no declared join owner) as typed warnings rather than silently inventing or dropping scope. This splits today's reconciliation ("always repair, never check") into detect-then-repair, makes "is this plan cook-executable?" one reusable predicate that also validates hand-authored fixtures, and directly closes the FE-800 integration-blind / "green checks, no assembled artifact" gap. Slice 1 (contract + repair; no LLM; no file/decomposition authoring) does **not** touch D160-K. **Slice-1 refinement (2026-06-09):** the reusable-predicate goal collided with the read-only reference fixtures — two of them carry intentionally bare multi-slice epics (their `core` and `pipeline` epics) — so the seam invariant is enforced through **two `checkPlan` profiles**: `base` (default, for authored/producer-input plans) reports the missing seam as a *warning*; `emitted` (for `brunch plan` output) reports it as an *error*. `repairPlan` always synthesizes the seam regardless of profile, so emitted plans pass `emitted` while fixtures pass `base` unmodified. Implemented as `plan-contract.ts` (`checkPlan`/`repairPlan`) + a shared `plan-graph.ts` Kahn helper (reused by `reconcilePlan` so the two cycle-break policies cannot drift) + a `project-profile.ts` `Toolchain` descriptor that *derives* verification targets (`sliceTarget`/`epicTarget`) instead of hardcoding `tests/.test.ts`. 
Depends on: Requirements 46–50; A97, D158-K, D161-K; establishes I129-K. See Future Direction §Cook plan generation for the build-architect arc and the deferred D160-K amendment. 168. **Brownfield promotion is automatic and plumbing-only; `brunch serve` is the one-shot capstone** (FE-877, FE-878) — a completed brownfield cook auto-commits its composed tree onto the repo's own `cook/` branch (the branch the CoW sandbox already created from `HEAD`) via git plumbing (`commit-tree` + CAS `update-ref`, throwaway index + external work-tree), so the user's active branch, working tree, and index are never touched; merging stays the user's call. `--out` is therefore greenfield-only — for brownfield it is ignored with a warning. `brunch serve ` = `plan ` then `cook --spec=` (cook reads the just-emitted plan; serve threads the resolved launch cwd as cook's `dir` because `runCook` reads `opts.dir` raw — the launch-cwd default lives only in `parseCookArgs`, R46); serve's `--out` is the greenfield promote target, petrinaut/policy/retry flags forward to cook, and a failed plan short-circuits (nothing cooked). Pure glue — no new orchestration; the testable units are `parseServeArgs` + `runServe` (stages injected) with db/snapshot wiring in `cli.ts`. **Closes Arc 1.** Depends on: Requirements 46, 49; decision 166; establishes I135-K. (FE-877, FE-878) 169. **Cook epic dep resolution links the richest completed install; deps are never copied** (cook-artifact-lifecycle follow-on, 2026-06-18) — `node_modules` (and any `SHAREABLE_TOP_LEVEL_ENTRIES`) is a derived artifact shared by symlink, never walked or copied during slice→epic merge or dependency seeding. `walkFiles` skips it by NAME, robust to an in-slice install clobbering the shared symlink into a real ~900M tree (the prior per-slice/per-epic deep copy filled the disk — ENOSPC). 
The epic verify tree then links its `node_modules` from a completed slice that materialized a real install (a superset of the parent's pre-run install) when one exists, else the parent; multiple divergent slice installs resolve last-in-plan-order with the divergence reported as a `node_modules` MergeConflict, not unioned. No harness install verb is added — install stays agent-native (A98). The correct union ("reconstruct derived from source": merge `package.json`/lockfile and reinstall into a shared store) is the deferred git-merge remodel; run-dir GC stays operator-owned. Depends on: Requirement 49; A98, A101-K; establishes I137-K; refines I123-K. -170. **A failed epic verification is recoverable, not terminal** (epic-verify-recovery, FE-884, 2026-06-18) — the `epic-verify:<epicId>:fail` sibling no longer routes straight to the epic halted place; it routes to a new `epic-remediate` dispatch/complete chain gated by an `epic-retry-budget`, mirroring the slice-level run-tests loop. A remediation code agent runs against the folded `__epic__/<epicId>/` tree (the same tree FE-883's `materializeEpicVerifyTree` verifies), fed the verify diagnosis; the epic reaches the halt sink only after the budget is exhausted, with an honest reason. Two integrity guards: **detect-and-reject** — an attempt whose folded-tree diff touches the epic integration test path is rejected and counts against budget (the agent may fix only product code); **dual re-verify** — acceptance requires the epic integration test AND the slice suites to pass on the folded tree, with the combined verdict carried on the routed token. Round-trip correction (the naive assumption was false): `harvestCookRun` folds only slice worktrees, so a folded-tree fix is diff-transferred and committed to the representative slice branch (`transferFoldedFixToSlice`) to reach the promoted artifact. Codebase-mode only; greenfield keeps immediate halt. Substrate-free — distinct from the Arc-2 `interactive-recovery`/`adaptive-replan` rungs. 
Slices B (epic infra/timeout classification) and C (partial promotion) remain. Depends on: Requirement 49; D159-K, I124-K (FE-883 folded tree); establishes I138-K. (FE-884 slice A) +170. **A failed epic verification is recoverable, not terminal** (epic-verify-recovery, FE-884, 2026-06-18) — the `epic-verify:<epicId>:fail` sibling no longer routes straight to the epic halted place; it routes to a new `epic-remediate` dispatch/complete chain gated by an `epic-retry-budget`, mirroring the slice-level run-tests loop. A remediation code agent runs against the folded `__epic__/<epicId>/` tree (the same tree FE-883's `materializeEpicVerifyTree` verifies), fed the verify diagnosis; the epic reaches the halt sink only after the budget is exhausted, with an honest reason. Two integrity guards: **detect-and-reject** — an attempt whose folded-tree diff touches the epic integration test path is rejected and counts against budget (the agent may fix only product code); **dual re-verify** — acceptance requires the epic integration test AND the slice suites to pass on the folded tree, with the combined verdict carried on the routed token. Round-trip correction (the naive assumption was false): `harvestCookRun` folds only slice worktrees, so a folded-tree fix is diff-transferred and committed to the representative slice branch (`transferFoldedFixToSlice`) to reach the promoted artifact. Codebase-mode only; greenfield keeps immediate halt. Substrate-free — distinct from the Arc-2 `interactive-recovery`/`adaptive-replan` rungs. **Slice B (done)** splits the fail path on `failureKind` (FE-872): an infra/timeout verdict re-verifies under a separate `infraRetryCount`/`maxInfraRetries` budget instead of invoking the agent, and a verify-subprocess `ETIMEDOUT` — previously misclassified `test`, so it would have fed the remediation agent a non-bug — is now classified infra under a 180s ceiling (was 60s). Slice C (partial promotion) remains. 
Depends on: Requirement 49; D159-K, I124-K (FE-883 folded tree); establishes I138-K. (FE-884 slices A+B) #### Provider, prompt/context, and agent substrate @@ -285,7 +285,7 @@ Each invariant is a formalization candidate: the property is stated in human lan | I132-K | `Slice.writes?: string[]` declares the repo-relative POSIX file paths a slice exclusively mutates (exact paths only — no globs/directories), and `checkPlan` enforces single-writer-per-file: a path declared by ≥2 slices is a `file-write-conflict` — a **design-class warning** (never an error, never auto-repaired), since resolving it changes decomposition/ownership. Duplicate paths within one slice are deduped first and never self-conflict. A "join slice" is the sole writer of a shared coordination file that `depends_on` the slices it joins — not a multi-writer exception. `repairPlan` preserves `writes` verbatim and never moves ownership or synthesizes a join slice; `loadPlan` round-trips the field (absent → undefined). Emitter/LLM authoring of `writes` + requirement decomposition + join synthesis is deferred (D160-K amendment + slice-5 eval). | plan-contract.test.ts (disjoint accepted, overlap warns, intra-slice dup no-false-positive, repair preserves), plan-loader.test.ts (writes round-trip) | Requirements 46–50; A98, A100-K; D160-K (amended), D167-K (FE-829 slice 4) | | I131-K | **Retired (FE-829 post-slice-5)** — `planExecutionOrdering` and its whole `plan-llm-planning.ts` module (+ test) are deleted, having been superseded on the mainline by the authoring architect (I133-K, slice 4B). The only load-bearing survivor, the `PlanningEnrichment` type (reconcile's deterministic-fallback input contract), now lives in `plan-reconciliation.ts` next to its consumer; the duplicate `RunModel` type consolidated onto `plan-architect.ts`. The Zod `planningEnrichmentSchema` and `defaultRunModel` went with the deleted function. 
Historical record (the enrichment-over-projected-slices stage): the slice-3 planner only classified/grouped/ordered the existing `req-*` slices — it never invented, split, merged, renamed, or removed them — and was prompt-enriched with per-slice criteria + `projectPlanningContext` relation edges + the inlined reference-fixture exemplars. That enrichment seam was never validated for model quality and is fully replaced by `architectPlan` (I133-K) + the slice-5 eval harness (I134-K). | plan-planning-context.test.ts (edge lifting/ownership/dedupe — the surviving context seam); architect + eval coverage per I133-K / I134-K | Requirements 46–50; A97; D167-K; superseded by I133-K, retired post-I134-K (FE-829 slice 3 → retired) | | I136-K | **FE-878 presentation seam.** All `serve`/`cook`/`plan` terminal output flows through one `emit(CookEvent)` boundary, never direct `console.*`/`log()` outside `presenter/`; the orchestrator never imports the renderer. A pure `selectPresenter({command,isTTY,ci,reporterFlag})` chooses the backend — `plain` (CI / non-TTY / default), `silent` (`agent` mode), `ink` (interactive TTY; falls back to plain until slice 2). `PlainPresenter` reproduces pre-refactor stderr **byte-identically**; for the cook surface this is made deterministic by an **injected clock** (the presenter owns the elapsed/duration timer) plus a redaction normalizer for absolute paths and `runId`. The bus fans out synchronously and **swallows a thrown presenter** (`emitWarning`) so presentation can never abort a run; **stdout stays empty / JSONL-only**. Behavior-preserving — no `*-started`/activity instrumentation and no live Ink rendering (slice 2). 
**Slices 1a + 1b (done):** seam foundation (`presenter.ts` root + `presenter/{events,bus,select,plain,silent}.ts`) + CLI wiring; **both surfaces migrated** — `plan` (`plan-runner`) and `cook` (`cook-cli` banner/summary/promotion/petrinaut via a `line` passthrough arm + `pi-actions` per-action progress as structured `action`/`verbose` arms). The elapsed timer moved off `pi-actions`' module-level `Date.now()` into the presenter's **injected clock**, seeded by a `cook-start` event. `pi-actions` is now console-free; `cook-cli`'s only residual `console.error` is the injectable Petrinaut-setup default, which the cook path overrides with a bus-backed `log`. **Slice 2a (done):** the `ink` backend is real (no longer a plain fallback) — formatting consolidated into a shared `format.ts` + `clock.ts` (used by both backends so log bodies can't drift), a `RunStore` folds the event stream into `{phase, lines}`, a pure monotonic `nextPhase` projects the brigade tracker (coarse, from post-hoc events; precise in-flight transitions are 2b), and the Ink `App` (brunch-wordmark header in the brand gradient + brigade strip with `✓/◐/○` marks + bounded activity log) renders to **stderr**. **Slice 2b (done):** the dead-air fix — `activity-start`/`activity-progress`/`activity-end` events; the four long waits are bracketed (the three agent sessions self-bracket inside `runPi` with a throttled KB heartbeat; the test-run + probe waits use a `withActivity` helper; promotion brackets in `cook-cli`), always closing via `finally`. `RunStore` tracks a `pending` map; the Ink `PendingPanel` shows a live spinner + label + elapsed + detail (a tick interval runs only while pending is non-empty). Plain/CI renders one `⋯` start line per wait. The seam is now complete across all three commands and both backends. 
**Lifecycle:** the bus creator owns disposal — entry points run through `withCookBus(command, fn)`, which builds the bus and `dispose()`s it (unmounting Ink) in `finally`, so the TUI can't be left mounted and hang the process (ln-review finding). | bus.test.ts (fan-out + error isolation), presenter.test.ts (withCookBus disposes on success + throw), select.test.ts (decision table), plain.test.ts (byte-exact plan + cook arms incl. injected-clock elapsed + activity start-line), plan-runner.test.ts (golden stderr via capturing bus), brownfield-smoke.integration.test.ts (cook end-to-end through the bus), phase.test.ts (monotonic brigade), run-store.test.ts (event fold + pending map + stable snapshot), ink/app.test.tsx (frame: egg + active phase + activity + pending panel), pi-actions.test.ts (balanced activity start/end incl. on session failure), cook-report.test.ts (banner + completion-summary golden — the cook line strings are pure-tested, ln-review #3) | Requirements 46–50; D156-K (reports.jsonl stays the durable medium; CookEvent is ephemeral presentation only) (FE-878) | -| I138-K | A failed epic verification is recoverable, not terminal: the verify-epic fail-sibling routes to an `epic-remediate` dispatch/complete chain bounded by an `epic-retry-budget` (mirroring the slice run-tests loop), reaching the epic halted place only on budget exhaustion. A remediation attempt is accepted only if it does NOT modify the epic integration test (detect-and-reject — counts against budget) AND both the epic integration test and the slice suites pass on the folded tree (dual re-verify); the combined verdict rides the routed token. An accepted fix is diff-transferred and committed to the representative slice branch (`transferFoldedFixToSlice`) so `harvestCookRun` folds it into the promoted artifact. Codebase-mode only; greenfield keeps immediate halt. 
| topology.test.ts (fail→remediate→verify routing, budget decrement, exhaustion→halt), run-artifact.test.ts (detect-reject + round-trip to slice branch), epic-recovery.integration.test.ts (scripted-agent e2e: fixable / reject / dual-re-verify veto / exhaustion) | Requirement 49; D170-K; builds on I124-K (FE-884 slice A) | +| I138-K | A failed epic verification is recoverable, not terminal: the verify-epic fail-sibling routes to an `epic-remediate` dispatch/complete chain bounded by an `epic-retry-budget` (mirroring the slice run-tests loop), reaching the epic halted place only on budget exhaustion. A remediation attempt is accepted only if it does NOT modify the epic integration test (detect-and-reject — counts against budget) AND both the epic integration test and the slice suites pass on the folded tree (dual re-verify); the combined verdict rides the routed token. An accepted fix is diff-transferred and committed to the representative slice branch (`transferFoldedFixToSlice`) so `harvestCookRun` folds it into the promoted artifact. The fail path routes on `failureKind` (FE-872): an **infra/timeout** verdict re-verifies under a separate `infraRetryCount`/`RunPolicy.maxInfraRetries` budget (never the remediation agent), halting with an honest infra reason on exhaustion, while a **test/logic** verdict drives remediation; a verify-subprocess `ETIMEDOUT` is classified infra (not a logic fail) under a 180s ceiling. Codebase-mode only; greenfield keeps immediate halt. 
| topology.test.ts (fail→remediate/​infra-retry routing, budget decrements, exhaustion→halt), run-artifact.test.ts (detect-reject + round-trip to slice branch), test-runner.test.ts (ETIMEDOUT→infra, 180s ceiling), epic-recovery.integration.test.ts (scripted-agent e2e: fixable / reject / dual-re-verify veto / exhaustion / infra-retry-not-remediate) | Requirement 49; D170-K; builds on I124-K (FE-884 slices A+B) | | I137-K | The cook epic verify tree resolves deps from the richest completed install and never deep-copies them. `walkFiles` skips `SHAREABLE_TOP_LEVEL_ENTRIES` (`node_modules`) by NAME — not only when they are symlinks — so an in-slice `npm install` that clobbers the shared symlink into a real tree is never copied per-slice or per-epic (the prior copy of that ~900M tree caused ENOSPC). `mergeSlicesIntoEpicSandbox` links `node_modules` from a completed slice that materialized a real install (it holds the manifest-reconciled tree with slice-added deps) in preference to the parent's pre-run install; last installer in plan declaration order wins and divergent slice installs are reported as a `node_modules` `MergeConflict`. Whole-plan promotion (`mergeCompletedSlicesIntoTree`) never relinks deps (source-only). Full union of divergent installs is deferred to the lockfile-merge-and-reinstall remodel. 
| epic-sandbox-merge.test.ts (clobbered real node_modules linked not copied; links to slice install so added dep resolves; parent fallback when no slice installed; divergence reported + last-installer wins) | Requirement 49; D169-K; A101-K; refines I123-K (cook-artifact-lifecycle follow-on) | ## Future Direction Register From 955ca3c98c93838f8fa7304417c876305a167273 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Fri, 19 Jun 2026 08:57:41 +0100 Subject: [PATCH 5/9] FE-884 (slice A): register the remediate-epic action in createPiActions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The epic-remediate:complete transition dispatches actions['remediate-epic'], but the production action factory never registered it — the recovery loop was only ever proven with a scripted/injected action in the integration test. Every real cook run therefore crashed with "actions[actionKey] is not a function" the instant an epic verdict failed. Add the production handler: it runs a code agent (code-writer prompt, write tools) against the folded epic tree to fix the cross-slice defect, then reports the attempt; the net-compiler still owns detect-reject, dual re-verify, and the diff-transfer round-trip. Pin it on the agent-extension-host capability witness. 
--- src/agent-extension-host.test.ts | 11 +++++ src/orchestrator/src/pi-actions.test.ts | 54 +++++++++++++++++++++++++ src/orchestrator/src/pi-actions.ts | 33 +++++++++++++++ 3 files changed, 98 insertions(+) diff --git a/src/agent-extension-host.test.ts b/src/agent-extension-host.test.ts index 4d230dd92..521ffcb58 100644 --- a/src/agent-extension-host.test.ts +++ b/src/agent-extension-host.test.ts @@ -50,6 +50,17 @@ const cookWitness = { mode: 'execute', capabilities: [{ id: 'verify-epic', summary: 'Write + run an epic integration test.', handler: null }], }, + { + id: 'execute.remediate-epic', + mode: 'execute', + capabilities: [ + { + id: 'remediate-epic', + summary: 'Fix a failed epic integration test on the folded tree.', + handler: null, + }, + ], + }, ], } as const satisfies AgentExtensionConsumerWitness; diff --git a/src/orchestrator/src/pi-actions.test.ts b/src/orchestrator/src/pi-actions.test.ts index c98317298..79381a115 100644 --- a/src/orchestrator/src/pi-actions.test.ts +++ b/src/orchestrator/src/pi-actions.test.ts @@ -9,6 +9,7 @@ import { afterEach, describe, expect, it } from 'vitest'; import { cookResourceLoader, createPiActions, + epicRemediateTask, epicVerifyTask, instrumentToolDefinition, runPi, @@ -1438,3 +1439,56 @@ describe('cookResourceLoader (FE-881) loads sandbox skills, excludes global', () expect(names).not.toContain('global-skill'); }); }); + +describe('remediate-epic action — FE-884 Slice A production wiring', () => { + const slice: Slice = { + id: 'login', + epic_id: 'api', + definition: 'Login', + depends_on: [], + verification: [{ kind: 'unit-test', target: 'tests/login.test.ts' }], + }; + const epic: Epic = { + id: 'api', + summary: 'API surface', + depends_on: [], + verification: [{ kind: 'integration-test', target: 'tests/api.integration.test.ts' }], + }; + const plan: Plan = { mode: 'greenfield', epics: [epic], slices: [slice] }; + + it('createPiActions registers a remediate-epic handler', () => { + const actions = 
createPiActions(); + expect(actions['remediate-epic']).toBeTypeOf('function'); + }); + + it('drives a write-capable agent against the folded epic tree and reports the attempt', async () => { + process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; + const reports = new InMemoryReportSink(); + const fake = makeFakeSession({ emit: 'patched the product code' }); + let captured: { cwd?: string; tools?: string[] } | undefined; + const createSession = (async (options: { cwd?: string; tools?: string[] }) => { + captured = options; + return { session: fake.session }; + }) as unknown as SessionFactory; + const actions = createPiActions({ createSession }); + + const foldedDir = '/tmp/__epic__/api'; + const id = await actions['remediate-epic']!({ slice, epic, plan, sandboxDir: foldedDir, reports }); + + expect(captured?.cwd).toBe(foldedDir); + expect(captured?.tools).toEqual(['read', 'write', 'edit', 'bash']); + expect(fake.calls.prompt[0]).toContain('api'); + expect(fake.calls.prompt[0]).toContain('tests/api.integration.test.ts'); + const rec = reports.getById(id)!; + expect(rec.actor).toBe('coding-agent'); + expect(rec.event).toBe('remediation-agent-done'); + expect(rec.epicId).toBe('api'); + }); + + it('epicRemediateTask names the epic and instructs fixing code, not the oracle', () => { + const task = epicRemediateTask(epic); + expect(task).toContain('api'); + expect(task).toContain('tests/api.integration.test.ts'); + expect(task.toLowerCase()).toContain('do not'); + }); +}); diff --git a/src/orchestrator/src/pi-actions.ts b/src/orchestrator/src/pi-actions.ts index aa659fcc6..3869fc7eb 100644 --- a/src/orchestrator/src/pi-actions.ts +++ b/src/orchestrator/src/pi-actions.ts @@ -559,6 +559,11 @@ export function epicVerifyTask(epic: Epic, toolchain: Toolchain): string { return `Write an integration test for epic "${epic.id}": ${epic.summary}\nThis test should verify that all slices in this epic work together correctly.\nVerification targets: ${targets}\nWrite the 
test file(s). ${toolchain.testConventions} Then run them to verify they pass.`; } +export function epicRemediateTask(epic: Epic): string { + const targets = epic.verification.map((v) => `${v.kind}: ${v.target}`).join(', '); + return `Remediate the failing integration test for epic "${epic.id}": ${epic.summary}\nThe slices in this epic each pass on their own, but the epic's integration test fails now that they are folded together in this tree. Read the failing test, find the cross-slice defect, and fix the product code so the test passes.\nVerification targets: ${targets}\nDo not modify the integration test or any test file — fix the implementation, not the oracle.`; +} + export function createPiActions(opts?: { verbose?: boolean; /** Presentation sink. Per-action progress is emitted as CookEvents; defaults to no-op. */ @@ -775,5 +780,33 @@ export function createPiActions(opts?: { ...(probe ? { reachability: probe.kind } : {}), }); }, + + 'remediate-epic': async (ctx: ActionContext) => { + log('▸', `remediate ${ctx.epic.id}`); + try { + await runPi( + { + label: `remediate ${ctx.epic.id}`, + model: 'claude-opus-4-8', + promptFile: join(promptsDir, 'code-writer.md'), + task: epicRemediateTask(ctx.epic), + sandboxDir: ctx.sandboxDir, + tools: toolsForAction('remediate-epic'), + }, + piDeps, + ); + } catch (err) { + _emit({ + kind: 'slice', + id: ctx.slice.id, + epicId: ctx.epic.id, + status: 'failed', + reason: 'epic remediation failed', + }); + throw err; + } + + return report(ctx, 'coding-agent', 'remediation-agent-done', { sliceId: ctx.slice.id }); + }, }; } From 6bc4d71249635a32e8ef724ea3d4d1fbd3fb83e6 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Fri, 19 Jun 2026 09:02:00 +0100 Subject: [PATCH 6/9] FE-884: record the Slice A production-action gap in PLAN The scripted-agent e2e masked that createPiActions never registered a remediate-epic action; note the gap and its fix (79376fe0) on the Slice A entry so the record is honest about why real runs 
crashed. --- memory/PLAN.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/memory/PLAN.md b/memory/PLAN.md index 92fd6ac43..8d6cf1bfe 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -514,7 +514,7 @@ The May 2026 intent-spec, multi-chat, changeset-ledger, prompt/context, and agen - **Status:** in progress (2026-06-18) — branch `ka/fe-884-epic-verify-recovery` on FE-883's GC tip. Slices A + B landed; C remains. Queue in `memory/CARDS.md`. - **Objective:** Close the orchestrator's verification asymmetry — the slice tier is recoverable (run-tests retry loop) but the epic tier routes `epic-verify:<epicId>:fail` straight to halt, so a diagnosed cross-slice defect discards the whole run and promotes nothing. Make a failed epic recoverable: a remediation code agent on the folded `__epic__` tree, bounded by an `epic-retry-budget`, with detect-and-reject (no editing the epic test) + dual re-verify (epic test AND slice suites) integrity guards, and a diff-transfer round-trip so the fix promotes. Substrate-free — distinct from Arc-2 `interactive-recovery`/`adaptive-replan`. - **Why now / unlocks:** Builds directly on FE-883's folded-tree composition (`materializeEpicVerifyTree`, `harvestCookRun`); without it, the worked example (run `59100820`: a one-line fixable `useViewParam` defect) halts a 60-minute run and is fixed by hand. Raises cook's unattended completion rate before the Arc-2 autonomy ladder. -- **Slice A (done, `580ed50f`):** the remediation loop + detect-reject + dual re-verify + `transferFoldedFixToSlice` round-trip. Round-trip design finding: `harvestCookRun` folds only slice worktrees, so the folded-tree fix is committed to the representative slice branch. Proven by topology goldens, run-artifact units, and a scripted-agent e2e (fixable / reject / veto / exhaustion). Real-agent dogfood of run 59100820 is outer-loop, deferred. 
+- **Slice A (done, `580ed50f`; production action `79376fe0`):** the remediation loop + detect-reject + dual re-verify + `transferFoldedFixToSlice` round-trip. Round-trip design finding: `harvestCookRun` folds only slice worktrees, so the folded-tree fix is committed to the representative slice branch. Proven by topology goldens, run-artifact units, and a scripted-agent e2e (fixable / reject / veto / exhaustion). **Gap caught later:** the e2e only ever exercised an *injected* `remediate-epic` action, so the production `createPiActions` factory was never given one — every real cook run crashed with `actions[actionKey] is not a function` the instant an epic verdict failed. `79376fe0` registers the production action (code-writer agent on the folded tree) and pins it on the agent-extension-host capability witness. Real-agent dogfood of run 59100820 is outer-loop, deferred. - **Slice B (done):** split the verify-epic fail path on `failureKind` (FE-872) — infra/timeout re-verifies under a separate `infraRetryCount`/`maxInfraRetries` budget (no agent), halting honestly on exhaustion; test/logic still remediates. Correctness fix: `spawnSync` timeout surfaces as `ETIMEDOUT`, previously misclassified `test` (would have fed the agent a non-bug) → now infra, ceiling raised 60s→180s. Distinct from FE-864's pi session deadline. - **Remaining:** C — partial promotion: `harvestCookRun` lands passing epics + returns the failing epic's diagnosis instead of `nothing promoted` (depends on A's round-trip + FE-883 GC ref-set; not yet carded). - **Acceptance / verification:** see `memory/CARDS.md`; oracle strategy folded into SPEC §Verification Design. 
From cc35b290cbb0a0a9044a601b8b0569fccf1b25e3 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Fri, 19 Jun 2026 09:47:18 +0100 Subject: [PATCH 7/9] FE-884: ignore pre-existing verify tests during remediation transfer Co-authored-by: Cursor --- src/orchestrator/src/net-compiler.ts | 8 +++++++- src/orchestrator/src/run-artifact.test.ts | 23 +++++++++++++++++++++++ src/orchestrator/src/run-artifact.ts | 22 ++++++++++++++++++++++ 3 files changed, 52 insertions(+), 1 deletion(-) diff --git a/src/orchestrator/src/net-compiler.ts b/src/orchestrator/src/net-compiler.ts index 24ccaaa5d..3f3a0147e 100644 --- a/src/orchestrator/src/net-compiler.ts +++ b/src/orchestrator/src/net-compiler.ts @@ -17,7 +17,11 @@ import type { NetBlueprint, TokenSeed, TransitionSkeleton } from './net-blueprin import { PetriNet } from './petri-net.js'; import type { Token } from './petri-net.js'; import { createReport } from './report-helpers.js'; -import { materializeEpicVerifyTree, transferFoldedFixToSlice } from './run-artifact.js'; +import { + captureFoldedChangeBaseline, + materializeEpicVerifyTree, + transferFoldedFixToSlice, +} from './run-artifact.js'; import { runVerification } from './test-runner.js'; import type { ActionContext, @@ -1167,6 +1171,7 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, fire = async (consumed) => { const deferred = (async () => { const foldedDir = resolveEpicSandboxDir(input.sandboxDir, epicId); + const baseline = captureFoldedChangeBaseline(foldedDir); const actCtx: ActionContext = { slice, epic, plan, sandboxDir: foldedDir, reports }; await actions[actionKey]!(actCtx); const outcome = transferFoldedFixToSlice({ @@ -1174,6 +1179,7 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, foldedDir, slice, epicTestTargets, + baseline, }); ctx.reportIds.push( createReport(reports, { diff --git a/src/orchestrator/src/run-artifact.test.ts b/src/orchestrator/src/run-artifact.test.ts index 
7af4d1c5a..112c12555 100644 --- a/src/orchestrator/src/run-artifact.test.ts +++ b/src/orchestrator/src/run-artifact.test.ts @@ -15,6 +15,7 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { brunchRef, + captureFoldedChangeBaseline, commitSliceWorktree, type CompletedRun, dependencyOrder, @@ -399,6 +400,28 @@ describe('transferFoldedFixToSlice (FE-884 remediation round-trip)', () => { expect(gitC(source, 'show', `${brunchRef.slice('r1', 'a')}:lib.ts`)).toContain('broken'); }); + it('ignores unchanged verify-test files that were dirty before remediation', () => { + // verify-epic writes the failing integration test before the remediation + // agent runs. That baseline oracle must not be mistaken for an agent edit. + writeFileSync(join(foldedDir, 'it.test.ts'), 'it("fails until product is fixed", () => {});\n'); + const baseline = captureFoldedChangeBaseline(foldedDir); + writeFileSync(join(foldedDir, 'lib.ts'), 'export const view = "fixed";\n'); + + const outcome = transferFoldedFixToSlice({ + parentSandboxDir: parent, + foldedDir, + slice: slice('a'), + epicTestTargets: ['it.test.ts'], + baseline, + }); + + expect(outcome).toMatchObject({ accepted: true, touched: ['lib.ts'] }); + expect(gitC(source, 'show', `${brunchRef.slice('r1', 'a')}:lib.ts`)).toContain('fixed'); + expect(gitC(source, 'ls-tree', '-r', '--name-only', brunchRef.slice('r1', 'a'))).not.toContain( + 'it.test.ts', + ); + }); + it('rejects a no-op attempt (agent changed nothing)', () => { const outcome = transferFoldedFixToSlice({ parentSandboxDir: parent, diff --git a/src/orchestrator/src/run-artifact.ts b/src/orchestrator/src/run-artifact.ts index 0cbcee36f..eebefc231 100644 --- a/src/orchestrator/src/run-artifact.ts +++ b/src/orchestrator/src/run-artifact.ts @@ -61,6 +61,8 @@ export type CompletedRun = { completedSliceIds: string[]; }; +export type FoldedChangeBaseline = ReadonlyMap<string, string | null>; + // Deterministic committer so promotion never depends on (or mutates) global git
config. const COMMIT_IDENTITY = ['-c', 'user.name=brunch', '-c', 'user.email=cook@brunch']; @@ -172,6 +174,20 @@ function touchesTestTarget(touched: string, targets: readonly string[]): boolean return targets.some((t) => touched === t || touched.endsWith(`/${t}`) || basename(t) === base); } +function worktreeObjectId(dir: string, path: string): string | null { + if (!existsSync(join(dir, path))) return null; + return git(['hash-object', '--', path], dir); +} + +/** Capture already-dirty folded-tree files before the remediation agent runs. */ +export function captureFoldedChangeBaseline(foldedDir: string): FoldedChangeBaseline { + git(['add', '-A'], foldedDir); + const paths = git(['diff', '--cached', '--name-only'], foldedDir).split('\n').filter(Boolean); + const baseline = new Map(paths.map((path) => [path, worktreeObjectId(foldedDir, path)])); + git(['reset'], foldedDir); + return baseline; +} + /** * FE-884 remediation round-trip. After a remediation agent edits the *folded* * `__epic__//` tree (the detached worktree where the integration test runs), @@ -196,11 +212,17 @@ export function transferFoldedFixToSlice(opts: { foldedDir: string; slice: Slice; epicTestTargets: readonly string[]; + baseline?: FoldedChangeBaseline; }): { accepted: boolean; reason?: 'no-op' | 'touched-test' | 'apply-failed'; touched: string[] } { const { foldedDir } = opts; const sliceDir = resolveSliceWorktreeDir(opts.parentSandboxDir, opts.slice.id); // Stage everything so new files are enumerated too, then list touched paths. git(['add', '-A'], foldedDir); + for (const [path, objectId] of opts.baseline ?? 
[]) { + if (worktreeObjectId(foldedDir, path) === objectId) { + git(['reset', '--', path], foldedDir); + } + } const touched = git(['diff', '--cached', '--name-only'], foldedDir).split('\n').filter(Boolean); if (touched.length === 0) { git(['reset'], foldedDir); From 66407698a4039973d487258901dc848169dc0bd2 Mon Sep 17 00:00:00 2001 From: Kostandin Angjellari Date: Fri, 19 Jun 2026 11:44:51 +0100 Subject: [PATCH 8/9] FE-884: clean the slice worktree when a folded remediation fix fails to apply MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A failed `git apply --3way` of a remediation fix onto the representative slice worktree is not atomic — it can leave partially-applied hunks, .rej files, or conflict residue behind. Before returning `apply-failed`, reset --hard + clean -fd the slice worktree so the failed transfer is a true no-op (the loop degrades to a burned budget unit) and no corruption rides into the next re-fold/re-verify or the promoted artifact. Mirrors the touched-test reject path. 
(cherry picked from commit 2752eadfa5fee4b58211e7f10979c237d99b6220) --- src/orchestrator/src/run-artifact.test.ts | 17 +++++++++++++++++ src/orchestrator/src/run-artifact.ts | 2 ++ 2 files changed, 19 insertions(+) diff --git a/src/orchestrator/src/run-artifact.test.ts b/src/orchestrator/src/run-artifact.test.ts index 112c12555..5354c228e 100644 --- a/src/orchestrator/src/run-artifact.test.ts +++ b/src/orchestrator/src/run-artifact.test.ts @@ -422,6 +422,23 @@ describe('transferFoldedFixToSlice (FE-884 remediation round-trip)', () => { ); }); + it('cleans the slice worktree when a folded fix cannot be applied', () => { + const sliceDir = join(parent, 'a'); + writeFileSync(join(foldedDir, 'lib.ts'), 'export const view = "fixed";\n'); + writeFileSync(join(sliceDir, 'lib.ts'), 'export const view = "locally diverged";\n'); + + const outcome = transferFoldedFixToSlice({ + parentSandboxDir: parent, + foldedDir, + slice: slice('a'), + epicTestTargets: ['it.test.ts'], + }); + + expect(outcome).toMatchObject({ accepted: false, reason: 'apply-failed', touched: ['lib.ts'] }); + expect(readFileSync(join(sliceDir, 'lib.ts'), 'utf8')).toBe('export const view = "broken";\n'); + expect(gitC(sliceDir, 'status', '--short')).toBe(''); + }); + it('rejects a no-op attempt (agent changed nothing)', () => { const outcome = transferFoldedFixToSlice({ parentSandboxDir: parent, diff --git a/src/orchestrator/src/run-artifact.ts b/src/orchestrator/src/run-artifact.ts index eebefc231..7d50386d6 100644 --- a/src/orchestrator/src/run-artifact.ts +++ b/src/orchestrator/src/run-artifact.ts @@ -244,6 +244,8 @@ export function transferFoldedFixToSlice(opts: { encoding: 'utf8', }); if (applied.status !== 0) { + git(['reset', '--hard'], sliceDir); + git(['clean', '-fd'], sliceDir); return { accepted: false, reason: 'apply-failed', touched }; } commitSliceWorktree({ parentSandboxDir: opts.parentSandboxDir, slice: opts.slice }); From 554bcfcce34ddf10b029db0c812cc5ede39c05da Mon Sep 17 00:00:00 2001 
From: Kostandin Angjellari Date: Fri, 19 Jun 2026 15:37:42 +0100 Subject: [PATCH 9/9] FE-884: classify probe infra as infra retry Route reachability probe infra failures through the bounded epic reverify path instead of the remediation loop. Co-authored-by: Cursor --- .../src/epic-recovery.integration.test.ts | 28 +++++++++++++++++++ src/orchestrator/src/net-compiler.ts | 8 ++++-- src/orchestrator/src/pi-actions.test.ts | 16 ++++++++--- src/orchestrator/src/pi-actions.ts | 3 +- 4 files changed, 47 insertions(+), 8 deletions(-) diff --git a/src/orchestrator/src/epic-recovery.integration.test.ts b/src/orchestrator/src/epic-recovery.integration.test.ts index ae96cbe43..801b50f3b 100644 --- a/src/orchestrator/src/epic-recovery.integration.test.ts +++ b/src/orchestrator/src/epic-recovery.integration.test.ts @@ -89,6 +89,9 @@ describe('FE-884 — recoverable epic verification (codebase mode)', () => { // FE-884 Slice B: force the verify-epic verdict to report an infra/timeout // failure — 'always' (never recovers) or 'once' (infra then pass). epicInfra?: 'always' | 'once'; + // Regression for Bugbot a4a0a7bc: probe infra used to report only + // `reachability: "infra"`, which must still route as infra, not logic. + epicProbeInfra?: 'always' | 'once'; }, ): ActionHandlers { const evalCalls = new Map(); @@ -131,6 +134,12 @@ describe('FE-884 — recoverable epic verification (codebase mode)', () => { ); return id; } + if (opts.epicProbeInfra === 'always' || (opts.epicProbeInfra === 'once' && verifyCalls === 1)) { + reports.append( + line(id, ctx, 'orchestrator', 'epic-verified', { passed: false, reachability: 'infra' }), + ); + return id; + } const srcPath = join(ctx.sandboxDir, 'src.txt'); const txt = existsSync(srcPath) ? readFileSync(srcPath, 'utf8') : ''; // 'once': the infra blip cleared on the re-run, so the epic now passes. 
@@ -342,6 +351,25 @@ describe('FE-884 — recoverable epic verification (codebase mode)', () => { GIT_TEST_TIMEOUT_MS, ); + it( + 'probe infra retry: a reachability infra verdict re-runs verify — not the remediation agent — then completes', + async () => { + const source = makeSeededRepo(); + const reports = new InMemoryReportSink(); + const actions = withReports( + reports, + makeFakeActions(reports, { remediation: 'noop', epicPasses: true, epicProbeInfra: 'once' }), + ); + + const { result } = await runCook(source, actions, passingRunner(), 3); + + expect(result.status).toBe('completed'); + expect(reports.getAll().filter((r) => r.event === 'epic-verified')).toHaveLength(2); + expect(reports.getAll().filter((r) => r.event === 'epic-remediated')).toHaveLength(0); + }, + GIT_TEST_TIMEOUT_MS, + ); + it( 'infra exhaustion (Slice B): a persistent infra/timeout failure halts with an honest infra reason, never remediated', async () => { diff --git a/src/orchestrator/src/net-compiler.ts b/src/orchestrator/src/net-compiler.ts index 3f3a0147e..801cbc1fb 100644 --- a/src/orchestrator/src/net-compiler.ts +++ b/src/orchestrator/src/net-compiler.ts @@ -1139,9 +1139,11 @@ export function wireHandlers(blueprint: NetBlueprint, input: OrchestratorInput, // Route through the epic budgets. Pass → done (+ budget reset); infra // fail → re-verify (bounded, no remediation); test fail with budget → // re-loop to the fail sibling → remediation; exhausted → halt. - const verdictFailureKind = ( - reports.getById(verdictReportId)?.payload as { failureKind?: TestFailureKind } | undefined - )?.failureKind; + const verdictPayload = reports.getById(verdictReportId)?.payload as + | { failureKind?: TestFailureKind; reachability?: string } + | undefined; + const verdictFailureKind = + verdictPayload?.failureKind ?? (verdictPayload?.reachability === 'infra' ? 
'infra' : undefined); return routeVerdict(inputToken, verdictReportId, passed, verdictFailureKind); })(); net.scheduleDeferred(skel.id, skel.contract, { places: skel.inputs, tokens: consumed }, deferred); diff --git a/src/orchestrator/src/pi-actions.test.ts b/src/orchestrator/src/pi-actions.test.ts index 79381a115..1ccf70346 100644 --- a/src/orchestrator/src/pi-actions.test.ts +++ b/src/orchestrator/src/pi-actions.test.ts @@ -352,7 +352,7 @@ describe('verify-epic reachability grounding (FE-876) — intent resolves before sandboxDir: string; epic: Epic; groundProbe?: ProbeGrounder; - }): Promise<{ passed: boolean; reachability?: string }> { + }): Promise<{ passed: boolean; failureKind?: string; reachability?: string }> { process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; const reports = new InMemoryReportSink(); const fake = makeFakeSession({ emit: 'wrote the integration test' }); @@ -380,7 +380,10 @@ describe('verify-epic reachability grounding (FE-876) — intent resolves before plan, sandboxDir: opts.sandboxDir, reports, - }).then((id) => reports.getById(id)!.payload as { passed: boolean; reachability?: string }); + }).then( + (id) => + reports.getById(id)!.payload as { passed: boolean; failureKind?: string; reachability?: string }, + ); } it('grounds a reachability intent into a concrete target, then probes it', async () => { @@ -414,6 +417,7 @@ describe('verify-epic reachability grounding (FE-876) — intent resolves before }, }); expect(payload.passed).toBe(false); + expect(payload.failureKind).toBe('infra'); expect(payload.reachability).toBe('infra'); }); @@ -597,7 +601,7 @@ describe('verify-epic integration oracle (FE-876) — reachability folds into th sandboxDir: string; epic: Epic; groundProbe?: ProbeGrounder; - }): Promise<{ passed: boolean; reachability?: string }> { + }): Promise<{ passed: boolean; failureKind?: string; reachability?: string }> { process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; const reports = new 
InMemoryReportSink(); const fake = makeFakeSession({ emit: 'wrote the integration test' }); @@ -625,7 +629,10 @@ describe('verify-epic integration oracle (FE-876) — reachability folds into th plan, sandboxDir: opts.sandboxDir, reports, - }).then((id) => reports.getById(id)!.payload as { passed: boolean; reachability?: string }); + }).then( + (id) => + reports.getById(id)!.payload as { passed: boolean; failureKind?: string; reachability?: string }, + ); } it('grounds a reachability intent into a concrete target, then probes it', async () => { @@ -659,6 +666,7 @@ describe('verify-epic integration oracle (FE-876) — reachability folds into th }, }); expect(payload.passed).toBe(false); + expect(payload.failureKind).toBe('infra'); expect(payload.reachability).toBe('infra'); }); diff --git a/src/orchestrator/src/pi-actions.ts b/src/orchestrator/src/pi-actions.ts index 3869fc7eb..d01db02d4 100644 --- a/src/orchestrator/src/pi-actions.ts +++ b/src/orchestrator/src/pi-actions.ts @@ -772,11 +772,12 @@ export function createPiActions(opts?: { } } const passed = testsPassed && (probe === undefined || probe.reachable); + const combinedFailureKind = failureKind ?? (probe?.kind === 'infra' ? 'infra' : undefined); log(passed ? '●' : '✗', `epic ${ctx.epic.id} → ${passed ? 'PASS' : 'FAIL'}`); return report(ctx, 'orchestrator', 'epic-verified', { passed, - failureKind, + failureKind: combinedFailureKind, ...(probe ? { reachability: probe.kind } : {}), }); },