diff --git a/src/orchestrator/src/app-probe.test.ts b/src/orchestrator/src/app-probe.test.ts index e5cd62b9..8e4a76ea 100644 --- a/src/orchestrator/src/app-probe.test.ts +++ b/src/orchestrator/src/app-probe.test.ts @@ -126,42 +126,6 @@ describe('runProbe bounds its HTTP calls so a hung app cannot hang the probe', ( }); }); -describe('runProbe bounds its HTTP calls so a hung app cannot hang the probe', () => { - // A server that accepts connections (and the HTTP request) but never sends a - // response — the case the wall-clock deadline alone can't catch, because a - // bare `await fetch` would block forever between deadline checks. - const neverResponds = (readyRoutes: Record = {}): string => - `const http = require('node:http');\n` + - `const ready = ${JSON.stringify(readyRoutes)};\n` + - `http.createServer((req, res) => {\n` + - ` if (ready[req.url] !== undefined) { res.writeHead(ready[req.url]); res.end('ok'); return; }\n` + - ` /* otherwise: never respond */\n` + - `}).listen(Number(process.env.PORT), '127.0.0.1');\n`; - - it('a ready path that accepts connections but never responds → infra within the deadline', async () => { - const spec = await buildProbeSpec({ - boot: ['node', 'server.js'], - readyPath: '/health', - featurePath: '/feature', - }); - const dir = sandbox(neverResponds()); - const result = await runProbe(spec, dir, { readyTimeoutMs: 600, readyAttemptMs: 150 }); - expect(result.kind).toBe('infra'); - }); - - it('a booted app whose feature endpoint never responds → infra, not a hang', async () => { - const spec = await buildProbeSpec({ - boot: ['node', 'server.js'], - readyPath: '/health', - featurePath: '/feature', - }); - const dir = sandbox(neverResponds({ '/health': 200 })); - const result = await runProbe(spec, dir, { requestTimeoutMs: 300 }); - expect(result.kind).toBe('infra'); - expect(result.output).toMatch(/feature probe request failed/); - }); -}); - describe('runProbe tears the boot process down', () => { it('the booted app is no longer listening after the probe returns', async () => { const { spec, dir } = await specFor({ '/health': 200, '/feature': 200 }); diff --git a/src/orchestrator/src/cook-cli.ts b/src/orchestrator/src/cook-cli.ts index 4a82642b..20ebad41 100644 --- a/src/orchestrator/src/cook-cli.ts +++ b/src/orchestrator/src/cook-cli.ts @@ -603,8 +603,10 @@ export async function runCook(opts: CookOptions, bus: CookBus): Promise { } line(''); } catch (err) { - line(` ✗ promotion failed: ${err instanceof Error ? err.message : String(err)}`); + const reason = `promotion failed: ${err instanceof Error ? err.message : String(err)}`; + line(` ✗ ${reason}`); line(''); + bus.emit({ kind: 'cook-done', ok: false, reason }); recordCookExitStatus(false); return; } @@ -636,8 +638,10 @@ export async function runCook(opts: CookOptions, bus: CookBus): Promise { line(` ✓ promoted → ${promoted.target} (${promoted.branch} @ ${promoted.commit.slice(0, 8)})`); line(''); } catch (err) { - line(` ✗ promotion failed: ${err instanceof Error ? err.message : String(err)}`); + const reason = `promotion failed: ${err instanceof Error ? err.message : String(err)}`; + line(` ✗ ${reason}`); line(''); + bus.emit({ kind: 'cook-done', ok: false, reason }); recordCookExitStatus(false); return; } diff --git a/src/orchestrator/src/cow-copy.ts b/src/orchestrator/src/cow-copy.ts index c75d3da1..02763336 100644 --- a/src/orchestrator/src/cow-copy.ts +++ b/src/orchestrator/src/cow-copy.ts @@ -1,5 +1,5 @@ import { spawnSync } from 'node:child_process'; -import { cpSync, existsSync, readdirSync, symlinkSync } from 'node:fs'; +import { cpSync, existsSync, lstatSync, readdirSync, symlinkSync } from 'node:fs'; import { join, resolve } from 'node:path'; /** @@ -50,7 +50,11 @@ export function copyMissingTopLevelEntries( if (existsSync(destPath)) continue; const sourcePath = join(source, entry); if (symlink.has(entry)) { - symlinkSync(sourcePath, destPath); + symlinkSync( + sourcePath, + destPath, + process.platform === 'win32' && lstatSync(sourcePath).isDirectory() ? 'junction' : undefined, + ); } else { cowCopy(sourcePath, destPath); } diff --git a/src/orchestrator/src/pi-actions.test.ts b/src/orchestrator/src/pi-actions.test.ts index 1496643a..cce9ef3f 100644 --- a/src/orchestrator/src/pi-actions.test.ts +++ b/src/orchestrator/src/pi-actions.test.ts @@ -3,13 +3,16 @@ import { tmpdir } from 'node:os'; import { dirname, join } from 'node:path'; import { fileURLToPath } from 'node:url'; +import { type Skill } from '@earendil-works/pi-coding-agent'; import { afterEach, describe, expect, it } from 'vitest'; import { + cookResourceLoader, createPiActions, epicVerifyTask, instrumentToolDefinition, runPi, + sandboxScopedSkills, type SessionFactory, sliceTestTask, toolLabel, @@ -185,7 +188,7 @@ describe('evaluate-done / verify-epic share the runner seam — failureKind is v }); }); -describe('verify-epic integration oracle (FE-876) — reachability folds into the epic verdict', () => { +describe('verify-epic reachability grounding (FE-876) — intent resolves before the epic verdict', () => { const probeDirs: string[] = []; afterEach(() => { for (const dir of probeDirs.splice(0)) rmSync(dir, { recursive: true, force: true }); @@ -857,6 +860,25 @@ describe('runPi drives an in-process pi session (no subprocess)', () => { expect(writes.join('')).not.toContain('SECRET_AGENT_OUTPUT'); }); + it('caps activity heartbeat snippets including the ellipsis', async () => { + process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; + const sandboxDir = mkdtempSync(join(tmpdir(), 'brunch-runpi-')); + const events: CookEvent[] = []; + createPiActions({ emit: (e) => events.push(e) }); + try { + const fake = makeFakeSession({ emit: 'x'.repeat(2_048) }); + const createSession = (async () => ({ session: fake.session })) as unknown as SessionFactory; + await runPi(baseOpts(sandboxDir, 'read'), { createSession }); + } finally { + rmSync(sandboxDir, { recursive: true, force: true }); + } + const progress = events.find( + (e): e is Extract => e.kind === 'activity-progress', + ); + expect(progress?.detail).toHaveLength(56); + expect(progress?.detail.startsWith('…')).toBe(true); + }); + it('aborts the session and rejects when the prompt exceeds the timeout', async () => { process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session'; const sandboxDir = mkdtempSync(join(tmpdir(), 'brunch-runpi-')); @@ -1321,3 +1343,64 @@ describe('evaluate-done failure carries a reason', () => { expect(lastSlice(events)).toMatchObject({ status: 'failed', reason: 'infra error' }); }); }); + +describe('sandboxScopedSkills (FE-881) keeps only skills rooted under the sandbox', () => { + const skill = (filePath: string): Skill => ({ + name: filePath, + description: '', + filePath, + baseDir: dirname(filePath), + sourceInfo: {} as Skill['sourceInfo'], + disableModelInvocation: false, + }); + + it('keeps repo skills, drops sibling-slice / prefix-lookalike / global skills', () => { + const sandbox = '/tmp/run/worktree/slice-a'; + const kept = sandboxScopedSkills( + [ + skill('/tmp/run/worktree/slice-a/.agents/skills/foo/SKILL.md'), + skill('/tmp/run/worktree/slice-a/.claude/skills/bar/SKILL.md'), + skill('/tmp/run/worktree/slice-b/.agents/skills/sibling/SKILL.md'), + skill('/tmp/run/worktree/slice-a-other/.agents/skills/lookalike/SKILL.md'), + skill('/home/dev/.pi/skills/global/SKILL.md'), + ], + sandbox, + ); + expect(kept.map((s) => s.name)).toEqual([ + '/tmp/run/worktree/slice-a/.agents/skills/foo/SKILL.md', + '/tmp/run/worktree/slice-a/.claude/skills/bar/SKILL.md', + ]); + }); +}); + +describe('cookResourceLoader (FE-881) loads sandbox skills, excludes global', () => { + const dirs: string[] = []; + afterEach(() => { + for (const d of dirs) rmSync(d, { recursive: true, force: true }); + dirs.length = 0; + }); + + const writeSkill = (root: string, name: string) => { + mkdirSync(join(root, name), { recursive: true }); + writeFileSync( + join(root, name, 'SKILL.md'), + `---\nname: ${name}\ndescription: ${name} skill\n---\nbody\n`, + ); + }; + + it('discovers the repo .agents/skills and drops agentDir (global) skills', async () => { + const sandbox = mkdtempSync(join(tmpdir(), 'cook-sandbox-')); + dirs.push(sandbox); + const agentDir = mkdtempSync(join(tmpdir(), 'cook-agent-')); + dirs.push(agentDir); + writeSkill(join(sandbox, '.agents', 'skills'), 'repo-skill'); + writeSkill(join(agentDir, 'skills'), 'global-skill'); + + const loader = cookResourceLoader(sandbox, agentDir, 'system prompt'); + await loader.reload(); + const names = loader.getSkills().skills.map((s) => s.name); + + expect(names).toContain('repo-skill'); + expect(names).not.toContain('global-skill'); + }); +}); diff --git a/src/orchestrator/src/pi-actions.ts b/src/orchestrator/src/pi-actions.ts index 9ec2f8ad..4a921c00 100644 --- a/src/orchestrator/src/pi-actions.ts +++ b/src/orchestrator/src/pi-actions.ts @@ -1,6 +1,6 @@ -import { mkdtempSync, readFileSync, rmSync } from 'node:fs'; +import { existsSync, mkdtempSync, readFileSync, realpathSync, rmSync } from 'node:fs'; import { tmpdir } from 'node:os'; -import { dirname, join } from 'node:path'; +import { dirname, join, resolve, sep } from 'node:path'; import { fileURLToPath } from 'node:url'; import { @@ -18,6 +18,7 @@ import { ModelRegistry, SessionManager, SettingsManager, + type Skill, type ToolDefinition, } from '@earendil-works/pi-coding-agent'; @@ -70,7 +71,7 @@ function latestLine(text: string): string { const lines = text.split('\n'); for (let i = lines.length - 1; i >= 0; i--) { const line = lines[i]!.trim(); - if (line) return line.length > HEARTBEAT_MAX ? `…${line.slice(-HEARTBEAT_MAX)}` : line; + if (line) return line.length > HEARTBEAT_MAX ? `…${line.slice(-(HEARTBEAT_MAX - 1))}` : line; } return ''; } @@ -223,9 +224,63 @@ function piTimeoutError(timeoutMs: number): Error { return new Error(`pi timed out after ${timeoutMs / 1000}s`); } -// Map one action's inputs to SDK session config — tools/model/system-prompt, no -// context/skills, in-memory session. Auth from brunch's own ANTHROPIC_API_KEY, not -// the user's ~/.pi credentials, which is what keeps a fresh checkout self-contained. +/** + * Keep only skills rooted under `sandboxDir` (the cook worktree). Drops the + * developer's machine-global pi skills and any sibling-slice / look-alike paths, + * so a brownfield cook builds on the target repo's own skills without leaking the + * host's pi config — the "self-contained checkout" guarantee, narrowed from + * "no skills" to "no skills from outside the repo" (FE-881). + */ +export function sandboxScopedSkills(skills: Skill[], sandboxDir: string): Skill[] { + const root = resolve(sandboxDir); + const prefix = root + sep; + return skills.filter((s) => { + const p = resolve(s.filePath); + return p === root || p.startsWith(prefix); + }); +} + +/** + * Resource loader for a cook agent session. The agent still runs on the task + * prompt (system-prompt override) with prompts and AGENTS files suppressed, but + * it now sees the target repo's own skills: pi's default discovery scans + * `//skills` + `/skills` rather than the Agent-Skills + * convention dirs, so we point it at the repo's `.agents/skills` / `.claude/skills` + * (deduped by realpath since brunch-style repos symlink the two) and filter the + * result to paths under the sandbox. Greenfield worktrees have no such dir, so + * this resolves empty and leaves greenfield behavior unchanged. + */ +export function cookResourceLoader( + sandboxDir: string, + agentDir: string, + systemPrompt: string, +): DefaultResourceLoader { + const skillDirs = [ + ...new Set( + [join(sandboxDir, '.agents', 'skills'), join(sandboxDir, '.claude', 'skills')] + .filter((d) => existsSync(d)) + .map((d) => realpathSync(d)), + ), + ]; + return new DefaultResourceLoader({ + cwd: sandboxDir, + agentDir, + systemPromptOverride: () => systemPrompt, + appendSystemPromptOverride: () => [], + additionalSkillPaths: skillDirs, + agentsFilesOverride: () => ({ agentsFiles: [] }), + skillsOverride: (base) => ({ + skills: sandboxScopedSkills(base.skills, sandboxDir), + diagnostics: base.diagnostics, + }), + promptsOverride: () => ({ prompts: [], diagnostics: [] }), + }); +} + +// Map one action's inputs to SDK session config — tools/model/system-prompt + +// the target repo's sandbox-scoped skills (see cookResourceLoader), in-memory +// session. Auth from brunch's own ANTHROPIC_API_KEY, not the user's ~/.pi +// credentials, which keeps a fresh checkout self-contained. async function buildSessionOptions( opts: RunPiOpts, isolatedDir: string, @@ -247,15 +302,7 @@ async function buildSessionOptions( } const systemPrompt = readFileSync(opts.promptFile, 'utf8'); - const resourceLoader = new DefaultResourceLoader({ - cwd: opts.sandboxDir, - agentDir: isolatedDir, - systemPromptOverride: () => systemPrompt, - appendSystemPromptOverride: () => [], - agentsFilesOverride: () => ({ agentsFiles: [] }), - skillsOverride: () => ({ skills: [], diagnostics: [] }), - promptsOverride: () => ({ prompts: [], diagnostics: [] }), - }); + const resourceLoader = cookResourceLoader(opts.sandboxDir, isolatedDir, systemPrompt); await resourceLoader.reload(); // Supply the built-in tools ourselves (instrumented), instead of the `tools` @@ -311,9 +358,10 @@ async function runPi( _emit({ kind: 'activity-start', id: activityId, label: opts.label }); let heartbeatKb = 0; - const isolatedDir = createAgentDir(); + let isolatedDir: string | undefined; let cleanedAgentDir = false; const cleanupAgentDir = (): void => { + if (!isolatedDir) return; if (cleanedAgentDir) return; cleanedAgentDir = true; removeAgentDir(isolatedDir); @@ -366,9 +414,11 @@ async function runPi( }); try { + isolatedDir = createAgentDir(); + const agentDir = isolatedDir; const setup = (async () => { const created = await createSession( - await buildSessionOptions(opts, isolatedDir, { onStart: onToolStart, onSettle: onToolSettle }), + await buildSessionOptions(opts, agentDir, { onStart: onToolStart, onSettle: onToolSettle }), ); if (timedOut) { created.session.dispose(); diff --git a/src/orchestrator/src/presenter/phase.test.ts b/src/orchestrator/src/presenter/phase.test.ts index 437d4e82..ccb67d4d 100644 --- a/src/orchestrator/src/presenter/phase.test.ts +++ b/src/orchestrator/src/presenter/phase.test.ts @@ -57,6 +57,12 @@ describe('nextPhase', () => { expect(nextPhase('cook', { kind: 'cook-done', ok: false })).toBe('cook'); }); + it('does not light plate for a no-promotion failure message', () => { + expect(nextPhase('cook', { kind: 'line', text: ' ! run did not complete — nothing promoted.' })).toBe( + 'cook', + ); + }); + it('never regresses to an earlier phase', () => { expect(nextPhase('serve', { kind: 'cook-start', runStart: 0 })).toBe('serve'); expect(nextPhase('taste', { kind: 'action', icon: '▸', message: 'tests slice-2' })).toBe('taste'); diff --git a/src/orchestrator/src/presenter/phase.ts b/src/orchestrator/src/presenter/phase.ts index b9e9df68..7385f479 100644 --- a/src/orchestrator/src/presenter/phase.ts +++ b/src/orchestrator/src/presenter/phase.ts @@ -43,7 +43,7 @@ function phaseFor(event: CookEvent, ctx?: PhaseContext): BrigadePhase | undefine return ctx.epics.every((e) => ctx.verdictedEpics?.has(e)) ? 'taste' : undefined; } case 'line': - return event.text.includes('promoted') ? 'plate' : undefined; + return /^\s*✓\s+promoted\b/.test(event.text) ? 'plate' : undefined; case 'cook-done': // ship→serve: the run completed (emitted after promotion). A halted run // does not ship, so it never lights serve.