Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 0 additions & 36 deletions src/orchestrator/src/app-probe.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -126,42 +126,6 @@ describe('runProbe bounds its HTTP calls so a hung app cannot hang the probe', (
});
});

describe('runProbe bounds its HTTP calls so a hung app cannot hang the probe', () => {
// A server that accepts connections (and the HTTP request) but never sends a
// response — the case the wall-clock deadline alone can't catch, because a
// bare `await fetch` would block forever between deadline checks.
const neverResponds = (readyRoutes: Record<string, number> = {}): string =>
`const http = require('node:http');\n` +
`const ready = ${JSON.stringify(readyRoutes)};\n` +
`http.createServer((req, res) => {\n` +
` if (ready[req.url] !== undefined) { res.writeHead(ready[req.url]); res.end('ok'); return; }\n` +
` /* otherwise: never respond */\n` +
`}).listen(Number(process.env.PORT), '127.0.0.1');\n`;

it('a ready path that accepts connections but never responds → infra within the deadline', async () => {
const spec = await buildProbeSpec({
boot: ['node', 'server.js'],
readyPath: '/health',
featurePath: '/feature',
});
const dir = sandbox(neverResponds());
const result = await runProbe(spec, dir, { readyTimeoutMs: 600, readyAttemptMs: 150 });
expect(result.kind).toBe('infra');
});

it('a booted app whose feature endpoint never responds → infra, not a hang', async () => {
const spec = await buildProbeSpec({
boot: ['node', 'server.js'],
readyPath: '/health',
featurePath: '/feature',
});
const dir = sandbox(neverResponds({ '/health': 200 }));
const result = await runProbe(spec, dir, { requestTimeoutMs: 300 });
expect(result.kind).toBe('infra');
expect(result.output).toMatch(/feature probe request failed/);
});
});

describe('runProbe tears the boot process down', () => {
it('the booted app is no longer listening after the probe returns', async () => {
const { spec, dir } = await specFor({ '/health': 200, '/feature': 200 });
Expand Down
8 changes: 6 additions & 2 deletions src/orchestrator/src/cook-cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -603,8 +603,10 @@ export async function runCook(opts: CookOptions, bus: CookBus): Promise<void> {
}
line('');
} catch (err) {
line(` ✗ promotion failed: ${err instanceof Error ? err.message : String(err)}`);
const reason = `promotion failed: ${err instanceof Error ? err.message : String(err)}`;
line(` ✗ ${reason}`);
line('');
bus.emit({ kind: 'cook-done', ok: false, reason });
recordCookExitStatus(false);
return;
}
Expand Down Expand Up @@ -636,8 +638,10 @@ export async function runCook(opts: CookOptions, bus: CookBus): Promise<void> {
line(` ✓ promoted → ${promoted.target} (${promoted.branch} @ ${promoted.commit.slice(0, 8)})`);
line('');
} catch (err) {
line(` ✗ promotion failed: ${err instanceof Error ? err.message : String(err)}`);
const reason = `promotion failed: ${err instanceof Error ? err.message : String(err)}`;
line(` ✗ ${reason}`);
line('');
bus.emit({ kind: 'cook-done', ok: false, reason });
recordCookExitStatus(false);
return;
}
Expand Down
8 changes: 6 additions & 2 deletions src/orchestrator/src/cow-copy.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { spawnSync } from 'node:child_process';
import { cpSync, existsSync, readdirSync, symlinkSync } from 'node:fs';
import { cpSync, existsSync, lstatSync, readdirSync, symlinkSync } from 'node:fs';
import { join, resolve } from 'node:path';

/**
Expand Down Expand Up @@ -50,7 +50,11 @@ export function copyMissingTopLevelEntries(
if (existsSync(destPath)) continue;
const sourcePath = join(source, entry);
if (symlink.has(entry)) {
symlinkSync(sourcePath, destPath);
symlinkSync(
sourcePath,
destPath,
process.platform === 'win32' && lstatSync(sourcePath).isDirectory() ? 'junction' : undefined,
);
} else {
cowCopy(sourcePath, destPath);
}
Expand Down
85 changes: 84 additions & 1 deletion src/orchestrator/src/pi-actions.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,16 @@ import { tmpdir } from 'node:os';
import { dirname, join } from 'node:path';
import { fileURLToPath } from 'node:url';

import { type Skill } from '@earendil-works/pi-coding-agent';
import { afterEach, describe, expect, it } from 'vitest';

import {
cookResourceLoader,
createPiActions,
epicVerifyTask,
instrumentToolDefinition,
runPi,
sandboxScopedSkills,
type SessionFactory,
sliceTestTask,
toolLabel,
Expand Down Expand Up @@ -185,7 +188,7 @@ describe('evaluate-done / verify-epic share the runner seam — failureKind is v
});
});

describe('verify-epic integration oracle (FE-876) — reachability folds into the epic verdict', () => {
describe('verify-epic reachability grounding (FE-876) — intent resolves before the epic verdict', () => {
const probeDirs: string[] = [];
afterEach(() => {
for (const dir of probeDirs.splice(0)) rmSync(dir, { recursive: true, force: true });
Expand Down Expand Up @@ -857,6 +860,25 @@ describe('runPi drives an in-process pi session (no subprocess)', () => {
expect(writes.join('')).not.toContain('SECRET_AGENT_OUTPUT');
});

it('caps activity heartbeat snippets including the ellipsis', async () => {
process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session';
const sandboxDir = mkdtempSync(join(tmpdir(), 'brunch-runpi-'));
const events: CookEvent[] = [];
createPiActions({ emit: (e) => events.push(e) });
try {
const fake = makeFakeSession({ emit: 'x'.repeat(2_048) });
const createSession = (async () => ({ session: fake.session })) as unknown as SessionFactory;
await runPi(baseOpts(sandboxDir, 'read'), { createSession });
} finally {
rmSync(sandboxDir, { recursive: true, force: true });
}
const progress = events.find(
(e): e is Extract<CookEvent, { kind: 'activity-progress' }> => e.kind === 'activity-progress',
);
expect(progress?.detail).toHaveLength(56);
expect(progress?.detail.startsWith('…')).toBe(true);
});

it('aborts the session and rejects when the prompt exceeds the timeout', async () => {
process.env.ANTHROPIC_API_KEY ??= 'test-key-unused-fake-session';
const sandboxDir = mkdtempSync(join(tmpdir(), 'brunch-runpi-'));
Expand Down Expand Up @@ -1321,3 +1343,64 @@ describe('evaluate-done failure carries a reason', () => {
expect(lastSlice(events)).toMatchObject({ status: 'failed', reason: 'infra error' });
});
});

describe('sandboxScopedSkills (FE-881) keeps only skills rooted under the sandbox', () => {
const skill = (filePath: string): Skill => ({
name: filePath,
description: '',
filePath,
baseDir: dirname(filePath),
sourceInfo: {} as Skill['sourceInfo'],
disableModelInvocation: false,
});

it('keeps repo skills, drops sibling-slice / prefix-lookalike / global skills', () => {
const sandbox = '/tmp/run/worktree/slice-a';
const kept = sandboxScopedSkills(
[
skill('/tmp/run/worktree/slice-a/.agents/skills/foo/SKILL.md'),
skill('/tmp/run/worktree/slice-a/.claude/skills/bar/SKILL.md'),
skill('/tmp/run/worktree/slice-b/.agents/skills/sibling/SKILL.md'),
skill('/tmp/run/worktree/slice-a-other/.agents/skills/lookalike/SKILL.md'),
skill('/home/dev/.pi/skills/global/SKILL.md'),
],
sandbox,
);
expect(kept.map((s) => s.name)).toEqual([
'/tmp/run/worktree/slice-a/.agents/skills/foo/SKILL.md',
'/tmp/run/worktree/slice-a/.claude/skills/bar/SKILL.md',
]);
});
});

describe('cookResourceLoader (FE-881) loads sandbox skills, excludes global', () => {
const dirs: string[] = [];
afterEach(() => {
for (const d of dirs) rmSync(d, { recursive: true, force: true });
dirs.length = 0;
});

const writeSkill = (root: string, name: string) => {
mkdirSync(join(root, name), { recursive: true });
writeFileSync(
join(root, name, 'SKILL.md'),
`---\nname: ${name}\ndescription: ${name} skill\n---\nbody\n`,
);
};

it('discovers the repo .agents/skills and drops agentDir (global) skills', async () => {
const sandbox = mkdtempSync(join(tmpdir(), 'cook-sandbox-'));
dirs.push(sandbox);
const agentDir = mkdtempSync(join(tmpdir(), 'cook-agent-'));
dirs.push(agentDir);
writeSkill(join(sandbox, '.agents', 'skills'), 'repo-skill');
writeSkill(join(agentDir, 'skills'), 'global-skill');

const loader = cookResourceLoader(sandbox, agentDir, 'system prompt');
await loader.reload();
const names = loader.getSkills().skills.map((s) => s.name);

expect(names).toContain('repo-skill');
expect(names).not.toContain('global-skill');
});
});
84 changes: 67 additions & 17 deletions src/orchestrator/src/pi-actions.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { mkdtempSync, readFileSync, rmSync } from 'node:fs';
import { existsSync, mkdtempSync, readFileSync, realpathSync, rmSync } from 'node:fs';
import { tmpdir } from 'node:os';
import { dirname, join } from 'node:path';
import { dirname, join, resolve, sep } from 'node:path';
import { fileURLToPath } from 'node:url';

import {
Expand All @@ -18,6 +18,7 @@ import {
ModelRegistry,
SessionManager,
SettingsManager,
type Skill,
type ToolDefinition,
} from '@earendil-works/pi-coding-agent';

Expand Down Expand Up @@ -70,7 +71,7 @@ function latestLine(text: string): string {
const lines = text.split('\n');
for (let i = lines.length - 1; i >= 0; i--) {
const line = lines[i]!.trim();
if (line) return line.length > HEARTBEAT_MAX ? `…${line.slice(-HEARTBEAT_MAX)}` : line;
if (line) return line.length > HEARTBEAT_MAX ? `…${line.slice(-(HEARTBEAT_MAX - 1))}` : line;
}
return '';
}
Expand Down Expand Up @@ -223,9 +224,63 @@ function piTimeoutError(timeoutMs: number): Error {
return new Error(`pi timed out after ${timeoutMs / 1000}s`);
}

// Map one action's inputs to SDK session config — tools/model/system-prompt, no
// context/skills, in-memory session. Auth from brunch's own ANTHROPIC_API_KEY, not
// the user's ~/.pi credentials, which is what keeps a fresh checkout self-contained.
/**
* Keep only skills rooted under `sandboxDir` (the cook worktree). Drops the
* developer's machine-global pi skills and any sibling-slice / look-alike paths,
* so a brownfield cook builds on the target repo's own skills without leaking the
* host's pi config — the "self-contained checkout" guarantee, narrowed from
* "no skills" to "no skills from outside the repo" (FE-881).
*/
export function sandboxScopedSkills(skills: Skill[], sandboxDir: string): Skill[] {
const root = resolve(sandboxDir);
const prefix = root + sep;
return skills.filter((s) => {
const p = resolve(s.filePath);
return p === root || p.startsWith(prefix);
});
Comment thread
cursor[bot] marked this conversation as resolved.
}

/**
* Resource loader for a cook agent session. The agent still runs on the task
* prompt (system-prompt override) with prompts and AGENTS files suppressed, but
* it now sees the target repo's own skills: pi's default discovery scans
* `<cwd>/<config>/skills` + `<agentDir>/skills` rather than the Agent-Skills
* convention dirs, so we point it at the repo's `.agents/skills` / `.claude/skills`
* (deduped by realpath since brunch-style repos symlink the two) and filter the
* result to paths under the sandbox. Greenfield worktrees have no such dir, so
* this resolves empty and leaves greenfield behavior unchanged.
*/
export function cookResourceLoader(
sandboxDir: string,
agentDir: string,
systemPrompt: string,
): DefaultResourceLoader {
const skillDirs = [
...new Set(
[join(sandboxDir, '.agents', 'skills'), join(sandboxDir, '.claude', 'skills')]
.filter((d) => existsSync(d))
.map((d) => realpathSync(d)),
),
];
return new DefaultResourceLoader({
cwd: sandboxDir,
agentDir,
systemPromptOverride: () => systemPrompt,
appendSystemPromptOverride: () => [],
additionalSkillPaths: skillDirs,
agentsFilesOverride: () => ({ agentsFiles: [] }),
skillsOverride: (base) => ({
skills: sandboxScopedSkills(base.skills, sandboxDir),
diagnostics: base.diagnostics,
}),
promptsOverride: () => ({ prompts: [], diagnostics: [] }),
});
}

// Map one action's inputs to SDK session config — tools/model/system-prompt +
// the target repo's sandbox-scoped skills (see cookResourceLoader), in-memory
// session. Auth from brunch's own ANTHROPIC_API_KEY, not the user's ~/.pi
// credentials, which keeps a fresh checkout self-contained.
async function buildSessionOptions(
opts: RunPiOpts,
isolatedDir: string,
Expand All @@ -247,15 +302,7 @@ async function buildSessionOptions(
}

const systemPrompt = readFileSync(opts.promptFile, 'utf8');
const resourceLoader = new DefaultResourceLoader({
cwd: opts.sandboxDir,
agentDir: isolatedDir,
systemPromptOverride: () => systemPrompt,
appendSystemPromptOverride: () => [],
agentsFilesOverride: () => ({ agentsFiles: [] }),
skillsOverride: () => ({ skills: [], diagnostics: [] }),
promptsOverride: () => ({ prompts: [], diagnostics: [] }),
});
const resourceLoader = cookResourceLoader(opts.sandboxDir, isolatedDir, systemPrompt);
await resourceLoader.reload();

// Supply the built-in tools ourselves (instrumented), instead of the `tools`
Expand Down Expand Up @@ -311,9 +358,10 @@ async function runPi(
_emit({ kind: 'activity-start', id: activityId, label: opts.label });
let heartbeatKb = 0;

const isolatedDir = createAgentDir();
let isolatedDir: string | undefined;
let cleanedAgentDir = false;
const cleanupAgentDir = (): void => {
if (!isolatedDir) return;
if (cleanedAgentDir) return;
cleanedAgentDir = true;
removeAgentDir(isolatedDir);
Expand Down Expand Up @@ -366,9 +414,11 @@ async function runPi(
});

try {
isolatedDir = createAgentDir();
const agentDir = isolatedDir;
const setup = (async () => {
const created = await createSession(
await buildSessionOptions(opts, isolatedDir, { onStart: onToolStart, onSettle: onToolSettle }),
await buildSessionOptions(opts, agentDir, { onStart: onToolStart, onSettle: onToolSettle }),
);
if (timedOut) {
created.session.dispose();
Expand Down
6 changes: 6 additions & 0 deletions src/orchestrator/src/presenter/phase.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ describe('nextPhase', () => {
expect(nextPhase('cook', { kind: 'cook-done', ok: false })).toBe('cook');
});

it('does not light plate for a no-promotion failure message', () => {
expect(nextPhase('cook', { kind: 'line', text: ' ! run did not complete — nothing promoted.' })).toBe(
'cook',
);
});

it('never regresses to an earlier phase', () => {
expect(nextPhase('serve', { kind: 'cook-start', runStart: 0 })).toBe('serve');
expect(nextPhase('taste', { kind: 'action', icon: '▸', message: 'tests slice-2' })).toBe('taste');
Expand Down
2 changes: 1 addition & 1 deletion src/orchestrator/src/presenter/phase.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ function phaseFor(event: CookEvent, ctx?: PhaseContext): BrigadePhase | undefine
return ctx.epics.every((e) => ctx.verdictedEpics?.has(e)) ? 'taste' : undefined;
}
case 'line':
return event.text.includes('promoted') ? 'plate' : undefined;
return /^\s*✓\s+promoted\b/.test(event.text) ? 'plate' : undefined;
case 'cook-done':
// ship→serve: the run completed (emitted after promotion). A halted run
// does not ship, so it never lights serve.
Expand Down
Loading