Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 140 additions & 0 deletions packages/warden/src/cli/files.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import {
createSyntheticFileChange,
expandFileGlobs,
expandAndCreateFileChanges,
getEffectivePrunePatterns,
} from './files.js';

function initGitRepo(dir: string): void {
Expand Down Expand Up @@ -108,6 +109,45 @@ describe('createSyntheticFileChange', () => {
});
});

describe('getEffectivePrunePatterns', () => {
  // Built-in prune entries the assertions below keep coming back to.
  const VENDOR_PRUNE = '**/vendor/**';
  const NODE_MODULES_PRUNE = '**/node_modules/**';

  it('returns all built-in prune patterns when no user overrides', () => {
    const effective = getEffectivePrunePatterns();
    for (const builtin of [VENDOR_PRUNE, NODE_MODULES_PRUNE, '**/dist/**']) {
      expect(effective).toContain(builtin);
    }
  });

  it('returns all built-in prune patterns when user paths have no negations', () => {
    const effective = getEffectivePrunePatterns(['*.log', 'tmp/']);
    for (const builtin of [VENDOR_PRUNE, NODE_MODULES_PRUNE]) {
      expect(effective).toContain(builtin);
    }
  });

  it('removes vendor prune when user has a !vendor negation', () => {
    const effective = getEffectivePrunePatterns(['!vendor/**']);
    expect(effective).not.toContain(VENDOR_PRUNE);
    // Negating one directory must leave the remaining prune entries in place.
    expect(effective).toContain(NODE_MODULES_PRUNE);
  });

  it('removes node_modules prune when user has a !node_modules negation', () => {
    const effective = getEffectivePrunePatterns(['!node_modules/**']);
    expect(effective).not.toContain(NODE_MODULES_PRUNE);
    expect(effective).toContain(VENDOR_PRUNE);
  });

  it('handles negation with path separator prefix', () => {
    // The directory name sits in the middle of the negated path here.
    const effective = getEffectivePrunePatterns(['!src/vendor/special/**']);
    expect(effective).not.toContain(VENDOR_PRUNE);
  });

  it('handles undefined user paths gracefully', () => {
    const callWithUndefined = (): string[] => getEffectivePrunePatterns(undefined);
    expect(callWithUndefined).not.toThrow();
    expect(callWithUndefined()).toContain(VENDOR_PRUNE);
  });
});

describe('expandFileGlobs', () => {
let tempDir: string;

Expand Down Expand Up @@ -193,6 +233,61 @@ describe('expandFileGlobs', () => {
expect(files).toHaveLength(0);
});

describe('built-in directory pruning', () => {
  /**
   * Writes `contents` to the file addressed by `segments` (directory names
   * followed by the file name) below the shared `tempDir`, creating the
   * parent directories first.
   */
  const seedFile = (segments: string[], contents: string): void => {
    mkdirSync(join(tempDir, ...segments.slice(0, -1)), { recursive: true });
    writeFileSync(join(tempDir, ...segments), contents);
  };

  /** True when at least one returned path contains `fragment`. */
  const anyPathContains = (paths: string[], fragment: string): boolean =>
    paths.some((candidate) => candidate.includes(fragment));

  it('prunes vendor/ directory by default without gitignore', async () => {
    // Laravel-style layout: application code next to a vendor/ tree of PHP files.
    seedFile(['app', 'Controller.php'], '<?php class Controller {}');
    seedFile(['vendor', 'laravel', 'framework', 'Framework.php'], '<?php');

    const matched = await expandFileGlobs(['**/*.php'], tempDir);

    expect(anyPathContains(matched, 'app/Controller.php')).toBe(true);
    expect(anyPathContains(matched, 'vendor/')).toBe(false);
  });

  it('prunes node_modules/ directory by default', async () => {
    seedFile(['src', 'index.ts'], 'export {}');
    seedFile(['node_modules', 'pkg', 'index.ts'], 'module');

    const matched = await expandFileGlobs(['**/*.ts'], tempDir);

    expect(anyPathContains(matched, 'src/index.ts')).toBe(true);
    expect(anyPathContains(matched, 'node_modules/')).toBe(false);
  });

  it('prunes vendor/ even when not in a git repo (no gitignore fallback needed)', async () => {
    // Deliberately no git init: the prune has to work at the fast-glob level alone.
    seedFile(['app', 'main.php'], '<?php');
    seedFile(['vendor', 'lib', 'dep.php'], '<?php');

    const matched = await expandFileGlobs(['**/*.php'], tempDir);

    expect(anyPathContains(matched, 'app/main.php')).toBe(true);
    expect(anyPathContains(matched, 'vendor/')).toBe(false);
  });

  it('re-includes vendor/ when user ignore has a !vendor negation', async () => {
    seedFile(['app', 'main.php'], '<?php class App {}');
    seedFile(['vendor', 'lib', 'dep.php'], '<?php class Dep {}');

    const matched = await expandFileGlobs(['**/*.php'], {
      cwd: tempDir,
      ignore: { paths: ['!vendor/**'] },
    });

    expect(anyPathContains(matched, 'app/main.php')).toBe(true);
    expect(anyPathContains(matched, 'vendor/lib/dep.php')).toBe(true);
  });
});

describe('gitignore support', () => {
it('excludes files matching .gitignore patterns by default', async () => {
initGitRepo(tempDir);
Expand Down Expand Up @@ -356,4 +451,49 @@ describe('expandAndCreateFileChanges', () => {
expect(file2).toBeDefined();
expect(file2?.additions).toBe(2);
});

it('passes ignore config through so user negations can re-include pruned dirs', async () => {
  const phpGlob = ['**/*.php'];
  const touchesVendor = (changes: { filename: string }[]): boolean =>
    changes.some((change) => change.filename.includes('vendor/'));

  // Fixture: one application file and one file inside vendor/.
  for (const dir of [join(tempDir, 'app'), join(tempDir, 'vendor', 'lib')]) {
    mkdirSync(dir, { recursive: true });
  }
  writeFileSync(join(tempDir, 'app', 'main.php'), '<?php class App {}');
  writeFileSync(join(tempDir, 'vendor', 'lib', 'dep.php'), '<?php class Dep {}');

  // Default behaviour: nothing from vendor/ is returned.
  const defaultChanges = await expandAndCreateFileChanges(phpGlob, tempDir);
  expect(touchesVendor(defaultChanges)).toBe(false);

  // A user negation in the ignore config brings vendor/ back.
  const negatedChanges = await expandAndCreateFileChanges(phpGlob, tempDir, {
    ignore: { paths: ['!vendor/**'] },
  });
  expect(touchesVendor(negatedChanges)).toBe(true);
});
});

describe('WardenGlobExpansionError / MAX_GLOB_FILE_RESULTS guardrail', () => {
  it('throws WardenGlobExpansionError when glob matches too many files', async () => {
    const { WardenGlobExpansionError, MAX_GLOB_FILE_RESULTS } = await import('./files.js');
    // Aliased so these do not shadow any same-named imports at the top of the file.
    const { mkdtempSync, rmSync, writeFileSync: writeFixture } = await import('node:fs');
    const { join: joinPath } = await import('node:path');
    const { tmpdir } = await import('node:os');

    // mkdtempSync appends a random suffix, so parallel workers that start in
    // the same millisecond can never share (and then delete) one directory.
    const guardrailDir = mkdtempSync(joinPath(tmpdir(), 'warden-guardrail-test-'));

    try {
      // Write MAX_GLOB_FILE_RESULTS + 1 files to trigger the guardrail.
      const fileCount = MAX_GLOB_FILE_RESULTS + 1;
      for (let index = 0; index < fileCount; index++) {
        writeFixture(joinPath(guardrailDir, `file${index}.ts`), `// file ${index}`);
      }

      // Expand once and assert twice on the same rejected promise; enumerating
      // ten thousand files a second time adds runtime without adding coverage.
      const expansion = expandFileGlobs(['**/*.ts'], guardrailDir);
      await expect(expansion).rejects.toThrow(WardenGlobExpansionError);
      await expect(expansion).rejects.toThrow(/Glob pattern matched/);
    } finally {
      rmSync(guardrailDir, { recursive: true, force: true });
    }
  });
});
116 changes: 109 additions & 7 deletions packages/warden/src/cli/files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,93 @@
import { execGitNonInteractive } from '../utils/exec.js';
import { isRepoRelativePath, normalizePath } from '../utils/path.js';

/**
 * Hard upper bound on the number of files fast-glob may return from a single
 * expandFileGlobs call. Reaching it almost always means a dependency tree
 * (vendor/, node_modules/, …) escaped the prune list — likely because the user
 * negated the prune pattern in their config. Fail fast with an actionable
 * message rather than silently burning memory.
 */
export const MAX_GLOB_FILE_RESULTS = 10_000;

/**
 * Thrown by expandFileGlobs when the glob expansion reaches or exceeds
 * MAX_GLOB_FILE_RESULTS candidates. The message is written for end users: it
 * reports both numbers and lists concrete ways to narrow the scan.
 */
export class WardenGlobExpansionError extends Error {
  /**
   * @param count - Number of files the glob expansion produced.
   * @param limit - The limit that was hit (callers pass MAX_GLOB_FILE_RESULTS).
   */
  constructor(count: number, limit: number) {
    super(
      `Glob pattern matched ${count.toLocaleString()} files (limit is ${limit.toLocaleString()}).\n` +
        `This usually means a dependency directory (vendor/, node_modules/, …) is being scanned.\n` +
        `\nTry one of:\n` +
        ` • Quote the pattern to avoid shell expansion: warden 'dieter/**/*.php'\n` +
        ` • Narrow to your application code: warden dieter/app/**/*.php\n` +
        ` • Keep dependency dirs explicitly excluded in warden.toml:\n` +
        ` [defaults.ignore]\n` +
        ` paths = ["**/vendor/**"]`,
    );
    // Set explicitly so logs and `error.name` checks show the subclass name.
    this.name = 'WardenGlobExpansionError';
  }
}

/**
 * Directory patterns that are safe to prune at traversal time — before fast-glob
 * returns results. These are the same large dependency / generated-output
 * directories that BUILTIN_IGNORE_PATTERNS in scan-policy blocks after the fact.
 * Pruning them early prevents fast-glob from traversing tens-of-thousands of
 * files inside a vendor/ or node_modules/ tree when a broad glob like
 * `dieter/**\/*.php` is used against a new Laravel app.
 *
 * Exported so the gitignore fallback scan can reuse the list consistently.
 */
export const BUILTIN_PRUNE_DIRECTORY_PATTERNS = [
  '**/node_modules/**',
  '**/vendor/**',
  '**/dist/**',
  '**/build/**',
  '**/.next/**',
  '**/.nuxt/**',
  '**/out/**',
  '**/coverage/**',
  '**/.cache/**',
] as const;

/**
 * Compute the fast-glob ignore list, starting from BUILTIN_PRUNE_DIRECTORY_PATTERNS
 * and removing any directory that is explicitly un-ignored by a user negation
 * pattern (e.g. `!vendor/**`). This lets advanced users opt a dependency
 * directory back in without breaking the default safety behaviour.
 *
 * A negation re-includes a pruned directory only when one of its `/`-separated
 * path segments is exactly that directory's name. `!vendor`, `!vendor/**` and
 * `!src/vendor/special/**` all re-include `vendor`, whereas `!checkout/**` or
 * `!src/builder.ts` leave the `out` and `build` prunes untouched. Segments are
 * compared literally; glob syntax inside a segment is not expanded.
 *
 * @param userIgnorePaths - Ignore paths from the user's config; only entries
 *   that start with `!` are considered. May be omitted.
 * @returns A new array holding the prune patterns that remain in effect.
 */
export function getEffectivePrunePatterns(userIgnorePaths?: string[]): string[] {
  // Gather every path segment that appears in any negation entry.
  const reincludedSegments = new Set<string>();
  for (const entry of userIgnorePaths ?? []) {
    if (!entry.startsWith('!')) continue;
    for (const segment of entry.slice(1).split('/')) {
      if (segment !== '') reincludedSegments.add(segment);
    }
  }

  if (reincludedSegments.size === 0) {
    return [...BUILTIN_PRUNE_DIRECTORY_PATTERNS];
  }

  // Captures the bare directory name from a pattern shaped like '**/vendor/**'.
  const directoryNamePattern = /\*\*\/([^/]+)\/\*\*/;

  return BUILTIN_PRUNE_DIRECTORY_PATTERNS.filter((prunePattern) => {
    const directoryName = directoryNamePattern.exec(prunePattern)?.[1];
    // Keep patterns we cannot parse; otherwise keep unless the user re-included it.
    return directoryName === undefined || !reincludedSegments.has(directoryName);
  });
}

/**
 * Options accepted by expandFileGlobs. Every field is optional.
 */
export interface ExpandGlobOptions {
  /** Working directory for glob expansion (default: process.cwd()) */
  cwd?: string;
  /** Respect .gitignore files (default: true) */
  gitignore?: boolean;
  /**
   * User-configured ignore rules from warden config. Negation patterns inside
   * `paths` (e.g. `!vendor/**`) override the built-in directory prune list so
   * that users who intentionally want to scan dependency trees can do so.
   * Only `paths` is consulted when the prune list is computed.
   */
  ignore?: IgnoreConfig;
}

export interface SyntheticFileChangeOptions {
Expand Down Expand Up @@ -121,13 +203,14 @@
: [];
} catch {
// Not a real git repo or git not available. Walk directories manually,
// skipping common large directories that would never contain relevant
// .gitignore files.
// skipping large directories that would never contain relevant .gitignore
// files. Reuse the same prune list used by expandFileGlobs() so behaviour
// is consistent across both code paths.
gitignoreFiles = fg.sync('**/.gitignore', {
cwd: gitRoot,
absolute: true,
dot: true,
ignore: ['**/.git/**', '**/node_modules/**'],
ignore: ['**/.git/**', ...BUILTIN_PRUNE_DIRECTORY_PATTERNS],
});
}

Expand Down Expand Up @@ -163,6 +246,12 @@
* By default, respects .gitignore files to automatically exclude ignored
* directories like node_modules/. This can be disabled by setting
* gitignore: false.
*
* Large dependency and generated-output directories (vendor/, node_modules/,
* dist/, …) are also pruned at traversal time via BUILTIN_PRUNE_DIRECTORY_PATTERNS
* so that broad globs like `dieter/**\/*.php` against a Laravel app do not
* cause fast-glob to enumerate tens-of-thousands of files before the
* post-enumeration scan policy has a chance to skip them.
*/
export async function expandFileGlobs(
patterns: string[],
Expand All @@ -175,16 +264,27 @@
const useGitignore = options.gitignore ?? true;
const expandedPatterns = patterns.map((pattern) => expandDirectoryPattern(pattern, cwd));

// Get all matching files first
// Compute directory prune list, honouring user negation overrides.
const prunePatterns = getEffectivePrunePatterns(options.ignore?.paths);

// Enumerate matching files. Built-in directory prune patterns are applied at
// this stage so fast-glob never descends into vendor/ or node_modules/ trees,
// preventing excessive memory use when scanning broad globs. Gitignore-based
// filtering and the full BUILTIN_IGNORE_PATTERNS check happen afterward.
const files = await fg(expandedPatterns, {
cwd,
onlyFiles: true,
absolute: true,
dot: false,
// Always exclude .git directory
ignore: ['**/.git/**'],
Comment thread
cursor[bot] marked this conversation as resolved.
ignore: ['**/.git/**', ...prunePatterns],
});

// Guard against pathological expansion — e.g. user negated all prune patterns
// while pointing at a directory with tens-of-thousands of files.
if (files.length >= MAX_GLOB_FILE_RESULTS) {
throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS);
}

Check warning on line 287 in packages/warden/src/cli/files.ts

View check run for this annotation

@sentry/warden / warden: code-review

`MAX_GLOB_FILE_RESULTS` guard counts pre-gitignore results, throwing false errors on repos with large gitignored dirs outside the prune list

The `files.length >= MAX_GLOB_FILE_RESULTS` check runs right after `fg()`, before gitignore rules are applied. A repo with a gitignored directory that is not in `BUILTIN_PRUNE_DIRECTORY_PATTERNS` (e.g. Python `.venv/`, `target/`, `__pycache__/`) containing 10,000+ matching files trips the guard and throws `WardenGlobExpansionError`, causing warden to exit 1 even though gitignore filtering would have reduced the result to the real source files. Move the guard after the gitignore filtering step (or after the `!useGitignore`/no-git-root returns) so it bounds the final result set.
// If gitignore is disabled, return files as-is
if (!useGitignore) {
return files.sort();
Expand Down Expand Up @@ -298,6 +398,8 @@
options: SyntheticFileChangeOptions = {}
): Promise<FileChange[]> {
const resolvedCwd = resolve(cwd);
const files = await expandFileGlobs(patterns, resolvedCwd);
Comment thread
cursor[bot] marked this conversation as resolved.
// Pass the ignore config so that user negation patterns can override built-in
// prune directories at traversal time (e.g. `!vendor/**` re-includes vendor).
const files = await expandFileGlobs(patterns, { cwd: resolvedCwd, ignore: options.ignore });
return createSyntheticFileChanges(files, resolvedCwd, options);
}
23 changes: 17 additions & 6 deletions packages/warden/src/cli/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { isRepoRelativePath, normalizePath, resolveConfigInput } from '../utils/
import { parseCliArgs, showVersion, classifyTargets, expandTargetFileReferences, type CLIOptions } from './args.js';
import { showHelp } from './help.js';
import { buildLocalEventContext, buildFileEventContext } from './context.js';
import { WardenGlobExpansionError } from './files.js';
import { getRepoRoot, getHeadSha, refExists, getDefaultBranch } from './git.js';
import { renderTerminalReport, filterReports } from './terminal.js';
import {
Expand Down Expand Up @@ -1271,12 +1272,22 @@ async function runFileMode(filePatterns: string[], options: CLIOptions, reporter

// Build context from files
reporter.step('Building context from files...');
const context = await buildFileEventContext({
patterns: filePatterns,
cwd,
ignore: config?.defaults?.ignore,
scan: config?.defaults?.scan,
});
let context: Awaited<ReturnType<typeof buildFileEventContext>>;
try {
context = await buildFileEventContext({
patterns: filePatterns,
cwd,
ignore: config?.defaults?.ignore,
scan: config?.defaults?.scan,
});
} catch (error) {
if (error instanceof WardenGlobExpansionError) {
reporter.error(error.message);
return 1;
}
reporter.error('Failed to build context');
return 1;

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Context build hides error text

Low Severity

The new runFileMode try/catch reports a helpful message for WardenGlobExpansionError, but any other error from buildFileEventContext is reported only as Failed to build context, dropping the underlying Error message that would explain I/O or config failures.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 8c4d39e. Configure here.

}

const pullRequest = context.pullRequest;
if (!pullRequest) {
Expand Down
Loading