From bddfaed0ed87b5d54697a7f871ebccfabe7b5ad2 Mon Sep 17 00:00:00 2001
From: immutable dcramer
Date: Tue, 16 Jun 2026 23:07:39 +0000
Subject: [PATCH 1/4] fix(files): prune dependency dirs in expandFileGlobs
 before fast-glob traversal
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BUILTIN_IGNORE_PATTERNS already skips vendor/, node_modules/, dist/ etc.
after enumeration (in createSyntheticFileChange via getPrePatchFileSkip),
but fast-glob was still traversing those trees before the skip could
apply.

For a new Laravel app the vendor/ tree can contain 10,000–50,000 PHP
files. Running `warden dieter/**/*.php` caused fast-glob to enumerate
that entire tree, creating extreme memory pressure that likely triggered
the reported segfault/crash.

Fix: introduce BUILTIN_PRUNE_DIRECTORY_PATTERNS and
getEffectivePrunePatterns() so that directory-level ignores are applied
to the fast-glob ignore list at traversal time. User negation patterns
(e.g. `!vendor/**` in warden config) are respected and remove the
corresponding prune entry, allowing advanced users to re-include a
dependency directory when needed.

Also updates the gitignore-fallback directory scan to use the same prune
list (previously only skipped node_modules/, now skips all built-in
prune dirs) so behaviour is consistent across both code paths.

expandAndCreateFileChanges now threads the ignore config through to
expandFileGlobs so user negation overrides reach the traversal layer.

Co-Authored-By: sentry-junior[bot] <264270552+sentry-junior[bot]@users.noreply.github.com> --- packages/warden/src/cli/files.test.ts | 112 ++++++++++++++++++++++++++ packages/warden/src/cli/files.ts | 81 +++++++++++++++++-- 2 files changed, 186 insertions(+), 7 deletions(-) diff --git a/packages/warden/src/cli/files.test.ts b/packages/warden/src/cli/files.test.ts index ca87314b..1c1ea674 100644 --- a/packages/warden/src/cli/files.test.ts +++ b/packages/warden/src/cli/files.test.ts @@ -8,6 +8,7 @@ import { createSyntheticFileChange, expandFileGlobs, expandAndCreateFileChanges, + getEffectivePrunePatterns, } from './files.js'; function initGitRepo(dir: string): void { @@ -108,6 +109,45 @@ describe('createSyntheticFileChange', () => { }); }); +describe('getEffectivePrunePatterns', () => { + it('returns all built-in prune patterns when no user overrides', () => { + const patterns = getEffectivePrunePatterns(); + expect(patterns).toContain('**/vendor/**'); + expect(patterns).toContain('**/node_modules/**'); + expect(patterns).toContain('**/dist/**'); + }); + + it('returns all built-in prune patterns when user paths have no negations', () => { + const patterns = getEffectivePrunePatterns(['*.log', 'tmp/']); + expect(patterns).toContain('**/vendor/**'); + expect(patterns).toContain('**/node_modules/**'); + }); + + it('removes vendor prune when user has a !vendor negation', () => { + const patterns = getEffectivePrunePatterns(['!vendor/**']); + expect(patterns).not.toContain('**/vendor/**'); + // other prune patterns are unaffected + expect(patterns).toContain('**/node_modules/**'); + }); + + it('removes node_modules prune when user has a !node_modules negation', () => { + const patterns = getEffectivePrunePatterns(['!node_modules/**']); + expect(patterns).not.toContain('**/node_modules/**'); + expect(patterns).toContain('**/vendor/**'); + }); + + it('handles negation with path separator prefix', () => { + const patterns = 
getEffectivePrunePatterns(['!src/vendor/special/**']); + expect(patterns).not.toContain('**/vendor/**'); + }); + + it('handles undefined user paths gracefully', () => { + expect(() => getEffectivePrunePatterns(undefined)).not.toThrow(); + const patterns = getEffectivePrunePatterns(undefined); + expect(patterns).toContain('**/vendor/**'); + }); +}); + describe('expandFileGlobs', () => { let tempDir: string; @@ -193,6 +233,61 @@ describe('expandFileGlobs', () => { expect(files).toHaveLength(0); }); + describe('built-in directory pruning', () => { + it('prunes vendor/ directory by default without gitignore', async () => { + // Simulate a new laravel-style app: app code + vendor/ with PHP files + mkdirSync(join(tempDir, 'app'), { recursive: true }); + mkdirSync(join(tempDir, 'vendor', 'laravel', 'framework'), { recursive: true }); + writeFileSync(join(tempDir, 'app', 'Controller.php'), ' f.includes('app/Controller.php'))).toBe(true); + expect(files.some(f => f.includes('vendor/'))).toBe(false); + }); + + it('prunes node_modules/ directory by default', async () => { + mkdirSync(join(tempDir, 'src'), { recursive: true }); + mkdirSync(join(tempDir, 'node_modules', 'pkg'), { recursive: true }); + writeFileSync(join(tempDir, 'src', 'index.ts'), 'export {}'); + writeFileSync(join(tempDir, 'node_modules', 'pkg', 'index.ts'), 'module'); + + const files = await expandFileGlobs(['**/*.ts'], tempDir); + + expect(files.some(f => f.includes('src/index.ts'))).toBe(true); + expect(files.some(f => f.includes('node_modules/'))).toBe(false); + }); + + it('prunes vendor/ even when not in a git repo (no gitignore fallback needed)', async () => { + // No git init — this tests that the fast-glob level prune works independently + mkdirSync(join(tempDir, 'app'), { recursive: true }); + mkdirSync(join(tempDir, 'vendor', 'lib'), { recursive: true }); + writeFileSync(join(tempDir, 'app', 'main.php'), ' f.includes('app/main.php'))).toBe(true); + expect(files.some(f => 
f.includes('vendor/'))).toBe(false); + }); + + it('re-includes vendor/ when user ignore has a !vendor negation', async () => { + mkdirSync(join(tempDir, 'app'), { recursive: true }); + mkdirSync(join(tempDir, 'vendor', 'lib'), { recursive: true }); + writeFileSync(join(tempDir, 'app', 'main.php'), ' f.includes('app/main.php'))).toBe(true); + expect(files.some(f => f.includes('vendor/lib/dep.php'))).toBe(true); + }); + }); + describe('gitignore support', () => { it('excludes files matching .gitignore patterns by default', async () => { initGitRepo(tempDir); @@ -356,4 +451,21 @@ describe('expandAndCreateFileChanges', () => { expect(file2).toBeDefined(); expect(file2?.additions).toBe(2); }); + + it('passes ignore config through so user negations can re-include pruned dirs', async () => { + mkdirSync(join(tempDir, 'app'), { recursive: true }); + mkdirSync(join(tempDir, 'vendor', 'lib'), { recursive: true }); + writeFileSync(join(tempDir, 'app', 'main.php'), ' f.filename.includes('vendor/'))).toBe(false); + + // With negation: vendor is re-included at traversal time + const withOverride = await expandAndCreateFileChanges(['**/*.php'], tempDir, { + ignore: { paths: ['!vendor/**'] }, + }); + expect(withOverride.some(f => f.filename.includes('vendor/'))).toBe(true); + }); }); diff --git a/packages/warden/src/cli/files.ts b/packages/warden/src/cli/files.ts index 9fc39558..4c378363 100644 --- a/packages/warden/src/cli/files.ts +++ b/packages/warden/src/cli/files.ts @@ -9,11 +9,64 @@ import { getPrePatchFileSkip } from '../sdk/scan-policy.js'; import { execGitNonInteractive } from '../utils/exec.js'; import { isRepoRelativePath, normalizePath } from '../utils/path.js'; +/** + * Directory patterns that are safe to prune at traversal time — before fast-glob + * returns results. These are the same large dependency / generated-output + * directories that BUILTIN_IGNORE_PATTERNS in scan-policy blocks after the fact. 
+ * Pruning them early prevents fast-glob from traversing tens-of-thousands of + * files inside a vendor/ or node_modules/ tree when a broad glob like + * `dieter/**\/*.php` is used against a new Laravel app. + * + * Exported so the gitignore fallback scan can reuse the list consistently. + */ +export const BUILTIN_PRUNE_DIRECTORY_PATTERNS = [ + '**/node_modules/**', + '**/vendor/**', + '**/dist/**', + '**/build/**', + '**/.next/**', + '**/.nuxt/**', + '**/out/**', + '**/coverage/**', + '**/.cache/**', +] as const; + +/** + * Compute the fast-glob ignore list, starting from BUILTIN_PRUNE_DIRECTORY_PATTERNS + * and removing any directory whose name is explicitly un-ignored by a user + * negation pattern (e.g. `!vendor/**`). This lets advanced users opt a + * dependency directory back in without breaking the default safety behaviour. + */ +export function getEffectivePrunePatterns(userIgnorePaths?: string[]): string[] { + const negations = (userIgnorePaths ?? []) + .filter((p) => p.startsWith('!')) + .map((p) => p.slice(1)); + + if (!negations.length) { + return [...BUILTIN_PRUNE_DIRECTORY_PATTERNS]; + } + + return BUILTIN_PRUNE_DIRECTORY_PATTERNS.filter((prunePattern) => { + // Extract the bare directory name from a pattern like '**/vendor/**' + const match = prunePattern.match(/\*\*\/([^/]+)\/\*\*/); + if (!match) return true; + const dirName = match[1]; + // Drop this prune entry if any negation path mentions the directory + return !negations.some((neg) => neg.includes(`${dirName}/`) || neg.includes(`/${dirName}`)); + }); +} + export interface ExpandGlobOptions { /** Working directory for glob expansion (default: process.cwd()) */ cwd?: string; /** Respect .gitignore files (default: true) */ gitignore?: boolean; + /** + * User-configured ignore rules from warden config. Negation patterns inside + * `paths` (e.g. `!vendor/**`) override the built-in directory prune list so + * that users who intentionally want to scan dependency trees can do so. 
+ */ + ignore?: IgnoreConfig; } export interface SyntheticFileChangeOptions { @@ -121,13 +174,14 @@ function loadGitignoreRules(gitRoot: string): Ignore { : []; } catch { // Not a real git repo or git not available. Walk directories manually, - // skipping common large directories that would never contain relevant - // .gitignore files. + // skipping large directories that would never contain relevant .gitignore + // files. Reuse the same prune list used by expandFileGlobs() so behaviour + // is consistent across both code paths. gitignoreFiles = fg.sync('**/.gitignore', { cwd: gitRoot, absolute: true, dot: true, - ignore: ['**/.git/**', '**/node_modules/**'], + ignore: ['**/.git/**', ...BUILTIN_PRUNE_DIRECTORY_PATTERNS], }); } @@ -163,6 +217,12 @@ function loadGitignoreRules(gitRoot: string): Ignore { * By default, respects .gitignore files to automatically exclude ignored * directories like node_modules/. This can be disabled by setting * gitignore: false. + * + * Large dependency and generated-output directories (vendor/, node_modules/, + * dist/, …) are also pruned at traversal time via BUILTIN_PRUNE_DIRECTORY_PATTERNS + * so that broad globs like `dieter/**\/*.php` against a Laravel app do not + * cause fast-glob to enumerate tens-of-thousands of files before the + * post-enumeration scan policy has a chance to skip them. */ export async function expandFileGlobs( patterns: string[], @@ -175,14 +235,19 @@ export async function expandFileGlobs( const useGitignore = options.gitignore ?? true; const expandedPatterns = patterns.map((pattern) => expandDirectoryPattern(pattern, cwd)); - // Get all matching files first + // Compute directory prune list, honouring user negation overrides. + const prunePatterns = getEffectivePrunePatterns(options.ignore?.paths); + + // Enumerate matching files. 
Built-in directory prune patterns are applied at + // this stage so fast-glob never descends into vendor/ or node_modules/ trees, + // preventing excessive memory use when scanning broad globs. Gitignore-based + // filtering and the full BUILTIN_IGNORE_PATTERNS check happen afterward. const files = await fg(expandedPatterns, { cwd, onlyFiles: true, absolute: true, dot: false, - // Always exclude .git directory - ignore: ['**/.git/**'], + ignore: ['**/.git/**', ...prunePatterns], }); // If gitignore is disabled, return files as-is @@ -298,6 +363,8 @@ export async function expandAndCreateFileChanges( options: SyntheticFileChangeOptions = {} ): Promise { const resolvedCwd = resolve(cwd); - const files = await expandFileGlobs(patterns, resolvedCwd); + // Pass the ignore config so that user negation patterns can override built-in + // prune directories at traversal time (e.g. `!vendor/**` re-includes vendor). + const files = await expandFileGlobs(patterns, { cwd: resolvedCwd, ignore: options.ignore }); return createSyntheticFileChanges(files, resolvedCwd, options); } From 8c4d39e6389790114dfbc48688af4e19754f9ca5 Mon Sep 17 00:00:00 2001 From: immutable dcramer Date: Tue, 16 Jun 2026 23:12:03 +0000 Subject: [PATCH 2/4] fix(files): add MAX_GLOB_FILE_RESULTS guardrail and WardenGlobExpansionError MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the built-in directory prune list is partially overridden (e.g. user negates !vendor/** in warden config) and a broad glob is run against a tree with more than 10,000 files, warden now throws WardenGlobExpansionError immediately with an actionable error message rather than silently consuming memory until crash. runFileMode catches the error and surfaces it via reporter.error so it renders cleanly in both TTY and JSON output modes. Message example: Glob pattern matched 15,432 files (limit is 10,000). This usually means a dependency directory (vendor/, node_modules/, ...) is being scanned. 
Try one of: • Quote the pattern to avoid shell expansion: warden 'dieter/**/*.php' • Narrow to your application code: warden dieter/app/**/*.php • Keep dependency dirs explicitly excluded in warden.toml: [defaults.ignore] paths = ["**/vendor/**"] Co-Authored-By: sentry-junior[bot] <264270552+sentry-junior[bot]@users.noreply.github.com> --- packages/warden/src/cli/files.test.ts | 28 +++++++++++++++++++++ packages/warden/src/cli/files.ts | 35 +++++++++++++++++++++++++++ packages/warden/src/cli/main.ts | 23 +++++++++++++----- 3 files changed, 80 insertions(+), 6 deletions(-) diff --git a/packages/warden/src/cli/files.test.ts b/packages/warden/src/cli/files.test.ts index 1c1ea674..9edaf83f 100644 --- a/packages/warden/src/cli/files.test.ts +++ b/packages/warden/src/cli/files.test.ts @@ -469,3 +469,31 @@ describe('expandAndCreateFileChanges', () => { expect(withOverride.some(f => f.filename.includes('vendor/'))).toBe(true); }); }); + +describe('WardenGlobExpansionError / MAX_GLOB_FILE_RESULTS guardrail', () => { + it('throws WardenGlobExpansionError when glob matches too many files', async () => { + const { WardenGlobExpansionError, MAX_GLOB_FILE_RESULTS } = await import('./files.js'); + const { writeFileSync, mkdirSync, rmSync } = await import('node:fs'); + const { join } = await import('node:path'); + const { tmpdir } = await import('node:os'); + + const tempDir = join(tmpdir(), `warden-guardrail-test-${Date.now()}`); + mkdirSync(tempDir, { recursive: true }); + + try { + // Write MAX_GLOB_FILE_RESULTS + 1 files to trigger the guardrail + const count = MAX_GLOB_FILE_RESULTS + 1; + for (let i = 0; i < count; i++) { + writeFileSync(join(tempDir, `file${i}.ts`), `// file ${i}`); + } + + await expect(expandFileGlobs(['**/*.ts'], tempDir)) + .rejects.toThrow(WardenGlobExpansionError); + + await expect(expandFileGlobs(['**/*.ts'], tempDir)) + .rejects.toThrow(/Glob pattern matched/); + } finally { + rmSync(tempDir, { recursive: true, force: true }); + } + }); +}); diff 
--git a/packages/warden/src/cli/files.ts b/packages/warden/src/cli/files.ts index 4c378363..d71d88d5 100644 --- a/packages/warden/src/cli/files.ts +++ b/packages/warden/src/cli/files.ts @@ -19,6 +19,35 @@ import { isRepoRelativePath, normalizePath } from '../utils/path.js'; * * Exported so the gitignore fallback scan can reuse the list consistently. */ +/** + * Hard upper bound on the number of files fast-glob may return from a single + * expandFileGlobs call. Exceeding this almost always means a dependency tree + * (vendor/, node_modules/, …) escaped the prune list — likely because the user + * negated the prune pattern in their config. Fail fast with an actionable + * message rather than silently burning memory. + */ +export const MAX_GLOB_FILE_RESULTS = 10_000; + +/** + * Thrown by expandFileGlobs when the glob expansion returns more than + * MAX_GLOB_FILE_RESULTS candidates. + */ +export class WardenGlobExpansionError extends Error { + constructor(count: number, limit: number) { + super( + `Glob pattern matched ${count.toLocaleString()} files (limit is ${limit.toLocaleString()}).\n` + + `This usually means a dependency directory (vendor/, node_modules/, …) is being scanned.\n` + + `\nTry one of:\n` + + ` • Quote the pattern to avoid shell expansion: warden 'dieter/**/*.php'\n` + + ` • Narrow to your application code: warden dieter/app/**/*.php\n` + + ` • Keep dependency dirs explicitly excluded in warden.toml:\n` + + ` [defaults.ignore]\n` + + ` paths = ["**/vendor/**"]`, + ); + this.name = 'WardenGlobExpansionError'; + } +} + export const BUILTIN_PRUNE_DIRECTORY_PATTERNS = [ '**/node_modules/**', '**/vendor/**', @@ -250,6 +279,12 @@ export async function expandFileGlobs( ignore: ['**/.git/**', ...prunePatterns], }); + // Guard against pathological expansion — e.g. user negated all prune patterns + // while pointing at a directory with tens-of-thousands of files. 
+ if (files.length >= MAX_GLOB_FILE_RESULTS) { + throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS); + } + // If gitignore is disabled, return files as-is if (!useGitignore) { return files.sort(); diff --git a/packages/warden/src/cli/main.ts b/packages/warden/src/cli/main.ts index e3f3135a..119d5c08 100644 --- a/packages/warden/src/cli/main.ts +++ b/packages/warden/src/cli/main.ts @@ -21,6 +21,7 @@ import { isRepoRelativePath, normalizePath, resolveConfigInput } from '../utils/ import { parseCliArgs, showVersion, classifyTargets, expandTargetFileReferences, type CLIOptions } from './args.js'; import { showHelp } from './help.js'; import { buildLocalEventContext, buildFileEventContext } from './context.js'; +import { WardenGlobExpansionError } from './files.js'; import { getRepoRoot, getHeadSha, refExists, getDefaultBranch } from './git.js'; import { renderTerminalReport, filterReports } from './terminal.js'; import { @@ -1271,12 +1272,22 @@ async function runFileMode(filePatterns: string[], options: CLIOptions, reporter // Build context from files reporter.step('Building context from files...'); - const context = await buildFileEventContext({ - patterns: filePatterns, - cwd, - ignore: config?.defaults?.ignore, - scan: config?.defaults?.scan, - }); + let context: Awaited>; + try { + context = await buildFileEventContext({ + patterns: filePatterns, + cwd, + ignore: config?.defaults?.ignore, + scan: config?.defaults?.scan, + }); + } catch (error) { + if (error instanceof WardenGlobExpansionError) { + reporter.error(error.message); + return 1; + } + reporter.error('Failed to build context'); + return 1; + } const pullRequest = context.pullRequest; if (!pullRequest) { From cac1e2d9237d90ae71f7c75cb11b920c3047f5ba Mon Sep 17 00:00:00 2001 From: immutable dcramer Date: Tue, 16 Jun 2026 23:16:37 +0000 Subject: [PATCH 3/4] fix(files): pre-check filePatterns.length before any I/O in runFileMode The MAX_GLOB_FILE_RESULTS guardrail in expandFileGlobs fires 
after fast-glob returns, which is too late if the shell pre-expanded the glob before warden ran (e.g. zsh globstar turning dieter/**/*.php into 15,000 explicit paths in argv). At that point the oversized array already exists in memory. Add an early guard at the top of runFileMode that checks filePatterns.length against MAX_GLOB_FILE_RESULTS before any config load, file I/O, or context build. This catches the shell-expansion case with zero overhead. Co-Authored-By: sentry-junior[bot] <264270552+sentry-junior[bot]@users.noreply.github.com> --- packages/warden/src/cli/main.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/packages/warden/src/cli/main.ts b/packages/warden/src/cli/main.ts index 119d5c08..59e6af7f 100644 --- a/packages/warden/src/cli/main.ts +++ b/packages/warden/src/cli/main.ts @@ -21,7 +21,7 @@ import { isRepoRelativePath, normalizePath, resolveConfigInput } from '../utils/ import { parseCliArgs, showVersion, classifyTargets, expandTargetFileReferences, type CLIOptions } from './args.js'; import { showHelp } from './help.js'; import { buildLocalEventContext, buildFileEventContext } from './context.js'; -import { WardenGlobExpansionError } from './files.js'; +import { WardenGlobExpansionError, MAX_GLOB_FILE_RESULTS } from './files.js'; import { getRepoRoot, getHeadSha, refExists, getDefaultBranch } from './git.js'; import { renderTerminalReport, filterReports } from './terminal.js'; import { @@ -1268,6 +1268,16 @@ export async function runSkills( async function runFileMode(filePatterns: string[], options: CLIOptions, reporter: Reporter): Promise { const cwd = process.cwd(); + + // Early guard: if the shell already expanded a glob to thousands of individual + // paths, the patterns array itself is oversized. Check this before any I/O. 
+ if (filePatterns.length >= MAX_GLOB_FILE_RESULTS) { + reporter.error( + new WardenGlobExpansionError(filePatterns.length, MAX_GLOB_FILE_RESULTS).message, + ); + return 1; + } + const config = loadOptionalConfig(options, findRepoPath(cwd)); // Build context from files From 97d175d86b49264b48496f5eaaf0d44467bd56d0 Mon Sep 17 00:00:00 2001 From: immutable dcramer Date: Tue, 16 Jun 2026 23:28:04 +0000 Subject: [PATCH 4/4] fix(files): fix gitignore detection for new untracked dirs; remove hardcoded prune list Root cause: git ls-files with pathspecs (.gitignore **/.gitignore) does not reliably recurse into brand-new untracked directories to find their .gitignore files. A new Laravel app in dieter/ would have dieter/.gitignore (with vendor/) undetected, so vendor/ was not gitignored and warden would traverse it. Fix: drop the pathspecs from the git ls-files call and filter for .gitignore files client-side. Without pathspecs git recurses into all untracked dirs and applies each directory's own .gitignore rules via --exclude-standard, so dieter/.gitignore is both discovered and applied correctly. Remove BUILTIN_PRUNE_DIRECTORY_PATTERNS / getEffectivePrunePatterns: hardcoding vendor/, node_modules/ etc in the fast-glob ignore list is the wrong layer. The correct mechanism is each project's .gitignore and that now works. Move the MAX_GLOB_FILE_RESULTS guardrail to AFTER gitignore filtering so that properly gitignored dependency directories don't false-positive - the limit now only fires when .gitignore is absent or misconfigured. Removes ExpandGlobOptions.ignore and the expandAndCreateFileChanges ignore pass-through that only existed to support the prune list. 
Co-Authored-By: sentry-junior[bot] <264270552+sentry-junior[bot]@users.noreply.github.com> --- packages/warden/src/cli/files.test.ts | 183 ++++++-------------------- packages/warden/src/cli/files.ts | 150 +++++++-------------- packages/warden/src/cli/main.ts | 11 +- 3 files changed, 94 insertions(+), 250 deletions(-) diff --git a/packages/warden/src/cli/files.test.ts b/packages/warden/src/cli/files.test.ts index 9edaf83f..0d2423ea 100644 --- a/packages/warden/src/cli/files.test.ts +++ b/packages/warden/src/cli/files.test.ts @@ -8,7 +8,6 @@ import { createSyntheticFileChange, expandFileGlobs, expandAndCreateFileChanges, - getEffectivePrunePatterns, } from './files.js'; function initGitRepo(dir: string): void { @@ -109,45 +108,6 @@ describe('createSyntheticFileChange', () => { }); }); -describe('getEffectivePrunePatterns', () => { - it('returns all built-in prune patterns when no user overrides', () => { - const patterns = getEffectivePrunePatterns(); - expect(patterns).toContain('**/vendor/**'); - expect(patterns).toContain('**/node_modules/**'); - expect(patterns).toContain('**/dist/**'); - }); - - it('returns all built-in prune patterns when user paths have no negations', () => { - const patterns = getEffectivePrunePatterns(['*.log', 'tmp/']); - expect(patterns).toContain('**/vendor/**'); - expect(patterns).toContain('**/node_modules/**'); - }); - - it('removes vendor prune when user has a !vendor negation', () => { - const patterns = getEffectivePrunePatterns(['!vendor/**']); - expect(patterns).not.toContain('**/vendor/**'); - // other prune patterns are unaffected - expect(patterns).toContain('**/node_modules/**'); - }); - - it('removes node_modules prune when user has a !node_modules negation', () => { - const patterns = getEffectivePrunePatterns(['!node_modules/**']); - expect(patterns).not.toContain('**/node_modules/**'); - expect(patterns).toContain('**/vendor/**'); - }); - - it('handles negation with path separator prefix', () => { - const patterns = 
getEffectivePrunePatterns(['!src/vendor/special/**']); - expect(patterns).not.toContain('**/vendor/**'); - }); - - it('handles undefined user paths gracefully', () => { - expect(() => getEffectivePrunePatterns(undefined)).not.toThrow(); - const patterns = getEffectivePrunePatterns(undefined); - expect(patterns).toContain('**/vendor/**'); - }); -}); - describe('expandFileGlobs', () => { let tempDir: string; @@ -233,61 +193,6 @@ describe('expandFileGlobs', () => { expect(files).toHaveLength(0); }); - describe('built-in directory pruning', () => { - it('prunes vendor/ directory by default without gitignore', async () => { - // Simulate a new laravel-style app: app code + vendor/ with PHP files - mkdirSync(join(tempDir, 'app'), { recursive: true }); - mkdirSync(join(tempDir, 'vendor', 'laravel', 'framework'), { recursive: true }); - writeFileSync(join(tempDir, 'app', 'Controller.php'), ' f.includes('app/Controller.php'))).toBe(true); - expect(files.some(f => f.includes('vendor/'))).toBe(false); - }); - - it('prunes node_modules/ directory by default', async () => { - mkdirSync(join(tempDir, 'src'), { recursive: true }); - mkdirSync(join(tempDir, 'node_modules', 'pkg'), { recursive: true }); - writeFileSync(join(tempDir, 'src', 'index.ts'), 'export {}'); - writeFileSync(join(tempDir, 'node_modules', 'pkg', 'index.ts'), 'module'); - - const files = await expandFileGlobs(['**/*.ts'], tempDir); - - expect(files.some(f => f.includes('src/index.ts'))).toBe(true); - expect(files.some(f => f.includes('node_modules/'))).toBe(false); - }); - - it('prunes vendor/ even when not in a git repo (no gitignore fallback needed)', async () => { - // No git init — this tests that the fast-glob level prune works independently - mkdirSync(join(tempDir, 'app'), { recursive: true }); - mkdirSync(join(tempDir, 'vendor', 'lib'), { recursive: true }); - writeFileSync(join(tempDir, 'app', 'main.php'), ' f.includes('app/main.php'))).toBe(true); - expect(files.some(f => 
f.includes('vendor/'))).toBe(false); - }); - - it('re-includes vendor/ when user ignore has a !vendor negation', async () => { - mkdirSync(join(tempDir, 'app'), { recursive: true }); - mkdirSync(join(tempDir, 'vendor', 'lib'), { recursive: true }); - writeFileSync(join(tempDir, 'app', 'main.php'), ' f.includes('app/main.php'))).toBe(true); - expect(files.some(f => f.includes('vendor/lib/dep.php'))).toBe(true); - }); - }); - describe('gitignore support', () => { it('excludes files matching .gitignore patterns by default', async () => { initGitRepo(tempDir); @@ -419,6 +324,49 @@ describe('expandFileGlobs', () => { rmSync(outsideDir, { recursive: true, force: true }); } }); + + it('picks up .gitignore from a brand-new untracked subdirectory', async () => { + // Simulate the reported bug: a new Laravel app added in `dieter/` with + // vendor/ in its .gitignore, but nothing yet committed. The gitignore + // detection must find dieter/.gitignore even though dieter/ has never + // been git-tracked. 
+ initGitRepo(tempDir); + // Commit something so the repo is real + mkdirSync(join(tempDir, 'src'), { recursive: true }); + writeFileSync(join(tempDir, 'src', '.gitkeep'), ''); + execFileSync('git', ['add', '.'], { cwd: tempDir, stdio: 'ignore' }); + execFileSync('git', ['commit', '-m', 'init', '--allow-empty'], { cwd: tempDir, stdio: 'ignore' }); + + // Now add a brand-new untracked Laravel-style app directory (never staged) + const appDir = join(tempDir, 'dieter'); + mkdirSync(join(appDir, 'app'), { recursive: true }); + mkdirSync(join(appDir, 'vendor', 'laravel', 'framework'), { recursive: true }); + writeFileSync(join(appDir, '.gitignore'), 'vendor/\n'); + writeFileSync(join(appDir, 'app', 'Controller.php'), ' f.includes('app/Controller.php'))).toBe(true); + expect(files.some(f => f.includes('vendor/'))).toBe(false); + }); + }); + + describe('MAX_GLOB_FILE_RESULTS guardrail', () => { + it('throws WardenGlobExpansionError when filtered result exceeds limit', async () => { + const { WardenGlobExpansionError, MAX_GLOB_FILE_RESULTS } = await import('./files.js'); + + // No git repo so gitignore can't shrink the set; create limit+1 files + const count = MAX_GLOB_FILE_RESULTS + 1; + for (let i = 0; i < count; i++) { + writeFileSync(join(tempDir, `file${i}.ts`), `// ${i}`); + } + + await expect(expandFileGlobs(['**/*.ts'], tempDir)) + .rejects.toThrow(WardenGlobExpansionError); + await expect(expandFileGlobs(['**/*.ts'], tempDir)) + .rejects.toThrow(/Glob pattern matched/); + }); }); }); @@ -451,49 +399,4 @@ describe('expandAndCreateFileChanges', () => { expect(file2).toBeDefined(); expect(file2?.additions).toBe(2); }); - - it('passes ignore config through so user negations can re-include pruned dirs', async () => { - mkdirSync(join(tempDir, 'app'), { recursive: true }); - mkdirSync(join(tempDir, 'vendor', 'lib'), { recursive: true }); - writeFileSync(join(tempDir, 'app', 'main.php'), ' f.filename.includes('vendor/'))).toBe(false); - - // With negation: vendor is 
re-included at traversal time - const withOverride = await expandAndCreateFileChanges(['**/*.php'], tempDir, { - ignore: { paths: ['!vendor/**'] }, - }); - expect(withOverride.some(f => f.filename.includes('vendor/'))).toBe(true); - }); -}); - -describe('WardenGlobExpansionError / MAX_GLOB_FILE_RESULTS guardrail', () => { - it('throws WardenGlobExpansionError when glob matches too many files', async () => { - const { WardenGlobExpansionError, MAX_GLOB_FILE_RESULTS } = await import('./files.js'); - const { writeFileSync, mkdirSync, rmSync } = await import('node:fs'); - const { join } = await import('node:path'); - const { tmpdir } = await import('node:os'); - - const tempDir = join(tmpdir(), `warden-guardrail-test-${Date.now()}`); - mkdirSync(tempDir, { recursive: true }); - - try { - // Write MAX_GLOB_FILE_RESULTS + 1 files to trigger the guardrail - const count = MAX_GLOB_FILE_RESULTS + 1; - for (let i = 0; i < count; i++) { - writeFileSync(join(tempDir, `file${i}.ts`), `// file ${i}`); - } - - await expect(expandFileGlobs(['**/*.ts'], tempDir)) - .rejects.toThrow(WardenGlobExpansionError); - - await expect(expandFileGlobs(['**/*.ts'], tempDir)) - .rejects.toThrow(/Glob pattern matched/); - } finally { - rmSync(tempDir, { recursive: true, force: true }); - } - }); }); diff --git a/packages/warden/src/cli/files.ts b/packages/warden/src/cli/files.ts index d71d88d5..d14f3661 100644 --- a/packages/warden/src/cli/files.ts +++ b/packages/warden/src/cli/files.ts @@ -10,92 +10,38 @@ import { execGitNonInteractive } from '../utils/exec.js'; import { isRepoRelativePath, normalizePath } from '../utils/path.js'; /** - * Directory patterns that are safe to prune at traversal time — before fast-glob - * returns results. These are the same large dependency / generated-output - * directories that BUILTIN_IGNORE_PATTERNS in scan-policy blocks after the fact. 
- * Pruning them early prevents fast-glob from traversing tens-of-thousands of - * files inside a vendor/ or node_modules/ tree when a broad glob like - * `dieter/**\/*.php` is used against a new Laravel app. - * - * Exported so the gitignore fallback scan can reuse the list consistently. - */ -/** - * Hard upper bound on the number of files fast-glob may return from a single - * expandFileGlobs call. Exceeding this almost always means a dependency tree - * (vendor/, node_modules/, …) escaped the prune list — likely because the user - * negated the prune pattern in their config. Fail fast with an actionable - * message rather than silently burning memory. + * Hard upper bound on the number of files returned by expandFileGlobs after + * gitignore filtering. If this limit is hit it almost always means a + * dependency tree (vendor/, node_modules/, …) is not gitignored and the user + * is accidentally scanning it. Fail fast with an actionable message rather + * than silently passing tens-of-thousands of files to the scan pipeline. */ export const MAX_GLOB_FILE_RESULTS = 10_000; /** - * Thrown by expandFileGlobs when the glob expansion returns more than - * MAX_GLOB_FILE_RESULTS candidates. + * Thrown by expandFileGlobs when the post-gitignore result set exceeds + * MAX_GLOB_FILE_RESULTS. 
*/ export class WardenGlobExpansionError extends Error { constructor(count: number, limit: number) { super( - `Glob pattern matched ${count.toLocaleString()} files (limit is ${limit.toLocaleString()}).\n` + - `This usually means a dependency directory (vendor/, node_modules/, …) is being scanned.\n` + + `Glob pattern matched ${count.toLocaleString()} files after gitignore filtering (limit is ${limit.toLocaleString()}).\n` + + `This usually means a dependency directory is not excluded by .gitignore.\n` + `\nTry one of:\n` + ` • Quote the pattern to avoid shell expansion: warden 'dieter/**/*.php'\n` + ` • Narrow to your application code: warden dieter/app/**/*.php\n` + - ` • Keep dependency dirs explicitly excluded in warden.toml:\n` + - ` [defaults.ignore]\n` + - ` paths = ["**/vendor/**"]`, + ` • Add the dependency directory to .gitignore:\n` + + ` vendor/`, ); this.name = 'WardenGlobExpansionError'; } } -export const BUILTIN_PRUNE_DIRECTORY_PATTERNS = [ - '**/node_modules/**', - '**/vendor/**', - '**/dist/**', - '**/build/**', - '**/.next/**', - '**/.nuxt/**', - '**/out/**', - '**/coverage/**', - '**/.cache/**', -] as const; - -/** - * Compute the fast-glob ignore list, starting from BUILTIN_PRUNE_DIRECTORY_PATTERNS - * and removing any directory whose name is explicitly un-ignored by a user - * negation pattern (e.g. `!vendor/**`). This lets advanced users opt a - * dependency directory back in without breaking the default safety behaviour. - */ -export function getEffectivePrunePatterns(userIgnorePaths?: string[]): string[] { - const negations = (userIgnorePaths ?? 
[]) - .filter((p) => p.startsWith('!')) - .map((p) => p.slice(1)); - - if (!negations.length) { - return [...BUILTIN_PRUNE_DIRECTORY_PATTERNS]; - } - - return BUILTIN_PRUNE_DIRECTORY_PATTERNS.filter((prunePattern) => { - // Extract the bare directory name from a pattern like '**/vendor/**' - const match = prunePattern.match(/\*\*\/([^/]+)\/\*\*/); - if (!match) return true; - const dirName = match[1]; - // Drop this prune entry if any negation path mentions the directory - return !negations.some((neg) => neg.includes(`${dirName}/`) || neg.includes(`/${dirName}`)); - }); -} - export interface ExpandGlobOptions { /** Working directory for glob expansion (default: process.cwd()) */ cwd?: string; /** Respect .gitignore files (default: true) */ gitignore?: boolean; - /** - * User-configured ignore rules from warden config. Negation patterns inside - * `paths` (e.g. `!vendor/**`) override the built-in directory prune list so - * that users who intentionally want to scan dependency trees can do so. - */ - ignore?: IgnoreConfig; } export interface SyntheticFileChangeOptions { @@ -189,28 +135,32 @@ function loadGitignoreRules(gitRoot: string): Ignore { // Always ignore .git directory ig.add('.git'); - // Use git to discover .gitignore files. This naturally skips ignored - // directories (node_modules, .venv, vendor, etc.) without maintaining - // a hardcoded exclusion list. + // Discover .gitignore files via git. Using --cached + --others without + // pathspecs and filtering client-side is intentional: pathspec-based queries + // like `**/.gitignore` may not recurse into brand-new untracked directories + // (e.g. a freshly-added Laravel app in dieter/) so they can miss the + // directory's own .gitignore and fail to exclude its vendor/ tree. + // Without pathspecs, git recurses into all untracked directories and returns + // every non-gitignored file; we then pick out .gitignore files ourselves. 
let gitignoreFiles: string[]; try { const output = execGitNonInteractive( - ['ls-files', '--cached', '--others', '--exclude-standard', '.gitignore', '**/.gitignore'], + ['ls-files', '--cached', '--others', '--exclude-standard'], { cwd: gitRoot } ); gitignoreFiles = output - ? output.split('\n').map((f) => resolve(gitRoot, f)) + ? output + .split('\n') + .filter((f) => f === '.gitignore' || f.endsWith('/.gitignore')) + .map((f) => resolve(gitRoot, f)) : []; } catch { - // Not a real git repo or git not available. Walk directories manually, - // skipping large directories that would never contain relevant .gitignore - // files. Reuse the same prune list used by expandFileGlobs() so behaviour - // is consistent across both code paths. + // Not a real git repo or git not available. Walk directories manually. gitignoreFiles = fg.sync('**/.gitignore', { cwd: gitRoot, absolute: true, dot: true, - ignore: ['**/.git/**', ...BUILTIN_PRUNE_DIRECTORY_PATTERNS], + ignore: ['**/.git/**'], }); } @@ -243,15 +193,13 @@ function loadGitignoreRules(gitRoot: string): Ignore { /** * Expand glob patterns to a list of file paths. * - * By default, respects .gitignore files to automatically exclude ignored - * directories like node_modules/. This can be disabled by setting + * By default respects .gitignore files to automatically exclude ignored + * directories like node_modules/ and vendor/. This can be disabled by setting * gitignore: false. * - * Large dependency and generated-output directories (vendor/, node_modules/, - * dist/, …) are also pruned at traversal time via BUILTIN_PRUNE_DIRECTORY_PATTERNS - * so that broad globs like `dieter/**\/*.php` against a Laravel app do not - * cause fast-glob to enumerate tens-of-thousands of files before the - * post-enumeration scan policy has a chance to skip them. 
+ * Throws WardenGlobExpansionError if the result set after gitignore filtering + * exceeds MAX_GLOB_FILE_RESULTS, which almost always indicates an ungitignored + * dependency directory is being scanned. */ export async function expandFileGlobs( patterns: string[], @@ -264,35 +212,32 @@ export async function expandFileGlobs( const useGitignore = options.gitignore ?? true; const expandedPatterns = patterns.map((pattern) => expandDirectoryPattern(pattern, cwd)); - // Compute directory prune list, honouring user negation overrides. - const prunePatterns = getEffectivePrunePatterns(options.ignore?.paths); - - // Enumerate matching files. Built-in directory prune patterns are applied at - // this stage so fast-glob never descends into vendor/ or node_modules/ trees, - // preventing excessive memory use when scanning broad globs. Gitignore-based - // filtering and the full BUILTIN_IGNORE_PATTERNS check happen afterward. + // Enumerate matching files. Only .git/ is excluded at traversal time; + // dependency directories (vendor/, node_modules/, …) are excluded by + // gitignore filtering below, keeping the approach policy-free and letting + // each project's own .gitignore determine what is scanned. const files = await fg(expandedPatterns, { cwd, onlyFiles: true, absolute: true, dot: false, - ignore: ['**/.git/**', ...prunePatterns], + ignore: ['**/.git/**'], }); - // Guard against pathological expansion — e.g. user negated all prune patterns - // while pointing at a directory with tens-of-thousands of files. 
- if (files.length >= MAX_GLOB_FILE_RESULTS) { - throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS); - } - - // If gitignore is disabled, return files as-is + // If gitignore is disabled, check the raw count and return as-is if (!useGitignore) { + if (files.length >= MAX_GLOB_FILE_RESULTS) { + throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS); + } return files.sort(); } // Find git root - if not in a git repo, don't apply gitignore rules const gitRoot = findGitRoot(cwd); if (!gitRoot) { + if (files.length >= MAX_GLOB_FILE_RESULTS) { + throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS); + } return files.sort(); } @@ -313,6 +258,13 @@ export async function expandFileGlobs( return !ig.ignores(relativePath); }); + // Guard after gitignore so that properly gitignored dependency directories + // do not trigger a false positive — the limit only fires when the project's + // .gitignore is misconfigured or missing. + if (filteredFiles.length >= MAX_GLOB_FILE_RESULTS) { + throw new WardenGlobExpansionError(filteredFiles.length, MAX_GLOB_FILE_RESULTS); + } + return filteredFiles.sort(); } @@ -398,8 +350,6 @@ export async function expandAndCreateFileChanges( options: SyntheticFileChangeOptions = {} ): Promise { const resolvedCwd = resolve(cwd); - // Pass the ignore config so that user negation patterns can override built-in - // prune directories at traversal time (e.g. `!vendor/**` re-includes vendor). 
- const files = await expandFileGlobs(patterns, { cwd: resolvedCwd, ignore: options.ignore }); + const files = await expandFileGlobs(patterns, resolvedCwd); return createSyntheticFileChanges(files, resolvedCwd, options); } diff --git a/packages/warden/src/cli/main.ts b/packages/warden/src/cli/main.ts index 59e6af7f..5cf144c4 100644 --- a/packages/warden/src/cli/main.ts +++ b/packages/warden/src/cli/main.ts @@ -21,7 +21,7 @@ import { isRepoRelativePath, normalizePath, resolveConfigInput } from '../utils/ import { parseCliArgs, showVersion, classifyTargets, expandTargetFileReferences, type CLIOptions } from './args.js'; import { showHelp } from './help.js'; import { buildLocalEventContext, buildFileEventContext } from './context.js'; -import { WardenGlobExpansionError, MAX_GLOB_FILE_RESULTS } from './files.js'; +import { WardenGlobExpansionError } from './files.js'; import { getRepoRoot, getHeadSha, refExists, getDefaultBranch } from './git.js'; import { renderTerminalReport, filterReports } from './terminal.js'; import { @@ -1269,15 +1269,6 @@ export async function runSkills( async function runFileMode(filePatterns: string[], options: CLIOptions, reporter: Reporter): Promise { const cwd = process.cwd(); - // Early guard: if the shell already expanded a glob to thousands of individual - // paths, the patterns array itself is oversized. Check this before any I/O. - if (filePatterns.length >= MAX_GLOB_FILE_RESULTS) { - reporter.error( - new WardenGlobExpansionError(filePatterns.length, MAX_GLOB_FILE_RESULTS).message, - ); - return 1; - } - const config = loadOptionalConfig(options, findRepoPath(cwd)); // Build context from files