diff --git a/packages/warden/src/cli/files.test.ts b/packages/warden/src/cli/files.test.ts index ca87314b..0d2423ea 100644 --- a/packages/warden/src/cli/files.test.ts +++ b/packages/warden/src/cli/files.test.ts @@ -324,6 +324,49 @@ describe('expandFileGlobs', () => { rmSync(outsideDir, { recursive: true, force: true }); } }); + + it('picks up .gitignore from a brand-new untracked subdirectory', async () => { + // Simulate the reported bug: a new Laravel app added in `dieter/` with + // vendor/ in its .gitignore, but nothing yet committed. The gitignore + // detection must find dieter/.gitignore even though dieter/ has never + // been git-tracked. + initGitRepo(tempDir); + // Commit something so the repo is real + mkdirSync(join(tempDir, 'src'), { recursive: true }); + writeFileSync(join(tempDir, 'src', '.gitkeep'), ''); + execFileSync('git', ['add', '.'], { cwd: tempDir, stdio: 'ignore' }); + execFileSync('git', ['commit', '-m', 'init', '--allow-empty'], { cwd: tempDir, stdio: 'ignore' }); + + // Now add a brand-new untracked Laravel-style app directory (never staged) + const appDir = join(tempDir, 'dieter'); + mkdirSync(join(appDir, 'app'), { recursive: true }); + mkdirSync(join(appDir, 'vendor', 'laravel', 'framework'), { recursive: true }); + writeFileSync(join(appDir, '.gitignore'), 'vendor/\n'); + writeFileSync(join(appDir, 'app', 'Controller.php'), ' f.includes('app/Controller.php'))).toBe(true); + expect(files.some(f => f.includes('vendor/'))).toBe(false); + }); + }); + + describe('MAX_GLOB_FILE_RESULTS guardrail', () => { + it('throws WardenGlobExpansionError when filtered result exceeds limit', async () => { + const { WardenGlobExpansionError, MAX_GLOB_FILE_RESULTS } = await import('./files.js'); + + // No git repo so gitignore can't shrink the set; create limit+1 files + const count = MAX_GLOB_FILE_RESULTS + 1; + for (let i = 0; i < count; i++) { + writeFileSync(join(tempDir, `file${i}.ts`), `// ${i}`); + } + + await expect(expandFileGlobs(['**/*.ts'], 
tempDir)) + .rejects.toThrow(WardenGlobExpansionError); + await expect(expandFileGlobs(['**/*.ts'], tempDir)) + .rejects.toThrow(/Glob pattern matched/); + }); }); }); diff --git a/packages/warden/src/cli/files.ts b/packages/warden/src/cli/files.ts index 9fc39558..d14f3661 100644 --- a/packages/warden/src/cli/files.ts +++ b/packages/warden/src/cli/files.ts @@ -9,6 +9,34 @@ import { getPrePatchFileSkip } from '../sdk/scan-policy.js'; import { execGitNonInteractive } from '../utils/exec.js'; import { isRepoRelativePath, normalizePath } from '../utils/path.js'; +/** + * Hard upper bound on the number of files returned by expandFileGlobs (counted after + * gitignore filtering, or on the raw matches when filtering is disabled or no git root is found). If this limit is hit it almost always means a + * dependency tree (vendor/, node_modules/, …) is not gitignored and the user + * is accidentally scanning it. Fail fast with an actionable message rather + * than silently passing tens-of-thousands of files to the scan pipeline. + */ +export const MAX_GLOB_FILE_RESULTS = 10_000; + +/** + * Thrown by expandFileGlobs when the result set (post-gitignore, or raw when filtering is skipped) exceeds + * MAX_GLOB_FILE_RESULTS.
+ */ +export class WardenGlobExpansionError extends Error { + constructor(count: number, limit: number) { + super( + `Glob pattern matched ${count.toLocaleString()} files after any gitignore filtering (limit is ${limit.toLocaleString()}).\n` + + `This usually means a dependency directory is not excluded by .gitignore.\n` + + `\nTry one of:\n` + + ` • Quote the pattern to avoid shell expansion: warden 'my-app/**/*.php'\n` + + ` • Narrow to your application code: warden 'my-app/app/**/*.php'\n` + + ` • Add the dependency directory to .gitignore:\n` + + ` vendor/`, + ); + this.name = 'WardenGlobExpansionError'; + } +} + export interface ExpandGlobOptions { /** Working directory for glob expansion (default: process.cwd()) */ cwd?: string; @@ -107,27 +135,32 @@ function loadGitignoreRules(gitRoot: string): Ignore { // Always ignore .git directory ig.add('.git'); - // Use git to discover .gitignore files. This naturally skips ignored - // directories (node_modules, .venv, vendor, etc.) without maintaining - // a hardcoded exclusion list. + // Discover .gitignore files via git. Using --cached + --others without + // pathspecs and filtering client-side is intentional: pathspec-based queries + // like `**/.gitignore` may not recurse into brand-new untracked directories + // (e.g. a freshly-added Laravel app in dieter/) so they can miss the + // directory's own .gitignore and fail to exclude its vendor/ tree. + // Without pathspecs, git recurses into all untracked directories and returns + // every non-gitignored file; we then pick out .gitignore files ourselves. let gitignoreFiles: string[]; try { const output = execGitNonInteractive( - ['ls-files', '--cached', '--others', '--exclude-standard', '.gitignore', '**/.gitignore'], + ['ls-files', '--cached', '--others', '--exclude-standard'], { cwd: gitRoot } ); gitignoreFiles = output - ? output.split('\n').map((f) => resolve(gitRoot, f)) + ? 
output + .split('\n') + .filter((f) => f === '.gitignore' || f.endsWith('/.gitignore')) + .map((f) => resolve(gitRoot, f)) : []; } catch { - // Not a real git repo or git not available. Walk directories manually, - // skipping common large directories that would never contain relevant - // .gitignore files. + // Not a real git repo or git not available. Walk directories manually. gitignoreFiles = fg.sync('**/.gitignore', { cwd: gitRoot, absolute: true, dot: true, - ignore: ['**/.git/**', '**/node_modules/**'], + ignore: ['**/.git/**'], }); } @@ -160,9 +193,13 @@ function loadGitignoreRules(gitRoot: string): Ignore { /** * Expand glob patterns to a list of file paths. * - * By default, respects .gitignore files to automatically exclude ignored - * directories like node_modules/. This can be disabled by setting + * By default respects .gitignore files to automatically exclude ignored + * directories like node_modules/ and vendor/. This can be disabled by setting * gitignore: false. + * + * Throws WardenGlobExpansionError if the result set (after gitignore filtering, when it is applied) + * exceeds MAX_GLOB_FILE_RESULTS, which almost always indicates an ungitignored + * dependency directory is being scanned. */ export async function expandFileGlobs( patterns: string[], @@ -175,24 +212,32 @@ export async function expandFileGlobs( const useGitignore = options.gitignore ?? true; const expandedPatterns = patterns.map((pattern) => expandDirectoryPattern(pattern, cwd)); - // Get all matching files first + // Enumerate matching files. Only .git/ is excluded at traversal time; + // dependency directories (vendor/, node_modules/, …) are excluded by + // gitignore filtering below, keeping the approach policy-free and letting + // each project's own .gitignore determine what is scanned.
const files = await fg(expandedPatterns, { cwd, onlyFiles: true, absolute: true, dot: false, - // Always exclude .git directory ignore: ['**/.git/**'], }); - // If gitignore is disabled, return files as-is + // If gitignore is disabled, check the raw count and return as-is if (!useGitignore) { + if (files.length > MAX_GLOB_FILE_RESULTS) { + throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS); + } return files.sort(); } // Find git root - if not in a git repo, don't apply gitignore rules const gitRoot = findGitRoot(cwd); if (!gitRoot) { + if (files.length > MAX_GLOB_FILE_RESULTS) { + throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS); + } return files.sort(); } @@ -213,6 +258,13 @@ export async function expandFileGlobs( return !ig.ignores(relativePath); }); + // Guard after gitignore so that properly gitignored dependency directories + // do not trigger a false positive — the limit only fires when the project's + // .gitignore is misconfigured or missing.
+ if (filteredFiles.length > MAX_GLOB_FILE_RESULTS) { + throw new WardenGlobExpansionError(filteredFiles.length, MAX_GLOB_FILE_RESULTS); + } + return filteredFiles.sort(); } diff --git a/packages/warden/src/cli/main.ts b/packages/warden/src/cli/main.ts index e3f3135a..5cf144c4 100644 --- a/packages/warden/src/cli/main.ts +++ b/packages/warden/src/cli/main.ts @@ -21,6 +21,7 @@ import { isRepoRelativePath, normalizePath, resolveConfigInput } from '../utils/ import { parseCliArgs, showVersion, classifyTargets, expandTargetFileReferences, type CLIOptions } from './args.js'; import { showHelp } from './help.js'; import { buildLocalEventContext, buildFileEventContext } from './context.js'; +import { WardenGlobExpansionError } from './files.js'; import { getRepoRoot, getHeadSha, refExists, getDefaultBranch } from './git.js'; import { renderTerminalReport, filterReports } from './terminal.js'; import { @@ -1267,16 +1268,27 @@ export async function runSkills( async function runFileMode(filePatterns: string[], options: CLIOptions, reporter: Reporter): Promise<number> { const cwd = process.cwd(); + const config = loadOptionalConfig(options, findRepoPath(cwd)); // Build context from files reporter.step('Building context from files...'); - const context = await buildFileEventContext({ - patterns: filePatterns, - cwd, - ignore: config?.defaults?.ignore, - scan: config?.defaults?.scan, - }); + let context: Awaited<ReturnType<typeof buildFileEventContext>>; + try { + context = await buildFileEventContext({ + patterns: filePatterns, + cwd, + ignore: config?.defaults?.ignore, + scan: config?.defaults?.scan, + }); + } catch (error) { + if (error instanceof WardenGlobExpansionError) { + reporter.error(error.message); + return 1; + } + reporter.error(`Failed to build context: ${error instanceof Error ? error.message : String(error)}`); + return 1; + } const pullRequest = context.pullRequest; if (!pullRequest) {