Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions packages/warden/src/cli/files.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,49 @@ describe('expandFileGlobs', () => {
rmSync(outsideDir, { recursive: true, force: true });
}
});

it('picks up .gitignore from a brand-new untracked subdirectory', async () => {
// Simulate the reported bug: a new Laravel app added in `dieter/` with
// vendor/ in its .gitignore, but nothing yet committed. The gitignore
// detection must find dieter/.gitignore even though dieter/ has never
// been git-tracked.
initGitRepo(tempDir);
// Commit something so the repo is real
mkdirSync(join(tempDir, 'src'), { recursive: true });
writeFileSync(join(tempDir, 'src', '.gitkeep'), '');
execFileSync('git', ['add', '.'], { cwd: tempDir, stdio: 'ignore' });
execFileSync('git', ['commit', '-m', 'init', '--allow-empty'], { cwd: tempDir, stdio: 'ignore' });

// Now add a brand-new untracked Laravel-style app directory (never staged)
const appDir = join(tempDir, 'dieter');
mkdirSync(join(appDir, 'app'), { recursive: true });
mkdirSync(join(appDir, 'vendor', 'laravel', 'framework'), { recursive: true });
writeFileSync(join(appDir, '.gitignore'), 'vendor/\n');
writeFileSync(join(appDir, 'app', 'Controller.php'), '<?php class Controller {}');
writeFileSync(join(appDir, 'vendor', 'laravel', 'framework', 'Framework.php'), '<?php');

const files = await expandFileGlobs(['dieter/**/*.php'], tempDir);

expect(files.some(f => f.includes('app/Controller.php'))).toBe(true);
expect(files.some(f => f.includes('vendor/'))).toBe(false);
});
});

describe('MAX_GLOB_FILE_RESULTS guardrail', () => {
it('throws WardenGlobExpansionError when filtered result exceeds limit', async () => {
const { WardenGlobExpansionError, MAX_GLOB_FILE_RESULTS } = await import('./files.js');

// No git repo so gitignore can't shrink the set; create limit+1 files
const count = MAX_GLOB_FILE_RESULTS + 1;
for (let i = 0; i < count; i++) {
writeFileSync(join(tempDir, `file${i}.ts`), `// ${i}`);
}

await expect(expandFileGlobs(['**/*.ts'], tempDir))
.rejects.toThrow(WardenGlobExpansionError);
await expect(expandFileGlobs(['**/*.ts'], tempDir))
.rejects.toThrow(/Glob pattern matched/);
});
});
});

Expand Down
80 changes: 66 additions & 14 deletions packages/warden/src/cli/files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,34 @@ import { getPrePatchFileSkip } from '../sdk/scan-policy.js';
import { execGitNonInteractive } from '../utils/exec.js';
import { isRepoRelativePath, normalizePath } from '../utils/path.js';

/**
* Hard upper bound on the number of files returned by expandFileGlobs after
* gitignore filtering. If this limit is hit it almost always means a
* dependency tree (vendor/, node_modules/, …) is not gitignored and the user
* is accidentally scanning it. Fail fast with an actionable message rather
* than silently passing tens-of-thousands of files to the scan pipeline.
*/
export const MAX_GLOB_FILE_RESULTS = 10_000;

/**
* Thrown by expandFileGlobs when the post-gitignore result set exceeds
* MAX_GLOB_FILE_RESULTS.
*/
export class WardenGlobExpansionError extends Error {
constructor(count: number, limit: number) {
super(
`Glob pattern matched ${count.toLocaleString()} files after gitignore filtering (limit is ${limit.toLocaleString()}).\n` +
`This usually means a dependency directory is not excluded by .gitignore.\n` +
`\nTry one of:\n` +
` • Quote the pattern to avoid shell expansion: warden 'dieter/**/*.php'\n` +
` • Narrow to your application code: warden dieter/app/**/*.php\n` +
` • Add the dependency directory to .gitignore:\n` +
` vendor/`,
);
this.name = 'WardenGlobExpansionError';
}
}

export interface ExpandGlobOptions {
/** Working directory for glob expansion (default: process.cwd()) */
cwd?: string;
Expand Down Expand Up @@ -107,27 +135,32 @@ function loadGitignoreRules(gitRoot: string): Ignore {
// Always ignore .git directory
ig.add('.git');

// Use git to discover .gitignore files. This naturally skips ignored
// directories (node_modules, .venv, vendor, etc.) without maintaining
// a hardcoded exclusion list.
// Discover .gitignore files via git. Using --cached + --others without
// pathspecs and filtering client-side is intentional: pathspec-based queries
// like `**/.gitignore` may not recurse into brand-new untracked directories
// (e.g. a freshly-added Laravel app in dieter/) so they can miss the
// directory's own .gitignore and fail to exclude its vendor/ tree.
// Without pathspecs, git recurses into all untracked directories and returns
// every non-gitignored file; we then pick out .gitignore files ourselves.
let gitignoreFiles: string[];
try {
const output = execGitNonInteractive(
['ls-files', '--cached', '--others', '--exclude-standard', '.gitignore', '**/.gitignore'],
['ls-files', '--cached', '--others', '--exclude-standard'],
{ cwd: gitRoot }
);
gitignoreFiles = output
? output.split('\n').map((f) => resolve(gitRoot, f))
? output
.split('\n')
.filter((f) => f === '.gitignore' || f.endsWith('/.gitignore'))
.map((f) => resolve(gitRoot, f))
: [];
} catch {
// Not a real git repo or git not available. Walk directories manually,
// skipping common large directories that would never contain relevant
// .gitignore files.
// Not a real git repo or git not available. Walk directories manually.
gitignoreFiles = fg.sync('**/.gitignore', {
cwd: gitRoot,
absolute: true,
dot: true,
ignore: ['**/.git/**', '**/node_modules/**'],
ignore: ['**/.git/**'],
});
}

Expand Down Expand Up @@ -160,9 +193,13 @@ function loadGitignoreRules(gitRoot: string): Ignore {
/**
* Expand glob patterns to a list of file paths.
*
* By default, respects .gitignore files to automatically exclude ignored
* directories like node_modules/. This can be disabled by setting
* By default respects .gitignore files to automatically exclude ignored
* directories like node_modules/ and vendor/. This can be disabled by setting
* gitignore: false.
*
* Throws WardenGlobExpansionError if the result set after gitignore filtering
* exceeds MAX_GLOB_FILE_RESULTS, which almost always indicates an ungitignored
* dependency directory is being scanned.
*/
export async function expandFileGlobs(
patterns: string[],
Expand All @@ -175,24 +212,32 @@ export async function expandFileGlobs(
const useGitignore = options.gitignore ?? true;
const expandedPatterns = patterns.map((pattern) => expandDirectoryPattern(pattern, cwd));

// Get all matching files first
// Enumerate matching files. Only .git/ is excluded at traversal time;
// dependency directories (vendor/, node_modules/, …) are excluded by
// gitignore filtering below, keeping the approach policy-free and letting
// each project's own .gitignore determine what is scanned.
const files = await fg(expandedPatterns, {
cwd,
onlyFiles: true,
absolute: true,
dot: false,
// Always exclude .git directory
ignore: ['**/.git/**'],
Comment thread
cursor[bot] marked this conversation as resolved.
});

// If gitignore is disabled, return files as-is
// If gitignore is disabled, check the raw count and return as-is
if (!useGitignore) {
if (files.length >= MAX_GLOB_FILE_RESULTS) {
throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS);
}
return files.sort();
}

// Find git root - if not in a git repo, don't apply gitignore rules
const gitRoot = findGitRoot(cwd);
if (!gitRoot) {
if (files.length >= MAX_GLOB_FILE_RESULTS) {
throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS);
}
return files.sort();
}

Expand All @@ -213,6 +258,13 @@ export async function expandFileGlobs(
return !ig.ignores(relativePath);
});

// Guard after gitignore so that properly gitignored dependency directories
// do not trigger a false positive — the limit only fires when the project's
// .gitignore is misconfigured or missing.
if (filteredFiles.length >= MAX_GLOB_FILE_RESULTS) {
throw new WardenGlobExpansionError(filteredFiles.length, MAX_GLOB_FILE_RESULTS);
}

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Dependency trees enumerated before filter

High Severity

expandFileGlobs runs fast-glob with only **/.git/** ignored, then applies gitignore to the full match list. Gitignored paths like vendor/ still get walked and collected first. MAX_GLOB_FILE_RESULTS runs only on the filtered array, so large dependency trees can still exhaust memory or crash before the guard runs.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 97d175d. Configure here.

return filteredFiles.sort();
}

Expand Down
24 changes: 18 additions & 6 deletions packages/warden/src/cli/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import { isRepoRelativePath, normalizePath, resolveConfigInput } from '../utils/
import { parseCliArgs, showVersion, classifyTargets, expandTargetFileReferences, type CLIOptions } from './args.js';
import { showHelp } from './help.js';
import { buildLocalEventContext, buildFileEventContext } from './context.js';
import { WardenGlobExpansionError } from './files.js';
import { getRepoRoot, getHeadSha, refExists, getDefaultBranch } from './git.js';
import { renderTerminalReport, filterReports } from './terminal.js';
import {
Expand Down Expand Up @@ -1267,16 +1268,27 @@ export async function runSkills(

async function runFileMode(filePatterns: string[], options: CLIOptions, reporter: Reporter): Promise<number> {
const cwd = process.cwd();

const config = loadOptionalConfig(options, findRepoPath(cwd));

// Build context from files
reporter.step('Building context from files...');
const context = await buildFileEventContext({
patterns: filePatterns,
cwd,
ignore: config?.defaults?.ignore,
scan: config?.defaults?.scan,
});
let context: Awaited<ReturnType<typeof buildFileEventContext>>;
try {
context = await buildFileEventContext({
patterns: filePatterns,
cwd,
ignore: config?.defaults?.ignore,
scan: config?.defaults?.scan,
});
} catch (error) {
if (error instanceof WardenGlobExpansionError) {
reporter.error(error.message);
return 1;
}
reporter.error('Failed to build context');
return 1;

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Context build hides error text

Low Severity

The new runFileMode try/catch reports a helpful message for WardenGlobExpansionError, but any other error from buildFileEventContext is reported only as Failed to build context, dropping the underlying Error message that would explain I/O or config failures.

Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 8c4d39e. Configure here.

}

const pullRequest = context.pullRequest;
if (!pullRequest) {
Expand Down
Loading