-
-
Notifications
You must be signed in to change notification settings - Fork 29
fix(files): prune dependency dirs in expandFileGlobs before fast-glob traversal #410
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
bddfaed
8c4d39e
cac1e2d
97d175d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,6 +9,34 @@ import { getPrePatchFileSkip } from '../sdk/scan-policy.js'; | |
| import { execGitNonInteractive } from '../utils/exec.js'; | ||
| import { isRepoRelativePath, normalizePath } from '../utils/path.js'; | ||
|
|
||
| /** | ||
| * Hard upper bound on the number of files returned by expandFileGlobs after | ||
| * gitignore filtering. If this limit is hit it almost always means a | ||
| * dependency tree (vendor/, node_modules/, …) is not gitignored and the user | ||
| * is accidentally scanning it. Fail fast with an actionable message rather | ||
| * than silently passing tens-of-thousands of files to the scan pipeline. | ||
| */ | ||
| export const MAX_GLOB_FILE_RESULTS = 10_000; | ||
|
|
||
| /** | ||
| * Thrown by expandFileGlobs when the post-gitignore result set exceeds | ||
| * MAX_GLOB_FILE_RESULTS. | ||
| */ | ||
| export class WardenGlobExpansionError extends Error { | ||
| constructor(count: number, limit: number) { | ||
| super( | ||
| `Glob pattern matched ${count.toLocaleString()} files after gitignore filtering (limit is ${limit.toLocaleString()}).\n` + | ||
| `This usually means a dependency directory is not excluded by .gitignore.\n` + | ||
| `\nTry one of:\n` + | ||
| ` • Quote the pattern to avoid shell expansion: warden 'dieter/**/*.php'\n` + | ||
| ` • Narrow to your application code: warden dieter/app/**/*.php\n` + | ||
| ` • Add the dependency directory to .gitignore:\n` + | ||
| ` vendor/`, | ||
| ); | ||
| this.name = 'WardenGlobExpansionError'; | ||
| } | ||
| } | ||
|
|
||
| export interface ExpandGlobOptions { | ||
| /** Working directory for glob expansion (default: process.cwd()) */ | ||
| cwd?: string; | ||
|
|
@@ -107,27 +135,32 @@ function loadGitignoreRules(gitRoot: string): Ignore { | |
| // Always ignore .git directory | ||
| ig.add('.git'); | ||
|
|
||
| // Use git to discover .gitignore files. This naturally skips ignored | ||
| // directories (node_modules, .venv, vendor, etc.) without maintaining | ||
| // a hardcoded exclusion list. | ||
| // Discover .gitignore files via git. Using --cached + --others without | ||
| // pathspecs and filtering client-side is intentional: pathspec-based queries | ||
| // like `**/.gitignore` may not recurse into brand-new untracked directories | ||
| // (e.g. a freshly-added Laravel app in dieter/) so they can miss the | ||
| // directory's own .gitignore and fail to exclude its vendor/ tree. | ||
| // Without pathspecs, git recurses into all untracked directories and returns | ||
| // every non-gitignored file; we then pick out .gitignore files ourselves. | ||
| let gitignoreFiles: string[]; | ||
| try { | ||
| const output = execGitNonInteractive( | ||
| ['ls-files', '--cached', '--others', '--exclude-standard', '.gitignore', '**/.gitignore'], | ||
| ['ls-files', '--cached', '--others', '--exclude-standard'], | ||
| { cwd: gitRoot } | ||
| ); | ||
| gitignoreFiles = output | ||
| ? output.split('\n').map((f) => resolve(gitRoot, f)) | ||
| ? output | ||
| .split('\n') | ||
| .filter((f) => f === '.gitignore' || f.endsWith('/.gitignore')) | ||
| .map((f) => resolve(gitRoot, f)) | ||
| : []; | ||
| } catch { | ||
| // Not a real git repo or git not available. Walk directories manually, | ||
| // skipping common large directories that would never contain relevant | ||
| // .gitignore files. | ||
| // Not a real git repo or git not available. Walk directories manually. | ||
| gitignoreFiles = fg.sync('**/.gitignore', { | ||
| cwd: gitRoot, | ||
| absolute: true, | ||
| dot: true, | ||
| ignore: ['**/.git/**', '**/node_modules/**'], | ||
| ignore: ['**/.git/**'], | ||
| }); | ||
| } | ||
|
|
||
|
|
@@ -160,9 +193,13 @@ function loadGitignoreRules(gitRoot: string): Ignore { | |
| /** | ||
| * Expand glob patterns to a list of file paths. | ||
| * | ||
| * By default, respects .gitignore files to automatically exclude ignored | ||
| * directories like node_modules/. This can be disabled by setting | ||
| * By default respects .gitignore files to automatically exclude ignored | ||
| * directories like node_modules/ and vendor/. This can be disabled by setting | ||
| * gitignore: false. | ||
| * | ||
| * Throws WardenGlobExpansionError if the result set after gitignore filtering | ||
| * exceeds MAX_GLOB_FILE_RESULTS, which almost always indicates an ungitignored | ||
| * dependency directory is being scanned. | ||
| */ | ||
| export async function expandFileGlobs( | ||
| patterns: string[], | ||
|
|
@@ -175,24 +212,32 @@ export async function expandFileGlobs( | |
| const useGitignore = options.gitignore ?? true; | ||
| const expandedPatterns = patterns.map((pattern) => expandDirectoryPattern(pattern, cwd)); | ||
|
|
||
| // Get all matching files first | ||
| // Enumerate matching files. Only .git/ is excluded at traversal time; | ||
| // dependency directories (vendor/, node_modules/, …) are excluded by | ||
| // gitignore filtering below, keeping the approach policy-free and letting | ||
| // each project's own .gitignore determine what is scanned. | ||
| const files = await fg(expandedPatterns, { | ||
| cwd, | ||
| onlyFiles: true, | ||
| absolute: true, | ||
| dot: false, | ||
| // Always exclude .git directory | ||
| ignore: ['**/.git/**'], | ||
| }); | ||
|
|
||
| // If gitignore is disabled, return files as-is | ||
| // If gitignore is disabled, check the raw count and return as-is | ||
| if (!useGitignore) { | ||
| if (files.length >= MAX_GLOB_FILE_RESULTS) { | ||
| throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS); | ||
| } | ||
| return files.sort(); | ||
| } | ||
|
|
||
| // Find git root - if not in a git repo, don't apply gitignore rules | ||
| const gitRoot = findGitRoot(cwd); | ||
| if (!gitRoot) { | ||
| if (files.length >= MAX_GLOB_FILE_RESULTS) { | ||
| throw new WardenGlobExpansionError(files.length, MAX_GLOB_FILE_RESULTS); | ||
| } | ||
| return files.sort(); | ||
| } | ||
|
|
||
|
|
@@ -213,6 +258,13 @@ export async function expandFileGlobs( | |
| return !ig.ignores(relativePath); | ||
| }); | ||
|
|
||
| // Guard after gitignore so that properly gitignored dependency directories | ||
| // do not trigger a false positive — the limit only fires when the project's | ||
| // .gitignore is misconfigured or missing. | ||
| if (filteredFiles.length >= MAX_GLOB_FILE_RESULTS) { | ||
| throw new WardenGlobExpansionError(filteredFiles.length, MAX_GLOB_FILE_RESULTS); | ||
| } | ||
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Dependency trees enumerated before filterHigh Severity
Reviewed by Cursor Bugbot for commit 97d175d. Configure here. |
||
| return filteredFiles.sort(); | ||
| } | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -21,6 +21,7 @@ import { isRepoRelativePath, normalizePath, resolveConfigInput } from '../utils/ | |
| import { parseCliArgs, showVersion, classifyTargets, expandTargetFileReferences, type CLIOptions } from './args.js'; | ||
| import { showHelp } from './help.js'; | ||
| import { buildLocalEventContext, buildFileEventContext } from './context.js'; | ||
| import { WardenGlobExpansionError } from './files.js'; | ||
| import { getRepoRoot, getHeadSha, refExists, getDefaultBranch } from './git.js'; | ||
| import { renderTerminalReport, filterReports } from './terminal.js'; | ||
| import { | ||
|
|
@@ -1267,16 +1268,27 @@ export async function runSkills( | |
|
|
||
| async function runFileMode(filePatterns: string[], options: CLIOptions, reporter: Reporter): Promise<number> { | ||
| const cwd = process.cwd(); | ||
|
|
||
| const config = loadOptionalConfig(options, findRepoPath(cwd)); | ||
|
|
||
| // Build context from files | ||
| reporter.step('Building context from files...'); | ||
| const context = await buildFileEventContext({ | ||
| patterns: filePatterns, | ||
| cwd, | ||
| ignore: config?.defaults?.ignore, | ||
| scan: config?.defaults?.scan, | ||
| }); | ||
| let context: Awaited<ReturnType<typeof buildFileEventContext>>; | ||
| try { | ||
| context = await buildFileEventContext({ | ||
| patterns: filePatterns, | ||
| cwd, | ||
| ignore: config?.defaults?.ignore, | ||
| scan: config?.defaults?.scan, | ||
| }); | ||
| } catch (error) { | ||
| if (error instanceof WardenGlobExpansionError) { | ||
| reporter.error(error.message); | ||
| return 1; | ||
| } | ||
| reporter.error('Failed to build context'); | ||
| return 1; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Context build hides error textLow Severity The new Reviewed by Cursor Bugbot for commit 8c4d39e. Configure here. |
||
| } | ||
|
|
||
| const pullRequest = context.pullRequest; | ||
| if (!pullRequest) { | ||
|
|
||


Uh oh!
There was an error while loading. Please reload this page.