Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions packages/warden/src/cli/files.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
createSyntheticFileChange,
expandFileGlobs,
expandAndCreateFileChanges,
getEffectivePrunePatterns,
} from './files.js';

function initGitRepo(dir: string): void {
Expand Down Expand Up @@ -108,6 +109,45 @@
});
});

// Unit tests for getEffectivePrunePatterns: the built-in prune list is returned
// untouched unless a user ignore path is a negation naming one of its directories.
describe('getEffectivePrunePatterns', () => {
  // Built-in entries the assertions below look for.
  const VENDOR_PRUNE = '**/vendor/**';
  const NODE_MODULES_PRUNE = '**/node_modules/**';
  const DIST_PRUNE = '**/dist/**';

  it('returns all built-in prune patterns when no user overrides', () => {
    const effective = getEffectivePrunePatterns();
    for (const builtin of [VENDOR_PRUNE, NODE_MODULES_PRUNE, DIST_PRUNE]) {
      expect(effective).toContain(builtin);
    }
  });

  it('returns all built-in prune patterns when user paths have no negations', () => {
    // Plain (non-negated) ignore paths must not remove anything.
    const effective = getEffectivePrunePatterns(['*.log', 'tmp/']);
    for (const builtin of [VENDOR_PRUNE, NODE_MODULES_PRUNE]) {
      expect(effective).toContain(builtin);
    }
  });

  it('removes vendor prune when user has a !vendor negation', () => {
    const userPaths = ['!vendor/**'];
    const effective = getEffectivePrunePatterns(userPaths);
    expect(effective).not.toContain(VENDOR_PRUNE);
    // Only the negated directory is lifted; the rest of the list stays.
    expect(effective).toContain(NODE_MODULES_PRUNE);
  });

  it('removes node_modules prune when user has a !node_modules negation', () => {
    const userPaths = ['!node_modules/**'];
    const effective = getEffectivePrunePatterns(userPaths);
    expect(effective).not.toContain(NODE_MODULES_PRUNE);
    expect(effective).toContain(VENDOR_PRUNE);
  });

  it('handles negation with path separator prefix', () => {
    // The directory name sits in the middle of the negated path.
    const effective = getEffectivePrunePatterns(['!src/vendor/special/**']);
    expect(effective).not.toContain(VENDOR_PRUNE);
  });

  it('handles undefined user paths gracefully', () => {
    const callWithUndefined = () => getEffectivePrunePatterns(undefined);
    expect(callWithUndefined).not.toThrow();
    const effective = getEffectivePrunePatterns(undefined);
    expect(effective).toContain(VENDOR_PRUNE);
  });
});

describe('expandFileGlobs', () => {
let tempDir: string;

Expand Down Expand Up @@ -193,6 +233,61 @@
expect(files).toHaveLength(0);
});

// Checks that expandFileGlobs leaves out files under vendor/ and node_modules/
// by default, and that a `!vendor/**` entry in `ignore.paths` brings vendor/
// files back into the result.
// NOTE(review): `tempDir` is not declared in this block — presumably it is set
// up by the enclosing suite; confirm against the full test file.
describe('built-in directory pruning', () => {
// Default behaviour, second argument given as a plain cwd string.
it('prunes vendor/ directory by default without gitignore', async () => {
// Simulate a new Laravel-style app: app code + vendor/ with PHP files
mkdirSync(join(tempDir, 'app'), { recursive: true });
mkdirSync(join(tempDir, 'vendor', 'laravel', 'framework'), { recursive: true });
writeFileSync(join(tempDir, 'app', 'Controller.php'), '<?php class Controller {}');
writeFileSync(join(tempDir, 'vendor', 'laravel', 'framework', 'Framework.php'), '<?php');

const files = await expandFileGlobs(['**/*.php'], tempDir);

expect(files.some(f => f.includes('app/Controller.php'))).toBe(true);
expect(files.some(f => f.includes('vendor/'))).toBe(false);
});

// Same shape as the vendor/ case, but for node_modules/ and a TypeScript glob.
it('prunes node_modules/ directory by default', async () => {
mkdirSync(join(tempDir, 'src'), { recursive: true });
mkdirSync(join(tempDir, 'node_modules', 'pkg'), { recursive: true });
writeFileSync(join(tempDir, 'src', 'index.ts'), 'export {}');
writeFileSync(join(tempDir, 'node_modules', 'pkg', 'index.ts'), 'module');

const files = await expandFileGlobs(['**/*.ts'], tempDir);

expect(files.some(f => f.includes('src/index.ts'))).toBe(true);
expect(files.some(f => f.includes('node_modules/'))).toBe(false);
});

it('prunes vendor/ even when not in a git repo (no gitignore fallback needed)', async () => {
// No git init — this tests that the fast-glob level prune works independently
mkdirSync(join(tempDir, 'app'), { recursive: true });
mkdirSync(join(tempDir, 'vendor', 'lib'), { recursive: true });
writeFileSync(join(tempDir, 'app', 'main.php'), '<?php');
writeFileSync(join(tempDir, 'vendor', 'lib', 'dep.php'), '<?php');

const files = await expandFileGlobs(['**/*.php'], tempDir);

expect(files.some(f => f.includes('app/main.php'))).toBe(true);
expect(files.some(f => f.includes('vendor/'))).toBe(false);
});

// Opt-in case: uses the options-object call form so an ignore config carrying
// a negation can be supplied alongside cwd.
it('re-includes vendor/ when user ignore has a !vendor negation', async () => {
mkdirSync(join(tempDir, 'app'), { recursive: true });
mkdirSync(join(tempDir, 'vendor', 'lib'), { recursive: true });
writeFileSync(join(tempDir, 'app', 'main.php'), '<?php class App {}');
writeFileSync(join(tempDir, 'vendor', 'lib', 'dep.php'), '<?php class Dep {}');

Check warning on line 280 in packages/warden/src/cli/files.test.ts

View check run for this annotation

@sentry/warden / warden: code-review

[STV-NU3] `getEffectivePrunePatterns` drops prune entries on substring match, not whole-segment match (additional location)

A user negation like `!checkout/**`, `!rebuild/**`, or `!somevendor/**` incorrectly removes the `**/out/**`, `**/build/**`, or `**/vendor/**` prune entry because `neg.includes('out/')`, `neg.includes('build/')`, and `neg.includes('vendor/')` match mid-word substrings — potentially re-exposing the crash-inducing traversal the PR is designed to prevent.
const files = await expandFileGlobs(['**/*.php'], {
cwd: tempDir,
ignore: { paths: ['!vendor/**'] },
});

// Both the application file and the vendor file must now be present.
expect(files.some(f => f.includes('app/main.php'))).toBe(true);
expect(files.some(f => f.includes('vendor/lib/dep.php'))).toBe(true);
});
});

describe('gitignore support', () => {
it('excludes files matching .gitignore patterns by default', async () => {
initGitRepo(tempDir);
Expand Down Expand Up @@ -356,4 +451,21 @@
expect(file2).toBeDefined();
expect(file2?.additions).toBe(2);
});

it('passes ignore config through so user negations can re-include pruned dirs', async () => {
  // Fixture: one application file plus one file below vendor/.
  const appDir = join(tempDir, 'app');
  const vendorLibDir = join(tempDir, 'vendor', 'lib');
  mkdirSync(appDir, { recursive: true });
  mkdirSync(vendorLibDir, { recursive: true });
  writeFileSync(join(appDir, 'main.php'), '<?php class App {}');
  writeFileSync(join(vendorLibDir, 'dep.php'), '<?php class Dep {}');

  // True for any file change whose path runs through vendor/.
  const touchesVendor = (change: { filename: string }): boolean =>
    change.filename.includes('vendor/');

  // Default call: nothing from vendor/ may be reported.
  const prunedRun = await expandAndCreateFileChanges(['**/*.php'], tempDir);
  expect(prunedRun.some(touchesVendor)).toBe(false);

  // Same glob with a `!vendor/**` negation: vendor/ files are reported again.
  const negationOptions = { ignore: { paths: ['!vendor/**'] } };
  const optedInRun = await expandAndCreateFileChanges(['**/*.php'], tempDir, negationOptions);
  expect(optedInRun.some(touchesVendor)).toBe(true);
});
});
81 changes: 74 additions & 7 deletions packages/warden/src/cli/files.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,64 @@
import { execGitNonInteractive } from '../utils/exec.js';
import { isRepoRelativePath, normalizePath } from '../utils/path.js';

/**
 * Directory patterns that are safe to prune at traversal time — before fast-glob
 * returns results. These are the same large dependency / generated-output
 * directories that BUILTIN_IGNORE_PATTERNS in scan-policy blocks after the fact.
 * Pruning them early prevents fast-glob from traversing tens of thousands of
 * files inside a vendor/ or node_modules/ tree when a broad glob like
 * `dieter/**\/*.php` is used against a new Laravel app.
 *
 * Every entry has the shape `**\/<dir>/**`; getEffectivePrunePatterns relies on
 * that shape to extract the bare directory name when applying user negations.
 *
 * Exported so the gitignore fallback scan can reuse the list consistently.
 */
export const BUILTIN_PRUNE_DIRECTORY_PATTERNS = [
'**/node_modules/**',
'**/vendor/**',
'**/dist/**',
'**/build/**',
'**/.next/**',
'**/.nuxt/**',
'**/out/**',
'**/coverage/**',
'**/.cache/**',
] as const;

/**
 * Compute the fast-glob ignore list, starting from BUILTIN_PRUNE_DIRECTORY_PATTERNS
 * and removing any directory whose name is explicitly un-ignored by a user
 * negation pattern (e.g. `!vendor/**`). This lets advanced users opt a
 * dependency directory back in without breaking the default safety behaviour.
 *
 * A negation lifts a prune entry only when the directory name is a complete
 * path segment of the negation and the negation contains at least one `/`.
 * `!vendor/**`, `!/vendor` and `!src/vendor/special/**` all lift the vendor
 * entry; `!somevendor/**`, `!lib/vendored/**`, `!checkout/**` and `!rebuild/**`
 * lift nothing, because the name only occurs inside a longer segment.
 *
 * @param userIgnorePaths - User-configured ignore paths; only entries starting
 *   with `!` are considered. May be omitted or undefined.
 * @returns A fresh array holding the built-in prune patterns that remain in
 *   effect, in their original order.
 */
export function getEffectivePrunePatterns(userIgnorePaths?: string[]): string[] {
  // Strip the leading `!` and split each negation into path segments once, so
  // the per-directory check below is an exact segment comparison.
  const negatedSegmentLists = (userIgnorePaths ?? [])
    .filter((p) => p.startsWith('!'))
    .map((p) => p.slice(1).split('/'));

  if (!negatedSegmentLists.length) {
    return [...BUILTIN_PRUNE_DIRECTORY_PATTERNS];
  }

  return BUILTIN_PRUNE_DIRECTORY_PATTERNS.filter((prunePattern) => {
    // Extract the bare directory name from a pattern like '**/vendor/**'
    const match = prunePattern.match(/\*\*\/([^/]+)\/\*\*/);
    if (!match) return true;
    const dirName = match[1];
    if (dirName === undefined) return true;
    // Drop this prune entry only if a negation names the directory as a whole
    // segment. Substring matching would let `!checkout/**` remove '**/out/**'
    // and re-expose the expensive traversal the prune list exists to prevent.
    // `segments.length > 1` keeps the earlier requirement that the name sits
    // next to a path separator (a bare `!vendor` does not lift the prune).
    return !negatedSegmentLists.some(
      (segments) => segments.length > 1 && segments.includes(dirName)
    );
  });
}

/**
 * Options accepted by expandFileGlobs. Every field is optional; expandFileGlobs
 * falls back to `true` for `gitignore` and passes `ignore.paths` to
 * getEffectivePrunePatterns to decide which built-in directories to prune.
 */
export interface ExpandGlobOptions {
/** Working directory for glob expansion (default: process.cwd()) */
cwd?: string;
/** Respect .gitignore files (default: true) */
gitignore?: boolean;
/**
 * User-configured ignore rules from warden config. Negation patterns inside
 * `paths` (e.g. `!vendor/**`) override the built-in directory prune list so
 * that users who intentionally want to scan dependency trees can do so.
 */
ignore?: IgnoreConfig;
}

export interface SyntheticFileChangeOptions {
Expand Down Expand Up @@ -121,13 +174,14 @@
: [];
} catch {
// Not a real git repo or git not available. Walk directories manually,
// skipping common large directories that would never contain relevant
// .gitignore files.
// skipping large directories that would never contain relevant .gitignore
// files. Reuse the same prune list used by expandFileGlobs() so behaviour
// is consistent across both code paths.
gitignoreFiles = fg.sync('**/.gitignore', {
cwd: gitRoot,
absolute: true,
dot: true,
ignore: ['**/.git/**', '**/node_modules/**'],
ignore: ['**/.git/**', ...BUILTIN_PRUNE_DIRECTORY_PATTERNS],
});
}

Expand Down Expand Up @@ -163,6 +217,12 @@
* By default, respects .gitignore files to automatically exclude ignored
* directories like node_modules/. This can be disabled by setting
* gitignore: false.
*
* Large dependency and generated-output directories (vendor/, node_modules/,
* dist/, …) are also pruned at traversal time via BUILTIN_PRUNE_DIRECTORY_PATTERNS
* so that broad globs like `dieter/**\/*.php` against a Laravel app do not
* cause fast-glob to enumerate tens-of-thousands of files before the
* post-enumeration scan policy has a chance to skip them.
*/
export async function expandFileGlobs(
patterns: string[],
Expand All @@ -175,14 +235,19 @@
const useGitignore = options.gitignore ?? true;
const expandedPatterns = patterns.map((pattern) => expandDirectoryPattern(pattern, cwd));

// Get all matching files first
// Compute directory prune list, honouring user negation overrides.
const prunePatterns = getEffectivePrunePatterns(options.ignore?.paths);

// Enumerate matching files. Built-in directory prune patterns are applied at
// this stage so fast-glob never descends into vendor/ or node_modules/ trees,
// preventing excessive memory use when scanning broad globs. Gitignore-based
// filtering and the full BUILTIN_IGNORE_PATTERNS check happen afterward.
const files = await fg(expandedPatterns, {
cwd,
onlyFiles: true,
absolute: true,
dot: false,
// Always exclude .git directory
ignore: ['**/.git/**'],
Comment thread
cursor[bot] marked this conversation as resolved.
ignore: ['**/.git/**', ...prunePatterns],
});

// If gitignore is disabled, return files as-is
Expand Down Expand Up @@ -298,6 +363,8 @@
options: SyntheticFileChangeOptions = {}
): Promise<FileChange[]> {
const resolvedCwd = resolve(cwd);
const files = await expandFileGlobs(patterns, resolvedCwd);
Comment thread
cursor[bot] marked this conversation as resolved.
// Pass the ignore config so that user negation patterns can override built-in
// prune directories at traversal time (e.g. `!vendor/**` re-includes vendor).
const files = await expandFileGlobs(patterns, { cwd: resolvedCwd, ignore: options.ignore });
return createSyntheticFileChanges(files, resolvedCwd, options);
}
Loading