From 4e8fb872a06231f8d61e1b6dd70d5f9a14396399 Mon Sep 17 00:00:00 2001 From: Yujiang Wang Date: Tue, 10 Mar 2026 21:36:38 +0800 Subject: [PATCH 1/2] Ignore files probing optimization: fetch directory entry metadata in batches * On Windows probing the existence of each ignore file is very heavy. Windows prefers fetching metadata in batches (FindFirstFileExW/FindNextFileW, which translates to std::sys::fs::read_dir) * Now, in ignore::Work::read_dir, we read the entire directory at once, then use the result to determine the existence of ignore files. The result is saved and visited later --- crates/ignore/src/dir.rs | 121 ++++++++++++++++++++++++++++++++------ crates/ignore/src/walk.rs | 64 ++++++++++++++------ 2 files changed, 149 insertions(+), 36 deletions(-) diff --git a/crates/ignore/src/dir.rs b/crates/ignore/src/dir.rs index 5939f32e84..9d4b3d30ba 100644 --- a/crates/ignore/src/dir.rs +++ b/crates/ignore/src/dir.rs @@ -16,7 +16,7 @@ use std::{ collections::HashMap, ffi::{OsStr, OsString}, - fs::{File, FileType}, + fs::{self, File, FileType}, io::{self, BufRead}, path::{Path, PathBuf}, sync::{Arc, RwLock, Weak}, @@ -150,6 +150,14 @@ struct IgnoreInner { opts: IgnoreOptions, } +struct IgnoreFilesFound { + has_ignore: bool, + has_git_ignore: bool, + has_git_dir: bool, + has_jj_dir: bool, + custom_ignore_files: Vec, +} + impl Ignore { /// Return the directory path of this matcher. pub(crate) fn path(&self) -> &Path { @@ -254,34 +262,103 @@ impl Ignore { (Ignore(Arc::new(ig)), err) } + /// Like add_child, but uses successful read_dir entries to reduce + /// probing when discovering ignore files. 
+ pub(crate) fn add_child_with_entries>( + &self, + dir: P, + entries: &[fs::DirEntry], + ) -> (Ignore, Option) { + let files = Self::collect_ignore_files( + entries, + &self.0.custom_ignore_filenames, + ); + let (ig, err) = + self.add_child_path_with_found_ignore_files(dir.as_ref(), Some(&files)); + (Ignore(Arc::new(ig)), err) + } + /// Like add_child, but takes a full path and returns an IgnoreInner. fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option) { + self.add_child_path_with_found_ignore_files(dir, None) + } + + fn collect_ignore_files( + entries: &[fs::DirEntry], + custom_ignore_filenames: &[OsString], + ) -> IgnoreFilesFound { + let mut files = IgnoreFilesFound { + has_ignore: false, + has_git_ignore: false, + has_git_dir: false, + has_jj_dir: false, + custom_ignore_files: vec![false; custom_ignore_filenames.len()], + }; + for entry in entries { + let file_name = entry.file_name(); + if file_name == OsStr::new(".ignore") { + files.has_ignore = true; + } else if file_name == OsStr::new(".gitignore") { + files.has_git_ignore = true; + } else if file_name == OsStr::new(".git") { + files.has_git_dir = true; + } else if file_name == OsStr::new(".jj") { + files.has_jj_dir = true; + } + for (i, name) in custom_ignore_filenames.iter().enumerate() { + if file_name == name.as_os_str() { + files.custom_ignore_files[i] = true; + } + } + } + files + } + + fn add_child_path_with_found_ignore_files( + &self, + dir: &Path, + ignore_files_list: Option<&IgnoreFilesFound>, + ) -> (IgnoreInner, Option) { let check_vcs_dir = self.0.opts.require_git && (self.0.opts.git_ignore || self.0.opts.git_exclude); - let git_type = if check_vcs_dir { - dir.join(".git").metadata().ok().map(|md| md.file_type()) - } else { - None - }; - let has_git = - check_vcs_dir && (git_type.is_some() || dir.join(".jj").exists()); + let git_type = + if check_vcs_dir && ignore_files_list.is_none_or(|i| i.has_git_dir) { + dir.join(".git").metadata().ok().map(|md| md.file_type()) + } else { + None 
+ }; + let has_jj = check_vcs_dir + && ignore_files_list.is_none_or(|i| i.has_jj_dir) + && dir.join(".jj").exists(); + let has_git = check_vcs_dir && (git_type.is_some() || has_jj); let mut errs = PartialErrorBuilder::default(); let custom_ig_matcher = if self.0.custom_ignore_filenames.is_empty() { Gitignore::empty() } else { - let (m, err) = create_gitignore( - &dir, - &dir, - &self.0.custom_ignore_filenames, - self.0.opts.ignore_case_insensitive, - ); - errs.maybe_push(err); - m + let custom_ignore_names: Vec<&OsString> = match ignore_files_list { + None => self.0.custom_ignore_filenames.iter().collect(), + Some(m) => self.0.custom_ignore_filenames.iter() + .zip(m.custom_ignore_files.iter()) + .filter_map(|(name, matched)| (*matched).then_some(name)) + .collect(), + }; + if custom_ignore_names.is_empty() { + Gitignore::empty() + } else { + let (m, err) = create_gitignore( + &dir, + &dir, + &custom_ignore_names, + self.0.opts.ignore_case_insensitive, + ); + errs.maybe_push(err); + m + } }; let ig_matcher = if !self.0.opts.ignore { Gitignore::empty() - } else { + } else if ignore_files_list.is_none_or(|i| i.has_ignore) { let (m, err) = create_gitignore( &dir, &dir, @@ -290,10 +367,12 @@ impl Ignore { ); errs.maybe_push(err); m + } else { + Gitignore::empty() }; let gi_matcher = if !self.0.opts.git_ignore { Gitignore::empty() - } else { + } else if ignore_files_list.is_none_or(|i| i.has_git_ignore) { let (m, err) = create_gitignore( &dir, &dir, @@ -302,11 +381,13 @@ impl Ignore { ); errs.maybe_push(err); m + } else { + Gitignore::empty() }; let gi_exclude_matcher = if !self.0.opts.git_exclude { Gitignore::empty() - } else { + } else if ignore_files_list.is_none_or(|i| i.has_git_dir) { match resolve_git_commondir(dir, git_type) { Ok(git_dir) => { let (m, err) = create_gitignore( @@ -323,6 +404,8 @@ impl Ignore { Gitignore::empty() } } + } else { + Gitignore::empty() }; let ig = IgnoreInner { compiled: self.0.compiled.clone(), diff --git a/crates/ignore/src/walk.rs 
b/crates/ignore/src/walk.rs index f697c50920..0301de9fa5 100644 --- a/crates/ignore/src/walk.rs +++ b/crates/ignore/src/walk.rs @@ -1463,6 +1463,12 @@ struct Work { root_device: Option, } +#[derive(Default)] +struct ReadDirResult { + entries: Vec, + errors: Vec, +} + impl Work { /// Returns true if and only if this work item is a directory. fn is_dir(&self) -> bool { @@ -1489,6 +1495,13 @@ impl Work { err } + /// Adds ignore rules for this directory without reading its contents. + fn add_ignore(&mut self) { + let (ig, err) = self.ignore.add_child(self.dent.path()); + self.ignore = ig; + self.dent.err = err; + } + /// Reads the directory contents of this work item and adds ignore /// rules for this directory. /// @@ -1496,7 +1509,7 @@ impl Work { /// an error is returned. If there was a problem reading the ignore /// rules for this directory, then the error is attached to this /// work item's directory entry. - fn read_dir(&mut self) -> Result { + fn read_dir(&mut self) -> Result { let readdir = match fs::read_dir(self.dent.path()) { Ok(readdir) => readdir, Err(err) => { @@ -1506,10 +1519,22 @@ impl Work { return Err(err); } }; - let (ig, err) = self.ignore.add_child(self.dent.path()); + // Actually descend into the directory and read its contents + let mut result = ReadDirResult::default(); + for entry in readdir { + match entry { + Ok(entry) => result.entries.push(entry), + Err(err) => result.errors.push( + Error::from(err) + .with_path(self.dent.path()) + .with_depth(self.dent.depth() + 1), + ), + } + } + let (ig, err) = self.ignore.add_child_with_entries(self.dent.path(), &result.entries); self.ignore = ig; self.dent.err = err; - Ok(readdir) + Ok(result) } } @@ -1679,8 +1704,14 @@ impl<'s> Worker<'s> { // have sufficient read permissions to list the directory. // In that case we still want to provide the closure with a valid // entry before passing the error value. 
- let readdir = work.read_dir(); let depth = work.dent.depth(); + let readdir = if descend + && self.max_depth.is_none_or(|m| work.dent.depth() < m) { + Some(work.read_dir()) + } else { + work.add_ignore(); + None + }; if should_visit { let state = self.visitor.visit(Ok(work.dent)); if !state.is_continue() { @@ -1691,6 +1722,10 @@ impl<'s> Worker<'s> { return WalkState::Skip; } + let readdir = match readdir { + Some(readdir) => readdir, + None => return WalkState::Skip, + }; let readdir = match readdir { Ok(readdir) => readdir, Err(err) => { @@ -1698,10 +1733,7 @@ impl<'s> Worker<'s> { } }; - if self.max_depth.map_or(false, |max| depth >= max) { - return WalkState::Skip; - } - for result in readdir { + for result in readdir.entries { let state = self.generate_work( &work.ignore, depth + 1, @@ -1712,6 +1744,12 @@ impl<'s> Worker<'s> { return state; } } + for err in readdir.errors { + let state = self.visitor.visit(Err(err)); + if state.is_quit() { + return state; + } + } WalkState::Continue } @@ -1733,16 +1771,8 @@ impl<'s> Worker<'s> { ig: &Ignore, depth: usize, root_device: Option, - result: Result, + fs_dent: fs::DirEntry, ) -> WalkState { - let fs_dent = match result { - Ok(fs_dent) => fs_dent, - Err(err) => { - return self - .visitor - .visit(Err(Error::from(err).with_depth(depth))); - } - }; let mut dent = match DirEntryRaw::from_entry(depth, &fs_dent) { Ok(dent) => DirEntry::new_raw(dent, None), Err(err) => { From 385ba0fe36faf50227f411d34b2413cb7bd2b76b Mon Sep 17 00:00:00 2001 From: Yujiang Wang Date: Thu, 21 May 2026 13:32:05 +0800 Subject: [PATCH 2/2] Fix fmt --- crates/ignore/src/dir.rs | 24 +++++++++++++++--------- crates/ignore/src/walk.rs | 7 +++++-- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/crates/ignore/src/dir.rs b/crates/ignore/src/dir.rs index 9d4b3d30ba..3b7adfa5b1 100644 --- a/crates/ignore/src/dir.rs +++ b/crates/ignore/src/dir.rs @@ -273,8 +273,10 @@ impl Ignore { entries, &self.0.custom_ignore_filenames, ); - let (ig, 
err) = - self.add_child_path_with_found_ignore_files(dir.as_ref(), Some(&files)); + let (ig, err) = self.add_child_path_with_found_ignore_files( + dir.as_ref(), + Some(&files), + ); (Ignore(Arc::new(ig)), err) } @@ -321,12 +323,13 @@ impl Ignore { ) -> (IgnoreInner, Option) { let check_vcs_dir = self.0.opts.require_git && (self.0.opts.git_ignore || self.0.opts.git_exclude); - let git_type = - if check_vcs_dir && ignore_files_list.is_none_or(|i| i.has_git_dir) { - dir.join(".git").metadata().ok().map(|md| md.file_type()) - } else { - None - }; + let git_type = if check_vcs_dir + && ignore_files_list.is_none_or(|i| i.has_git_dir) + { + dir.join(".git").metadata().ok().map(|md| md.file_type()) + } else { + None + }; let has_jj = check_vcs_dir && ignore_files_list.is_none_or(|i| i.has_jj_dir) && dir.join(".jj").exists(); @@ -338,7 +341,10 @@ impl Ignore { } else { let custom_ignore_names: Vec<&OsString> = match ignore_files_list { None => self.0.custom_ignore_filenames.iter().collect(), - Some(m) => self.0.custom_ignore_filenames.iter() + Some(m) => self + .0 + .custom_ignore_filenames + .iter() .zip(m.custom_ignore_files.iter()) .filter_map(|(name, matched)| (*matched).then_some(name)) .collect(), diff --git a/crates/ignore/src/walk.rs b/crates/ignore/src/walk.rs index 0301de9fa5..a3e6348459 100644 --- a/crates/ignore/src/walk.rs +++ b/crates/ignore/src/walk.rs @@ -1531,7 +1531,9 @@ impl Work { ), } } - let (ig, err) = self.ignore.add_child_with_entries(self.dent.path(), &result.entries); + let (ig, err) = self + .ignore + .add_child_with_entries(self.dent.path(), &result.entries); self.ignore = ig; self.dent.err = err; Ok(result) @@ -1706,7 +1708,8 @@ impl<'s> Worker<'s> { // entry before passing the error value. let depth = work.dent.depth(); let readdir = if descend - && self.max_depth.is_none_or(|m| work.dent.depth() < m) { + && self.max_depth.is_none_or(|m| work.dent.depth() < m) + { Some(work.read_dir()) } else { work.add_ignore();