From 00703066d992812d33f1a8d079633712f8208505 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 10 Jun 2026 08:09:27 +0200 Subject: [PATCH] du: do not deduplicate --files0-from / operand names by name GNU du deduplicates only by inode during traversal (disabled by --count-links); it never collapses repeated operand names. uutils du removed duplicate names from both read_files_from() and the plain operand list, which produced two GNU-visible divergences in tests/du/files0-from.pl: - 'du -l --files0-from' with a file listed twice printed it once instead of twice (-l must disable dedup). - repeated missing files reported a single error instead of one per occurrence (missing files have no inode to dedup on). Drop the name-based dedup in both places and rely on the existing inode-based dedup in the traversal loop, matching GNU. This also fixes the plain-operand case 'du missing missing'. Fixes tests/du/files0-from.pl. --- src/uu/du/src/du.rs | 23 +++++++++-------------- tests/by-util/test_du.rs | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 14 deletions(-) diff --git a/src/uu/du/src/du.rs b/src/uu/du/src/du.rs index fa91da7ac24..adc3f342a39 100644 --- a/src/uu/du/src/du.rs +++ b/src/uu/du/src/du.rs @@ -938,10 +938,11 @@ fn read_files_from(file_name: &OsStr) -> Result<Vec<PathBuf>, std::io::Error> { show_error!("{}", translate!("du-error-hyphen-file-name-not-allowed")); set_exit_code(1); } else { - let p = PathBuf::from(&*uucore::os_str_from_bytes(&path).unwrap()); - if !paths.contains(&p) { - paths.push(p); - } + // Do not deduplicate here: GNU du processes every entry and + // relies on inode-based deduplication during traversal (which is + // disabled by --count-links). Deduplicating by name would, e.g., + // collapse repeated missing files into a single error. + paths.push(PathBuf::from(&*uucore::os_str_from_bytes(&path).unwrap())); } } @@ -1036,16 +1037,10 @@ pub fn uumain(args: impl uucore::Args) -> UResult<()> { read_files_from(file_from)?
} else if let Some(files) = matches.get_many::<OsString>(options::FILE) { - let files = files.map(PathBuf::from); - if count_links { - files.collect() - } else { - // Deduplicate while preserving order - let mut seen = HashSet::default(); - files - .filter(|path| seen.insert(path.clone())) - .collect::<Vec<_>>() - } + // Do not deduplicate by name: GNU du processes every operand and relies + // on inode-based deduplication during traversal (disabled by + // --count-links), so repeated missing files are reported once per occurrence. + files.map(PathBuf::from).collect() } else { vec![PathBuf::from(".")] }; diff --git a/tests/by-util/test_du.rs b/tests/by-util/test_du.rs index 7dfda01fd4a..6147b5a8fc3 100644 --- a/tests/by-util/test_du.rs +++ b/tests/by-util/test_du.rs @@ -1619,6 +1619,38 @@ fn test_du_files0_from_ignore_duplicate_file_names() { .stdout_is(format!("0\t{file}\n")); } +#[test] +fn test_du_files0_from_count_links_lists_duplicate_file_names() { + // With --count-links (-l), inode-based deduplication is disabled, so a + // file listed twice must be reported twice (matches GNU du). + let ts = TestScenario::new(util_name!()); + let at = &ts.fixtures; + let file = "testfile"; + + at.touch(file); + at.write("filelist", &format!("{file}\0{file}\0")); + + ts.ucmd() + .arg("-l") + .arg("--files0-from=filelist") + .succeeds() + .stdout_is(format!("0\t{file}\n0\t{file}\n")); +} + +#[test] +fn test_du_files0_from_repeated_missing_file_reported_each_time() { + // Missing files have no inode to deduplicate on, so each occurrence in the + // input must produce its own error (matches GNU du). + new_ucmd!() + .arg("--files0-from=-") + .pipe_in("missing\0missing\0") + .fails_with_code(1) + .stderr_is( + "du: cannot access 'missing': No such file or directory\n\ + du: cannot access 'missing': No such file or directory\n", + ); +} + #[test] fn test_du_files0_from_with_invalid_zero_length_file_names() { let ts = TestScenario::new(util_name!());