Patch summary (free-form text placed ahead of the first `diff --git` header).

src/uu/ptx/src/ptx.rs, inside get_output_chunks():
  The `max_tail_size` computation subtracts the size of the `before` chunk and
  `config.gap_size` from `max_before_size`, clamping the result at 0. This
  patch changes the `before` term from its byte length (`before.len()`) to its
  char count (`before.chars().count()`). Nothing else in the hunk changes.

tests/by-util/test_ptx.rs:
  Adds `test_unicode_in_before_chunk_does_not_panic`, which pipes "éé word\n"
  through the command with `-w 10` and chains `.succeeds().no_stderr()`.
  Adds `test_unicode_tail_chunk_sizing`, which pipes "aé bé KEY cc dd ee ff gg\n"
  with `-w 20` and checks that stdout contains "cc/ aé".

NOTE(review): the leading indentation of context/added lines below was restored
by convention (4-space Rust indent) — confirm against the upstream commit before
applying this patch.

diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs
index 28d5e880a4c..8b57cd7e59a 100644
--- a/src/uu/ptx/src/ptx.rs
+++ b/src/uu/ptx/src/ptx.rs
@@ -522,7 +522,7 @@ fn get_output_chunks(
 
     // max size of the tail chunk = max size of left half - space taken by before chunk - gap size.
     let max_tail_size = cmp::max(
-        max_before_size as isize - before.len() as isize - config.gap_size as isize,
+        max_before_size as isize - before.chars().count() as isize - config.gap_size as isize,
         0,
     ) as usize;
 
diff --git a/tests/by-util/test_ptx.rs b/tests/by-util/test_ptx.rs
index 4752d924e0c..2cfa1def906 100644
--- a/tests/by-util/test_ptx.rs
+++ b/tests/by-util/test_ptx.rs
@@ -365,6 +365,33 @@ fn test_unicode_in_after_chunk_does_not_panic() {
         .stdout_contains("We've got +11");
 }
 
+#[test]
+fn test_unicode_in_before_chunk_does_not_panic() {
+    // Regression test for issue #10893: a panic in get_output_chunks() when the
+    // computed max_before_size used char counts but the assert compared against
+    // before.len() (byte length). A multibyte char in the before chunk could
+    // trigger: `assertion failed: max_before_size >= before.len()`.
+    new_ucmd!()
+        .args(&["-w", "10"])
+        .pipe_in("éé word\n")
+        .succeeds()
+        .no_stderr();
+}
+
+#[test]
+fn test_unicode_tail_chunk_sizing() {
+    // The tail chunk budget (max_tail_size) subtracts the size of the before
+    // chunk. It must use the before chunk's char count, not its byte length,
+    // otherwise a multibyte before chunk shrinks the tail too much and drops a
+    // word that fits. Here "cc" wraps into the tail before "aé bé KEY"; with the
+    // byte-based budget it was dropped.
+    new_ucmd!()
+        .args(&["-w", "20"])
+        .pipe_in("aé bé KEY cc dd ee ff gg\n")
+        .succeeds()
+        .stdout_contains("cc/ aé");
+}
+
 #[test]
 fn test_duplicate_input_files() {
     new_ucmd!()