From 17ead1272c66a606de16233e5af456abbf2bfaf2 Mon Sep 17 00:00:00 2001
From: Sylvestre Ledru <sylvestre@debian.org>
Date: Sun, 7 Jun 2026 11:37:20 +0200
Subject: [PATCH] ptx: use char counts for before-chunk sizing in
 get_output_chunks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The max_before_size assert compared against before.len() (byte length)
while max_before_size is measured in chars, panicking on multibyte input
like 'éé word'. The tail-chunk budget (max_tail_size) had the same
byte/char mismatch, shrinking the tail too much and dropping a word that
fits. Use char counts in both places, matching the after chunk.

Fixes #10893
---
 src/uu/ptx/src/ptx.rs     |  2 +-
 tests/by-util/test_ptx.rs | 27 +++++++++++++++++++++++++++
 2 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/src/uu/ptx/src/ptx.rs b/src/uu/ptx/src/ptx.rs
index 28d5e880a4c..8b57cd7e59a 100644
--- a/src/uu/ptx/src/ptx.rs
+++ b/src/uu/ptx/src/ptx.rs
@@ -522,7 +522,7 @@ fn get_output_chunks(
 
     // max size of the tail chunk = max size of left half - space taken by before chunk - gap size.
     let max_tail_size = cmp::max(
-        max_before_size as isize - before.len() as isize - config.gap_size as isize,
+        max_before_size as isize - before.chars().count() as isize - config.gap_size as isize,
         0,
     ) as usize;
 
diff --git a/tests/by-util/test_ptx.rs b/tests/by-util/test_ptx.rs
index 4752d924e0c..2cfa1def906 100644
--- a/tests/by-util/test_ptx.rs
+++ b/tests/by-util/test_ptx.rs
@@ -365,6 +365,33 @@ fn test_unicode_in_after_chunk_does_not_panic() {
         .stdout_contains("We've got +11");
 }
 
+#[test]
+fn test_unicode_in_before_chunk_does_not_panic() {
+    // Regression test for issue #10893: get_output_chunks() could panic because
+    // max_before_size is measured in chars while the assert compared it against
+    // before.len() (a byte length). With multibyte input such as "éé word"
+    // (7 chars, 9 bytes) this tripped `assertion failed: max_before_size >= before.len()`.
+    new_ucmd!()
+        .args(&["-w", "10"])
+        .pipe_in("éé word\n")
+        .succeeds()
+        .no_stderr();
+}
+
+#[test]
+fn test_unicode_tail_chunk_sizing() {
+    // The tail chunk budget (max_tail_size) is what remains after subtracting the
+    // before chunk and the gap, so the before chunk must be measured in chars, not
+    // bytes; otherwise multibyte text shrinks the tail and drops a word that fits.
+    // Here "cc" should wrap into the tail, printed ahead of "aé bé KEY" as "cc/ aé";
+    // with the byte-based budget it was dropped.
+    new_ucmd!()
+        .args(&["-w", "20"])
+        .pipe_in("aé bé KEY cc dd ee ff gg\n")
+        .succeeds()
+        .stdout_contains("cc/ aé");
+}
+
 #[test]
 fn test_duplicate_input_files() {
     new_ucmd!()