OpenBMB · KennyUMN · May 31, 2026
diff --git a/src/voxcpm/utils/text_normalize.py b/src/voxcpm/utils/text_normalize.py
@@ -112,7 +112,14 @@ def replace_blank(text: str):
     out_str = []
     for i, c in enumerate(text):
         if c == " ":
-            if (text[i + 1].isascii() and text[i + 1] != " ") and (text[i - 1].isascii() and text[i - 1] != " "):
+            # Keep a blank only when it sits between two ASCII non-space
+            # characters. Guard the neighbour lookups so a leading space
+            # (i == 0) does not wrap around to text[-1] and a trailing
+            # space (i == len(text) - 1) does not raise IndexError. This
+            # mirrors the bounds check already used in split_paragraph().
+            prev_ok = i > 0 and text[i - 1].isascii() and text[i - 1] != " "
+            next_ok = i + 1 < len(text) and text[i + 1].isascii() and text[i + 1] != " "
+            if prev_ok and next_ok:
                 out_str.append(c)
         else:
             out_str.append(c)

diff --git a/tests/test_text_normalize.py b/tests/test_text_normalize.py
@@ -0,0 +1,74 @@
+from __future__ import annotations
+
+import importlib.util
+import sys
+import types
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+TEXT_NORMALIZE_PATH = ROOT / "src" / "voxcpm" / "utils" / "text_normalize.py"
+
+
+def _load_text_normalize() -> types.ModuleType:
+    """Load ``text_normalize.py`` by path with its third-party imports stubbed.
+
+    Only ``replace_blank`` is exercised here, so ``regex``, ``inflect`` and
+    ``wetext`` are replaced by empty placeholder modules while the file is
+    executed. Placeholders are registered only for names that are not
+    already in ``sys.modules`` and are removed again afterwards, so they
+    cannot leak into other tests that need the real packages.
+    """
+    stubs = {name: types.ModuleType(name) for name in ("regex", "inflect", "wetext")}
+    stubs["wetext"].Normalizer = object
+    installed = [name for name in stubs if name not in sys.modules]
+
+    spec = importlib.util.spec_from_file_location("voxcpm.utils.text_normalize", TEXT_NORMALIZE_PATH)
+    if spec is None or spec.loader is None:
+        raise ImportError(f"cannot load {TEXT_NORMALIZE_PATH}")
+    module = importlib.util.module_from_spec(spec)
+    for name in installed:
+        sys.modules[name] = stubs[name]
+    try:
+        spec.loader.exec_module(module)
+    finally:
+        for name in installed:
+            sys.modules.pop(name, None)
+    return module
+
+
+text_normalize = _load_text_normalize()
+replace_blank = text_normalize.replace_blank
+
+
+def test_replace_blank_handles_trailing_space():
+    # A space at the end of the string has no right-hand neighbour. The old
+    # implementation indexed text[i + 1] unconditionally and raised
+    # IndexError. The trailing blank should simply be dropped.
+    assert replace_blank("hello ") == "hello"
+    assert replace_blank("\u4e2d\u6587 ") == "\u4e2d\u6587"
+    assert replace_blank("a b ") == "a b"
+
+
+def test_replace_blank_handles_leading_space():
+    # A space at the start has no left-hand neighbour. The old implementation
+    # let text[i - 1] wrap around to text[-1] (the last character), which
+    # could spuriously keep the leading blank. It should be dropped.
+    assert replace_blank(" ab") == "ab"
+    assert replace_blank(" a") == "a"
+
+
+def test_replace_blank_keeps_space_between_ascii():
+    # The documented behaviour: keep a blank only when it sits between two
+    # ASCII non-space characters.
+    assert replace_blank("a b") == "a b"
+    assert replace_blank("x 1") == "x 1"
+    assert replace_blank("hello world") == "hello world"
+
+
+def test_replace_blank_drops_space_around_cjk():
+    assert replace_blank("\u4e2d \u6587") == "\u4e2d\u6587"
+    assert replace_blank("\u4f60\u597d world ok") == "\u4f60\u597dworld ok"
+
+
+def test_replace_blank_empty_string():
+    assert replace_blank("") == ""