Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@
@ale-rt
- Fix possible issue with whitespaces lost around comments in XML documents.
@ale-rt
- Refuse to emit XML that the recover-mode parser would silently truncate,
so content after the root element is no longer quietly dropped.
(#230)
@gronke
- Write files atomically in ``--inplace`` mode so a failed run cannot leave a
half-written file behind.
(#230)
@gronke


## 4.0.2 (2026-06-09)
Expand Down
34 changes: 28 additions & 6 deletions zpretty/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,14 @@
from pathlib import Path
from sys import stderr
from sys import stdout
from zpretty.prettifier import ContentLossError
from zpretty.prettifier import ZPrettifier
from zpretty.xml import XMLPrettifier
from zpretty.zcml import ZCMLPrettifier

import os
import re
import tempfile

version = version("zpretty")

Expand Down Expand Up @@ -207,21 +210,40 @@ def good_paths(self):

return sorted(good_paths)

@staticmethod
def _atomic_write(path, content):

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am failing to see the practical need to write the file to a temporary location.

"""Write ``content`` to ``path`` via a temp file and ``os.replace``."""
directory = os.path.dirname(os.path.abspath(path))
fd, tmp = tempfile.mkstemp(dir=directory, prefix=".zpretty-", suffix=".tmp")
try:
with os.fdopen(fd, "w") as f:
f.write(content)
os.replace(tmp, path)
except BaseException:
try:
os.unlink(tmp)
except OSError:
pass
raise

def run(self):
"""Prettify each filename passed in the command line"""
encoding = self.config.encoding
for path in self.good_paths:
# use Pathlib to check if the file exists and it is a file
Prettifier = self.choose_prettifier(path)
prettifier = Prettifier(path, encoding=encoding)
if self.config.check:
if not prettifier.check():
self.errors.append(f"This file would be rewritten: {path}")
try:
if self.config.check:
if not prettifier.check():
self.errors.append(f"This file would be rewritten: {path}")
continue
prettified = prettifier()
except ContentLossError as error:
self.errors.append(f"{path}: {error}")
continue
prettified = prettifier()
if self.config.inplace and not path == "-":
with open(path, "w") as f:
f.write(prettified)
self._atomic_write(path, prettified)
continue
stdout.write(prettified)

Expand Down
40 changes: 40 additions & 0 deletions zpretty/prettifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from bs4.element import ProcessingInstruction
from bs4.element import Tag
from logging import getLogger
from lxml import etree
from uuid import uuid4
from zpretty.constants import ANY_IN
from zpretty.elements import PrettyElement
Expand All @@ -14,6 +15,10 @@
logger = getLogger(__name__)


class ContentLossError(Exception):
"""Raised when prettifying would drop content from the input."""


class ZPrettifier:
"""Wraps and renders some text that may contain xml like stuff"""

Expand Down Expand Up @@ -187,8 +192,43 @@ def check(self):
"""Checks if the input object should be prettified"""
return self.original_text == self()

# lxml flags content dropped after the document root with this error type.
_truncation_error = "ERR_DOCUMENT_END"

def _assert_no_content_loss(self):
"""Refuse input that lxml's recover mode would silently truncate.

XML is parsed in recover mode, which repairs many mistakes without
losing anything: a mismatched or unclosed tag is simply closed for you.
The dangerous exception is content after the root element - lxml keeps
the first root and silently drops the rest. We detect exactly that case
so the CLI can refuse instead of emitting (or overwriting a file with) a
truncated document.
"""
if self._is_html_builder:
return
# Only standalone documents reach this; wrapped fragments and non-XML
# come back as a plain tag rather than a full soup.
if not isinstance(self.soup, BeautifulSoup):
return
# self.text is masked already; drop the declaration (lxml rejects an
# encoding declaration on a str) before re-parsing.
text = re.sub(r"^\s*<\?xml\b[^>]*\?>\s*", "", self.text, count=1)
parser = etree.XMLParser(recover=True)
try:
etree.fromstring(text, parser)
except etree.XMLSyntaxError:
return
for error in parser.error_log:
if error.type_name == self._truncation_error:
raise ContentLossError(
"content after the root element would be silently "
"dropped; refusing to emit a truncated document"
)

def __call__(self):
if not self.root.getchildren():
# The parsed content is not even something that looks like an XML
return self.original_text
self._assert_no_content_loss()
return self.pretty_print(self.root)
7 changes: 7 additions & 0 deletions zpretty/tests/original/truncated_recipe.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<recipe>
<cake>Sachertorte</cake>
</recipe>
<recipe>
<cake>Gugelhupf</cake>
</recipe>
48 changes: 48 additions & 0 deletions zpretty/tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from importlib.resources import files
from tempfile import TemporaryDirectory
from unittest import TestCase
from zpretty.cli import CLIRunner
from zpretty.prettifier import ZPrettifier
from zpretty.tests.mock import MockCLIRunner
from zpretty.xml import XMLPrettifier
Expand Down Expand Up @@ -227,3 +228,50 @@ def test_good_paths(self):
clirunner.good_paths,
sorted([xsd, xslt]),
)

def test_truncated_file_is_refused_and_not_overwritten(self):
"""`zpretty -i` on truncatable XML errors out and leaves the file intact."""
from unittest import mock

original = (
'<?xml version="1.0" encoding="utf-8"?>\n'
"<recipe>\n <cake>Sachertorte</cake>\n</recipe>\n"
"<recipe>\n <cake>Gugelhupf</cake>\n</recipe>\n"
)
with TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "recipes.xml")
with open(path, "w") as f:
f.write(original)
clirunner = MockCLIRunner("-x", "-i", path)
with mock.patch("builtins.exit", return_value=None) as mocked:
clirunner.run()
mocked.assert_called_once_with(1)
with open(path) as f:
self.assertEqual(f.read(), original)
self.assertListEqual(os.listdir(tmpdir), ["recipes.xml"])

def test_atomic_write_replaces_content(self):
"""--inplace writes are atomic and leave no temporary file behind."""
with TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "f.xml")
with open(path, "w") as f:
f.write("OLD")
CLIRunner._atomic_write(path, "NEW")
with open(path) as f:
self.assertEqual(f.read(), "NEW")
self.assertListEqual(os.listdir(tmpdir), ["f.xml"])

def test_atomic_write_preserves_original_on_failure(self):
"""A failed --inplace write must leave the original file untouched."""
from unittest import mock

with TemporaryDirectory() as tmpdir:
path = os.path.join(tmpdir, "f.xml")
with open(path, "w") as f:
f.write("ORIGINAL")
with mock.patch("os.replace", side_effect=OSError("boom")):
with self.assertRaises(OSError):
CLIRunner._atomic_write(path, "NEW")
with open(path) as f:
self.assertEqual(f.read(), "ORIGINAL")
self.assertListEqual(os.listdir(tmpdir), ["f.xml"])
33 changes: 33 additions & 0 deletions zpretty/tests/test_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from importlib.resources import files
from textwrap import dedent
from unittest import TestCase
from zpretty.prettifier import ContentLossError
from zpretty.xml import XMLElement
from zpretty.xml import XMLPrettifier

Expand Down Expand Up @@ -103,3 +104,35 @@ def test_sample_dtml(self):

def test_sample_txt(self):
self.prettify("sample.txt")

def test_recoverable_xml_is_repaired_not_refused(self):
"""A mismatched closing tag is repaired, not rejected.

lxml recover mode rewrites ``</frosting>`` to match ``<filling>``
without losing the text, and that auto-fix must keep working.
"""
text = "<cake><filling>ganache</frosting></cake>"
observed = XMLPrettifier(text=text)()
self.assertIn("ganache", observed)
self.assertIn("<filling>ganache</filling>", observed)
self.assertNotIn("</frosting>", observed)

def test_truncated_xml_is_refused(self):
"""Content after the root element is silently dropped, so we refuse."""
text = dedent("""\
<?xml version="1.0" encoding="utf-8"?>
<recipe>
<cake>Sachertorte</cake>
</recipe>
<recipe>
<cake>Gugelhupf</cake>
</recipe>
""")
with self.assertRaises(ContentLossError):
XMLPrettifier(text=text)()

def test_truncated_file_is_refused(self):
"""A file that would be truncated raises rather than losing a recipe."""
path = self.sample_folder_path / "truncated_recipe.xml"
with self.assertRaises(ContentLossError):
XMLPrettifier(path)()