diff --git a/README.md b/README.md index e72c8cb..5b31979 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,26 @@ decode("id: 123", {"indent": 2, "strict": True}) - `indent`: Expected indent size (default: `2`) - `strict`: Validate syntax, lengths, delimiters (default: `True`) +### Working with JSON strings + +When data arrives as raw JSON text (LLM tool outputs, REST APIs, logs), skip the +manual `json.loads` step. JSON `null` is handled as `None` automatically. + +```python +from toon_format import encode_json, loads + +# JSON string straight to TOON +encode_json('{"name": "Alice", "mood": null}') +# name: Alice +# mood: null + +# Parse JSON into TOON-ready Python objects (null -> None) +loads('{"b": [1, null, 3]}') +# {'b': [1, None, 3]} +``` + +`encode_json(json_string, options=None)` is equivalent to `encode(loads(json_string), options)`. + ## Format Specification | Type | Example Input | TOON Output | @@ -125,7 +145,7 @@ git clone https://github.com/toon-format/toon-python.git cd toon-python uv sync -# Run tests (805 tests, 93% coverage, 85% enforced) +# Run tests (818 tests, 93% coverage, 85% enforced) uv run pytest --cov=toon_format --cov-report=term # Code quality diff --git a/docs/README.md b/docs/README.md index e2f16a3..ca654f6 100644 --- a/docs/README.md +++ b/docs/README.md @@ -24,6 +24,8 @@ New to TOON? 
Start here: Complete reference for all public functions and classes: - `encode()` - Convert Python to TOON - `decode()` - Convert TOON to Python +- `encode_json()` - Convert a JSON string directly to TOON +- `loads()` - Parse a JSON string into TOON-ready Python objects - `EncodeOptions` - Encoding configuration - `DecodeOptions` - Decoding configuration - `ToonDecodeError` - Error handling diff --git a/docs/api.md b/docs/api.md index c621e8d..5318982 100644 --- a/docs/api.md +++ b/docs/api.md @@ -76,6 +76,66 @@ decode(" item: value", {"indent": 4, "strict": False}) --- +## JSON Interop + +Helpers for working directly with JSON strings, so raw JSON text (LLM tool +outputs, REST API responses, logs) can go to TOON without a manual `json.loads` +step. JSON `null` is mapped to Python `None`, which `encode()` renders as the +TOON `null` literal. + +### `encode_json(json_string, options=None)` + +Encode a JSON string directly into TOON. Equivalent to +`encode(loads(json_string), options)`. + +**Parameters:** +- `json_string` (str): The JSON text to convert +- `options` (EncodeOptions | dict, optional): Encoding options (see [`EncodeOptions`](#encodeoptions)) + +**Returns:** `str` - The TOON-formatted string + +**Raises:** +- `json.JSONDecodeError`: If `json_string` is not valid JSON + +**Example:** + +```python +from toon_format import encode_json + +print(encode_json('{"name": "Alice", "mood": null, "tags": [1, null, 3]}')) +# name: Alice +# mood: null +# tags[3]: 1,null,3 +``` + +### `loads(json_string, **kwargs)` + +Parse a JSON string into TOON-ready Python objects. A thin wrapper around +`json.loads()`; JSON `null` becomes `None`. Extra keyword arguments are +forwarded to `json.loads`. + +**Parameters:** +- `json_string` (str): The JSON text to parse +- `**kwargs`: Forwarded to `json.loads` (e.g. 
`parse_float`) + +**Returns:** The parsed Python value + +**Raises:** +- `json.JSONDecodeError`: If `json_string` is not valid JSON + +**Example:** + +```python +from toon_format import loads, encode + +data = loads('{"b": [1, null, 3]}') +# {'b': [1, None, 3]} +print(encode(data)) +# b[3]: 1,null,3 +``` + +--- + ## Options Classes ### `EncodeOptions` diff --git a/src/toon_format/__init__.py b/src/toon_format/__init__.py index 29400d9..88236e2 100644 --- a/src/toon_format/__init__.py +++ b/src/toon_format/__init__.py @@ -23,11 +23,14 @@ from .decoder import ToonDecodeError, decode from .encoder import encode from .types import DecodeOptions, Delimiter, DelimiterKey, EncodeOptions +from .utils import encode_json, loads __version__ = "0.9.0-beta.1" __all__ = [ "encode", "decode", + "encode_json", + "loads", "ToonDecodeError", "Delimiter", "DelimiterKey", diff --git a/src/toon_format/utils.py b/src/toon_format/utils.py new file mode 100644 index 0000000..185bbee --- /dev/null +++ b/src/toon_format/utils.py @@ -0,0 +1,76 @@ +# Copyright (c) 2025 TOON Format Organization +# SPDX-License-Identifier: MIT +"""Convenience helpers built on top of the core encode/decode API. + +A shared home for small, public utilities that complement `encode()` and +`decode()` without belonging to the core pipeline. + +Currently this provides JSON-string interop. Data often arrives as raw JSON text +-- LLM tool outputs, REST API responses, log files -- where the JSON `null` +keyword is not a Python value and must become Python `None` before +encoding. `encode()` already renders `None` as the TOON `null` literal, and the +standard library's `json.loads` already maps `null` to `None`, so these helpers +simply remove the boilerplate of wiring the two together for the common +JSON-string -> TOON path. 
+""" + +import json +from typing import Any, Optional + +from .encoder import encode +from .types import EncodeOptions, JsonValue + +__all__ = ["loads", "encode_json"] + + +def loads(json_string: str, **kwargs: Any) -> JsonValue: + """Parse a JSON string into TOON-ready Python objects. + + A thin wrapper around :func:`json.loads`. JSON `null` becomes Python `None`, + `true`/`false` become `bool`, objects become `dict`, and arrays become + `list` -- exactly the types that :func:`~toon_format.encode` expects. + + Args: + json_string: The JSON text to parse. + **kwargs: Additional keyword arguments forwarded to ``json.loads`` + (e.g. ``parse_float``). + + Returns: + The parsed Python value. + + Raises: + json.JSONDecodeError: If ``json_string`` is not valid JSON. + + Example: + >>> from toon_format import loads + >>> loads('{"a": null, "b": [1, null, 3]}') + {'a': None, 'b': [1, None, 3]} + """ + return json.loads(json_string, **kwargs) + + +def encode_json(json_string: str, options: Optional[EncodeOptions] = None) -> str: + """Encode a JSON string directly into TOON format. + + Equivalent to ``encode(loads(json_string), options)``. Use this when data + arrives as raw JSON text and you want TOON out in a single call -- JSON + `null` is handled as `None` automatically, with no manual preprocessing. + + Args: + json_string: The JSON text to convert. + options: Optional encoding options (see + :class:`~toon_format.EncodeOptions`). + + Returns: + The TOON-formatted string. + + Raises: + json.JSONDecodeError: If ``json_string`` is not valid JSON. 
+ + Example: + >>> from toon_format import encode_json + >>> print(encode_json('{"name": "Alice", "mood": null}')) + name: Alice + mood: null + """ + return encode(loads(json_string), options) diff --git a/tests/test_json_integration.py b/tests/test_json_integration.py new file mode 100644 index 0000000..8e1cb64 --- /dev/null +++ b/tests/test_json_integration.py @@ -0,0 +1,85 @@ +"""Tests for JSON interop helpers (loads, encode_json). + +These cover the common integration case of taking raw JSON text -- with `null` +values from LLM outputs, REST APIs, or logs -- straight to TOON without manual +`null` -> `None` preprocessing. +""" + +import json + +import pytest + +from toon_format import encode, encode_json, loads + + +class TestLoads: + """Test the loads() JSON parsing wrapper.""" + + def test_loads_matches_json_loads(self): + """loads() should behave like json.loads for valid input.""" + text = '{"a": 1, "b": [2, 3], "c": "x"}' + assert loads(text) == json.loads(text) + + def test_loads_converts_null_to_none(self): + """JSON null should become Python None.""" + assert loads("null") is None + assert loads('{"a": null}') == {"a": None} + + def test_loads_converts_nested_nulls(self): + """Nulls in nested objects and arrays should all become None.""" + result = loads('{"a": null, "b": [1, null, 3], "c": {"d": null}}') + assert result == {"a": None, "b": [1, None, 3], "c": {"d": None}} + + def test_loads_preserves_primitive_types(self): + """booleans, ints, floats, and strings should round-trip as-is.""" + assert loads('{"t": true, "f": false, "i": 1, "x": 1.5, "s": "hi"}') == { + "t": True, + "f": False, + "i": 1, + "x": 1.5, + "s": "hi", + } + + def test_loads_forwards_kwargs(self): + """Extra keyword arguments should reach json.loads.""" + result = loads('{"x": 1.5}', parse_float=str) + assert result == {"x": "1.5"} + + def test_loads_raises_on_invalid_json(self): + """Invalid JSON should raise json.JSONDecodeError.""" + with pytest.raises(json.JSONDecodeError): + 
loads("{not valid}") + + +class TestEncodeJson: + """Test the encode_json() one-step JSON -> TOON helper.""" + + def test_encode_json_simple_object(self): + """A JSON object string should encode to TOON.""" + assert encode_json('{"name": "Alice", "age": 30}') == "name: Alice\nage: 30" + + def test_encode_json_null_in_object(self): + """JSON null in an object should render as the TOON null literal.""" + assert encode_json('{"name": "Alice", "mood": null}') == "name: Alice\nmood: null" + + def test_encode_json_null_in_array(self): + """JSON null inside an array should render as null, not the string 'null'.""" + assert encode_json('{"b": [1, null, 3]}') == "b[3]: 1,null,3" + + def test_encode_json_top_level_null(self): + """A bare JSON null should encode to the null literal.""" + assert encode_json("null") == "null" + + def test_encode_json_equivalent_to_manual_pipeline(self): + """encode_json should match encode(json.loads(...)).""" + text = '{"users": [{"id": 1, "name": "Alice", "note": null}]}' + assert encode_json(text) == encode(json.loads(text)) + + def test_encode_json_forwards_options(self): + """Encoding options should be forwarded to encode().""" + assert encode_json("[1, 2, 3]", {"delimiter": "\t"}) == "[3\t]: 1\t2\t3" + + def test_encode_json_raises_on_invalid_json(self): + """Invalid JSON should raise json.JSONDecodeError before encoding.""" + with pytest.raises(json.JSONDecodeError): + encode_json("{not valid}")