Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 59 additions & 29 deletions src/toon_format/decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
and validates array lengths and delimiters.
"""

import json
from typing import Any, Dict, List, Optional, Tuple

from ._literal_utils import is_boolean_or_null_literal, is_numeric_literal
Expand Down Expand Up @@ -228,18 +229,38 @@ def split_key_value(line: str) -> Tuple[str, str]:
return (key, value)


def decode(input_str: str, options: Optional[DecodeOptions] = None) -> JsonValue:
def decode(input_str: str, options: Optional[DecodeOptions] = None) -> Any:
"""Decode a TOON-formatted string to a Python value.

This function parses TOON format and returns the decoded data. By default,
it returns a Python object (dict, list, str, int, float, bool, or None).

The DecodeOptions.json_indent parameter is a Python-specific feature that
enables returning a JSON-formatted string instead of a Python object.
This is useful for applications that need pretty-printed JSON output.

Args:
input_str: TOON-formatted string
options: Optional decoding options
input_str: TOON-formatted string to decode
options: Optional DecodeOptions with indent, strict, and json_indent
settings. If not provided, defaults are used (indent=2,
strict=True, json_indent=None).

Returns:
Decoded Python value
By default (json_indent=None): Decoded Python value (object, array,
string, number, boolean, or null).
When json_indent is set: A JSON-formatted string with the specified
indentation level. Example: DecodeOptions(json_indent=2) returns
pretty-printed JSON with 2-space indentation.

Raises:
ToonDecodeError: If input is malformed
ToonDecodeError: If input is malformed or violates strict-mode rules

Example:
>>> toon = "name: Alice\\nage: 30"
>>> decode(toon)
{'name': 'Alice', 'age': 30}
>>> decode(toon, DecodeOptions(json_indent=2))
'{\\n "name": "Alice",\\n "age": 30\\n}'
"""
if options is None:
options = DecodeOptions()
Expand Down Expand Up @@ -273,32 +294,41 @@ def decode(input_str: str, options: Optional[DecodeOptions] = None) -> JsonValue
# Check for empty input (per spec Section 8: empty/whitespace-only → empty object)
non_blank_lines = [ln for ln in lines if not ln.is_blank]
if not non_blank_lines:
return {}

# Determine root form (Section 5)
first_line = non_blank_lines[0]

# Check if it's a root array header
header_info = parse_header(first_line.content)
if header_info is not None and header_info[0] is None: # No key = root array
# Root array
return decode_array(lines, 0, 0, header_info, strict)
result: Any = {}
else:
# Determine root form (Section 5)
first_line = non_blank_lines[0]

# Check if it's a root array header
header_info = parse_header(first_line.content)
if header_info is not None and header_info[0] is None: # No key = root array
# Root array
result = decode_array(lines, 0, 0, header_info, strict)
else:
# Check if it's a single primitive
if len(non_blank_lines) == 1:
line_content = first_line.content
# Check if it's not a key-value line
try:
split_key_value(line_content)
# It's a key-value, so root object
result = decode_object(lines, 0, 0, strict)
except ToonDecodeError:
# Not a key-value, check if it's a header
if header_info is None:
# Single primitive
result = parse_primitive(line_content)
else:
result = decode_object(lines, 0, 0, strict)
else:
# Otherwise, root object
result = decode_object(lines, 0, 0, strict)

# Check if it's a single primitive
if len(non_blank_lines) == 1:
line_content = first_line.content
# Check if it's not a key-value line
try:
split_key_value(line_content)
# It's a key-value, so root object
except ToonDecodeError:
# Not a key-value, check if it's a header
if header_info is None:
# Single primitive
return parse_primitive(line_content)
# If json_indent is specified, return JSON-formatted string
if options.json_indent is not None:
return json.dumps(result, indent=options.json_indent, ensure_ascii=False)

# Otherwise, root object
return decode_object(lines, 0, 0, strict)
return result


def decode_object(
Expand Down
16 changes: 15 additions & 1 deletion src/toon_format/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,12 +52,26 @@ class DecodeOptions:

Attributes:
indent: Number of spaces per indentation level (default: 2)
Used for parsing TOON format.
strict: Enable strict validation (default: True)
Enforces spec conformance checks.
Comment on lines 54 to +57
Copy link

Copilot AI Nov 18, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[nitpick] The docstring entries on lines 55-57 are worded inconsistently with each other and with the rest of the codebase: line 55 describes indent as "Used for parsing TOON format", while line 57 describes strict as "Enforces spec conformance checks." The wording for indent could be more precise so that it matches the style of the strict entry.

Consider revising for consistency:

indent: Number of spaces per indentation level (default: 2).
        Used for parsing TOON format input.
strict: Enable strict validation (default: True).
        Enforces TOON specification conformance checks.
Suggested change
indent: Number of spaces per indentation level (default: 2)
Used for parsing TOON format.
strict: Enable strict validation (default: True)
Enforces spec conformance checks.
indent: Number of spaces per indentation level (default: 2).
Used for parsing TOON format input.
strict: Enable strict validation (default: True).
Enforces TOON specification conformance checks.

Copilot uses AI. Check for mistakes.
json_indent: Optional number of spaces for JSON output formatting
(default: None). When set, decode() returns a JSON-formatted
string instead of a Python object. This is a Python-specific
feature for convenient output formatting. When None, returns
a Python object as normal. Pass an integer (e.g., 2 or 4)
to enable pretty-printed JSON output.
"""

def __init__(self, indent: int = 2, strict: bool = True) -> None:
def __init__(
self,
indent: int = 2,
strict: bool = True,
json_indent: Union[int, None] = None,
) -> None:
self.indent = indent
self.strict = strict
self.json_indent = json_indent


# Depth type for tracking indentation level
Expand Down
90 changes: 90 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
Python type normalization is tested in test_normalization.py.
"""

import json

import pytest

from toon_format import ToonDecodeError, decode, encode
Expand Down Expand Up @@ -286,3 +288,91 @@ def test_roundtrip_with_length_marker(self):
toon = encode(original, {"lengthMarker": "#"})
decoded = decode(toon)
assert decoded == original


class TestDecodeJSONIndentation:
    """Test decode() JSON indentation feature (Issue #10).

    When ``DecodeOptions.json_indent`` is set, ``decode()`` returns the decoded
    value serialized with ``json.dumps(..., indent=json_indent,
    ensure_ascii=False)`` instead of returning the Python object. The expected
    strings below therefore carry exactly ``json_indent`` spaces per nesting
    level, and the TOON inputs use the decoder's default 2-space indentation
    so they remain valid under the default strict mode.
    """

    def test_decode_with_json_indent_returns_string(self):
        """decode() with json_indent should return JSON string."""
        toon = "id: 123\nname: Alice"
        options = DecodeOptions(json_indent=2)
        result = decode(toon, options)
        assert isinstance(result, str)
        # Round-trip through json.loads to check content independent of layout.
        parsed = json.loads(result)
        assert parsed == {"id": 123, "name": "Alice"}

    def test_decode_with_json_indent_2(self):
        """decode() with json_indent=2 should format with 2 spaces."""
        toon = "id: 123\nname: Alice"
        result = decode(toon, DecodeOptions(json_indent=2))
        expected = '{\n  "id": 123,\n  "name": "Alice"\n}'
        assert result == expected

    def test_decode_with_json_indent_4(self):
        """decode() with json_indent=4 should format with 4 spaces."""
        toon = "id: 123\nname: Alice"
        result = decode(toon, DecodeOptions(json_indent=4))
        # Must differ from the json_indent=2 expectation: 4 spaces per level.
        expected = '{\n    "id": 123,\n    "name": "Alice"\n}'
        assert result == expected

    def test_decode_with_json_indent_nested(self):
        """decode() with json_indent should handle nested structures."""
        # Nested TOON fields are indented by the default 2 spaces per level.
        toon = "user:\n  name: Alice\n  age: 30"
        result = decode(toon, DecodeOptions(json_indent=2))
        expected = '{\n  "user": {\n    "name": "Alice",\n    "age": 30\n  }\n}'
        assert result == expected

    def test_decode_with_json_indent_array(self):
        """decode() with json_indent should handle arrays."""
        toon = "items[2]: apple,banana"
        result = decode(toon, DecodeOptions(json_indent=2))
        expected = '{\n  "items": [\n    "apple",\n    "banana"\n  ]\n}'
        assert result == expected

    def test_decode_with_json_indent_none_returns_object(self):
        """decode() with json_indent=None should return Python object."""
        toon = "id: 123\nname: Alice"
        options = DecodeOptions(json_indent=None)
        result = decode(toon, options)
        assert isinstance(result, dict)
        assert result == {"id": 123, "name": "Alice"}

    def test_decode_with_json_indent_default_returns_object(self):
        """decode() without json_indent should return Python object (default)."""
        toon = "id: 123\nname: Alice"
        result = decode(toon)
        assert isinstance(result, dict)
        assert result == {"id": 123, "name": "Alice"}

    def test_decode_json_indent_with_unicode(self):
        """decode() with json_indent should preserve unicode characters."""
        toon = 'name: "José"'
        result = decode(toon, DecodeOptions(json_indent=2))
        # ensure_ascii=False keeps the literal character instead of \u00e9.
        assert "José" in result
        parsed = json.loads(result)
        assert parsed["name"] == "José"

    def test_decode_json_indent_empty_object(self):
        """decode() with json_indent on empty input should return empty object JSON."""
        result = decode("", DecodeOptions(json_indent=2))
        # json.dumps renders an empty dict as "{}" regardless of indent.
        assert result == "{}"

    def test_decode_json_indent_single_primitive(self):
        """decode() with json_indent on single primitive should return JSON number."""
        result = decode("42", DecodeOptions(json_indent=2))
        assert result == "42"

    def test_decode_json_indent_complex_nested(self):
        """decode() with json_indent should handle complex nested structures."""
        # Built from explicit pieces so the 2-space TOON indentation of the
        # tabular rows and nested fields cannot be lost to source reformatting.
        toon = (
            "users[2]{id,name}:\n"
            "  1,Alice\n"
            "  2,Bob\n"
            "metadata:\n"
            "  version: 1\n"
            "  active: true"
        )
        result = decode(toon, DecodeOptions(json_indent=2))
        parsed = json.loads(result)
        assert parsed["users"][0] == {"id": 1, "name": "Alice"}
        assert parsed["metadata"]["version"] == 1
        assert parsed["metadata"]["active"] is True
Loading