Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/toon_format/_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,13 @@ def to_parsed_lines(
if not source.strip():
return [], []

# Normalize Windows CRLF line endings to LF
# This prevents stray \r characters from appearing in content
source = source.replace("\r\n", "\n")

# Convert any remaining standalone \r characters (old Mac format) to LF
source = source.replace("\r", "\n")

lines = source.split("\n")
parsed: List[ParsedLine] = []
blank_lines: List[BlankLineInfo] = []
Expand Down
55 changes: 55 additions & 0 deletions tests/test_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,58 @@ def test_object_key_order_preserved(self):
assert keys == ["z", "a", "m", "b"]
# Verify order is not alphabetical
assert keys != ["a", "b", "m", "z"]


class TestCRLFDecoding:
    """Test CRLF (Windows) and CR line ending handling in the decoder.

    Nested fixtures indent child lines with two spaces per level so that
    the structure under test is unambiguous; only the line terminators
    vary between the cases.
    """

    def test_decode_object_with_crlf(self):
        """Test decoding objects with CRLF line endings."""
        toon = "name: Alice\r\nage: 30\r\n"
        result = decode(toon)
        assert result == {"name": "Alice", "age": 30}

    def test_decode_nested_object_with_crlf(self):
        """Test decoding nested objects with CRLF line endings."""
        # Child keys are indented by two spaces after each CRLF.
        toon = "person:\r\n  name: Alice\r\n  age: 30\r\n"
        result = decode(toon)
        assert result == {"person": {"name": "Alice", "age": 30}}

    def test_decode_array_with_crlf(self):
        """Test decoding arrays with CRLF line endings."""
        # List items are indented by two spaces under the array header.
        toon = "items[3]:\r\n  - apple\r\n  - banana\r\n  - cherry\r\n"
        result = decode(toon)
        assert result == {"items": ["apple", "banana", "cherry"]}

    def test_decode_delimited_array_with_crlf(self):
        """Test decoding delimited arrays with CRLF line endings."""
        toon = "items[3]: apple,banana,cherry\r\n"
        result = decode(toon)
        assert result == {"items": ["apple", "banana", "cherry"]}

    def test_decode_with_old_mac_cr(self):
        """Test decoding with old Mac CR line endings."""
        toon = "name: Alice\rage: 30\r"
        result = decode(toon)
        assert result == {"name": "Alice", "age": 30}

    def test_decode_with_mixed_line_endings(self):
        """Test decoding with mixed line endings."""
        # One CRLF, one LF and one bare CR terminator in a single document.
        toon = "name: Alice\r\nage: 30\ncity: NYC\r"
        result = decode(toon)
        assert result == {"name": "Alice", "age": 30, "city": "NYC"}

    def test_crlf_does_not_affect_quoted_strings(self):
        """Test that CRLF normalization doesn't affect escaped \\r in strings."""
        # The quoted value holds the two-character escapes backslash-r and
        # backslash-n; only the trailing terminator is a real CRLF.
        toon = 'text: "line1\\r\\nline2"\r\n'
        result = decode(toon)
        # The escapes must be unescaped into real CR/LF characters.
        assert result == {"text": "line1\r\nline2"}

    def test_crlf_in_strict_mode(self):
        """Test CRLF works correctly in strict mode."""
        # Two-space indentation keeps the nested keys on a valid level.
        toon = "name:\r\n  first: Alice\r\n  age: 30\r\n"
        options = DecodeOptions(strict=True)
        result = decode(toon, options)
        assert result == {"name": {"first": "Alice", "age": 30}}

59 changes: 59 additions & 0 deletions tests/test_scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,3 +241,62 @@ def test_blank_lines_not_validated_in_strict_mode(self):
# Should not raise error for blank line with invalid indentation
assert len(blanks) == 1
assert blanks[0].line_num == 2


class TestCRLFHandling:
    """Tests for CRLF and CR normalization in ``to_parsed_lines``.

    Every case calls the scanner with an indent size of 2, so nested
    fixture lines are indented with exactly two spaces.
    """

    def test_crlf_normalization(self):
        """Test Windows CRLF line endings are normalized to LF."""
        source = "name: Alice\r\nage: 30\r\n"
        lines, _ = to_parsed_lines(source, 2, False)
        # Check the count first so a failure is reported before indexing.
        assert len(lines) == 3  # name, age, and trailing empty line
        # Verify no \r remains in content
        assert "\r" not in lines[0].content
        assert "\r" not in lines[1].content
        assert lines[0].content == "name: Alice"
        assert lines[1].content == "age: 30"

    def test_standalone_cr_normalization(self):
        """Test old Mac CR line endings are normalized to LF."""
        source = "name: Alice\rage: 30\r"
        lines, _ = to_parsed_lines(source, 2, False)
        assert len(lines) == 3  # name, age, and trailing empty line
        # Verify no \r remains in content
        assert "\r" not in lines[0].content
        assert "\r" not in lines[1].content
        assert lines[0].content == "name: Alice"
        assert lines[1].content == "age: 30"

    def test_mixed_line_endings(self):
        """Test mixed line endings are all normalized."""
        # CRLF, LF and bare CR terminators in one source; no trailing newline.
        source = "line1\r\nline2\nline3\rline4"
        lines, _ = to_parsed_lines(source, 2, False)
        assert len(lines) == 4
        for line in lines:
            assert "\r" not in line.content
        assert [line.content for line in lines] == [
            "line1",
            "line2",
            "line3",
            "line4",
        ]

    def test_crlf_with_indentation(self):
        """Test CRLF handling preserves indentation."""
        # The child line carries two leading spaces, matching the asserted
        # indent of 2 and depth of 1 below.
        source = "parent:\r\n  child: value\r\n"
        lines, _ = to_parsed_lines(source, 2, False)
        assert lines[0].content == "parent:"
        assert lines[0].depth == 0
        assert lines[1].content == "child: value"
        assert lines[1].depth == 1
        assert lines[1].indent == 2

    def test_crlf_in_strict_mode(self):
        """Test CRLF normalization works in strict mode."""
        # Two-space indentation is an exact multiple of the indent size
        # passed to the scanner, as the depth assertion below requires.
        source = "name: Alice\r\n  age: 30\r\n"
        lines, _ = to_parsed_lines(source, 2, True)
        # Should not raise error and should properly normalize
        assert len(lines) == 3
        assert "\r" not in lines[0].content
        assert "\r" not in lines[1].content
        assert lines[1].depth == 1

Loading