Skip to content

Commit 728120d

Browse files
fix: improve numeric literal validation (#21)
* fix: improve numeric literal validation for negative numbers and leading zeros
* test: add comprehensive tests for numeric literal and numeric like utilities
* test: add type assertions for decoded values in round trip consistency tests

---------

Co-authored-by: davidpirogov <580286+davidpirogov@users.noreply.github.com>
1 parent 6b26984 commit 728120d

File tree

2 files changed

+199
-1
lines changed

2 files changed

+199
-1
lines changed

src/toon_format/_literal_utils.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,22 @@ def is_numeric_literal(token: str) -> bool:
5151
True
5252
>>> is_numeric_literal("0123") # Leading zero - not valid
5353
False
54+
>>> is_numeric_literal("-01") # Negative with leading zero - not valid
55+
False
5456
>>> is_numeric_literal("hello")
5557
False
5658
"""
5759
if not token:
5860
return False
5961

62+
# Handle negative numbers
63+
start_idx = 1 if token.startswith("-") else 0
64+
if start_idx >= len(token):
65+
return False
66+
6067
# Must not have leading zeros (except for `"0"` itself or decimals like `"0.5"`)
61-
if len(token) > 1 and token[0] == "0" and token[1] != ".":
68+
# Check the first digit after optional minus sign
69+
if len(token) > start_idx + 1 and token[start_idx] == "0" and token[start_idx + 1] != ".":
6270
return False
6371

6472
# Check if it's a valid number

tests/test_numeric_detection.py

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,190 @@
1+
"""Tests for numeric detection utilities.
2+
3+
Tests the consistency and correctness of numeric literal detection
4+
across encoding and decoding pipelines.
5+
"""
6+
7+
from toon_format._literal_utils import is_numeric_literal
8+
from toon_format._validation import is_numeric_like
9+
10+
11+
class TestNumericLiteral:
    """Checks for is_numeric_literal, the decoder-side predicate."""

    def test_valid_integers(self):
        """Plain and negative integers are accepted."""
        for token in ("0", "1", "42", "999", "-1", "-42"):
            assert is_numeric_literal(token)

    def test_valid_floats(self):
        """Decimal literals, including negative ones, are accepted."""
        for token in ("0.0", "0.5", "3.14", "-2.5", "1.23456"):
            assert is_numeric_literal(token)

    def test_scientific_notation(self):
        """Exponent forms are accepted, with either case of `e` and signed exponents."""
        for token in ("1e10", "1.5e10", "1e-10", "1.5e-10", "-1e10", "2.5E+3"):
            assert is_numeric_literal(token)

    def test_leading_zeros_rejected(self):
        """A leading zero disqualifies a token unless it is `0` or starts a decimal."""
        for token in ("01", "0123", "00", "-01"):
            assert not is_numeric_literal(token)

        # The allowed exceptions: bare zero and decimals that begin with zero.
        for token in ("0", "0.5", "0.0"):
            assert is_numeric_literal(token)

    def test_non_numeric_strings(self):
        """Tokens that are not well-formed numbers are rejected."""
        for token in ("", "abc", "12abc", "12.34.56", "--5", "1.2.3"):
            assert not is_numeric_literal(token)

    def test_special_float_values_rejected(self):
        """NaN and infinity spellings are not numeric literals."""
        for token in ("nan", "NaN", "inf", "Infinity", "-inf"):
            assert not is_numeric_literal(token)

    def test_empty_string(self):
        """The empty token is rejected."""
        assert not is_numeric_literal("")

    def test_whitespace_only(self):
        """Whitespace-only tokens are rejected."""
        # NOTE(review): both inputs are the same single-space string in the
        # visible source; confirm whether distinct whitespace runs were intended.
        for token in (" ", " "):
            assert not is_numeric_literal(token)
76+
77+
78+
class TestNumericLike:
    """Checks for is_numeric_like, the encoder-side predicate."""

    def test_valid_integers(self):
        """Plain and negative integers are numeric-like."""
        for text in ("0", "1", "42", "-1", "-42"):
            assert is_numeric_like(text)

    def test_valid_floats(self):
        """Decimal values are numeric-like."""
        for text in ("0.0", "0.5", "3.14", "-2.5"):
            assert is_numeric_like(text)

    def test_scientific_notation(self):
        """Exponent forms are numeric-like."""
        for text in ("1e10", "1.5e10", "1e-10", "2.5E+3"):
            assert is_numeric_like(text)

    def test_octal_like_numbers(self):
        """Leading-zero digit strings are numeric-like."""
        # They look like numbers, so the encoder has to quote them.
        for text in ("01", "0123", "00"):
            assert is_numeric_like(text)

    def test_non_numeric_strings(self):
        """Strings that do not look like numbers are not numeric-like."""
        for text in ("", "abc", "hello", "12abc"):
            assert not is_numeric_like(text)

    def test_edge_cases(self):
        """Empty, blank, and doubly-negated inputs are not numeric-like."""
        for text in ("", " ", "--5"):
            assert not is_numeric_like(text)
122+
123+
124+
class TestConsistency:
    """Tests to ensure consistency between is_numeric_literal and is_numeric_like."""

    def test_valid_numbers_recognized_by_both(self):
        """Test that valid numbers are recognized by both functions."""
        valid_numbers = ["0", "1", "42", "-1", "3.14", "-2.5", "1e10", "1.5e-3"]
        for num in valid_numbers:
            assert is_numeric_literal(num), f"{num} should be numeric literal"
            assert is_numeric_like(num), f"{num} should be numeric-like"

    def test_octal_like_difference(self):
        """Test the key difference: octal-like numbers.

        is_numeric_like returns True (needs quoting in encoder)
        is_numeric_literal returns False (not parsed as number in decoder)
        """
        octal_like = ["01", "0123", "00", "007"]
        for num in octal_like:
            assert is_numeric_like(num), f"{num} should be numeric-like (needs quoting)"
            assert not is_numeric_literal(num), (
                f"{num} should not be numeric literal (has leading zero)"
            )

    def test_non_numbers_rejected_by_both(self):
        """Test that non-numbers are rejected by both functions.

        Inputs are split in two groups so the test checks what its name
        promises without pinning behavior that is not established here.
        """
        # Rejected by both predicates. The empty string is included rather
        # than skipped, and repr formatting keeps its failure message readable.
        rejected_by_both = ["", "abc", "hello", "12abc"]
        for val in rejected_by_both:
            assert not is_numeric_literal(val), f"{val!r} should not be numeric literal"
            assert not is_numeric_like(val), f"{val!r} should not be numeric-like"

        # Special float spellings must never decode as numbers. How
        # is_numeric_like classifies them is deliberately not asserted.
        # NOTE(review): add is_numeric_like assertions once its nan/inf
        # behavior is confirmed.
        literal_only = ["nan", "inf"]
        for val in literal_only:
            assert not is_numeric_literal(val), f"{val} should not be numeric literal"
155+
156+
157+
class TestRoundTripConsistency:
    """Encode-then-decode checks for numeric-looking values."""

    def test_octal_like_numbers_preserved_as_strings(self):
        """Leading-zero strings come back from a round trip as the same strings."""
        from toon_format import decode, encode

        # Values that must stay strings rather than being parsed as numbers.
        for text in ("0123", "007", "00"):
            result = decode(encode({"value": text}))
            # The payload has to be a dict before it can be subscripted.
            assert isinstance(result, dict)
            restored = result["value"]
            assert restored == text
            assert isinstance(restored, str)

    def test_valid_numbers_preserved_as_numbers(self):
        """Real numbers come back from a round trip equal and still numeric."""
        from toon_format import decode, encode

        for number in (0, 1, 42, -1, 3.14, -2.5):
            result = decode(encode({"value": number}))
            # The payload has to be a dict before it can be subscripted.
            assert isinstance(result, dict)
            restored = result["value"]
            # Equality is required; an int/float switch is tolerated.
            assert restored == number
            assert isinstance(restored, (int, float))

0 commit comments

Comments
 (0)