|
| 1 | +"""Tests for numeric detection utilities. |
| 2 | +
|
| 3 | +Tests the consistency and correctness of numeric literal detection |
| 4 | +across encoding and decoding pipelines. |
| 5 | +""" |
| 6 | + |
| 7 | +from toon_format._literal_utils import is_numeric_literal |
| 8 | +from toon_format._validation import is_numeric_like |
| 9 | + |
| 10 | + |
class TestNumericLiteral:
    """Checks for ``is_numeric_literal``, the decoder-side predicate."""

    def test_valid_integers(self):
        """Plain integers, with or without a minus sign, are accepted."""
        for text in ("0", "1", "42", "999", "-1", "-42"):
            assert is_numeric_literal(text)

    def test_valid_floats(self):
        """Decimal numbers, including negative ones, are accepted."""
        for text in ("0.0", "0.5", "3.14", "-2.5", "1.23456"):
            assert is_numeric_literal(text)

    def test_scientific_notation(self):
        """Exponent forms (lower/upper-case ``e``, signed exponents) are accepted."""
        for text in ("1e10", "1.5e10", "1e-10", "1.5e-10", "-1e10", "2.5E+3"):
            assert is_numeric_literal(text)

    def test_leading_zeros_rejected(self):
        """Integers padded with leading zeros are rejected; ``0`` and ``0.x`` are not."""
        for text in ("01", "0123", "00", "-01"):
            assert not is_numeric_literal(text)
        # A lone zero and decimals whose integer part is zero remain valid.
        for text in ("0", "0.5", "0.0"):
            assert is_numeric_literal(text)

    def test_non_numeric_strings(self):
        """Empty, alphabetic, mixed and malformed strings are rejected."""
        for text in ("", "abc", "12abc", "12.34.56", "--5", "1.2.3"):
            assert not is_numeric_literal(text)

    def test_special_float_values_rejected(self):
        """NaN and infinity spellings are rejected."""
        for text in ("nan", "NaN", "inf", "Infinity", "-inf"):
            assert not is_numeric_literal(text)

    def test_empty_string(self):
        """The empty string is rejected."""
        assert not is_numeric_literal("")

    def test_whitespace_only(self):
        """Strings consisting solely of whitespace are rejected."""
        # NOTE(review): both literals read as a single space here; the second
        # may originally have been a longer run of spaces -- confirm.
        for text in (" ", " "):
            assert not is_numeric_literal(text)
| 76 | + |
| 77 | + |
class TestNumericLike:
    """Checks for ``is_numeric_like``, the encoder-side predicate."""

    def test_valid_integers(self):
        """Plain integers, with or without a minus sign, are numeric-like."""
        for text in ("0", "1", "42", "-1", "-42"):
            assert is_numeric_like(text)

    def test_valid_floats(self):
        """Decimal numbers, including negative ones, are numeric-like."""
        for text in ("0.0", "0.5", "3.14", "-2.5"):
            assert is_numeric_like(text)

    def test_scientific_notation(self):
        """Exponent forms are numeric-like."""
        for text in ("1e10", "1.5e10", "1e-10", "2.5E+3"):
            assert is_numeric_like(text)

    def test_octal_like_numbers(self):
        """Digit strings with leading zeros are numeric-like."""
        # They look like numbers, so the encoder has to quote them.
        for text in ("01", "0123", "00"):
            assert is_numeric_like(text)

    def test_non_numeric_strings(self):
        """Empty, alphabetic and mixed strings are not numeric-like."""
        for text in ("", "abc", "hello", "12abc"):
            assert not is_numeric_like(text)

    def test_edge_cases(self):
        """Empty string, a single space and a doubled minus sign are not numeric-like."""
        for text in ("", " ", "--5"):
            assert not is_numeric_like(text)
| 122 | + |
| 123 | + |
class TestConsistency:
    """Cross-checks between ``is_numeric_literal`` and ``is_numeric_like``."""

    def test_valid_numbers_recognized_by_both(self):
        """Well-formed numbers are accepted by both predicates."""
        valid_numbers = ["0", "1", "42", "-1", "3.14", "-2.5", "1e10", "1.5e-3"]
        for num in valid_numbers:
            assert is_numeric_literal(num), f"{num} should be numeric literal"
            assert is_numeric_like(num), f"{num} should be numeric-like"

    def test_octal_like_difference(self):
        """Test the key difference: octal-like numbers.

        is_numeric_like returns True (needs quoting in encoder)
        is_numeric_literal returns False (not parsed as number in decoder)
        """
        octal_like = ["01", "0123", "00", "007"]
        for num in octal_like:
            assert is_numeric_like(num), f"{num} should be numeric-like (needs quoting)"
            assert not is_numeric_literal(num), (
                f"{num} should not be numeric literal (has leading zero)"
            )

    def test_non_numbers_rejected_by_both(self):
        """Non-numbers are rejected by both predicates.

        Both functions are asserted for every value whose rejection is
        already pinned individually in TestNumericLiteral and
        TestNumericLike, including the empty string.
        """
        rejected_by_both = ["", "abc", "hello", "12abc"]
        for val in rejected_by_both:
            # !r keeps the failure message readable for the empty string.
            assert not is_numeric_literal(val), f"{val!r} should not be numeric literal"
            assert not is_numeric_like(val), f"{val!r} should not be numeric-like"

        # NOTE(review): is_numeric_like's handling of special float spellings
        # is not pinned by any other test in this module, so only the decoder
        # predicate is asserted for them -- extend once confirmed.
        for val in ["nan", "inf"]:
            assert not is_numeric_literal(val), f"{val!r} should not be numeric literal"
| 155 | + |
| 156 | + |
class TestRoundTripConsistency:
    """Encode-then-decode checks for numeric-looking values."""

    def test_octal_like_numbers_preserved_as_strings(self):
        """Leading-zero digit strings come back from a round trip as the same strings."""
        from toon_format import decode, encode

        # Each of these must be handled as a string, never as a number.
        for text in ("0123", "007", "00"):
            restored = decode(encode({"value": text}))
            # Confirm the decoded document is a dict before indexing into it.
            assert isinstance(restored, dict)
            assert restored["value"] == text
            assert isinstance(restored["value"], str)

    def test_valid_numbers_preserved_as_numbers(self):
        """Ints and floats come back from a round trip as equal numeric values."""
        from toon_format import decode, encode

        for number in (0, 1, 42, -1, 3.14, -2.5):
            restored = decode(encode({"value": number}))
            # Confirm the decoded document is a dict before indexing into it.
            assert isinstance(restored, dict)
            # Equality is checked first; int/float conversion is tolerated.
            assert restored["value"] == number
            assert isinstance(restored["value"], (int, float))
0 commit comments