Skip to content

Commit e78c3fa

Browse files
authored
lossless float support (#98)
1 parent 1fbedbf commit e78c3fa

File tree

12 files changed

+382
-122
lines changed

12 files changed

+382
-122
lines changed

crates/jiter-python/jiter.pyi

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1+
import decimal
12
from typing import Any, Literal
23

34
def from_json(
45
json_data: bytes,
56
/,
67
*,
78
allow_inf_nan: bool = True,
8-
cache_strings: Literal[True, False, "all", "keys", "none"] = True,
9+
cache_strings: Literal[True, False, "all", "keys", "none"] = "all",
910
allow_partial: bool = False,
1011
catch_duplicate_keys: bool = False,
12+
lossless_floats: bool = False,
1113
) -> Any:
1214
"""
1315
Parse input bytes into a JSON object.
@@ -22,6 +24,7 @@ def from_json(
2224
- False / 'none' - cache nothing
2325
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
2426
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times
27+
lossless_floats: if True, preserve full detail on floats using `LosslessFloat`
2528
2629
Returns:
2730
Python object built from the JSON input.
@@ -39,3 +42,26 @@ def cache_usage() -> int:
3942
Returns:
4043
Size of the string cache in bytes.
4144
"""
45+
46+
47+
class LosslessFloat:
48+
"""
49+
Represents a float from JSON by holding the underlying bytes of its JSON representation.
50+
"""
51+
def __init__(self, json_float: bytes):
52+
"""Construct a LosslessFloat object from a JSON bytes slice"""
53+
54+
def as_decimal(self) -> decimal.Decimal:
55+
"""Construct a Python Decimal from the JSON bytes slice"""
56+
57+
def __float__(self) -> float:
58+
"""Construct a Python float from the JSON bytes slice"""
59+
60+
def __bytes__(self) -> bytes:
61+
"""Return the JSON bytes slice as bytes"""
62+
63+
def __str__(self):
64+
"""Return the JSON bytes slice as a string"""
65+
66+
def __repr__(self):
67+
...

crates/jiter-python/src/lib.rs

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@ use std::sync::OnceLock;
22

33
use pyo3::prelude::*;
44

5-
use jiter::{map_json_error, python_parse, StringCacheMode};
5+
use jiter::{map_json_error, LosslessFloat, PythonParseBuilder, StringCacheMode};
66

7+
#[allow(clippy::fn_params_excessive_bools)]
78
#[pyfunction(
89
signature = (
910
json_data,
@@ -12,7 +13,8 @@ use jiter::{map_json_error, python_parse, StringCacheMode};
1213
allow_inf_nan=true,
1314
cache_strings=StringCacheMode::All,
1415
allow_partial=false,
15-
catch_duplicate_keys=false
16+
catch_duplicate_keys=false,
17+
lossless_floats=false,
1618
)
1719
)]
1820
pub fn from_json<'py>(
@@ -22,16 +24,18 @@ pub fn from_json<'py>(
2224
cache_strings: StringCacheMode,
2325
allow_partial: bool,
2426
catch_duplicate_keys: bool,
27+
lossless_floats: bool,
2528
) -> PyResult<Bound<'py, PyAny>> {
26-
python_parse(
27-
py,
28-
json_data,
29+
let parse_builder = PythonParseBuilder {
2930
allow_inf_nan,
30-
cache_strings,
31+
cache_mode: cache_strings,
3132
allow_partial,
3233
catch_duplicate_keys,
33-
)
34-
.map_err(|e| map_json_error(json_data, &e))
34+
lossless_floats,
35+
};
36+
parse_builder
37+
.python_parse(py, json_data)
38+
.map_err(|e| map_json_error(json_data, &e))
3539
}
3640

3741
pub fn get_jiter_version() -> &'static str {
@@ -65,5 +69,6 @@ fn jiter_python(_py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> {
6569
m.add_function(wrap_pyfunction!(from_json, m)?)?;
6670
m.add_function(wrap_pyfunction!(cache_clear, m)?)?;
6771
m.add_function(wrap_pyfunction!(cache_usage, m)?)?;
72+
m.add_class::<LosslessFloat>()?;
6873
Ok(())
6974
}

crates/jiter-python/tests/test_jiter.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from decimal import Decimal
2+
13
import jiter
24
import pytest
35
from math import inf
@@ -144,3 +146,50 @@ def test_unicode_cache():
144146
jiter.cache_clear()
145147
parsed = jiter.from_json(json)
146148
assert parsed == {"💩": "£"}
149+
150+
151+
def test_json_float():
152+
f = jiter.LosslessFloat(b'123.45')
153+
assert str(f) == '123.45'
154+
assert repr(f) == 'LosslessFloat(123.45)'
155+
assert float(f) == 123.45
156+
assert f.as_decimal() == Decimal('123.45')
157+
assert bytes(f) == b'123.45'
158+
159+
160+
def test_json_float_scientific():
161+
f = jiter.LosslessFloat(b'123e4')
162+
assert str(f) == '123e4'
163+
assert float(f) == 123e4
164+
assert f.as_decimal() == Decimal('123e4')
165+
166+
167+
def test_json_float_invalid():
168+
with pytest.raises(ValueError, match='trailing characters at line 1 column 6'):
169+
jiter.LosslessFloat(b'123.4x')
170+
171+
172+
def test_lossless_floats():
173+
f = jiter.from_json(b'12.3')
174+
assert isinstance(f, float)
175+
assert f == 12.3
176+
177+
f = jiter.from_json(b'12.3', lossless_floats=True)
178+
assert isinstance(f, jiter.LosslessFloat)
179+
assert str(f) == '12.3'
180+
assert float(f) == 12.3
181+
assert f.as_decimal() == Decimal('12.3')
182+
183+
f = jiter.from_json(b'123.456789123456789e45', lossless_floats=True)
184+
assert isinstance(f, jiter.LosslessFloat)
185+
assert 123e45 < float(f) < 124e45
186+
assert f.as_decimal() == Decimal('1.23456789123456789E+47')
187+
assert bytes(f) == b'123.456789123456789e45'
188+
assert str(f) == '123.456789123456789e45'
189+
assert repr(f) == 'LosslessFloat(123.456789123456789e45)'
190+
191+
192+
def test_lossless_floats_int():
193+
v = jiter.from_json(b'123', lossless_floats=True)
194+
assert isinstance(v, int)
195+
assert v == 123

crates/jiter/Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ num-bigint = "0.4.4"
1616
num-traits = "0.2.16"
1717
ahash = "0.8.0"
1818
smallvec = "1.11.0"
19-
pyo3 = { version = "0.21.0", default-features=false, features = ["num-bigint"], optional = true }
19+
pyo3 = { version = "0.21.0", optional = true }
2020
lexical-parse-float = { version = "0.8.5", features = ["format"] }
2121

2222
[features]
@@ -69,5 +69,6 @@ match_bool = "allow"
6969
doc_markdown = "allow"
7070
implicit_clone = "allow"
7171
iter_without_into_iter = "allow"
72+
return_self_not_must_use = "allow"
7273
inline_always = "allow" # TODO remove?
7374
match_same_arms = "allow" # TODO remove?

crates/jiter/benches/python.rs

Lines changed: 18 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,18 @@ use std::io::Read;
55

66
use pyo3::Python;
77

8-
use jiter::{cache_clear, python_parse, StringCacheMode};
8+
use jiter::{cache_clear, PythonParseBuilder, StringCacheMode};
99

1010
fn python_parse_numeric(bench: &mut Bencher) {
1111
Python::with_gil(|py| {
1212
cache_clear(py);
1313
bench.iter(|| {
14-
python_parse(
15-
py,
16-
br#" { "int": 1, "bigint": 123456789012345678901234567890, "float": 1.2} "#,
17-
false,
18-
StringCacheMode::All,
19-
false,
20-
false,
21-
)
22-
.unwrap()
14+
PythonParseBuilder::default()
15+
.python_parse(
16+
py,
17+
br#" { "int": 1, "bigint": 123456789012345678901234567890, "float": 1.2} "#,
18+
)
19+
.unwrap()
2320
});
2421
})
2522
}
@@ -28,15 +25,9 @@ fn python_parse_other(bench: &mut Bencher) {
2825
Python::with_gil(|py| {
2926
cache_clear(py);
3027
bench.iter(|| {
31-
python_parse(
32-
py,
33-
br#"["string", true, false, null]"#,
34-
false,
35-
StringCacheMode::All,
36-
false,
37-
false,
38-
)
39-
.unwrap()
28+
PythonParseBuilder::default()
29+
.python_parse(py, br#"["string", true, false, null]"#)
30+
.unwrap()
4031
});
4132
})
4233
}
@@ -49,7 +40,14 @@ fn _python_parse_file(path: &str, bench: &mut Bencher, cache_mode: StringCacheMo
4940

5041
Python::with_gil(|py| {
5142
cache_clear(py);
52-
bench.iter(|| python_parse(py, json_data, false, cache_mode, false, false).unwrap());
43+
bench.iter(|| {
44+
PythonParseBuilder {
45+
cache_mode,
46+
..Default::default()
47+
}
48+
.python_parse(py, json_data)
49+
.unwrap()
50+
});
5351
})
5452
}
5553

crates/jiter/src/jiter.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@ impl<'j> Jiter<'j> {
170170
.parser
171171
.consume_number::<NumberRange>(peek.into_inner(), self.allow_inf_nan)
172172
{
173-
Ok(range) => Ok(&self.data[range]),
173+
Ok(numbe_range) => Ok(&self.data[numbe_range.range]),
174174
Err(e) => Err(self.maybe_number_error(e, JsonType::Float, peek)),
175175
}
176176
}

crates/jiter/src/lib.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ mod lazy_index_map;
66
mod number_decoder;
77
mod parse;
88
#[cfg(feature = "python")]
9+
mod py_lossless_float;
10+
#[cfg(feature = "python")]
911
mod py_string_cache;
1012
#[cfg(feature = "python")]
1113
mod python;
@@ -21,7 +23,9 @@ pub use number_decoder::{NumberAny, NumberInt};
2123
pub use parse::Peek;
2224
pub use value::{JsonArray, JsonObject, JsonValue};
2325

26+
#[cfg(feature = "python")]
27+
pub use py_lossless_float::LosslessFloat;
2428
#[cfg(feature = "python")]
2529
pub use py_string_cache::{cache_clear, cache_usage, cached_py_string, pystring_fast_new, StringCacheMode};
2630
#[cfg(feature = "python")]
27-
pub use python::{map_json_error, python_parse};
31+
pub use python::{map_json_error, PythonParseBuilder};

0 commit comments

Comments
 (0)