Skip to content

Commit c7f8fdd

Browse files
authored
Partial JSON parsing support trailing strings (#101)
1 parent 240f180 commit c7f8fdd

File tree

20 files changed

+410
-150
lines changed

20 files changed

+410
-150
lines changed

.github/workflows/ci.yml

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ jobs:
5757
- run: cargo careful t -F python
5858
if: matrix.rust-version == 'nightly'
5959

60-
- uses: codecov/codecov-action@v3
60+
- uses: codecov/codecov-action@v4
6161
with:
6262
env_vars: RUNS_ON,RUST_VERSION
6363
token: ${{ secrets.CODECOV_TOKEN }}
@@ -103,11 +103,53 @@ jobs:
103103

104104
- run: cargo test --doc
105105

106-
- uses: codecov/codecov-action@v3
106+
- uses: codecov/codecov-action@v4
107107
with:
108108
env_vars: RUNS_ON,RUST_VERSION
109109
token: ${{ secrets.CODECOV_TOKEN }}
110110

111+
test-python:
112+
name: test jiter-python
113+
114+
runs-on: ubuntu-latest
115+
116+
env:
117+
RUNS_ON: ubuntu-latest
118+
119+
steps:
120+
- uses: actions/checkout@v3
121+
122+
- name: set up python
123+
uses: actions/setup-python@v4
124+
with:
125+
python-version: '3.12'
126+
127+
- uses: dtolnay/rust-toolchain@stable
128+
129+
- id: cache-rust
130+
uses: Swatinem/rust-cache@v2
131+
132+
- run: cargo install rustfilt coverage-prepare
133+
if: steps.cache-rust.outputs.cache-hit != 'true'
134+
135+
- run: rustup component add llvm-tools-preview
136+
137+
- run: make python-install
138+
139+
- run: pip install -e crates/jiter-python
140+
env:
141+
RUSTFLAGS: '-C instrument-coverage'
142+
143+
- run: pytest crates/jiter-python/tests
144+
env:
145+
RUST_BACKTRACE: 1
146+
147+
- run: coverage-prepare lcov $(python -c 'import jiter.jiter;print(jiter.jiter.__file__)')
148+
149+
- uses: codecov/codecov-action@v4
150+
with:
151+
token: ${{ secrets.CODECOV_TOKEN }}
152+
111153
bench:
112154
runs-on: ubuntu-latest
113155
steps:
@@ -504,7 +546,7 @@ jobs:
504546
# https://github.com/marketplace/actions/alls-green#why used for branch protection checks
505547
check:
506548
if: always()
507-
needs: [test-linux, test-macos, bench, fuzz, fuzz-skip, lint]
549+
needs: [test-linux, test-macos, test-python, bench, fuzz, fuzz-skip, lint]
508550
runs-on: ubuntu-latest
509551
steps:
510552
- name: Decide whether the needed jobs succeeded or failed

Makefile

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
.DEFAULT_GOAL := all
2+
3+
.PHONY: format
4+
format:
5+
@cargo fmt --version
6+
cargo fmt
7+
8+
.PHONY: lint
9+
lint:
10+
@cargo clippy --version
11+
cargo clippy -- -D warnings
12+
cargo doc
13+
14+
.PHONY: test
15+
test:
16+
cargo test
17+
18+
.PHONY: python-install
19+
python-install:
20+
pip install maturin
21+
pip install -r crates/jiter-python/tests/requirements.txt
22+
23+
.PHONY: python-dev
24+
python-dev:
25+
maturin develop -m crates/jiter-python/Cargo.toml
26+
27+
.PHONY: python-test
28+
python-test: python-dev
29+
pytest crates/jiter-python/tests
30+
31+
.PHONY: bench
32+
bench:
33+
cargo bench -p jiter -F python
34+
35+
.PHONY: fuzz
36+
fuzz:
37+
cargo +nightly fuzz run --fuzz-dir crates/fuzz compare_to_serde --release
38+
39+
.PHONY: fuzz-skip
40+
fuzz-skip:
41+
cargo +nightly fuzz run --fuzz-dir crates/fuzz compare_skip --release
42+
43+
.PHONY: all
44+
all: format lint test python-test

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,13 @@ Fast iterable JSON parser.
99
Documentation is available at [docs.rs/jiter](https://docs.rs/jiter).
1010

1111
jiter has three interfaces:
12-
* [`JsonValue`] an enum representing JSON data
13-
* [`Jiter`] an iterator over JSON data
14-
* [`python_parse`] which parses a JSON string into a Python object
12+
* `JsonValue` an enum representing JSON data
13+
* `Jiter` an iterator over JSON data
14+
* `PythonParse` which parses a JSON string into a Python object
1515

1616
## JsonValue Example
1717

18-
See [the `JsonValue` docs][JsonValue] for more details.
18+
See [the `JsonValue` docs](https://docs.rs/jiter/latest/jiter/enum.JsonValue.html) for more details.
1919

2020
```rust
2121
use jiter::JsonValue;
@@ -54,7 +54,7 @@ Object(
5454

5555
## Jiter Example
5656

57-
To use [Jiter], you need to know what schema you're expecting:
57+
To use [Jiter](https://docs.rs/jiter/latest/jiter/struct.Jiter.html), you need to know what schema you're expecting:
5858

5959
```rust
6060
use jiter::{Jiter, NumberInt, Peek};
@@ -69,7 +69,7 @@ fn main() {
6969
"+44 2345678"
7070
]
7171
}"#;
72-
let mut jiter = Jiter::new(json_data.as_bytes(), true);
72+
let mut jiter = Jiter::new(json_data.as_bytes());
7373
assert_eq!(jiter.next_object().unwrap(), Some("name"));
7474
assert_eq!(jiter.next_str().unwrap(), "John Doe");
7575
assert_eq!(jiter.next_key().unwrap(), Some("age"));

crates/fuzz/fuzz_targets/compare_skip.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ fuzz_target!(|json: String| {
1616
let json_data = json.as_bytes();
1717
match JsonValue::parse(json_data, false) {
1818
Ok(_) => {
19-
let mut jiter = Jiter::new(json_data, false);
19+
let mut jiter = Jiter::new(json_data);
2020
jiter.next_skip().unwrap();
2121
jiter.finish().unwrap();
2222
}
2323
Err(json_error) => {
24-
let mut jiter = Jiter::new(json_data, false);
24+
let mut jiter = Jiter::new(json_data);
2525
let jiter_error = match jiter.next_skip() {
2626
Ok(_) => jiter.finish().unwrap_err(),
2727
Err(e) => e,

crates/jiter-python/README.md

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,10 @@ def from_json(
1515
/,
1616
*,
1717
allow_inf_nan: bool = True,
18-
cache_strings: Literal[True, False, "all", "keys", "none"] = True,
19-
allow_partial: bool = False,
18+
cache_mode: Literal[True, False, "all", "keys", "none"] = "all",
19+
partial_mode: Literal[True, False, "off", "on", "trailing-strings"] = False,
2020
catch_duplicate_keys: bool = False,
21+
lossless_floats: bool = False,
2122
) -> Any:
2223
"""
2324
Parse input bytes into a JSON object.
@@ -26,12 +27,16 @@ def from_json(
2627
json_data: The JSON data to parse
2728
allow_inf_nan: Whether to allow infinity (`Infinity` and `-Infinity`) and `NaN` values to float fields.
2829
Defaults to True.
29-
cache_strings: cache Python strings to improve performance at the cost of some memory usage
30+
cache_mode: cache Python strings to improve performance at the cost of some memory usage
3031
- True / 'all' - cache all strings
3132
- 'keys' - cache only object keys
3233
- False / 'none' - cache nothing
33-
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
34+
partial_mode: How to handle incomplete JSON input:
35+
- False / 'off' - raise an exception if the input is incomplete
36+
- True / 'on' - allow incomplete JSON but discard the last string if it is incomplete
37+
- 'trailing-strings' - allow incomplete JSON, and include the last incomplete string in the output
3438
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times
39+
lossless_floats: if True, preserve full detail on floats using `LosslessFloat`
3540
3641
Returns:
3742
Python object built from the JSON input.

crates/jiter-python/jiter.pyi

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,8 @@ def from_json(
66
/,
77
*,
88
allow_inf_nan: bool = True,
9-
cache_strings: Literal[True, False, "all", "keys", "none"] = "all",
10-
allow_partial: bool = False,
9+
cache_mode: Literal[True, False, "all", "keys", "none"] = "all",
10+
partial_mode: Literal[True, False, "off", "on", "trailing-strings"] = False,
1111
catch_duplicate_keys: bool = False,
1212
lossless_floats: bool = False,
1313
) -> Any:
@@ -18,11 +18,14 @@ def from_json(
1818
json_data: The JSON data to parse
1919
allow_inf_nan: Whether to allow infinity (`Infinity` and `-Infinity`) and `NaN` values to float fields.
2020
Defaults to True.
21-
cache_strings: cache Python strings to improve performance at the cost of some memory usage
21+
cache_mode: cache Python strings to improve performance at the cost of some memory usage
2222
- True / 'all' - cache all strings
2323
- 'keys' - cache only object keys
2424
- False / 'none' - cache nothing
25-
allow_partial: if True, return parsed content when reaching EOF without closing objects and arrays
25+
partial_mode: How to handle incomplete JSON input:
26+
- False / 'off' - raise an exception if the input is incomplete
27+
- True / 'on' - allow incomplete JSON but discard the last string if it is incomplete
28+
- 'trailing-strings' - allow incomplete JSON, and include the last incomplete string in the output
2629
catch_duplicate_keys: if True, raise an exception if objects contain the same key multiple times
2730
lossless_floats: if True, preserve full detail on floats using `LosslessFloat`
2831

crates/jiter-python/src/lib.rs

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::sync::OnceLock;
22

33
use pyo3::prelude::*;
44

5-
use jiter::{map_json_error, LosslessFloat, PythonParseBuilder, StringCacheMode};
5+
use jiter::{map_json_error, LosslessFloat, PartialMode, PythonParse, StringCacheMode};
66

77
#[allow(clippy::fn_params_excessive_bools)]
88
#[pyfunction(
@@ -11,8 +11,8 @@ use jiter::{map_json_error, LosslessFloat, PythonParseBuilder, StringCacheMode};
1111
/,
1212
*,
1313
allow_inf_nan=true,
14-
cache_strings=StringCacheMode::All,
15-
allow_partial=false,
14+
cache_mode=StringCacheMode::All,
15+
partial_mode=PartialMode::Off,
1616
catch_duplicate_keys=false,
1717
lossless_floats=false,
1818
)
@@ -21,15 +21,15 @@ pub fn from_json<'py>(
2121
py: Python<'py>,
2222
json_data: &[u8],
2323
allow_inf_nan: bool,
24-
cache_strings: StringCacheMode,
25-
allow_partial: bool,
24+
cache_mode: StringCacheMode,
25+
partial_mode: PartialMode,
2626
catch_duplicate_keys: bool,
2727
lossless_floats: bool,
2828
) -> PyResult<Bound<'py, PyAny>> {
29-
let parse_builder = PythonParseBuilder {
29+
let parse_builder = PythonParse {
3030
allow_inf_nan,
31-
cache_mode: cache_strings,
32-
allow_partial,
31+
cache_mode,
32+
partial_mode,
3333
catch_duplicate_keys,
3434
lossless_floats,
3535
};

0 commit comments

Comments
 (0)