Skip to content

Commit f0c928a

Browse files
authored
fix: skipping inside arrays on comma-atomic (#757)
Tail skipping was not triggered when the item matching the unitary transition was an atomic value inside a list. For example, selecting `$[0]` from a long list of integers would never skip, massively degrading performance. Skipping was added to `handle_comma` in the same vein as it was in `handle_colon` to enable this. Ref: #751
1 parent 058589c commit f0c928a

File tree

9 files changed

+39
-27
lines changed

9 files changed

+39
-27
lines changed

crates/rsonpath-benchmarks/Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,13 @@ name = "pathimpl"
2121
[dependencies]
2222
clap = { version = "4.5.23", features = ["derive", "wrap_help"] }
2323
color-eyre = { version = "0.6.3", default-features = false }
24-
criterion = "0.6.0"
24+
criterion = "0.7.0"
2525
eyre = "0.6.12"
2626
flate2 = "1.0.35"
2727
hex-literal = "1.0.0"
2828
indicatif = "0.18.0"
2929
jni = { version = "0.21.1", features = ["invocation", "default"] }
30-
jsonpath-rust = "1.0.0"
30+
jsonpath-rust = "1.0.4"
3131
lazy_static = "1.5.0"
3232
serde_json = "1.0.134"
3333
sha2 = "0.10.8"

crates/rsonpath-benchmarks/src/framework.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ impl<I: Implementation> BenchFn for PreparedQuery<I> {
329329
};
330330

331331
let result = self.implementation.run(q, f).unwrap();
332-
criterion::black_box(result);
332+
std::hint::black_box(result);
333333
}
334334
}
335335

crates/rsonpath-benchmarks/src/implementations/jsonpath_rust.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,22 @@
11
use crate::framework::implementation::Implementation;
2-
use jsonpath_rust::{parser::JsonPath, JsonPathValue};
2+
use jsonpath_rust::{
3+
parser::{errors::JsonPathError, model::JpQuery, parse_json_path},
4+
query::{js_path_process, QueryRef},
5+
};
36
use serde_json::Value;
47
use std::{
58
fmt::Display,
69
fs,
710
io::{self, BufReader},
8-
str::FromStr,
911
};
1012
use thiserror::Error;
1113

1214
pub struct JsonpathRust {}
1315

14-
pub struct JsonpathRustResult<'a>(Vec<JsonPathValue<'a, Value>>);
16+
pub struct JsonpathRustResult<'a>(Vec<QueryRef<'a, Value>>);
1517

1618
impl Implementation for JsonpathRust {
17-
type Query = JsonPath;
19+
type Query = JpQuery;
1820

1921
type File = Value;
2022

@@ -39,11 +41,11 @@ impl Implementation for JsonpathRust {
3941
}
4042

4143
fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
42-
JsonPath::from_str(query).map_err(JsonpathRustError::JsonPathInstError)
44+
parse_json_path(query).map_err(JsonpathRustError::JsonPathInstError)
4345
}
4446

4547
fn run<'a>(&self, query: &'a Self::Query, file: &'a Self::File) -> Result<Self::Result<'a>, Self::Error> {
46-
let results = query.find_slice(file);
48+
let results = js_path_process(query, file)?;
4749

4850
Ok(JsonpathRustResult(results))
4951
}
@@ -52,8 +54,7 @@ impl Implementation for JsonpathRust {
5254
impl Display for JsonpathRustResult<'_> {
5355
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
5456
for res in &self.0 {
55-
let val = res.clone().to_data();
56-
writeln!(f, "{}", val)?;
57+
writeln!(f, "{}", res.clone().val())?;
5758
}
5859

5960
Ok(())
@@ -67,5 +68,5 @@ pub enum JsonpathRustError {
6768
#[error("error parsing JSON with serde: '{0}'")]
6869
SerdeError(#[from] serde_json::Error),
6970
#[error("error parsing JSONPath query: '{0}'")]
70-
JsonPathInstError(<JsonPath as TryFrom<&'static str>>::Error),
71+
JsonPathInstError(#[from] JsonPathError),
7172
}

crates/rsonpath-benchmarks/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ fn run<I: Implementation>(imp: I, query_str: &str, path_str: &str) -> Result<()>
2323

2424
let result = imp.run(&query, &file)?;
2525

26-
println!("{}", result);
26+
println!("{result}");
2727

2828
Ok(())
2929
}

crates/rsonpath-lib/src/classification/structural/nosimd.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -121,15 +121,13 @@ where
121121
#[inline]
122122
fn reclassify(&mut self, idx: usize) {
123123
if let Some(block) = self.block.take() {
124+
let relative_idx = idx + 1 - self.iter.get_offset();
124125
let quote_classified_block = block.quote_classified;
125-
let relevant_idx = idx + 1;
126-
let block_idx = (idx + 1) % N;
127-
debug!("relevant_idx is {relevant_idx}.");
128-
129-
if block_idx != 0 || relevant_idx == self.iter.get_offset() {
126+
debug!("relative_idx is {relative_idx}.");
127+
if relative_idx < N {
130128
let new_block = Block::from_idx(
131129
quote_classified_block,
132-
block_idx,
130+
relative_idx,
133131
self.are_colons_on,
134132
self.are_commas_on,
135133
);

crates/rsonpath-lib/src/classification/structural/shared.rs

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,15 +58,14 @@ macro_rules! structural_classifier {
5858
#[inline]
5959
fn reclassify(&mut self, idx: usize) {
6060
if let Some(block) = self.block.take() {
61+
let relative_idx = idx + 1 - self.iter.get_offset();
6162
let quote_classified_block = block.quote_classified;
62-
let relevant_idx = idx + 1;
63-
let block_idx = (idx + 1) % $size;
64-
debug!("relevant_idx is {relevant_idx}.");
63+
debug!("relative_idx is {relative_idx}.");
6564

66-
if block_idx != 0 || relevant_idx == self.iter.get_offset() {
65+
if relative_idx < $size {
6766
debug!("need to reclassify.");
6867

69-
let mask = <$mask_ty>::MAX << block_idx;
68+
let mask = <$mask_ty>::MAX << relative_idx;
7069
// SAFETY: target_feature invariant
7170
let mut new_block = unsafe { self.classifier.classify(quote_classified_block) };
7271
new_block.structural_mask &= mask;

crates/rsonpath-lib/src/engine.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ pub trait Engine {
4242
/// Find the starting indices of matches on the given [`Input`] and write them to the [`Sink`].
4343
///
4444
/// The result is equivalent to using [`matches`](Engine::matches) and extracting the
45-
/// [`Match::span.start_idx`],
45+
/// [`Match::span`] and [`MatchSpan::start_idx`],
4646
/// but in general is much more time and memory efficient.
4747
///
4848
/// # Errors

crates/rsonpath-lib/src/engine/main.rs

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ where
419419
/// This method only handles atomic values after the comma.
420420
/// Objects and arrays are processed at their respective opening character.
421421
#[inline(always)]
422-
fn handle_comma(&mut self, _classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
422+
fn handle_comma(&mut self, classifier: &mut Classifier!(), idx: usize) -> Result<(), EngineError> {
423423
debug!("Comma");
424424

425425
self.recorder.record_value_terminator(idx, self.depth)?;
@@ -448,6 +448,20 @@ where
448448
{
449449
debug!("Accepting list item on comma.");
450450
self.record_match_detected_at(idx + 1, NodeType::Atomic)?;
451+
452+
if self.automaton.is_unitary(self.state) {
453+
self.next_event = classifier.next()?;
454+
match self.next_event {
455+
None | Some(Structural::Closing(_, _)) => {
456+
return Ok(());
457+
}
458+
Some(Structural::Comma(idx)) => self.recorder.record_value_terminator(idx, self.depth)?,
459+
Some(Structural::Colon(_) | Structural::Opening(_, _)) => (),
460+
}
461+
debug!("Skipping unique state from {:?}", BracketType::Square);
462+
let stop_at = classifier.skip(BracketType::Square)?;
463+
self.next_event = Some(Structural::Closing(BracketType::Square, stop_at));
464+
}
451465
}
452466
}
453467

crates/rsonpath-test/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ serde_json = { workspace = true }
1919

2020
[dev-dependencies]
2121
pretty_assertions = { workspace = true }
22-
rsonpath-lib = { workspace = true }
22+
rsonpath-lib = { workspace = true, features = ["simd"] }
2323
rsonpath-syntax = { workspace = true }
2424

2525
[build-dependencies]

0 commit comments

Comments
 (0)