Skip to content

Commit 8c0300e

Browse files
committed
lib: added StringPattern and made Automaton no longer borrow the query
The `Automaton` struct borrowed the source query, which also forced the Engine to carry the query's lifetime. The only data actually being borrowed were the `JsonString` values for member transitions. In preparation for #117 we remove the borrowed `JsonString` and replace it with `StringPattern`. For UTF-8 the `StringPattern` will be a more complex struct that precomputes data needed for efficient matching later. For now, it's a thin wrapper over a `JsonString`. During construction we may create many transitions over the same pattern. To reduce the size of the automaton we cache the patterns and share them through an `Rc`. This may get optimised later to use some kind of inline storage instead, but it's unlikely to actually matter. I ran the benchmarks and saw no measurable difference between the previous version and this one. Refs: #117 #613
1 parent 9a76de9 commit 8c0300e

File tree

27 files changed

+402
-333
lines changed

27 files changed

+402
-333
lines changed

crates/rsonpath-benchmarks/src/implementations/rsonpath.rs

Lines changed: 18 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,10 @@
11
use crate::framework::implementation::Implementation;
2-
use ouroboros::self_referencing;
2+
use rsonpath::{engine::Compiler, input::MmapInput};
33
use rsonpath::{
4-
engine::main::MainEngine,
4+
engine::{main::MainEngine, Engine},
55
input::OwnedBytes,
66
result::{Match, Sink},
77
};
8-
use rsonpath::{
9-
engine::{Compiler, Engine},
10-
input::MmapInput,
11-
};
12-
use rsonpath_syntax::JsonPathQuery;
138
use std::{convert::Infallible, fmt::Display, fs, io};
149
use thiserror::Error;
1510

@@ -18,16 +13,8 @@ pub struct RsonpathCount {}
1813
pub struct RsonpathMmap {}
1914
pub struct RsonpathMmapCount {}
2015

21-
#[self_referencing()]
22-
pub struct RsonpathQuery {
23-
query: JsonPathQuery,
24-
#[borrows(query)]
25-
#[not_covariant]
26-
engine: MainEngine<'this>,
27-
}
28-
2916
impl Implementation for Rsonpath {
30-
type Query = RsonpathQuery;
17+
type Query = MainEngine;
3118

3219
type File = OwnedBytes<Vec<u8>>;
3320

@@ -52,25 +39,20 @@ impl Implementation for Rsonpath {
5239

5340
fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
5441
let query = rsonpath_syntax::parse(query).unwrap();
42+
let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?;
5543

56-
let rsonpath = RsonpathQuery::try_new(query, |query| {
57-
MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
58-
})?;
59-
60-
Ok(rsonpath)
44+
Ok(engine)
6145
}
6246

6347
fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
64-
query
65-
.with_engine(|engine| engine.matches(file, &mut VoidSink))
66-
.map_err(RsonpathError::EngineError)?;
48+
query.matches(file, &mut VoidSink).map_err(RsonpathError::EngineError)?;
6749

6850
Ok("[not collected]")
6951
}
7052
}
7153

7254
impl Implementation for RsonpathCount {
73-
type Query = RsonpathQuery;
55+
type Query = MainEngine;
7456

7557
type File = OwnedBytes<Vec<u8>>;
7658

@@ -95,25 +77,20 @@ impl Implementation for RsonpathCount {
9577

9678
fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
9779
let query = rsonpath_syntax::parse(query).unwrap();
80+
let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?;
9881

99-
let rsonpath = RsonpathQuery::try_new(query, |query| {
100-
MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
101-
})?;
102-
103-
Ok(rsonpath)
82+
Ok(engine)
10483
}
10584

10685
fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
107-
query
108-
.with_engine(|engine| engine.count(file))
109-
.map_err(RsonpathError::EngineError)?;
86+
query.count(file).map_err(RsonpathError::EngineError)?;
11087

11188
Ok("[not collected]")
11289
}
11390
}
11491

11592
impl Implementation for RsonpathMmap {
116-
type Query = RsonpathQuery;
93+
type Query = MainEngine;
11794

11895
type File = MmapInput;
11996

@@ -138,25 +115,20 @@ impl Implementation for RsonpathMmap {
138115

139116
fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
140117
let query = rsonpath_syntax::parse(query).unwrap();
118+
let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?;
141119

142-
let rsonpath = RsonpathQuery::try_new(query, |query| {
143-
MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
144-
})?;
145-
146-
Ok(rsonpath)
120+
Ok(engine)
147121
}
148122

149123
fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
150-
query
151-
.with_engine(|engine| engine.matches(file, &mut VoidSink))
152-
.map_err(RsonpathError::EngineError)?;
124+
query.matches(file, &mut VoidSink).map_err(RsonpathError::EngineError)?;
153125

154126
Ok("[not collected]")
155127
}
156128
}
157129

158130
impl Implementation for RsonpathMmapCount {
159-
type Query = RsonpathQuery;
131+
type Query = MainEngine;
160132

161133
type File = MmapInput;
162134

@@ -181,18 +153,13 @@ impl Implementation for RsonpathMmapCount {
181153

182154
fn compile_query(&self, query: &str) -> Result<Self::Query, Self::Error> {
183155
let query = rsonpath_syntax::parse(query).unwrap();
156+
let engine = MainEngine::compile_query(&query).map_err(RsonpathError::CompilerError)?;
184157

185-
let rsonpath = RsonpathQuery::try_new(query, |query| {
186-
MainEngine::compile_query(query).map_err(RsonpathError::CompilerError)
187-
})?;
188-
189-
Ok(rsonpath)
158+
Ok(engine)
190159
}
191160

192161
fn run(&self, query: &Self::Query, file: &Self::File) -> Result<Self::Result<'_>, Self::Error> {
193-
query
194-
.with_engine(|engine| engine.count(file))
195-
.map_err(RsonpathError::EngineError)?;
162+
query.count(file).map_err(RsonpathError::EngineError)?;
196163

197164
Ok("[not collected]")
198165
}

crates/rsonpath-lib/src/automaton.rs

Lines changed: 27 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,20 +8,20 @@ mod state;
88

99
pub use state::{State, StateAttributes};
1010

11-
use crate::{automaton::error::CompilerError, debug};
11+
use crate::{automaton::error::CompilerError, debug, string_pattern::StringPattern};
1212
use nfa::NondeterministicAutomaton;
13-
use rsonpath_syntax::{num::JsonUInt, str::JsonString, JsonPathQuery};
13+
use rsonpath_syntax::{num::JsonUInt, JsonPathQuery};
1414
use smallvec::SmallVec;
15-
use std::{fmt::Display, ops::Index};
15+
use std::{fmt::Display, ops::Index, rc::Rc};
1616

1717
/// A minimal, deterministic automaton representing a JSONPath query.
1818
#[derive(Debug, PartialEq, Eq)]
19-
pub struct Automaton<'q> {
20-
states: Vec<StateTable<'q>>,
19+
pub struct Automaton {
20+
states: Vec<StateTable>,
2121
}
2222

23-
/// Transition when a JSON member name matches a [`JsonString`]i.
24-
pub type MemberTransition<'q> = (&'q JsonString, State);
23+
/// Transition when a JSON member name matches a [`StringPattern`].
24+
pub type MemberTransition = (Rc<StringPattern>, State);
2525

2626
/// Transition on elements of an array with indices specified by either a single index
2727
/// or a simple slice expression.
@@ -45,9 +45,9 @@ pub(super) enum ArrayTransitionLabel {
4545
/// Contains transitions triggered by matching member names or array indices, and a fallback transition
4646
/// triggered when none of the labelled transitions match.
4747
#[derive(Debug)]
48-
pub struct StateTable<'q> {
48+
pub struct StateTable {
4949
attributes: StateAttributes,
50-
member_transitions: SmallVec<[MemberTransition<'q>; 2]>,
50+
member_transitions: SmallVec<[MemberTransition; 2]>,
5151
array_transitions: SmallVec<[ArrayTransition; 2]>,
5252
fallback_state: State,
5353
}
@@ -59,7 +59,7 @@ pub(crate) struct SimpleSlice {
5959
step: JsonUInt,
6060
}
6161

62-
impl Default for StateTable<'_> {
62+
impl Default for StateTable {
6363
#[inline]
6464
fn default() -> Self {
6565
Self {
@@ -71,7 +71,7 @@ impl Default for StateTable<'_> {
7171
}
7272
}
7373

74-
impl PartialEq for StateTable<'_> {
74+
impl PartialEq for StateTable {
7575
#[inline]
7676
fn eq(&self, other: &Self) -> bool {
7777
return self.fallback_state == other.fallback_state
@@ -88,10 +88,10 @@ impl PartialEq for StateTable<'_> {
8888
}
8989
}
9090

91-
impl Eq for StateTable<'_> {}
91+
impl Eq for StateTable {}
9292

93-
impl<'q> Index<State> for Automaton<'q> {
94-
type Output = StateTable<'q>;
93+
impl Index<State> for Automaton {
94+
type Output = StateTable;
9595

9696
#[inline(always)]
9797
fn index(&self, index: State) -> &Self::Output {
@@ -149,7 +149,7 @@ impl From<SimpleSlice> for ArrayTransitionLabel {
149149
}
150150
}
151151

152-
impl<'q> Automaton<'q> {
152+
impl Automaton {
153153
/// Convert a [`JsonPathQuery`] into a minimal deterministic automaton.
154154
///
155155
/// # Errors
@@ -158,10 +158,10 @@ impl<'q> Automaton<'q> {
158158
/// - [`CompilerError::NotSupported`] raised if the query contains elements
159159
/// not yet supported by the compiler.
160160
#[inline]
161-
pub fn new(query: &'q JsonPathQuery) -> Result<Self, CompilerError> {
161+
pub fn new(query: &JsonPathQuery) -> Result<Self, CompilerError> {
162162
let nfa = NondeterministicAutomaton::new(query)?;
163163
debug!("NFA: {}", nfa);
164-
Automaton::minimize(nfa)
164+
Self::minimize(nfa)
165165
}
166166

167167
/// Returns whether this automaton represents the select-root JSONPath query ('$').
@@ -389,12 +389,12 @@ impl<'q> Automaton<'q> {
389389
self[state].attributes.is_unitary()
390390
}
391391

392-
fn minimize(nfa: NondeterministicAutomaton<'q>) -> Result<Self, CompilerError> {
392+
fn minimize(nfa: NondeterministicAutomaton) -> Result<Self, CompilerError> {
393393
minimizer::minimize(nfa)
394394
}
395395
}
396396

397-
impl<'q> StateTable<'q> {
397+
impl StateTable {
398398
/// Returns the state to which a fallback transition leads.
399399
///
400400
/// A fallback transition is the catch-all transition triggered
@@ -421,7 +421,7 @@ impl<'q> StateTable<'q> {
421421
/// to the contained [`State`].
422422
#[must_use]
423423
#[inline(always)]
424-
pub fn member_transitions(&self) -> &[MemberTransition<'q>] {
424+
pub fn member_transitions(&self) -> &[MemberTransition] {
425425
&self.member_transitions
426426
}
427427
}
@@ -442,7 +442,7 @@ impl Display for ArrayTransitionLabel {
442442
}
443443
}
444444

445-
impl Display for Automaton<'_> {
445+
impl Display for Automaton {
446446
#[inline]
447447
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
448448
writeln!(f, "digraph {{")?;
@@ -503,7 +503,12 @@ impl Display for Automaton<'_> {
503503
}
504504
}
505505
for (label, state) in &transitions.member_transitions {
506-
writeln!(f, " {i} -> {} [label=\"{}\"]", state.0, label.unquoted())?
506+
writeln!(
507+
f,
508+
" {i} -> {} [label=\"{}\"]",
509+
state.0,
510+
std::str::from_utf8(label.unquoted()).expect("labels to be valid utf8")
511+
)?
507512
}
508513
writeln!(f, " {i} -> {} [label=\"*\"]", transitions.fallback_state.0)?;
509514
}

0 commit comments

Comments
 (0)