Skip to content

Commit a4cf796

Browse files
authored
convert skip to iterate instead of call recursive functions (#111)
1 parent fc5a710 commit a4cf796

File tree

3 files changed

+143
-33
lines changed

3 files changed

+143
-33
lines changed

crates/jiter/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ ahash = "0.8.0"
1818
smallvec = "1.11.0"
1919
pyo3 = { version = "0.21.0", optional = true }
2020
lexical-parse-float = { version = "0.8.5", features = ["format"] }
21+
bitvec = "1.0.1"
2122

2223
[features]
2324
python = ["dep:pyo3", "dep:pyo3-build-config"]

crates/jiter/src/value.rs

Lines changed: 106 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -234,55 +234,128 @@ pub(crate) fn take_value_skip(
234234
peek: Peek,
235235
parser: &mut Parser,
236236
tape: &mut Tape,
237-
mut recursion_limit: u8,
237+
recursion_limit: u8,
238238
allow_inf_nan: bool,
239239
) -> JsonResult<()> {
240240
match peek {
241241
Peek::True => parser.consume_true(),
242242
Peek::False => parser.consume_false(),
243243
Peek::Null => parser.consume_null(),
244-
Peek::String => {
245-
parser.consume_string::<StringDecoderRange>(tape, false)?;
246-
Ok(())
247-
}
244+
Peek::String => parser.consume_string::<StringDecoderRange>(tape, false).map(drop),
248245
Peek::Array => {
249-
if let Some(peek_first) = parser.array_first()? {
250-
check_recursion!(recursion_limit, parser.index,
251-
take_value_skip(peek_first, parser, tape, recursion_limit, allow_inf_nan)?;
252-
);
253-
while let Some(peek) = parser.array_step()? {
254-
check_recursion!(recursion_limit, parser.index,
255-
take_value_skip(peek, parser, tape, recursion_limit, allow_inf_nan)?;
256-
);
257-
}
246+
if let Some(next_peek) = parser.array_first()? {
247+
take_value_skip_recursive(next_peek, ARRAY, parser, tape, recursion_limit, allow_inf_nan)
248+
} else {
249+
Ok(())
258250
}
259-
Ok(())
260251
}
261252
Peek::Object => {
262253
if parser.object_first::<StringDecoderRange>(tape)?.is_some() {
263-
let peek = parser.peek()?;
264-
check_recursion!(recursion_limit, parser.index,
265-
take_value_skip(peek, parser, tape, recursion_limit, allow_inf_nan)?;
266-
);
267-
while parser.object_step::<StringDecoderRange>(tape)?.is_some() {
268-
let peek = parser.peek()?;
269-
check_recursion!(recursion_limit, parser.index,
270-
take_value_skip(peek, parser, tape, recursion_limit, allow_inf_nan)?;
271-
);
272-
}
254+
take_value_skip_recursive(parser.peek()?, OBJECT, parser, tape, recursion_limit, allow_inf_nan)
255+
} else {
256+
Ok(())
273257
}
274-
Ok(())
275258
}
276-
_ => {
277-
if let Err(e) = parser.consume_number::<NumberRange>(peek.into_inner(), allow_inf_nan) {
259+
_ => parser
260+
.consume_number::<NumberRange>(peek.into_inner(), allow_inf_nan)
261+
.map(drop)
262+
.map_err(|e| {
278263
if !peek.is_num() {
279-
Err(json_error!(ExpectedSomeValue, parser.index))
264+
json_error!(ExpectedSomeValue, parser.index)
280265
} else {
281-
Err(e)
266+
e
282267
}
283-
} else {
284-
Ok(())
268+
}),
269+
}
270+
}
271+
272+
const ARRAY: bool = false;
273+
const OBJECT: bool = true;
274+
275+
#[inline(never)] // this is an iterative algo called only from take_value_skip, no point in inlining
276+
fn take_value_skip_recursive(
277+
mut peek: Peek,
278+
mut current_recursion: bool,
279+
parser: &mut Parser,
280+
tape: &mut Tape,
281+
recursion_limit: u8,
282+
allow_inf_nan: bool,
283+
) -> JsonResult<()> {
284+
let mut recursion_stack = bitvec::bitarr![0; 256];
285+
let recursion_limit: usize = recursion_limit.into();
286+
let mut current_recursion_depth = 0;
287+
288+
macro_rules! push_recursion {
289+
($next_peek:expr, $value:expr) => {
290+
peek = $next_peek;
291+
recursion_stack.set(
292+
current_recursion_depth,
293+
std::mem::replace(&mut current_recursion, $value),
294+
);
295+
current_recursion_depth += 1;
296+
if current_recursion_depth >= recursion_limit {
297+
return Err(json_error!(RecursionLimitExceeded, parser.index));
285298
}
286-
}
299+
};
300+
}
301+
302+
loop {
303+
match peek {
304+
Peek::True => parser.consume_true()?,
305+
Peek::False => parser.consume_false()?,
306+
Peek::Null => parser.consume_null()?,
307+
Peek::String => {
308+
parser.consume_string::<StringDecoderRange>(tape, false)?;
309+
}
310+
Peek::Array => {
311+
if let Some(next_peek) = parser.array_first()? {
312+
push_recursion!(next_peek, ARRAY);
313+
// immediately jump to process the first value in the array
314+
continue;
315+
}
316+
}
317+
Peek::Object => {
318+
if parser.object_first::<StringDecoderRange>(tape)?.is_some() {
319+
push_recursion!(parser.peek()?, OBJECT);
320+
// immediately jump to process the first value in the object
321+
continue;
322+
}
323+
}
324+
_ => {
325+
parser
326+
.consume_number::<NumberRange>(peek.into_inner(), allow_inf_nan)
327+
.map_err(|e| {
328+
if !peek.is_num() {
329+
json_error!(ExpectedSomeValue, parser.index)
330+
} else {
331+
e
332+
}
333+
})?;
334+
}
335+
};
336+
337+
// now try to advance position in the current array or object
338+
peek = loop {
339+
match current_recursion {
340+
ARRAY => {
341+
if let Some(next_peek) = parser.array_step()? {
342+
break next_peek;
343+
}
344+
}
345+
OBJECT => {
346+
if parser.object_step::<StringDecoderRange>(tape)?.is_some() {
347+
break parser.peek()?;
348+
}
349+
}
350+
}
351+
352+
current_recursion_depth = match current_recursion_depth.checked_sub(1) {
353+
Some(r) => r,
354+
// no recursion left, we are done
355+
None => return Ok(()),
356+
};
357+
358+
current_recursion = recursion_stack[current_recursion_depth];
359+
};
287360
}
288361
}

crates/jiter/tests/main.rs

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -962,6 +962,42 @@ fn test_recursion_limit_incr() {
962962
}
963963
}
964964

965+
/// Skipping 2000 nested, unterminated arrays must fail with
/// `RecursionLimitExceeded` at the same index that `JsonValue::parse` reports.
#[test]
fn test_recursion_limit_skip_array() {
    let nested = "[ ".repeat(2000);
    let data = nested.as_bytes();

    let mut jiter = Jiter::new(data);
    let skip_err = jiter.next_skip().unwrap_err();
    assert_eq!(
        skip_err.error_type,
        JiterErrorType::JsonError(JsonErrorType::RecursionLimitExceeded)
    );

    // the full parser is the reference for where the limit is hit
    let parse_index = JsonValue::parse(data, false).unwrap_err().index;
    assert_eq!(skip_err.index, parse_index);

    let expected_description = format!("recursion limit exceeded at line 1 column {}", parse_index + 1);
    assert_eq!(skip_err.description(&jiter), expected_description);
}
982+
983+
/// Skipping 2000 nested, unterminated objects must fail with
/// `RecursionLimitExceeded` at the same index that `JsonValue::parse` reports.
#[test]
fn test_recursion_limit_skip_object() {
    let nested = "{\"a\": ".repeat(2000);
    let data = nested.as_bytes();

    let mut jiter = Jiter::new(data);
    let skip_err = jiter.next_skip().unwrap_err();
    assert_eq!(
        skip_err.error_type,
        JiterErrorType::JsonError(JsonErrorType::RecursionLimitExceeded)
    );

    // the full parser is the reference for where the limit is hit
    let parse_index = JsonValue::parse(data, false).unwrap_err().index;
    assert_eq!(skip_err.index, parse_index);

    let expected_description = format!("recursion limit exceeded at line 1 column {}", parse_index + 1);
    assert_eq!(skip_err.description(&jiter), expected_description);
}
1000+
9651001
macro_rules! number_bytes {
9661002
($($name:ident: $json:literal => $expected:expr;)*) => {
9671003
$(

0 commit comments

Comments
 (0)