Skip to content

Commit cda634a

Browse files
criminosis and amaanq committed
feat: add error information in the progress callback
This allows users to bail out of parsing from the progress callback once an error has *definitely* been detected, i.e. once all possible stack versions have a non-zero error cost. Co-authored-by: Amaan Qureshi <[email protected]>
1 parent ca087d2 commit cda634a

File tree

9 files changed

+87
-4
lines changed

9 files changed

+87
-4
lines changed

cli/src/tests/parser_test.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,35 @@ fn test_parsing_with_timeout_during_balancing() {
10291029
});
10301030
}
10311031

1032+
#[test]
1033+
fn test_parsing_with_timeout_when_error_detected() {
1034+
let mut parser = Parser::new();
1035+
parser.set_language(&get_language("json")).unwrap();
1036+
1037+
// Parse an infinitely-long array, but insert an error after 1000 characters.
1038+
let mut offset = 0;
1039+
let erroneous_code = "!,";
1040+
let tree = parser.parse_with_options(
1041+
&mut |i, _| match i {
1042+
0 => "[",
1043+
1..=1000 => "0,",
1044+
_ => erroneous_code,
1045+
},
1046+
None,
1047+
Some(ParseOptions::new().progress_callback(&mut |state| {
1048+
offset = state.current_byte_offset();
1049+
state.has_error()
1050+
})),
1051+
);
1052+
1053+
// The callback is called at the end of parsing; however, what we're asserting here is that
1054+
// parsing ends immediately once the error is detected. This is verified by checking that the
1055+
// offset of the last byte processed is 1002, i.e. 1000 + the length of the erroneous code
1056+
// we inserted.
1057+
assert_eq!(offset, 1000 + erroneous_code.len());
1058+
assert!(tree.is_none());
1059+
}
1060+
10321061
// Included Ranges
10331062

10341063
#[test]

lib/binding_rust/bindings.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ pub struct TSInput {
8282
pub struct TSParseState {
8383
pub payload: *mut ::core::ffi::c_void,
8484
pub current_byte_offset: u32,
85+
pub has_error: bool,
8586
}
8687
#[repr(C)]
8788
#[derive(Debug, Copy, Clone)]

lib/binding_rust/lib.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,11 @@ impl ParseState {
147147
pub const fn current_byte_offset(&self) -> usize {
148148
unsafe { self.0.as_ref() }.current_byte_offset as usize
149149
}
150+
151+
#[must_use]
152+
pub const fn has_error(&self) -> bool {
153+
unsafe { self.0.as_ref() }.has_error
154+
}
150155
}
151156

152157
/// A stateful object that is passed into a [`QueryProgressCallback`]

lib/binding_web/lib/imports.js

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,9 @@ mergeInto(LibraryManager.library, {
2323
}
2424
},
2525

26-
tree_sitter_progress_callback(currentOffset) {
26+
tree_sitter_progress_callback(currentOffset, hasError) {
2727
if (Module.currentProgressCallback) {
28-
return Module.currentProgressCallback({ currentOffset });
28+
return Module.currentProgressCallback({ currentOffset, hasError });
2929
}
3030
return false;
3131
},

lib/binding_web/lib/tree-sitter.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,8 @@ extern void tree_sitter_log_callback(
139139
);
140140

141141
extern bool tree_sitter_progress_callback(
142-
uint32_t current_offset
142+
uint32_t current_offset,
143+
bool has_error
143144
);
144145

145146
extern bool tree_sitter_query_progress_callback(
@@ -178,7 +179,7 @@ static void call_log_callback(
178179
static bool progress_callback(
179180
TSParseState *state
180181
) {
181-
return tree_sitter_progress_callback(state->current_byte_offset);
182+
return tree_sitter_progress_callback(state->current_byte_offset, state->has_error);
182183
}
183184

184185
static bool query_progress_callback(

lib/binding_web/src/parser.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ export interface ParseOptions {
5353
export interface ParseState {
5454
/** The byte offset in the document that the parser is at. */
5555
currentOffset: number;
56+
57+
/** Indicates whether the parser has definitely encountered an error, i.e. every parse stack version is in an error state. */
58+
hasError: boolean;
5659
}
5760

5861
/**

lib/binding_web/test/parser.test.ts

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -413,5 +413,34 @@ describe('Parser', () => {
413413
{ progressCallback },
414414
)).toBeNull();
415415
});
416+
417+
it('times out when an error is detected', { timeout: 5000 }, () => {
418+
parser.setLanguage(JSON);
419+
420+
let offset = 0;
421+
const erroneousCode = '!,';
422+
const progressCallback = (state: ParseState) => {
423+
offset = state.currentOffset;
424+
return state.hasError;
425+
};
426+
427+
const tree = parser.parse(
428+
(offset) => {
429+
if (offset === 0) return '[';
430+
if (offset >= 1 && offset < 1000) return '0,';
431+
return erroneousCode;
432+
},
433+
null,
434+
{ progressCallback },
435+
);
436+
437+
// The callback is called at the end of parsing; however, what we're asserting here is that
438+
// parsing ends immediately once the error is detected. This is verified by checking that the
439+
// offset of the last byte processed is twice (1000 + the length of the erroneous code we
440+
// inserted), i.e. 2004. Note that in this WASM test, we multiply the offset by 2 because
441+
// JavaScript strings are UTF-16 encoded.
442+
expect(offset).toBe((1000 + erroneousCode.length) * 2);
443+
expect(tree).toBeNull();
444+
});
416445
});
417446
});

lib/include/tree_sitter/api.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ typedef struct TSInput {
9494
typedef struct TSParseState {
9595
void *payload;
9696
uint32_t current_byte_offset;
97+
bool has_error;
9798
} TSParseState;
9899

99100
typedef struct TSParseOptions {

lib/src/parser.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ struct TSParser {
116116
unsigned included_range_difference_index;
117117
bool has_scanner_error;
118118
bool canceled_balancing;
119+
bool has_error;
119120
};
120121

121122
typedef struct {
@@ -1419,6 +1420,16 @@ static void ts_parser__recover(
14191420
self->stack, version, ts_subtree_last_external_token(lookahead)
14201421
);
14211422
}
1423+
1424+
bool has_error = true;
1425+
for (unsigned i = 0; i < ts_stack_version_count(self->stack); i++) {
1426+
ErrorStatus status = ts_parser__version_status(self, i);
1427+
if (!status.is_in_error) {
1428+
has_error = false;
1429+
break;
1430+
}
1431+
}
1432+
self->has_error = has_error;
14221433
}
14231434

14241435
static void ts_parser__handle_error(
@@ -1525,6 +1536,7 @@ static bool ts_parser__check_progress(TSParser *self, Subtree *lookahead, const
15251536
}
15261537
if (self->parse_options.progress_callback && position != NULL) {
15271538
self->parse_state.current_byte_offset = *position;
1539+
self->parse_state.has_error = self->has_error;
15281540
}
15291541
if (
15301542
self->operation_count == 0 &&
@@ -1929,6 +1941,7 @@ TSParser *ts_parser_new(void) {
19291941
self->timeout_duration = 0;
19301942
self->language = NULL;
19311943
self->has_scanner_error = false;
1944+
self->has_error = false;
19321945
self->canceled_balancing = false;
19331946
self->external_scanner_payload = NULL;
19341947
self->end_clock = clock_null();
@@ -2066,6 +2079,7 @@ void ts_parser_reset(TSParser *self) {
20662079
}
20672080
self->accept_count = 0;
20682081
self->has_scanner_error = false;
2082+
self->has_error = false;
20692083
self->parse_options = (TSParseOptions) {0};
20702084
self->parse_state = (TSParseState) {0};
20712085
}

0 commit comments

Comments
 (0)