Skip to content

Commit ad2047f

Browse files
authored
Use implicit tree for error search (#562)
This builds on top of #560 and replaces the use of `SyntaxNode` in hooks.jl by the new lower-level cursor APIs. This avoids allocating two completely separate representations of the syntax tree. As a result, the end-to-end parse time for error-containing code is between 1.5x (if the error is the first token) and 2x (if the error is the last token) faster than current master. However, the main motivation here is just to reduce coupling between the Expr-producing and SyntaxNode-producing parts of the code.
1 parent ebfaf96 commit ad2047f

File tree

3 files changed

+68
-31
lines changed

3 files changed

+68
-31
lines changed

src/expr.jl

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -609,9 +609,12 @@ end
609609
return retexpr
610610
end
611611

612-
function build_tree(::Type{Expr}, stream::ParseStream;
613-
filename=nothing, first_line=1, kws...)
612+
function build_tree(::Type{Expr}, stream::ParseStream; filename=nothing, first_line=1, kws...)
614613
source = SourceFile(stream, filename=filename, first_line=first_line)
614+
return build_tree(Expr, stream, source)
615+
end
616+
617+
function build_tree(::Type{Expr}, stream::ParseStream, source::SourceFile)
615618
txtbuf = unsafe_textbuf(stream)
616619
cursor = RedTreeCursor(stream)
617620
wrapper_head = SyntaxHead(K"wrapper",EMPTY_FLAGS)

src/hooks.jl

Lines changed: 54 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -4,25 +4,51 @@
44
const _has_v1_6_hooks = VERSION >= v"1.6"
55
const _has_v1_10_hooks = isdefined(Core, :_setparser!)
66

7+
struct ErrorSpec
8+
child_idx::Int
9+
node::RedTreeCursor
10+
parent_kind::Kind
11+
end
12+
13+
function first_error_cursor(stream::ParseStream)
14+
output = stream.output
15+
for i = 2:length(output)
16+
is_error(output[i]) && return GreenTreeCursor(output, i)
17+
end
18+
end
19+
720
# Find the first error in the syntax tree, returning the index of the error
821
# within its parent and the node itself.
9-
function _first_error(t::SyntaxNode)
10-
if is_error(t)
11-
return 0,t
22+
function first_tree_error(c::RedTreeCursor, error_cursor::GreenTreeCursor)
23+
@assert !is_leaf(c) && !is_error(c)
24+
first_child = first_error = nothing
25+
it = reverse_nontrivia_children(c)
26+
r = iterate(it)
27+
local child
28+
while r !== nothing
29+
(child, state) = r
30+
r = iterate(it, state)
31+
(error_cursor in child || error_cursor == child.green) || continue
32+
is_error(child) && break
33+
return first_tree_error(child, error_cursor)
1234
end
13-
if !is_leaf(t)
14-
for (i,c) in enumerate(children(t))
15-
if is_error(c)
16-
return i,c
17-
else
18-
x = _first_error(c)
19-
if x != (0,nothing)
20-
return x
21-
end
22-
end
23-
end
35+
i = 1 # count node index
36+
while r !== nothing
37+
i += 1
38+
(_, state) = r
39+
r = iterate(it, state)
40+
end
41+
return ErrorSpec(i, child, kind(c))
42+
end
43+
44+
function first_tree_error(stream::ParseStream)
45+
c = RedTreeCursor(stream)
46+
err = first_error_cursor(stream)
47+
for c in reverse_toplevel_siblings(c)
48+
is_error(c) && return ErrorSpec(0, c, K"wrapper")
49+
is_leaf(c) && continue
50+
return first_tree_error(c, err)
2451
end
25-
return 0,nothing
2652
end
2753

2854
# Classify an incomplete expression, returning a Symbol compatible with
@@ -32,8 +58,10 @@ end
3258
# next if the incomplete stream was to continue. (Though this is just rough. In
3359
# practice several categories are combined for the purposes of the REPL -
3460
# perhaps we can/should do something more precise in the future.)
35-
function _incomplete_tag(n::SyntaxNode, codelen)
36-
i,c = _first_error(n)
61+
function _incomplete_tag(theerror::ErrorSpec, codelen)
62+
i = theerror.child_idx
63+
c = theerror.node
64+
kp = theerror.parent_kind
3765
if isnothing(c) || last_byte(c) < codelen || codelen == 0
3866
if kind(c) == K"ErrorEofMultiComment"
3967
# This is the one weird case where the token itself is an
@@ -47,18 +75,16 @@ function _incomplete_tag(n::SyntaxNode, codelen)
4775
# here as a hard error.
4876
return :none
4977
end
50-
if kind(c) == K"error" && numchildren(c) > 0
51-
for cc in children(c)
78+
if kind(c) == K"error" && is_non_terminal(c)
79+
for cc in reverse_nontrivia_children(c)
5280
if kind(cc) == K"error"
5381
return :other
5482
end
5583
end
5684
end
57-
if isnothing(c.parent)
85+
if kp == K"wrapper"
5886
return :other
59-
end
60-
kp = kind(c.parent)
61-
if kp == K"string" || kp == K"var"
87+
elseif kp == K"string" || kp == K"var"
6288
return :string
6389
elseif kp == K"cmdstring"
6490
return :cmd
@@ -181,8 +207,8 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti
181207

182208
if any_error(stream)
183209
pos_before_comments = last_non_whitespace_byte(stream)
184-
tree = build_tree(SyntaxNode, stream, first_line=lineno, filename=filename)
185-
tag = _incomplete_tag(tree, pos_before_comments)
210+
errspec = first_tree_error(stream)
211+
tag = _incomplete_tag(errspec, pos_before_comments)
186212
if _has_v1_10_hooks
187213
exc = ParseError(stream, filename=filename, first_line=lineno,
188214
incomplete_tag=tag)
@@ -211,15 +237,15 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti
211237
# * truncates the top level expression arg list before that error
212238
# * includes the last line number
213239
# * appends the error message
214-
topex = Expr(tree)
240+
source = SourceFile(stream, filename=filename, first_line=lineno)
241+
topex = build_tree(Expr, stream, source)
215242
@assert topex.head == :toplevel
216243
i = findfirst(_has_nested_error, topex.args)
217244
if i > 1 && topex.args[i-1] isa LineNumberNode
218245
i -= 1
219246
end
220247
resize!(topex.args, i-1)
221-
_,errort = _first_error(tree)
222-
push!(topex.args, LineNumberNode(source_line(errort), filename))
248+
push!(topex.args, LineNumberNode(source_line(source, first_byte(errspec.node)), filename))
223249
push!(topex.args, error_ex)
224250
topex
225251
else
@@ -402,4 +428,3 @@ end
402428
# Convenience functions to mirror `JuliaSyntax.parsestmt(Expr, ...)` in simple cases.
403429
fl_parse(::Type{Expr}, args...; kws...) = fl_parse(args...; kws...)
404430
fl_parseall(::Type{Expr}, args...; kws...) = fl_parseall(args...; kws...)
405-

src/tree_cursors.jl

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,12 @@ function prev_sibling_assumed(cursor::GreenTreeCursor)
3131
GreenTreeCursor(cursor.parser_output, next_idx)
3232
end
3333

34+
function Base.in(child::GreenTreeCursor, parent::GreenTreeCursor)
35+
@assert child.parser_output === parent.parser_output
36+
child.position < parent.position || return false
37+
return child.position >= parent.position - this(parent).node_span
38+
end
39+
3440
# Debug printing
3541
function Base.show(io::IO, node::GreenTreeCursor)
3642
print(io, Base.summary(this(node)), " @", node.position)
@@ -164,3 +170,6 @@ end
164170
end
165171
nothing
166172
end
173+
174+
Base.in(child::GreenTreeCursor, parent::RedTreeCursor) =
175+
in(child, parent.green)

0 commit comments

Comments
 (0)