Skip to content

Commit 4de460e

Browse files
committed
incremental option to parse!() + callable Expr compat code
Here I've made two changes to make JuliaSyntax easier to integrate into Base without requiring the existing hook mechanism. The `incremental` option has been added to `parse!()`, moving flisp-compatible whitespace handling (e.g., parsing a statement consumes up to the next newline) out of the hook and into the main `parse!()` API function. The logic for constructing `Expr(:incomplete)` and the appropriate `Expr(:error)` for flisp compatibility is now callable separately from the hook.
1 parent 99e975a commit 4de460e

File tree

6 files changed

+114
-89
lines changed

6 files changed

+114
-89
lines changed

src/JuliaSyntax.jl

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ export parsestmt,
1818

1919
@_public parse!,
2020
ParseStream,
21-
build_tree
21+
build_tree,
22+
all_trivia,
23+
any_error
2224

2325
# Tokenization
2426
export tokenize,

src/core/parse_stream.jl

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -961,8 +961,21 @@ unsafe_textbuf(stream) = stream.textbuf
961961

962962
first_byte(stream::ParseStream) = first(stream.output).byte_span + 1 # After sentinel
963963
last_byte(stream::ParseStream) = stream.next_byte - 1
964+
965+
"""
966+
any_error(stream)
967+
968+
Return true if the stream encountered an error during parsing.
969+
"""
964970
any_error(stream::ParseStream) = any_error(stream.diagnostics)
965971

972+
"""
973+
all_trivia(stream)
974+
975+
Return true if the parse stream contains only syntax trivia (ignoring tombstone nodes) or is empty.
976+
"""
977+
all_trivia(stream::ParseStream) = all(n->is_trivia(n) || kind(n) == K"TOMBSTONE", stream.output)
978+
966979
# Return last non-whitespace byte which was parsed
967980
function last_non_whitespace_byte(stream::ParseStream)
968981
byte_pos = stream.next_byte

src/integration/hooks.jl

Lines changed: 59 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,58 @@ end
162162
# Debug log file for dumping parsed code
163163
const _debug_log = Ref{Union{Nothing,IO}}(nothing)
164164

165-
function core_parser_hook(code, filename::String, lineno::Int, offset::Int, options::Symbol)
165+
# fl_parse has several peculiarities in the exact form which error expressions take.
166+
function build_base_compat_expr(stream, rule; filename="none", first_line=1)
167+
if !any_error(stream)
168+
return build_tree(Expr, stream; filename=filename, first_line=first_line)
169+
end
170+
pos_before_comments = last_non_whitespace_byte(stream)
171+
errspec = first_tree_error(stream)
172+
tag = _incomplete_tag(errspec, pos_before_comments)
173+
if _has_v1_10_hooks
174+
exc = ParseError(stream, filename=filename, first_line=first_line,
175+
incomplete_tag=tag)
176+
msg = sprint(showerror, exc)
177+
error_ex = Expr(tag === :none ? :error : :incomplete,
178+
Meta.ParseError(msg, exc))
179+
elseif tag !== :none
180+
# Hack: For older Julia versions, replicate the messages which
181+
# Base.incomplete_tag() will match
182+
msg =
183+
tag === :string ? "incomplete: invalid string syntax" :
184+
tag === :comment ? "incomplete: unterminated multi-line comment #= ... =#" :
185+
tag === :block ? "incomplete: construct requires end" :
186+
tag === :cmd ? "incomplete: invalid \"`\" syntax" :
187+
tag === :char ? "incomplete: invalid character literal" :
188+
"incomplete: premature end of input"
189+
error_ex = Expr(:incomplete, msg)
190+
else
191+
# In the flisp parser errors are normally `Expr(:error, msg)` where
192+
# `msg` is a String. By using a JuliaSyntax.ParseError for msg
193+
# we can do fancy error reporting instead.
194+
error_ex = Expr(:error, ParseError(stream, filename=filename, first_line=first_line))
195+
end
196+
if rule != :all
197+
return error_ex
198+
end
199+
# When encountering a toplevel error, the reference parser
200+
# * truncates the top level expression arg list before that error
201+
# * includes the last line number
202+
# * appends the error message
203+
source = SourceFile(stream, filename=filename, first_line=first_line)
204+
topex = build_tree(Expr, stream, source)
205+
@assert topex.head == :toplevel
206+
i = findfirst(_has_nested_error, topex.args)
207+
if i > 1 && topex.args[i-1] isa LineNumberNode
208+
i -= 1
209+
end
210+
resize!(topex.args, i-1)
211+
push!(topex.args, LineNumberNode(source_line(source, first_byte(errspec.node)), filename))
212+
push!(topex.args, error_ex)
213+
return topex
214+
end
215+
216+
function core_parser_hook(code, filename::String, first_line::Int, offset::Int, rule::Symbol)
166217
try
167218
# TODO: Check that we do all this input wrangling without copying the
168219
# code buffer
@@ -178,90 +229,17 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti
178229
if !isnothing(_debug_log[])
179230
print(_debug_log[], """
180231
#-#-#-------------------------------
181-
# ENTER filename=$filename, lineno=$lineno, offset=$offset, options=$options"
232+
# ENTER filename=$filename, first_line=$first_line, offset=$offset, rule=$rule"
182233
#-#-#-------------------------------
183234
""")
184235
write(_debug_log[], code)
185236
end
186237

187238
stream = ParseStream(code, offset+1)
188-
if options === :statement || options === :atom
189-
# To copy the flisp parser driver:
190-
# * Parsing atoms consumes leading trivia
191-
# * Parsing statements consumes leading+trailing trivia
192-
bump_trivia(stream)
193-
if peek(stream) == K"EndMarker"
194-
# If we're at the end of stream after skipping whitespace, just
195-
# return `nothing` to indicate this rather than attempting to
196-
# parse a statement or atom and failing.
197-
return Core.svec(nothing, last_byte(stream))
198-
end
199-
end
200-
parse!(stream; rule=options)
201-
if options === :statement
202-
bump_trivia(stream; skip_newlines=false)
203-
if peek(stream) == K"NewlineWs"
204-
bump(stream)
205-
end
206-
end
207-
208-
if any_error(stream)
209-
pos_before_comments = last_non_whitespace_byte(stream)
210-
errspec = first_tree_error(stream)
211-
tag = _incomplete_tag(errspec, pos_before_comments)
212-
if _has_v1_10_hooks
213-
exc = ParseError(stream, filename=filename, first_line=lineno,
214-
incomplete_tag=tag)
215-
msg = sprint(showerror, exc)
216-
error_ex = Expr(tag === :none ? :error : :incomplete,
217-
Meta.ParseError(msg, exc))
218-
elseif tag !== :none
219-
# Hack: For older Julia versions, replicate the messages which
220-
# Base.incomplete_tag() will match
221-
msg =
222-
tag === :string ? "incomplete: invalid string syntax" :
223-
tag === :comment ? "incomplete: unterminated multi-line comment #= ... =#" :
224-
tag === :block ? "incomplete: construct requires end" :
225-
tag === :cmd ? "incomplete: invalid \"`\" syntax" :
226-
tag === :char ? "incomplete: invalid character literal" :
227-
"incomplete: premature end of input"
228-
error_ex = Expr(:incomplete, msg)
229-
else
230-
# In the flisp parser errors are normally `Expr(:error, msg)` where
231-
# `msg` is a String. By using a JuliaSyntax.ParseError for msg
232-
# we can do fancy error reporting instead.
233-
error_ex = Expr(:error, ParseError(stream, filename=filename, first_line=lineno))
234-
end
235-
ex = if options === :all
236-
# When encountering a toplevel error, the reference parser
237-
# * truncates the top level expression arg list before that error
238-
# * includes the last line number
239-
# * appends the error message
240-
source = SourceFile(stream, filename=filename, first_line=lineno)
241-
topex = build_tree(Expr, stream, source)
242-
@assert topex.head == :toplevel
243-
i = findfirst(_has_nested_error, topex.args)
244-
if i > 1 && topex.args[i-1] isa LineNumberNode
245-
i -= 1
246-
end
247-
resize!(topex.args, i-1)
248-
push!(topex.args, LineNumberNode(source_line(source, first_byte(errspec.node)), filename))
249-
push!(topex.args, error_ex)
250-
topex
251-
else
252-
error_ex
253-
end
254-
else
255-
# TODO: Figure out a way to show warnings. Meta.parse() has no API
256-
# to communicate this, and we also can't show them to stdout as
257-
# this is too side-effectful and can result in double-reporting in
258-
# the REPL.
259-
#
260-
# show_diagnostics(stdout, stream.diagnostics, code)
261-
#
262-
ex = build_tree(Expr, stream; filename=filename, first_line=lineno)
263-
end
239+
parse!(stream; rule=rule, incremental=true)
264240

241+
ex = all_trivia(stream) ? nothing :
242+
build_base_compat_expr(stream, rule; filename=filename, first_line=first_line)
265243
# Note the next byte in 1-based indexing is `last_byte(stream) + 1` but
266244
# the Core hook must return an offset (ie, it's 0-based) so the factors
267245
# of one cancel here.
@@ -294,15 +272,15 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti
294272
offset=offset,
295273
code=code)
296274

297-
_fl_parse_hook(code, filename, lineno, offset, options)
275+
_fl_parse_hook(code, filename, first_line, offset, rule)
298276
end
299277
end
300278

301279
# Core._parse gained a `lineno` argument in
302280
# https://github.com/JuliaLang/julia/pull/43876
303281
# Prior to this, the following signature was needed:
304-
function core_parser_hook(code, filename, offset, options)
305-
core_parser_hook(code, filename, 1, offset, options)
282+
function core_parser_hook(code, filename, offset, rule)
283+
core_parser_hook(code, filename, 1, offset, rule)
306284
end
307285

308286
if _has_v1_10_hooks

src/julia/parser.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1563,7 +1563,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15631563
k2 = peek(ps, 2)
15641564
if peek(ps) == K"NewlineWs" && !is_closing_token(ps, k2) &&
15651565
k2 != K"NewlineWs"
1566-
bump(ps) # newline
1566+
bump(ps, TRIVIA_FLAG) # newline
15671567
parse_eq(ps)
15681568
end
15691569
end

src/julia/parser_api.jl

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -43,22 +43,42 @@ structures may be extracted from `stream` with the [`build_tree`](@ref) function
4343
* `:statement` — parse a single statement, or statements separated by semicolons.
4444
* `:atom` — parse a single syntax "atom": a literal, identifier, or
4545
parenthesized expression.
46+
47+
If `incremental` is `true`, skip whitespace (including newlines) before parsing
48+
an atom or statement and skip any trailing whitespace up to the newline when
49+
parsing a statement. In incremental mode it's not an error for the end of
50+
stream to be reached and `all_trivia(stream)` can be used to detect if the end
51+
of the stream was reached without encountering significant syntax.
4652
"""
47-
function parse!(stream::ParseStream; rule::Symbol=:all)
53+
function parse!(stream::ParseStream; rule::Symbol=:all, incremental=false)
4854
if rule == :toplevel
4955
Base.depwarn("Use of rule == :toplevel in parse!() is deprecated. use `rule=:all` instead.", :parse!)
5056
rule = :all
5157
end
58+
mark = position(stream)
5259
ps = ParseState(stream)
60+
if incremental && rule != :all
61+
bump_trivia(stream, skip_newlines=true)
62+
end
5363
if rule === :all
5464
parse_toplevel(ps)
5565
elseif rule === :statement
56-
parse_stmts(ps)
66+
if !incremental || peek(stream) != K"EndMarker"
67+
parse_stmts(ps)
68+
end
5769
elseif rule === :atom
58-
parse_atom(ps)
70+
if !incremental || peek(stream) != K"EndMarker"
71+
parse_atom(ps)
72+
end
5973
else
6074
throw(ArgumentError("Unknown grammar rule $rule"))
6175
end
76+
if incremental && rule == :statement
77+
bump_trivia(stream; skip_newlines=false)
78+
if peek(stream) == K"NewlineWs"
79+
bump(stream, TRIVIA_FLAG)
80+
end
81+
end
6282
validate_tokens(stream)
6383
stream
6484
end
@@ -81,12 +101,12 @@ end
81101

82102
function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=VERSION,
83103
ignore_trivia=true, filename=nothing, first_line=1, ignore_errors=false,
84-
ignore_warnings=ignore_errors, kws...) where {T}
104+
ignore_warnings=ignore_errors, incremental=false, kws...) where {T}
85105
stream = ParseStream(text, index; version=version)
86106
if ignore_trivia && rule != :all
87107
bump_trivia(stream, skip_newlines=true)
88108
end
89-
parse!(stream; rule=rule)
109+
parse!(stream; rule=rule, incremental=incremental)
90110
if need_eof
91111
if (ignore_trivia && peek(stream, skip_newlines=true) != K"EndMarker") ||
92112
(!ignore_trivia && (peek(stream, skip_newlines=false, skip_whitespace=false) != K"EndMarker"))
@@ -108,7 +128,8 @@ _parse_docs = """
108128
ignore_trivia=true,
109129
filename=nothing,
110130
ignore_errors=false,
111-
ignore_warnings=ignore_errors)
131+
ignore_warnings=ignore_errors,
132+
incremental=false)
112133
113134
# Parse all statements at top level (file scope)
114135
parseall(...)

test/parser_api.jl

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,17 @@
9595
(Expr(:block, LineNumberNode(2), :a), 12)
9696
@test JuliaSyntax.parsestmt(Expr, "begin\na\nend\nbegin\nb\nend", 12) ==
9797
(Expr(:block, LineNumberNode(3), :b), 24)
98+
99+
# Test that parsing statements in incremental mode works and stops
100+
# after whitespace / comment trivia
101+
@test JuliaSyntax.parsestmt(Expr, "x + 1\n(y)\n", 1, incremental=true) == (:(x + 1), 7)
102+
@test JuliaSyntax.parsestmt(Expr, "x + 1\n(y)\n", 7, incremental=true) == (:y, 11)
103+
@test JuliaSyntax.parsestmt(Expr, " x#==#", 1, incremental=true) == (:x, 7)
104+
let ps = JuliaSyntax.ParseStream(" #==# ")
105+
JuliaSyntax.parse!(ps, rule=:statement, incremental=true)
106+
@test JuliaSyntax.all_trivia(ps)
107+
@test JuliaSyntax.last_byte(ps) == 6
108+
end
98109
end
99110

100111
@testset "error/warning handling" begin

0 commit comments

Comments
 (0)