diff --git a/src/JuliaSyntax.jl b/src/JuliaSyntax.jl index da5861c0..c879c209 100644 --- a/src/JuliaSyntax.jl +++ b/src/JuliaSyntax.jl @@ -18,7 +18,9 @@ export parsestmt, @_public parse!, ParseStream, - build_tree + build_tree, + all_trivia, + any_error # Tokenization export tokenize, diff --git a/src/core/parse_stream.jl b/src/core/parse_stream.jl index da4d70cc..b429334e 100644 --- a/src/core/parse_stream.jl +++ b/src/core/parse_stream.jl @@ -961,8 +961,21 @@ unsafe_textbuf(stream) = stream.textbuf first_byte(stream::ParseStream) = first(stream.output).byte_span + 1 # After sentinel last_byte(stream::ParseStream) = stream.next_byte - 1 + +""" + any_error(stream) + +Return true if the stream encountered an error during parsing. +""" any_error(stream::ParseStream) = any_error(stream.diagnostics) +""" + all_trivia(stream) + +Return true if the parse stream contains only syntax trivia (or is empty) +""" +all_trivia(stream::ParseStream) = all(n->is_trivia(n) || kind(n) == K"TOMBSTONE", stream.output) + # Return last non-whitespace byte which was parsed function last_non_whitespace_byte(stream::ParseStream) byte_pos = stream.next_byte diff --git a/src/integration/hooks.jl b/src/integration/hooks.jl index 2d1e4df8..fa8a8701 100644 --- a/src/integration/hooks.jl +++ b/src/integration/hooks.jl @@ -162,7 +162,58 @@ end # Debug log file for dumping parsed code const _debug_log = Ref{Union{Nothing,IO}}(nothing) -function core_parser_hook(code, filename::String, lineno::Int, offset::Int, options::Symbol) +# fl_parse has several peculiarities in the exact form which error expressions take. 
+function build_base_compat_expr(stream, rule; filename="none", first_line=1) + if !any_error(stream) + return build_tree(Expr, stream; filename=filename, first_line=first_line) + end + pos_before_comments = last_non_whitespace_byte(stream) + errspec = first_tree_error(stream) + tag = _incomplete_tag(errspec, pos_before_comments) + if _has_v1_10_hooks + exc = ParseError(stream, filename=filename, first_line=first_line, + incomplete_tag=tag) + msg = sprint(showerror, exc) + error_ex = Expr(tag === :none ? :error : :incomplete, + Meta.ParseError(msg, exc)) + elseif tag !== :none + # Hack: For older Julia versions, replicate the messages which + # Base.incomplete_tag() will match + msg = + tag === :string ? "incomplete: invalid string syntax" : + tag === :comment ? "incomplete: unterminated multi-line comment #= ... =#" : + tag === :block ? "incomplete: construct requires end" : + tag === :cmd ? "incomplete: invalid \"`\" syntax" : + tag === :char ? "incomplete: invalid character literal" : + "incomplete: premature end of input" + error_ex = Expr(:incomplete, msg) + else + # In the flisp parser errors are normally `Expr(:error, msg)` where + # `msg` is a String. By using a JuliaSyntax.ParseError for msg + # we can do fancy error reporting instead. 
+ error_ex = Expr(:error, ParseError(stream, filename=filename, first_line=first_line)) + end + if rule != :all + return error_ex + end + # When encountering a toplevel error, the reference parser + # * truncates the top level expression arg list before that error + # * includes the last line number + # * appends the error message + source = SourceFile(stream, filename=filename, first_line=first_line) + topex = build_tree(Expr, stream, source) + @assert topex.head == :toplevel + i = findfirst(_has_nested_error, topex.args) + if i > 1 && topex.args[i-1] isa LineNumberNode + i -= 1 + end + resize!(topex.args, i-1) + push!(topex.args, LineNumberNode(source_line(source, first_byte(errspec.node)), filename)) + push!(topex.args, error_ex) + return topex +end + +function core_parser_hook(code, filename::String, first_line::Int, offset::Int, rule::Symbol) try # TODO: Check that we do all this input wrangling without copying the # code buffer @@ -178,90 +229,17 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti if !isnothing(_debug_log[]) print(_debug_log[], """ #-#-#------------------------------- - # ENTER filename=$filename, lineno=$lineno, offset=$offset, options=$options" + # ENTER filename=$filename, first_line=$first_line, offset=$offset, rule=$rule" #-#-#------------------------------- """) write(_debug_log[], code) end stream = ParseStream(code, offset+1) - if options === :statement || options === :atom - # To copy the flisp parser driver: - # * Parsing atoms consumes leading trivia - # * Parsing statements consumes leading+trailing trivia - bump_trivia(stream) - if peek(stream) == K"EndMarker" - # If we're at the end of stream after skipping whitespace, just - # return `nothing` to indicate this rather than attempting to - # parse a statement or atom and failing. 
- return Core.svec(nothing, last_byte(stream)) - end - end - parse!(stream; rule=options) - if options === :statement - bump_trivia(stream; skip_newlines=false) - if peek(stream) == K"NewlineWs" - bump(stream) - end - end - - if any_error(stream) - pos_before_comments = last_non_whitespace_byte(stream) - errspec = first_tree_error(stream) - tag = _incomplete_tag(errspec, pos_before_comments) - if _has_v1_10_hooks - exc = ParseError(stream, filename=filename, first_line=lineno, - incomplete_tag=tag) - msg = sprint(showerror, exc) - error_ex = Expr(tag === :none ? :error : :incomplete, - Meta.ParseError(msg, exc)) - elseif tag !== :none - # Hack: For older Julia versions, replicate the messages which - # Base.incomplete_tag() will match - msg = - tag === :string ? "incomplete: invalid string syntax" : - tag === :comment ? "incomplete: unterminated multi-line comment #= ... =#" : - tag === :block ? "incomplete: construct requires end" : - tag === :cmd ? "incomplete: invalid \"`\" syntax" : - tag === :char ? "incomplete: invalid character literal" : - "incomplete: premature end of input" - error_ex = Expr(:incomplete, msg) - else - # In the flisp parser errors are normally `Expr(:error, msg)` where - # `msg` is a String. By using a JuliaSyntax.ParseError for msg - # we can do fancy error reporting instead. 
- error_ex = Expr(:error, ParseError(stream, filename=filename, first_line=lineno)) - end - ex = if options === :all - # When encountering a toplevel error, the reference parser - # * truncates the top level expression arg list before that error - # * includes the last line number - # * appends the error message - source = SourceFile(stream, filename=filename, first_line=lineno) - topex = build_tree(Expr, stream, source) - @assert topex.head == :toplevel - i = findfirst(_has_nested_error, topex.args) - if i > 1 && topex.args[i-1] isa LineNumberNode - i -= 1 - end - resize!(topex.args, i-1) - push!(topex.args, LineNumberNode(source_line(source, first_byte(errspec.node)), filename)) - push!(topex.args, error_ex) - topex - else - error_ex - end - else - # TODO: Figure out a way to show warnings. Meta.parse() has no API - # to communicate this, and we also can't show them to stdout as - # this is too side-effectful and can result in double-reporting in - # the REPL. - # - # show_diagnostics(stdout, stream.diagnostics, code) - # - ex = build_tree(Expr, stream; filename=filename, first_line=lineno) - end + parse!(stream; rule=rule, incremental=true) + ex = all_trivia(stream) ? nothing : + build_base_compat_expr(stream, rule; filename=filename, first_line=first_line) # Note the next byte in 1-based indexing is `last_byte(stream) + 1` but # the Core hook must return an offset (ie, it's 0-based) so the factors # of one cancel here. 
@@ -294,15 +272,15 @@ function core_parser_hook(code, filename::String, lineno::Int, offset::Int, opti offset=offset, code=code) - _fl_parse_hook(code, filename, lineno, offset, options) + _fl_parse_hook(code, filename, first_line, offset, rule) end end # Core._parse gained a `lineno` argument in # https://github.com/JuliaLang/julia/pull/43876 # Prior to this, the following signature was needed: -function core_parser_hook(code, filename, offset, options) - core_parser_hook(code, filename, 1, offset, options) +function core_parser_hook(code, filename, offset, rule) + core_parser_hook(code, filename, 1, offset, rule) end if _has_v1_10_hooks diff --git a/src/julia/parser.jl b/src/julia/parser.jl index a2ce4209..626187e3 100644 --- a/src/julia/parser.jl +++ b/src/julia/parser.jl @@ -1563,7 +1563,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) k2 = peek(ps, 2) if peek(ps) == K"NewlineWs" && !is_closing_token(ps, k2) && k2 != K"NewlineWs" - bump(ps) # newline + bump(ps, TRIVIA_FLAG) # newline parse_eq(ps) end end diff --git a/src/julia/parser_api.jl b/src/julia/parser_api.jl index a3e2162b..5dbeb992 100644 --- a/src/julia/parser_api.jl +++ b/src/julia/parser_api.jl @@ -43,22 +43,42 @@ structures may be extracted from `stream` with the [`build_tree`](@ref) function * `:statement` — parse a single statement, or statements separated by semicolons. * `:atom` — parse a single syntax "atom": a literal, identifier, or parenthesized expression. + +If `incremental` is `true`, skip whitespace (including newlines) before parsing +an atom or statement and skip any trailing whitespace up to the newline when +parsing a statement. In incremental mode it's not an error for the end of +stream to be reached and `all_trivia(stream)` can be used to detect if the end +of the stream was reached without encountering significant syntax. 
""" -function parse!(stream::ParseStream; rule::Symbol=:all) +function parse!(stream::ParseStream; rule::Symbol=:all, incremental=false) if rule == :toplevel Base.depwarn("Use of rule == :toplevel in parse!() is deprecated. use `rule=:all` instead.", :parse!) rule = :all end + mark = position(stream) ps = ParseState(stream) + if incremental && rule != :all + bump_trivia(stream, skip_newlines=true) + end if rule === :all parse_toplevel(ps) elseif rule === :statement - parse_stmts(ps) + if !incremental || peek(stream) != K"EndMarker" + parse_stmts(ps) + end elseif rule === :atom - parse_atom(ps) + if !incremental || peek(stream) != K"EndMarker" + parse_atom(ps) + end else throw(ArgumentError("Unknown grammar rule $rule")) end + if incremental && rule == :statement + bump_trivia(stream; skip_newlines=false) + if peek(stream) == K"NewlineWs" + bump(stream, TRIVIA_FLAG) + end + end validate_tokens(stream) stream end @@ -81,12 +101,12 @@ end function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=VERSION, ignore_trivia=true, filename=nothing, first_line=1, ignore_errors=false, - ignore_warnings=ignore_errors, kws...) where {T} + ignore_warnings=ignore_errors, incremental=false, kws...) where {T} stream = ParseStream(text, index; version=version) if ignore_trivia && rule != :all bump_trivia(stream, skip_newlines=true) end - parse!(stream; rule=rule) + parse!(stream; rule=rule, incremental=incremental) if need_eof if (ignore_trivia && peek(stream, skip_newlines=true) != K"EndMarker") || (!ignore_trivia && (peek(stream, skip_newlines=false, skip_whitespace=false) != K"EndMarker")) @@ -108,7 +128,8 @@ _parse_docs = """ ignore_trivia=true, filename=nothing, ignore_errors=false, - ignore_warnings=ignore_errors) + ignore_warnings=ignore_errors, + incremental=false) # Parse all statements at top level (file scope) parseall(...) 
diff --git a/test/parser_api.jl b/test/parser_api.jl index 10a09d3a..c3820d29 100644 --- a/test/parser_api.jl +++ b/test/parser_api.jl @@ -95,6 +95,17 @@ (Expr(:block, LineNumberNode(2), :a), 12) @test JuliaSyntax.parsestmt(Expr, "begin\na\nend\nbegin\nb\nend", 12) == (Expr(:block, LineNumberNode(3), :b), 24) + + # Test that parsing statements in incremental mode works and stops + # after whitespace / comment trivia + @test JuliaSyntax.parsestmt(Expr, "x + 1\n(y)\n", 1, incremental=true) == (:(x + 1), 7) + @test JuliaSyntax.parsestmt(Expr, "x + 1\n(y)\n", 7, incremental=true) == (:y, 11) + @test JuliaSyntax.parsestmt(Expr, " x#==#", 1, incremental=true) == (:x, 7) + let ps = JuliaSyntax.ParseStream(" #==# ") + JuliaSyntax.parse!(ps, rule=:statement, incremental=true) + @test JuliaSyntax.all_trivia(ps) + @test JuliaSyntax.last_byte(ps) == 6 + end end @testset "error/warning handling" begin