From 2ca86b6f3743de51890604aa2ee72fbc4a9f5e66 Mon Sep 17 00:00:00 2001 From: c42f Date: Mon, 27 Feb 2023 10:42:16 +1000 Subject: [PATCH 1/3] Make macros bind stronger than commas within parentheses That is, `f(@x a, b)` parses as (call f (macrocall a) b) rather than (call f (macrocall (tuple a b))) Unfortunately, this is mildly breaking, due to such strange syntax as `(@unpack a,b = c)` --- src/parser.jl | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/parser.jl b/src/parser.jl index acf2fe26..66f6f1f8 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -21,24 +21,27 @@ struct ParseState whitespace_newline::Bool # Enable parsing `where` with high precedence where_enabled::Bool + # Comma special + low_precedence_comma::Bool end # Normal context function ParseState(stream::ParseStream) - ParseState(stream, true, false, false, false, false, true) + ParseState(stream, true, false, false, false, false, true, false) end function ParseState(ps::ParseState; range_colon_enabled=nothing, space_sensitive=nothing, for_generator=nothing, end_symbol=nothing, whitespace_newline=nothing, - where_enabled=nothing) + where_enabled=nothing, low_precedence_comma=nothing) ParseState(ps.stream, range_colon_enabled === nothing ? ps.range_colon_enabled : range_colon_enabled, space_sensitive === nothing ? ps.space_sensitive : space_sensitive, for_generator === nothing ? ps.for_generator : for_generator, end_symbol === nothing ? ps.end_symbol : end_symbol, whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, - where_enabled === nothing ? ps.where_enabled : where_enabled) + where_enabled === nothing ? ps.where_enabled : where_enabled, + low_precedence_comma === nothing ? 
ps.low_precedence_comma : low_precedence_comma) end # Functions to change parse state @@ -50,7 +53,8 @@ function normal_context(ps::ParseState) where_enabled=true, for_generator=false, end_symbol=false, - whitespace_newline=false) + whitespace_newline=false, + low_precedence_comma=false) end function with_space_sensitive(ps::ParseState) @@ -545,7 +549,11 @@ end # # flisp: parse-eq function parse_eq(ps::ParseState) - parse_assignment(ps, parse_comma) + if ps.low_precedence_comma + parse_eq_star(ps) + else + parse_assignment(ps, parse_comma) + end end # parse_eq_star is used where commas are special, for example in an argument list @@ -2633,7 +2641,6 @@ end # flisp: parse-space-separated-exprs function parse_space_separated_exprs(ps::ParseState) - ps = with_space_sensitive(ps) n_sep = 0 while true k = peek(ps) @@ -2984,7 +2991,8 @@ function parse_paren(ps::ParseState, check_identifiers=true) ps = ParseState(ps, range_colon_enabled=true, space_sensitive=false, where_enabled=true, - whitespace_newline=true) + whitespace_newline=true, + low_precedence_comma=true) mark = position(ps) @check peek(ps) == K"(" bump(ps, TRIVIA_FLAG) # K"(" @@ -3074,7 +3082,8 @@ function parse_brackets(after_parse::Function, ps = ParseState(ps, range_colon_enabled=true, space_sensitive=false, where_enabled=true, - whitespace_newline=true) + whitespace_newline=true, + low_precedence_comma=true) params_positions = acquire_positions(ps.stream) last_eq_before_semi = 0 num_subexprs = 0 From ce5fb2297cf03841a9776db0f903c263b48d1769 Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 12 May 2023 15:03:05 +1000 Subject: [PATCH 2/3] Make low precedence comma a feature flag, off by default Also enable it within concatenation syntax for consistency --- src/parse_stream.jl | 43 ++++++++++++++++++++++++------------------- src/parser.jl | 8 +++++--- src/parser_api.jl | 5 ++++- test/parser.jl | 11 +++++++++-- 4 files changed, 42 insertions(+), 25 deletions(-) diff --git a/src/parse_stream.jl 
b/src/parse_stream.jl index 74789c30..4bc175ae 100644 --- a/src/parse_stream.jl +++ b/src/parse_stream.jl @@ -236,12 +236,16 @@ mutable struct ParseStream diagnostics::Vector{Diagnostic} # Counter for number of peek()s we've done without making progress via a bump() peek_count::Int + + # Feature flags # (major,minor) version of Julia we're parsing this code for. # May be different from VERSION! version::Tuple{Int,Int} + # Comma binds looser than macrocall in bracketed expressions + low_precedence_comma_in_brackets::Bool - function ParseStream(text_buf::Vector{UInt8}, text_root, next_byte::Integer, - version::VersionNumber) + function ParseStream(text_buf::Vector{UInt8}, text_root, next_byte::Integer; + version=VERSION, low_precedence_comma_in_brackets=false) io = IOBuffer(text_buf) seek(io, next_byte-1) lexer = Tokenize.Lexer(io) @@ -264,44 +268,45 @@ mutable struct ParseStream Vector{TaggedRange}(), Vector{Diagnostic}(), 0, - ver) + ver, + low_precedence_comma_in_brackets) end end -function ParseStream(text::Vector{UInt8}, index::Integer=1; version=VERSION) - ParseStream(text, text, index, version) +function ParseStream(text::Vector{UInt8}, index::Integer=1; kws...) + ParseStream(text, text, index; kws...) end # Buffer with unknown owner. Not exactly recommended, but good for C interop -function ParseStream(ptr::Ptr{UInt8}, len::Integer, index::Integer=1; version=VERSION) - ParseStream(unsafe_wrap(Vector{UInt8}, ptr, len), nothing, index, version) +function ParseStream(ptr::Ptr{UInt8}, len::Integer, index::Integer=1; kws...) + ParseStream(unsafe_wrap(Vector{UInt8}, ptr, len), nothing, index; kws...) end # Buffers originating from strings -function ParseStream(text::String, index::Integer=1; version=VERSION) +function ParseStream(text::String, index::Integer=1; kws...) ParseStream(unsafe_wrap(Vector{UInt8}, text), - text, index, version) + text, index; kws...) 
end -function ParseStream(text::SubString, index::Integer=1; version=VERSION) +function ParseStream(text::SubString, index::Integer=1; kws...) # See also IOBuffer(SubString("x")) ParseStream(unsafe_wrap(Vector{UInt8}, pointer(text), sizeof(text)), - text, index, version) + text, index; kws...) end -function ParseStream(text::AbstractString, index::Integer=1; version=VERSION) - ParseStream(String(text), index; version=version) +function ParseStream(text::AbstractString, index::Integer=1; kws...) + ParseStream(String(text), index; kws...) end # IO-based cases -function ParseStream(io::IOBuffer; version=VERSION) - ParseStream(io.data, io, position(io)+1, version) +function ParseStream(io::IOBuffer; kws...) + ParseStream(io.data, io, position(io)+1; kws...) end -function ParseStream(io::Base.GenericIOBuffer; version=VERSION) +function ParseStream(io::Base.GenericIOBuffer; kws...) textbuf = unsafe_wrap(Vector{UInt8}, pointer(io.data), length(io.data)) - ParseStream(textbuf, io, position(io)+1, version) + ParseStream(textbuf, io, position(io)+1; kws...) end -function ParseStream(io::IO; version=VERSION) +function ParseStream(io::IO; kws...) textbuf = read(io) - ParseStream(textbuf, textbuf, 1, version) + ParseStream(textbuf, textbuf, 1; kws...) end function Base.show(io::IO, mime::MIME"text/plain", stream::ParseStream) diff --git a/src/parser.jl b/src/parser.jl index 66f6f1f8..e211e4b2 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -21,7 +21,7 @@ struct ParseState whitespace_newline::Bool # Enable parsing `where` with high precedence where_enabled::Bool - # Comma special + # Comma binds looser than macro calls (for use in brackets) low_precedence_comma::Bool end @@ -41,7 +41,8 @@ function ParseState(ps::ParseState; range_colon_enabled=nothing, end_symbol === nothing ? ps.end_symbol : end_symbol, whitespace_newline === nothing ? ps.whitespace_newline : whitespace_newline, where_enabled === nothing ? ps.where_enabled : where_enabled, - low_precedence_comma === nothing ? 
ps.low_precedence_comma : low_precedence_comma) + low_precedence_comma === nothing ? ps.low_precedence_comma : + low_precedence_comma && ps.stream.low_precedence_comma_in_brackets) end # Functions to change parse state @@ -2937,7 +2938,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) space_sensitive=true, where_enabled=true, whitespace_newline=false, - for_generator=true) + for_generator=true, + low_precedence_comma=true) k = peek(ps, skip_newlines=true) mark = position(ps) if k == closer diff --git a/src/parser_api.jl b/src/parser_api.jl index c48fef60..628fd7eb 100644 --- a/src/parser_api.jl +++ b/src/parser_api.jl @@ -76,9 +76,12 @@ function parse!(::Type{TreeType}, io::IO; end function _parse(rule::Symbol, need_eof::Bool, ::Type{T}, text, index=1; version=VERSION, + low_precedence_comma_in_brackets=false, ignore_trivia=true, filename=nothing, first_line=1, ignore_errors=false, ignore_warnings=ignore_errors, kws...) where {T} - stream = ParseStream(text, index; version=version) + stream = ParseStream(text, index; + version=version, + low_precedence_comma_in_brackets=low_precedence_comma_in_brackets) if ignore_trivia && rule != :all bump_trivia(stream, skip_newlines=true) empty!(stream) diff --git a/test/parser.jl b/test/parser.jl index 69ef9554..1d44e31d 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -1,8 +1,8 @@ """ Parse string to SyntaxNode tree and show as an sexpression """ -function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", expr=false) - stream = ParseStream(code, version=v) +function parse_to_sexpr_str(production, code::AbstractString; v=v"1.6", expr=false, kws...) + stream = ParseStream(code; version=v, kws...) 
production(ParseState(stream)) JuliaSyntax.validate_tokens(stream) t = build_tree(GreenNode, stream, wrap_toplevel_as_kind=K"None") @@ -430,6 +430,13 @@ tests = [ "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" # + "f(@x a, b)" => "(call f (macrocall @x (tuple a b)))" + ((low_precedence_comma_in_brackets=true,), "f(@x a, b)") => + "(call f (macrocall @x a) b)" + ((low_precedence_comma_in_brackets=true,), "(@x a, b)") => + "(tuple-p (macrocall @x a) b)" + ((low_precedence_comma_in_brackets=true,), "[@x a, b]") => + "(vect (macrocall @x a) b)" ], JuliaSyntax.parse_resword => [ # In normal_context From 0a0ffa386c3a433309dfdf5108f33fa3468a707f Mon Sep 17 00:00:00 2001 From: c42f Date: Fri, 12 May 2023 17:08:02 +1000 Subject: [PATCH 3/3] Hack: Test tight binding commas against JuliaSyntax ref parser --- test/test_utils.jl | 9 ++++++--- tools/check_all_packages.jl | 40 ++++++++++++++++++++++--------------- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/test/test_utils.jl b/test/test_utils.jl index 3ee9f243..98af3a27 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -243,8 +243,9 @@ function equals_flisp_parse(exprs_equal, tree) exprs_equal(fl_ex, ex) end -function _reduce_tree(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum) - if equals_flisp_parse(exprs_equal, tree) +function _reduce_tree(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum, + equals_ref_parse=equals_flisp_parse) + if equals_ref_parse(exprs_equal, tree) return false end if !haschildren(tree) @@ -257,7 +258,9 @@ function _reduce_tree(failing_subtrees, tree; exprs_equal=exprs_equal_no_linenum if is_trivia(child) || !haschildren(child) continue end - had_failing_subtrees |= _reduce_tree(failing_subtrees, child; exprs_equal=exprs_equal) + had_failing_subtrees |= _reduce_tree(failing_subtrees, child; + exprs_equal=exprs_equal, + equals_ref_parse=equals_ref_parse) end end if !had_failing_subtrees 
diff --git a/tools/check_all_packages.jl b/tools/check_all_packages.jl index 0fad0c30..c678100c 100644 --- a/tools/check_all_packages.jl +++ b/tools/check_all_packages.jl @@ -5,6 +5,8 @@ using JuliaSyntax, Logging, TerminalLoggers, ProgressLogging, Serialization +using JuliaSyntax: GreenNode + include("../test/test_utils.jl") include("../test/fuzz_test.jl") @@ -19,26 +21,41 @@ exceptions = [] all_reduced_failures = String[] +function _lowprec_commas_equiv(exprs_equal, tree) + node_text = sourcetext(tree) + e1 = parseall(GreenNode, node_text, ignore_errors=true) + e2 = parseall(GreenNode, node_text, ignore_errors=true, + low_precedence_comma_in_brackets=true) + e1 == e2 +end + Logging.with_logger(TerminalLogger()) do global exception_count, mismatch_count, t0 @withprogress for (ifile, fpath) in enumerate(source_paths) @logprogress ifile/file_count time_ms=round((time() - t0)/ifile*1000, digits = 2) text = read(fpath, String) expr_cache = fpath*".Expr" - #e2 = JuliaSyntax.fl_parseall(text) - e2 = open(deserialize, fpath*".Expr") - @assert Meta.isexpr(e2, :toplevel) + #e_ref = JuliaSyntax.fl_parseall(text) + #e_ref = open(deserialize, fpath*".Expr") + #@assert Meta.isexpr(e_ref, :toplevel) + e_ref = try + JuliaSyntax.parseall(GreenNode, text, filename=fpath, ignore_warnings=true) + catch + continue + end try - e1 = JuliaSyntax.parseall(Expr, text, filename=fpath, ignore_warnings=true) - if !exprs_roughly_equal(e2, e1) + e1 = JuliaSyntax.parseall(GreenNode, text, filename=fpath, ignore_warnings=true, low_precedence_comma_in_brackets=true) + if e1 != e_ref + source = SourceFile(text, filename=fpath) + e1sn = SyntaxNode(source, e1) mismatch_count += 1 failing_source = sprint(context=:color=>true) do io - for c in reduce_tree(parseall(SyntaxNode, text)) + for c in reduce_tree(e1sn, equals_ref_parse=_lowprec_commas_equiv) JuliaSyntax.highlight(io, c.source, range(c), context_lines_inner=5) println(io, "\n") end end - reduced_failures = reduce_text.(reduce_tree(text), + 
reduced_failures = reduce_text.(reduce_tree(text, equals_ref_parse=_lowprec_commas_equiv), parsers_fuzzy_disagree) append!(all_reduced_failures, reduced_failures) @error("Parsers succeed but disagree", @@ -51,15 +68,6 @@ Logging.with_logger(TerminalLogger()) do err isa InterruptException && rethrow() ex = (err, catch_backtrace()) push!(exceptions, ex) - ref_parse = "success" - if length(e2.args) >= 1 && Meta.isexpr(last(e2.args), (:error, :incomplete)) - ref_parse = "fail" - if err isa JuliaSyntax.ParseError - # Both parsers agree that there's an error, and - # JuliaSyntax didn't have an internal error. - continue - end - end exception_count += 1 parse_to_syntax = "success"