diff --git a/docs/src/api.md b/docs/src/api.md index d79c86a3..5dfbec6e 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -115,6 +115,7 @@ JuliaSyntax.has_flags JuliaSyntax.TRIPLE_STRING_FLAG JuliaSyntax.RAW_STRING_FLAG JuliaSyntax.PARENS_FLAG +JuliaSyntax.TRAILING_COMMA_FLAG JuliaSyntax.COLON_QUOTE JuliaSyntax.TOPLEVEL_SEMICOLONS_FLAG JuliaSyntax.MUTABLE_FLAG diff --git a/src/parse_stream.jl b/src/parse_stream.jl index 5b04b42c..02c0307e 100644 --- a/src/parse_stream.jl +++ b/src/parse_stream.jl @@ -39,6 +39,13 @@ Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses """ const PARENS_FLAG = RawFlags(1<<5) +""" +Set for various delimited constructs when they contain a trailing comma. For +example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where +this applies are: `tuple call dotcall macrocall vect curly braces <: >:`. +""" +const TRAILING_COMMA_FLAG = RawFlags(1<<6) + """ Set for K"quote" for the short form `:x` as opposed to long form `quote x end` """ @@ -139,22 +146,27 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) is_prefix_op_call(head) && (str = str*"-pre") is_postfix_op_call(head) && (str = str*"-post") - if kind(head) in KSet"string cmdstring Identifier" + k = kind(head) + if k in KSet"string cmdstring Identifier" has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s") has_flags(head, RAW_STRING_FLAG) && (str = str*"-r") - elseif kind(head) in KSet"tuple block macrocall" + elseif k in KSet"tuple block macrocall" has_flags(head, PARENS_FLAG) && (str = str*"-p") - elseif kind(head) == K"quote" + elseif k == K"quote" has_flags(head, COLON_QUOTE) && (str = str*"-:") - elseif kind(head) == K"toplevel" + elseif k == K"toplevel" has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;") - elseif kind(head) == K"function" + elseif k == K"function" has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=") - elseif kind(head) == K"struct" + elseif k == K"struct" has_flags(head, 
MUTABLE_FLAG) && (str = str*"-mut") - elseif kind(head) == K"module" + elseif k == K"module" has_flags(head, BARE_MODULE_FLAG) && (str = str*"-bare") end + if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" && + has_flags(head, TRAILING_COMMA_FLAG) + str *= "-," + end is_suffixed(head) && (str = str*"-suf") n = numeric_flags(head) n != 0 && (str = str*"-"*string(n)) diff --git a/src/parser.jl b/src/parser.jl index 0cd65f7a..bb08134d 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1302,10 +1302,10 @@ function parse_unary(ps::ParseState) # +(a,b)(x)^2 ==> (call-i (call (call + a b) x) ^ 2) if is_type_operator(op_t) # <:(a,) ==> (<: a) - emit(ps, mark, op_k) + emit(ps, mark, op_k, opts.delim_flags) reset_node!(ps, op_pos, flags=TRIVIA_FLAG) else - emit(ps, mark, K"call") + emit(ps, mark, K"call", opts.delim_flags) end parse_call_chain(ps, mark) parse_factor_with_initial_ex(ps, mark) @@ -1552,13 +1552,14 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # f (a) ==> (call f (error-t) a) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - parse_call_arglist(ps, K")") + opts = parse_call_arglist(ps, K")") if peek(ps) == K"do" # f(x) do y body end ==> (call f x (do (tuple y) (block body))) parse_do(ps) end emit(ps, mark, is_macrocall ? K"macrocall" : K"call", - is_macrocall ? PARENS_FLAG : EMPTY_FLAGS) + # TODO: Add PARENS_FLAG to all calls which use them? + (is_macrocall ? PARENS_FLAG : EMPTY_FLAGS)|opts.delim_flags) if is_macrocall # @x(a, b) ==> (macrocall-p @x a b) # A.@x(y) ==> (macrocall-p (. A @x) y) @@ -1634,8 +1635,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # f. (x) ==> (dotcall f (error-t) x) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - parse_call_arglist(ps, K")") - emit(ps, mark, K"dotcall") + opts = parse_call_arglist(ps, K")") + emit(ps, mark, K"dotcall", opts.delim_flags) elseif k == K":" # A.:+ ==> (. A (quote-: +)) # A.: + ==> (. 
A (error-t) (quote-: +)) @@ -1697,20 +1698,20 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # S {a} ==> (curly S (error-t) a) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) - parse_call_arglist(ps, K"}") + opts = parse_call_arglist(ps, K"}") if is_macrocall # @S{a,b} ==> (macrocall S (braces a b)) # A.@S{a} ==> (macrocall (. A @S) (braces a)) # @S{a}.b ==> (. (macrocall @S (braces a)) b) fix_macro_name_kind!(ps, macro_name_position) - emit(ps, m, K"braces") + emit(ps, m, K"braces", opts.delim_flags) emit(ps, mark, K"macrocall") min_supported_version(v"1.6", ps, mark, "macro call without space before `{}`") is_macrocall = false macro_atname_range = nothing else # S{a,b} ==> (curly S a b) - emit(ps, mark, K"curly") + emit(ps, mark, K"curly", opts.delim_flags) end elseif k in KSet" \" \"\"\" ` ``` " && !preceding_whitespace(t) && maybe_strmac && @@ -2151,7 +2152,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function (f(x),) end ==> (function (tuple-p (call f x)) (block)) ambiguous_parens = opts.maybe_grouping_parens && peek_behind(ps).kind in KSet"macrocall $" - emit(ps, mark, K"tuple", PARENS_FLAG) + emit(ps, mark, K"tuple", PARENS_FLAG|opts.delim_flags) if ambiguous_parens # Got something like `(@f(x))`. Is it anon `(@f(x),)` or named sig `@f(x)` ?? emit(ps, mark, K"error", error="Ambiguous signature. Add a trailing comma if this is a 1-argument anonymous function; remove parentheses if this is a macro call acting as function signature.") @@ -2716,16 +2717,21 @@ end # surrounding brackets. 
# # flisp: parse-vect -function parse_vect(ps::ParseState, closer) +function parse_vect(ps::ParseState, closer, prefix_trailing_comma) # [x, y] ==> (vect x y) # [x, y] ==> (vect x y) # [x,y ; z] ==> (vect x y (parameters z)) # [x=1, y=2] ==> (vect (= x 1) (= y 2)) # [x=1, ; y=2] ==> (vect (= x 1) (parameters (= y 2))) - parse_brackets(ps, closer) do _, _, _, _ - return (needs_parameters=true,) + opts = parse_brackets(ps, closer) do _, _, _, num_subexprs + return (needs_parameters=true, + num_subexprs=num_subexprs) + end + delim_flags = opts.delim_flags + if opts.num_subexprs == 0 && prefix_trailing_comma + delim_flags |= TRAILING_COMMA_FLAG end - return (K"vect", EMPTY_FLAGS) + return (K"vect", delim_flags) end # Parse generators @@ -2988,7 +2994,7 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) mark = position(ps) if k == closer # [] ==> (vect) - return parse_vect(ps, closer) + return parse_vect(ps, closer, false) elseif k == K";" #v1.8: [;] ==> (ncat-1) #v1.8: [;;] ==> (ncat-2) @@ -3003,14 +3009,15 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) parse_eq_star(ps) k = peek(ps, skip_newlines=true) if k == K"," || (is_closing_token(ps, k) && k != K";") - if k == K"," + prefix_trailing_comma = k == K"," + if prefix_trailing_comma # [x,] ==> (vect x) bump(ps, TRIVIA_FLAG; skip_newlines = true) end # [x] ==> (vect x) # [x \n ] ==> (vect x) # [x ==> (vect x (error-t)) - parse_vect(ps, closer) + parse_vect(ps, closer, prefix_trailing_comma) elseif k == K"for" # [x for a in as] ==> (comprehension (generator x (iteration (in a as)))) # [x \n\n for a in as] ==> (comprehension (generator x (iteration (in a as)))) @@ -3087,7 +3094,7 @@ function parse_paren(ps::ParseState, check_identifiers=true) # (; a=1; b=2) ==> (tuple-p (parameters (= a 1)) (parameters (= b 2))) # (a; b; c,d) ==> (tuple-p a (parameters b) (parameters c d)) # (a=1, b=2; c=3) ==> (tuple-p (= a 1) (= b 2) (parameters (= c 3))) - emit(ps, mark, K"tuple", PARENS_FLAG) + emit(ps, mark, 
K"tuple", PARENS_FLAG|opts.delim_flags) elseif opts.is_block # Blocks # (;;) ==> (block-p) @@ -3135,6 +3142,7 @@ function parse_brackets(after_parse::Function, had_commas = false had_splat = false param_start = nothing + trailing_comma = false while true k = peek(ps) if k == closing_kind @@ -3150,11 +3158,13 @@ function parse_brackets(after_parse::Function, bump(ps, TRIVIA_FLAG) bump_trivia(ps) elseif is_closing_token(ps, k) + trailing_comma = false # Error; handled below in bump_closing_token break else mark = position(ps) parse_eq_star(ps) + trailing_comma = false num_subexprs += 1 if num_subexprs == 1 had_splat = peek_behind(ps).kind == K"..." @@ -3172,6 +3182,7 @@ function parse_brackets(after_parse::Function, if k == K"," had_commas = true bump(ps, TRIVIA_FLAG) + trailing_comma = true elseif k == K";" || k == closing_kind # Handled above continue @@ -3193,7 +3204,7 @@ function parse_brackets(after_parse::Function, end release_positions(ps.stream, params_positions) bump_closing_token(ps, closing_kind, " or `,`") - return opts + return (; opts..., delim_flags=trailing_comma ? TRAILING_COMMA_FLAG : EMPTY_FLAGS) end _is_indentation(b::UInt8) = (b == u8" " || b == u8"\t") @@ -3420,14 +3431,15 @@ end function emit_braces(ps, mark, ckind, cflags) if ckind == K"hcat" # {x y} ==> (bracescat (row x y)) - emit(ps, mark, K"row", cflags) + emit(ps, mark, K"row", cflags & ~TRAILING_COMMA_FLAG) elseif ckind == K"ncat" # {x ;;; y} ==> (bracescat (nrow-3 x y)) - emit(ps, mark, K"nrow", cflags) + emit(ps, mark, K"nrow", cflags & ~TRAILING_COMMA_FLAG) end check_ncat_compat(ps, mark, ckind) outk = ckind in KSet"vect comprehension" ? K"braces" : K"bracescat" - emit(ps, mark, outk) + delim_flags = outk == K"braces" ? (cflags & TRAILING_COMMA_FLAG) : EMPTY_FLAGS + emit(ps, mark, outk, delim_flags) end # parse numbers, identifiers, parenthesized expressions, lists, vectors, etc. 
diff --git a/test/parser.jl b/test/parser.jl index e6115ad4..e36ba73e 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -234,9 +234,10 @@ tests = [ ".*(x)" => "(call (. *) x)" # Prefix function calls for operators which are both binary and unary "+(a,b)" => "(call + a b)" - ".+(a,)" => "(call (. +) a)" + "+(a,)" => "(call-, + a)" + ".+(a,)" => "(call-, (. +) a)" "(.+)(a)" => "(call (parens (. +)) a)" - "+(a=1,)" => "(call + (= a 1))" + "+(a=1,)" => "(call-, + (= a 1))" "+(a...)" => "(call + (... a))" "+(a;b,c)" => "(call + a (parameters b c))" "+(;a)" => "(call + (parameters a))" @@ -251,7 +252,7 @@ tests = [ # Prefix calls have higher precedence than ^ "+(a,b)^2" => "(call-i (call + a b) ^ 2)" "+(a,b)(x)^2" => "(call-i (call (call + a b) x) ^ 2)" - "<:(a,)" => "(<: a)" + "<:(a,)" => "(<:-, a)" # Unary function calls with brackets as grouping, not an arglist ".+(a)" => "(dotcall-pre + (parens a))" "+(a;b)" => "(call-pre + (block-p a b))" @@ -306,6 +307,7 @@ tests = [ # Really for parse_where "x where \n {T}" => "(where x (braces T))" "x where {T,S}" => "(where x (braces T S))" + "x where {T,S,}" => "(where x (braces-, T S))" "x where {T S}" => "(where x (bracescat (row T S)))" "x where {y for y in ys}" => "(where x (braces (generator y (iteration (in y ys)))))" "x where T" => "(where x T)" @@ -364,11 +366,13 @@ tests = [ # calls with brackets "f(a,b)" => "(call f a b)" + "f(a,)" => "(call-, f a)" "f(a=1; b=2)" => "(call f (= a 1) (parameters (= b 2)))" "f(a; b; c)" => "(call f a (parameters b) (parameters c))" "(a=1)()" => "(call (parens (= a 1)))" "f (a)" => "(call f (error-t) a)" "@x(a, b)" => "(macrocall-p @x a b)" + "@x(a, b,)" => "(macrocall-p-, @x a b)" "A.@x(y)" => "(macrocall-p (. A @x) y)" "A.@x(y).z" => "(. (macrocall-p (. A @x) y) z)" "f(y for x = xs; a)" => "(call f (generator y (iteration (in x xs))) (parameters a))" @@ -407,6 +411,7 @@ tests = [ "A.@B.x" => "(macrocall (. (. 
A B) (error-t) @x))" "@M.(x)" => "(macrocall (dotcall @M (error-t) x))" "f.(a,b)" => "(dotcall f a b)" + "f.(a,b,)" => "(dotcall-, f a b)" "f.(a=1; b=2)" => "(dotcall f (= a 1) (parameters (= b 2)))" "(a=1).()" => "(dotcall (parens (= a 1)))" "f. (x)" => "(dotcall f (error-t) x)" @@ -577,9 +582,10 @@ tests = [ "macro (\$f)() end" => "(macro (call (parens (\$ f))) (block))" "function (x) body end"=> "(function (tuple-p x) (block body))" "function (x,y) end" => "(function (tuple-p x y) (block))" + "function (x,y,) end" => "(function (tuple-p-, x y) (block))" "function (x=1) end" => "(function (tuple-p (= x 1)) (block))" "function (;x=1) end" => "(function (tuple-p (parameters (= x 1))) (block))" - "function (f(x),) end" => "(function (tuple-p (call f x)) (block))" + "function (f(x),) end" => "(function (tuple-p-, (call f x)) (block))" "function (@f(x);) end" => "(function (tuple-p (macrocall-p @f x) (parameters)) (block))" "function (@f(x)...) end" => "(function (tuple-p (... (macrocall-p @f x))) (block))" "function (@f(x)) end" => "(function (error (tuple-p (macrocall-p @f x))) (block))" @@ -715,7 +721,7 @@ tests = [ JuliaSyntax.parse_paren => [ # Tuple syntax with commas "()" => "(tuple-p)" - "(x,)" => "(tuple-p x)" + "(x,)" => "(tuple-p-, x)" "(x,y)" => "(tuple-p x y)" "(x=1, y=2)" => "(tuple-p (= x 1) (= y 2))" # Named tuples with initial semicolon @@ -827,11 +833,12 @@ tests = [ "=" => "(error =)" # parse_cat "[]" => "(vect)" - "[x,]" => "(vect x)" - "[x\n,,]" => "(vect x (error-t ✘))" + "[x,]" => "(vect-, x)" + "[x,y,]" => "(vect-, x y)" + "[x\n,,]" => "(vect-, x (error-t ✘))" "[x]" => "(vect x)" "[x \n ]" => "(vect x)" - "[x \n, ]" => "(vect x)" + "[x \n, ]" => "(vect-, x)" "[x" => "(vect x (error-t))" "[x \n\n ]" => "(vect x)" "[x for a in as]" => "(comprehension (generator x (iteration (in a as))))" @@ -849,10 +856,10 @@ tests = [ "(x for a in as if z)" => "(parens (generator x (filter (iteration (in a as)) z)))" # parse_vect "[x, y]" => "(vect x y)" - "[x, 
y]" => "(vect x y)" + "[x, y,]" => "(vect-, x y)" "[x,\n y]" => "(vect x y)" "[x\n, y]" => "(vect x y)" - "[x\n,, y]" => "(vect x (error-t ✘ y))" + "[x\n,, y]" => "(vect-, x (error-t ✘ y))" "[x,y ; z]" => "(vect x y (parameters z))" "[x=1, y=2]" => "(vect (= x 1) (= y 2))" "[x=1, ; y=2]" => "(vect (= x 1) (parameters (= y 2)))" @@ -862,6 +869,8 @@ tests = [ ":(::\n)" => "(quote-: (parens ::))" "(function f \n end)" => "(parens (function f))" # braces + "{x,y}" => "(braces x y)" + "{x,y,}" => "(braces-, x y)" "{x y}" => "(bracescat (row x y))" ((v=v"1.7",), "{x ;;; y}") => "(bracescat (nrow-3 x y))" # Macro names can be keywords