diff --git a/src/integration/expr.jl b/src/integration/expr.jl index 038bad9a..36f23e27 100644 --- a/src/integration/expr.jl +++ b/src/integration/expr.jl @@ -338,6 +338,9 @@ end return adjust_macro_name!(retexpr.args[1], k) elseif k == K"?" retexpr.head = :if + elseif k == K"DotsIdentifier" + n = numeric_flags(flags(nodehead)) + return n == 2 ? :(..) : :(...) elseif k == K"op=" && length(args) == 3 lhs = args[1] op = args[2] diff --git a/src/julia/julia_parse_stream.jl b/src/julia/julia_parse_stream.jl index 87ad0386..3c17eea0 100644 --- a/src/julia/julia_parse_stream.jl +++ b/src/julia/julia_parse_stream.jl @@ -137,8 +137,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) is_postfix_op_call(head) && (str = str*"-post") k = kind(head) - # Handle numeric flags for nrow/ncat nodes - if k in KSet"nrow ncat typed_ncat" + # Handle numeric flags for nodes that take them + if k in KSet"nrow ncat typed_ncat DotsIdentifier" n = numeric_flags(head) n != 0 && (str = str*"-"*string(n)) else @@ -301,13 +301,18 @@ function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} return position(stream) end -function peek_dotted_op_token(ps, allow_whitespace=false) +function peek_dotted_op_token(ps) # Peek the next token, but if it is a dot, peek the next one as well t = peek_token(ps) isdotted = kind(t) == K"." if isdotted t2 = peek_token(ps, 2) - if !is_operator(t2) || (!allow_whitespace && preceding_whitespace(t2)) + if preceding_whitespace(t2) + isdotted = false + elseif !is_operator(t2) + isdotted = false + elseif kind(t2) == K"." && peek(ps, 3) == K"." 
+ # Treat `..` as dotted K".", unless there's another dot after isdotted = false else t = t2 @@ -316,13 +321,21 @@ function peek_dotted_op_token(ps, allow_whitespace=false) return (isdotted, t) end -function bump_dotted(ps, isdot, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None") +function bump_dotted(ps, isdot, t, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None") if isdot - if emit_dot_node - dotmark = position(ps) - bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG - else - bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG + dotmark = position(ps) + bump(ps, TRIVIA_FLAG) + if kind(t) == K"." + # .. => DotsIdentifier-2 + bump(ps, TRIVIA_FLAG) + pos = emit(ps, dotmark, K"DotsIdentifier", set_numeric_flags(2)) + nt = peek_token(ps) + if is_operator(nt) && !preceding_whitespace(nt) + # a..+b => (call-i a .. (error-t) (call + b)) + bump_invisible(ps, K"error", TRIVIA_FLAG, + error="`..` here is interpreted as a binary operator. A space is required if followed by another operator.") + end + return pos end end pos = bump(ps, flags, remap_kind=remap_kind) diff --git a/src/julia/kinds.jl b/src/julia/kinds.jl index 19a00eb2..90b9ec96 100644 --- a/src/julia/kinds.jl +++ b/src/julia/kinds.jl @@ -278,8 +278,6 @@ register_kinds!(JuliaSyntax, 0, [ "ErrorInvalidOperator" "Error**" - "..." - # Level 1 "BEGIN_ASSIGNMENTS" "BEGIN_SYNTACTIC_ASSIGNMENTS" @@ -774,7 +772,6 @@ register_kinds!(JuliaSyntax, 0, [ # Level 8 "BEGIN_COLON" ":" - ".." "…" "⁝" "⋮" @@ -1033,6 +1030,10 @@ register_kinds!(JuliaSyntax, 0, [ "typed_ncat" "row" "nrow" + # splat/slurp + "..." + # ../... 
as an identifier + "DotsIdentifier" # Comprehensions "generator" "filter" diff --git a/src/julia/parser.jl b/src/julia/parser.jl index 2abed160..f525735b 100644 --- a/src/julia/parser.jl +++ b/src/julia/parser.jl @@ -371,7 +371,7 @@ function parse_RtoL(ps::ParseState, down, is_op, self) down(ps) isdot, tk = peek_dotted_op_token(ps) if is_op(tk) - bump_dotted(ps, isdot, remap_kind=K"Identifier") + bump_dotted(ps, isdot, tk, remap_kind=K"Identifier") self(ps) emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) end @@ -598,7 +598,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { # a .~ b ==> (dotcall-i a ~ b) # [a ~ b c] ==> (hcat (call-i a ~ b) c) # [a~b] ==> (vect (call-i a ~ b)) - bump_dotted(ps, isdot, remap_kind=K"Identifier") + bump_dotted(ps, isdot, t, remap_kind=K"Identifier") bump_trivia(ps) parse_assignment(ps, down) emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) @@ -617,7 +617,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { (-1, K"Identifier", EMPTY_FLAGS), # op (1, K"=", TRIVIA_FLAG)) else - bump_dotted(ps, isdot, TRIVIA_FLAG) + bump_dotted(ps, isdot, t, TRIVIA_FLAG) end bump_trivia(ps) # Syntax Edition TODO: We'd like to call `down` here when @@ -743,7 +743,7 @@ function parse_arrow(ps::ParseState) # x <--> y ==> (call-i x <--> y) # x .--> y ==> (dotcall-i x --> y) # x -->₁ y ==> (call-i x -->₁ y) - bump_dotted(ps, isdot, remap_kind=K"Identifier") + bump_dotted(ps, isdot, t, remap_kind=K"Identifier") parse_arrow(ps) emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) end @@ -771,7 +771,7 @@ function parse_lazy_cond(ps::ParseState, down, is_op, self) (isdot, t) = peek_dotted_op_token(ps) k = kind(t) if is_op(k) - bump_dotted(ps, isdot, TRIVIA_FLAG) + bump_dotted(ps, isdot, t, TRIVIA_FLAG) self(ps) emit(ps, mark, isdot ? 
dotted(k) : k, flags(t)) if isdot @@ -819,7 +819,7 @@ function parse_comparison(ps::ParseState, subtype_comparison=false) while ((isdot, t) = peek_dotted_op_token(ps); is_prec_comparison(t)) n_comparisons += 1 op_dotted = isdot - op_pos = bump_dotted(ps, isdot, emit_dot_node=true, remap_kind=K"Identifier") + op_pos = bump_dotted(ps, isdot, t, emit_dot_node=true, remap_kind=K"Identifier") parse_pipe_lt(ps) end if n_comparisons == 1 @@ -873,15 +873,16 @@ end function parse_range(ps::ParseState) mark = position(ps) parse_invalid_ops(ps) + (initial_dot, initial_tok) = peek_dotted_op_token(ps) initial_kind = kind(initial_tok) - if initial_kind != K":" && is_prec_colon(initial_kind) - # a..b ==> (call-i a .. b) + if initial_kind != K":" && (is_prec_colon(initial_kind) || (initial_dot && initial_kind == K".")) + # a..b ==> (call-i a (DotsIdentifier-2) b) # a … b ==> (call-i a … b) # a .… b ==> (dotcall-i a … b) - bump_dotted(ps, initial_dot, remap_kind=K"Identifier") + bump_dotted(ps, initial_dot, initial_tok, remap_kind=K"Identifier") parse_invalid_ops(ps) - emit(ps, mark, initial_dot ? K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, (initial_dot && initial_kind != K".") ? K"dotcall" : K"call", INFIX_FLAG) elseif initial_kind == K":" && ps.range_colon_enabled # a ? b : c:d ==> (? a b (call-i c : d)) n_colons = 0 @@ -948,8 +949,10 @@ function parse_range(ps::ParseState) # x... ==> (... x) # x:y... ==> (... (call-i x : y)) # x..y... ==> (... (call-i x .. y)) # flisp parser fails here - if peek(ps) == K"..." + if peek(ps) == K"." && peek(ps, 2) == K"." && peek(ps, 3) == K"." bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG) # second dot + bump(ps, TRIVIA_FLAG) # third dot emit(ps, mark, K"...") end end @@ -965,7 +968,7 @@ function parse_invalid_ops(ps::ParseState) parse_expr(ps) while ((isdot, t) = peek_dotted_op_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**") bump_trivia(ps) - bump_dotted(ps, isdot) + bump_dotted(ps, isdot, t) parse_expr(ps) emit(ps, mark, isdot ? 
K"dotcall" : K"call", INFIX_FLAG) end @@ -1006,7 +1009,7 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) # [x+y + z] ==> (vect (call-i x + y z)) break end - bump_dotted(ps, isdot, remap_kind=K"Identifier") + bump_dotted(ps, isdot, t, remap_kind=K"Identifier") down(ps) if kind(t) in chain_ops && !is_suffixed(t) && !isdot # a + b + c ==> (call-i a + b c) @@ -1258,7 +1261,7 @@ function parse_unary(ps::ParseState) # # (The flisp parser only considers commas before `;` and thus gets this # last case wrong) - op_pos = bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier") + op_pos = bump_dotted(ps, op_dotted, op_t, emit_dot_node=true, remap_kind=K"Identifier") space_before_paren = preceding_whitespace(t2) if space_before_paren @@ -1352,12 +1355,12 @@ function parse_unary(ps::ParseState) # -0x1 ==> (call-pre - 0x01) # - 2 ==> (call-pre - 2) # .-2 ==> (dotcall-pre - 2) - op_pos = bump_dotted(ps, op_dotted, remap_kind=K"Identifier") + op_pos = bump_dotted(ps, op_dotted, op_t, remap_kind=K"Identifier") else # /x ==> (call-pre (error /) x) # +₁ x ==> (call-pre (error +₁) x) # .<: x ==> (dotcall-pre (error (. <:)) x) - bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier") + bump_dotted(ps, op_dotted, op_t, emit_dot_node=true, remap_kind=K"Identifier") op_pos = emit(ps, mark, K"error", error="not a unary operator") end parse_unary(ps) @@ -1388,7 +1391,7 @@ end function parse_factor_with_initial_ex(ps::ParseState, mark) parse_decl_with_initial_ex(ps, mark) if ((isdot, t) = peek_dotted_op_token(ps); is_prec_power(kind(t))) - bump_dotted(ps, isdot, remap_kind=K"Identifier") + bump_dotted(ps, isdot, t, remap_kind=K"Identifier") parse_factor_after(ps) emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) end @@ -2476,11 +2479,11 @@ function parse_import_atsym(ps::ParseState, allow_quotes=true) end end b = peek_behind(ps, pos) - if warn_parens && b.orig_kind != K".." 
+ if warn_parens && b.kind != K"DotsIdentifier" emit_diagnostic(ps, mark, warning="parentheses are not required here") end ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || - (!b.is_leaf && b.kind in KSet"$ var") + (!b.is_leaf && (b.kind in KSet"$ var" || b.kind == K"DotsIdentifier")) if !ok emit(ps, mark, K"error", error="expected identifier") end @@ -2589,10 +2592,6 @@ function parse_import_path(ps::ParseState) end if k == K"." bump(ps) - elseif k == K".." - bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS)) - elseif k == K"..." - bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS)) else break end @@ -2611,6 +2610,17 @@ function parse_import_path(ps::ParseState) # import A.⋆.f ==> (import (importpath A ⋆ f)) next_tok = peek_token(ps, 2) if is_operator(kind(next_tok)) + if kind(next_tok) == K"." && peek(ps, 3) == K"." + # Import the .. operator + # import A... ==> (import (importpath A (DotsIdentifier-2))) + bump_disallowed_space(ps) + bump(ps, TRIVIA_FLAG) + dotmark = position(ps) + bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG) + emit(ps, dotmark, K"DotsIdentifier", set_numeric_flags(2)) + continue + end if preceding_whitespace(t) # Whitespace in import path allowed but discouraged # import A .== ==> (import (importpath A ==)) @@ -2623,10 +2633,6 @@ function parse_import_path(ps::ParseState) end bump(ps, TRIVIA_FLAG) parse_import_atsym(ps) - elseif k == K"..." - # Import the .. operator - # import A... ==> (import (importpath A ..)) - bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS)) elseif k in KSet"NewlineWs ; , : EndMarker" # import A; B ==> (import (importpath A)) break @@ -3496,6 +3502,16 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal # . ==> (error .) emit(ps, mark, K"error", error="invalid identifier") end + elseif kind(leading_tok) == K"." && peek(ps, 2) == K"." && peek(ps, 3) == K"." + # ... 
+ bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG) + emit(ps, mark, K"DotsIdentifier", set_numeric_flags(3)) + if check_identifiers + # ... ==> (error ...) + emit(ps, mark, K"error", error="invalid identifier") + end elseif is_error(leading_kind) # Errors for bad tokens are emitted in validate_tokens() rather than # here. @@ -3583,9 +3599,9 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal @label is_operator # + ==> + # .+ ==> (. +) - bump_dotted(ps, leading_dot, emit_dot_node=true, remap_kind= + bump_dotted(ps, leading_dot, leading_tok, emit_dot_node=true, remap_kind= is_syntactic_operator(leading_kind) ? leading_kind : K"Identifier") - if check_identifiers && !is_valid_identifier(leading_kind) + if check_identifiers && !(is_valid_identifier(leading_kind) || (leading_dot && leading_kind == K".")) # += ==> (error (op= +)) # ? ==> (error ?) # .+= ==> (error (. (op= +))) diff --git a/src/julia/tokenize.jl b/src/julia/tokenize.jl index 2bd0f56d..37e40109 100644 --- a/src/julia/tokenize.jl +++ b/src/julia/tokenize.jl @@ -153,7 +153,6 @@ end function optakessuffix(k) (K"BEGIN_OPS" <= k <= K"END_OPS") && !( - k == K"..." || K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" || k == K"?" || k == K"<:" || @@ -165,7 +164,6 @@ function optakessuffix(k) k == K"≔" || k == K"⩴" || k == K":" || - k == K".." || k == K"$" || k == K"::" || k == K"where" || @@ -987,7 +985,7 @@ function lex_digit(l::Lexer, kind) pc,ppc = dpeekchar(l) if pc == '.' if ppc == '.' - # Number followed by K".." or K"..." + # Number followed by K"." return emit(l, kind) elseif kind === K"Float" # If we enter the function with kind == K"Float" then a '.' has been parsed. 
@@ -1166,23 +1164,19 @@ function lex_backslash(l::Lexer) end function lex_dot(l::Lexer) - if accept(l, '.') + if l.last_token == K"@" if accept(l, '.') - l.last_token == K"@" && return emit(l, K"Identifier") - return emit(l, K"...") - else - if is_dottable_operator_start_char(peekchar(l)) + if !accept(l, '.') && is_dottable_operator_start_char(peekchar(l)) readchar(l) return emit(l, K"ErrorInvalidOperator") - else - l.last_token == K"@" && return emit(l, K"Identifier") - return emit(l, K"..") end end - elseif Base.isdigit(peekchar(l)) + # Emit `.`, `..` and `...` as identifiers after `@` + emit(l, K"Identifier") + elseif l.last_token != K"." && Base.isdigit(peekchar(l)) + # Only start a numeric constant if the previous token wasn't a dot return lex_digit(l, K"Float") else - l.last_token == K"@" && return emit(l, K"Identifier") return emit(l, K".") end end diff --git a/test/expr.jl b/test/expr.jl index d7547848..dde93e34 100644 --- a/test/expr.jl +++ b/test/expr.jl @@ -14,6 +14,8 @@ @test parseatom(":(a)") == QuoteNode(:a) @test parseatom(":(:a)") == Expr(:quote, QuoteNode(:a)) @test parseatom(":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2)) + @test parseatom(":...") == QuoteNode(Symbol("...")) + @test parseatom(":(...)") == QuoteNode(Symbol("...")) # Compatibility hack for VERSION >= v"1.4" # https://github.com/JuliaLang/julia/pull/34077 @test parseatom(":true") == Expr(:quote, true) diff --git a/test/parser.jl b/test/parser.jl index 64ecc8ea..2500cf9d 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -141,14 +141,15 @@ tests = [ "1:\n2" => "(call-i 1 : (error))" ], JuliaSyntax.parse_range => [ - "a..b" => "(call-i a .. b)" + "a..b" => "(call-i a (DotsIdentifier-2) b)" + "a..+b" => "(call-i a (DotsIdentifier-2) (error-t) (call-pre + b))" "a … b" => "(call-i a … b)" "a .… b" => "(dotcall-i a … b)" "[1 :a]" => "(hcat 1 (quote-: a))" "[1 2:3 :a]" => "(hcat 1 (call-i 2 : 3) (quote-: a))" "x..." => "(... x)" "x:y..." => "(... (call-i x : y))" - "x..y..." => "(... 
(call-i x .. y))" + "x..y..." => "(... (call-i x (DotsIdentifier-2) y))" ], JuliaSyntax.parse_invalid_ops => [ "a--b" => "(call-i a (ErrorInvalidOperator) b)" @@ -719,7 +720,7 @@ tests = [ "import A.:(+)" => "(import (importpath A (quote-: (parens +))))" "import A.==" => "(import (importpath A ==))" "import A.⋆.f" => "(import (importpath A ⋆ f))" - "import A..." => "(import (importpath A ..))" + "import A..." => "(import (importpath A (DotsIdentifier-2)))" "import A; B" => "(import (importpath A))" # Colons not allowed first in import paths # but are allowed in trailing components (#473) @@ -816,7 +817,7 @@ tests = [ "&&" => "(error &&)" "||" => "(error ||)" "." => "(error .)" - "..." => "(error ...)" + "..." => "(error (DotsIdentifier-3))" "+=" => "(error +=)" "-=" => "(error -=)" "*=" => "(error *=)" @@ -1143,7 +1144,7 @@ parsestmt_with_kind_tests = [ "a → b" => "(call-i a::Identifier →::Identifier b::Identifier)" "a < b < c" => "(comparison a::Identifier <::Identifier b::Identifier <::Identifier c::Identifier)" "a .<: b"=> "(dotcall-i a::Identifier <:::Identifier b::Identifier)" - "a .. b" => "(call-i a::Identifier ..::Identifier b::Identifier)" + "a .. 
b" => "(call-i a::Identifier (DotsIdentifier-2) b::Identifier)" "a : b" => "(call-i a::Identifier :::Identifier b::Identifier)" "-2^x" => "(call-pre -::Identifier (call-i 2::Integer ^::Identifier x::Identifier))" "-(2)" => "(call-pre -::Identifier (parens 2::Integer))" diff --git a/test/tokenize.jl b/test/tokenize.jl index 50891520..ab3800c9 100644 --- a/test/tokenize.jl +++ b/test/tokenize.jl @@ -155,7 +155,7 @@ end # testset end # testset @testset "issue 5, '..'" begin - @test kind.(collect(tokenize("1.23..3.21"))) == [K"Float",K"..",K"Float",K"EndMarker"] + @test kind.(collect(tokenize("1.23..3.21"))) == [K"Float",K".",K".",K"Float",K"EndMarker"] end @testset "issue 17, >>" begin @@ -712,10 +712,10 @@ end @test toks("1.#") == ["1."=>K"Float", "#"=>K"Comment"] # ellipses - @test toks("1..") == ["1"=>K"Integer", ".."=>K".."] - @test toks("1...") == ["1"=>K"Integer", "..."=>K"..."] - @test toks(".1..") == [".1"=>K"Float", ".."=>K".."] - @test toks("0x01..") == ["0x01"=>K"HexInt", ".."=>K".."] + @test toks("1..") == ["1"=>K"Integer", "."=>K".", "."=>K"."] + @test toks("1...") == ["1"=>K"Integer", "."=>K".", "."=>K".", "."=>K"."] + @test toks(".1..") == [".1"=>K"Float", "."=>K".", "."=>K"."] + @test toks("0x01..") == ["0x01"=>K"HexInt", "."=>K".", "."=>K"."] # Dotted operators and other dotted suffixes @test toks("1234 .+1") == ["1234"=>K"Integer", " "=>K"Whitespace", "."=>K".", "+"=>K"+", "1"=>K"Integer"] @@ -876,8 +876,9 @@ end @test toks("--") == ["--"=>K"ErrorInvalidOperator"] @test toks("1**2") == ["1"=>K"Integer", "**"=>K"Error**", "2"=>K"Integer"] @test toks("a<---b") == ["a"=>K"Identifier", "<---"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] - @test toks("a..+b") == ["a"=>K"Identifier", "..+"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] - @test toks("a..−b") == ["a"=>K"Identifier", "..−"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] + # These used to test for invalid operators ..+ and ..−, but now .. 
is tokenized as two dots + @test toks("a..+b") == ["a"=>K"Identifier", "."=>K".", "."=>K".", "+"=>K"+", "b"=>K"Identifier"] + @test toks("a..−b") == ["a"=>K"Identifier", "."=>K".", "."=>K".", "−"=>K"-", "b"=>K"Identifier"] end @testset "hat suffix" begin