diff --git a/docs/src/api.md b/docs/src/api.md index 5dfbec6e..b2440f01 100644 --- a/docs/src/api.md +++ b/docs/src/api.md @@ -101,7 +101,6 @@ JuliaSyntax.is_infix_op_call JuliaSyntax.is_prefix_op_call JuliaSyntax.is_postfix_op_call JuliaSyntax.is_dotted -JuliaSyntax.is_suffixed JuliaSyntax.is_decorated JuliaSyntax.numeric_flags ``` diff --git a/src/JuliaSyntax.jl b/src/JuliaSyntax.jl index da5861c0..0f930224 100644 --- a/src/JuliaSyntax.jl +++ b/src/JuliaSyntax.jl @@ -41,7 +41,11 @@ export SourceFile @_public source_line_range # Expression predicates, kinds and flags -export @K_str, kind +export @K_str, kind, PrecedenceLevel, PREC_NONE, PREC_ASSIGNMENT, + PREC_PAIRARROW, PREC_CONDITIONAL, PREC_ARROW, PREC_LAZYOR, PREC_LAZYAND, + PREC_COMPARISON, PREC_PIPE_LT, PREC_PIPE_GT, PREC_COLON, PREC_PLUS, + PREC_BITSHIFT, PREC_TIMES, PREC_RATIONAL, PREC_POWER, PREC_DECL, + PREC_WHERE, PREC_DOT, PREC_QUOTE, PREC_UNICODE_OPS, PREC_COMPOUND_ASSIGN, generic_operators_by_level @_public Kind @_public flags, @@ -53,7 +57,6 @@ export @K_str, kind is_prefix_op_call, is_postfix_op_call, is_dotted, - is_suffixed, is_decorated, numeric_flags, has_flags, diff --git a/src/core/parse_stream.jl b/src/core/parse_stream.jl index da4d70cc..7ba09901 100644 --- a/src/core/parse_stream.jl +++ b/src/core/parse_stream.jl @@ -45,7 +45,7 @@ kind(head::SyntaxHead) = head.kind Return the flag bits of a syntactic construct. Prefer to query these with the predicates `is_trivia`, `is_prefix_call`, `is_infix_op_call`, -`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`, `is_suffixed`, +`is_prefix_op_call`, `is_postfix_op_call`, `is_dotted`, `is_decorated`. Or extract numeric portion of the flags with `numeric_flags`. 
@@ -376,7 +376,10 @@ function _buffer_lookahead_tokens(lexer, lookahead) was_whitespace = is_whitespace(k) had_whitespace |= was_whitespace f = EMPTY_FLAGS - raw.suffix && (f |= SUFFIXED_FLAG) + if k == K"Operator" && raw.op_precedence != Tokenize.PREC_NONE + # Store operator precedence in numeric flags + f |= set_numeric_flags(Int(raw.op_precedence)) + end push!(lookahead, SyntaxToken(SyntaxHead(k, f), k, had_whitespace, raw.endbyte + 2)) token_count += 1 diff --git a/src/integration/expr.jl b/src/integration/expr.jl index 038bad9a..52f9e22f 100644 --- a/src/integration/expr.jl +++ b/src/integration/expr.jl @@ -338,20 +338,31 @@ end return adjust_macro_name!(retexpr.args[1], k) elseif k == K"?" retexpr.head = :if - elseif k == K"op=" && length(args) == 3 - lhs = args[1] - op = args[2] - rhs = args[3] - headstr = string(args[2], '=') - retexpr.head = Symbol(headstr) - retexpr.args = Any[lhs, rhs] - elseif k == K".op=" && length(args) == 3 - lhs = args[1] - op = args[2] - rhs = args[3] - headstr = '.' * string(args[2], '=') - retexpr.head = Symbol(headstr) - retexpr.args = Any[lhs, rhs] + elseif k == K"dots" + n = numeric_flags(flags(nodehead)) + return n == 2 ? :(..) : :(...) + elseif k == K"op=" + if length(args) == 3 + lhs = args[1] + op = args[2] + rhs = args[3] + headstr = string(args[2], '=') + retexpr.head = Symbol(headstr) + retexpr.args = Any[lhs, rhs] + elseif length(args) == 1 + return Symbol(string(args[1], '=')) + end + elseif k == K".op=" + if length(args) == 3 + lhs = args[1] + op = args[2] + rhs = args[3] + headstr = '.' 
* string(args[2], '=') + retexpr.head = Symbol(headstr) + retexpr.args = Any[lhs, rhs] + else + return Symbol(string('.', args[1], '=')) + end elseif k == K"macrocall" if length(args) >= 2 a2 = args[2] diff --git a/src/julia/julia_parse_stream.jl b/src/julia/julia_parse_stream.jl index 87ad0386..21d4728f 100644 --- a/src/julia/julia_parse_stream.jl +++ b/src/julia/julia_parse_stream.jl @@ -1,7 +1,3 @@ -# Token flags - may be set for operator kinded tokens -# Operator has a suffix -const SUFFIXED_FLAG = RawFlags(1<<2) - # Set for K"call", K"dotcall" or any syntactic operator heads # Distinguish various syntaxes which are mapped to K"call" const PREFIX_CALL_FLAG = RawFlags(0<<3) @@ -110,15 +106,6 @@ Return true for postfix operator calls such as the `'ᵀ` call node parsed from """ is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG - -""" - is_suffixed(x) - -Return true for operators which have suffixes, such as `+₁` -""" -is_suffixed(x) = has_flags(x, SUFFIXED_FLAG) - - """ numeric_flags(x) @@ -137,8 +124,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) is_postfix_op_call(head) && (str = str*"-post") k = kind(head) - # Handle numeric flags for nrow/ncat nodes - if k in KSet"nrow ncat typed_ncat" + # Handle numeric flags for nodes that take them + if k in KSet"nrow ncat typed_ncat dots" n = numeric_flags(head) n != 0 && (str = str*"-"*string(n)) else @@ -164,7 +151,6 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true) str *= "-," end end - is_suffixed(head) && (str = str*"-suf") end str end @@ -262,67 +248,38 @@ function validate_tokens(stream::ParseStream) sort!(stream.diagnostics, by=first_byte) end -""" - bump_split(stream, token_spec1, [token_spec2 ...]) - -Bump the next token, splitting it into several pieces - -Tokens are defined by a number of `token_spec` of shape `(nbyte, kind, flags)`. -If all `nbyte` are positive, the sum must equal the token length. 
If one -`nbyte` is negative, that token is given `tok_len + nbyte` bytes and the sum of -all `nbyte` must equal zero. - -This is a hack which helps resolves the occasional lexing ambiguity. For -example -* Whether .+ should be a single token or the composite (. +) which is used for - standalone operators. -* Whether ... is splatting (most of the time) or three . tokens in import paths - -TODO: Are these the only cases? Can we replace this general utility with a -simpler one which only splits preceding dots? -""" -function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N} - tok = stream.lookahead[stream.lookahead_index] - stream.lookahead_index += 1 - start_b = _next_byte(stream) - toklen = tok.next_byte - start_b - prev_b = start_b - for (i, (nbyte, k, f)) in enumerate(split_spec) - h = SyntaxHead(k, f) - actual_nbyte = nbyte < 0 ? (toklen + nbyte) : nbyte - orig_k = k == K"." ? K"." : kind(tok) - node = RawGreenNode(h, actual_nbyte, orig_k) - push!(stream.output, node) - prev_b += actual_nbyte - stream.next_byte += actual_nbyte - end - @assert tok.next_byte == prev_b - stream.peek_count = 0 - return position(stream) -end - function peek_dotted_op_token(ps, allow_whitespace=false) # Peek the next token, but if it is a dot, peek the next one as well t = peek_token(ps) isdotted = kind(t) == K"." if isdotted t2 = peek_token(ps, 2) - if !is_operator(t2) || (!allow_whitespace && preceding_whitespace(t2)) + if (!allow_whitespace && preceding_whitespace(t2)) + isdotted = false + elseif !is_operator(t2) + isdotted = false + elseif kind(t2) == K"." && peek(ps, 3) == K"." 
+ # Treat `..` as dotted K".", unless there's another dot after isdotted = false else t = t2 end end - return (isdotted, t) + isassign = false + if !allow_whitespace && is_operator(t) + t3 = peek_token(ps, 2+isdotted) + isassign = kind(t3) == K"=" && !preceding_whitespace(t3) + end + return (isdotted, isassign, t) end -function bump_dotted(ps, isdot, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None") +function bump_dotted(ps, isdot, t, flags=EMPTY_FLAGS; emit_dot_node=false, remap_kind=K"None") if isdot - if emit_dot_node - dotmark = position(ps) - bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG - else - bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG + dotmark = position(ps) + bump(ps, TRIVIA_FLAG) + if kind(t) == K"." + bump(ps, TRIVIA_FLAG) + return emit(ps, dotmark, K"dots", set_numeric_flags(2)) end end pos = bump(ps, flags, remap_kind=remap_kind) diff --git a/src/julia/kinds.jl b/src/julia/kinds.jl index 19a00eb2..3aa17ef9 100644 --- a/src/julia/kinds.jl +++ b/src/julia/kinds.jl @@ -181,7 +181,6 @@ Return the `Kind` of `x`. """ kind(k::Kind) = k - #------------------------------------------------------------------------------- # Kinds used by JuliaSyntax register_kinds!(JuliaSyntax, 0, [ @@ -193,6 +192,7 @@ register_kinds!(JuliaSyntax, 0, [ # Identifiers "BEGIN_IDENTIFIERS" "Identifier" + "Operator" "Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering "END_IDENTIFIERS" @@ -278,728 +278,56 @@ register_kinds!(JuliaSyntax, 0, [ "ErrorInvalidOperator" "Error**" - "..." - - # Level 1 + # Various operators that have special parsing rules and thus get explicit heads. + # All other operators (including suffixed versions of these) are K"Operator". 
"BEGIN_ASSIGNMENTS" - "BEGIN_SYNTACTIC_ASSIGNMENTS" "=" ".=" - "op=" # Updating assignment operator ( $= %= &= *= += -= //= /= <<= >>= >>>= \= ^= |= ÷= ⊻= ) - ".op=" ":=" - "END_SYNTACTIC_ASSIGNMENTS" "~" "≔" "⩴" "≕" + # Compound assignments + "op=" + ".op=" "END_ASSIGNMENTS" - - "BEGIN_PAIRARROW" - "=>" - "END_PAIRARROW" - - # Level 2 - "BEGIN_CONDITIONAL" - "?" - "END_CONDITIONAL" - - # Level 3 - "BEGIN_ARROW" - "-->" - "<--" - "<-->" - "←" - "→" - "↔" - "↚" - "↛" - "↞" - "↠" - "↢" - "↣" - "↤" - "↦" - "↮" - "⇎" - "⇍" - "⇏" - "⇐" - "⇒" - "⇔" - "⇴" - "⇶" - "⇷" - "⇸" - "⇹" - "⇺" - "⇻" - "⇼" - "⇽" - "⇾" - "⇿" - "⟵" - "⟶" - "⟷" - "⟹" - "⟺" - "⟻" - "⟼" - "⟽" - "⟾" - "⟿" - "⤀" - "⤁" - "⤂" - "⤃" - "⤄" - "⤅" - "⤆" - "⤇" - "⤌" - "⤍" - "⤎" - "⤏" - "⤐" - "⤑" - "⤔" - "⤕" - "⤖" - "⤗" - "⤘" - "⤝" - "⤞" - "⤟" - "⤠" - "⥄" - "⥅" - "⥆" - "⥇" - "⥈" - "⥊" - "⥋" - "⥎" - "⥐" - "⥒" - "⥓" - "⥖" - "⥗" - "⥚" - "⥛" - "⥞" - "⥟" - "⥢" - "⥤" - "⥦" - "⥧" - "⥨" - "⥩" - "⥪" - "⥫" - "⥬" - "⥭" - "⥰" - "⧴" - "⬱" - "⬰" - "⬲" - "⬳" - "⬴" - "⬵" - "⬶" - "⬷" - "⬸" - "⬹" - "⬺" - "⬻" - "⬼" - "⬽" - "⬾" - "⬿" - "⭀" - "⭁" - "⭂" - "⭃" - "⥷" - "⭄" - "⥺" - "⭇" - "⭈" - "⭉" - "⭊" - "⭋" - "⭌" - "←" - "→" - "⇜" - "⇝" - "↜" - "↝" - "↩" - "↪" - "↫" - "↬" - "↼" - "↽" - "⇀" - "⇁" - "⇄" - "⇆" - "⇇" - "⇉" - "⇋" - "⇌" - "⇚" - "⇛" - "⇠" - "⇢" - "↷" - "↶" - "↺" - "↻" - "🢲" - "END_ARROW" - - # Level 4 - "BEGIN_LAZYOR" - "||" - ".||" - "END_LAZYOR" - - # Level 5 - "BEGIN_LAZYAND" - "&&" - ".&&" - "END_LAZYAND" - - # Level 6 - "BEGIN_COMPARISON" - "<:" - ">:" - ">" - "<" - ">=" - "≥" - "<=" - "≤" - "==" - "===" - "≡" - "!=" - "≠" - "!==" - "≢" - "∈" - "in" - "isa" - "∉" - "∋" - "∌" - "⊆" - "⊈" - "⊂" - "⊄" - "⊊" - "∝" - "∊" - "∍" - "∥" - "∦" - "∷" - "∺" - "∻" - "∽" - "∾" - "≁" - "≃" - "≂" - "≄" - "≅" - "≆" - "≇" - "≈" - "≉" - "≊" - "≋" - "≌" - "≍" - "≎" - "≐" - "≑" - "≒" - "≓" - "≖" - "≗" - "≘" - "≙" - "≚" - "≛" - "≜" - "≝" - "≞" - "≟" - "≣" - "≦" - "≧" - "≨" - "≩" - "≪" - "≫" - "≬" - "≭" - "≮" - "≯" - "≰" - "≱" - "≲" - "≳" - 
"≴" - "≵" - "≶" - "≷" - "≸" - "≹" - "≺" - "≻" - "≼" - "≽" - "≾" - "≿" - "⊀" - "⊁" - "⊃" - "⊅" - "⊇" - "⊉" - "⊋" - "⊏" - "⊐" - "⊑" - "⊒" - "⊜" - "⊩" - "⊬" - "⊮" - "⊰" - "⊱" - "⊲" - "⊳" - "⊴" - "⊵" - "⊶" - "⊷" - "⋍" - "⋐" - "⋑" - "⋕" - "⋖" - "⋗" - "⋘" - "⋙" - "⋚" - "⋛" - "⋜" - "⋝" - "⋞" - "⋟" - "⋠" - "⋡" - "⋢" - "⋣" - "⋤" - "⋥" - "⋦" - "⋧" - "⋨" - "⋩" - "⋪" - "⋫" - "⋬" - "⋭" - "⋲" - "⋳" - "⋴" - "⋵" - "⋶" - "⋷" - "⋸" - "⋹" - "⋺" - "⋻" - "⋼" - "⋽" - "⋾" - "⋿" - "⟈" - "⟉" - "⟒" - "⦷" - "⧀" - "⧁" - "⧡" - "⧣" - "⧤" - "⧥" - "⩦" - "⩧" - "⩪" - "⩫" - "⩬" - "⩭" - "⩮" - "⩯" - "⩰" - "⩱" - "⩲" - "⩳" - "⩵" - "⩶" - "⩷" - "⩸" - "⩹" - "⩺" - "⩻" - "⩼" - "⩽" - "⩾" - "⩿" - "⪀" - "⪁" - "⪂" - "⪃" - "⪄" - "⪅" - "⪆" - "⪇" - "⪈" - "⪉" - "⪊" - "⪋" - "⪌" - "⪍" - "⪎" - "⪏" - "⪐" - "⪑" - "⪒" - "⪓" - "⪔" - "⪕" - "⪖" - "⪗" - "⪘" - "⪙" - "⪚" - "⪛" - "⪜" - "⪝" - "⪞" - "⪟" - "⪠" - "⪡" - "⪢" - "⪣" - "⪤" - "⪥" - "⪦" - "⪧" - "⪨" - "⪩" - "⪪" - "⪫" - "⪬" - "⪭" - "⪮" - "⪯" - "⪰" - "⪱" - "⪲" - "⪳" - "⪴" - "⪵" - "⪶" - "⪷" - "⪸" - "⪹" - "⪺" - "⪻" - "⪼" - "⪽" - "⪾" - "⪿" - "⫀" - "⫁" - "⫂" - "⫃" - "⫄" - "⫅" - "⫆" - "⫇" - "⫈" - "⫉" - "⫊" - "⫋" - "⫌" - "⫍" - "⫎" - "⫏" - "⫐" - "⫑" - "⫒" - "⫓" - "⫔" - "⫕" - "⫖" - "⫗" - "⫘" - "⫙" - "⫷" - "⫸" - "⫹" - "⫺" - "⊢" - "⊣" - "⟂" - # ⫪,⫫ see https://github.com/JuliaLang/julia/issues/39350 - "⫪" - "⫫" - "END_COMPARISON" - - # Level 7 - "BEGIN_PIPE" - "<|" - "|>" - "END_PIPE" - - # Level 8 - "BEGIN_COLON" - ":" - ".." 
- "…" - "⁝" - "⋮" - "⋱" - "⋰" - "⋯" - "END_COLON" - - # Level 9 - "BEGIN_PLUS" - "\$" - "+" - "-" # also used for "−" - "++" - "⊕" - "⊖" - "⊞" - "⊟" - "|" - "∪" - "∨" - "⊔" - "±" - "∓" - "∔" - "∸" - "≏" - "⊎" - "⊻" - "⊽" - "⋎" - "⋓" - "⟇" - "⧺" - "⧻" - "⨈" - "⨢" - "⨣" - "⨤" - "⨥" - "⨦" - "⨧" - "⨨" - "⨩" - "⨪" - "⨫" - "⨬" - "⨭" - "⨮" - "⨹" - "⨺" - "⩁" - "⩂" - "⩅" - "⩊" - "⩌" - "⩏" - "⩐" - "⩒" - "⩔" - "⩖" - "⩗" - "⩛" - "⩝" - "⩡" - "⩢" - "⩣" - "¦" - "END_PLUS" - - # Level 10 - "BEGIN_TIMES" - "*" - "/" - "÷" - "%" - "⋅" # also used for lookalikes "·" and "·" - "∘" - "×" - "\\" - "&" - "∩" - "∧" - "⊗" - "⊘" - "⊙" - "⊚" - "⊛" - "⊠" - "⊡" - "⊓" - "∗" - "∙" - "∤" - "⅋" - "≀" - "⊼" - "⋄" - "⋆" - "⋇" - "⋉" - "⋊" - "⋋" - "⋌" - "⋏" - "⋒" - "⟑" - "⦸" - "⦼" - "⦾" - "⦿" - "⧶" - "⧷" - "⨇" - "⨰" - "⨱" - "⨲" - "⨳" - "⨴" - "⨵" - "⨶" - "⨷" - "⨸" - "⨻" - "⨼" - "⨽" - "⩀" - "⩃" - "⩄" - "⩋" - "⩍" - "⩎" - "⩑" - "⩓" - "⩕" - "⩘" - "⩚" - "⩜" - "⩞" - "⩟" - "⩠" - "⫛" - "⊍" - "▷" - "⨝" - "⟕" - "⟖" - "⟗" - "⌿" - "⨟" - "END_TIMES" - - # Level 11 - "BEGIN_RATIONAL" - "//" - "END_RATIONAL" - - # Level 12 - "BEGIN_BITSHIFTS" - "<<" - ">>" - ">>>" - "END_BITSHIFTS" - - # Level 13 - "BEGIN_POWER" - "^" - "↑" - "↓" - "⇵" - "⟰" - "⟱" - "⤈" - "⤉" - "⤊" - "⤋" - "⤒" - "⤓" - "⥉" - "⥌" - "⥍" - "⥏" - "⥑" - "⥔" - "⥕" - "⥘" - "⥙" - "⥜" - "⥝" - "⥠" - "⥡" - "⥣" - "⥥" - "⥮" - "⥯" - "↑" - "↓" - "END_POWER" - - # Level 14 - "BEGIN_DECL" - "::" - "END_DECL" - - # Level 15 - "BEGIN_WHERE" - "where" - "END_WHERE" - - # Level 16 - "BEGIN_DOT" - "." - "END_DOT" - - "!" - "'" - ".'" - "->" - - "BEGIN_UNICODE_OPS" - "¬" - "√" - "∛" - "∜" - "END_UNICODE_OPS" + "?" # ternary operator + "||" # not an operator call + ".||" # dotted of above (not emitted by lexer) + "&&" # not an operator call + ".&&" # dotted of above (not emitted by lexer) + "<:" # subtype syntax + ">:" # supertype syntax + "::" # field type syntax + "." # various dot syntax + ".." # .. 
operator (not emitted by lexer) + "in" # iteration syntax + "isa" + "where" + "!" # syntactic unary + "'" # special postfix parsing + ".'" # special postfix parsing + "->" # syntactic arrow + "-->" # syntactic arrow + ":" # used for quoting + "+" # used in numeric constants + "++" # special chaining syntax + "*" # special chaining syntax + "<" # recovery path for :< + ">" # recovery path for :> + "\$" # interpolation + "-" # negated constants + "&" # syntactic unary + "∈" # iteration syntax + # all syntactic unary + "⋆" + "±" + "∓" + "¬" + "√" + "∛" + "∜" "END_OPS" # 2. Nonterminals which are exposed in the AST, but where the surface @@ -1033,6 +361,10 @@ register_kinds!(JuliaSyntax, 0, [ "typed_ncat" "row" "nrow" + # splat/slurp + "..." + # ../... as an identifier + "dots" # Comprehensions "generator" "filter" @@ -1073,6 +405,109 @@ register_kinds!(JuliaSyntax, 0, [ "END_ERRORS" ]) +@enum PrecedenceLevel begin + PREC_NONE + PREC_ASSIGNMENT + PREC_PAIRARROW + PREC_CONDITIONAL + PREC_ARROW + PREC_LAZYOR + PREC_LAZYAND + PREC_COMPARISON + PREC_PIPE_LT + PREC_PIPE_GT + PREC_COLON + PREC_PLUS + PREC_BITSHIFT + PREC_TIMES + PREC_RATIONAL + PREC_POWER + PREC_DECL + PREC_WHERE + PREC_DOT + PREC_QUOTE + PREC_UNICODE_OPS + # Special precedence to only allow compound assignment for designated operators, for + # compatibility with flisp + PREC_COMPOUND_ASSIGN +end + +const generic_operators_by_level = Dict{PrecedenceLevel, Vector{Char}}( + PREC_ASSIGNMENT => Char[#= = .= := ~ ≔ ⩴ ≕ =#], + PREC_PAIRARROW => Char[#= => =#], + PREC_CONDITIONAL => Char[#= ? 
=#], + PREC_ARROW => + [#= -> --> <-- <--> =# + '←', '→', '↔', '↚', '↛', '↞', '↠', '↢', + '↣', '↤', '↦', '↮', '⇎', '⇍', '⇏', '⇐', '⇒', '⇔', '⇴', + '⇶', '⇷', '⇸', '⇹', '⇺', '⇻', '⇼', '⇽', '⇾', '⇿', '⟵', + '⟶', '⟷', '⟹', '⟺', '⟻', '⟼', '⟽', '⟾', '⟿', '⤀', '⤁', + '⤂', '⤃', '⤄', '⤅', '⤆', '⤇', '⤌', '⤍', '⤎', '⤏', '⤐', '⤑', + '⤔', '⤕', '⤖', '⤗', '⤘', '⤝', '⤞', '⤟', '⤠', '⥄', '⥅', '⥆', + '⥇', '⥈', '⥊', '⥋', '⥎', '⥐', '⥒', '⥓', '⥖', '⥗', '⥚', '⥛', + '⥞', '⥟', '⥢', '⥤', '⥦', '⥧', '⥨', '⥩', '⥪', '⥫', '⥬', '⥭', + '⥰', '⧴', '⬱', '⬰', '⬲', '⬳', '⬴', '⬵', '⬶', '⬷', '⬸', '⬹', + '⬺', '⬻', '⬼', '⬽', '⬾', '⬿', '⭀', '⭁', '⭂', '⭃', '⥷', '⭄', + '⥺', '⭇', '⭈', '⭉', '⭊', '⭋', '⭌', '←', '→', '⇜', '⇝', '↜', '↝', + '↩', '↪', '↫', '↬', '↼', '↽', '⇀', '⇁', '⇄', '⇆', '⇇', '⇉', '⇋', + '⇌', '⇚', '⇛', '⇠', '⇢', '↷', '↶', '↺', '↻', '🢲'], + PREC_LAZYOR => Char[#= || =#], + PREC_LAZYAND => Char[#= && =#], + PREC_COMPARISON => + [#= <: >: in isa < > ∈ == != !== =# + '≥', '≤', '≡', '≠', '≢', '∉', '∋', + '∌', '⊆', '⊈', '⊂', '⊄', '⊊', '∝', '∊', '∍', '∥', '∦', + '∷', '∺', '∻', '∽', '∾', '≁', '≃', '≂', '≄', '≅', '≆', + '≇', '≈', '≉', '≊', '≋', '≌', '≍', '≎', '≐', '≑', '≒', + '≓', '≖', '≗', '≘', '≙', '≚', '≛', '≜', '≝', '≞', '≟', + '≣', '≦', '≧', '≨', '≩', '≪', '≫', '≬', '≭', '≮', '≯', + '≰', '≱', '≲', '≳', '≴', '≵', '≶', '≷', '≸', '≹', '≺', + '≻', '≼', '≽', '≾', '≿', '⊀', '⊁', '⊃', '⊅', '⊇', '⊉', + '⊋', '⊏', '⊐', '⊑', '⊒', '⊜', '⊩', '⊬', '⊮', '⊰', '⊱', + '⊲', '⊳', '⊴', '⊵', '⊶', '⊷', '⋍', '⋐', '⋑', '⋕', '⋖', + '⋗', '⋘', '⋙', '⋚', '⋛', '⋜', '⋝', '⋞', '⋟', '⋠', '⋡', + '⋢', '⋣', '⋤', '⋥', '⋦', '⋧', '⋨', '⋩', '⋪', '⋫', '⋬', + '⋭', '⋲', '⋳', '⋴', '⋵', '⋶', '⋷', '⋸', '⋹', '⋺', '⋻', + '⋼', '⋽', '⋾', '⋿', '⟈', '⟉', '⟒', '⦷', '⧀', '⧁', '⧡', + '⧣', '⧤', '⧥', '⩦', '⩧', '⩪', '⩫', '⩬', '⩭', '⩮', '⩯', + '⩰', '⩱', '⩲', '⩳', '⩵', '⩶', '⩷', '⩸', '⩹', '⩺', '⩻', + '⩼', '⩽', '⩾', '⩿', '⪀', '⪁', '⪂', '⪃', '⪄', '⪅', '⪆', '⪇', + '⪈', '⪉', '⪊', '⪋', '⪌', '⪍', '⪎', '⪏', '⪐', '⪑', '⪒', '⪓', + '⪔', '⪕', '⪖', '⪗', '⪘', '⪙', '⪚', 
'⪛', '⪜', '⪝', '⪞', '⪟', + '⪠', '⪡', '⪢', '⪣', '⪤', '⪥', '⪦', '⪧', '⪨', '⪩', '⪪', + '⪫', '⪬', '⪭', '⪮', '⪯', '⪰', '⪱', '⪲', '⪳', '⪴', '⪵', + '⪶', '⪷', '⪸', '⪹', '⪺', '⪻', '⪼', '⪽', '⪾', '⪿', '⫀', + '⫁', '⫂', '⫃', '⫄', '⫅', '⫆', '⫇', '⫈', '⫉', '⫊', '⫋', + '⫌', '⫍', '⫎', '⫏', '⫐', '⫑', '⫒', '⫓', '⫔', '⫕', '⫖', + '⫗', '⫘', '⫙', '⫷', '⫸', '⫹', '⫺', '⊢', '⊣', '⟂', '⫪', '⫫'], + PREC_PIPE_LT => Char[#= <| =#], + PREC_PIPE_GT => Char[#= |> =#], + PREC_COLON => [ #= : .. =# '…', '⁝', '⋮', '⋱', '⋰', '⋯'], + PREC_PLUS => + [ #= + - ± ∓ ++ =# + '⊕', '⊖', '⊞', '⊟', '|', '∪', '∨', + '⊔', '±', '∓', '∔', '∸', '≏', '⊎', '⊻', '⊽', '⋎', '⋓', '⟇', '⧺', + '⧻', '⨈', '⨢', '⨣', '⨤', '⨥', '⨦', '⨧', '⨨', '⨩', '⨪', '⨫', '⨬', '⨭', + '⨮', '⨹', '⨺', '⩁', '⩂', '⩅', '⩊', '⩌', '⩏', '⩐', '⩒', '⩔', '⩖', '⩗', + '⩛', '⩝', '⩡', '⩢', '⩣', '¦'], + PREC_TIMES => + [ #= * ⋆ & =# + '/', '÷', '%', '⋅', '·', '·', '∘', '×', '\\', '∩', '∧', '⊗', + '⊘', '⊙', '⊚', '⊛', '⊠', '⊡', '⊓', '∗', '∙', '∤', '⅋', '≀', '⊼', '⋄', '⋆', + '⋇', '⋉', '⋊', '⋋', '⋌', '⋏', '⋒', '⟑', '⦸', '⦼', '⦾', '⦿', '⧶', '⧷', + '⨇', '⨰', '⨱', '⨲', '⨳', '⨴', '⨵', '⨶', '⨷', '⨸', '⨻', '⨼', '⨽', '⩀', + '⩃', '⩄', '⩋', '⩍', '⩎', '⩑', '⩓', '⩕', '⩘', '⩚', '⩜', '⩞', '⩟', '⩠', + '⫛', '⊍', '▷', '⨝', '⟕', '⟖', '⟗', '⌿', '⨟', + '\u00b7', # '·' Middle Dot + '\u0387' # '·' Greek Ano Teleia + ], + PREC_RATIONAL => Char[#= // =#], + PREC_BITSHIFT => Char[#= << >> >>> =#], + PREC_POWER => ['^', '↑', '↓', '⇵', '⟰', '⟱', '⤈', '⤉', '⤊', '⤋', '⤒', '⤓', '⥉', + '⥌', '⥍', '⥏', '⥑', '⥔', '⥕', '⥘', '⥙', '⥜', '⥝', '⥠', '⥡', '⥣', '⥥', + '⥮', '⥯', '↑', '↓'], +) + #------------------------------------------------------------------------------- const _nonunique_kind_names = Set([ K"Comment" @@ -1156,7 +591,7 @@ is_keyword(k::Kind) = K"BEGIN_KEYWORDS" <= k <= K"END_KEYWORDS" is_block_continuation_keyword(k::Kind) = K"BEGIN_BLOCK_CONTINUATION_KEYWORDS" <= k <= K"END_BLOCK_CONTINUATION_KEYWORDS" is_literal(k::Kind) = K"BEGIN_LITERAL" <= k <= K"END_LITERAL" is_number(k::Kind) = 
K"BEGIN_NUMBERS" <= k <= K"END_NUMBERS" -is_operator(k::Kind) = K"BEGIN_OPS" <= k <= K"END_OPS" +is_operator(k::Kind) = k == K"Operator" || K"BEGIN_OPS" <= k <= K"END_OPS" is_word_operator(k::Kind) = (k == K"in" || k == K"isa" || k == K"where") is_identifier(x) = is_identifier(kind(x)) @@ -1171,28 +606,30 @@ is_word_operator(x) = is_word_operator(kind(x)) # Predicates for operator precedence # FIXME: Review how precedence depends on dottedness, eg # https://github.com/JuliaLang/julia/pull/36725 + + is_prec_assignment(x) = K"BEGIN_ASSIGNMENTS" <= kind(x) <= K"END_ASSIGNMENTS" -is_prec_pair(x) = K"BEGIN_PAIRARROW" <= kind(x) <= K"END_PAIRARROW" -is_prec_conditional(x) = K"BEGIN_CONDITIONAL" <= kind(x) <= K"END_CONDITIONAL" -is_prec_arrow(x) = K"BEGIN_ARROW" <= kind(x) <= K"END_ARROW" -is_prec_lazy_or(x) = K"BEGIN_LAZYOR" <= kind(x) <= K"END_LAZYOR" -is_prec_lazy_and(x) = K"BEGIN_LAZYAND" <= kind(x) <= K"END_LAZYAND" -is_prec_comparison(x) = K"BEGIN_COMPARISON" <= kind(x) <= K"END_COMPARISON" -is_prec_pipe(x) = K"BEGIN_PIPE" <= kind(x) <= K"END_PIPE" -is_prec_colon(x) = K"BEGIN_COLON" <= kind(x) <= K"END_COLON" -is_prec_plus(x) = K"BEGIN_PLUS" <= kind(x) <= K"END_PLUS" -is_prec_bitshift(x) = K"BEGIN_BITSHIFTS" <= kind(x) <= K"END_BITSHIFTS" -is_prec_times(x) = K"BEGIN_TIMES" <= kind(x) <= K"END_TIMES" -is_prec_rational(x) = K"BEGIN_RATIONAL" <= kind(x) <= K"END_RATIONAL" -is_prec_power(x) = K"BEGIN_POWER" <= kind(x) <= K"END_POWER" -is_prec_decl(x) = K"BEGIN_DECL" <= kind(x) <= K"END_DECL" -is_prec_where(x) = K"BEGIN_WHERE" <= kind(x) <= K"END_WHERE" -is_prec_dot(x) = K"BEGIN_DOT" <= kind(x) <= K"END_DOT" -is_prec_unicode_ops(x) = K"BEGIN_UNICODE_OPS" <= kind(x) <= K"END_UNICODE_OPS" -is_prec_pipe_lt(x) = kind(x) == K"<|" -is_prec_pipe_gt(x) = kind(x) == K"|>" +is_prec_pair(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_PAIRARROW)) +is_prec_conditional(x) = kind(x) == K"?" 
+is_prec_arrow(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_ARROW)) || kind(x) == K"-->" +is_prec_lazy_or(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_LAZYOR)) || kind(x) in KSet"||" +is_prec_lazy_and(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_LAZYAND)) || kind(x) in KSet"&&" +is_prec_comparison(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_COMPARISON)) || kind(x) in KSet"<: >: in isa < > ∈" +is_prec_pipe_lt(x) = kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_PIPE_LT) +is_prec_pipe_gt(x) = kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_PIPE_GT) +is_prec_pipe(x) = is_prec_pipe_lt(x) || is_prec_pipe_gt(x) +is_prec_colon(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_COLON)) || kind(x) == K".." +is_prec_plus(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_PLUS)) || kind(x) in KSet"+ - ± ∓" +is_prec_bitshift(x) = kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_BITSHIFT) +is_prec_times(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_TIMES)) || kind(x) in KSet"* ⋆ &" +is_prec_rational(x) = kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_RATIONAL) +is_prec_power(x) = kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_POWER) +is_prec_decl(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_DECL)) || kind(x) == K"::" +is_prec_where(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_WHERE)) || kind(x) == K"where" +is_prec_dot(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_DOT)) || kind(x) == K"." 
+is_prec_quote(x) = (kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_QUOTE)) || kind(x) == K"'" is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS"<= kind(x) <= K"END_SYNTAX_KINDS" -is_syntactic_assignment(x) = K"BEGIN_SYNTACTIC_ASSIGNMENTS" <= kind(x) <= K"END_SYNTACTIC_ASSIGNMENTS" +is_prec_compound_assign(x) = kind(x) == K"Operator" && numeric_flags(head(x)) == Int(PREC_COMPOUND_ASSIGN) function is_string_delim(x) kind(x) in (K"\"", K"\"\"\"") @@ -1216,5 +653,5 @@ function is_syntactic_operator(x) # in the parser? The lexer itself usually disallows such tokens, so it's # not clear whether we need to handle them. (Though note `.->` is a # token...) - return k in KSet"&& || . ... ->" || is_syntactic_assignment(k) + return k in KSet"&& || . ... -> = :=" end diff --git a/src/julia/parser.jl b/src/julia/parser.jl index 2abed160..201cdcb0 100644 --- a/src/julia/parser.jl +++ b/src/julia/parser.jl @@ -101,10 +101,6 @@ function bump_glue(ps::ParseState, args...; kws...) bump_glue(ps.stream, args...; kws...) end -function bump_split(ps::ParseState, args...; kws...) - bump_split(ps.stream, args...; kws...) -end - function reset_node!(ps::ParseState, args...; kws...) reset_node!(ps.stream, args...; kws...) end @@ -221,9 +217,7 @@ end # # All these take either a raw kind or a token. -function is_plain_equals(t) - kind(t) == K"=" && !is_suffixed(t) -end +is_plain_equals(t) = kind(t) == K"=" function is_closing_token(ps::ParseState, k) k = kind(k) @@ -274,8 +268,10 @@ function is_block_form(k) abstract primitive struct try module" end -function is_syntactic_unary_op(k) - kind(k) in KSet"$ & ::" +function is_syntactic_unary_op(x) + # $, & and :: are syntactic unary operators + k = kind(x) + return k in KSet":: $ &" end function is_type_operator(t, isdot) @@ -284,20 +280,14 @@ end function is_unary_op(t, isdot) k = kind(t) - !is_suffixed(t) && ( - (k in KSet"<: >:" && !isdot) || - k in KSet"+ - ! 
~ ¬ √ ∛ ∜ ⋆ ± ∓" # dotop allowed - ) + (k in KSet"<: >:" && !isdot) || + k in KSet"+ - ! ~ ¬ √ ∛ ∜ ⋆ ± ∓" # dotop allowed end # Operators that are both unary and binary function is_both_unary_and_binary(t, isdot) k = kind(t) - # Preventing is_suffixed here makes this consistent with the flisp parser. - # But is this by design or happenstance? - !is_suffixed(t) && ( - k in KSet"+ - ⋆ ± ∓" || (k in KSet"$ & ~" && !isdot) - ) + k in KSet"+ - ⋆ ± ∓" || (k in KSet"$ & ~" && !isdot) end function is_string_macro_suffix(k) @@ -353,8 +343,8 @@ function parse_LtoR(ps::ParseState, down, is_op) mark = position(ps) down(ps) while true - isdot, tk = peek_dotted_op_token(ps) - is_op(tk) || break + isdot, isassign, tk = peek_dotted_op_token(ps) + (is_op(tk) && !isassign) || break isdot && bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG bump(ps, remap_kind=K"Identifier") down(ps) @@ -369,9 +359,9 @@ end function parse_RtoL(ps::ParseState, down, is_op, self) mark = position(ps) down(ps) - isdot, tk = peek_dotted_op_token(ps) - if is_op(tk) - bump_dotted(ps, isdot, remap_kind=K"Identifier") + isdot, isassign, tk = peek_dotted_op_token(ps) + if is_op(tk) && !isassign + bump_dotted(ps, isdot, tk, remap_kind=K"Identifier") self(ps) emit(ps, mark, isdot ? 
K"dotcall" : K"call", INFIX_FLAG) end @@ -581,11 +571,13 @@ function parse_assignment(ps::ParseState, down) end function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {T} # where => specialize on `down` - isdot, t = peek_dotted_op_token(ps) + isdot, is_compound_assignment, t = peek_dotted_op_token(ps) k = kind(t) - if !is_prec_assignment(k) + + if !is_prec_assignment(t) && !is_compound_assignment return end + if k == K"~" if ps.space_sensitive && preceding_whitespace(t) && !preceding_whitespace(peek_token(ps, 2)) # Unary ~ in space sensitive context is not assignment precedence @@ -598,7 +590,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { # a .~ b ==> (dotcall-i a ~ b) # [a ~ b c] ==> (hcat (call-i a ~ b) c) # [a~b] ==> (vect (call-i a ~ b)) - bump_dotted(ps, isdot, remap_kind=K"Identifier") + bump_dotted(ps, isdot, t, remap_kind=K"Identifier") bump_trivia(ps) parse_assignment(ps, down) emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) @@ -608,16 +600,20 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where { # a += b ==> (+= a b) # a .= b ==> (.= a b) is_short_form_func = k == K"=" && !isdot && was_eventually_call(ps) - if k == K"op=" + if is_compound_assignment # x += y ==> (op= x + y) # x .+= y ==> (.op= x + y) bump_trivia(ps) - isdot && bump(ps, TRIVIA_FLAG) # TODO: NOTATION_FLAG - bump_split(ps, - (-1, K"Identifier", EMPTY_FLAGS), # op - (1, K"=", TRIVIA_FLAG)) + opmark = position(ps) + bump_dotted(ps, isdot, t, remap_kind=K"Identifier") + if is_compound_assignment && !is_prec_compound_assign(t) + emit(ps, opmark, K"error", + error="Compound assignment is not allowed for this operator") + end + bump(ps, TRIVIA_FLAG) # bump the = + k = K"op=" # Set k for the emit below else - bump_dotted(ps, isdot, TRIVIA_FLAG) + bump_dotted(ps, isdot, t, TRIVIA_FLAG) end bump_trivia(ps) # Syntax Edition TODO: We'd like to call `down` here when @@ -730,10 +726,10 @@ end function 
parse_arrow(ps::ParseState) mark = position(ps) parse_or(ps) - isdot, t = peek_dotted_op_token(ps) + isdot, isassign, t = peek_dotted_op_token(ps) k = kind(t) - if is_prec_arrow(k) - if kind(t) == K"-->" && !isdot && !is_suffixed(t) + if is_prec_arrow(t) + if kind(t) == K"-->" && !isdot # x --> y ==> (--> x y) # The only syntactic arrow bump(ps, TRIVIA_FLAG) parse_arrow(ps) @@ -743,7 +739,7 @@ function parse_arrow(ps::ParseState) # x <--> y ==> (call-i x <--> y) # x .--> y ==> (dotcall-i x --> y) # x -->₁ y ==> (call-i x -->₁ y) - bump_dotted(ps, isdot, remap_kind=K"Identifier") + bump_dotted(ps, isdot, t, remap_kind=K"Identifier") parse_arrow(ps) emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) end @@ -768,10 +764,10 @@ end function parse_lazy_cond(ps::ParseState, down, is_op, self) mark = position(ps) down(ps) - (isdot, t) = peek_dotted_op_token(ps) + (isdot, isassign, t) = peek_dotted_op_token(ps) k = kind(t) - if is_op(k) - bump_dotted(ps, isdot, TRIVIA_FLAG) + if is_op(t) + bump_dotted(ps, isdot, t, TRIVIA_FLAG) self(ps) emit(ps, mark, isdot ? 
dotted(k) : k, flags(t)) if isdot @@ -815,11 +811,11 @@ function parse_comparison(ps::ParseState, subtype_comparison=false) n_comparisons = 0 op_pos = NO_POSITION op_dotted = false - (initial_dot, initial_tok) = peek_dotted_op_token(ps) - while ((isdot, t) = peek_dotted_op_token(ps); is_prec_comparison(t)) + (initial_dot, initial_isassign, initial_tok) = peek_dotted_op_token(ps) + while ((isdot, isassign, t) = peek_dotted_op_token(ps); is_prec_comparison(t)) n_comparisons += 1 op_dotted = isdot - op_pos = bump_dotted(ps, isdot, emit_dot_node=true, remap_kind=K"Identifier") + op_pos = bump_dotted(ps, isdot, t, emit_dot_node=true, remap_kind=K"Identifier") parse_pipe_lt(ps) end if n_comparisons == 1 @@ -873,15 +869,16 @@ end function parse_range(ps::ParseState) mark = position(ps) parse_invalid_ops(ps) - (initial_dot, initial_tok) = peek_dotted_op_token(ps) + + (initial_dot, initial_isassign, initial_tok) = peek_dotted_op_token(ps) initial_kind = kind(initial_tok) - if initial_kind != K":" && is_prec_colon(initial_kind) - # a..b ==> (call-i a .. b) + if initial_kind != K":" && (is_prec_colon(initial_tok) || (initial_dot && initial_kind == K".")) + # a..b ==> (call-i a (dots-2) b) # a … b ==> (call-i a … b) # a .… b ==> (dotcall-i a … b) - bump_dotted(ps, initial_dot, remap_kind=K"Identifier") + bump_dotted(ps, initial_dot, initial_tok, remap_kind=K"Identifier") parse_invalid_ops(ps) - emit(ps, mark, initial_dot ? K"dotcall" : K"call", INFIX_FLAG) + emit(ps, mark, (initial_dot && initial_kind != K".") ? K"dotcall" : K"call", INFIX_FLAG) elseif initial_kind == K":" && ps.range_colon_enabled # a ? b : c:d ==> (? a b (call-i c : d)) n_colons = 0 @@ -948,8 +945,10 @@ function parse_range(ps::ParseState) # x... ==> (... x) # x:y... ==> (... (call-i x : y)) # x..y... ==> (... (call-i x .. y)) # flisp parser fails here - if peek(ps) == K"..." + if peek(ps) == K"." && peek(ps, 2) == K"." && peek(ps, 3) == K"." 
bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG) # second dot + bump(ps, TRIVIA_FLAG) # third dot emit(ps, mark, K"...") end end @@ -963,9 +962,9 @@ end function parse_invalid_ops(ps::ParseState) mark = position(ps) parse_expr(ps) - while ((isdot, t) = peek_dotted_op_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**") + while ((isdot, isassign, t) = peek_dotted_op_token(ps); kind(t) in KSet"ErrorInvalidOperator Error**") bump_trivia(ps) - bump_dotted(ps, isdot) + bump_dotted(ps, isdot, t) parse_expr(ps) emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) end @@ -993,7 +992,7 @@ end function parse_with_chains(ps::ParseState, down, is_op, chain_ops) mark = position(ps) down(ps) - while ((isdot, t) = peek_dotted_op_token(ps); is_op(kind(t))) + while ((isdot, isassign, t) = peek_dotted_op_token(ps); is_op(t) && !isassign) if ps.space_sensitive && preceding_whitespace(t) && is_both_unary_and_binary(t, isdot) && !preceding_whitespace(peek_token(ps, 2)) @@ -1006,9 +1005,9 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops) # [x+y + z] ==> (vect (call-i x + y z)) break end - bump_dotted(ps, isdot, remap_kind=K"Identifier") + bump_dotted(ps, isdot, t, remap_kind=K"Identifier") down(ps) - if kind(t) in chain_ops && !is_suffixed(t) && !isdot + if kind(t) in chain_ops && !isdot # a + b + c ==> (call-i a + b c) # a + b .+ c ==> (dotcall-i (call-i a + b) + c) parse_chain(ps, down, kind(t)) @@ -1024,8 +1023,8 @@ end # flisp: parse-chain function parse_chain(ps::ParseState, down, op_kind) while true - isdot, t = peek_dotted_op_token(ps) - if kind(t) != op_kind || is_suffixed(t) || isdot + isdot, isassign, t = peek_dotted_op_token(ps) + if kind(t) != op_kind || isdot break end if ps.space_sensitive && preceding_whitespace(t) && @@ -1142,7 +1141,8 @@ function parse_juxtapose(ps::ParseState) is_syntactic_unary_op(prev_k) || is_initial_reserved_word(ps, prev_k) ))) && (!is_operator(k) || is_radical_op(k)) && - !is_closing_token(ps, k) + !is_closing_token(ps, 
k) && k != K"..." && + k != K"ErrorInvalidOperator" && k != K"Error**" if prev_k == K"string" || is_string_delim(t) bump_invisible(ps, K"error", TRIVIA_FLAG, error="cannot juxtapose string literal") @@ -1191,7 +1191,7 @@ end function parse_unary(ps::ParseState) mark = position(ps) bump_trivia(ps) - (op_dotted, op_t) = peek_dotted_op_token(ps) + (op_dotted, op_isassign, op_t) = peek_dotted_op_token(ps) op_k = kind(op_t) if ( !is_operator(op_k) || @@ -1209,12 +1209,12 @@ function parse_unary(ps::ParseState) end t2 = peek_token(ps, 2+op_dotted) k2 = kind(t2) - if op_k in KSet"- +" && !is_suffixed(op_t) && !op_dotted + if op_k in KSet"- +" && !op_dotted if !preceding_whitespace(t2) && (k2 in KSet"Integer Float Float32" || (op_k == K"+" && k2 in KSet"BinInt HexInt OctInt")) - k3 = peek(ps, 3) - if is_prec_power(k3) || k3 in KSet"[ {" + t3 = peek_token(ps, 3) + if is_prec_power(t3) || kind(t3) in KSet"[ {" # `[`, `{` (issue #18851) and `^` have higher precedence than # unary negation # -2^x ==> (call-pre - (call-i 2 ^ x)) @@ -1258,7 +1258,7 @@ function parse_unary(ps::ParseState) # # (The flisp parser only considers commas before `;` and thus gets this # last case wrong) - op_pos = bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier") + op_pos = bump_dotted(ps, op_dotted, op_t, emit_dot_node=true, remap_kind=K"Identifier") space_before_paren = preceding_whitespace(t2) if space_before_paren @@ -1352,12 +1352,12 @@ function parse_unary(ps::ParseState) # -0x1 ==> (call-pre - 0x01) # - 2 ==> (call-pre - 2) # .-2 ==> (dotcall-pre - 2) - op_pos = bump_dotted(ps, op_dotted, remap_kind=K"Identifier") + op_pos = bump_dotted(ps, op_dotted, op_t, remap_kind=K"Identifier") else # /x ==> (call-pre (error /) x) # +₁ x ==> (call-pre (error +₁) x) # .<: x ==> (dotcall-pre (error (. 
<:)) x) - bump_dotted(ps, op_dotted, emit_dot_node=true, remap_kind=K"Identifier") + bump_dotted(ps, op_dotted, op_t, emit_dot_node=true, remap_kind=K"Identifier") op_pos = emit(ps, mark, K"error", error="not a unary operator") end parse_unary(ps) @@ -1387,8 +1387,8 @@ end # flisp: parse-factor-with-initial-ex function parse_factor_with_initial_ex(ps::ParseState, mark) parse_decl_with_initial_ex(ps, mark) - if ((isdot, t) = peek_dotted_op_token(ps); is_prec_power(kind(t))) - bump_dotted(ps, isdot, remap_kind=K"Identifier") + if ((isdot, isassign, t) = peek_dotted_op_token(ps); is_prec_power(t) && !isassign) + bump_dotted(ps, isdot, t, remap_kind=K"Identifier") parse_factor_after(ps) emit(ps, mark, isdot ? K"dotcall" : K"call", INFIX_FLAG) end @@ -1459,7 +1459,7 @@ end # flisp: parse-unary-prefix function parse_unary_prefix(ps::ParseState, has_unary_prefix=false) mark = position(ps) - (isdot, t) = peek_dotted_op_token(ps) + (isdot, isassign, t) = peek_dotted_op_token(ps) k = kind(t) if is_syntactic_unary_op(k) && !isdot k2 = peek(ps, 2) @@ -1751,7 +1751,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) maybe_strmac_1 = true emit(ps, mark, K".") end - elseif k == K"'" && !preceding_whitespace(t) + elseif is_prec_quote(t) && !preceding_whitespace(t) # f' ==> (call-post f ') # f'ᵀ ==> (call-post f 'ᵀ) bump(ps, remap_kind=K"Identifier") @@ -2146,15 +2146,28 @@ end function parse_global_local_const_vars(ps) mark = position(ps) n_commas = parse_comma(ps, false) - (isdot, t) = peek_dotted_op_token(ps) - if is_prec_assignment(t) + (isdot, isassign, t) = peek_dotted_op_token(ps) + + # Check if we have operator followed by = + is_compound_assignment = false + if is_operator(kind(t)) && !is_prec_assignment(t) + # Look ahead to see if next token is = + # For dotted operators like .+, we need to check token 3 + peek_pos = isdot ? 
3 : 2 + t2 = peek_token(ps, peek_pos) + if kind(t2) == K"=" && !preceding_whitespace(t2) + is_compound_assignment = true + end + end + + if is_prec_assignment(t) || is_compound_assignment if n_commas >= 1 # const x,y = 1,2 ==> (const (= (tuple x y) (tuple 1 2))) emit(ps, mark, K"tuple") end # const x = 1 ==> (const (= x 1)) # global x ~ 1 ==> (global (call-i x ~ 1)) - # global x += 1 ==> (global (+= x 1)) + # global x += 1 ==> (global (op= x + 1)) parse_assignment_with_initial_ex(ps, mark, parse_comma) else # global x,y ==> (global x y) @@ -2232,7 +2245,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool) # function (:)() end ==> (function (call (parens :)) (block)) # function (x::T)() end ==> (function (call (parens (::-i x T))) (block)) # function (::T)() end ==> (function (call (parens (::-pre T))) (block)) - # function (:*=(f))() end ==> (function (call (parens (call (quote-: *=) f))) (block)) + # function (:*=(f))() end ==> (function (call (parens (call (quote-: (op= *)) f))) (block)) emit(ps, mark, K"parens", PARENS_FLAG) end end @@ -2476,11 +2489,11 @@ function parse_import_atsym(ps::ParseState, allow_quotes=true) end end b = peek_behind(ps, pos) - if warn_parens && b.orig_kind != K".." + if warn_parens && b.kind != K"dots" emit_diagnostic(ps, mark, warning="parentheses are not required here") end ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) || - (!b.is_leaf && b.kind in KSet"$ var") + (!b.is_leaf && (b.kind in KSet"$ var" || b.kind == K"dots")) if !ok emit(ps, mark, K"error", error="expected identifier") end @@ -2589,10 +2602,6 @@ function parse_import_path(ps::ParseState) end if k == K"." bump(ps) - elseif k == K".." - bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS)) - elseif k == K"..." 
- bump_split(ps, (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS), (1,K".",EMPTY_FLAGS)) else break end @@ -2611,6 +2620,17 @@ function parse_import_path(ps::ParseState) # import A.⋆.f ==> (import (importpath A ⋆ f)) next_tok = peek_token(ps, 2) if is_operator(kind(next_tok)) + if kind(next_tok) == K"." && peek(ps, 3) == K"." + # Import the .. operator + # import A... ==> (import (importpath A (dots-2))) + bump_disallowed_space(ps) + bump(ps, TRIVIA_FLAG) + dotmark = position(ps) + bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG) + emit(ps, dotmark, K"dots", set_numeric_flags(2)) + continue + end if preceding_whitespace(t) # Whitespace in import path allowed but discouraged # import A .== ==> (import (importpath A ==)) @@ -2623,10 +2643,6 @@ function parse_import_path(ps::ParseState) end bump(ps, TRIVIA_FLAG) parse_import_atsym(ps) - elseif k == K"..." - # Import the .. operator - # import A... ==> (import (importpath A ..)) - bump_split(ps, (1,K".",TRIVIA_FLAG), (2,K"..",EMPTY_FLAGS)) elseif k in KSet"NewlineWs ; , : EndMarker" # import A; B ==> (import (importpath A)) break @@ -3082,13 +3098,13 @@ function parse_paren(ps::ParseState, check_identifiers=true, has_unary_prefix=fa @check peek(ps) == K"(" bump(ps, TRIVIA_FLAG) # K"(" after_paren_mark = position(ps) - (isdot, tok) = peek_dotted_op_token(ps) + (isdot, isassign, tok) = peek_dotted_op_token(ps) k = kind(tok) if k == K")" # () ==> (tuple-p) bump(ps, TRIVIA_FLAG) emit(ps, mark, K"tuple", PARENS_FLAG) - elseif is_syntactic_operator(k) + elseif is_syntactic_operator(k) || isassign # allow :(=) etc in unchecked contexts, eg quotes # :(=) ==> (quote-: (parens =)) parse_atom(ps, check_identifiers) @@ -3486,7 +3502,7 @@ end function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=false) bump_trivia(ps) mark = position(ps) - (leading_dot, leading_tok) = peek_dotted_op_token(ps) + (leading_dot, leading_isassign, leading_tok) = peek_dotted_op_token(ps) leading_kind = kind(leading_tok) # todo: Reorder to 
put most likely tokens first? if leading_dot @@ -3496,6 +3512,16 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal # . ==> (error .) emit(ps, mark, K"error", error="invalid identifier") end + elseif kind(leading_tok) == K"." && peek(ps, 2) == K"." && peek(ps, 3) == K"." + # ... + bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG) + bump(ps, TRIVIA_FLAG) + emit(ps, mark, K"dots", set_numeric_flags(3)) + if check_identifiers + # ... ==> (error ...) + emit(ps, mark, K"error", error="invalid identifier") + end elseif is_error(leading_kind) # Errors for bad tokens are emitted in validate_tokens() rather than # here. @@ -3583,16 +3609,27 @@ function parse_atom(ps::ParseState, check_identifiers=true, has_unary_prefix=fal @label is_operator # + ==> + # .+ ==> (. +) - bump_dotted(ps, leading_dot, emit_dot_node=true, remap_kind= + is_compound_assignment = !is_prec_assignment(leading_tok) && leading_isassign + bump_dotted(ps, leading_dot, leading_tok, emit_dot_node=!is_compound_assignment, remap_kind= is_syntactic_operator(leading_kind) ? leading_kind : K"Identifier") - if check_identifiers && !is_valid_identifier(leading_kind) - # += ==> (error (op= +)) + + # Check if this is a compound assignment operator pattern + if is_compound_assignment + bump(ps, TRIVIA_FLAG) # consume the = but mark as trivia + emit(ps, mark, leading_dot ? K".op=" : K"op=") + if check_identifiers + # += ==> (error (op= +)) + # .+= ==> (error (. (op= +))) + emit(ps, mark, K"error", error="invalid identifier") + end + # Quoted syntactic operators are allowed + # :+= ==> (quote-: (op= +)) + return + end + + if check_identifiers && !(is_valid_identifier(leading_kind) || (leading_dot && leading_kind == K".")) # ? ==> (error ?) - # .+= ==> (error (. 
(op= +))) emit(ps, mark, K"error", error="invalid identifier") - else - # Quoted syntactic operators allowed - # :+= ==> (quote-: (op= +)) end elseif is_keyword(leading_kind) if leading_kind == K"var" && (t = peek_token(ps,2); diff --git a/src/julia/tokenize.jl b/src/julia/tokenize.jl index 2bd0f56d..093c9713 100644 --- a/src/julia/tokenize.jl +++ b/src/julia/tokenize.jl @@ -2,10 +2,15 @@ module Tokenize export tokenize, untokenize -using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str, @callsite_inline +using ..JuliaSyntax: JuliaSyntax, Kind, @K_str, @KSet_str, @callsite_inline, + generic_operators_by_level, PrecedenceLevel, PREC_NONE, PREC_ASSIGNMENT, + PREC_PAIRARROW, PREC_CONDITIONAL, PREC_ARROW, PREC_LAZYOR, PREC_LAZYAND, + PREC_COMPARISON, PREC_PIPE_LT, PREC_PIPE_GT, PREC_COLON, PREC_PLUS, + PREC_BITSHIFT, PREC_TIMES, PREC_RATIONAL, PREC_POWER, PREC_DECL, + PREC_WHERE, PREC_DOT, PREC_QUOTE, PREC_UNICODE_OPS, PREC_COMPOUND_ASSIGN import ..JuliaSyntax: kind, - is_literal, is_contextual_keyword, is_word_operator + is_literal, is_contextual_keyword, is_word_operator, is_operator #------------------------------------------------------------------------------- # Character-based predicates for tokenization @@ -72,32 +77,6 @@ end readchar(io::IO) = eof(io) ? EOF_CHAR : read(io, Char) -# Some unicode operators are normalized by the tokenizer into their equivalent -# kinds. See also normalize_identifier() -const _ops_with_unicode_aliases = [ - # \minus '−' is normalized into K"-", - '−' => K"-" - # Lookalikes which are normalized into K"⋅", - # https://github.com/JuliaLang/julia/pull/25157, - '\u00b7' => K"⋅" # '·' Middle Dot,, - '\u0387' => K"⋅" # '·' Greek Ano Teleia,, -] - -function _nondot_symbolic_operator_kinds() - op_range = reinterpret(UInt16, K"BEGIN_OPS"):reinterpret(UInt16, K"END_OPS") - setdiff(reinterpret.(Kind, op_range), [ - K"ErrorInvalidOperator" - K"Error**" - K"..." - K"." 
- K"where" - K"isa" - K"in" - K".'" - K"op=" - ]) -end - function _char_in_set_expr(varname, firstchars) codes = sort!(UInt32.(unique(firstchars))) terms = [] @@ -121,15 +100,14 @@ end if c == EOF_CHAR || !isvalid(c) return false end - u = UInt32(c) - return $(_char_in_set_expr(:u, - append!(first.(string.(_nondot_symbolic_operator_kinds())), - first.(_ops_with_unicode_aliases)))) + # Check if character is known operator char or in our unicode ops dictionary + return c in ('!', '#', '$', '%', '&', '*', '+', '-', '−', '/', ':', '<', '=', '>', '?', '@', '\\', '^', '|', '~', '÷', '⊻') || + haskey(_unicode_ops, c) end # Checks whether a Char is an operator which can be prefixed with a dot `.` function is_dottable_operator_start_char(c) - return c != '?' && c != '$' && c != ':' && c != '\'' && is_operator_start_char(c) + return c != '?' && c != '$' && c != ':' && c != '\'' && c != '#' && c != '@' && is_operator_start_char(c) end @eval function isopsuffix(c::Char) @@ -151,9 +129,9 @@ end end function optakessuffix(k) - (K"BEGIN_OPS" <= k <= K"END_OPS") && + # Most operators can take suffix except for specific ones + is_operator(k) && !( - k == K"..." || K"BEGIN_ASSIGNMENTS" <= k <= K"END_ASSIGNMENTS" || k == K"?" || k == K"<:" || @@ -162,10 +140,7 @@ function optakessuffix(k) k == K"||" || k == K"in" || k == K"isa" || - k == K"≔" || - k == K"⩴" || k == K":" || - k == K".." 
|| k == K"$" || k == K"::" || k == K"where" || @@ -178,14 +153,16 @@ function optakessuffix(k) end const _unicode_ops = let - ks = _nondot_symbolic_operator_kinds() - ss = string.(ks) + # Map single-character unicode operators to their precedence levels + ops = Dict{Char, PrecedenceLevel}() - ops = Dict{Char, Kind}([first(s)=>k for (k,s) in zip(ks,ss) - if length(s) == 1 && !isascii(s[1])]) - for ck in _ops_with_unicode_aliases - push!(ops, ck) + # Add operators from generic_operators_by_level + for (prec, chars) in generic_operators_by_level + for c in chars + ops[c] = prec + end end + ops end @@ -197,12 +174,12 @@ struct RawToken # Offsets into a string or buffer startbyte::Int # The byte where the token start in the buffer endbyte::Int # The byte where the token ended in the buffer - suffix::Bool + op_precedence::PrecedenceLevel # If K"Operator", the operator's precedence level end function RawToken(kind::Kind, startbyte::Int, endbyte::Int) - RawToken(kind, startbyte, endbyte, false) + RawToken(kind, startbyte, endbyte, PREC_NONE) end -RawToken() = RawToken(K"error", 0, 0, false) +RawToken() = RawToken(K"error", 0, 0, PREC_NONE) const EMPTY_TOKEN = RawToken() @@ -427,17 +404,33 @@ end Returns a `RawToken` of kind `kind` with contents `str` and starts a new `RawToken`. 
"""
-function emit(l::Lexer, kind::Kind, maybe_op=true)
-    suffix = false
-    if optakessuffix(kind) && maybe_op
+function emit(l::Lexer, kind::Kind)
+    tok = RawToken(kind, startpos(l), position(l) - 1, PREC_NONE)
+
+    l.last_token = kind
+    return tok
+end
+
+function emit_operator(l::Lexer, kind::Kind, precedence::PrecedenceLevel, take_suffix=optakessuffix(kind))
+    if take_suffix
         while isopsuffix(peekchar(l))
             readchar(l)
-            suffix = true
+            kind = K"Operator"
         end
     end
+    tok = RawToken(kind, startpos(l), position(l) - 1, precedence)
+
+    l.last_token = kind
+    return tok
+end
-    tok = RawToken(kind, startpos(l), position(l) - 1, suffix)
+"""
+    emit_trivia(l::Lexer, kind::Kind)

Returns a `RawToken` of kind `kind` with contents `str` and starts a new `RawToken`.
+"""
+function emit_trivia(l::Lexer, kind::Kind)
+    tok = RawToken(kind, startpos(l), position(l) - 1, PREC_NONE)
     l.last_token = kind
     return tok
 end
@@ -450,9 +443,9 @@ Returns the next `RawToken`.
 function next_token(l::Lexer, start = true)
     start && start_token!(l)
     if !isempty(l.string_states)
-        lex_string_chunk(l)
+        return lex_string_chunk(l)
     else
-        _next_token(l, readchar(l))
+        return _next_token(l, readchar(l))
     end
 end
@@ -525,18 +518,44 @@ function _next_token(l::Lexer, c)
         return lex_plus(l);
     elseif c == '-'
         return lex_minus(l);
-    elseif c == '−' # \minus '−' treated as hyphen '-'
-        return emit(l, accept(l, '=') ?
K"op=" : K"-") elseif c == '`' return lex_backtick(l); + elseif c == '−' # \minus '−' treated as hyphen '-' + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) + else + return emit_operator(l, K"-", PREC_PLUS) + end + elseif c == '∈' + return emit_operator(l, K"∈", PREC_COMPARISON) + elseif c == '⋆' + return emit_operator(l, K"⋆", PREC_TIMES) + elseif c == '±' + return emit_operator(l, K"±", PREC_PLUS) + elseif c == '∓' + return emit_operator(l, K"∓", PREC_PLUS) + elseif c == '¬' + return emit(l, K"¬") + elseif c == '√' + return emit(l, K"√") + elseif c == '∛' + return emit(l, K"∛") + elseif c == '∜' + return emit(l, K"∜") + elseif c == '≔' + return emit_operator(l, K"≔", PREC_ASSIGNMENT) + elseif c == '⩴' + return emit_operator(l, K"⩴", PREC_ASSIGNMENT) + elseif c == '≕' + return emit_operator(l, K"≕", PREC_ASSIGNMENT) + elseif haskey(_unicode_ops, c) + return emit_operator(l, K"Operator", _unicode_ops[c]) elseif is_identifier_start_char(c) return lex_identifier(l, c) elseif isdigit(c) return lex_digit(l, K"Integer") - elseif (k = get(_unicode_ops, c, K"None")) != K"None" - return emit(l, k) else - emit(l, + return emit(l, !isvalid(c) ? K"ErrorInvalidUTF8" : is_invisible_char(c) ? K"ErrorInvisibleChar" : is_identifier_char(c) ? K"ErrorIdentifierStart" : @@ -642,7 +661,7 @@ function lex_string_chunk(l) K"\"\"\"" : K"```") else return emit(l, state.delim == '"' ? K"\"" : - state.delim == '`' ? K"`" : K"'", false) + state.delim == '`' ? K"`" : K"'") end end # Read a chunk of string characters @@ -741,7 +760,7 @@ function lex_whitespace(l::Lexer, c) end c = readchar(l) end - return emit(l, k) + return emit_trivia(l, k) end function lex_comment(l::Lexer) @@ -750,7 +769,8 @@ function lex_comment(l::Lexer) while true pc, ppc = dpeekchar(l) if pc == '\n' || (pc == '\r' && ppc == '\n') || pc == EOF_CHAR - return emit(l, valid ? K"Comment" : K"ErrorInvalidUTF8") + return valid ? 
emit_trivia(l, K"Comment") : + emit(l, K"ErrorInvalidUTF8") end valid &= isvalid(pc) readchar(l) @@ -782,7 +802,7 @@ function lex_comment(l::Lexer) outk = !valid ? K"ErrorInvalidUTF8" : bidi_state != init_bidi_state ? K"ErrorBidiFormatting" : K"Comment" - return emit(l, outk) + return valid ? emit_trivia(l, outk) : emit(l, outk) end end end @@ -795,54 +815,57 @@ end function lex_greater(l::Lexer) if accept(l, '>') if accept(l, '>') - if accept(l, '=') - return emit(l, K"op=") - else # >>>?, ? not a = - return emit(l, K">>>") + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) # >>>= + else + return emit_operator(l, K"Operator", PREC_BITSHIFT) # >>> end - elseif accept(l, '=') - return emit(l, K"op=") else - return emit(l, K">>") + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) # >>= + else + return emit_operator(l, K"Operator", PREC_BITSHIFT) # >> + end end elseif accept(l, '=') - return emit(l, K">=") + return emit_operator(l, K"Operator", PREC_COMPARISON) # >= elseif accept(l, ':') return emit(l, K">:") else - return emit(l, K">") + return emit_operator(l, K">", PREC_COMPARISON) end end # Lex a less char, a '<' has been consumed function lex_less(l::Lexer) if accept(l, '<') - if accept(l, '=') - return emit(l, K"op=") - else # '<') - return emit(l, K"<-->") + return emit_operator(l, K"Operator", PREC_ARROW) # <--> elseif accept(l, '-') return emit(l, K"ErrorInvalidOperator") else - return emit(l, K"<--") + return emit_operator(l, K"Operator", PREC_ARROW) # <-- end end else - return emit(l, K"<") + return emit_operator(l, K"<", PREC_COMPARISON) end end @@ -850,15 +873,12 @@ end # An '=' char has been consumed function lex_equal(l::Lexer) if accept(l, '=') - if accept(l, '=') - emit(l, K"===") - else - emit(l, K"==") - end + accept(l, '=') + return emit_operator(l, K"Operator", PREC_COMPARISON) # ==, === elseif accept(l, '>') - emit(l, K"=>") + return emit_operator(l, K"Operator", PREC_PAIRARROW) 
else - emit(l, K"=") + return emit(l, K"=") end end @@ -869,16 +889,16 @@ function lex_colon(l::Lexer) elseif accept(l, '=') return emit(l, K":=") else - return emit(l, K":") + return emit_operator(l, K":", PREC_COLON) end end function lex_exclaim(l::Lexer) if accept(l, '=') if accept(l, '=') - return emit(l, K"!==") + return emit_operator(l, K"Operator", PREC_COMPARISON) # !== else - return emit(l, K"!=") + return emit_operator(l, K"Operator", PREC_COMPARISON) # != end else return emit(l, K"!") @@ -886,84 +906,82 @@ function lex_exclaim(l::Lexer) end function lex_percent(l::Lexer) - if accept(l, '=') - return emit(l, K"op=") - else - return emit(l, K"%") - end + return emit_operator(l, K"Operator", peekchar(l) == '=' ? PREC_COMPOUND_ASSIGN : PREC_TIMES) end function lex_bar(l::Lexer) - if accept(l, '=') - return emit(l, K"op=") - elseif accept(l, '>') - return emit(l, K"|>") + if accept(l, '>') + return emit_operator(l, K"Operator", PREC_PIPE_GT) # |> elseif accept(l, '|') return emit(l, K"||") else - emit(l, K"|") + return emit_operator(l, K"Operator", peekchar(l) == '=' ? PREC_COMPOUND_ASSIGN : PREC_PLUS) end end function lex_plus(l::Lexer) if accept(l, '+') - return emit(l, K"++") - elseif accept(l, '=') - return emit(l, K"op=") + return emit_operator(l, K"++", PREC_PLUS) end - return emit(l, K"+") + # Check if followed by = for compound assignment + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) + end + return emit_operator(l, K"+", PREC_PLUS) end function lex_minus(l::Lexer) if accept(l, '-') if accept(l, '>') - return emit(l, K"-->") + return emit_operator(l, K"-->", PREC_ARROW) else return emit(l, K"ErrorInvalidOperator") # "--" is an invalid operator end elseif l.last_token != K"." 
&& accept(l, '>') - return emit(l, K"->") - elseif accept(l, '=') - return emit(l, K"op=") + return emit_operator(l, K"->", PREC_ARROW) end - return emit(l, K"-") + # Check if followed by = for compound assignment + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) + end + return emit_operator(l, K"-", PREC_PLUS) end function lex_star(l::Lexer) if accept(l, '*') return emit(l, K"Error**") # "**" is an invalid operator use ^ - elseif accept(l, '=') - return emit(l, K"op=") end - return emit(l, K"*") + # Check if followed by = for compound assignment + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) + end + return emit_operator(l, K"*", PREC_TIMES) end function lex_circumflex(l::Lexer) - if accept(l, '=') - return emit(l, K"op=") + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) end - return emit(l, K"^") + return emit_operator(l, K"Operator", PREC_POWER) # ^ end function lex_division(l::Lexer) - if accept(l, '=') - return emit(l, K"op=") + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) end - return emit(l, K"÷") + return emit_operator(l, K"Operator", PREC_TIMES) # / end function lex_dollar(l::Lexer) - if accept(l, '=') - return emit(l, K"op=") + # Check if followed by = for compound assignment + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) end - return emit(l, K"$") + return emit_operator(l, K"$", PREC_PLUS) end function lex_xor(l::Lexer) - if accept(l, '=') - return emit(l, K"op=") - end - return emit(l, K"⊻") + return emit_operator(l, K"Operator", peekchar(l) == '=' ? PREC_COMPOUND_ASSIGN : PREC_PLUS) end function accept_number(l::Lexer, f::F) where {F} @@ -987,7 +1005,7 @@ function lex_digit(l::Lexer, kind) pc,ppc = dpeekchar(l) if pc == '.' if ppc == '.' - # Number followed by K".." or K"..." + # Number followed by K"." 
return emit(l, kind) elseif kind === K"Float" # If we enter the function with kind == K"Float" then a '.' has been parsed. @@ -1098,20 +1116,21 @@ function lex_prime(l) is_literal(l.last_token) # FIXME ^ This doesn't cover all cases - probably needs involvement # from the parser state. - return emit(l, K"'") + return emit_operator(l, K"'", PREC_QUOTE) else push!(l.string_states, StringState(false, true, '\'', 0)) - return emit(l, K"'", false) + return emit(l, K"'") end end function lex_amper(l::Lexer) if accept(l, '&') return emit(l, K"&&") - elseif accept(l, '=') - return emit(l, K"op=") else - return emit(l, K"&") + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) + end + return emit_operator(l, K"&", PREC_TIMES) end end @@ -1145,44 +1164,32 @@ end # Parse a token starting with a forward slash. # A '/' has been consumed function lex_forwardslash(l::Lexer) - if accept(l, '/') - if accept(l, '=') - return emit(l, K"op=") - else - return emit(l, K"//") - end - elseif accept(l, '=') - return emit(l, K"op=") - else - return emit(l, K"/") - end + prec = accept(l, '/') ? PREC_RATIONAL : PREC_TIMES + return emit_operator(l, K"Operator", peekchar(l) == '=' ? 
PREC_COMPOUND_ASSIGN : prec) # // or / end function lex_backslash(l::Lexer) - if accept(l, '=') - return emit(l, K"op=") + # Check if followed by = for compound assignment + if peekchar(l) == '=' + return emit_operator(l, K"Operator", PREC_COMPOUND_ASSIGN) # \ before = end - return emit(l, K"\\") + return emit_operator(l, K"Operator", PREC_TIMES) end function lex_dot(l::Lexer) - if accept(l, '.') + if l.last_token == K"@" if accept(l, '.') - l.last_token == K"@" && return emit(l, K"Identifier") - return emit(l, K"...") - else - if is_dottable_operator_start_char(peekchar(l)) + if !accept(l, '.') && is_dottable_operator_start_char(peekchar(l)) readchar(l) return emit(l, K"ErrorInvalidOperator") - else - l.last_token == K"@" && return emit(l, K"Identifier") - return emit(l, K"..") end end - elseif Base.isdigit(peekchar(l)) + # Emit `.`, `..` and `...` as identifiers after `@` + emit(l, K"Identifier") + elseif l.last_token != K"." && Base.isdigit(peekchar(l)) + # Only start a numeric constant if the previous token wasn't a dot return lex_digit(l, K"Float") else - l.last_token == K"@" && return emit(l, K"Identifier") return emit(l, K".") end end @@ -1237,11 +1244,12 @@ function lex_identifier(l::Lexer, c) end if n > MAX_KW_LENGTH - emit(l, K"Identifier") + return emit(l, K"Identifier") elseif h == _true_hash || h == _false_hash - emit(l, K"Bool") + return emit(l, K"Bool") else - emit(l, get(_kw_hash, h, K"Identifier")) + k = get(_kw_hash, h, K"Identifier") + return emit(l, k) end end diff --git a/test/expr.jl b/test/expr.jl index d7547848..dde93e34 100644 --- a/test/expr.jl +++ b/test/expr.jl @@ -14,6 +14,8 @@ @test parseatom(":(a)") == QuoteNode(:a) @test parseatom(":(:a)") == Expr(:quote, QuoteNode(:a)) @test parseatom(":(1+2)") == Expr(:quote, Expr(:call, :+, 1, 2)) + @test parseatom(":...") == QuoteNode(Symbol("...")) + @test parseatom(":(...)") == QuoteNode(Symbol("...")) # Compatibility hack for VERSION >= v"1.4" # https://github.com/JuliaLang/julia/pull/34077 
@test parseatom(":true") == Expr(:quote, true) diff --git a/test/parser.jl b/test/parser.jl index 64ecc8ea..e54c8bc8 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -76,6 +76,8 @@ tests = [ "f(x) where S where U = 1" => "(function-= (where (where (call f x) S) U) 1)" "(f(x)::T) where S = 1" => "(function-= (where (parens (::-i (call f x) T)) S) 1)" "f(x) = 1 = 2" => "(function-= (call f x) (= 1 2))" # Should be a warning! + # Bad assignment with suffixed op + ((v = v"1.12",), "a +₁= b") => "(op= a (error +₁) b)" ], JuliaSyntax.parse_pair => [ "a => b" => "(call-i a => b)" @@ -141,14 +143,14 @@ tests = [ "1:\n2" => "(call-i 1 : (error))" ], JuliaSyntax.parse_range => [ - "a..b" => "(call-i a .. b)" + "a..b" => "(call-i a (dots-2) b)" "a … b" => "(call-i a … b)" "a .… b" => "(dotcall-i a … b)" "[1 :a]" => "(hcat 1 (quote-: a))" "[1 2:3 :a]" => "(hcat 1 (call-i 2 : 3) (quote-: a))" "x..." => "(... x)" "x:y..." => "(... (call-i x : y))" - "x..y..." => "(... (call-i x .. y))" + "x..y..." => "(... (call-i x (dots-2) y))" ], JuliaSyntax.parse_invalid_ops => [ "a--b" => "(call-i a (ErrorInvalidOperator) b)" @@ -617,7 +619,7 @@ tests = [ "function (::g(x))() end" => "(function (call (parens (::-pre (call g x)))) (block))" "function (f::T{g(i)})() end" => "(function (call (parens (::-i f (curly T (call g i))))) (block))" "function (::T)() end" => "(function (call (parens (::-pre T))) (block))" - "function (:*=(f))() end" => "(function (call (parens (call (quote-: *=) f))) (block))" + "function (:*=(f))() end" => "(function (call (parens (call (quote-: (op= *)) f))) (block))" "function begin() end" => "(function (call (error begin)) (block))" "function f() end" => "(function (call f) (block))" "function type() end" => "(function (call type) (block))" @@ -719,7 +721,7 @@ tests = [ "import A.:(+)" => "(import (importpath A (quote-: (parens +))))" "import A.==" => "(import (importpath A ==))" "import A.⋆.f" => "(import (importpath A ⋆ f))" - "import A..." 
=> "(import (importpath A ..))" + "import A..." => "(import (importpath A (dots-2)))" "import A; B" => "(import (importpath A))" # Colons not allowed first in import paths # but are allowed in trailing components (#473) @@ -816,25 +818,25 @@ tests = [ "&&" => "(error &&)" "||" => "(error ||)" "." => "(error .)" - "..." => "(error ...)" - "+=" => "(error +=)" - "-=" => "(error -=)" - "*=" => "(error *=)" - "/=" => "(error /=)" - "//=" => "(error //=)" - "|=" => "(error |=)" - "^=" => "(error ^=)" - "÷=" => "(error ÷=)" - "%=" => "(error %=)" - "<<=" => "(error <<=)" - ">>=" => "(error >>=)" - ">>>="=> "(error >>>=)" - "\\=" => "(error \\=)" - "&=" => "(error &=)" - ":=" => "(error :=)" - "\$=" => "(error \$=)" - "⊻=" => "(error ⊻=)" - ".+=" => "(error (. +=))" + "..." => "(error (dots-3))" + "+=" => "(error (op= +))" + "-=" => "(error (op= -))" + "*=" => "(error (op= *))" + "/=" => "(error (op= /))" + "//=" => "(error (op= //))" + "|=" => "(error (op= |))" + "^=" => "(error (op= ^))" + "÷=" => "(error (op= ÷))" + "%=" => "(error (op= %))" + "<<=" => "(error (op= <<))" + ">>=" => "(error (op= >>))" + ">>>="=> "(error (op= >>>))" + "\\=" => "(error (op= \\))" + "&=" => "(error (op= &))" + ":=" => "(error :=)" # Assignment operator, not `:`-update + "\$=" => "(error (op= \$))" + "⊻=" => "(error (op= ⊻))" + ".+=" => "(error (.op= +))" # Normal operators "+" => "+" # Assignment-precedence operators which can be used as identifiers @@ -843,8 +845,8 @@ tests = [ "⩴" => "⩴" "≕" => "≕" # Quoted syntactic operators allowed - ":+=" => "(quote-: +=)" - ":.+=" => "(quote-: (. +=))" + ":+=" => "(quote-: (op= +))" + ":.+=" => "(quote-: (.op= +))" ":.=" => "(quote-: (. =))" ":.&&" => "(quote-: (. &&))" # Special symbols quoted @@ -1116,7 +1118,7 @@ parsestmt_test_specs = [ # detecting raw vs non-raw strings. The old parser was tightly coupled to # the lexer and the parser state was used to disambiguate these cases. 
"x in' '" => "(call-i x in (char (error)))" - "x in'``\$" => "(call-i x in (call-i (juxtapose (char '`' (error-t)) (cmdstring-r (error-t))) \$ (error)))" + "x in'``\$" => "(wrapper (call-i x in (juxtapose (char '`' (error-t)) (cmdstring-r (error-t)))) (error-t \$))" "var\"#\"`str`" => "(juxtapose (var # (error-t)) (cmdstring-r \"str\"))" "var\"#\"\"str\"" => "(juxtapose (var # (error-t)) (error-t) (string \"str\"))" @@ -1143,7 +1145,7 @@ parsestmt_with_kind_tests = [ "a → b" => "(call-i a::Identifier →::Identifier b::Identifier)" "a < b < c" => "(comparison a::Identifier <::Identifier b::Identifier <::Identifier c::Identifier)" "a .<: b"=> "(dotcall-i a::Identifier <:::Identifier b::Identifier)" - "a .. b" => "(call-i a::Identifier ..::Identifier b::Identifier)" + "a .. b" => "(call-i a::Identifier (dots-2) b::Identifier)" "a : b" => "(call-i a::Identifier :::Identifier b::Identifier)" "-2^x" => "(call-pre -::Identifier (call-i 2::Integer ^::Identifier x::Identifier))" "-(2)" => "(call-pre -::Identifier (parens 2::Integer))" @@ -1165,8 +1167,8 @@ parsestmt_with_kind_tests = [ "a += b" => "(op= a::Identifier +::Identifier b::Identifier)" "a .+= b" => "(.op= a::Identifier +::Identifier b::Identifier)" "a >>= b" => "(op= a::Identifier >>::Identifier b::Identifier)" - ":+=" => "(quote-: +=::op=)" - ":.+=" => "(quote-: (. +=::op=))" + ":+=" => "(quote-: (op= +::Identifier))" + ":.+=" => "(quote-: (.op= +::Identifier))" ] @testset "parser `Kind` remapping" begin diff --git a/test/parser_api.jl b/test/parser_api.jl index 10a09d3a..2496ed82 100644 --- a/test/parser_api.jl +++ b/test/parser_api.jl @@ -214,8 +214,10 @@ tokensplit(str; kws...) 
= [kind(tok) => untokenize(tok, str) for tok in tokenize K"Integer" => "1", ] - # A predicate based on flags() - @test JuliaSyntax.is_suffixed(tokenize("+₁")[1]) + # +₁ is tokenized as a single identifier token (subscripts are valid in identifiers) + tokens = tokenize("+₁") + @test length(tokens) == 1 # Just the identifier, endmarker is not included in tokenize() + @test kind(tokens[1]) == K"Identifier" # Buffer interface @test tokenize(Vector{UInt8}("a + b")) == tokenize("a + b") diff --git a/test/tokenize.jl b/test/tokenize.jl index 50891520..cea7927d 100644 --- a/test/tokenize.jl +++ b/test/tokenize.jl @@ -129,11 +129,11 @@ end # testset K"NewlineWs",K"Comment", - K"NewlineWs",K"Integer",K"%",K"Integer", + K"NewlineWs",K"Integer",K"Operator",K"Integer", - K"NewlineWs",K"Identifier",K"'",K"/",K"Identifier",K"'", + K"NewlineWs",K"Identifier",K"'",K"Operator",K"Identifier",K"'", - K"NewlineWs",K"Identifier",K".",K"'",K"\\",K"Identifier",K".",K"'", + K"NewlineWs",K"Identifier",K".",K"'",K"Operator",K"Identifier",K".",K"'", K"NewlineWs",K"`",K"CmdString",K"`", @@ -155,7 +155,7 @@ end # testset end # testset @testset "issue 5, '..'" begin - @test kind.(collect(tokenize("1.23..3.21"))) == [K"Float",K"..",K"Float",K"EndMarker"] + @test kind.(collect(tokenize("1.23..3.21"))) == [K"Float",K".",K".",K"Float",K"EndMarker"] end @testset "issue 17, >>" begin @@ -175,18 +175,28 @@ end end @testset "test added operators" begin - @test tok("1+=2", 2).kind == K"op=" - @test tok("1-=2", 2).kind == K"op=" - @test tok("1*=2", 2).kind == K"op=" - @test tok("1^=2", 2).kind == K"op=" - @test tok("1÷=2", 2).kind == K"op=" - @test tok("1\\=2", 2).kind == K"op=" - @test tok("1\$=2", 2).kind == K"op=" - @test tok("1⊻=2", 2).kind == K"op=" + # Compound assignments now emit separate operator and = tokens + # Operators emit as K"Operator" when followed by = + @test tok("1+=2", 2).kind == K"Operator" # + before = + @test tok("1+=2", 3).kind == K"=" + @test tok("1-=2", 2).kind == K"Operator" # 
- before = + @test tok("1-=2", 3).kind == K"=" + @test tok("1*=2", 2).kind == K"Operator" # * before = + @test tok("1*=2", 3).kind == K"=" + @test tok("1^=2", 2).kind == K"Operator" # ^ before = + @test tok("1^=2", 3).kind == K"=" + @test tok("1÷=2", 2).kind == K"Operator" # ÷ before = + @test tok("1÷=2", 3).kind == K"=" + @test tok("1\\=2", 2).kind == K"Operator" # \ before = + @test tok("1\\=2", 3).kind == K"=" + @test tok("1\$=2", 2).kind == K"Operator" # $ before = + @test tok("1\$=2", 3).kind == K"=" + @test tok("1⊻=2", 2).kind == K"Operator" # ⊻ before = + @test tok("1⊻=2", 3).kind == K"=" @test tok("1:=2", 2).kind == K":=" @test tok("1-->2", 2).kind == K"-->" - @test tok("1<--2", 2).kind == K"<--" - @test tok("1<-->2", 2).kind == K"<-->" + @test tok("1<--2", 2).kind == K"Operator" + @test tok("1<-->2", 2).kind == K"Operator" @test tok("1>:2", 2).kind == K">:" end @@ -584,9 +594,9 @@ end end @testset "modifying function names (!) followed by operator" begin - @test toks("a!=b") == ["a"=>K"Identifier", "!="=>K"!=", "b"=>K"Identifier"] - @test toks("a!!=b") == ["a!"=>K"Identifier", "!="=>K"!=", "b"=>K"Identifier"] - @test toks("!=b") == ["!="=>K"!=", "b"=>K"Identifier"] + @test toks("a!=b") == ["a"=>K"Identifier", "!="=>K"Operator", "b"=>K"Identifier"] + @test toks("a!!=b") == ["a!"=>K"Identifier", "!="=>K"Operator", "b"=>K"Identifier"] + @test toks("!=b") == ["!="=>K"Operator", "b"=>K"Identifier"] end @testset "integer literals" begin @@ -712,10 +722,10 @@ end @test toks("1.#") == ["1."=>K"Float", "#"=>K"Comment"] # ellipses - @test toks("1..") == ["1"=>K"Integer", ".."=>K".."] - @test toks("1...") == ["1"=>K"Integer", "..."=>K"..."] - @test toks(".1..") == [".1"=>K"Float", ".."=>K".."] - @test toks("0x01..") == ["0x01"=>K"HexInt", ".."=>K".."] + @test toks("1..") == ["1"=>K"Integer", "."=>K".", "."=>K"."] + @test toks("1...") == ["1"=>K"Integer", "."=>K".", "."=>K".", "."=>K"."] + @test toks(".1..") == [".1"=>K"Float", "."=>K".", "."=>K"."] + @test 
toks("0x01..") == ["0x01"=>K"HexInt", "."=>K".", "."=>K"."] # Dotted operators and other dotted suffixes @test toks("1234 .+1") == ["1234"=>K"Integer", " "=>K"Whitespace", "."=>K".", "+"=>K"+", "1"=>K"Integer"] @@ -725,11 +735,11 @@ end "f"=>K"Identifier", "("=>K"(", "a"=>K"Identifier", ")"=>K")"] @test toks("1234.0 .f(a)") == ["1234.0"=>K"Float", " "=>K"Whitespace", "."=>K".", "f"=>K"Identifier", "("=>K"(", "a"=>K"Identifier", ")"=>K")"] - @test toks("1f0./1") == ["1f0"=>K"Float32", "."=>K".", "/"=>K"/", "1"=>K"Integer"] + @test toks("1f0./1") == ["1f0"=>K"Float32", "."=>K".", "/"=>K"Operator", "1"=>K"Integer"] # Dotted operators after numeric constants are ok - @test toks("1e1.⫪") == ["1e1"=>K"Float", "."=>K".", "⫪"=>K"⫪"] - @test toks("1.1.⫪") == ["1.1"=>K"Float", "."=>K".", "⫪"=>K"⫪"] + @test toks("1e1.⫪") == ["1e1"=>K"Float", "."=>K".", "⫪"=>K"Operator"] + @test toks("1.1.⫪") == ["1.1"=>K"Float", "."=>K".", "⫪"=>K"Operator"] @test toks("1e1.−") == ["1e1"=>K"Float", "."=>K".", "−"=>K"-"] @test toks("1.1.−") == ["1.1"=>K"Float", "."=>K".", "−"=>K"-"] # Non-dottable operators are not ok @@ -739,8 +749,8 @@ end # Ambiguous dotted operators @test toks("1.+") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+"] @test toks("1.+ ") == ["1."=>K"ErrorAmbiguousNumericConstant", "+"=>K"+", " "=>K"Whitespace"] - @test toks("1.⤋") == ["1."=>K"ErrorAmbiguousNumericConstant", "⤋"=>K"⤋"] - @test toks("1.⫪") == ["1."=>K"ErrorAmbiguousNumericConstant", "⫪"=>K"⫪"] + @test toks("1.⤋") == ["1."=>K"ErrorAmbiguousNumericConstant", "⤋"=>K"Operator"] + @test toks("1.⫪") == ["1."=>K"ErrorAmbiguousNumericConstant", "⫪"=>K"Operator"] # non-dottable ops are the exception @test toks("1.:") == ["1."=>K"Float", ":"=>K":"] @test toks("1.\$") == ["1."=>K"Float", "\$"=>K"$"] @@ -793,9 +803,24 @@ end @test length(collect(tokenize(io))) == 4 end +function _nondot_symbolic_operator_kinds() + op_range = reinterpret(UInt16, K"BEGIN_OPS"):reinterpret(UInt16, K"END_OPS") + setdiff(reinterpret.(Kind, 
op_range), [ + K"ErrorInvalidOperator" + K"Error**" + K"." + K".." + K"where" + K"isa" + K"in" + K".'" + K"op=" + ]) +end + @testset "dotted and suffixed operators" begin -for opkind in Tokenize._nondot_symbolic_operator_kinds() +for opkind in _nondot_symbolic_operator_kinds() op = string(opkind) strs = [ 1 => [ # unary @@ -853,19 +878,21 @@ end @testset "Normalization of Unicode symbols" begin # https://github.com/JuliaLang/julia/pull/25157 - @test tok("\u00b7").kind == K"⋅" - @test tok("\u0387").kind == K"⋅" - @test toks(".\u00b7") == ["."=>K".", "\u00b7"=>K"⋅"] - @test toks(".\u0387") == ["."=>K".", "\u0387"=>K"⋅"] + @test tok("\u00b7").kind == K"Operator" + @test tok("\u0387").kind == K"Operator" + @test toks(".\u00b7") == ["."=>K".", "\u00b7"=>K"Operator"] + @test toks(".\u0387") == ["."=>K".", "\u0387"=>K"Operator"] # https://github.com/JuliaLang/julia/pull/40948 @test tok("−").kind == K"-" - @test tok("−=").kind == K"op=" + # −= now emits separate tokens + @test tok("−=").kind == K"Operator" # − before = + @test tok("−=", 2).kind == K"=" @test toks(".−") == ["."=>K".", "−"=>K"-"] end @testset "perp" begin - @test tok("1 ⟂ 2", 3).kind==K"⟂" + @test tok("1 ⟂ 2", 3).kind==K"Operator" end @testset "outer" begin @@ -876,8 +903,9 @@ end @test toks("--") == ["--"=>K"ErrorInvalidOperator"] @test toks("1**2") == ["1"=>K"Integer", "**"=>K"Error**", "2"=>K"Integer"] @test toks("a<---b") == ["a"=>K"Identifier", "<---"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] - @test toks("a..+b") == ["a"=>K"Identifier", "..+"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] - @test toks("a..−b") == ["a"=>K"Identifier", "..−"=>K"ErrorInvalidOperator", "b"=>K"Identifier"] + # These used to test for invalid operators ..+ and ..−, but now .. 
is tokenized as two dots + @test toks("a..+b") == ["a"=>K"Identifier", "."=>K".", "."=>K".", "+"=>K"+", "b"=>K"Identifier"] + @test toks("a..−b") == ["a"=>K"Identifier", "."=>K".", "."=>K".", "−"=>K"-", "b"=>K"Identifier"] end @testset "hat suffix" begin @@ -893,7 +921,7 @@ end @testset "circ arrow right op" begin s = "↻" - @test collect(tokenize(s))[1].kind == K"↻" + @test collect(tokenize(s))[1].kind == K"Operator" end @testset "invalid float" begin @@ -917,8 +945,8 @@ end raw"<|" raw"|>" raw": .. … ⁝ ⋮ ⋱ ⋰ ⋯" - raw"$ + - ¦ | ⊕ ⊖ ⊞ ⊟ ++ ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣" - raw"* / ⌿ ÷ % & ⋅ ∘ × \ ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗" + raw"$ + - | ⊕ ⊖ ⊞ ⊟ ++ ∪ ∨ ⊔ ± ∓ ∔ ∸ ≏ ⊎ ⊻ ⊽ ⋎ ⋓ ⧺ ⧻ ⨈ ⨢ ⨣ ⨤ ⨥ ⨦ ⨧ ⨨ ⨩ ⨪ ⨫ ⨬ ⨭ ⨮ ⨹ ⨺ ⩁ ⩂ ⩅ ⩊ ⩌ ⩏ ⩐ ⩒ ⩔ ⩖ ⩗ ⩛ ⩝ ⩡ ⩢ ⩣" + raw"* / ÷ % & ⋅ ∘ × \ ∩ ∧ ⊗ ⊘ ⊙ ⊚ ⊛ ⊠ ⊡ ⊓ ∗ ∙ ∤ ⅋ ≀ ⊼ ⋄ ⋆ ⋇ ⋉ ⋊ ⋋ ⋌ ⋏ ⋒ ⟑ ⦸ ⦼ ⦾ ⦿ ⧶ ⧷ ⨇ ⨰ ⨱ ⨲ ⨳ ⨴ ⨵ ⨶ ⨷ ⨸ ⨻ ⨼ ⨽ ⩀ ⩃ ⩄ ⩋ ⩍ ⩎ ⩑ ⩓ ⩕ ⩘ ⩚ ⩜ ⩞ ⩟ ⩠ ⫛ ⊍ ▷ ⨝ ⟕ ⟖ ⟗" raw"//" raw"<< >> >>>" raw"^ ↑ ↓ ⇵ ⟰ ⟱ ⤈ ⤉ ⤊ ⤋ ⤒ ⤓ ⥉ ⥌ ⥍ ⥏ ⥑ ⥔ ⥕ ⥘ ⥙ ⥜ ⥝ ⥠ ⥡ ⥣ ⥥ ⥮ ⥯ ↑ ↓" @@ -926,7 +954,7 @@ end raw"." ] if VERSION >= v"1.6.0" - push!(ops, raw"<-- <-->") + push!(ops, raw"<-- <--> ¦ ⌿") end if VERSION >= v"1.7.0" append!(ops, [