diff --git a/src/integration/expr.jl b/src/integration/expr.jl index c4fd8222..da9c67c9 100644 --- a/src/integration/expr.jl +++ b/src/integration/expr.jl @@ -246,6 +246,8 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt val isa UInt128 ? Symbol("@uint128_str") : Symbol("@big_str") return Expr(:macrocall, GlobalRef(Core, macname), nothing, str) + elseif is_identifier(k) + return lower_identifier_name(val, k) else return val end @@ -294,27 +296,18 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt nodehead, source) end -function adjust_macro_name!(retexpr::Union{Expr, Symbol}, k::Kind) - if !(retexpr isa Symbol) +function adjust_macro_name!(retexpr::Union{Expr, Symbol}) + if retexpr isa Symbol + return lower_identifier_name(retexpr, K"macro_name") + else retexpr::Expr - # can happen for incomplete or errors - (length(retexpr.args) < 2 || retexpr.head != :(.)) && return retexpr - arg2 = retexpr.args[2] - isa(arg2, QuoteNode) || return retexpr - retexpr.args[2] = QuoteNode(adjust_macro_name!(arg2.value, k)) - return retexpr - end - if k == K"macro_name" - if retexpr === Symbol(".") - return Symbol("@__dot__") - else - return Symbol("@$retexpr") + if length(retexpr.args) == 2 && retexpr.head == :(.) + arg2 = retexpr.args[2] + if isa(arg2, QuoteNode) && arg2.value isa Symbol + retexpr.args[2] = QuoteNode(lower_identifier_name(arg2.value, K"macro_name")) + end end - elseif k == K"macro_name_cmd" - return Symbol("@$(retexpr)_cmd") - else - @assert k == K"macro_name_str" - return Symbol("@$(retexpr)_str") + return retexpr end end @@ -334,8 +327,8 @@ end # However, errors can add additional errors tokens which we represent # as e.g. `Expr(:var, ..., Expr(:error))`. return retexpr.args[1] - elseif k in KSet"macro_name macro_name_cmd macro_name_str" - return adjust_macro_name!(retexpr.args[1], k) + elseif k == K"macro_name" + return adjust_macro_name!(retexpr.args[1]) elseif k == K"?" retexpr.head = :if elseif k == K"op=" && length(args) == 3 @@ -355,7 +348,7 @@ end elseif k == K"macrocall" if length(args) >= 2 a2 = args[2] - if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"macro_name_cmd" + if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"CmdMacroName" # Fix up for custom cmd macros like foo`x` args[2] = a2.args[3] end diff --git a/src/julia/kinds.jl b/src/julia/kinds.jl index 19a00eb2..96d78ad7 100644 --- a/src/julia/kinds.jl +++ b/src/julia/kinds.jl @@ -194,6 +194,10 @@ register_kinds!(JuliaSyntax, 0, [ "BEGIN_IDENTIFIERS" "Identifier" "Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering + # String and command macro names are modeled as a special kind of + # identifier as they need to be mangled before lookup. + "StrMacroName" + "CmdMacroName" "END_IDENTIFIERS" "BEGIN_KEYWORDS" @@ -1039,10 +1043,7 @@ register_kinds!(JuliaSyntax, 0, [ "iteration" "comprehension" "typed_comprehension" - # Macro names "macro_name" - "macro_name_cmd" - "macro_name_str" # Container for a single statement/atom plus any trivia and errors "wrapper" "END_SYNTAX_KINDS" @@ -1106,6 +1107,9 @@ const _nonunique_kind_names = Set([ K"String" K"Char" K"CmdString" + + K"StrMacroName" + K"CmdMacroName" ]) """ diff --git a/src/julia/literal_parsing.jl b/src/julia/literal_parsing.jl index 0d716e39..1db36d7f 100644 --- a/src/julia/literal_parsing.jl +++ b/src/julia/literal_parsing.jl @@ -416,7 +416,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) parse_int_literal(val_str) elseif k in KSet"BinInt OctInt HexInt" parse_uint_literal(val_str, k) - elseif k == K"Identifier" || k == K"Placeholder" + elseif is_identifier(k) if has_flags(head, RAW_STRING_FLAG) io = IOBuffer() unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false) @@ -442,3 +442,33 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange) end end +""" + lower_identifier_name(name, kind) + +Lower a Julia identifier `name` of given `kind` to the name used by the Julia +runtime. (In particular, this handles the name mangling of macros.) + +This is a lowering (rather than parsing) step, but is needed for `Expr` +conversion and is also used for pretty printing. +""" +function lower_identifier_name(name::AbstractString, k::Kind) + # Replicate eager lowering done by the flisp parser + if k == K"macro_name" + name == "." ? "@__dot__" : "@$name" + elseif k == K"StrMacroName" + "@$(name)_str" + elseif k == K"CmdMacroName" + "@$(name)_cmd" + else + name + end +end + +function lower_identifier_name(name::Symbol, k::Kind) + if k == K"Identifier" + name # avoid unnecessary conversion + else + Symbol(lower_identifier_name(string(name), k)) + end +end + diff --git a/src/julia/parser.jl b/src/julia/parser.jl index 2abed160..a2ce4209 100644 --- a/src/julia/parser.jl +++ b/src/julia/parser.jl @@ -1519,7 +1519,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) macro_atname_range = nothing # $A.@x ==> (macrocall (. ($ A) (macro_name x))) maybe_strmac = true - last_identifier_orig_kind = peek_behind(ps).orig_kind + last_identifier_pos = peek_behind_pos(ps) + last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind while true maybe_strmac_1 = false t = peek_token(ps) @@ -1577,7 +1578,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # f (a) ==> (call f (error-t) a) processing_macro_name = maybe_parsed_macro_name( ps, processing_macro_name, mark) - processing_macro_name = false bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) opts = parse_call_arglist(ps, K")") @@ -1714,7 +1714,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) bump(ps, TRIVIA_FLAG) end parse_macro_name(ps) - last_identifier_orig_kind = peek_behind(ps).orig_kind + last_identifier_pos = peek_behind_pos(ps) + last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind !is_macrocall && emit(ps, m, K"macro_name") macro_atname_range = (m, position(ps)) is_macrocall = true @@ -1747,7 +1748,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) emit(ps, macro_name_mark, K"macro_name") misplaced_atsym_mark = (aterror_mark, position(ps)) end - last_identifier_orig_kind = peek_behind(ps).orig_kind + last_identifier_pos = peek_behind_pos(ps) + last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind maybe_strmac_1 = true emit(ps, mark, K".") end @@ -1784,29 +1786,29 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) origk = last_identifier_orig_kind; origk == K"Identifier" || is_contextual_keyword(origk) || is_word_operator(origk)) # Custom string and command literals - # x"str" ==> (macrocall (macro_name_str x) (string-r "str")) - # x`str` ==> (macrocall (macro_name_cmd x) (cmdstring-r "str")) - # x"" ==> (macrocall (macro_name_str x) (string-r "")) - # x`` ==> (macrocall (macro_name_cmd x) (cmdstring-r "")) + # x"str" ==> (macrocall @x_str (string-r "str")) + # x`str` ==> (macrocall @x_cmd (cmdstring-r "str")) + # x"" ==> (macrocall @x_str (string-r "")) + # x`` ==> (macrocall @x_cmd (cmdstring-r "")) # Triple quoted processing for custom strings - # r"""\nx""" ==> (macrocall (macro_name_str r) (string-s-r "x")) - # r"""\n x\n y""" ==> (macrocall (macro_name_str r) (string-s-r "x\n" "y")) - # r"""\n x\\n y""" ==> (macrocall (macro_name_str r) (string-s-r "x\\\n" "y")) + # r"""\nx""" ==> (macrocall @r_str (string-s-r "x")) + # r"""\n x\n y""" ==> (macrocall @r_str (string-s-r "x\n" "y")) + # r"""\n x\\n y""" ==> (macrocall @r_str (string-s-r "x\\\n" "y")) # # Use a special token kind for string and cmd macro names so the # names can be expanded later as necessary. - outk = is_string_delim(k) ? K"macro_name_str" : K"macro_name_cmd" - emit(ps, mark, outk) + name_kind = is_string_delim(k) ? K"StrMacroName" : K"CmdMacroName" + reset_node!(ps, last_identifier_pos, kind=name_kind) parse_string(ps, true) t = peek_token(ps) k = kind(t) if !preceding_whitespace(t) && is_string_macro_suffix(k) # Macro suffixes can include keywords and numbers - # x"s"y ==> (macrocall (macro_name_str x) (string-r "s") "y") - # x"s"end ==> (macrocall (macro_name_str x) (string-r "s") "end") - # x"s"in ==> (macrocall (macro_name_str x) (string-r "s") "in") - # x"s"2 ==> (macrocall (macro_name_str x) (string-r "s") 2) - # x"s"10.0 ==> (macrocall (macro_name_str x) (string-r "s") 10.0) + # x"s"y ==> (macrocall @x_str (string-r "s") "y") + # x"s"end ==> (macrocall @x_str (string-r "s") "end") + # x"s"in ==> (macrocall @x_str (string-r "s") "in") + # x"s"2 ==> (macrocall @x_str (string-r "s") 2) + # x"s"10.0 ==> (macrocall @x_str (string-r "s") 10.0) suffix_kind = (k == K"Identifier" || is_keyword(k) || is_word_operator(k)) ? K"String" : k bump(ps, remap_kind=suffix_kind) diff --git a/src/porcelain/syntax_tree.jl b/src/porcelain/syntax_tree.jl index 4ad22cf6..ad08b25e 100644 --- a/src/porcelain/syntax_tree.jl +++ b/src/porcelain/syntax_tree.jl @@ -198,7 +198,7 @@ sourcefile(node::AbstractSyntaxNode) = node.source function leaf_string(ex) if !is_leaf(ex) - throw(ArgumentError("_value_string should be used for leaf nodes only")) + throw(ArgumentError("leaf_string should be used for leaf nodes only")) end k = kind(ex) value = ex.val @@ -243,7 +243,12 @@ function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode, show_kind) if is_error(node) print(io, "(", untokenize(head(node)), ")") else - print(io, leaf_string(node)) + str = leaf_string(node) + k = kind(node) + if is_identifier(k) && !show_kind + str = lower_identifier_name(str, k) + end + print(io, str) if show_kind print(io, "::", kind(node)) end diff --git a/test/parser.jl b/test/parser.jl index 64ecc8ea..a6ee4b62 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -463,28 +463,30 @@ tests = [ "S{a,b}" => "(curly S a b)" "T{y for x = xs; a}" => "(curly T (generator y (iteration (in x xs))) (parameters a))" # String macros - "x\"str\"" => """(macrocall (macro_name_str x) (string-r "str"))""" - "x`str`" => """(macrocall (macro_name_cmd x) (cmdstring-r "str"))""" - "x\"\"" => """(macrocall (macro_name_str x) (string-r ""))""" - "x``" => """(macrocall (macro_name_cmd x) (cmdstring-r ""))""" - "in\"str\"" => """(macrocall (macro_name_str in) (string-r "str"))""" - "outer\"str\"" => """(macrocall (macro_name_str outer) (string-r "str"))""" + "x\"str\"" => """(macrocall @x_str (string-r "str"))""" + "x`str`" => """(macrocall @x_cmd (cmdstring-r "str"))""" + "x\"\"" => """(macrocall @x_str (string-r ""))""" + "x``" => """(macrocall @x_cmd (cmdstring-r ""))""" + "in\"str\"" => """(macrocall @in_str (string-r "str"))""" + "outer\"str\"" => """(macrocall @outer_str (string-r "str"))""" + "A.x\"str\"" => """(macrocall (. A @x_str) (string-r "str"))""" + "A.x`str`" => """(macrocall (. A @x_cmd) (cmdstring-r "str"))""" # Triple quoted processing for custom strings - "r\"\"\"\nx\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x"))""" - "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x\n" "y"))""" - "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x\\\n" "y"))""" + "r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str (string-s-r "x"))""" + "r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\n" "y"))""" + "r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\\\n" "y"))""" # Macro suffixes can include keywords and numbers - "x\"s\"y" => """(macrocall (macro_name_str x) (string-r "s") "y")""" - "x\"s\"end" => """(macrocall (macro_name_str x) (string-r "s") "end")""" - "x\"s\"in" => """(macrocall (macro_name_str x) (string-r "s") "in")""" - "x\"s\"2" => """(macrocall (macro_name_str x) (string-r "s") 2)""" - "x\"s\"10.0" => """(macrocall (macro_name_str x) (string-r "s") 10.0)""" + "x\"s\"y" => """(macrocall @x_str (string-r "s") "y")""" + "x\"s\"end" => """(macrocall @x_str (string-r "s") "end")""" + "x\"s\"in" => """(macrocall @x_str (string-r "s") "in")""" + "x\"s\"2" => """(macrocall @x_str (string-r "s") 2)""" + "x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)""" # Cmd macro suffixes - "x`s`y" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "y")""" - "x`s`end" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "end")""" - "x`s`in" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "in")""" - "x`s`2" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") 2)""" - "x`s`10.0" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") 10.0)""" + "x`s`y" => """(macrocall @x_cmd (cmdstring-r "s") "y")""" + "x`s`end" => """(macrocall @x_cmd (cmdstring-r "s") "end")""" + "x`s`in" => """(macrocall @x_cmd (cmdstring-r "s") "in")""" + "x`s`2" => """(macrocall @x_cmd (cmdstring-r "s") 2)""" + "x`s`10.0" => """(macrocall @x_cmd (cmdstring-r "s") 10.0)""" ], JuliaSyntax.parse_resword => [ # In normal_context @@ -1167,6 +1169,9 @@ parsestmt_with_kind_tests = [ "a >>= b" => "(op= a::Identifier >>::Identifier b::Identifier)" ":+=" => "(quote-: +=::op=)" ":.+=" => "(quote-: (. +=::op=))" + # str/cmd macro name kinds + "x\"str\"" => """(macrocall x::StrMacroName (string-r "str"::String))""" + "x`str`" => """(macrocall x::CmdMacroName (cmdstring-r "str"::CmdString))""" ] @testset "parser `Kind` remapping" begin @@ -1195,8 +1200,8 @@ end # ɛµ normalizes to εμ @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)" @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall (macro_name \u03B5\u03BC))" - @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall (macro_name_str \u03B5\u03BC) (string-r \"\"))" - @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall (macro_name_cmd \u03B5\u03BC) (cmdstring-r \"\"))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str (string-r \"\"))" + @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd (cmdstring-r \"\"))" # · and · normalize to ⋅ @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)" @test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)"