Skip to content

Commit 1320b6e

Browse files
authored
Revert to the use of StrMacroName/CmdMacroName kinds (#583)
Trying out `macro_name_str` / `macro_name_cmd` in JuliaLowering, it turns out to be inconvenient to work with identifiers which change their meaning due to being nested inside another construct. This change reverts to the previous use of StrMacroName / CmdMacroName identifier kinds. macro_name is left as-is because it faithfully represents the position of the `@`. This isn't a complete reversion to the previous JuliaSyntax behavior. Previously, SyntaxNode would contain the symbol `@x_str` for the string macro `x` in `x"hi"` despite having kind set to `StrMacroName`. However, appending the `_str` is best seen as a symbolic lowering (/name mangling) step which isn't reflected in the source code and shouldn't be the business of the parser or parser-related tools. Thus, in the code here we defer this name mangling to the `Expr` conversion step instead, and introduce `lower_identifier_name()` as a standard way to do this conversion. To go with these slightly modified semantics and to mimic the `_str` name mangling, `StrMacroName` replaces the previous `StringMacroName`.
1 parent cc18f83 commit 1320b6e

File tree

6 files changed

+106
-67
lines changed

6 files changed

+106
-67
lines changed

src/integration/expr.jl

Lines changed: 15 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,8 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt
246246
val isa UInt128 ? Symbol("@uint128_str") :
247247
Symbol("@big_str")
248248
return Expr(:macrocall, GlobalRef(Core, macname), nothing, str)
249+
elseif is_identifier(k)
250+
return lower_identifier_name(val, k)
249251
else
250252
return val
251253
end
@@ -294,27 +296,18 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt
294296
nodehead, source)
295297
end
296298

297-
function adjust_macro_name!(retexpr::Union{Expr, Symbol}, k::Kind)
298-
if !(retexpr isa Symbol)
299+
function adjust_macro_name!(retexpr::Union{Expr, Symbol})
300+
if retexpr isa Symbol
301+
return lower_identifier_name(retexpr, K"macro_name")
302+
else
299303
retexpr::Expr
300-
# can happen for incomplete or errors
301-
(length(retexpr.args) < 2 || retexpr.head != :(.)) && return retexpr
302-
arg2 = retexpr.args[2]
303-
isa(arg2, QuoteNode) || return retexpr
304-
retexpr.args[2] = QuoteNode(adjust_macro_name!(arg2.value, k))
305-
return retexpr
306-
end
307-
if k == K"macro_name"
308-
if retexpr === Symbol(".")
309-
return Symbol("@__dot__")
310-
else
311-
return Symbol("@$retexpr")
304+
if length(retexpr.args) == 2 && retexpr.head == :(.)
305+
arg2 = retexpr.args[2]
306+
if isa(arg2, QuoteNode) && arg2.value isa Symbol
307+
retexpr.args[2] = QuoteNode(lower_identifier_name(arg2.value, K"macro_name"))
308+
end
312309
end
313-
elseif k == K"macro_name_cmd"
314-
return Symbol("@$(retexpr)_cmd")
315-
else
316-
@assert k == K"macro_name_str"
317-
return Symbol("@$(retexpr)_str")
310+
return retexpr
318311
end
319312
end
320313

@@ -334,8 +327,8 @@ end
334327
# However, errors can add additional error tokens which we represent
335328
# as e.g. `Expr(:var, ..., Expr(:error))`.
336329
return retexpr.args[1]
337-
elseif k in KSet"macro_name macro_name_cmd macro_name_str"
338-
return adjust_macro_name!(retexpr.args[1], k)
330+
elseif k == K"macro_name"
331+
return adjust_macro_name!(retexpr.args[1])
339332
elseif k == K"?"
340333
retexpr.head = :if
341334
elseif k == K"op=" && length(args) == 3
@@ -355,7 +348,7 @@ end
355348
elseif k == K"macrocall"
356349
if length(args) >= 2
357350
a2 = args[2]
358-
if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"macro_name_cmd"
351+
if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"CmdMacroName"
359352
# Fix up for custom cmd macros like foo`x`
360353
args[2] = a2.args[3]
361354
end

src/julia/kinds.jl

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -194,6 +194,10 @@ register_kinds!(JuliaSyntax, 0, [
194194
"BEGIN_IDENTIFIERS"
195195
"Identifier"
196196
"Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering
197+
# String and command macro names are modeled as a special kind of
198+
# identifier as they need to be mangled before lookup.
199+
"StrMacroName"
200+
"CmdMacroName"
197201
"END_IDENTIFIERS"
198202

199203
"BEGIN_KEYWORDS"
@@ -1039,10 +1043,7 @@ register_kinds!(JuliaSyntax, 0, [
10391043
"iteration"
10401044
"comprehension"
10411045
"typed_comprehension"
1042-
# Macro names
10431046
"macro_name"
1044-
"macro_name_cmd"
1045-
"macro_name_str"
10461047
# Container for a single statement/atom plus any trivia and errors
10471048
"wrapper"
10481049
"END_SYNTAX_KINDS"
@@ -1106,6 +1107,9 @@ const _nonunique_kind_names = Set([
11061107
K"String"
11071108
K"Char"
11081109
K"CmdString"
1110+
1111+
K"StrMacroName"
1112+
K"CmdMacroName"
11091113
])
11101114

11111115
"""

src/julia/literal_parsing.jl

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -416,7 +416,7 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
416416
parse_int_literal(val_str)
417417
elseif k in KSet"BinInt OctInt HexInt"
418418
parse_uint_literal(val_str, k)
419-
elseif k == K"Identifier" || k == K"Placeholder"
419+
elseif is_identifier(k)
420420
if has_flags(head, RAW_STRING_FLAG)
421421
io = IOBuffer()
422422
unescape_raw_string(io, txtbuf, first(srcrange), last(srcrange)+1, false)
@@ -442,3 +442,33 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
442442
end
443443
end
444444

445+
"""
446+
lower_identifier_name(name, kind)
447+
448+
Lower a Julia identifier `name` of given `kind` to the name used by the Julia
449+
runtime. (In particular, this handles the name mangling of macros.)
450+
451+
This is a lowering (rather than parsing) step, but is needed for `Expr`
452+
conversion and is also used for pretty printing.
453+
"""
454+
function lower_identifier_name(name::AbstractString, k::Kind)
455+
# Replicate eager lowering done by the flisp parser
456+
if k == K"macro_name"
457+
name == "." ? "@__dot__" : "@$name"
458+
elseif k == K"StrMacroName"
459+
"@$(name)_str"
460+
elseif k == K"CmdMacroName"
461+
"@$(name)_cmd"
462+
else
463+
name
464+
end
465+
end
466+
467+
function lower_identifier_name(name::Symbol, k::Kind)
468+
if k == K"Identifier"
469+
name # avoid unnecessary conversion
470+
else
471+
Symbol(lower_identifier_name(string(name), k))
472+
end
473+
end
474+

src/julia/parser.jl

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1519,7 +1519,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15191519
macro_atname_range = nothing
15201520
# $A.@x ==> (macrocall (. ($ A) (macro_name x)))
15211521
maybe_strmac = true
1522-
last_identifier_orig_kind = peek_behind(ps).orig_kind
1522+
last_identifier_pos = peek_behind_pos(ps)
1523+
last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind
15231524
while true
15241525
maybe_strmac_1 = false
15251526
t = peek_token(ps)
@@ -1577,7 +1578,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15771578
# f (a) ==> (call f (error-t) a)
15781579
processing_macro_name = maybe_parsed_macro_name(
15791580
ps, processing_macro_name, mark)
1580-
processing_macro_name = false
15811581
bump_disallowed_space(ps)
15821582
bump(ps, TRIVIA_FLAG)
15831583
opts = parse_call_arglist(ps, K")")
@@ -1714,7 +1714,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
17141714
bump(ps, TRIVIA_FLAG)
17151715
end
17161716
parse_macro_name(ps)
1717-
last_identifier_orig_kind = peek_behind(ps).orig_kind
1717+
last_identifier_pos = peek_behind_pos(ps)
1718+
last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind
17181719
!is_macrocall && emit(ps, m, K"macro_name")
17191720
macro_atname_range = (m, position(ps))
17201721
is_macrocall = true
@@ -1747,7 +1748,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
17471748
emit(ps, macro_name_mark, K"macro_name")
17481749
misplaced_atsym_mark = (aterror_mark, position(ps))
17491750
end
1750-
last_identifier_orig_kind = peek_behind(ps).orig_kind
1751+
last_identifier_pos = peek_behind_pos(ps)
1752+
last_identifier_orig_kind = peek_behind(ps, last_identifier_pos).orig_kind
17511753
maybe_strmac_1 = true
17521754
emit(ps, mark, K".")
17531755
end
@@ -1784,29 +1786,29 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
17841786
origk = last_identifier_orig_kind;
17851787
origk == K"Identifier" || is_contextual_keyword(origk) || is_word_operator(origk))
17861788
# Custom string and command literals
1787-
# x"str" ==> (macrocall (macro_name_str x) (string-r "str"))
1788-
# x`str` ==> (macrocall (macro_name_cmd x) (cmdstring-r "str"))
1789-
# x"" ==> (macrocall (macro_name_str x) (string-r ""))
1790-
# x`` ==> (macrocall (macro_name_cmd x) (cmdstring-r ""))
1789+
# x"str" ==> (macrocall @x_str (string-r "str"))
1790+
# x`str` ==> (macrocall @x_cmd (cmdstring-r "str"))
1791+
# x"" ==> (macrocall @x_str (string-r ""))
1792+
# x`` ==> (macrocall @x_cmd (cmdstring-r ""))
17911793
# Triple quoted processing for custom strings
1792-
# r"""\nx""" ==> (macrocall (macro_name_str r) (string-s-r "x"))
1793-
# r"""\n x\n y""" ==> (macrocall (macro_name_str r) (string-s-r "x\n" "y"))
1794-
# r"""\n x\\n y""" ==> (macrocall (macro_name_str r) (string-s-r "x\\\n" "y"))
1794+
# r"""\nx""" ==> (macrocall @r_str (string-s-r "x"))
1795+
# r"""\n x\n y""" ==> (macrocall @r_str (string-s-r "x\n" "y"))
1796+
# r"""\n x\\n y""" ==> (macrocall @r_str (string-s-r "x\\\n" "y"))
17951797
#
17961798
# Use a special token kind for string and cmd macro names so the
17971799
# names can be expanded later as necessary.
1798-
outk = is_string_delim(k) ? K"macro_name_str" : K"macro_name_cmd"
1799-
emit(ps, mark, outk)
1800+
name_kind = is_string_delim(k) ? K"StrMacroName" : K"CmdMacroName"
1801+
reset_node!(ps, last_identifier_pos, kind=name_kind)
18001802
parse_string(ps, true)
18011803
t = peek_token(ps)
18021804
k = kind(t)
18031805
if !preceding_whitespace(t) && is_string_macro_suffix(k)
18041806
# Macro suffixes can include keywords and numbers
1805-
# x"s"y ==> (macrocall (macro_name_str x) (string-r "s") "y")
1806-
# x"s"end ==> (macrocall (macro_name_str x) (string-r "s") "end")
1807-
# x"s"in ==> (macrocall (macro_name_str x) (string-r "s") "in")
1808-
# x"s"2 ==> (macrocall (macro_name_str x) (string-r "s") 2)
1809-
# x"s"10.0 ==> (macrocall (macro_name_str x) (string-r "s") 10.0)
1807+
# x"s"y ==> (macrocall @x_str (string-r "s") "y")
1808+
# x"s"end ==> (macrocall @x_str (string-r "s") "end")
1809+
# x"s"in ==> (macrocall @x_str (string-r "s") "in")
1810+
# x"s"2 ==> (macrocall @x_str (string-r "s") 2)
1811+
# x"s"10.0 ==> (macrocall @x_str (string-r "s") 10.0)
18101812
suffix_kind = (k == K"Identifier" || is_keyword(k) ||
18111813
is_word_operator(k)) ? K"String" : k
18121814
bump(ps, remap_kind=suffix_kind)

src/porcelain/syntax_tree.jl

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -198,7 +198,7 @@ sourcefile(node::AbstractSyntaxNode) = node.source
198198

199199
function leaf_string(ex)
200200
if !is_leaf(ex)
201-
throw(ArgumentError("_value_string should be used for leaf nodes only"))
201+
throw(ArgumentError("leaf_string should be used for leaf nodes only"))
202202
end
203203
k = kind(ex)
204204
value = ex.val
@@ -243,7 +243,12 @@ function _show_syntax_node_sexpr(io, node::AbstractSyntaxNode, show_kind)
243243
if is_error(node)
244244
print(io, "(", untokenize(head(node)), ")")
245245
else
246-
print(io, leaf_string(node))
246+
str = leaf_string(node)
247+
k = kind(node)
248+
if is_identifier(k) && !show_kind
249+
str = lower_identifier_name(str, k)
250+
end
251+
print(io, str)
247252
if show_kind
248253
print(io, "::", kind(node))
249254
end

test/parser.jl

Lines changed: 26 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -463,28 +463,30 @@ tests = [
463463
"S{a,b}" => "(curly S a b)"
464464
"T{y for x = xs; a}" => "(curly T (generator y (iteration (in x xs))) (parameters a))"
465465
# String macros
466-
"x\"str\"" => """(macrocall (macro_name_str x) (string-r "str"))"""
467-
"x`str`" => """(macrocall (macro_name_cmd x) (cmdstring-r "str"))"""
468-
"x\"\"" => """(macrocall (macro_name_str x) (string-r ""))"""
469-
"x``" => """(macrocall (macro_name_cmd x) (cmdstring-r ""))"""
470-
"in\"str\"" => """(macrocall (macro_name_str in) (string-r "str"))"""
471-
"outer\"str\"" => """(macrocall (macro_name_str outer) (string-r "str"))"""
466+
"x\"str\"" => """(macrocall @x_str (string-r "str"))"""
467+
"x`str`" => """(macrocall @x_cmd (cmdstring-r "str"))"""
468+
"x\"\"" => """(macrocall @x_str (string-r ""))"""
469+
"x``" => """(macrocall @x_cmd (cmdstring-r ""))"""
470+
"in\"str\"" => """(macrocall @in_str (string-r "str"))"""
471+
"outer\"str\"" => """(macrocall @outer_str (string-r "str"))"""
472+
"A.x\"str\"" => """(macrocall (. A @x_str) (string-r "str"))"""
473+
"A.x`str`" => """(macrocall (. A @x_cmd) (cmdstring-r "str"))"""
472474
# Triple quoted processing for custom strings
473-
"r\"\"\"\nx\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x"))"""
474-
"r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x\n" "y"))"""
475-
"r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall (macro_name_str r) (string-s-r "x\\\n" "y"))"""
475+
"r\"\"\"\nx\"\"\"" => raw"""(macrocall @r_str (string-s-r "x"))"""
476+
"r\"\"\"\n x\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\n" "y"))"""
477+
"r\"\"\"\n x\\\n y\"\"\"" => raw"""(macrocall @r_str (string-s-r "x\\\n" "y"))"""
476478
# Macro suffixes can include keywords and numbers
477-
"x\"s\"y" => """(macrocall (macro_name_str x) (string-r "s") "y")"""
478-
"x\"s\"end" => """(macrocall (macro_name_str x) (string-r "s") "end")"""
479-
"x\"s\"in" => """(macrocall (macro_name_str x) (string-r "s") "in")"""
480-
"x\"s\"2" => """(macrocall (macro_name_str x) (string-r "s") 2)"""
481-
"x\"s\"10.0" => """(macrocall (macro_name_str x) (string-r "s") 10.0)"""
479+
"x\"s\"y" => """(macrocall @x_str (string-r "s") "y")"""
480+
"x\"s\"end" => """(macrocall @x_str (string-r "s") "end")"""
481+
"x\"s\"in" => """(macrocall @x_str (string-r "s") "in")"""
482+
"x\"s\"2" => """(macrocall @x_str (string-r "s") 2)"""
483+
"x\"s\"10.0" => """(macrocall @x_str (string-r "s") 10.0)"""
482484
# Cmd macro suffixes
483-
"x`s`y" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "y")"""
484-
"x`s`end" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "end")"""
485-
"x`s`in" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") "in")"""
486-
"x`s`2" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") 2)"""
487-
"x`s`10.0" => """(macrocall (macro_name_cmd x) (cmdstring-r "s") 10.0)"""
485+
"x`s`y" => """(macrocall @x_cmd (cmdstring-r "s") "y")"""
486+
"x`s`end" => """(macrocall @x_cmd (cmdstring-r "s") "end")"""
487+
"x`s`in" => """(macrocall @x_cmd (cmdstring-r "s") "in")"""
488+
"x`s`2" => """(macrocall @x_cmd (cmdstring-r "s") 2)"""
489+
"x`s`10.0" => """(macrocall @x_cmd (cmdstring-r "s") 10.0)"""
488490
],
489491
JuliaSyntax.parse_resword => [
490492
# In normal_context
@@ -1167,6 +1169,9 @@ parsestmt_with_kind_tests = [
11671169
"a >>= b" => "(op= a::Identifier >>::Identifier b::Identifier)"
11681170
":+=" => "(quote-: +=::op=)"
11691171
":.+=" => "(quote-: (. +=::op=))"
1172+
# str/cmd macro name kinds
1173+
"x\"str\"" => """(macrocall x::StrMacroName (string-r "str"::String))"""
1174+
"x`str`" => """(macrocall x::CmdMacroName (cmdstring-r "str"::CmdString))"""
11701175
]
11711176

11721177
@testset "parser `Kind` remapping" begin
@@ -1195,8 +1200,8 @@ end
11951200
# ɛµ normalizes to εμ
11961201
@test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5()") == "(call \u03B5\u03BC)"
11971202
@test parse_to_sexpr_str(JuliaSyntax.parse_eq, "@\u025B\u00B5") == "(macrocall (macro_name \u03B5\u03BC))"
1198-
@test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall (macro_name_str \u03B5\u03BC) (string-r \"\"))"
1199-
@test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall (macro_name_cmd \u03B5\u03BC) (cmdstring-r \"\"))"
1203+
@test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5\"\"") == "(macrocall @\u03B5\u03BC_str (string-r \"\"))"
1204+
@test parse_to_sexpr_str(JuliaSyntax.parse_eq, "\u025B\u00B5``") == "(macrocall @\u03B5\u03BC_cmd (cmdstring-r \"\"))"
12001205
# · and · normalize to ⋅
12011206
@test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u00B7 b") == "(call-i a \u22C5 b)"
12021207
@test parse_to_sexpr_str(JuliaSyntax.parse_eq, "a \u0387 b") == "(call-i a \u22C5 b)"

0 commit comments

Comments
 (0)