Skip to content

Commit 6b3664c

Browse files
committed
Tweak macro name representation
The current representation for macro names is a bit peculiar. When the parser encounters `@a`, it treats `@` as notation for the macrocall and then `reset_node!`'s (which itself may be considerd a bit of a code smell) the `a` to a special MacroName token kind that (when translated back to julia Expr) implicitly adds back the `@`. Things get even more preculiar with `@var"a"` where only the token inside the string macro gets reset. One particular consequence of this is JuliaLang/julia#58885, because our translation back to Expr does not check the RAW_STRING_FLAG (whereas the translation for K"Identifier" does). A second issue is that we currently parse `@A.b.c` and `A.b.@c` to the same SyntaxTree (of course the green tree is different). We aren't currently being super precise about the required invariants for syntax trees, but in general it would be desirable for non-trivia notation (like `@`) to be precisely recoverable from the tree, which is not the case here. This is especially annoying because there are syntax cases that are errors for one of these, but not the other (e.g. `@A.b.$` is an error, but `A.B.@$` is allowed). Now, I think the wisdom of some of those syntax choices can be debated, but that is the situation we face. So this PR tries to clean that all up a bit by: - Replacing the terminal K"MacroName" by a non-terminal K"macro_name". With this form, `@A.c` parses as `(macro_name (. A c))` while `A.@c` parses as `(. A (macro_name c))`. - (In particular the `@` notation is now always associated with the macro_name). - Emitting the dots in `@..` and `@...` as direct identifier tokens rather than having to reset them back. - Adjusting everything else accordingly. Partially written by Claude Code, though it had some trouble with the actual code changes.
1 parent e02f29f commit 6b3664c

File tree

9 files changed

+162
-173
lines changed

9 files changed

+162
-173
lines changed

src/integration/expr.jl

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -246,8 +246,6 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt
246246
val isa UInt128 ? Symbol("@uint128_str") :
247247
Symbol("@big_str")
248248
return Expr(:macrocall, GlobalRef(Core, macname), nothing, str)
249-
elseif k == K"MacroName" && val === Symbol("@.")
250-
return Symbol("@__dot__")
251249
else
252250
return val
253251
end
@@ -296,6 +294,30 @@ function node_to_expr(cursor, source, txtbuf::Vector{UInt8}, txtbuf_offset::UInt
296294
nodehead, source)
297295
end
298296

297+
function adjust_macro_name!(retexpr::Union{Expr, Symbol}, k::Kind)
298+
if !(retexpr isa Symbol)
299+
retexpr::Expr
300+
# can happen for incomplete or errors
301+
(length(retexpr.args) < 2 || retexpr.head != :(.)) && return retexpr
302+
arg2 = retexpr.args[2]
303+
isa(arg2, QuoteNode) || return retexpr
304+
retexpr.args[2] = QuoteNode(adjust_macro_name!(arg2.value, k))
305+
return retexpr
306+
end
307+
if k == K"macro_name"
308+
if retexpr === Symbol(".")
309+
return Symbol("@__dot__")
310+
else
311+
return Symbol("@$retexpr")
312+
end
313+
elseif k == K"macro_name_cmd"
314+
return Symbol("@$(retexpr)_cmd")
315+
else
316+
@assert k == K"macro_name_str"
317+
return Symbol("@$(retexpr)_str")
318+
end
319+
end
320+
299321
# Split out from the above for codesize reasons, to avoid specialization on multiple
300322
# tree types.
301323
@noinline function _node_to_expr(retexpr::Expr, loc::LineNumberNode,
@@ -312,6 +334,8 @@ end
312334
# However, errors can add additional errors tokens which we represent
313335
# as e.g. `Expr(:var, ..., Expr(:error))`.
314336
return retexpr.args[1]
337+
elseif k in KSet"macro_name macro_name_cmd macro_name_str"
338+
return adjust_macro_name!(retexpr.args[1], k)
315339
elseif k == K"?"
316340
retexpr.head = :if
317341
elseif k == K"op=" && length(args) == 3
@@ -331,7 +355,7 @@ end
331355
elseif k == K"macrocall"
332356
if length(args) >= 2
333357
a2 = args[2]
334-
if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"CmdMacroName"
358+
if @isexpr(a2, :macrocall) && kind(firstchildhead) == K"macro_name_cmd"
335359
# Fix up for custom cmd macros like foo`x`
336360
args[2] = a2.args[3]
337361
end

src/julia/kinds.jl

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -194,15 +194,6 @@ register_kinds!(JuliaSyntax, 0, [
194194
"BEGIN_IDENTIFIERS"
195195
"Identifier"
196196
"Placeholder" # Used for empty catch variables, and all-underscore identifiers in lowering
197-
# Macro names are modelled as special kinds of identifiers because the full
198-
# macro name may not appear as characters in the source: The `@` may be
199-
# detached from the macro name as in `@A.x` (ugh!!), or have a _str or _cmd
200-
# suffix appended.
201-
"BEGIN_MACRO_NAMES"
202-
"MacroName"
203-
"StringMacroName"
204-
"CmdMacroName"
205-
"END_MACRO_NAMES"
206197
"END_IDENTIFIERS"
207198

208199
"BEGIN_KEYWORDS"
@@ -1048,6 +1039,10 @@ register_kinds!(JuliaSyntax, 0, [
10481039
"iteration"
10491040
"comprehension"
10501041
"typed_comprehension"
1042+
# Macro names
1043+
"macro_name"
1044+
"macro_name_cmd"
1045+
"macro_name_str"
10511046
# Container for a single statement/atom plus any trivia and errors
10521047
"wrapper"
10531048
"END_SYNTAX_KINDS"
@@ -1111,10 +1106,6 @@ const _nonunique_kind_names = Set([
11111106
K"String"
11121107
K"Char"
11131108
K"CmdString"
1114-
1115-
K"MacroName"
1116-
K"StringMacroName"
1117-
K"CmdMacroName"
11181109
])
11191110

11201111
"""
@@ -1201,7 +1192,6 @@ is_prec_unicode_ops(x) = K"BEGIN_UNICODE_OPS" <= kind(x) <= K"END_UNICODE_OPS"
12011192
is_prec_pipe_lt(x) = kind(x) == K"<|"
12021193
is_prec_pipe_gt(x) = kind(x) == K"|>"
12031194
is_syntax_kind(x) = K"BEGIN_SYNTAX_KINDS"<= kind(x) <= K"END_SYNTAX_KINDS"
1204-
is_macro_name(x) = K"BEGIN_MACRO_NAMES" <= kind(x) <= K"END_MACRO_NAMES"
12051195
is_syntactic_assignment(x) = K"BEGIN_SYNTACTIC_ASSIGNMENTS" <= kind(x) <= K"END_SYNTACTIC_ASSIGNMENTS"
12061196

12071197
function is_string_delim(x)

src/julia/literal_parsing.jl

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -430,12 +430,6 @@ function parse_julia_literal(txtbuf::Vector{UInt8}, head::SyntaxHead, srcrange)
430430
Symbol(normalize_identifier(val_str))
431431
elseif k == K"error"
432432
ErrorVal()
433-
elseif k == K"MacroName"
434-
Symbol("@$(normalize_identifier(val_str))")
435-
elseif k == K"StringMacroName"
436-
Symbol("@$(normalize_identifier(val_str))_str")
437-
elseif k == K"CmdMacroName"
438-
Symbol("@$(normalize_identifier(val_str))_cmd")
439433
elseif is_syntax_kind(head)
440434
nothing
441435
elseif is_keyword(k)

src/julia/parser.jl

Lines changed: 29 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1505,6 +1505,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15051505
# 2(x) ==> (* 2 x)
15061506
return
15071507
end
1508+
is_macrocall_on_entry = is_macrocall
15081509
# source range of the @-prefixed part of a macro
15091510
macro_atname_range = nothing
15101511
# $A.@x ==> (macrocall (. ($ A) @x))
@@ -1533,7 +1534,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15331534
# A.@var"#" a ==> (macrocall (. A (var @#)) a)
15341535
# @+x y ==> (macrocall @+ x y)
15351536
# [email protected] ==> (macrocall (. A @.) x)
1536-
fix_macro_name_kind!(ps, macro_name_position)
1537+
is_macrocall_on_entry && emit(ps, mark, K"macro_name")
15371538
let ps = with_space_sensitive(ps)
15381539
# Space separated macro arguments
15391540
# A.@foo a b ==> (macrocall (. A @foo) a b)
@@ -1566,6 +1567,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15661567
# f(a; b; c) ==> (call f a (parameters b) (parameters c))
15671568
# (a=1)() ==> (call (parens (= a 1)))
15681569
# f (a) ==> (call f (error-t) a)
1570+
is_macrocall_on_entry && emit(ps, mark, K"macro_name")
15691571
bump_disallowed_space(ps)
15701572
bump(ps, TRIVIA_FLAG)
15711573
opts = parse_call_arglist(ps, K")")
@@ -1580,11 +1582,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15801582
# @x(a, b) ==> (macrocall-p @x a b)
15811583
# A.@x(y) ==> (macrocall-p (. A @x) y)
15821584
# A.@x(y).z ==> (. (macrocall-p (. A @x) y) z)
1583-
fix_macro_name_kind!(ps, macro_name_position)
15841585
is_macrocall = false
1586+
# @f()() ==> (call (macrocall-p (macro_name f)))
1587+
is_macrocall_on_entry = false
15851588
macro_atname_range = nothing
15861589
end
15871590
elseif k == K"["
1591+
is_macrocall_on_entry && emit(ps, mark, K"macro_name")
15881592
m = position(ps)
15891593
# a [i] ==> (ref a (error-t) i)
15901594
bump_disallowed_space(ps)
@@ -1599,7 +1603,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15991603
# @S[a].b ==> (. (macrocall @S (vect a)) b)
16001604
#v1.7: @S[a ;; b] ==> (macrocall @S (ncat-2 a b))
16011605
#v1.6: @S[a ;; b] ==> (macrocall @S (error (ncat-2 a b)))
1602-
fix_macro_name_kind!(ps, macro_name_position)
16031606
emit(ps, m, ckind, cflags | set_numeric_flags(dim))
16041607
check_ncat_compat(ps, m, ckind)
16051608
emit(ps, mark, K"macrocall")
@@ -1643,12 +1646,18 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16431646
emit_diagnostic(ps, macro_atname_range...,
16441647
error="`@` must appear on first or last macro name component")
16451648
bump(ps, TRIVIA_FLAG, error="Unexpected `.` after macro name")
1649+
# Recover by treating the `@` as if it had been on the wole thing
1650+
reset_node!(ps, macro_atname_range[2], kind=K"TOMBSTONE")
1651+
is_macrocall_on_entry = true
16461652
else
16471653
bump(ps, TRIVIA_FLAG)
16481654
end
16491655
k = peek(ps)
16501656
if k == K"("
16511657
if is_macrocall
1658+
is_macrocall_on_entry && emit(ps, mark, K"macro_name")
1659+
# Recover by pretending we do have the syntax
1660+
is_macrocall_on_entry = false
16521661
# @M.(x) ==> (macrocall (dotcall @M (error-t) x))
16531662
bump_invisible(ps, K"error", TRIVIA_FLAG)
16541663
emit_diagnostic(ps, mark,
@@ -1677,7 +1686,11 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16771686
m = position(ps)
16781687
bump(ps, TRIVIA_FLAG)
16791688
parse_atom(ps)
1680-
emit(ps, m, K"$")
1689+
if is_macrocall
1690+
emit(ps, m, K"error", error="invalid macro name")
1691+
else
1692+
emit(ps, m, K"$")
1693+
end
16811694
macro_name_position = position(ps)
16821695
emit(ps, mark, K".")
16831696
elseif k == K"@"
@@ -1690,11 +1703,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16901703
bump(ps, TRIVIA_FLAG, error="repeated `@` in macro module path")
16911704
else
16921705
bump(ps, TRIVIA_FLAG)
1693-
is_macrocall = true
16941706
end
16951707
parse_macro_name(ps)
16961708
macro_name_position = position(ps)
1709+
!is_macrocall && emit(ps, m, K"macro_name")
16971710
macro_atname_range = (m, position(ps))
1711+
is_macrocall = true
16981712
emit(ps, mark, K".")
16991713
elseif k == K"'"
17001714
# f.' => (dotcall-post f (error '))
@@ -1717,6 +1731,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
17171731
bump(ps, remap_kind=K"Identifier")
17181732
emit(ps, mark, K"call", POSTFIX_OP_FLAG)
17191733
elseif k == K"{"
1734+
is_macrocall_on_entry && emit(ps, mark, K"macro_name")
17201735
# Type parameter curlies and macro calls
17211736
m = position(ps)
17221737
# S {a} ==> (curly S (error-t) a)
@@ -1727,7 +1742,6 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
17271742
# @S{a,b} ==> (macrocall S (braces a b))
17281743
# A.@S{a} ==> (macrocall (. A @S) (braces a))
17291744
# @S{a}.b ==> (. (macrocall @S (braces a)) b)
1730-
fix_macro_name_kind!(ps, macro_name_position)
17311745
emit(ps, m, K"braces", opts.delim_flags)
17321746
emit(ps, mark, K"macrocall")
17331747
min_supported_version(v"1.6", ps, mark, "macro call without space before `{}`")
@@ -1754,8 +1768,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
17541768
#
17551769
# Use a special token kind for string and cmd macro names so the
17561770
# names can be expanded later as necessary.
1757-
outk = is_string_delim(k) ? K"StringMacroName" : K"CmdMacroName"
1758-
fix_macro_name_kind!(ps, macro_name_position, outk)
1771+
outk = is_string_delim(k) ? K"macro_name_str" : K"macro_name_cmd"
1772+
emit(ps, mark, outk)
17591773
parse_string(ps, true)
17601774
t = peek_token(ps)
17611775
k = kind(t)
@@ -2039,7 +2053,7 @@ function parse_resword(ps::ParseState)
20392053
# export \n a ==> (export a)
20402054
# export \$a, \$(a*b) ==> (export (\$ a) (\$ (parens (call-i a * b))))
20412055
bump(ps, TRIVIA_FLAG)
2042-
parse_comma_separated(ps, x->parse_atsym(x, false))
2056+
parse_comma_separated(ps, x->parse_import_atsym(x, false))
20432057
emit(ps, mark, word)
20442058
elseif word in KSet"import using"
20452059
parse_imports(ps)
@@ -2372,34 +2386,6 @@ function _is_valid_macro_name(peektok)
23722386
return !is_error(peektok.kind) && (peektok.is_leaf || peektok.kind == K"var")
23732387
end
23742388

2375-
function fix_macro_name_kind!(ps::ParseState, macro_name_position, name_kind=nothing)
2376-
k = peek_behind(ps, macro_name_position).kind
2377-
if k == K"var"
2378-
macro_name_position = first_child_position(ps, macro_name_position)
2379-
k = peek_behind(ps, macro_name_position).kind
2380-
elseif k == K"parens"
2381-
# @(A) x ==> (macrocall (parens @A) x)
2382-
macro_name_position = first_child_position(ps, macro_name_position)
2383-
if macro_name_position == NO_POSITION
2384-
return
2385-
end
2386-
k = peek_behind(ps, macro_name_position).kind
2387-
elseif k == K"error"
2388-
# Error already reported in parse_macro_name
2389-
return
2390-
end
2391-
if isnothing(name_kind)
2392-
name_kind = _is_valid_macro_name(peek_behind(ps, macro_name_position)) ?
2393-
K"MacroName" : K"error"
2394-
if name_kind == K"error"
2395-
# TODO: This isn't quite accurate
2396-
emit_diagnostic(ps, macro_name_position, macro_name_position,
2397-
error="invalid macro name")
2398-
end
2399-
end
2400-
reset_node!(ps, macro_name_position, kind=name_kind)
2401-
end
2402-
24032389
# If remap_kind is false, the kind will be remapped by parse_call_chain after
24042390
# it discovers which component of the macro's module path is the macro name.
24052391
#
@@ -2425,15 +2411,16 @@ end
24252411
# Parse an identifier, interpolation or @-prefixed symbol
24262412
#
24272413
# flisp: parse-atsym
2428-
function parse_atsym(ps::ParseState, allow_quotes=true)
2414+
function parse_import_atsym(ps::ParseState, allow_quotes=true)
24292415
bump_trivia(ps)
24302416
if peek(ps) == K"@"
2417+
mark = position(ps)
24312418
# export @a ==> (export @a)
24322419
# export @var"'" ==> (export (var @'))
24332420
# export a, \n @b ==> (export a @b)
24342421
bump(ps, TRIVIA_FLAG)
24352422
parse_macro_name(ps)
2436-
fix_macro_name_kind!(ps, position(ps))
2423+
emit(ps, mark, K"macro_name")
24372424
else
24382425
# export a ==> (export a)
24392426
# export \n a ==> (export a)
@@ -2538,7 +2525,7 @@ function parse_import(ps::ParseState, word, has_import_prefix)
25382525
# import A: x as y ==> (import (: (importpath A) (as (importpath x) y)))
25392526
# using A: x as y ==> (using (: (importpath A) (as (importpath x) y)))
25402527
bump(ps, TRIVIA_FLAG)
2541-
parse_atsym(ps, false)
2528+
parse_import_atsym(ps, false)
25422529
emit(ps, mark, K"as")
25432530
if word == K"using" && !has_import_prefix
25442531
# using A as B ==> (using (error (as (importpath A) B)))
@@ -2589,7 +2576,7 @@ function parse_import_path(ps::ParseState)
25892576
end
25902577
# import @x ==> (import (importpath @x))
25912578
# import $A ==> (import (importpath ($ A)))
2592-
parse_atsym(ps, false)
2579+
parse_import_atsym(ps, false)
25932580
while true
25942581
t = peek_token(ps)
25952582
k = kind(t)
@@ -2611,7 +2598,7 @@ function parse_import_path(ps::ParseState)
26112598
bump_disallowed_space(ps)
26122599
end
26132600
bump(ps, TRIVIA_FLAG)
2614-
parse_atsym(ps)
2601+
parse_import_atsym(ps)
26152602
elseif k == K"..."
26162603
# Import the .. operator
26172604
# import A... ==> (import (importpath A ..))

src/julia/tokenize.jl

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1168,30 +1168,21 @@ end
11681168
function lex_dot(l::Lexer)
11691169
if accept(l, '.')
11701170
if accept(l, '.')
1171+
l.last_token == K"@" && return emit(l, K"Identifier")
11711172
return emit(l, K"...")
11721173
else
11731174
if is_dottable_operator_start_char(peekchar(l))
11741175
readchar(l)
11751176
return emit(l, K"ErrorInvalidOperator")
11761177
else
1178+
l.last_token == K"@" && return emit(l, K"Identifier")
11771179
return emit(l, K"..")
11781180
end
11791181
end
11801182
elseif Base.isdigit(peekchar(l))
11811183
return lex_digit(l, K"Float")
11821184
else
1183-
pc, dpc = dpeekchar(l)
1184-
# When we see a dot followed by an operator, we want to emit just the dot
1185-
# and let the next token be the operator
1186-
if is_operator_start_char(pc) || (pc == '!' && dpc == '=')
1187-
return emit(l, K".")
1188-
elseif pc == '÷'
1189-
return emit(l, K".")
1190-
elseif pc == '=' && dpc == '>'
1191-
return emit(l, K".")
1192-
elseif is_dottable_operator_start_char(pc)
1193-
return emit(l, K".")
1194-
end
1185+
l.last_token == K"@" && return emit(l, K"Identifier")
11951186
return emit(l, K".")
11961187
end
11971188
end

test/diagnostics.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ end
5353
Diagnostic(1, 9, :error, "try without catch or finally")
5454
# TODO: better range
5555
@test diagnostic("@A.\$x a") ==
56-
Diagnostic(6, 5, :error, "invalid macro name")
56+
Diagnostic(4, 5, :error, "invalid macro name")
5757

5858
@test diagnostic("a, , b") ==
5959
Diagnostic(4, 4, :error, "unexpected `,`")

test/expr.jl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,7 @@
554554

555555
# var""
556556
@test parsestmt("@var\"#\" a") == Expr(:macrocall, Symbol("@#"), LineNumberNode(1), :a)
557+
@test parsestmt("@var\"\\\"\" a") == Expr(:macrocall, Symbol("@\""), LineNumberNode(1), :a)
557558
@test parsestmt("A.@var\"#\" a") == Expr(:macrocall, Expr(:., :A, QuoteNode(Symbol("@#"))), LineNumberNode(1), :a)
558559

559560
# Square brackets

0 commit comments

Comments
 (0)