Skip to content

Commit 9f4f2fc

Browse files
committed
Fix to allow operator-named macros
1 parent 6b55399 commit 9f4f2fc

File tree

3 files changed

+66
-45
lines changed

3 files changed

+66
-45
lines changed

README.md

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -365,15 +365,17 @@ parsing `key=val` pairs inside parentheses.
365365
Flattened generators are uniquely problematic because the Julia AST doesn't
366366
respect a key rule we normally expect: that the children of an AST node are a
367367
*contiguous* range in the source text. This is because the `for`s in
368-
`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop as
368+
`[xy for x in xs for y in ys]` are parsed in the normal order of a for loop to
369+
mean
369370

370371
```
371372
for x in xs
372373
for y in ys
373374
push!(collection, xy)
374375
```
375376

376-
and the standard Julia AST is like this:
377+
so the `xy` prefix is in the *body* of the innermost for loop. Following this,
378+
the standard Julia AST is like so:
377379

378380
```
379381
(flatten
@@ -384,10 +386,13 @@ and the standard Julia AST is like this:
384386
(= x xs)))
385387
```
386388

387-
however, note that if this tree were flattened, the order of tokens would be
388-
`(xy) (y in ys) (x in xs)` which is *not* the source order. So in this case
389-
our green tree must deviate from the Julia AST. The natural representation
390-
seems to be to flatten the generators:
389+
however, note that if this tree were flattened, the order would be
390+
`(xy) (y in ys) (x in xs)` and the `x` and `y` iterations are *opposite* of the
391+
source order.
392+
393+
Our green tree, by contrast, is strictly source-ordered, so we must deviate from the
394+
Julia AST. The natural representation seems to be to remove the generators and
395+
use a flattened structure:
391396

392397
```
393398
(flatten
@@ -438,6 +443,21 @@ seems to be to flatten the generators:
438443
* In braces after macrocall, `@S{a b}` is invalid but both `@S{a,b}` and
439444
`@S {a b}` parse. Conversely, `@S[a b]` parses.
440445

446+
* Macro names and invocations are post-processed from the output of
447+
`parse-atom` / `parse-call`, which leads to some surprising and questionable
448+
constructs which "work":
449+
- Absurdities like `@(((((a))))) x ==> (macrocall @a x)`
450+
- Infix macros!? `@(x + y) ==> (macrocall @+ x y)` (ok, kinda cute and has
451+
some weird logic to it... but what?)
452+
- Similarly additional parentheses are allowed `@(f(x)) ==> (macrocall @f x)`
453+
454+
* Allowing `@` first in macro module paths (eg `@A.B.x` instead of `A.B.@x`)
455+
seems like unnecessary variation in syntax. It makes parsing valid macro
456+
module paths more complex and leads to oddities like `@$.x y ==> (macrocall
457+
($ (quote x)) y)` where the `$` is first parsed as a macro name, but turns out
458+
to be the module name after the `.` is parsed. But `$` can never be a valid
459+
module name in normal Julia code so this makes no sense.
460+
441461
# Comparisons to other packages
442462

443463
### Official Julia compiler

src/parser.jl

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -71,8 +71,8 @@ function peek_token(ps::ParseState, n=1; skip_newlines=nothing)
7171
peek_token(ps.stream, n, skip_newlines=skip_nl)
7272
end
7373

74-
function peek_behind(ps::ParseState, args...)
75-
peek_behind(ps.stream, args...)
74+
function peek_behind(ps::ParseState, args...; kws...)
75+
peek_behind(ps.stream, args...; kws...)
7676
end
7777

7878
function bump(ps::ParseState, flags=EMPTY_FLAGS; skip_newlines=nothing, kws...)
@@ -1336,20 +1336,15 @@ function parse_identifier_or_interpolate(ps::ParseState)
13361336
end
13371337
end
13381338

1339-
# Emit an error if the call chain syntax is not a valid module reference
1340-
function emit_modref_error(ps, mark)
1341-
emit(ps, mark, K"error", error="not a valid module reference")
1342-
end
1343-
1344-
function finish_macroname(ps, mark, is_valid_modref, macro_name_position,
1339+
function finish_macroname(ps, mark, valid_macroname, macro_name_position,
13451340
name_kind=nothing)
1346-
if is_valid_modref
1341+
if valid_macroname
13471342
if isnothing(name_kind)
13481343
name_kind = macro_name_kind(peek_behind(ps, macro_name_position).kind)
13491344
end
13501345
reset_node!(ps, macro_name_position, kind = name_kind)
13511346
else
1352-
emit(ps, mark, K"error", error="not a valid module reference")
1347+
emit(ps, mark, K"error", error="not a valid macro name or macro module path")
13531348
end
13541349
end
13551350

@@ -1367,14 +1362,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
13671362
end
13681363
# source range of the @-prefixed part of a macro
13691364
macro_atname_range = nothing
1370-
kb = peek_behind(ps).kind
13711365
# $A.@x ==> (macrocall (. ($ A) (quote @x)))
1372-
is_valid_modref = kb in KSet`Identifier . $`
1366+
valid_macroname = peek_behind(ps, skip_trivia=false).kind in KSet`Identifier . $`
13731367
# We record the last component of chains of dot-separated identifiers so we
13741368
# know which identifier was the macro name.
13751369
macro_name_position = position(ps) # points to same output span as peek_behind
13761370
while true
1377-
this_iter_valid_modref = false
1371+
this_iter_valid_macroname = false
13781372
t = peek_token(ps)
13791373
k = kind(t)
13801374
if is_macrocall && (t.had_whitespace || is_closing_token(ps, k))
@@ -1384,7 +1378,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
13841378
# @foo (x,y) ==> (macrocall @foo (tuple x y))
13851379
# a().@x y ==> (macrocall (error (. (call a) (quote x))) y)
13861380
# [@foo "x"] ==> (vect (macrocall @foo "x"))
1387-
finish_macroname(ps, mark, is_valid_modref, macro_name_position)
1381+
finish_macroname(ps, mark, valid_macroname, macro_name_position)
13881382
with_space_sensitive(ps) do ps
13891383
# Space separated macro arguments
13901384
# A.@foo a b ==> (macrocall (. A (quote @foo)) a b)
@@ -1420,7 +1414,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
14201414
elseif k == K"("
14211415
if is_macrocall
14221416
# a().@x(y) ==> (macrocall (error (. (call a) (quote x))) y)
1423-
finish_macroname(ps, mark, is_valid_modref, macro_name_position)
1417+
finish_macroname(ps, mark, valid_macroname, macro_name_position)
14241418
end
14251419
# f(a,b) ==> (call f a b)
14261420
# f (a) ==> (call f (error-t) a)
@@ -1443,7 +1437,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
14431437
elseif k == K"["
14441438
if is_macrocall
14451439
# a().@x[1] ==> (macrocall (ref (error (. (call a) (quote x))) 1))
1446-
finish_macroname(ps, mark, is_valid_modref, macro_name_position)
1440+
finish_macroname(ps, mark, valid_macroname, macro_name_position)
14471441
end
14481442
# a [i] ==> (ref a (error-t) i)
14491443
bump_disallowed_space(ps)
@@ -1479,7 +1473,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
14791473
bump(ps)
14801474
emit(ps, emark, K"error", TRIVIA_FLAG,
14811475
error="the .' operator for transpose is discontinued")
1482-
is_valid_modref = false
1476+
valid_macroname = false
14831477
continue
14841478
end
14851479
if !isnothing(macro_atname_range)
@@ -1529,7 +1523,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15291523
emit(ps, m, K"inert")
15301524
emit(ps, mark, K".")
15311525
# A.$B.@x ==> (macrocall (. (. A (inert ($ B))) (quote @x)))
1532-
this_iter_valid_modref = true
1526+
this_iter_valid_macroname = true
15331527
elseif k == K"@"
15341528
# A macro call after some prefix A has been consumed
15351529
# A.@x ==> (macrocall (. A (quote @x)))
@@ -1547,7 +1541,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15471541
macro_atname_range = (m, macro_name_position)
15481542
emit(ps, m, K"quote")
15491543
emit(ps, mark, K".")
1550-
this_iter_valid_modref = true
1544+
this_iter_valid_macroname = true
15511545
else
15521546
# Field/property syntax
15531547
# f.x.y ==> (. (. f (quote x)) (quote y))
@@ -1556,7 +1550,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15561550
macro_name_position = position(ps)
15571551
emit(ps, m, K"quote")
15581552
emit(ps, mark, K".")
1559-
this_iter_valid_modref = true
1553+
this_iter_valid_macroname = true
15601554
end
15611555
elseif k == K"'"
15621556
if !is_suffixed(t)
@@ -1572,7 +1566,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15721566
# Type parameter curlies and macro calls
15731567
if is_macrocall
15741568
# a().@x{y} ==> (macrocall (error (. (call a) (quote x))) (braces y))
1575-
finish_macroname(ps, mark, is_valid_modref, macro_name_position)
1569+
finish_macroname(ps, mark, valid_macroname, macro_name_position)
15761570
end
15771571
m = position(ps)
15781572
# S {a} ==> (curly S (error-t) a)
@@ -1590,7 +1584,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15901584
emit(ps, mark, K"curly")
15911585
end
15921586
elseif k in KSet` " """ \` \`\`\` ` &&
1593-
!t.had_whitespace && is_valid_modref
1587+
!t.had_whitespace && valid_macroname
15941588
# Custom string and command literals
15951589
# x"str" ==> (macrocall @x_str "str")
15961590
# x`str` ==> (macrocall @x_cmd "str")
@@ -1600,7 +1594,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16001594
# Use a special token kind for string and cmd macro names so the
16011595
# names can be expanded later as necessary.
16021596
outk = is_string_delim(k) ? K"StringMacroName" : K"CmdMacroName"
1603-
finish_macroname(ps, mark, is_valid_modref, macro_name_position, outk)
1597+
finish_macroname(ps, mark, valid_macroname, macro_name_position, outk)
16041598
parse_raw_string(ps)
16051599
t = peek_token(ps)
16061600
k = kind(t)
@@ -1619,7 +1613,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16191613
else
16201614
break
16211615
end
1622-
is_valid_modref &= this_iter_valid_modref
1616+
valid_macroname &= this_iter_valid_macroname
16231617
end
16241618
end
16251619

@@ -2175,22 +2169,24 @@ function macro_name_kind(k)
21752169
end
21762170

21772171
# The macro name's kind is not remapped here; callers do that, e.g. parse_call_chain after
2178-
# it discovers the macro name component of the module path.
2172+
# it discovers which component of the macro's module path is the macro name.
21792173
#
21802174
# flisp: parse-macro-name
2181-
function parse_macro_name(ps::ParseState; remap_kind=false)
2175+
function parse_macro_name(ps::ParseState)
21822176
bump_disallowed_space(ps)
2183-
if peek(ps) == K"."
2184-
# @. y ==> (macrocall (quote @__dot__) y)
2177+
mark = position(ps)
2178+
k = peek(ps)
2179+
if k == K"."
2180+
# @. y ==> (macrocall @__dot__ y)
21852181
bump(ps)
21862182
else
2183+
# @! x ==> (macrocall @! x)
2184+
# @.. x ==> (macrocall @.. x)
2185+
# @$ x ==> (macrocall @$ x)
21872186
with_space_sensitive(ps) do ps1
21882187
parse_atom(ps1, false)
21892188
end
21902189
end
2191-
if remap_kind
2192-
reset_node!(ps, position(ps), kind=macro_name_kind(peek_behind(ps).kind))
2193-
end
21942190
end
21952191

21962192
# Parse an identifier, interpolation, or @-prefixed symbol
@@ -2202,7 +2198,8 @@ function parse_atsym(ps::ParseState)
22022198
# export @a ==> (export @a)
22032199
# export a, \n @b ==> (export a @b)
22042200
bump(ps, TRIVIA_FLAG)
2205-
parse_macro_name(ps, remap_kind=true)
2201+
parse_macro_name(ps)
2202+
reset_node!(ps, position(ps), kind=macro_name_kind(peek_behind(ps).kind))
22062203
else
22072204
# export a ==> (export a)
22082205
# export \n a ==> (export a)
@@ -3133,7 +3130,10 @@ function parse_atom(ps::ParseState, check_identifiers=true)
31333130
# Quoted syntactic operators allowed
31343131
# :+= ==> (quote +=)
31353132
# :.= ==> (quote .=)
3136-
bump(ps)
3133+
# Remap the kind here to K"Identifier", as operators parsed in this
3134+
# branch should be in "identifier-like" positions (I guess this is
3135+
# correct? is it convenient?)
3136+
bump(ps, remap_kind=K"Identifier")
31373137
end
31383138
elseif is_keyword(leading_kind)
31393139
if leading_kind == K"var" && (t = peek_token(ps,2);

test/parser.jl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -229,14 +229,13 @@ tests = [
229229
"f(a).g(b)" => "(call (. (call f a) (quote g)) b)"
230230
"\$A.@x" => "(macrocall (. (\$ A) (quote @x)))"
231231
# do
232-
"f() do x, y\n body end" => "(do (call f) (-> (tuple x y) (block body)))"
233232
"f() do\nend" => "(do (call f) (-> (tuple) (block)))"
234233
"f() do ; body end" => "(do (call f) (-> (tuple) (block body)))"
235-
"f(x) do y,z body end" => "(do (call f x) (-> (tuple y z) (block body)))"
234+
"f() do x, y\n body end" => "(do (call f) (-> (tuple x y) (block body)))"
235+
"f(x) do y body end" => "(do (call f x) (-> (tuple y) (block body)))"
236236
# Keyword arguments depend on call vs macrocall
237237
"foo(a=1)" => "(call foo (kw a 1))"
238238
"@foo(a=1)" => "(macrocall @foo (= a 1))"
239-
# f(x) do y body end ==> (do (call f x) (-> (tuple y) (block body)))
240239
"@foo a b" => "(macrocall @foo a b)"
241240
"@foo (x)" => "(macrocall @foo x)"
242241
"@foo (x,y)" => "(macrocall @foo (tuple x y))"
@@ -245,6 +244,10 @@ tests = [
245244
"[@foo \"x\"]" => "(vect (macrocall @foo \"x\"))"
246245
"[f (x)]" => "(hcat f x)"
247246
"[f \"x\"]" => "(hcat f \"x\")"
247+
# Macro names
248+
"@! x" => "(macrocall @! x)"
249+
"@.. x" => "(macrocall @.. x)"
250+
"@\$ y" => "(macrocall @\$ y)"
248251
# Special @doc parsing rules
249252
"@doc x\ny" => "(macrocall @doc x y)"
250253
"A.@doc x\ny" => "(macrocall (. A (quote @doc)) x y)"
@@ -670,9 +673,7 @@ broken_tests = [
670673
# Triple-quoted string processing
671674
"\"\"\"\n\$x\"\"\"" => "(string x)"
672675
"\"\"\"\$x\n\"\"\"" => "(string x \"\n\")"
673-
# Operator-named macros with and without spaces
674-
"@! x" => "(macrocall @! x)"
675-
"@.. x" => "(macrocall @.. x)"
676+
# Operator-named macros without spaces
676677
"@!x" => "(macrocall @! x)"
677678
"@..x" => "(macrocall @.. x)"
678679
"@.x" => "(macrocall @__dot__ x)"

0 commit comments

Comments
 (0)