Skip to content

Commit 25f8eb2

Browse files
authored
Use K"function" for short form function AST (#466)
A pain point when writing macros is detecting all the syntactic forms which may be lowered to functions. This is partly due to short-form function definitions, which in Julia's classic AST parse with a `:(=)` head rather than a `:function` head — to determine the meaning of `=`, one must traverse recursively into the left-hand side of the expression. This change modifies the parsing of short-form functions to use the `K"function"` kind. A new syntax flag, `SHORT_FORM_FUNCTION_FLAG`, is set so that AST consumers can distinguish short-form from long-form functions.
1 parent da801cc commit 25f8eb2

File tree

8 files changed

+132
-56
lines changed

8 files changed

+132
-56
lines changed

docs/src/api.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ JuliaSyntax.COLON_QUOTE
118118
JuliaSyntax.TOPLEVEL_SEMICOLONS_FLAG
119119
JuliaSyntax.MUTABLE_FLAG
120120
JuliaSyntax.BARE_MODULE_FLAG
121+
JuliaSyntax.SHORT_FORM_FUNCTION_FLAG
121122
```
122123

123124
## Syntax trees

src/expr.jl

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,6 @@ macro isexpr(ex, head, nargs)
2828
length($(esc(ex)).args) == $(esc(nargs)))
2929
end
3030

31-
function is_eventually_call(ex)
32-
return ex isa Expr && (ex.head === :call ||
33-
(ex.head === :where || ex.head === :(::)) && is_eventually_call(ex.args[1]))
34-
end
35-
3631
function _reorder_parameters!(args::Vector{Any}, params_pos)
3732
p = 0
3833
for i = length(args):-1:1
@@ -233,16 +228,6 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
233228

234229
if k == K"?"
235230
headsym = :if
236-
elseif k == K"=" && !is_decorated(head)
237-
a2 = args[2]
238-
if is_eventually_call(args[1])
239-
if @isexpr(a2, :block)
240-
pushfirst!(a2.args, loc)
241-
else
242-
# Add block for short form function locations
243-
args[2] = Expr(:block, loc, a2)
244-
end
245-
end
246231
elseif k == K"macrocall"
247232
do_lambda = _extract_do_lambda!(args)
248233
_reorder_parameters!(args, 2)
@@ -399,14 +384,22 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads,
399384
end
400385
elseif k == K"function"
401386
if length(args) > 1
402-
a1 = args[1]
403-
if @isexpr(a1, :tuple)
404-
# Convert to weird Expr forms for long-form anonymous functions.
405-
#
406-
# (function (tuple (... xs)) body) ==> (function (... xs) body)
407-
if length(a1.args) == 1 && (a11 = a1.args[1]; @isexpr(a11, :...))
408-
# function (xs...) \n body end
409-
args[1] = a11
387+
if has_flags(head, SHORT_FORM_FUNCTION_FLAG)
388+
a2 = args[2]
389+
if !@isexpr(a2, :block)
390+
args[2] = Expr(:block, a2)
391+
end
392+
headsym = :(=)
393+
else
394+
a1 = args[1]
395+
if @isexpr(a1, :tuple)
396+
# Convert to weird Expr forms for long-form anonymous functions.
397+
#
398+
# (function (tuple (... xs)) body) ==> (function (... xs) body)
399+
if length(a1.args) == 1 && (a11 = a1.args[1]; @isexpr(a11, :...))
400+
# function (xs...) \n body end
401+
args[1] = a11
402+
end
410403
end
411404
end
412405
pushfirst!((args[2]::Expr).args, loc)

src/parse_stream.jl

Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ Set for K"toplevel" which is delimited by parentheses
4949
"""
5050
const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1<<5)
5151

52+
"""
53+
Set for K"function" in short form definitions such as `f() = 1`
54+
"""
55+
const SHORT_FORM_FUNCTION_FLAG = RawFlags(1<<5)
56+
5257
"""
5358
Set for K"struct" when mutable
5459
"""
@@ -143,6 +148,8 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
143148
has_flags(head, COLON_QUOTE) && (str = str*"-:")
144149
elseif kind(head) == K"toplevel"
145150
has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && (str = str*"-;")
151+
elseif kind(head) == K"function"
152+
has_flags(head, SHORT_FORM_FUNCTION_FLAG) && (str = str*"-=")
146153
elseif kind(head) == K"struct"
147154
has_flags(head, MUTABLE_FLAG) && (str = str*"-mut")
148155
elseif kind(head) == K"module"
@@ -646,17 +653,17 @@ function peek_behind(stream::ParseStream, pos::ParseStreamPosition)
646653
end
647654

648655
function first_child_position(stream::ParseStream, pos::ParseStreamPosition)
656+
ranges = stream.ranges
657+
@assert pos.range_index > 0
658+
parent = ranges[pos.range_index]
649659
# Find the first nontrivia range which is a child of this range but not a
650660
# child of the child
651661
c = 0
652-
@assert pos.range_index > 0
653-
parent = stream.ranges[pos.range_index]
654662
for i = pos.range_index-1:-1:1
655-
if stream.ranges[i].first_token < parent.first_token
663+
if ranges[i].first_token < parent.first_token
656664
break
657665
end
658-
if (c == 0 || stream.ranges[i].first_token < stream.ranges[c].first_token) &&
659-
!is_trivia(stream.ranges[i])
666+
if (c == 0 || ranges[i].first_token < ranges[c].first_token) && !is_trivia(ranges[i])
660667
c = i
661668
end
662669
end
@@ -670,19 +677,44 @@ function first_child_position(stream::ParseStream, pos::ParseStreamPosition)
670677
end
671678
end
672679

673-
if c != 0
674-
if t != 0
675-
if stream.ranges[c].first_token > t
676-
# Need a child index strictly before `t`. `c=0` works.
677-
return ParseStreamPosition(t, 0)
678-
else
679-
return ParseStreamPosition(stream.ranges[c].last_token, c)
680-
end
681-
else
682-
return ParseStreamPosition(stream.ranges[c].last_token, c)
680+
if c == 0 || (t != 0 && ranges[c].first_token > t)
681+
# Return leaf node at `t`
682+
return ParseStreamPosition(t, 0)
683+
else
684+
# Return interior node at `c`
685+
return ParseStreamPosition(ranges[c].last_token, c)
686+
end
687+
end
688+
689+
function last_child_position(stream::ParseStream, pos::ParseStreamPosition)
690+
ranges = stream.ranges
691+
@assert pos.range_index > 0
692+
parent = ranges[pos.range_index]
693+
# Find the last nontrivia range which is a child of this range
694+
c = 0
695+
if pos.range_index > 1
696+
i = pos.range_index-1
697+
if ranges[i].first_token >= parent.first_token
698+
# Valid child of current range
699+
c = i
683700
end
701+
end
702+
703+
# Find last nontrivia token
704+
t = 0
705+
for i = parent.last_token:-1:parent.first_token
706+
if !is_trivia(stream.tokens[i])
707+
t = i
708+
break
709+
end
710+
end
711+
712+
if c == 0 || (t != 0 && ranges[c].last_token < t)
713+
# Return leaf node at `t`
714+
return ParseStreamPosition(t, 0)
684715
else
685-
return ParseStreamPosition(t, c)
716+
# Return interior node at `c`
717+
return ParseStreamPosition(ranges[c].last_token, c)
686718
end
687719
end
688720

src/parser.jl

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ function first_child_position(ps::ParseState, pos::ParseStreamPosition)
133133
first_child_position(ps.stream, pos)
134134
end
135135

136+
function last_child_position(ps::ParseState, pos::ParseStreamPosition)
137+
last_child_position(ps.stream, pos)
138+
end
136139
#-------------------------------------------------------------------------------
137140
# Parser Utils
138141

@@ -325,6 +328,12 @@ function was_eventually_call(ps::ParseState)
325328
return true
326329
elseif b.kind == K"where" || b.kind == K"parens" ||
327330
(b.kind == K"::" && has_flags(b.flags, INFIX_FLAG))
331+
if b.kind == K"::"
332+
p_last = last_child_position(ps, p)
333+
if p == p_last
334+
return false
335+
end
336+
end
328337
p = first_child_position(ps, p)
329338
else
330339
return false
@@ -618,12 +627,19 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down::T) where {
618627
parse_assignment(ps, down)
619628
emit(ps, mark, is_dotted(t) ? K"dotcall" : K"call", INFIX_FLAG)
620629
else
621-
# a += b ==> (+= a b)
622-
# a .= b ==> (.= a b)
630+
# f() = 1 ==> (function-= (call f) 1)
631+
# f() .= 1 ==> (.= (call f) 1)
632+
# a += b ==> (+= a b)
633+
# a .= b ==> (.= a b)
634+
is_short_form_func = k == K"=" && !is_dotted(t) && was_eventually_call(ps)
623635
bump(ps, TRIVIA_FLAG)
624636
bump_trivia(ps)
637+
# Syntax Edition TODO: We'd like to call `down` here when
638+
# is_short_form_func is true, to prevent `f() = 1 = 2` from parsing.
625639
parse_assignment(ps, down)
626-
emit(ps, mark, k, flags(t))
640+
emit(ps, mark,
641+
is_short_form_func ? K"function" : k,
642+
is_short_form_func ? SHORT_FORM_FUNCTION_FLAG : flags(t))
627643
end
628644
end
629645

test/parse_packages.jl

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,13 @@ base_path = let
2020
p
2121
end
2222
@testset "Parse Base at $base_path" begin
23-
test_parse_all_in_path(base_path)
23+
test_parse_all_in_path(base_path) do f
24+
if endswith(f, "gmp.jl")
25+
# Loose comparison due to `f(::g(w) = z) = a` syntax
26+
return exprs_roughly_equal
27+
end
28+
return exprs_equal_no_linenum
29+
end
2430
end
2531

2632
base_tests_path = joinpath(Sys.BINDIR, Base.DATAROOTDIR, "julia", "test")

test/parse_stream.jl

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ using JuliaSyntax: ParseStream,
77
peek, peek_token,
88
bump, bump_trivia, bump_invisible,
99
emit, emit_diagnostic, TRIVIA_FLAG, INFIX_FLAG,
10-
ParseStreamPosition, first_child_position
10+
ParseStreamPosition, first_child_position, last_child_position
1111

1212
# Here we manually issue parse events in the order the Julia parser would issue
1313
# them
@@ -110,27 +110,40 @@ end
110110
st = parse_sexpr("((a b) c)")
111111
child1_pos = first_child_position(st, position(st))
112112
@test child1_pos == ParseStreamPosition(7, 1)
113-
child2_pos = first_child_position(st, child1_pos)
114-
@test child2_pos == ParseStreamPosition(4, 0)
113+
@test first_child_position(st, child1_pos) == ParseStreamPosition(4, 0)
114+
@test last_child_position(st, position(st)) == ParseStreamPosition(9, 0)
115+
@test last_child_position(st, child1_pos) == ParseStreamPosition(6, 0)
115116

116117
st = parse_sexpr("( (a b) c)")
117118
child1_pos = first_child_position(st, position(st))
118119
@test child1_pos == ParseStreamPosition(8, 1)
119-
child2_pos = first_child_position(st, child1_pos)
120-
@test child2_pos == ParseStreamPosition(5, 0)
120+
@test first_child_position(st, child1_pos) == ParseStreamPosition(5, 0)
121+
@test last_child_position(st, position(st)) == ParseStreamPosition(10, 0)
122+
@test last_child_position(st, child1_pos) == ParseStreamPosition(7, 0)
121123

122124
st = parse_sexpr("(a (b c))")
123125
@test first_child_position(st, position(st)) == ParseStreamPosition(3, 0)
126+
child2_pos = last_child_position(st, position(st))
127+
@test child2_pos == ParseStreamPosition(9, 1)
128+
@test first_child_position(st, child2_pos) == ParseStreamPosition(6, 0)
129+
@test last_child_position(st, child2_pos) == ParseStreamPosition(8, 0)
124130

125131
st = parse_sexpr("( a (b c))")
126132
@test first_child_position(st, position(st)) == ParseStreamPosition(4, 0)
133+
child2_pos = last_child_position(st, position(st))
134+
@test child2_pos == ParseStreamPosition(10, 1)
135+
@test first_child_position(st, child2_pos) == ParseStreamPosition(7, 0)
136+
@test last_child_position(st, child2_pos) == ParseStreamPosition(9, 0)
127137

128138
st = parse_sexpr("a (b c)")
129139
@test first_child_position(st, position(st)) == ParseStreamPosition(5, 0)
140+
@test last_child_position(st, position(st)) == ParseStreamPosition(7, 0)
130141

131142
st = parse_sexpr("(a) (b c)")
132143
@test first_child_position(st, position(st)) == ParseStreamPosition(7, 0)
144+
@test last_child_position(st, position(st)) == ParseStreamPosition(9, 0)
133145

134146
st = parse_sexpr("(() ())")
135147
@test first_child_position(st, position(st)) == ParseStreamPosition(4, 1)
148+
@test last_child_position(st, position(st)) == ParseStreamPosition(7, 2)
136149
end

test/parser.jl

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,13 @@ tests = [
7171
"a .~ b" => "(dotcall-i a ~ b)"
7272
"[a ~ b c]" => "(hcat (call-i a ~ b) c)"
7373
"[a~b]" => "(vect (call-i a ~ b))"
74+
"f(x) .= 1" => "(.= (call f x) 1)"
75+
"::g() = 1" => "(= (::-pre (call g)) 1)"
76+
"f(x) = 1" => "(function-= (call f x) 1)"
77+
"f(x)::T = 1" => "(function-= (::-i (call f x) T) 1)"
78+
"f(x) where S where U = 1" => "(function-= (where (where (call f x) S) U) 1)"
79+
"(f(x)::T) where S = 1" => "(function-= (where (parens (::-i (call f x) T)) S) 1)"
80+
"f(x) = 1 = 2" => "(function-= (call f x) (= 1 2))" # Should be a warning!
7481
],
7582
JuliaSyntax.parse_pair => [
7683
"a => b" => "(call-i a => b)"
@@ -449,7 +456,7 @@ tests = [
449456
],
450457
JuliaSyntax.parse_resword => [
451458
# In normal_context
452-
"begin f() where T = x end" => "(block (= (where (call f) T) x))"
459+
"begin f() where T = x end" => "(block (function-= (where (call f) T) x))"
453460
# block
454461
"begin end" => "(block)"
455462
"begin a ; b end" => "(block a b)"
@@ -955,14 +962,14 @@ tests = [
955962
"if true \n public A, B \n end" => PARSE_ERROR
956963
"public export=true foo, bar" => PARSE_ERROR # but these may be
957964
"public experimental=true foo, bar" => PARSE_ERROR # supported soon ;)
958-
"public(x::String) = false" => "(= (call public (::-i x String)) false)"
965+
"public(x::String) = false" => "(function-= (call public (::-i x String)) false)"
959966
"module M; export @a; end" => "(module M (block (export @a)))"
960967
"module M; public @a; end" => "(module M (block (public @a)))"
961968
"module M; export ⤈; end" => "(module M (block (export ⤈)))"
962969
"module M; public ⤈; end" => "(module M (block (public ⤈)))"
963970
"public = 4" => "(= public 4)"
964971
"public[7] = 5" => "(= (ref public 7) 5)"
965-
"public() = 6" => "(= (call public) 6)"
972+
"public() = 6" => "(function-= (call public) 6)"
966973
]),
967974
JuliaSyntax.parse_docstring => [
968975
""" "notdoc" ] """ => "(string \"notdoc\")"

test/test_utils.jl

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,11 @@ function exprs_equal_no_linenum(fl_ex, ex)
9696
remove_all_linenums!(deepcopy(ex)) == remove_all_linenums!(deepcopy(fl_ex))
9797
end
9898

99+
function is_eventually_call(ex)
100+
return ex isa Expr && (ex.head === :call ||
101+
(ex.head === :where || ex.head === :(::)) && is_eventually_call(ex.args[1]))
102+
end
103+
99104
# Compare Expr from reference parser expression to JuliaSyntax parser, ignoring
100105
# differences due to bugs in the reference parser.
101106
function exprs_roughly_equal(fl_ex, ex)
@@ -149,7 +154,7 @@ function exprs_roughly_equal(fl_ex, ex)
149154
fl_args[1] = Expr(:tuple, Expr(:parameters, kwargs...), posargs...)
150155
elseif h == :for
151156
iterspec = args[1]
152-
if JuliaSyntax.is_eventually_call(iterspec.args[1]) &&
157+
if is_eventually_call(iterspec.args[1]) &&
153158
Meta.isexpr(iterspec.args[2], :block)
154159
blk = iterspec.args[2]
155160
if length(blk.args) == 2 && blk.args[1] isa LineNumberNode
@@ -158,6 +163,11 @@ function exprs_roughly_equal(fl_ex, ex)
158163
iterspec.args[2] = blk.args[2]
159164
end
160165
end
166+
elseif (h == :(=) || h == :kw) && Meta.isexpr(fl_args[1], :(::), 1) &&
167+
Meta.isexpr(fl_args[2], :block, 2) && fl_args[2].args[1] isa LineNumberNode
168+
# The flisp parser adds an extra block around `w` in the following case
169+
# f(::g(z) = w) = 1
170+
fl_args[2] = fl_args[2].args[2]
161171
end
162172
if length(fl_args) != length(args)
163173
return false
@@ -169,9 +179,7 @@ function exprs_roughly_equal(fl_ex, ex)
169179
fl_args[1] = Expr(:macrocall, map(kw_to_eq, args[1].args)...)
170180
end
171181
for i = 1:length(args)
172-
flarg = fl_args[i]
173-
arg = args[i]
174-
if !exprs_roughly_equal(flarg, arg)
182+
if !exprs_roughly_equal(fl_args[i], args[i])
175183
return false
176184
end
177185
end
@@ -307,7 +315,7 @@ between flisp and JuliaSyntax parsers and return the source text of those
307315
subtrees.
308316
"""
309317
function reduce_tree(text::AbstractString; kws...)
310-
tree = parseall(SyntaxNode, text)
318+
tree = parseall(SyntaxNode, text, ignore_warnings=true)
311319
sourcetext.(reduce_tree(tree; kws...))
312320
end
313321

0 commit comments

Comments
 (0)