Skip to content

Commit 70c908f

Browse files
authored
Enclose grouping parentheses with parens node (#222)
Introduce a new kind `K"parens"` to represent grouping parentheses with a tree node of their own. This makes it simple for tooling to process and preserve parenthesized expressions without resorting to searching through the attached syntax trivia. An alternative considered here was to use `K"block"` with a single child, which would avoid introducing an extra kind of node. But in that case we couldn't distinguish between a trivial block like `(a;)` and bare parentheses `(a)`. That alternative would also make implementing `peek_behind` more complicated.
1 parent d192cea commit 70c908f

File tree

5 files changed

+138
-130
lines changed

5 files changed

+138
-130
lines changed

src/expr.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true,
197197
end
198198
elseif headsym === :where
199199
reorder_parameters!(args, 2)
200+
elseif headsym == :parens
201+
# parens are used for grouping and don't appear in the Expr AST
202+
return only(args)
200203
elseif headsym in (:try, :try_finally_catch)
201204
# Try children in source order:
202205
# try_block catch_var catch_block else_block finally_block

src/parse_stream.jl

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -582,26 +582,25 @@ function first_child_position(stream::ParseStream, pos::ParseStreamPosition)
582582
end
583583

584584
function peek_behind(stream::ParseStream; skip_trivia::Bool=true)
585-
pos = position(stream)
586-
if !skip_trivia || !token_is_last(stream, pos)
587-
return peek_behind(stream, pos)
588-
else
589-
token_index = lastindex(stream.tokens)
590-
range_index = lastindex(stream.ranges)
591-
last_token_in_nonterminal = isempty(stream.ranges) ? 0 :
592-
stream.ranges[range_index].last_token
593-
while token_index > last_token_in_nonterminal
594-
t = stream.tokens[token_index]
595-
if !is_trivia(t) && kind(t) != K"TOMBSTONE"
596-
break
597-
end
598-
token_index -= 1
599-
end
600-
if token_index > 0
601-
return peek_behind(stream, ParseStreamPosition(token_index, range_index))
602-
else
603-
internal_error("Can't peek behind at start of stream")
585+
token_index = lastindex(stream.tokens)
586+
range_index = lastindex(stream.ranges)
587+
while range_index >= firstindex(stream.ranges) &&
588+
kind(stream.ranges[range_index]) == K"parens"
589+
range_index -= 1
590+
end
591+
last_token_in_nonterminal = range_index == 0 ? 0 :
592+
stream.ranges[range_index].last_token
593+
while token_index > last_token_in_nonterminal
594+
t = stream.tokens[token_index]
595+
if kind(t) != K"TOMBSTONE" && (!skip_trivia || !is_trivia(t))
596+
break
604597
end
598+
token_index -= 1
599+
end
600+
if token_index > 0
601+
return peek_behind(stream, ParseStreamPosition(token_index, range_index))
602+
else
603+
internal_error("Can't peek behind at start of stream")
605604
end
606605
end
607606

src/parser.jl

Lines changed: 55 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -315,8 +315,8 @@ function was_eventually_call(ps::ParseState)
315315
b = peek_behind(stream, p)
316316
if b.kind == K"call"
317317
return true
318-
elseif b.kind == K"where" || (b.kind == K"::" &&
319-
has_flags(b.flags, INFIX_FLAG))
318+
elseif b.kind == K"where" || b.kind == K"parens" ||
319+
(b.kind == K"::" && has_flags(b.flags, INFIX_FLAG))
320320
p = first_child_position(ps, p)
321321
else
322322
return false
@@ -885,7 +885,7 @@ function parse_range(ps::ParseState)
885885
if had_newline
886886
# Error message for people coming from python
887887
# 1:\n2 ==> (call-i 1 : (error))
888-
# (1:\n2) ==> (call-i 1 : 2)
888+
# (1:\n2) ==> (parens (call-i 1 : 2))
889889
emit_diagnostic(ps, whitespace=true,
890890
error="line break after `:` in range expression")
891891
bump_invisible(ps, K"error")
@@ -1021,7 +1021,7 @@ function parse_unary_subtype(ps::ParseState)
10211021
elseif k2 in KSet"{ ("
10221022
# parse <:{T}(x::T) or <:(x::T) like other unary operators
10231023
# <:{T}(x::T) ==> (call (curly <: T) (:: x T))
1024-
# <:(x::T) ==> (<:-pre (:: x T))
1024+
# <:(x::T) ==> (<:-pre (parens (:: x T)))
10251025
parse_where(ps, parse_juxtapose)
10261026
else
10271027
# <: x ==> (<:-pre x)
@@ -1108,9 +1108,9 @@ end
11081108
# Juxtaposition. Ugh! But so useful for units and Field identities like `im`
11091109
#
11101110
# 2x ==> (juxtapose 2 x)
1111-
# 2(x) ==> (juxtapose 2 x)
1112-
# (2)(3)x ==> (juxtapose 2 3 x)
1113-
# (x-1)y ==> (juxtapose (call-i x - 1) y)
1111+
# 2(x) ==> (juxtapose 2 (parens x))
1112+
# (2)(3)x ==> (juxtapose (parens 2) (parens 3) x)
1113+
# (x-1)y ==> (juxtapose (parens (call-i x - 1)) y)
11141114
# x'y ==> (juxtapose (call-post x ') y)
11151115
#
11161116
# flisp: parse-juxtapose
@@ -1239,9 +1239,9 @@ function parse_unary(ps::ParseState)
12391239

12401240
mark_before_paren = position(ps)
12411241
bump(ps, TRIVIA_FLAG) # (
1242-
initial_semi = peek(ps) == K";"
1242+
_is_paren_call = peek(ps, skip_newlines=true) in KSet"; )"
12431243
opts = parse_brackets(ps, K")") do had_commas, had_splat, num_semis, num_subexprs
1244-
is_paren_call = had_commas || had_splat || initial_semi
1244+
is_paren_call = had_commas || had_splat || _is_paren_call
12451245
return (needs_parameters=is_paren_call,
12461246
is_paren_call=is_paren_call,
12471247
is_block=!is_paren_call && num_semis > 0)
@@ -1263,6 +1263,7 @@ function parse_unary(ps::ParseState)
12631263
# +(a...) ==> (call + (... a))
12641264
# +(a;b,c) ==> (call + a (parameters b c))
12651265
# +(;a) ==> (call + (parameters a))
1266+
# +() ==> (call +)
12661267
# Prefix calls have higher precedence than ^
12671268
# +(a,b)^2 ==> (call-i (call + a b) ^ 2)
12681269
# +(a,b)(x)^2 ==> (call-i (call (call + a b) x) ^ 2)
@@ -1292,21 +1293,23 @@ function parse_unary(ps::ParseState)
12921293
parse_factor_with_initial_ex(ps, mark)
12931294
else
12941295
# Unary function calls with brackets as grouping, not an arglist
1295-
# .+(a) ==> (dotcall-pre (. +) a)
1296+
# .+(a) ==> (dotcall-pre (. +) (parens a))
12961297
if opts.is_block
12971298
# +(a;b) ==> (call-pre + (block-p a b))
12981299
emit(ps, mark_before_paren, K"block", PARENS_FLAG)
1300+
else
1301+
emit(ps, mark_before_paren, K"parens")
12991302
end
13001303
# Not a prefix operator call but a block; `=` is not `kw`
1301-
# +(a=1) ==> (call-pre + (= a 1))
1304+
# +(a=1) ==> (call-pre + (parens (= a 1)))
13021305
# Unary operators have lower precedence than ^
1303-
# +(a)^2 ==> (call-pre + (call-i a ^ 2))
1304-
# .+(a)^2 ==> (dotcall-pre + (call-i a ^ 2))
1305-
# +(a)(x,y)^2 ==> (call-pre + (call-i (call a x y) ^ 2))
1306+
# +(a)^2 ==> (call-pre + (call-i (parens a) ^ 2))
1307+
# .+(a)^2 ==> (dotcall-pre + (call-i (parens a) ^ 2))
1308+
# +(a)(x,y)^2 ==> (call-pre + (call-i (call (parens a) x y) ^ 2))
13061309
parse_call_chain(ps, mark_before_paren)
13071310
parse_factor_with_initial_ex(ps, mark_before_paren)
13081311
if is_type_operator(op_t)
1309-
# <:(a) ==> (<:-pre a)
1312+
# <:(a) ==> (<:-pre (parens a))
13101313
emit(ps, mark, op_k, PREFIX_OP_FLAG)
13111314
reset_node!(ps, op_pos, flags=TRIVIA_FLAG)
13121315
else
@@ -1451,7 +1454,7 @@ function parse_identifier_or_interpolate(ps::ParseState)
14511454
mark = position(ps)
14521455
parse_unary_prefix(ps)
14531456
b = peek_behind(ps)
1454-
# export (x::T) ==> (export (error (::-i x T)))
1457+
# export (x::T) ==> (export (error (parens (::-i x T))))
14551458
# export outer ==> (export outer)
14561459
# export ($f) ==> (export ($ f))
14571460
ok = (b.is_leaf && (b.kind == K"Identifier" || is_operator(b.kind))) ||
@@ -1491,13 +1494,13 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
14911494
k = kind(t)
14921495
if !is_macrocall && ps.space_sensitive && preceding_whitespace(t) &&
14931496
k in KSet"( [ { \" \"\"\" ` ```"
1494-
# [f (x)] ==> (hcat f x)
1497+
# [f (x)] ==> (hcat f (parens x))
14951498
# [f x] ==> (hcat f x)
14961499
break
14971500
elseif is_macrocall && (preceding_whitespace(t) || !(k in KSet"( [ { ' ."))
14981501
# Macro calls with space-separated arguments
14991502
# @foo a b ==> (macrocall @foo a b)
1500-
# @foo (x) ==> (macrocall @foo x)
1503+
# @foo (x) ==> (macrocall @foo (parens x))
15011504
# @foo (x,y) ==> (macrocall @foo (tuple-p x y))
15021505
# [@foo x] ==> (vect (macrocall @foo x))
15031506
# [@foo] ==> (vect (macrocall @foo))
@@ -1537,8 +1540,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15371540
# f(a,b) ==> (call f a b)
15381541
# f(a=1; b=2) ==> (call f (= a 1) (parameters (= b 2)))
15391542
# f(a; b; c) ==> (call f a (parameters b) (parameters c))
1540-
# (a=1)() ==> (call (= a 1))
1541-
# f (a) ==> (call f (error-t) a b)
1543+
# (a=1)() ==> (call (parens (= a 1)))
1544+
# f (a) ==> (call f (error-t) a)
15421545
bump_disallowed_space(ps)
15431546
bump(ps, TRIVIA_FLAG)
15441547
parse_call_arglist(ps, K")")
@@ -1580,7 +1583,8 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
15801583
else
15811584
# a[i] ==> (ref a i)
15821585
# a[i,j] ==> (ref a i j)
1583-
# (a=1)[] ==> (ref (= a 1))
1586+
# (a=1)[] ==> (ref (parens (= a 1)))
1587+
# a[end] ==> (ref a end)
15841588
# T[x y] ==> (typed_hcat T x y)
15851589
# T[x ; y] ==> (typed_vcat T x y)
15861590
# T[a b; c d] ==> (typed_vcat T (row a b) (row c d))
@@ -1905,7 +1909,7 @@ function parse_resword(ps::ParseState)
19051909
else
19061910
# Function/macro definition with no methods
19071911
# function f end ==> (function f)
1908-
# (function f \n end) ==> (function f)
1912+
# (function f \n end) ==> (parens (function f))
19091913
# function f \n\n end ==> (function f)
19101914
# function $f end ==> (function ($ f))
19111915
# macro f end ==> (macro f)
@@ -2007,7 +2011,7 @@ function parse_resword(ps::ParseState)
20072011
# export a, \n @b ==> (export a @b)
20082012
# export +, == ==> (export + ==)
20092013
# export \n a ==> (export a)
2010-
# export \$a, \$(a*b) ==> (export (\$ a) (\$ (call-i a * b)))
2014+
# export \$a, \$(a*b) ==> (export (\$ a) (\$ (parens (call-i a * b))))
20112015
bump(ps, TRIVIA_FLAG)
20122016
parse_comma_separated(ps, parse_atsym)
20132017
emit(ps, mark, K"export")
@@ -2105,10 +2109,10 @@ function parse_function_signature(ps::ParseState, is_function::Bool)
21052109
emit(ps, mark, K"error", error="Invalid macro name")
21062110
else
21072111
# macro f() end ==> (macro (call f) (block))
2108-
# macro (:)(ex) end ==> (macro (call : ex) (block))
2109-
# macro (type)(ex) end ==> (macro (call type ex) (block))
2112+
# macro (:)(ex) end ==> (macro (call (parens :) ex) (block))
2113+
# macro (type)(ex) end ==> (macro (call (parens type) ex) (block))
21102114
# macro $f() end ==> (macro (call ($ f)) (block))
2111-
# macro ($f)() end ==> (macro (call ($ f)) (block))
2115+
# macro ($f)() end ==> (macro (call (parens ($ f))) (block))
21122116
end
21132117
else
21142118
if peek(ps) == K"("
@@ -2145,10 +2149,11 @@ function parse_function_signature(ps::ParseState, is_function::Bool)
21452149
# function ()(x) end ==> (function (call (tuple-p) x) (block))
21462150
emit(ps, mark, K"tuple", PARENS_FLAG)
21472151
else
2148-
# function (A).f() end ==> (function (call (. A (quote f))) (block))
2149-
# function (:)() end ==> (function (call :) (block))
2150-
# function (x::T)() end ==> (function (call (::-i x T)) (block))
2151-
# function (::T)() end ==> (function (call (::-pre T)) (block))
2152+
# function (A).f() end ==> (function (call (. (parens A) (quote f))) (block))
2153+
# function (:)() end ==> (function (call (parens :)) (block))
2154+
# function (x::T)() end ==> (function (call (parens (::-i x T))) (block))
2155+
# function (::T)() end ==> (function (call (parens (::-pre T))) (block))
2156+
emit(ps, mark, K"parens", PARENS_FLAG)
21522157
end
21532158
else
21542159
parse_unary_prefix(ps)
@@ -2163,8 +2168,7 @@ function parse_function_signature(ps::ParseState, is_function::Bool)
21632168
# function type() end ==> (function (call type) (block))
21642169
# function \n f() end ==> (function (call f) (block))
21652170
# function $f() end ==> (function (call ($ f)) (block))
2166-
# function (:)() end ==> (function (call :) (block))
2167-
# function (::Type{T})(x) end ==> (function (call (::-pre (curly Type T)) x) (block))
2171+
# function (::Type{T})(x) end ==> (function (call (parens (::-pre (curly Type T))) x) (block))
21682172
end
21692173
end
21702174
end
@@ -2205,8 +2209,8 @@ function parse_function_signature(ps::ParseState, is_function::Bool)
22052209
# function (f() where T) end ==> (function (where (call f) T) (block))
22062210
# function (f()) where T end ==> (function (where (call f) T) (block))
22072211
# function (f() where T) where U end ==> (function (where (where (call f) T) U) (block))
2208-
# function (f()::S) end ==> (function (::-i (call f) S) (block))
2209-
# function ((f()::S) where T) end ==> (function (where (::-i (call f) S) T) (block))
2212+
# function (f()::S) end ==> (function (parens (::-i (call f) S)) (block))
2213+
# function ((f()::S) where T) end ==> (function (where (parens (::-i (call f) S)) T) (block))
22102214
#
22112215
# TODO: Warn for use of parens? The precedence of `::` and
22122216
# `where` don't work inside parens so this is a bit of a syntax
@@ -2401,7 +2405,7 @@ function parse_atsym(ps::ParseState)
24012405
else
24022406
# export a ==> (export a)
24032407
# export \n a ==> (export a)
2404-
# export $a, $(a*b) ==> (export ($ a) ($ (call * a b)))
2408+
# export $a, $(a*b) ==> (export ($ a) ($ (parens (call-i a * b))))
24052409
parse_identifier_or_interpolate(ps)
24062410
end
24072411
end
@@ -2706,7 +2710,7 @@ end
27062710
function parse_generator(ps::ParseState, mark, flatten=false)
27072711
t = peek_token(ps)
27082712
if !preceding_whitespace(t)
2709-
# [(x)for x in xs] ==> (comprehension (generator x (error) (= x xs)))
2713+
# [(x)for x in xs] ==> (comprehension (generator (parens x) (error) (= x xs)))
27102714
bump_invisible(ps, K"error", TRIVIA_FLAG,
27112715
error="Expected space before `for` in generator")
27122716
end
@@ -2715,21 +2719,21 @@ function parse_generator(ps::ParseState, mark, flatten=false)
27152719
filter_mark = position(ps)
27162720
parse_comma_separated(ps, parse_iteration_spec)
27172721
if peek(ps) == K"if"
2718-
# (a for x in xs if cond) ==> (generator a (filter (= x xs) cond))
2722+
# (a for x in xs if cond) ==> (parens (generator a (filter (= x xs) cond)))
27192723
bump(ps, TRIVIA_FLAG)
27202724
parse_cond(ps)
27212725
emit(ps, filter_mark, K"filter")
27222726
end
27232727
t = peek_token(ps)
27242728
if kind(t) == K"for"
2725-
# (xy for x in xs for y in ys) ==> (flatten xy (= x xs) (= y ys))
2726-
# (xy for x in xs for y in ys for z in zs) ==> (flatten xy (= x xs) (= y ys) (= z zs))
2729+
# (xy for x in xs for y in ys) ==> (parens (flatten xy (= x xs) (= y ys)))
2730+
# (xy for x in xs for y in ys for z in zs) ==> (parens (flatten xy (= x xs) (= y ys) (= z zs)))
27272731
parse_generator(ps, mark, true)
27282732
if !flatten
27292733
emit(ps, mark, K"flatten")
27302734
end
27312735
elseif !flatten
2732-
# (x for a in as) ==> (generator x (= a as))
2736+
# (x for a in as) ==> (parens (generator x (= a as)))
27332737
emit(ps, mark, K"generator")
27342738
end
27352739
end
@@ -3071,10 +3075,11 @@ function parse_paren(ps::ParseState, check_identifiers=true)
30713075
emit(ps, mark, K"block", PARENS_FLAG)
30723076
else
30733077
# Parentheses used for grouping
3074-
# (a * b) ==> (call-i * a b)
3075-
# (a=1) ==> (= a 1)
3076-
# (x) ==> x
3077-
# (a...) ==> (... a)
3078+
# (a * b) ==> (parens (call-i a * b))
3079+
# (a=1) ==> (parens (= a 1))
3080+
# (x) ==> (parens x)
3081+
# (a...) ==> (parens (... a))
3082+
emit(ps, mark, K"parens")
30783083
end
30793084
end
30803085
end
@@ -3144,8 +3149,8 @@ function parse_brackets(after_parse::Function,
31443149
continue
31453150
elseif k == K"for"
31463151
# Generator syntax
3147-
# (x for a in as) ==> (generator x (= a as))
3148-
# (x \n\n for a in as) ==> (generator x (= a as))
3152+
# (x for a in as) ==> (parens (generator x (= a as)))
3153+
# (x \n\n for a in as) ==> (parens (generator x (= a as)))
31493154
parse_generator(ps, mark)
31503155
else
31513156
# Error - recovery done when consuming closing_kind
@@ -3203,8 +3208,8 @@ function parse_string(ps::ParseState, raw::Bool)
32033208
bump(ps, TRIVIA_FLAG)
32043209
k = peek(ps)
32053210
if k == K"("
3206-
# "a $(x + y) b" ==> (string "a " (call-i x + y) " b")
3207-
# "hi$("ho")" ==> (string "hi" (string "ho"))
3211+
# "a $(x + y) b" ==> (string "a " (parens (call-i x + y)) " b")
3212+
# "hi$("ho")" ==> (string "hi" (parens (string "ho")))
32083213
parse_atom(ps)
32093214
elseif k == K"var"
32103215
# var identifiers disabled in strings
@@ -3346,7 +3351,7 @@ function parse_string(ps::ParseState, raw::Bool)
33463351
end
33473352
# String interpolations
33483353
# "$x$y$z" ==> (string x y z)
3349-
# "$(x)" ==> (string x)
3354+
# "$(x)" ==> (string (parens x))
33503355
# "$x" ==> (string x)
33513356
# """$x""" ==> (string-s x)
33523357
#
@@ -3440,7 +3445,7 @@ function parse_atom(ps::ParseState, check_identifiers=true)
34403445
# Being inside quote makes keywords into identifiers at the
34413446
# first level of nesting
34423447
# :end ==> (quote end)
3443-
# :(end) ==> (quote (error (end)))
3448+
# :(end) ==> (quote (parens (error-t)))
34443449
# Being inside quote makes end non-special again (issue #27690)
34453450
# a[:(end)] ==> (ref a (quote (error-t end)))
34463451
parse_atom(ParseState(ps, end_symbol=false), false)

0 commit comments

Comments
 (0)