Skip to content

Commit e4f08c4

Browse files
authored
Add PARENS_FLAG to tuple, block and macrocall (#218)
Several syntactic constructs can occur either with or without parentheses, and it can be useful to distinguish between the two forms without looking at the syntax trivia - particularly for code formatting, but also for other reasons:

* Macro calls: `@x(a,b)` vs `@x a b`
* Blocks: `(a; b)` vs `begin a ; b end`
* Tuples: `a,b` vs `(a,b)` (for example, see #194)

Also modify the printing of head flags so that every flag suffix is preceded by its own `-`, making each flag clearly distinguishable from the others.
1 parent 917e87f commit e4f08c4

File tree

4 files changed

+122
-116
lines changed

4 files changed

+122
-116
lines changed

src/expr.jl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,9 @@ function _to_expr(node::SyntaxNode; iteration_spec=false, need_linenodes=true,
146146
_to_expr(n, eq_to_kw=eq_to_kw,
147147
map_kw_in_params=in_vcbr)
148148
end
149+
if nodekind == K"block" && has_flags(node, PARENS_FLAG)
150+
popfirst!(args)
151+
end
149152
end
150153
end
151154

src/parse_stream.jl

Lines changed: 40 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,7 @@ const TRIPLE_STRING_FLAG = RawFlags(1<<5)
2525
# Set when a string or identifier needs "raw string" unescaping
2626
const RAW_STRING_FLAG = RawFlags(1<<6)
2727

28-
# TODO?
29-
# const ERROR_FLAG = RawFlags(1<<7)
30-
31-
# Token-only flag
32-
# Record whether a token had preceding whitespace
33-
const PRECEDING_WHITESPACE_FLAG = RawFlags(1<<7)
28+
const PARENS_FLAG = RawFlags(1<<7)
3429

3530
# Flags holding the dimension of an nrow or other UInt8 not held in the source
3631
const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8)
@@ -77,21 +72,18 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
7772
if is_dotted(head)
7873
str = "."*str
7974
end
80-
# Ignore some flags:
81-
# DOTOP_FLAG is represented above with . prefix
82-
# PRECEDING_WHITESPACE_FLAG relates to the environment of this token
83-
suffix_flags = remove_flags(flags(head), DOTOP_FLAG, PRECEDING_WHITESPACE_FLAG)
84-
if include_flag_suff && suffix_flags != EMPTY_FLAGS
85-
str = str*"-"
86-
is_trivia(head) && (str = str*"t")
87-
is_infix_op_call(head) && (str = str*"i")
88-
is_prefix_op_call(head) && (str = str*"pre")
89-
is_postfix_op_call(head) && (str = str*"post")
90-
has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s")
91-
has_flags(head, RAW_STRING_FLAG) && (str = str*"r")
92-
is_suffixed(head) && (str = str*"S")
75+
if include_flag_suff
76+
# Ignore DOTOP_FLAG - it's represented above with . prefix
77+
is_trivia(head) && (str = str*"-t")
78+
is_infix_op_call(head) && (str = str*"-i")
79+
is_prefix_op_call(head) && (str = str*"-pre")
80+
is_postfix_op_call(head) && (str = str*"-post")
81+
has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"-s")
82+
has_flags(head, RAW_STRING_FLAG) && (str = str*"-r")
83+
has_flags(head, PARENS_FLAG) && (str = str*"-p")
84+
is_suffixed(head) && (str = str*"-S")
9385
n = numeric_flags(head)
94-
n != 0 && (str = str*string(n))
86+
n != 0 && (str = str*"-"*string(n))
9587
end
9688
str
9789
end
@@ -116,7 +108,6 @@ is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG
116108
is_dotted(x) = has_flags(x, DOTOP_FLAG)
117109
is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)
118110
is_decorated(x) = is_dotted(x) || is_suffixed(x)
119-
preceding_whitespace(x) = has_flags(x, PRECEDING_WHITESPACE_FLAG)
120111
numeric_flags(x) = numeric_flags(flags(x))
121112

122113
#-------------------------------------------------------------------------------
@@ -131,18 +122,17 @@ token to be used for recording the first byte of the first real token.
131122
struct SyntaxToken
132123
head::SyntaxHead
133124
orig_kind::Kind
125+
preceding_whitespace::Bool
134126
next_byte::UInt32
135127
end
136128

137-
function SyntaxToken(head::SyntaxHead, next_byte::Integer)
138-
SyntaxToken(head, kind(head), next_byte)
139-
end
140-
141129
function Base.show(io::IO, tok::SyntaxToken)
142130
print(io, rpad(untokenize(tok.head, unique=false), 15), " |", tok.next_byte)
143131
end
144132

145133
head(tok::SyntaxToken) = tok.head
134+
flags(tok::SyntaxToken) = remove_flags(flags(tok.head), NUMERIC_FLAGS)
135+
preceding_whitespace(tok::SyntaxToken) = tok.preceding_whitespace
146136

147137

148138
#-------------------------------------------------------------------------------
@@ -240,7 +230,7 @@ mutable struct ParseStream
240230
ver = (version.major, version.minor)
241231
# Initial sentinel token containing the first byte of the first real token.
242232
sentinel = SyntaxToken(SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS),
243-
K"TOMBSTONE", next_byte)
233+
K"TOMBSTONE", false, next_byte)
244234
new(text_buf,
245235
text_root,
246236
lexer,
@@ -353,10 +343,10 @@ function _buffer_lookahead_tokens(lexer, lookahead)
353343
was_whitespace = k in (K"Whitespace", K"Comment", K"NewlineWs")
354344
had_whitespace |= was_whitespace
355345
f = EMPTY_FLAGS
356-
had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG)
357346
raw.dotop && (f |= DOTOP_FLAG)
358347
raw.suffix && (f |= SUFFIXED_FLAG)
359-
push!(lookahead, SyntaxToken(SyntaxHead(k, f), raw.endbyte + 2))
348+
push!(lookahead, SyntaxToken(SyntaxHead(k, f), k,
349+
had_whitespace, raw.endbyte + 2))
360350
token_count += 1
361351
if k == K"EndMarker"
362352
break
@@ -471,7 +461,7 @@ function peek_token(stream::ParseStream, n::Integer=1;
471461
if !skip_whitespace
472462
i = stream.lookahead_index
473463
end
474-
return @inbounds head(stream.lookahead[i])
464+
return @inbounds stream.lookahead[i]
475465
end
476466

477467

@@ -613,12 +603,13 @@ function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None
613603
if k == K"EndMarker"
614604
break
615605
end
616-
f = flags | remove_flags((@__MODULE__).flags(tok), PRECEDING_WHITESPACE_FLAG)
606+
f = flags | (@__MODULE__).flags(tok)
617607
is_trivia = k ∈ (K"Whitespace", K"Comment", K"NewlineWs")
618608
is_trivia && (f |= TRIVIA_FLAG)
619609
outk = (is_trivia || remap_kind == K"None") ? k : remap_kind
620610
h = SyntaxHead(outk, f)
621-
push!(stream.tokens, SyntaxToken(h, kind(tok), tok.next_byte))
611+
push!(stream.tokens,
612+
SyntaxToken(h, kind(tok), tok.preceding_whitespace, tok.next_byte))
622613
end
623614
stream.lookahead_index = n + 1
624615
# Defuse the time bomb
@@ -675,7 +666,7 @@ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS;
675666
error=nothing)
676667
b = _next_byte(stream)
677668
h = SyntaxHead(kind, flags)
678-
push!(stream.tokens, SyntaxToken(h, b))
669+
push!(stream.tokens, SyntaxToken(h, (@__MODULE__).kind(h), false, b))
679670
if !isnothing(error)
680671
emit_diagnostic(stream, b, b-1, error=error)
681672
end
@@ -693,7 +684,8 @@ whitespace if necessary with bump_trivia.
693684
function bump_glue(stream::ParseStream, kind, flags, num_tokens)
694685
i = stream.lookahead_index
695686
h = SyntaxHead(kind, flags)
696-
push!(stream.tokens, SyntaxToken(h, stream.lookahead[i+1].next_byte))
687+
push!(stream.tokens, SyntaxToken(h, kind, false,
688+
stream.lookahead[i+1].next_byte))
697689
stream.lookahead_index += num_tokens
698690
stream.peek_count = 0
699691
return position(stream)
@@ -724,7 +716,7 @@ function bump_split(stream::ParseStream, split_spec...)
724716
for (i, (nbyte, k, f)) in enumerate(split_spec)
725717
h = SyntaxHead(k, f)
726718
b = (i == length(split_spec)) ? tok.next_byte : b + nbyte
727-
push!(stream.tokens, SyntaxToken(h, kind(tok), b))
719+
push!(stream.tokens, SyntaxToken(h, kind(tok), false, b))
728720
end
729721
stream.peek_count = 0
730722
return position(stream)
@@ -747,12 +739,14 @@ function reset_node!(stream::ParseStream, pos::ParseStreamPosition;
747739
kind=nothing, flags=nothing)
748740
if token_is_last(stream, pos)
749741
t = stream.tokens[pos.token_index]
750-
stream.tokens[pos.token_index] = SyntaxToken(_reset_node_head(t, kind, flags),
751-
t.orig_kind, t.next_byte)
742+
stream.tokens[pos.token_index] =
743+
SyntaxToken(_reset_node_head(t, kind, flags),
744+
t.orig_kind, t.preceding_whitespace, t.next_byte)
752745
else
753746
r = stream.ranges[pos.range_index]
754-
stream.ranges[pos.range_index] = TaggedRange(_reset_node_head(r, kind, flags),
755-
r.first_token, r.last_token)
747+
stream.ranges[pos.range_index] =
748+
TaggedRange(_reset_node_head(r, kind, flags),
749+
r.first_token, r.last_token)
756750
end
757751
end
758752

@@ -770,11 +764,13 @@ function steal_token_bytes!(stream::ParseStream, pos::ParseStreamPosition, numby
770764
t2 = stream.tokens[i+1]
771765

772766
t1_next_byte = t1.next_byte + numbytes
773-
stream.tokens[i] = SyntaxToken(t1.head, t1.orig_kind, t1_next_byte)
767+
stream.tokens[i] = SyntaxToken(t1.head, t1.orig_kind,
768+
t1.preceding_whitespace, t1_next_byte)
774769

775770
t2_is_empty = t1_next_byte == t2.next_byte
776771
head2 = t2_is_empty ? SyntaxHead(K"TOMBSTONE", EMPTY_FLAGS) : t2.head
777-
stream.tokens[i+1] = SyntaxToken(head2, t2.orig_kind, t2.next_byte)
772+
stream.tokens[i+1] = SyntaxToken(head2, t2.orig_kind,
773+
t2.preceding_whitespace, t2.next_byte)
778774
return t2_is_empty
779775
end
780776

@@ -920,7 +916,8 @@ function validate_tokens(stream::ParseStream)
920916
end
921917
if error_kind != K"None"
922918
toks[i] = SyntaxToken(SyntaxHead(error_kind, EMPTY_FLAGS),
923-
t.orig_kind, t.next_byte)
919+
t.orig_kind, t.preceding_whitespace,
920+
t.next_byte)
924921
end
925922
end
926923
sort!(stream.diagnostics, by=first_byte)
@@ -1052,6 +1049,7 @@ function Base.empty!(stream::ParseStream)
10521049
empty!(stream.tokens)
10531050
# Restore sentinel token
10541051
push!(stream.tokens, SyntaxToken(SyntaxHead(K"TOMBSTONE",EMPTY_FLAGS),
1055-
K"TOMBSTONE", t.next_byte))
1052+
K"TOMBSTONE", t.preceding_whitespace,
1053+
t.next_byte))
10561054
empty!(stream.ranges)
10571055
end

0 commit comments

Comments
 (0)