Skip to content

Commit 54a4c54

Browse files
committed
Better kind()/flags() API + compactify SyntaxToken flags
Define the combination of head/kind/flags functions as a more formal API - many syntax nodes and token types need these. On top of this we can define various predicates such as `is_trivia` in one place rather than having multiple definitions of these functions.
1 parent 1cbcade commit 54a4c54

File tree

4 files changed

+75
-67
lines changed

4 files changed

+75
-67
lines changed

src/green_tree.jl

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,10 +63,6 @@ children(node::GreenNode) = node.args
6363
span(node::GreenNode) = node.span
6464
head(node::GreenNode) = node.head
6565

66-
# Predicates
67-
is_trivia(node::GreenNode) = is_trivia(node.head)
68-
is_error(node::GreenNode) = is_error(node.head)
69-
7066
Base.summary(node::GreenNode) = summary(node.head)
7167

7268
# Pretty printing

src/parse_stream.jl

Lines changed: 46 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#-------------------------------------------------------------------------------
22
# Flags hold auxiliary information about tokens/nonterminals which the Kind
33
# doesn't capture in a nice way.
4-
const RawFlags = UInt32
4+
#
5+
# TODO: Use `primitive type SyntaxFlags 16 end` rather than an alias?
6+
const RawFlags = UInt16
57
const EMPTY_FLAGS = RawFlags(0)
68
const TRIVIA_FLAG = RawFlags(1<<0)
79
# Some of the following flags are head-specific and could probably be allowed
@@ -17,7 +19,11 @@ const RAW_STRING_FLAG = RawFlags(1<<4)
1719
const TRY_CATCH_AFTER_FINALLY_FLAG = RawFlags(1<<5)
1820
# Flags holding the dimension of an nrow or other UInt8 not held in the source
1921
const NUMERIC_FLAGS = RawFlags(RawFlags(0xff)<<8)
20-
# Todo ERROR_FLAG = 0x80000000 ?
22+
# Todo ERROR_FLAG = 0x8000 ?
23+
24+
## Flags for tokens (may overlap with the flags allocated for syntax above)
25+
const SUFFIXED_FLAG = RawFlags(1<<6)
26+
const PRECEDING_WHITESPACE_FLAG = RawFlags(1<<7)
2127

2228
function set_numeric_flags(n::Integer)
2329
f = RawFlags((n << 8) & NUMERIC_FLAGS)
@@ -31,6 +37,10 @@ function numeric_flags(f::RawFlags)
3137
Int((f >> 8) % UInt8)
3238
end
3339

40+
function remove_flags(n::RawFlags, fs...)
41+
RawFlags(n & ~(RawFlags((|)(fs...))))
42+
end
43+
3444
# Return true if any of `test_flags` are set
3545
has_flags(flags::RawFlags, test_flags) = (flags & test_flags) != 0
3646

@@ -42,13 +52,6 @@ end
4252

4353
kind(head::SyntaxHead) = head.kind
4454
flags(head::SyntaxHead) = head.flags
45-
has_flags(head::SyntaxHead, test_flags) = has_flags(flags(head), test_flags)
46-
47-
is_trivia(head::SyntaxHead) = has_flags(head, TRIVIA_FLAG)
48-
is_infix(head::SyntaxHead) = has_flags(head, INFIX_FLAG)
49-
is_dotted(head::SyntaxHead) = has_flags(head, DOTOP_FLAG)
50-
numeric_flags(head::SyntaxHead) = numeric_flags(flags(head))
51-
is_error(head::SyntaxHead) = kind(head) == K"error"
5255

5356
function Base.summary(head::SyntaxHead)
5457
untokenize(head, unique=false, include_flag_suff=false)
@@ -59,55 +62,74 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
5962
if is_dotted(head)
6063
str = "."*str
6164
end
62-
if include_flag_suff && flags(head) ∉ (EMPTY_FLAGS, DOTOP_FLAG)
65+
f = flags(head)
66+
# Ignore some flags:
67+
# - DOTOP_FLAG is represented with . prefix
68+
# - PRECEDING_WHITESPACE_FLAG relates to the environment of this token
69+
f &= ~(DOTOP_FLAG | PRECEDING_WHITESPACE_FLAG)
70+
suffix_flags = remove_flags(flags(head), DOTOP_FLAG, PRECEDING_WHITESPACE_FLAG)
71+
if include_flag_suff && suffix_flags != EMPTY_FLAGS
6372
str = str*"-"
6473
is_trivia(head) && (str = str*"t")
6574
is_infix(head) && (str = str*"i")
6675
has_flags(head, TRIPLE_STRING_FLAG) && (str = str*"s")
6776
has_flags(head, RAW_STRING_FLAG) && (str = str*"r")
6877
has_flags(head, TRY_CATCH_AFTER_FINALLY_FLAG) && (str = str*"f")
78+
is_suffixed(head) && (str = str*"S")
6979
n = numeric_flags(head)
7080
n != 0 && (str = str*string(n))
7181
end
7282
str
7383
end
7484

85+
#-------------------------------------------------------------------------------
86+
# Generic interface for types `T` which have kind and flags:
87+
# 1. Define kind(::T) and flags(::T) directly
88+
# 2. Define head(::T) to return a type like `SyntaxHead` for which `kind` and
89+
# `flags` are defined
90+
kind(x) = kind(head(x))
91+
flags(x) = flags(head(x))
92+
93+
# Predicates based on kind() / flags()
94+
is_error(x) = kind(x) == K"error"
95+
has_flags(x, test_flags) = has_flags(flags(x), test_flags)
96+
is_trivia(x) = has_flags(x, TRIVIA_FLAG)
97+
is_infix(x) = has_flags(x, INFIX_FLAG)
98+
is_dotted(x) = has_flags(x, DOTOP_FLAG)
99+
is_suffixed(x) = has_flags(x, SUFFIXED_FLAG)
100+
preceding_whitespace(x) = has_flags(x, PRECEDING_WHITESPACE_FLAG)
101+
numeric_flags(x) = numeric_flags(flags(x))
102+
75103
#-------------------------------------------------------------------------------
76104
"""
77105
`SyntaxToken` is a token covering a contiguous byte range in the input text.
78106
Information about preceding whitespace is added for use by the parser.
79107
"""
80108
struct SyntaxToken
81-
kind::Kind
109+
head::SyntaxHead
82110
first_byte::UInt32
83111
last_byte::UInt32
84-
# Flags for leading whitespace
85-
is_dotted::Bool
86-
is_suffixed::Bool
87-
had_whitespace::Bool
88112
end
89113

90114
function SyntaxToken(raw::Token, had_whitespace)
91-
SyntaxToken(raw.kind, raw.startbyte + 1, raw.endbyte + 1, raw.dotop, raw.suffix,
92-
had_whitespace)
115+
f = EMPTY_FLAGS
116+
had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG)
117+
raw.dotop && (f |= DOTOP_FLAG)
118+
raw.suffix && (f |= SUFFIXED_FLAG)
119+
SyntaxToken(SyntaxHead(raw.kind, f), raw.startbyte + 1, raw.endbyte + 1)
93120
end
94121

95122
function Base.show(io::IO, tok::SyntaxToken)
96-
range = string(lpad(first_byte(tok), 3), ":", rpad(last_byte(tok), 3))
97-
print(io, rpad(range, 17, " "), rpad(kind(tok), 15, " "))
123+
print(io, untokenize(tok.head, unique=false), " @", first_byte(tok))
98124
end
99125

100-
kind(tok::SyntaxToken) = tok.kind
101-
flags(tok::SyntaxToken) = tok.is_dotted ? DOTOP_FLAG : EMPTY_FLAGS
126+
head(tok::SyntaxToken) = tok.head
102127
first_byte(tok::SyntaxToken) = tok.first_byte
103128
last_byte(tok::SyntaxToken) = tok.last_byte
104129
span(tok::SyntaxToken) = last_byte(tok) - first_byte(tok) + 1
105130

106-
is_dotted(tok::SyntaxToken) = tok.is_dotted
107-
is_suffixed(tok::SyntaxToken) = tok.is_suffixed
108131
is_decorated(tok::SyntaxToken) = is_dotted(tok) || is_suffixed(tok)
109132

110-
Base.:(==)(tok::SyntaxToken, k::Kind) = (kind(tok) == k && !is_decorated(tok))
111133

112134
#-------------------------------------------------------------------------------
113135

src/parser.jl

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -184,7 +184,7 @@ end
184184

185185
# flisp: disallow-space
186186
function bump_disallowed_space(ps)
187-
if peek_token(ps).had_whitespace
187+
if preceding_whitespace(peek_token(ps))
188188
bump_trivia(ps, TRIVIA_FLAG, skip_newlines=false,
189189
error="whitespace is not allowed here")
190190
end
@@ -561,7 +561,7 @@ function parse_assignment_with_initial_ex(ps::ParseState, mark, down, equals_is_
561561
return NO_POSITION
562562
end
563563
if k == K"~"
564-
if ps.space_sensitive && !peek_token(ps, 2).had_whitespace
564+
if ps.space_sensitive && !preceding_whitespace(peek_token(ps, 2))
565565
# Unary ~ in space sensitive context is not assignment precedence
566566
# [a ~b] ==> (hcat a (call ~ b))
567567
return NO_POSITION
@@ -626,21 +626,21 @@ function parse_cond(ps::ParseState)
626626
if kind(t) != K"?"
627627
return
628628
end
629-
if !t.had_whitespace
629+
if !preceding_whitespace(t)
630630
# a? b : c => (if a (error-t) b c)
631631
bump_invisible(ps, K"error", TRIVIA_FLAG,
632632
error="space required before `?` operator")
633633
end
634634
bump(ps, TRIVIA_FLAG) # ?
635635
t = peek_token(ps)
636-
if !t.had_whitespace
636+
if !preceding_whitespace(t)
637637
# a ?b : c
638638
bump_invisible(ps, K"error", TRIVIA_FLAG,
639639
error="space required after `?` operator")
640640
end
641641
parse_eq_star(ParseState(ps, range_colon_enabled=false))
642642
t = peek_token(ps)
643-
if !t.had_whitespace
643+
if !preceding_whitespace(t)
644644
# a ? b: c ==> (if a [ ] [?] [ ] b (error-t) [:] [ ] c)
645645
bump_invisible(ps, K"error", TRIVIA_FLAG,
646646
error="space required before `:` in `?` expression")
@@ -652,7 +652,7 @@ function parse_cond(ps::ParseState)
652652
bump_invisible(ps, K"error", TRIVIA_FLAG, error="`:` expected in `?` expression")
653653
end
654654
t = peek_token(ps)
655-
if !t.had_whitespace
655+
if !preceding_whitespace(t)
656656
# a ? b :c ==> (if a [ ] [?] [ ] b [ ] [:] (error-t) c)
657657
bump_invisible(ps, K"error", TRIVIA_FLAG,
658658
error="space required after `:` in `?` expression")
@@ -799,15 +799,15 @@ function parse_range(ps::ParseState)
799799
n_colons = 0
800800
while peek(ps) == K":"
801801
if ps.space_sensitive &&
802-
peek_token(ps).had_whitespace &&
803-
!peek_token(ps, 2).had_whitespace
802+
preceding_whitespace(peek_token(ps)) &&
803+
!preceding_whitespace(peek_token(ps, 2))
804804
# Tricky cases in space sensitive mode
805805
# [1 :a] ==> (hcat 1 (quote a))
806806
# [1 2:3 :a] ==> (hcat 1 (call-i 2 : 3) (quote a))
807807
break
808808
end
809809
t2 = peek_token(ps,2)
810-
if kind(t2) in KSet`< >` && !t2.had_whitespace
810+
if kind(t2) in KSet`< >` && !preceding_whitespace(t2)
811811
# Error heuristic: we found `:>` or `:<` which are invalid lookalikes
812812
# for `<:` and `>:`. Attempt to recover by treating them as a
813813
# comparison operator.
@@ -887,9 +887,9 @@ function parse_with_chains(ps::ParseState, down, is_op, chain_ops)
887887
mark = position(ps)
888888
down(ps)
889889
while (t = peek_token(ps); is_op(kind(t)))
890-
if ps.space_sensitive && t.had_whitespace &&
890+
if ps.space_sensitive && preceding_whitespace(t) &&
891891
is_both_unary_and_binary(t) &&
892-
!peek_token(ps, 2).had_whitespace
892+
!preceding_whitespace(peek_token(ps, 2))
893893
# The following is two elements of a hcat
894894
# [x +y] ==> (hcat x (call + y))
895895
# [x+y +z] ==> (hcat (call-i x + y) (call + z))
@@ -917,9 +917,9 @@ end
917917
# flisp: parse-chain
918918
function parse_chain(ps::ParseState, down, op_kind)
919919
while (t = peek_token(ps); kind(t) == op_kind && !is_decorated(t))
920-
if ps.space_sensitive && t.had_whitespace &&
920+
if ps.space_sensitive && preceding_whitespace(t) &&
921921
is_both_unary_and_binary(t) &&
922-
!peek_token(ps, 2).had_whitespace
922+
!preceding_whitespace(peek_token(ps, 2))
923923
# [x +y] ==> (hcat x (call + y))
924924
break
925925
end
@@ -1024,7 +1024,7 @@ function is_juxtapose(ps, prev_k, t)
10241024
# x' y ==> x
10251025
# x 'y ==> x
10261026

1027-
return !t.had_whitespace &&
1027+
return !preceding_whitespace(t) &&
10281028
(is_number(prev_k) ||
10291029
(!is_number(k) && # disallow "x.3" and "sqrt(2)2"
10301030
k != K"@" && # disallow "x@time"
@@ -1098,7 +1098,7 @@ function parse_unary(ps::ParseState)
10981098
end
10991099
if k in KSet`- +`
11001100
t2 = peek_token(ps, 2)
1101-
if !t2.had_whitespace && kind(t2) in KSet`Integer Float`
1101+
if !preceding_whitespace(t2) && kind(t2) in KSet`Integer Float`
11021102
k3 = peek(ps, 3)
11031103
if is_prec_power(k3) || k3 in KSet`[ {`
11041104
# `[`, `{` (issue #18851) and `^` have higher precedence than
@@ -1190,7 +1190,7 @@ function parse_unary_call(ps::ParseState)
11901190
# The precedence between unary + and any following infix ^ depends on
11911191
# whether the parens are a function call or not
11921192
if is_call
1193-
if t2.had_whitespace
1193+
if preceding_whitespace(t2)
11941194
# Whitespace not allowed before prefix function call bracket
11951195
# + (a,b) ==> (call + (error) a b)
11961196
reset_node!(ps, ws_error_pos, kind=K"error")
@@ -1392,7 +1392,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
13921392
this_iter_valid_macroname = false
13931393
t = peek_token(ps)
13941394
k = kind(t)
1395-
if is_macrocall && (t.had_whitespace || is_closing_token(ps, k))
1395+
if is_macrocall && (preceding_whitespace(t) || is_closing_token(ps, k))
13961396
# Macro calls with space-separated arguments
13971397
# @foo a b ==> (macrocall @foo a b)
13981398
# @foo (x) ==> (macrocall @foo x)
@@ -1427,7 +1427,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
14271427
emit(ps, mark, K"macrocall")
14281428
end
14291429
break
1430-
elseif (ps.space_sensitive && t.had_whitespace &&
1430+
elseif (ps.space_sensitive && preceding_whitespace(t) &&
14311431
k in KSet`( [ { \ Char " """ \` \`\`\``)
14321432
# [f (x)] ==> (hcat f x)
14331433
# [f "x"] ==> (hcat f "x")
@@ -1605,7 +1605,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16051605
emit(ps, mark, K"curly")
16061606
end
16071607
elseif k in KSet` " """ \` \`\`\` ` &&
1608-
!t.had_whitespace && valid_macroname
1608+
!preceding_whitespace(t) && valid_macroname
16091609
# Custom string and command literals
16101610
# x"str" ==> (macrocall @x_str "str")
16111611
# x`str` ==> (macrocall @x_cmd "str")
@@ -1623,7 +1623,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false)
16231623
parse_string(ps, true)
16241624
t = peek_token(ps)
16251625
k = kind(t)
1626-
if !t.had_whitespace && (k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k))
1626+
if !preceding_whitespace(t) && (k == K"Identifier" || is_keyword(k) || is_word_operator(k) || is_number(k))
16271627
# Macro suffixes can include keywords and numbers
16281628
# x"s"y ==> (macrocall @x_str "s" "y")
16291629
# x"s"end ==> (macrocall @x_str "s" "end")
@@ -2248,7 +2248,7 @@ function parse_imports(ps::ParseState)
22482248
k = kind(t)
22492249
has_import_prefix = false # true if we have `prefix:` in `import prefix: stuff`
22502250
has_comma = false
2251-
if k == K":" && !t.had_whitespace
2251+
if k == K":" && !preceding_whitespace(t)
22522252
bump(ps, TRIVIA_FLAG)
22532253
has_import_prefix = true
22542254
if initial_as
@@ -2368,7 +2368,7 @@ function parse_import_path(ps::ParseState)
23682368
# path, not operators
23692369
# import A.== ==> (import (. A ==))
23702370
# import A.⋆.f ==> (import (. A ⋆ f))
2371-
if t.had_whitespace
2371+
if preceding_whitespace(t)
23722372
# Whitespace in import path allowed but discouraged
23732373
# import A .== ==> (import (. A ==))
23742374
emit_diagnostic(ps, whitespace=true,
@@ -2537,7 +2537,7 @@ end
25372537
# flisp: parse-generator
25382538
function parse_generator(ps::ParseState, mark, flatten=false)
25392539
t = peek_token(ps)
2540-
if !t.had_whitespace
2540+
if !preceding_whitespace(t)
25412541
# [(x)for x in xs] ==> (comprehension (generator x (error) (= x xs)))
25422542
bump_invisible(ps, K"error", TRIVIA_FLAG,
25432543
error="Expected space before `for` in generator")
@@ -2707,7 +2707,7 @@ function parse_array_separator(ps, array_order)
27072707
if kind(t) != K";"
27082708
break
27092709
end
2710-
if t.had_whitespace
2710+
if preceding_whitespace(t)
27112711
bump_disallowed_space(ps)
27122712
end
27132713
n_semis += 1
@@ -2751,7 +2751,7 @@ function parse_array_separator(ps, array_order)
27512751
bump(ps, TRIVIA_FLAG, error="unexpected comma in array expression")
27522752
return (1, -1)
27532753
else
2754-
if t.had_whitespace && !is_closing_token(ps, k)
2754+
if preceding_whitespace(t) && !is_closing_token(ps, k)
27552755
if array_order[] === :column_major
27562756
# Can't mix multiple ;'s and spaces
27572757
#v1.7: [a ;; b c] ==> (ncat-2 a (row b (error-t) c))
@@ -3252,15 +3252,15 @@ function parse_atom(ps::ParseState, check_identifiers=true)
32523252
# : foo ==> (quote (error-t) foo)
32533253
t = peek_token(ps, 2)
32543254
k = kind(t)
3255-
if is_closing_token(ps, k) && (!is_keyword(k) || t.had_whitespace)
3255+
if is_closing_token(ps, k) && (!is_keyword(k) || preceding_whitespace(t))
32563256
# : is a literal colon in some circumstances
32573257
# :) ==> :
32583258
# : end ==> :
32593259
bump(ps) # K":"
32603260
return
32613261
end
32623262
bump(ps, TRIVIA_FLAG) # K":"
3263-
if t.had_whitespace
3263+
if preceding_whitespace(t)
32643264
# : a ==> (quote (error-t) a))
32653265
# ===
32663266
# :
@@ -3306,7 +3306,7 @@ function parse_atom(ps::ParseState, check_identifiers=true)
33063306
end
33073307
elseif is_keyword(leading_kind)
33083308
if leading_kind == K"var" && (t = peek_token(ps,2);
3309-
kind(t) == K"\"" && !t.had_whitespace)
3309+
kind(t) == K"\"" && !preceding_whitespace(t))
33103310
# var"x" ==> x
33113311
# Raw mode unescaping
33123312
# var"" ==>
@@ -3333,7 +3333,7 @@ function parse_atom(ps::ParseState, check_identifiers=true)
33333333
end
33343334
t = peek_token(ps)
33353335
k = kind(t)
3336-
if t.had_whitespace || is_operator(k) ||
3336+
if preceding_whitespace(t) || is_operator(k) ||
33373337
k in KSet`( ) [ ] { } , ; @ EndMarker`
33383338
# var"x"+ ==> x
33393339
# var"x") ==> x

0 commit comments

Comments
 (0)