@@ -25,12 +25,7 @@ const TRIPLE_STRING_FLAG = RawFlags(1<<5)
25
25
# Set when a string or identifier needs "raw string" unescaping
26
26
const RAW_STRING_FLAG = RawFlags (1 << 6 )
27
27
28
- # TODO ?
29
- # const ERROR_FLAG = RawFlags(1<<7)
30
-
31
- # Token-only flag
32
- # Record whether a token had preceding whitespace
33
- const PRECEDING_WHITESPACE_FLAG = RawFlags (1 << 7 )
28
+ const PARENS_FLAG = RawFlags (1 << 7 )
34
29
35
30
# Flags holding the dimension of an nrow or other UInt8 not held in the source
36
31
const NUMERIC_FLAGS = RawFlags (RawFlags (0xff )<< 8 )
@@ -77,21 +72,18 @@ function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
77
72
if is_dotted (head)
78
73
str = " ." * str
79
74
end
80
- # Ignore some flags:
81
- # DOTOP_FLAG is represented above with . prefix
82
- # PRECEDING_WHITESPACE_FLAG relates to the environment of this token
83
- suffix_flags = remove_flags (flags (head), DOTOP_FLAG, PRECEDING_WHITESPACE_FLAG)
84
- if include_flag_suff && suffix_flags != EMPTY_FLAGS
85
- str = str* " -"
86
- is_trivia (head) && (str = str* " t" )
87
- is_infix_op_call (head) && (str = str* " i" )
88
- is_prefix_op_call (head) && (str = str* " pre" )
89
- is_postfix_op_call (head) && (str = str* " post" )
90
- has_flags (head, TRIPLE_STRING_FLAG) && (str = str* " s" )
91
- has_flags (head, RAW_STRING_FLAG) && (str = str* " r" )
92
- is_suffixed (head) && (str = str* " S" )
75
+ if include_flag_suff
76
+ # Ignore DOTOP_FLAG - it's represented above with . prefix
77
+ is_trivia (head) && (str = str* " -t" )
78
+ is_infix_op_call (head) && (str = str* " -i" )
79
+ is_prefix_op_call (head) && (str = str* " -pre" )
80
+ is_postfix_op_call (head) && (str = str* " -post" )
81
+ has_flags (head, TRIPLE_STRING_FLAG) && (str = str* " -s" )
82
+ has_flags (head, RAW_STRING_FLAG) && (str = str* " -r" )
83
+ has_flags (head, PARENS_FLAG) && (str = str* " -p" )
84
+ is_suffixed (head) && (str = str* " -S" )
93
85
n = numeric_flags (head)
94
- n != 0 && (str = str* string (n))
86
+ n != 0 && (str = str* " - " * string (n))
95
87
end
96
88
str
97
89
end
@@ -116,7 +108,6 @@ is_postfix_op_call(x) = call_type_flags(x) == POSTFIX_OP_FLAG
116
108
is_dotted (x) = has_flags (x, DOTOP_FLAG)
117
109
is_suffixed (x) = has_flags (x, SUFFIXED_FLAG)
118
110
is_decorated (x) = is_dotted (x) || is_suffixed (x)
119
- preceding_whitespace (x) = has_flags (x, PRECEDING_WHITESPACE_FLAG)
120
111
numeric_flags (x) = numeric_flags (flags (x))
121
112
122
113
# -------------------------------------------------------------------------------
@@ -131,18 +122,17 @@ token to be used for recording the first byte of the first real token.
131
122
struct SyntaxToken
132
123
head:: SyntaxHead
133
124
orig_kind:: Kind
125
+ preceding_whitespace:: Bool
134
126
next_byte:: UInt32
135
127
end
136
128
137
- function SyntaxToken (head:: SyntaxHead , next_byte:: Integer )
138
- SyntaxToken (head, kind (head), next_byte)
139
- end
140
-
141
129
function Base. show (io:: IO , tok:: SyntaxToken )
142
130
print (io, rpad (untokenize (tok. head, unique= false ), 15 ), " |" , tok. next_byte)
143
131
end
144
132
145
133
head (tok:: SyntaxToken ) = tok. head
134
+ flags (tok:: SyntaxToken ) = remove_flags (flags (tok. head), NUMERIC_FLAGS)
135
+ preceding_whitespace (tok:: SyntaxToken ) = tok. preceding_whitespace
146
136
147
137
148
138
# -------------------------------------------------------------------------------
@@ -240,7 +230,7 @@ mutable struct ParseStream
240
230
ver = (version. major, version. minor)
241
231
# Initial sentinel token containing the first byte of the first real token.
242
232
sentinel = SyntaxToken (SyntaxHead (K " TOMBSTONE" , EMPTY_FLAGS),
243
- K " TOMBSTONE" , next_byte)
233
+ K " TOMBSTONE" , false , next_byte)
244
234
new (text_buf,
245
235
text_root,
246
236
lexer,
@@ -353,10 +343,10 @@ function _buffer_lookahead_tokens(lexer, lookahead)
353
343
was_whitespace = k in (K " Whitespace" , K " Comment" , K " NewlineWs" )
354
344
had_whitespace |= was_whitespace
355
345
f = EMPTY_FLAGS
356
- had_whitespace && (f |= PRECEDING_WHITESPACE_FLAG)
357
346
raw. dotop && (f |= DOTOP_FLAG)
358
347
raw. suffix && (f |= SUFFIXED_FLAG)
359
- push! (lookahead, SyntaxToken (SyntaxHead (k, f), raw. endbyte + 2 ))
348
+ push! (lookahead, SyntaxToken (SyntaxHead (k, f), k,
349
+ had_whitespace, raw. endbyte + 2 ))
360
350
token_count += 1
361
351
if k == K " EndMarker"
362
352
break
@@ -471,7 +461,7 @@ function peek_token(stream::ParseStream, n::Integer=1;
471
461
if ! skip_whitespace
472
462
i = stream. lookahead_index
473
463
end
474
- return @inbounds head ( stream. lookahead[i])
464
+ return @inbounds stream. lookahead[i]
475
465
end
476
466
477
467
@@ -613,12 +603,13 @@ function _bump_until_n(stream::ParseStream, n::Integer, flags, remap_kind=K"None
613
603
if k == K " EndMarker"
614
604
break
615
605
end
616
- f = flags | remove_flags (( @__MODULE__ ). flags (tok), PRECEDING_WHITESPACE_FLAG )
606
+ f = flags | ( @__MODULE__ ). flags (tok)
617
607
is_trivia = k ∈ (K " Whitespace" , K " Comment" , K " NewlineWs" )
618
608
is_trivia && (f |= TRIVIA_FLAG)
619
609
outk = (is_trivia || remap_kind == K " None" ) ? k : remap_kind
620
610
h = SyntaxHead (outk, f)
621
- push! (stream. tokens, SyntaxToken (h, kind (tok), tok. next_byte))
611
+ push! (stream. tokens,
612
+ SyntaxToken (h, kind (tok), tok. preceding_whitespace, tok. next_byte))
622
613
end
623
614
stream. lookahead_index = n + 1
624
615
# Defuse the time bomb
@@ -675,7 +666,7 @@ function bump_invisible(stream::ParseStream, kind, flags=EMPTY_FLAGS;
675
666
error= nothing )
676
667
b = _next_byte (stream)
677
668
h = SyntaxHead (kind, flags)
678
- push! (stream. tokens, SyntaxToken (h, b))
669
+ push! (stream. tokens, SyntaxToken (h, ( @__MODULE__ ) . kind (h), false , b))
679
670
if ! isnothing (error)
680
671
emit_diagnostic (stream, b, b- 1 , error= error)
681
672
end
@@ -693,7 +684,8 @@ whitespace if necessary with bump_trivia.
693
684
function bump_glue (stream:: ParseStream , kind, flags, num_tokens)
694
685
i = stream. lookahead_index
695
686
h = SyntaxHead (kind, flags)
696
- push! (stream. tokens, SyntaxToken (h, stream. lookahead[i+ 1 ]. next_byte))
687
+ push! (stream. tokens, SyntaxToken (h, kind, false ,
688
+ stream. lookahead[i+ 1 ]. next_byte))
697
689
stream. lookahead_index += num_tokens
698
690
stream. peek_count = 0
699
691
return position (stream)
@@ -724,7 +716,7 @@ function bump_split(stream::ParseStream, split_spec...)
724
716
for (i, (nbyte, k, f)) in enumerate (split_spec)
725
717
h = SyntaxHead (k, f)
726
718
b = (i == length (split_spec)) ? tok. next_byte : b + nbyte
727
- push! (stream. tokens, SyntaxToken (h, kind (tok), b))
719
+ push! (stream. tokens, SyntaxToken (h, kind (tok), false , b))
728
720
end
729
721
stream. peek_count = 0
730
722
return position (stream)
@@ -747,12 +739,14 @@ function reset_node!(stream::ParseStream, pos::ParseStreamPosition;
747
739
kind= nothing , flags= nothing )
748
740
if token_is_last (stream, pos)
749
741
t = stream. tokens[pos. token_index]
750
- stream. tokens[pos. token_index] = SyntaxToken (_reset_node_head (t, kind, flags),
751
- t. orig_kind, t. next_byte)
742
+ stream. tokens[pos. token_index] =
743
+ SyntaxToken (_reset_node_head (t, kind, flags),
744
+ t. orig_kind, t. preceding_whitespace, t. next_byte)
752
745
else
753
746
r = stream. ranges[pos. range_index]
754
- stream. ranges[pos. range_index] = TaggedRange (_reset_node_head (r, kind, flags),
755
- r. first_token, r. last_token)
747
+ stream. ranges[pos. range_index] =
748
+ TaggedRange (_reset_node_head (r, kind, flags),
749
+ r. first_token, r. last_token)
756
750
end
757
751
end
758
752
@@ -770,11 +764,13 @@ function steal_token_bytes!(stream::ParseStream, pos::ParseStreamPosition, numby
770
764
t2 = stream. tokens[i+ 1 ]
771
765
772
766
t1_next_byte = t1. next_byte + numbytes
773
- stream. tokens[i] = SyntaxToken (t1. head, t1. orig_kind, t1_next_byte)
767
+ stream. tokens[i] = SyntaxToken (t1. head, t1. orig_kind,
768
+ t1. preceding_whitespace, t1_next_byte)
774
769
775
770
t2_is_empty = t1_next_byte == t2. next_byte
776
771
head2 = t2_is_empty ? SyntaxHead (K " TOMBSTONE" , EMPTY_FLAGS) : t2. head
777
- stream. tokens[i+ 1 ] = SyntaxToken (head2, t2. orig_kind, t2. next_byte)
772
+ stream. tokens[i+ 1 ] = SyntaxToken (head2, t2. orig_kind,
773
+ t2. preceding_whitespace, t2. next_byte)
778
774
return t2_is_empty
779
775
end
780
776
@@ -920,7 +916,8 @@ function validate_tokens(stream::ParseStream)
920
916
end
921
917
if error_kind != K " None"
922
918
toks[i] = SyntaxToken (SyntaxHead (error_kind, EMPTY_FLAGS),
923
- t. orig_kind, t. next_byte)
919
+ t. orig_kind, t. preceding_whitespace,
920
+ t. next_byte)
924
921
end
925
922
end
926
923
sort! (stream. diagnostics, by= first_byte)
@@ -1052,6 +1049,7 @@ function Base.empty!(stream::ParseStream)
1052
1049
empty! (stream. tokens)
1053
1050
# Restore sentinel token
1054
1051
push! (stream. tokens, SyntaxToken (SyntaxHead (K " TOMBSTONE" ,EMPTY_FLAGS),
1055
- K " TOMBSTONE" , t. next_byte))
1052
+ K " TOMBSTONE" , t. preceding_whitespace,
1053
+ t. next_byte))
1056
1054
empty! (stream. ranges)
1057
1055
end
0 commit comments