@@ -9,93 +9,11 @@ const EMPTY_FLAGS = RawFlags(0)
# Set for tokens or ranges which are syntax trivia after parsing
const TRIVIA_FLAG = RawFlags(1 << 0)

# Token flags - may be set for operator kinded tokens
# Operator is dotted
const DOTOP_FLAG = RawFlags(1 << 1)
# Operator has a suffix
const SUFFIXED_FLAG = RawFlags(1 << 2)

# Set for K"call", K"dotcall" or any syntactic operator heads
# Distinguish various syntaxes which are mapped to K"call".
# These four values occupy the two-bit field at bits 3-4 (the field masked out
# by `call_type_flags`). They are compared for equality rather than tested as
# individual bits; note that PREFIX_CALL_FLAG is zero.
const PREFIX_CALL_FLAG = RawFlags(0 << 3)
const INFIX_FLAG       = RawFlags(1 << 3)
const PREFIX_OP_FLAG   = RawFlags(2 << 3)
const POSTFIX_OP_FLAG  = RawFlags(3 << 3)

# The following flags are quite head-specific and may overlap: several of them
# deliberately reuse bits 5 and 6, so they are only meaningful together with
# the kind of the head they are attached to.

"""
Set when K"string" or K"cmdstring" was triple-delimited as with \"\"\" or ```
"""
const TRIPLE_STRING_FLAG = RawFlags(1 << 5)

"""
Set when a K"string", K"cmdstring" or K"Identifier" needs raw string unescaping
"""
const RAW_STRING_FLAG = RawFlags(1 << 6)

"""
Set for K"tuple", K"block" or K"macrocall" which are delimited by parentheses
"""
const PARENS_FLAG = RawFlags(1 << 5)

"""
Set for various delimited constructs when they contain a trailing comma. For
example, to distinguish `(a,b,)` vs `(a,b)`, and `f(a)` vs `f(a,)`. Kinds where
this applies are: `tuple call dotcall macrocall vect curly braces <: >:`.
"""
const TRAILING_COMMA_FLAG = RawFlags(1 << 6)

"""
Set for K"quote" for the short form `:x` as opposed to long form `quote x end`
"""
const COLON_QUOTE = RawFlags(1 << 5)

"""
Set for K"toplevel" whose statements are separated by semicolons
"""
const TOPLEVEL_SEMICOLONS_FLAG = RawFlags(1 << 5)

"""
Set for K"function" in short form definitions such as `f() = 1`
"""
const SHORT_FORM_FUNCTION_FLAG = RawFlags(1 << 5)

"""
Set for K"struct" when mutable
"""
const MUTABLE_FLAG = RawFlags(1 << 5)

"""
Set for K"module" when it is bare (`baremodule`, not `module`)
"""
const BARE_MODULE_FLAG = RawFlags(1 << 5)

"""
Set for nodes that are non-terminals
"""
const NON_TERMINAL_FLAG = RawFlags(1 << 7)

# Flags holding the dimension of an nrow or other UInt8 not held in the source.
# This is the mask for the eight bits starting at bit 8.
# TODO: Given this is only used for nrow/ncat, we could actually use all the flags?
const NUMERIC_FLAGS = RawFlags(RawFlags(0xff) << 8)
82-
"""
    set_numeric_flags(n::Integer)

Pack `n` into the bit field selected by `NUMERIC_FLAGS` and return the
resulting flags. Raises an error if reading the value back with
`numeric_flags` does not reproduce `n`.
"""
function set_numeric_flags(n::Integer)
    packed = RawFlags((n << 8) & NUMERIC_FLAGS)
    # Round-trip check: anything that does not fit in the field is rejected.
    numeric_flags(packed) == n ||
        error("Numeric flags unable to hold large integer $n")
    return packed
end
90-
# Keep only bits 3 and 4 of `f`, the field holding the call-type flags.
call_type_flags(f::RawFlags) = f & 0b11000
94-
# Read back the eight bits starting at bit 8 of `f` as an `Int`.
function numeric_flags(f::RawFlags)
    high_byte = (f >> 8) % UInt8
    return Int(high_byte)
end
98-
# Return `n` with every bit set in any of `fs` cleared.
function remove_flags(n::RawFlags, fs...)
    to_clear = RawFlags((|)(fs...))
    return RawFlags(n & ~to_clear)
end
@@ -138,47 +56,6 @@ function Base.summary(head::SyntaxHead)
13856 untokenize (head, unique= false , include_flag_suff= false )
13957end
14058
"""
    untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)

Render `head` as a string: the untokenized kind, prefixed with `.` when the
head is dotted. When `include_flag_suff` is true, one `-suffix` is appended for
each flag that is set and relevant to the kind of `head`. Error kinds are
always untokenized with `unique=false`.
"""
function untokenize(head::SyntaxHead; unique=true, include_flag_suff=true)
    k = kind(head)
    name = untokenize(k; unique=(is_error(k) ? false : unique))::String

    io = IOBuffer()
    # DOTOP_FLAG is shown as a leading dot rather than as a suffix
    is_dotted(head) && print(io, ".")
    print(io, name)

    if include_flag_suff
        is_trivia(head)          && print(io, "-t")
        is_infix_op_call(head)   && print(io, "-i")
        is_prefix_op_call(head)  && print(io, "-pre")
        is_postfix_op_call(head) && print(io, "-post")

        # Head-specific flags share bits, so each is interpreted per kind
        if k in KSet"string cmdstring Identifier"
            has_flags(head, TRIPLE_STRING_FLAG) && print(io, "-s")
            has_flags(head, RAW_STRING_FLAG)    && print(io, "-r")
        elseif k in KSet"tuple block macrocall"
            has_flags(head, PARENS_FLAG) && print(io, "-p")
        elseif k == K"quote"
            has_flags(head, COLON_QUOTE) && print(io, "-:")
        elseif k == K"toplevel"
            has_flags(head, TOPLEVEL_SEMICOLONS_FLAG) && print(io, "-;")
        elseif k == K"function"
            has_flags(head, SHORT_FORM_FUNCTION_FLAG) && print(io, "-=")
        elseif k == K"struct"
            has_flags(head, MUTABLE_FLAG) && print(io, "-mut")
        elseif k == K"module"
            has_flags(head, BARE_MODULE_FLAG) && print(io, "-bare")
        end

        if k in KSet"tuple call dotcall macrocall vect curly braces <: >:" &&
                has_flags(head, TRAILING_COMMA_FLAG)
            print(io, "-,")
        end

        is_suffixed(head) && print(io, "-suf")

        n = numeric_flags(head)
        n != 0 && print(io, "-", string(n))
    end
    return String(take!(io))
end
181-
18259# -------------------------------------------------------------------------------
18360# Generic interface for types `T` which have kind and flags. Either:
18461# 1. Define kind(::T) and flags(::T), or
@@ -200,65 +77,6 @@ invisible to the parser (eg, whitespace) or implied by the structure of the AST
20077"""
20178is_trivia (x) = has_flags (x, TRIVIA_FLAG)
20279
"""
    is_prefix_call(x)

Return true when `x` uses ordinary prefix function call syntax, such as the
`f` call node parsed from `f(x)`.
"""
function is_prefix_call(x)
    return call_type_flags(x) == PREFIX_CALL_FLAG
end
210-
"""
    is_infix_op_call(x)

Return true when `x` is an infix operator call, such as the `+` call node
parsed from `x + y`.
"""
function is_infix_op_call(x)
    return call_type_flags(x) == INFIX_FLAG
end
218-
"""
    is_prefix_op_call(x)

Return true when `x` is a prefix operator call, such as the `+` call node
parsed from `+x`.
"""
function is_prefix_op_call(x)
    return call_type_flags(x) == PREFIX_OP_FLAG
end
225-
"""
    is_postfix_op_call(x)

Return true when `x` is a postfix operator call, such as the `'ᵀ` call node
parsed from `x'ᵀ`.
"""
function is_postfix_op_call(x)
    return call_type_flags(x) == POSTFIX_OP_FLAG
end
232-
"""
    is_dotted(x)

Return true when `x` is a dotted syntax token, i.e. has `DOTOP_FLAG` set.
"""
function is_dotted(x)
    return has_flags(x, DOTOP_FLAG)
end
239-
"""
    is_suffixed(x)

Return true when `x` is an operator carrying a suffix, such as `+₁`.
"""
function is_suffixed(x)
    return has_flags(x, SUFFIXED_FLAG)
end
246-
"""
    is_decorated(x)

Return true when `x` is an operator decorated with a dot, a suffix, or both.
"""
function is_decorated(x)
    return is_dotted(x) || is_suffixed(x)
end
253-
"""
    numeric_flags(x)

Return the number stored in the flags of `x` (for example a `SyntaxHead`).
For now this is only used for kinds `K"nrow"` and `K"ncat"`.
"""
function numeric_flags(x)
    return numeric_flags(flags(x))
end
261-
26280# -------------------------------------------------------------------------------
26381"""
26482`SyntaxToken` is a token covering a contiguous byte range in the input text.
@@ -962,45 +780,6 @@ function bump_glue(stream::ParseStream, kind, flags)
962780 return position (stream)
963781end
964782
"""
    bump_split(stream, token_spec1, [token_spec2 ...])

Consume the next lookahead token, emitting it as several output nodes.

Each `token_spec` is a tuple `(nbyte, kind, flags)` describing one piece. When
every `nbyte` is positive their sum must equal the token length. When one
`nbyte` is negative, that piece receives `tok_len + nbyte` bytes and the sum of
all `nbyte` must be zero.

This is a hack which helps resolve the occasional lexing ambiguity. For
example
* Whether .+ should be a single token or the composite (. +) which is used for
  standalone operators.
* Whether ... is splatting (most of the time) or three . tokens in import paths

TODO: Are these the only cases? Can we replace this general utility with a
simpler one which only splits preceding dots?
"""
function bump_split(stream::ParseStream, split_spec::Vararg{Any, N}) where {N}
    tok = stream.lookahead[stream.lookahead_index]
    stream.lookahead_index += 1
    start_b = _next_byte(stream)
    toklen = tok.next_byte - start_b
    end_b = start_b
    for (nbyte, k, f) in split_spec
        # A negative length means "the token length minus this many bytes"
        span = nbyte < 0 ? (toklen + nbyte) : nbyte
        # Pieces of kind K"." record K"." as their original kind; every other
        # piece records the kind of the token being split.
        orig_k = k == K"." ? K"." : kind(tok)
        push!(stream.output, RawGreenNode(SyntaxHead(k, f), span, orig_k))
        end_b += span
        stream.next_byte += span
    end
    # The pieces must cover the original token exactly
    @assert tok.next_byte == end_b
    stream.peek_count = 0
    return position(stream)
end
1003-
1004783"""
1005784Reset kind or flags of an existing node in the output stream
1006785
@@ -1129,98 +908,6 @@ function emit_diagnostic(diagnostics::AbstractVector{Diagnostic},
1129908 push! (diagnostics, Diagnostic (first (byterange), last (byterange); kws... ))
1130909end
1131910
1132- # -------------------------------------------------------------------------------
1133- # ParseStream Post-processing
1134-
"""
    validate_tokens(stream::ParseStream)

Walk the terminal nodes of `stream.output` and check the literal text of each
one: floating point literals for overflow and underflow to zero, character
literals and non-raw strings for invalid escape sequences, and character
literals for containing more than one character. Token-level error kinds other
than `K"error"` get a diagnostic describing them. Nodes found to be invalid are
replaced in place by a node with the corresponding error kind. Finally the
diagnostics are sorted by `first_byte`.
"""
function validate_tokens(stream::ParseStream)
    txtbuf = unsafe_textbuf(stream)
    charbuf = IOBuffer()

    # Process terminal nodes in the output
    fbyte = stream.output[1].byte_span + 1  # Start after sentinel
    for i in 2:length(stream.output)
        node = stream.output[i]
        # Non-terminals and tombstones are skipped without advancing `fbyte`
        (is_terminal(node) && kind(node) != K"TOMBSTONE") || continue

        k = kind(node)
        nbyte = fbyte + node.byte_span
        tokrange = fbyte:nbyte-1
        error_kind = K"None"

        if k in KSet"Integer BinInt OctInt HexInt"
            # The following shouldn't be able to error...
            # parse_int_literal
            # parse_uint_literal
        elseif k == K"Float" || k == K"Float32"
            x, code = if k == K"Float"
                parse_float_literal(Float64, txtbuf, fbyte, nbyte)
            else
                parse_float_literal(Float32, txtbuf, fbyte, nbyte)
            end
            if code === :overflow
                emit_diagnostic(stream, tokrange,
                                error="overflow in floating point literal")
                error_kind = K"ErrorNumericOverflow"
            elseif code === :underflow && x == 0
                # jl_strtod_c can return "underflow" even for valid cases such
                # as `5e-324` where the source is an exact representation of
                # `x`. So only warn when underflowing to zero.
                emit_diagnostic(stream, tokrange,
                                warning="underflow to zero in floating point literal")
            end
        elseif k == K"Char"
            @assert fbyte < nbyte # Already handled in the parser
            truncate(charbuf, 0)
            had_error = unescape_julia_string(charbuf, txtbuf, fbyte,
                                              nbyte, stream.diagnostics)
            if had_error
                error_kind = K"ErrorInvalidEscapeSequence"
            else
                # Consume one character; anything left over means the literal
                # held more than one.
                seek(charbuf, 0)
                read(charbuf, Char)
                if !eof(charbuf)
                    error_kind = K"ErrorOverLongCharacter"
                    emit_diagnostic(stream, tokrange,
                                    error="character literal contains multiple characters")
                end
            end
        elseif k == K"String" && !has_flags(node, RAW_STRING_FLAG)
            # Only the error status matters here, so the output is discarded
            had_error = unescape_julia_string(devnull, txtbuf, fbyte,
                                              nbyte, stream.diagnostics)
            if had_error
                error_kind = K"ErrorInvalidEscapeSequence"
            end
        elseif is_error(k) && k != K"error"
            # Emit messages for non-generic token errors
            tokstr = String(txtbuf[tokrange])
            description = _token_error_descriptions[k]
            msg = if k in KSet"ErrorInvisibleChar ErrorUnknownCharacter ErrorIdentifierStart"
                "$(description) $(repr(tokstr[1]))"
            elseif k in KSet"ErrorInvalidUTF8 ErrorBidiFormatting"
                "$(description) $(repr(tokstr))"
            else
                description
            end
            emit_diagnostic(stream, tokrange, error=msg)
        end

        if error_kind != K"None"
            # Update the node with new error kind
            replacement = SyntaxHead(error_kind, EMPTY_FLAGS)
            stream.output[i] = RawGreenNode(replacement, node.byte_span,
                                            node.orig_kind)
        end

        fbyte = nbyte
    end
    return sort!(stream.diagnostics, by=first_byte)
end
1223-
1224911# Tree construction from the list of text ranges held by ParseStream
1225912
1226913# API for extracting results from ParseStream
0 commit comments