|
1 | 1 | module CSTParser
|
2 |
| -global debug = true |
3 | 2 |
|
4 | 3 | using Tokenize
|
5 |
| -import Base: length, first, last, getindex, setindex! |
6 | 4 | import Tokenize.Tokens
|
7 | 5 | import Tokenize.Tokens: RawToken, AbstractToken, iskeyword, isliteral, isoperator, untokenize
|
8 |
| -import Tokenize.Lexers: Lexer, peekchar, iswhitespace |
| 6 | +import Tokenize.Lexers: Lexer, peekchar, iswhitespace, readchar, emit, emit_error, accept_batch, eof |
9 | 7 |
|
10 |
| -export ParseState, parse_expression |
| 8 | +include("packagedef.jl") |
11 | 9 |
|
12 |
| -include("lexer.jl") |
13 |
| -include("spec.jl") |
14 |
| -include("utils.jl") |
15 |
| -include("recovery.jl") |
16 |
| -include("components/internals.jl") |
17 |
| -include("components/keywords.jl") |
18 |
| -include("components/lists.jl") |
19 |
| -include("components/operators.jl") |
20 |
| -include("components/strings.jl") |
21 |
| -include("conversion.jl") |
22 |
| -include("display.jl") |
23 |
| -include("interface.jl") |
24 |
| -include("iterate.jl") |
25 |
| - |
26 |
| -""" |
27 |
| - parse_expression(ps) |
28 |
| -
|
29 |
| -Parses an expression until `closer(ps) == true`. Expects to enter the |
30 |
| -`ParseState` the token before the beginning of the expression and ends |
31 |
| -on the last token. |
32 |
| -
|
33 |
| -Acceptable starting tokens are: |
34 |
| -+ A keyword |
35 |
| -+ An opening parenthesis or brace. |
36 |
| -+ An operator. |
37 |
| -+ An instance (e.g. identifier, number, etc.) |
38 |
| -+ An `@`. |
39 |
| -
|
40 |
| -""" |
41 |
| -function parse_expression(ps::ParseState, esc_on_error = false) |
42 |
| - if kindof(ps.nt) === Tokens.ENDMARKER |
43 |
| - ret = mErrorToken(ps, UnexpectedToken) |
44 |
| - elseif (esc_on_error && ps.nt.kind == Tokens.ERROR) |
45 |
| - ret = EXPR(:errortoken, 0, 0) |
46 |
| - elseif kindof(ps.nt) ∈ term_c && !(kindof(ps.nt) === Tokens.END && ps.closer.square) |
47 |
| - if ps.closer.square && kindof(ps.nt) === Tokens.RSQUARE |
48 |
| - ret = mErrorToken(ps, UnexpectedToken) |
49 |
| - else |
50 |
| - ret = mErrorToken(ps, EXPR(next(ps)), UnexpectedToken) |
51 |
| - end |
52 |
| - else |
53 |
| - next(ps) |
54 |
| - if iskeyword(kindof(ps.t)) && kindof(ps.t) != Tokens.DO |
55 |
| - ret = parse_kw(ps) |
56 |
| - elseif kindof(ps.t) === Tokens.LPAREN |
57 |
| - ret = parse_paren(ps) |
58 |
| - elseif kindof(ps.t) === Tokens.LSQUARE |
59 |
| - ret = @closer ps :for_generator @default ps parse_array(ps) |
60 |
| - elseif kindof(ps.t) === Tokens.LBRACE |
61 |
| - ret = @default ps @closebrace ps parse_braces(ps) |
62 |
| - elseif isinstance(ps.t) || isoperator(ps.t) |
63 |
| - if both_symbol_and_op(ps.t) |
64 |
| - ret = EXPR(:IDENTIFIER, ps) |
65 |
| - else |
66 |
| - @static if VERSION < v"1.6" |
67 |
| - # https://github.com/JuliaLang/julia/pull/37583 |
68 |
| - ret = INSTANCE(ps) |
69 |
| - else |
70 |
| - if ps.t.dotop && closer(ps) && !isassignmentop(ps.t) |
71 |
| - # Split dotted operator into dot-call |
72 |
| - v = val(ps.t, ps)[2:end] |
73 |
| - dot = EXPR(:OPERATOR, 1, 1, ".") |
74 |
| - op = EXPR(:OPERATOR, ps.nt.startbyte - ps.t.startbyte - 1, ps.t.endbyte - ps.t.startbyte, v) |
75 |
| - ret = EXPR(dot, EXPR[op], nothing) |
76 |
| - else |
77 |
| - ret = INSTANCE(ps) |
78 |
| - end |
79 |
| - end |
80 |
| - end |
81 |
| - if is_colon(ret) && !(iscomma(ps.nt) || kindof(ps.ws) == SemiColonWS) |
82 |
| - ret = parse_unary(ps, ret) |
83 |
| - elseif isoperator(ret) && assign_prec(valof(ret)) && !isunaryop(ret) |
84 |
| - ret = mErrorToken(ps, ret, UnexpectedAssignmentOp) |
85 |
| - end |
86 |
| - elseif kindof(ps.t) === Tokens.AT_SIGN |
87 |
| - ret = parse_macrocall(ps) |
88 |
| - else |
89 |
| - ret = mErrorToken(ps, INSTANCE(ps), UnexpectedToken) |
90 |
| - end |
91 |
| - ret = parse_compound_recur(ps, ret) |
92 |
| - end |
93 |
| - return ret |
94 |
| -end |
95 |
| - |
96 |
| -function parse_compound_recur(ps, ret) |
97 |
| - !closer(ps) ? parse_compound_recur(ps, parse_compound(ps, ret)) : ret |
98 |
| -end |
99 |
| - |
100 |
| -""" |
101 |
| - parse_compound(ps::ParseState, ret::EXPR) |
102 |
| -
|
103 |
| -Attempts to parse a compound expression given the preceding expression `ret`. |
104 |
| -""" |
105 |
| -function parse_compound(ps::ParseState, ret::EXPR) |
106 |
| - if kindof(ps.nt) === Tokens.FOR |
107 |
| - ret = parse_generator(ps, ret) |
108 |
| - elseif kindof(ps.nt) === Tokens.DO |
109 |
| - ret = @default ps @closer ps :block parse_do(ps, ret) |
110 |
| - elseif isajuxtaposition(ps, ret) |
111 |
| - if disallowednumberjuxt(ret) |
112 |
| - ret = mErrorToken(ps, ret, CannotJuxtapose) |
113 |
| - end |
114 |
| - ret = parse_operator(ps, ret, EXPR(:OPERATOR, 0, 0, "*")) |
115 |
| - elseif issuffixableliteral(ps, ret) |
116 |
| - if isnumberliteral(ps.nt) |
117 |
| - arg = mLITERAL(next(ps)) |
118 |
| - push!(ret, arg) |
119 |
| - else |
120 |
| - arg = EXPR(:IDENTIFIER, next(ps)) |
121 |
| - push!(ret, EXPR(:STRING, arg.fullspan, arg.span, val(ps.t, ps))) |
122 |
| - end |
123 |
| - elseif (isidentifier(ret) || is_getfield(ret)) && isemptyws(ps.ws) && isprefixableliteral(ps.nt) |
124 |
| - ret = parse_prefixed_string_cmd(ps, ret) |
125 |
| - elseif kindof(ps.nt) === Tokens.LPAREN |
126 |
| - no_ws = !isemptyws(ps.ws) |
127 |
| - ret = @closer ps :for_generator @closeparen ps parse_call(ps, ret) |
128 |
| - if no_ws && !isunarycall(ret) |
129 |
| - ret = mErrorToken(ps, ret, UnexpectedWhiteSpace) |
130 |
| - end |
131 |
| - elseif kindof(ps.nt) === Tokens.LBRACE |
132 |
| - if isemptyws(ps.ws) |
133 |
| - ret = @default ps @nocloser ps :inwhere @closebrace ps parse_curly(ps, ret) |
134 |
| - else |
135 |
| - ret = mErrorToken(ps, (@default ps @nocloser ps :inwhere @closebrace ps parse_curly(ps, ret)), UnexpectedWhiteSpace) |
136 |
| - end |
137 |
| - elseif kindof(ps.nt) === Tokens.LSQUARE && isemptyws(ps.ws) && !isoperator(ret) |
138 |
| - ret = @closer ps :for_generator @default ps @nocloser ps :block parse_ref(ps, ret) |
139 |
| - elseif iscomma(ps.nt) |
140 |
| - ret = parse_tuple(ps, ret) |
141 |
| - elseif isunaryop(ret) && kindof(ps.nt) != Tokens.EQ |
142 |
| - ret = parse_unary(ps, ret) |
143 |
| - elseif isoperator(ps.nt) |
144 |
| - op = EXPR(:OPERATOR, next(ps)) |
145 |
| - ret = parse_operator(ps, ret, op) |
146 |
| - elseif is_prime(ret.head) |
147 |
| - # prime operator followed by an identifier has an implicit multiplication |
148 |
| - nextarg = @precedence ps TimesOp parse_expression(ps) |
149 |
| - ret = EXPR(:call, EXPR[EXPR(:OPERATOR, 0, 0, "*"), ret, nextarg], nothing) |
150 |
| -# ############################################################################### |
151 |
| -# Everything below here is an error |
152 |
| -# ############################################################################### |
153 |
| - else |
154 |
| - ps.errored = true |
155 |
| - if kindof(ps.nt) in (Tokens.RPAREN, Tokens.RSQUARE, Tokens.RBRACE) |
156 |
| - nextarg = mErrorToken(ps, EXPR(next(ps)), Unknown) |
157 |
| - else |
158 |
| - nextarg = try |
159 |
| - parse_expression(ps) |
160 |
| - catch err |
161 |
| - if err isa StackOverflowError |
162 |
| - throw(error(string(ps, "\nsize: ", ps.l.io.size))) |
163 |
| - end |
164 |
| - mErrorToken(ps, ret, Unknown) |
165 |
| - end |
166 |
| - end |
167 |
| - ret = EXPR(:errortoken, EXPR[ret, nextarg], nothing) |
168 |
| - end |
169 |
| - return ret |
170 |
| -end |
171 |
| - |
172 |
| -""" |
173 |
| - parse_paren(ps::ParseState) |
174 |
| -
|
175 |
| -Parses an expression starting with a `(`. |
176 |
| -""" |
177 |
| -function parse_paren(ps::ParseState) |
178 |
| - args = EXPR[] |
179 |
| - trivia = EXPR[EXPR(ps)] |
180 |
| - @closeparen ps @default ps @nocloser ps :inwhere parse_comma_sep(ps, args, trivia, false, true, true, insert_params_at = 1) |
181 |
| - if length(args) == 1 && length(trivia) == 1 && ((kindof(ps.ws) !== SemiColonWS || headof(args[1]) === :block) && headof(args[1]) !== :parameters) |
182 |
| - accept_rparen(ps, trivia) |
183 |
| - ret = EXPR(:brackets, args, trivia) |
184 |
| - elseif VERSION < v"1.5" && length(args) == 1 && args[1].head === :parameters && isempty(args[1].args) |
185 |
| - accept_rparen(ps, trivia) |
186 |
| - pop!(args) |
187 |
| - push!(args, EXPR(:block, EXPR[], nothing)) |
188 |
| - ret = EXPR(:brackets, args, trivia) |
189 |
| - else |
190 |
| - accept_rparen(ps, trivia) |
191 |
| - ret = EXPR(:tuple, args, trivia) |
192 |
| - end |
193 |
| - return ret |
194 |
| -end |
195 |
| - |
196 |
| -""" |
197 |
| - parse(str, cont = false) |
198 |
| -
|
199 |
| -Parses the passed string. If `cont` is true, parsing continues until the end of the string and the resulting expressions are returned in a `:file` block. |
200 |
| -""" |
201 |
| -function parse(str::String, cont=false) |
202 |
| - ps = ParseState(str) |
203 |
| - x, _ = parse(ps, cont) |
204 |
| - return x |
205 |
| -end |
206 |
| - |
207 |
| -""" |
208 |
| - parse_doc(ps::ParseState) |
209 |
| -
|
210 |
| -Used for top-level parsing - attaches documentation (such as this) to expressions. |
211 |
| -""" |
212 |
| -function parse_doc(ps::ParseState) |
213 |
| - if (kindof(ps.nt) === Tokens.STRING || kindof(ps.nt) === Tokens.TRIPLE_STRING) && !isemptyws(ps.nws) |
214 |
| - doc = mLITERAL(next(ps)) |
215 |
| - if kindof(ps.nt) === Tokens.ENDMARKER || kindof(ps.nt) === Tokens.END || ps.t.endpos[1] + 1 < ps.nt.startpos[1] |
216 |
| - ret = doc |
217 |
| - elseif isbinaryop(ps.nt) && !closer(ps) |
218 |
| - ret = parse_compound_recur(ps, doc) |
219 |
| - else |
220 |
| - ret = parse_expression(ps) |
221 |
| - ret = EXPR(:macrocall, EXPR[EXPR(:globalrefdoc, 0, 0), EXPR(:NOTHING, 0, 0), doc, ret], nothing) |
222 |
| - end |
223 |
| - else |
224 |
| - ret = parse_expression(ps) |
225 |
| - end |
226 |
| - if _continue_doc_parse(ps, ret) |
227 |
| - push!(ret, parse_expression(ps)) |
228 |
| - end |
229 |
| - return ret |
230 |
| -end |
231 |
| - |
232 |
| -function parse(ps::ParseState, cont=false) |
233 |
| - if ps.l.io.size == 0 |
234 |
| - return (cont ? EXPR(:file, EXPR[]) : nothing), ps |
235 |
| - end |
236 |
| - last_line = 0 |
237 |
| - curr_line = 0 |
238 |
| - |
239 |
| - if cont |
240 |
| - top = EXPR(:file, EXPR[], nothing) |
241 |
| - if kindof(ps.nt) === Tokens.WHITESPACE || kindof(ps.nt) === Tokens.COMMENT |
242 |
| - next(ps) |
243 |
| - push!(top, EXPR(:NOTHING, ps.nt.startbyte, ps.nt.startbyte, "")) |
244 |
| - elseif kindof(ps.nt) === Tokens.SEMICOLON |
245 |
| - next(ps) |
246 |
| - push!(top, EXPR(:toplevel, EXPR[EXPR(:NOTHING, ps.nt.startbyte, ps.nt.startbyte, "")])) |
247 |
| - end |
248 |
| - |
249 |
| - prevpos = position(ps) |
250 |
| - while kindof(ps.nt) !== Tokens.ENDMARKER |
251 |
| - curr_line = ps.nt.startpos[1] |
252 |
| - ret = parse_doc(ps) |
253 |
| - # join semicolon sep items |
254 |
| - if curr_line == last_line && headof(last(top.args)) === :toplevel |
255 |
| - push!(last(top.args), ret) |
256 |
| - top.fullspan += ret.fullspan |
257 |
| - top.span = top.fullspan - (ret.fullspan - ret.span) |
258 |
| - elseif kindof(ps.ws) == SemiColonWS |
259 |
| - push!(top, EXPR(:toplevel, EXPR[ret])) |
260 |
| - else |
261 |
| - push!(top, ret) |
262 |
| - end |
263 |
| - last_line = curr_line |
264 |
| - kindof(ps.nt) === Tokens.ENDMARKER && break # don't do loop check if eof |
265 |
| - prevpos = loop_check(ps, prevpos) |
266 |
| - end |
267 |
| - else |
268 |
| - if kindof(ps.nt) === Tokens.WHITESPACE || kindof(ps.nt) === Tokens.COMMENT |
269 |
| - next(ps) |
270 |
| - top = EXPR(:NOTHING, ps.nt.startbyte, ps.nt.startbyte, "") |
271 |
| - elseif !(ps.done || kindof(ps.nt) === Tokens.ENDMARKER) |
272 |
| - last_line = current_line(ps) |
273 |
| - if ps.nt.kind === Tokens.SEMICOLON |
274 |
| - next(ps) |
275 |
| - top = EXPR(:toplevel, EXPR[EXPR(:NOTHING, ps.nt.startbyte, ps.nt.startbyte, "")]) |
276 |
| - else |
277 |
| - top = parse_doc(ps) |
278 |
| - end |
279 |
| - if kindof(ps.ws) == SemiColonWS# && curr_line == last_line |
280 |
| - top = EXPR(:toplevel, EXPR[top], nothing) |
281 |
| - prevpos = position(ps) |
282 |
| - while kindof(ps.ws) == SemiColonWS && current_line(ps) == last_line && kindof(ps.nt) != Tokens.ENDMARKER |
283 |
| - last_line = current_line(ps) |
284 |
| - ret = parse_doc(ps) |
285 |
| - push!(top, ret) |
286 |
| - prevpos = loop_check(ps, prevpos) |
287 |
| - end |
288 |
| - end |
289 |
| - else |
290 |
| - top = EXPR(:errortoken, EXPR[], nothing, 0, 0) |
291 |
| - end |
292 |
| - end |
293 |
| - |
294 |
| - return top, ps |
295 |
| -end |
296 |
| - |
297 |
| -function _continue_doc_parse(ps::ParseState, x::EXPR) |
298 |
| - kindof(ps.nt) !== Tokens.ENDMARKER && |
299 |
| - headof(x) === :macrocall && |
300 |
| - valof(x.args[1]) == "@doc" && |
301 |
| - length(x.args) < 4 && |
302 |
| - ps.t.endpos[1] + 1 == ps.nt.startpos[1] |
303 |
| -end |
304 |
| - |
305 |
| -include("precompile.jl") |
306 |
| -_precompile() |
307 | 10 | end
|
0 commit comments