Skip to content

Commit a1b4cdb

Browse files
committed
include cstparser
1 parent e496179 commit a1b4cdb

18 files changed

+4794
-26
lines changed

Manifest.toml

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -3,25 +3,17 @@
33
[[Base64]]
44
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
55

6-
[[CSTParser]]
7-
deps = ["LibGit2", "Test", "Tokenize"]
8-
git-tree-sha1 = "d878de3315f9b6569851d919f7976fe527d00c24"
9-
repo-rev = "location"
10-
repo-url = "https://github.com/MikeInnes/CSTParser.jl"
11-
uuid = "00ebfdb7-1f24-5e51-bd34-a7502290713f"
12-
version = "0.5.2+"
13-
146
[[Compat]]
157
deps = ["Base64", "Dates", "DelimitedFiles", "Distributed", "InteractiveUtils", "LibGit2", "Libdl", "LinearAlgebra", "Markdown", "Mmap", "Pkg", "Printf", "REPL", "Random", "Serialization", "SharedArrays", "Sockets", "SparseArrays", "Statistics", "Test", "UUIDs", "Unicode"]
16-
git-tree-sha1 = "49269e311ffe11ac5b334681d212329002a9832a"
8+
git-tree-sha1 = "84aa74986c5b9b898b0d1acaf3258741ee64754f"
179
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
18-
version = "1.5.1"
10+
version = "2.1.0"
1911

2012
[[DataStructures]]
21-
deps = ["InteractiveUtils", "OrderedCollections", "Random", "Serialization", "Test"]
22-
git-tree-sha1 = "ca971f03e146cf144a9e2f2ce59674f5bf0e8038"
13+
deps = ["InteractiveUtils", "OrderedCollections"]
14+
git-tree-sha1 = "0809951a1774dc724da22d26e4289bbaab77809a"
2315
uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
24-
version = "0.15.0"
16+
version = "0.17.0"
2517

2618
[[Dates]]
2719
deps = ["Printf"]
@@ -61,9 +53,9 @@ uuid = "a63ad114-7e13-5084-954f-fe012c677804"
6153

6254
[[OrderedCollections]]
6355
deps = ["Random", "Serialization", "Test"]
64-
git-tree-sha1 = "85619a3f3e17bb4761fe1b1fd47f0e979f964d5b"
56+
git-tree-sha1 = "c4c13474d23c60d20a67b217f1d7f22a40edf8f1"
6557
uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d"
66-
version = "1.0.2"
58+
version = "1.1.0"
6759

6860
[[Pkg]]
6961
deps = ["Dates", "LibGit2", "Markdown", "Printf", "REPL", "Random", "SHA", "UUIDs"]
@@ -107,10 +99,9 @@ deps = ["Distributed", "InteractiveUtils", "Logging", "Random"]
10799
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
108100

109101
[[Tokenize]]
110-
deps = ["Printf", "Test"]
111-
git-tree-sha1 = "3e83f60b74911d3042d3550884ca2776386a02b8"
102+
git-tree-sha1 = "0de343efc07da00cd449d5b04e959ebaeeb3305d"
112103
uuid = "0796e94c-ce3b-5d07-9a54-7f471281c624"
113-
version = "0.5.3"
104+
version = "0.5.4"
114105

115106
[[UUIDs]]
116107
deps = ["Random", "SHA"]

Project.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,10 @@ name = "MacroTools"
22
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
33

44
[deps]
5-
CSTParser = "00ebfdb7-1f24-5e51-bd34-a7502290713f"
65
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
76
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
87
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
8+
Tokenize = "0796e94c-ce3b-5d07-9a54-7f471281c624"
99

1010
[compat]
1111
julia = "≥ 1.0"

src/MacroTools.jl

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,12 @@ include("examples/destruct.jl")
1616
include("examples/threading.jl")
1717
include("examples/forward.jl")
1818

19-
using CSTParser
19+
include("cstparser/CSTParser.jl")
2020

21-
if isdefined(CSTParser, :Location)
22-
include("patch/diff.jl")
23-
include("patch/cst.jl")
24-
end
21+
using .CSTParser
22+
23+
include("patch/diff.jl")
24+
include("patch/cst.jl")
2525

2626
const animals = Symbol[]
2727
const animals_file = joinpath(@__DIR__, "..", "animals.txt")

src/cstparser/CSTParser.jl

Lines changed: 264 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,264 @@
1+
module CSTParser
2+
global debug = true
3+
4+
using Tokenize
5+
import Base: length, first, last, getindex, setindex!
6+
import Tokenize.Tokens
7+
import Tokenize.Tokens: RawToken, AbstractToken, iskeyword, isliteral, isoperator, untokenize
8+
import Tokenize.Lexers: Lexer, peekchar, iswhitespace
9+
10+
export ParseState, parse_expression
11+
12+
include("lexer.jl")
13+
include("spec.jl")
14+
include("utils.jl")
15+
include("recovery.jl")
16+
include("components/internals.jl")
17+
include("components/keywords.jl")
18+
include("components/lists.jl")
19+
include("components/operators.jl")
20+
include("components/strings.jl")
21+
include("location.jl")
22+
include("conversion.jl")
23+
include("display.jl")
24+
include("interface.jl")
25+
26+
27+
"""
28+
parse_expression(ps)
29+
30+
Parses an expression until `closer(ps) == true`. Expects to enter the
31+
`ParseState` on the token before the beginning of the expression and ends
32+
on the last token.
33+
34+
Acceptable starting tokens are:
35+
+ A keyword
36+
+ An opening parenthesis, square bracket or brace.
37+
+ An operator.
38+
+ An instance (e.g. identifier, number, etc.)
39+
+ An `@`.
40+
41+
"""
42+
@addctx :expr function parse_expression(ps::ParseState)
43+
if ps.nt.kind == Tokens.COMMA
44+
push!(ps.errors, Error((ps.nt.startbyte:ps.nws.endbyte) .+ 1, "Expression began with a comma."))
45+
ret = ErrorToken(PUNCTUATION(next(ps)))
46+
elseif ps.nt.kind ∈ term_c && ps.nt.kind != Tokens.END
47+
push!(ps.errors, Error((ps.nt.startbyte:ps.nws.endbyte) .+ 1, "Expression began with a terminal token: $(ps.nt.kind)."))
48+
ret = ErrorToken(INSTANCE(next(ps)))
49+
else
50+
next(ps)
51+
if iskeyword(ps.t.kind) && ps.t.kind != Tokens.DO
52+
ret = parse_kw(ps)
53+
elseif ps.t.kind == Tokens.LPAREN
54+
ret = parse_paren(ps)
55+
elseif ps.t.kind == Tokens.LSQUARE
56+
ret = @default ps parse_array(ps)
57+
elseif ps.t.kind == Tokens.LBRACE
58+
ret = @default ps @closebrace ps parse_braces(ps)
59+
elseif isinstance(ps.t) || isoperator(ps.t)
60+
if ps.t.kind == Tokens.WHERE
61+
ret = IDENTIFIER(ps)
62+
else
63+
ret = INSTANCE(ps)
64+
end
65+
if is_colon(ret) && ps.nt.kind != Tokens.COMMA
66+
ret = parse_unary(ps, ret)
67+
end
68+
elseif ps.t.kind == Tokens.AT_SIGN
69+
ret = parse_macrocall(ps)
70+
else
71+
ret = ErrorToken(INSTANCE(ps))
72+
push!(ps.errors, Error((ps.nt.startbyte:ps.nws.endbyte) .+ 1, "Expression began with a : $(ps.nt.kind)."))
73+
end
74+
75+
while !closer(ps)
76+
ret = parse_compound(ps, ret)
77+
end
78+
end
79+
return ret
80+
end
81+
82+
function parse_compound(ps::ParseState, @nospecialize ret)
83+
if ps.nt.kind == Tokens.FOR
84+
ret = parse_generator(ps, ret)
85+
elseif ps.nt.kind == Tokens.DO
86+
ret = @default ps @closer ps block parse_do(ps, ret)
87+
elseif isajuxtaposition(ps, ret)
88+
op = OPERATOR(0, 0, Tokens.STAR, false)
89+
ret = parse_operator(ps, ret, op)
90+
elseif (ret isa EXPR{x_Str} || ret isa EXPR{x_Cmd}) && ps.nt.kind == Tokens.IDENTIFIER
91+
arg = IDENTIFIER(next(ps))
92+
push!(ret, LITERAL(arg.fullspan, arg.span, val(ps.t, ps), Tokens.STRING))
93+
elseif (ret isa IDENTIFIER || (ret isa BinarySyntaxOpCall && is_dot(ret.op))) && (ps.nt.kind == Tokens.STRING || ps.nt.kind == Tokens.TRIPLE_STRING || ps.nt.kind == Tokens.CMD)
94+
next(ps)
95+
arg = parse_string_or_cmd(ps, ret)
96+
head = arg.kind == Tokens.CMD ? x_Cmd : x_Str
97+
ret = EXPR{head}(Any[ret, arg])
98+
elseif ps.nt.kind == Tokens.LPAREN
99+
no_ws = !isemptyws(ps.ws)
100+
err_rng = ps.t.endbyte + 2:ps.nt.startbyte
101+
ret = @closeparen ps parse_call(ps, ret)
102+
if no_ws && !(ret isa UnaryOpCall || ret isa UnarySyntaxOpCall)
103+
push!(ps.errors, Error(err_rng, "White space in function call."))
104+
ret = ErrorToken(ret)
105+
end
106+
elseif ps.nt.kind == Tokens.LBRACE
107+
if isemptyws(ps.ws)
108+
ret = @default ps @nocloser ps inwhere @closebrace ps parse_curly(ps, ret)
109+
else
110+
push!(ps.errors, Error(ps.t.endbyte + 2:ps.nt.startbyte , "White space in brace call."))
111+
ret = ErrorToken(@default ps @nocloser ps inwhere @closebrace ps parse_curly(ps, ret))
112+
end
113+
elseif ps.nt.kind == Tokens.LSQUARE && isemptyws(ps.ws) && !(ret isa OPERATOR)
114+
ret = @default ps @nocloser ps block parse_ref(ps, ret)
115+
elseif ps.nt.kind == Tokens.COMMA
116+
ret = parse_tuple(ps, ret)
117+
elseif isunaryop(ret) && ps.nt.kind != Tokens.EQ
118+
ret = parse_unary(ps, ret)
119+
elseif isoperator(ps.nt)
120+
op = OPERATOR(next(ps))
121+
ret = parse_operator(ps, ret, op)
122+
elseif ret isa UnarySyntaxOpCall && is_prime(ret.arg2)
123+
# prime operator followed by an identifier has an implicit multiplication
124+
nextarg = @precedence ps 11 parse_expression(ps)
125+
ret = BinaryOpCall(ret, OPERATOR(0, 0, Tokens.STAR,false), nextarg)
126+
################################################################################
127+
# Everything below here is an error
128+
################################################################################
129+
elseif ps.nt.kind in (Tokens.RPAREN, Tokens.RSQUARE, Tokens.RBRACE)
130+
push!(ps.errors, Error((ps.t.startbyte:ps.nt.endbyte) .+ 1 , "Disallowed compound expression."))
131+
ret = EXPR{ErrorToken}([ret, ErrorToken(PUNCTUATION(next(ps)))])
132+
else
133+
push!(ps.errors, Error((ps.t.startbyte:ps.nt.endbyte) .+ 1 , "Disallowed compound expression."))
134+
nextarg = parse_expression(ps)
135+
ret = EXPR{ErrorToken}([ret, nextarg])
136+
end
137+
return ret
138+
end
139+
140+
"""
141+
parse_paren(ps)
142+
143+
Parses an expression starting with a `(`.
144+
"""
145+
@addctx :paren function parse_paren(ps::ParseState)
146+
args = Any[PUNCTUATION(ps)]
147+
@closeparen ps @default ps @nocloser ps inwhere parse_comma_sep(ps, args, false, true, true)
148+
149+
if length(args) == 2 && ((ps.ws.kind != SemiColonWS || (length(args) == 2 && args[2] isa EXPR{Block})) && !(args[2] isa EXPR{Parameters}))
150+
accept_rparen(ps, args)
151+
ret = EXPR{InvisBrackets}(args)
152+
else
153+
accept_rparen(ps, args)
154+
ret = EXPR{TupleH}(args)
155+
end
156+
return ret
157+
end
158+
159+
"""
160+
parse(str, cont = false)
161+
162+
Parses the passed string. If `cont` is true, parsing continues until the end of the string and the resulting expressions are returned in an `EXPR{FileH}` block.
163+
"""
164+
function parse(str::String, cont = false)
165+
ps = ParseState(str)
166+
x, ps = parse(ps, cont)
167+
return x
168+
end
169+
170+
function parse_doc(ps::ParseState)
171+
if (ps.nt.kind == Tokens.STRING || ps.nt.kind == Tokens.TRIPLE_STRING) && !isemptyws(ps.nws)
172+
doc = LITERAL(next(ps))
173+
if (ps.nt.kind == Tokens.ENDMARKER || ps.nt.kind == Tokens.END)
174+
return doc
175+
elseif isbinaryop(ps.nt) && !closer(ps)
176+
ret = parse_compound(ps, doc)
177+
return ret
178+
end
179+
180+
ret = parse_expression(ps)
181+
ret = EXPR{MacroCall}(Any[GlobalRefDOC, doc, ret])
182+
elseif ps.nt.kind == Tokens.IDENTIFIER && val(ps.nt, ps) == "doc" && (ps.nnt.kind == Tokens.STRING || ps.nnt.kind == Tokens.TRIPLE_STRING)
183+
doc = IDENTIFIER(next(ps))
184+
next(ps)
185+
arg = parse_string_or_cmd(ps, doc)
186+
doc = EXPR{x_Str}(Any[doc, arg])
187+
ret = parse_expression(ps)
188+
ret = EXPR{MacroCall}(Any[GlobalRefDOC, doc, ret])
189+
else
190+
ret = parse_expression(ps)
191+
end
192+
return ret
193+
end
194+
195+
function parse(ps::ParseState, cont = false)
196+
if ps.l.io.size == 0
197+
return (cont ? EXPR{FileH}(Any[]) : nothing), ps
198+
end
199+
last_line = 0
200+
curr_line = 0
201+
202+
if cont
203+
top = EXPR{FileH}(Any[])
204+
if ps.nt.kind == Tokens.WHITESPACE || ps.nt.kind == Tokens.COMMENT
205+
next(ps)
206+
push!(top, LITERAL(ps.nt.startbyte, ps.nt.startbyte, "", Tokens.NOTHING))
207+
end
208+
209+
while !ps.done && !ps.errored
210+
curr_line = ps.nt.startpos[1]
211+
ret = parse_doc(ps)
212+
213+
# join semicolon-separated items that share a line
214+
if curr_line == last_line && last(top.args) isa EXPR{TopLevel}
215+
push!(last(top.args), ret)
216+
elseif ps.ws.kind == SemiColonWS
217+
push!(top, EXPR{TopLevel}(Any[ret]))
218+
else
219+
push!(top, ret)
220+
end
221+
last_line = curr_line
222+
end
223+
else
224+
if ps.nt.kind == Tokens.WHITESPACE || ps.nt.kind == Tokens.COMMENT
225+
next(ps)
226+
top = LITERAL(ps.nt.startbyte, ps.nt.startbyte, "", Tokens.NOTHING)
227+
else
228+
top = parse_doc(ps)
229+
last_line = ps.nt.startpos[1]
230+
if ps.ws.kind == SemiColonWS
231+
top = EXPR{TopLevel}(Any[top])
232+
while ps.ws.kind == SemiColonWS && ps.nt.startpos[1] == last_line && ps.nt.kind != Tokens.ENDMARKER
233+
ret = parse_doc(ps)
234+
push!(top, ret)
235+
last_line = ps.nt.startpos[1]
236+
end
237+
end
238+
end
239+
end
240+
241+
return top, ps
242+
end
243+
244+
245+
function parse_file(path::String)
246+
x = parse(read(path, String), true)
247+
File([], [], path, x, [])
248+
end
249+
250+
function parse_directory(path::String, proj = Project(path, []))
251+
for f in readdir(path)
252+
if isfile(joinpath(path, f)) && endswith(f, ".jl")
253+
try
254+
push!(proj.files, parse_file(joinpath(path, f)))
255+
catch
256+
println("$f failed to parse")
257+
end
258+
elseif isdir(joinpath(path, f))
259+
parse_directory(joinpath(path, f), proj)
260+
end
261+
end
262+
proj
263+
end
264+
end

0 commit comments

Comments
 (0)