Skip to content

Commit 330e63e

Browse files
committed
Prototype for conversion to CSTParser.EXPR
This demonstrates the basic approach working, but various things don't work here yet, for two reasons:
* Mismatch between the way the two packages tokenize the input, e.g. for the delimiters in strings.
* Missing "trivia nodes" in JuliaSyntax (e.g. a brackets node). These should probably be added.
1 parent 98bd80c commit 330e63e

File tree

1 file changed

+268
-0
lines changed

1 file changed

+268
-0
lines changed

prototypes/cst_conversion.jl

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
# A prototype for converting JuliaSyntax data structures into CSTParser.EXPR.
2+
3+
using CSTParser
4+
5+
using JuliaSyntax
6+
using JuliaSyntax: GreenNode, SyntaxHead, SourceFile, TaggedRange,
7+
@K_str, @KSet_cmd,
8+
haschildren, is_syntax_kind, is_keyword, is_operator, is_identifier, head, kind, span,
9+
is_infix, is_trivia, untokenize, TzTokens, children
10+
11+
# See CSTParser.tokenkindtoheadmap
12+
# Map a tokenizer kind `k` to the Symbol used as the head of the corresponding
# CSTParser.EXPR leaf.
#
# The entries are checked in order and the first match wins. Kinds without an
# entry raise an error via `error`.
function tokenkindtoheadmap(k::TzTokens.Kind)
    # Punctuation
    k === TzTokens.COMMA      && return :COMMA
    k === TzTokens.LPAREN     && return :LPAREN
    k === TzTokens.RPAREN     && return :RPAREN
    k === TzTokens.LSQUARE    && return :LSQUARE
    k === TzTokens.RSQUARE    && return :RSQUARE
    k === TzTokens.LBRACE     && return :LBRACE
    k === TzTokens.RBRACE     && return :RBRACE
    k === TzTokens.AT_SIGN    && return :ATSIGN
    k === TzTokens.DOT        && return :DOT

    # Keywords
    k === TzTokens.ABSTRACT   && return :ABSTRACT
    k === TzTokens.BAREMODULE && return :BAREMODULE
    k === TzTokens.BEGIN      && return :BEGIN
    k === TzTokens.BREAK      && return :BREAK
    k === TzTokens.CATCH      && return :CATCH
    k === TzTokens.CONST      && return :CONST
    k === TzTokens.CONTINUE   && return :CONTINUE
    k === TzTokens.DO         && return :DO
    k === TzTokens.ELSE       && return :ELSE
    k === TzTokens.ELSEIF     && return :ELSEIF
    k === TzTokens.END        && return :END
    k === TzTokens.EXPORT     && return :EXPORT
    k === TzTokens.FINALLY    && return :FINALLY
    k === TzTokens.FOR        && return :FOR
    k === TzTokens.FUNCTION   && return :FUNCTION
    k === TzTokens.GLOBAL     && return :GLOBAL
    k === TzTokens.IF         && return :IF
    k === TzTokens.IMPORT     && return :IMPORT
    k === TzTokens.LET        && return :LET
    k === TzTokens.LOCAL      && return :LOCAL
    k === TzTokens.MACRO      && return :MACRO
    k === TzTokens.MODULE     && return :MODULE
    k === TzTokens.MUTABLE    && return :MUTABLE
    k === TzTokens.OUTER      && return :OUTER
    k === TzTokens.PRIMITIVE  && return :PRIMITIVE
    k === TzTokens.QUOTE      && return :QUOTE
    k === TzTokens.RETURN     && return :RETURN
    k === TzTokens.STRUCT     && return :STRUCT
    k === TzTokens.TRY        && return :TRY
    k === TzTokens.TYPE       && return :TYPE
    k === TzTokens.USING      && return :USING
    k === TzTokens.WHILE      && return :WHILE

    # Literals
    k === TzTokens.INTEGER    && return :INTEGER
    k === TzTokens.BIN_INT    && return :BININT
    k === TzTokens.HEX_INT    && return :HEXINT
    k === TzTokens.OCT_INT    && return :OCTINT
    k === TzTokens.FLOAT      && return :FLOAT
    k === TzTokens.STRING     && return :STRING
    # k === TzTokens.TRIPLE_STRING && return :TRIPLESTRING
    k === TzTokens.CHAR       && return :CHAR
    k === TzTokens.CMD        && return :CMD
    # k === TzTokens.TRIPLE_CMD && return :TRIPLECMD
    k === TzTokens.TRUE       && return :TRUE
    k === TzTokens.FALSE      && return :FALSE

    k === TzTokens.ENDMARKER  && return :errortoken

    error("Unknown token $k")
end
69+
70+
# Things which are "trailing trivia" according to CSTParser
71+
#
72+
# "Trailing trivia" is trivia which will be attached to the end of a node.
73+
# Return `true` when the kind of `x` is one of whitespace, newline whitespace,
# comment or `;`.
function is_cst_trailing_trivia(x)
    return kind(x) in KSet`Whitespace NewlineWs Comment ;`
end
74+
75+
# Convert GreenNode into CSTParser.EXPR
76+
# Recursively convert the green tree `raw_node` into a CSTParser.EXPR.
#
# Arguments:
# * `source`   - source text; indexed with a range to extract each leaf's value.
# * `raw_node` - green tree node to convert.
# * `position` - index into `source` at which `raw_node` starts (default 1).
#
# Returns the EXPR for `raw_node`, with the `parent` field of each entry in
# `args` and `trivia` pointing back at it.
#
# Span bookkeeping: `full_span` covers a node together with the trailing trivia
# attached to it, `inner_span` excludes that trailing trivia. For an interior
# node the trailing trivia is whatever trails its last child.
function cst(source::SourceFile, raw_node::GreenNode{SyntaxHead}, position::Integer=1)
    node_start = position
    cs = children(raw_node)
    i = 1
    args = CSTParser.EXPR[]
    trivia = CSTParser.EXPR[]
    last_trivia_span = 0
    while i <= length(cs)
        raw = cs[i]
        if haschildren(raw)
            # Interior child: convert recursively and remember how much
            # trailing trivia it carries.
            c = cst(source, raw, position)
            push!(args, c)
            last_trivia_span = c.fullspan - c.span
            position += span(raw)
        else
            start_pos = position
            inner_span = span(raw)
            position += span(raw)
            # Here we append any trailing trivia tokens to the node.
            # Note that this advances `i` past the absorbed siblings.
            while i < length(cs) && is_cst_trailing_trivia(cs[i+1])
                position += span(cs[i+1])
                i += 1
            end
            full_span = position - start_pos
            last_trivia_span = full_span - inner_span

            # Leaf node: the value is the token text without trailing trivia.
            k = kind(raw)
            val_range = start_pos:(start_pos + inner_span - 1)
            val = source[val_range]

            if kind(raw) == K"nothing"
                # First `nothing` token in file seems to require this. Why I don't know.
                # This must stay after `val` and `last_trivia_span` are
                # computed, since both use the original `inner_span`.
                inner_span = full_span
            end

            # See CSTParser.literalmap. Which we can't use directly because we've
            # customized Tokenize.jl :-(
            cst_head = k === TzTokens.NOTHING ? :NOTHING :
                       # FIXME: Following probably need special handling
                       k === TzTokens.MACRO_NAME ? :IDENTIFIER :
                       k === TzTokens.CMD_MACRO_NAME ? :IDENTIFIER :
                       k === TzTokens.STRING_MACRO_NAME ? :IDENTIFIER :
                       k === TzTokens.DQUOTE ? :DQUOTE :
                       k === TzTokens.BACKTICK ? :BACKTICK :
                       is_operator(k) ? :OPERATOR :
                       is_identifier(k) ? :IDENTIFIER :
                       tokenkindtoheadmap(k)
            # FIXME: STRING, TRIPLE_STRING, CMD, TRIPLE_CMD, need special handling:
            # * STRING doesn't include delimiters
            # * CMD doesn't include delimiters
            # * TRIPLE_STRING is a composite of STRING and TRIPLE_DQUOTE
            # * TRIPLE_CMD is a composite of CMD and TRIPLE_DQUOTE
            # They don't exist anymore as individual tokens

            # Tokens flagged as trivia go to `trivia`, everything else to `args`.
            push!(is_trivia(raw) ? trivia : args,
                  CSTParser.EXPR(cst_head, nothing, nothing, full_span, inner_span, val,
                                 nothing, nothing))
        end
        i += 1
    end

    if is_infix(raw_node)
        # Swap the first two arguments of an infix node.
        args[1], args[2] = args[2], args[1]
        # TODO: Other argument swizzling, as done in SyntaxNode -> Expr conversions
    end

    full_span = position - node_start
    inner_span = full_span - last_trivia_span
    k = kind(raw_node)
    # For operator kinds the head is the first trivia EXPR, which is removed
    # from `trivia` here; other kinds use a Symbol head.
    cst_head = k == K"toplevel" ? :file :
               is_operator(k) ? popfirst!(trivia) :
               Symbol(lowercase(string(kind(raw_node))))
    x = CSTParser.EXPR(cst_head, args,
                       isempty(trivia) ? nothing : trivia,
                       full_span, inner_span, nothing, nothing, nothing)
    for a in args
        a.parent = x
    end
    for a in trivia
        a.parent = x
    end
    return x
end
161+
162+
163+
# Some steps of the conversion to CSTParser.EXPR are most conveniently done on the
164+
# raw ParseStream representation. In particular, CSTParser.EXPR attaches
165+
# some types of trivia to the end of nontrivia or trivia tokens.
166+
#
167+
# This function reassociates trivia with nonterminal nodes to make converting
168+
# to CSTParser.EXPR a *local* operation on green tree nodes.
169+
# Parse `text` with JuliaSyntax and build a GreenNode tree whose node ranges
# have been adjusted so that trailing-trivia tokens (see
# `is_cst_trailing_trivia`) sit at the end of the preceding range rather than
# at the start of a range.
function parse_for_cst(text)
    stream = JuliaSyntax.ParseStream(text)

    # When the input begins with trivia, emit an invisible `nothing` token
    # first so that the leading trivia has something to trail.
    if is_cst_trailing_trivia(peek(stream, skip_whitespace=false))
        JuliaSyntax.bump_invisible(stream, K"nothing")
    end
    JuliaSyntax.parse(stream, rule=:toplevel)

    # Make the toplevel range start at the very first token.
    ranges = stream.ranges
    @assert kind(ranges[end]) == K"toplevel"
    toplevel = ranges[end]
    ranges[end] = TaggedRange(toplevel.head, 1, toplevel.last_token)

    # Shift the boundaries of every range: skip trivia at the front and absorb
    # trivia that directly follows the end.
    #
    # This is required for later conversion to CSTParser.EXPR
    tokens = stream.tokens
    ntokens = length(tokens)
    for idx in eachindex(ranges)
        r = ranges[idx]

        lo = r.first_token
        while lo < ntokens && is_cst_trailing_trivia(tokens[lo])
            lo += 1
        end

        hi = r.last_token
        while hi < ntokens && is_cst_trailing_trivia(tokens[hi + 1])
            hi += 1
        end

        ranges[idx] = TaggedRange(head(r), lo, hi)
    end

    return JuliaSyntax.build_tree(JuliaSyntax.GreenNode, stream)
end
206+
207+
# CSTParser.EXPR equality; should be in CSTParser...
208+
# Field-by-field equality for CSTParser.EXPR.
#
# Two EXPRs are equal when `head`, `args`, `trivia`, `fullspan`, `span`, `val`
# and `meta` all compare equal; `parent` is not compared.
#
# As a debugging aid, every differing field among head/trivia/fullspan/span/val
# is logged with `@info` before the result is computed.
#
# NOTE(review): this defines a Base method on a type owned by CSTParser, and no
# matching `hash` method is defined here.
function Base.:(==)(x::CSTParser.EXPR, y::CSTParser.EXPR)
    # Debugging hacks:
    if x.head != y.head
        @info "Head mismatch" x.head y.head
    end
    if x.trivia != y.trivia
        @info "Trivia mismatch" x.trivia y.trivia
    end
    if x.fullspan != y.fullspan
        @info "Fullspan mismatch" x y x.fullspan y.fullspan
    end
    if x.span != y.span
        @info "Span mismatch" x y x.span y.span
    end
    if x.val != y.val
        @info "Val mismatch" x.val y.val
    end

    return x.head == y.head &&
           x.args == y.args &&
           x.trivia == y.trivia &&
           x.fullspan == y.fullspan &&
           x.span == y.span &&
           x.val == y.val &&
           x.meta == y.meta
end
234+
235+
# ---------------------------------------------------------------------------
# Demo: convert one input with both parsers and compare the results.
# ---------------------------------------------------------------------------

# Inputs for which the conversion works (uncomment one to try it):
#text = " 1 + 2 * 3 "
#text = "[ 1 ; 2 ;]"
#text = "for i=1:10\nx\ny\nend"
#text = "100.00"
text = """
function f(x,y)
s = 0
for i = 1:10
s += x - i^y
end
end
"""

# Inputs which don't yet work
#
# Macro names
# text = "@A.asdf x y"
#
# Bracket nodes don't exist yet in JuliaSyntax
# text = "(a + b)"
#
# Strings have separate delimiters. Will need to put them back together.
# text = "\"str\""

source = SourceFile(text)

# Green tree with trivia re-associated for conversion.
ex = parse_for_cst(text)
# show(stdout, MIME"text/plain"(), ex, text)

# Reference result from CSTParser, and our converted tree.
y = CSTParser.parse(text, true)
x = cst(source, ex)

# Uses the debugging `==` method for CSTParser.EXPR, which logs any mismatches.
x == y
268+

0 commit comments

Comments
 (0)