Skip to content

Commit 9c21de0

Browse files
committed
Prototype for conversion to CSTParser.EXPR
This demonstrates that the basic approach works, but several things don't work yet, for at least two reasons: (1) the two packages tokenize the input differently, e.g., for the delimiters in strings; (2) JuliaSyntax is missing some "trivia nodes" (e.g., a brackets node), which should probably be added.
1 parent 98bd80c commit 9c21de0

File tree

1 file changed

+268
-0
lines changed

1 file changed

+268
-0
lines changed

prototypes/cst_conversion.jl

Lines changed: 268 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,268 @@
1+
# A prototype for converting JuliaSyntax data structures into CSTParser.EXPR.
2+
3+
using CSTParser
4+
5+
using JuliaSyntax
6+
using JuliaSyntax: GreenNode, SyntaxHead, SourceFile, TaggedRange,
7+
@K_str, @KSet_cmd,
8+
haschildren, is_syntax_kind, is_keyword, is_operator, is_identifier, head, kind, span,
9+
is_infix, is_trivia, untokenize, TzTokens, children
10+
11+
# Map a token kind `k` to the head symbol used for the corresponding leaf
# `CSTParser.EXPR`. Modelled on CSTParser.tokenkindtoheadmap.
#
# Kinds are tested in order with `===`; the first match wins. A kind with no
# entry raises an error via `error`.
function tokenkindtoheadmap(k::TzTokens.Kind)
    # Punctuation
    k === TzTokens.COMMA && return :COMMA
    k === TzTokens.LPAREN && return :LPAREN
    k === TzTokens.RPAREN && return :RPAREN
    k === TzTokens.LSQUARE && return :LSQUARE
    k === TzTokens.RSQUARE && return :RSQUARE
    k === TzTokens.LBRACE && return :LBRACE
    k === TzTokens.RBRACE && return :RBRACE
    k === TzTokens.AT_SIGN && return :ATSIGN
    k === TzTokens.DOT && return :DOT

    # Keywords
    k === TzTokens.ABSTRACT && return :ABSTRACT
    k === TzTokens.BAREMODULE && return :BAREMODULE
    k === TzTokens.BEGIN && return :BEGIN
    k === TzTokens.BREAK && return :BREAK
    k === TzTokens.CATCH && return :CATCH
    k === TzTokens.CONST && return :CONST
    k === TzTokens.CONTINUE && return :CONTINUE
    k === TzTokens.DO && return :DO
    k === TzTokens.ELSE && return :ELSE
    k === TzTokens.ELSEIF && return :ELSEIF
    k === TzTokens.END && return :END
    k === TzTokens.EXPORT && return :EXPORT
    k === TzTokens.FINALLY && return :FINALLY
    k === TzTokens.FOR && return :FOR
    k === TzTokens.FUNCTION && return :FUNCTION
    k === TzTokens.GLOBAL && return :GLOBAL
    k === TzTokens.IF && return :IF
    k === TzTokens.IMPORT && return :IMPORT
    k === TzTokens.LET && return :LET
    k === TzTokens.LOCAL && return :LOCAL
    k === TzTokens.MACRO && return :MACRO
    k === TzTokens.MODULE && return :MODULE
    k === TzTokens.MUTABLE && return :MUTABLE
    k === TzTokens.OUTER && return :OUTER
    k === TzTokens.PRIMITIVE && return :PRIMITIVE
    k === TzTokens.QUOTE && return :QUOTE
    k === TzTokens.RETURN && return :RETURN
    k === TzTokens.STRUCT && return :STRUCT
    k === TzTokens.TRY && return :TRY
    k === TzTokens.TYPE && return :TYPE
    k === TzTokens.USING && return :USING
    k === TzTokens.WHILE && return :WHILE

    # Literals
    k === TzTokens.INTEGER && return :INTEGER
    k === TzTokens.BIN_INT && return :BININT
    k === TzTokens.HEX_INT && return :HEXINT
    k === TzTokens.OCT_INT && return :OCTINT
    k === TzTokens.FLOAT && return :FLOAT
    k === TzTokens.STRING && return :STRING
    # k === TzTokens.TRIPLE_STRING && return :TRIPLESTRING
    k === TzTokens.CHAR && return :CHAR
    k === TzTokens.CMD && return :CMD
    # k === TzTokens.TRIPLE_CMD && return :TRIPLECMD
    k === TzTokens.TRUE && return :TRUE
    k === TzTokens.FALSE && return :FALSE

    # End of input
    k === TzTokens.ENDMARKER && return :errortoken

    error("Unknown token $k")
end
69+
70+
# Predicate for the kinds which CSTParser treats as "trailing trivia", i.e.
# trivia which gets attached to the end of the preceding node: whitespace,
# newline whitespace, comments and `;`.
function is_cst_trailing_trivia(x)
    return kind(x) in KSet`Whitespace NewlineWs Comment ;`
end
74+
75+
# Head symbol for a leaf `CSTParser.EXPR` built from a token of kind `k`.
#
# See CSTParser.literalmap, which can't be used directly because Tokenize.jl
# has been customized here.
#
# FIXME: STRING, TRIPLE_STRING, CMD, TRIPLE_CMD need special handling:
# * STRING doesn't include delimiters (DQUOTE tokens)
# * CMD doesn't include delimiters (BACKTICK tokens)
# * TRIPLE_STRING is a composite of STRING and TRIPLE_DQUOTE
# * TRIPLE_CMD is a composite of CMD and TRIPLE_BACKTICK
# They don't exist anymore as individual tokens.
function _cst_leaf_head(k)
    k === TzTokens.NOTHING && return :NOTHING
    # FIXME: The macro name kinds probably need special handling
    k === TzTokens.MACRO_NAME && return :IDENTIFIER
    k === TzTokens.CMD_MACRO_NAME && return :IDENTIFIER
    k === TzTokens.STRING_MACRO_NAME && return :IDENTIFIER
    k === TzTokens.DQUOTE && return :DQUOTE
    k === TzTokens.BACKTICK && return :BACKTICK
    is_operator(k) && return :OPERATOR
    is_identifier(k) && return :IDENTIFIER
    return tokenkindtoheadmap(k)
end

# Convert the green tree rooted at `raw_node` into a `CSTParser.EXPR`.
#
# `source` supplies the text for leaf values and `position` is the offset in
# `source` at which `raw_node` starts (1 by default). Children with children
# are converted recursively; leaf tokens become leaf EXPRs which absorb any
# directly following trailing-trivia siblings into their `fullspan`. Leaves
# flagged by `is_trivia` go into the node's trivia list, everything else into
# its args.
function cst(source::SourceFile, raw_node::GreenNode{SyntaxHead}, position::Integer=1)
    origin = position
    kids = children(raw_node)
    nkids = length(kids)
    args = CSTParser.EXPR[]
    trivia = CSTParser.EXPR[]
    # Width of the trailing trivia carried by the most recently emitted child
    trailing = 0

    idx = 1
    while idx <= nkids
        child = kids[idx]

        if haschildren(child)
            # Interior node: recurse
            sub = cst(source, child, position)
            push!(args, sub)
            trailing = sub.fullspan - sub.span
            position += span(child)
            idx += 1
            continue
        end

        # Leaf token
        leaf_start = position
        leaf_span = span(child)
        position += leaf_span
        # Swallow the trailing trivia tokens which follow this token; they
        # only contribute to the leaf's fullspan.
        while idx < nkids && is_cst_trailing_trivia(kids[idx + 1])
            idx += 1
            position += span(kids[idx])
        end
        leaf_fullspan = position - leaf_start
        trailing = leaf_fullspan - leaf_span

        leaf_kind = kind(child)
        val = source[leaf_start:(leaf_start + leaf_span - 1)]

        if leaf_kind == K"nothing"
            # First `nothing` token in file seems to require this. Why is
            # not known.
            leaf_span = leaf_fullspan
        end

        leaf_head = _cst_leaf_head(leaf_kind)
        dest = is_trivia(child) ? trivia : args
        push!(dest, CSTParser.EXPR(leaf_head, nothing, nothing, leaf_fullspan,
                                   leaf_span, val, nothing, nothing))
        idx += 1
    end

    if is_infix(raw_node)
        # Exchange the first two args of infix nodes.
        # TODO: Other argument swizzling, as done in SyntaxNode -> Expr conversions
        args[1], args[2] = args[2], args[1]
    end

    node_fullspan = position - origin
    node_span = node_fullspan - trailing

    node_kind = kind(raw_node)
    if node_kind == K"toplevel"
        node_head = :file
    elseif is_operator(node_kind)
        # For operator kinds, the first trivia EXPR becomes the head
        node_head = popfirst!(trivia)
    else
        node_head = Symbol(lowercase(string(kind(raw_node))))
    end

    node = CSTParser.EXPR(node_head, args,
                          isempty(trivia) ? nothing : trivia,
                          node_fullspan, node_span, nothing, nothing, nothing)
    # Link children back to their parent: args first, then trivia
    for group in (args, trivia), e in group
        e.parent = node
    end
    return node
end
161+
162+
163+
# Parse `text` with JuliaSyntax and build a green tree which is arranged for
# conversion to CSTParser.EXPR.
#
# Some steps of that conversion are most conveniently done on the raw
# ParseStream representation, because CSTParser.EXPR attaches some types of
# trivia to the end of nontrivia or trivia tokens. The token ranges of the
# nonterminal nodes are adjusted here so that the conversion becomes a *local*
# operation on green tree nodes.
function parse_for_cst(text)
    stream = JuliaSyntax.ParseStream(text)

    # When the input starts with trailing-trivia tokens, bump an invisible
    # `nothing` token first so that they have something to attach to.
    if is_cst_trailing_trivia(peek(stream, skip_whitespace=false))
        JuliaSyntax.bump_invisible(stream, K"nothing")
    end
    JuliaSyntax.parse(stream, rule=:toplevel)

    # Make the toplevel range start at the first token of the stream
    ranges = stream.ranges
    @assert kind(ranges[end]) == K"toplevel"
    top = ranges[end]
    ranges[end] = TaggedRange(top.head, 1, top.last_token)

    # Shift the boundaries of every range so that whitespace trivia tokens
    # are always *trailing* siblings of non-whitespace tokens, as required
    # for the later conversion to CSTParser.EXPR.
    tokens = stream.tokens
    ntokens = length(tokens)
    for idx in eachindex(ranges)
        r = ranges[idx]

        # Move the start forward past trailing-trivia tokens
        lo = r.first_token
        while lo < ntokens && is_cst_trailing_trivia(tokens[lo])
            lo += 1
        end

        # Extend the end over the trailing-trivia tokens which follow
        hi = r.last_token
        while hi < ntokens && is_cst_trailing_trivia(tokens[hi + 1])
            hi += 1
        end

        ranges[idx] = TaggedRange(head(r), lo, hi)
    end

    return JuliaSyntax.build_tree(JuliaSyntax.GreenNode, stream)
end
206+
207+
# CSTParser.EXPR equality; should be in CSTParser...
#
# Two EXPRs compare equal when their `head`, `args`, `trivia`, `fullspan`,
# `span`, `val` and `meta` fields are all equal. The `parent` field is not
# compared.
#
# As a debugging aid, every differing `head`, `trivia`, `fullspan`, `span` or
# `val` field is reported with `@info` before the result is computed. Field
# comparisons on nested EXPRs re-enter this method, so mismatches deep in the
# tree are reported as well.
function Base.:(==)(x::CSTParser.EXPR, y::CSTParser.EXPR)
    # Debugging hacks:
    if x.head != y.head
        @info "Head mismatch" x.head y.head
    end
    if x.trivia != y.trivia
        @info "Trivia mismatch" x.trivia y.trivia
    end
    if x.fullspan != y.fullspan
        @info "Fullspan mismatch" x y x.fullspan y.fullspan
    end
    if x.span != y.span
        @info "Span mismatch" x y x.span y.span
    end
    if x.val != y.val
        @info "Val mismatch" x.val y.val
    end

    return x.head == y.head &&
           x.args == y.args &&
           x.trivia == y.trivia &&
           x.fullspan == y.fullspan &&
           x.span == y.span &&
           x.val == y.val &&
           x.meta == y.meta
end
234+
235+
# Some things which work
236+
#text = " 1 + 2 * 3 "
237+
#text = "[ 1 ; 2 ;]"
238+
#text = "for i=1:10\nx\ny\nend"
239+
#text = "100.00"
240+
text = """
241+
function f(x,y)
242+
s = 0
243+
for i = 1:10
244+
s += x - i^y
245+
end
246+
end
247+
"""
248+
249+
# Some things which don't yet work
250+
#
251+
# Macro names
252+
# text = "@A.asdf x y"
253+
#
254+
# Bracket nodes don't exist yet in JuliaSyntax
255+
# text = "(a + b)"
256+
#
257+
# Strings have separate delimiters. Will need to put them back together.
258+
# text = "\"str\""
259+
260+
# Wrap the sample text so that `cst` can look up token text by position.
source = SourceFile(text)

# Green tree with the trivia arranged for CSTParser-style conversion.
ex = parse_for_cst(text)
# show(stdout, MIME"text/plain"(), ex, text)

# Reference tree from CSTParser, and the tree converted from JuliaSyntax.
y = CSTParser.parse(text, true)
x = cst(source, ex)

# Compare the two; mismatching fields are logged by the `==` method above.
x == y
268+

0 commit comments

Comments
 (0)