|
| 1 | +# A prototype for converting JuliaSyntax data structures into CSTParser.EXPR. |
| 2 | + |
| 3 | +using CSTParser |
| 4 | + |
| 5 | +using JuliaSyntax |
| 6 | +using JuliaSyntax: GreenNode, SyntaxHead, SourceFile, TaggedRange, |
| 7 | + @K_str, @KSet_cmd, |
| 8 | + haschildren, is_syntax_kind, is_keyword, is_operator, is_identifier, head, kind, span, |
| 9 | + is_infix, is_trivia, untokenize, TzTokens, children |
| 10 | + |
# Map a leaf token kind onto the head symbol used for it on the CSTParser side.
# See CSTParser.tokenkindtoheadmap.
#
# The checks run top to bottom and the first kind that is identical (`===`) to
# `k` decides the result. A kind with no entry raises an error.
function tokenkindtoheadmap(k::TzTokens.Kind)
    # Punctuation
    k === TzTokens.COMMA && return :COMMA
    k === TzTokens.LPAREN && return :LPAREN
    k === TzTokens.RPAREN && return :RPAREN
    k === TzTokens.LSQUARE && return :LSQUARE
    k === TzTokens.RSQUARE && return :RSQUARE
    k === TzTokens.LBRACE && return :LBRACE
    k === TzTokens.RBRACE && return :RBRACE
    k === TzTokens.AT_SIGN && return :ATSIGN
    k === TzTokens.DOT && return :DOT

    # Keywords
    k === TzTokens.ABSTRACT && return :ABSTRACT
    k === TzTokens.BAREMODULE && return :BAREMODULE
    k === TzTokens.BEGIN && return :BEGIN
    k === TzTokens.BREAK && return :BREAK
    k === TzTokens.CATCH && return :CATCH
    k === TzTokens.CONST && return :CONST
    k === TzTokens.CONTINUE && return :CONTINUE
    k === TzTokens.DO && return :DO
    k === TzTokens.ELSE && return :ELSE
    k === TzTokens.ELSEIF && return :ELSEIF
    k === TzTokens.END && return :END
    k === TzTokens.EXPORT && return :EXPORT
    k === TzTokens.FINALLY && return :FINALLY
    k === TzTokens.FOR && return :FOR
    k === TzTokens.FUNCTION && return :FUNCTION
    k === TzTokens.GLOBAL && return :GLOBAL
    k === TzTokens.IF && return :IF
    k === TzTokens.IMPORT && return :IMPORT
    k === TzTokens.LET && return :LET
    k === TzTokens.LOCAL && return :LOCAL
    k === TzTokens.MACRO && return :MACRO
    k === TzTokens.MODULE && return :MODULE
    k === TzTokens.MUTABLE && return :MUTABLE
    k === TzTokens.OUTER && return :OUTER
    k === TzTokens.PRIMITIVE && return :PRIMITIVE
    k === TzTokens.QUOTE && return :QUOTE
    k === TzTokens.RETURN && return :RETURN
    k === TzTokens.STRUCT && return :STRUCT
    k === TzTokens.TRY && return :TRY
    k === TzTokens.TYPE && return :TYPE
    k === TzTokens.USING && return :USING
    k === TzTokens.WHILE && return :WHILE

    # Literals
    k === TzTokens.INTEGER && return :INTEGER
    k === TzTokens.BIN_INT && return :BININT
    k === TzTokens.HEX_INT && return :HEXINT
    k === TzTokens.OCT_INT && return :OCTINT
    k === TzTokens.FLOAT && return :FLOAT
    k === TzTokens.STRING && return :STRING
    # k === TzTokens.TRIPLE_STRING && return :TRIPLESTRING
    k === TzTokens.CHAR && return :CHAR
    k === TzTokens.CMD && return :CMD
    # k === TzTokens.TRIPLE_CMD && return :TRIPLECMD
    k === TzTokens.TRUE && return :TRUE
    k === TzTokens.FALSE && return :FALSE

    k === TzTokens.ENDMARKER && return :errortoken

    error("Unknown token $k")
end
| 69 | + |
# Predicate for the token kinds that CSTParser treats as "trailing trivia",
# i.e. trivia which gets attached to the end of the preceding node.
#
# Returns `true` when `kind(x)` is one of: Whitespace, NewlineWs, Comment, `;`.
function is_cst_trailing_trivia(x)
    return kind(x) in KSet`Whitespace NewlineWs Comment ;`
end
| 74 | + |
# Convert a GreenNode into a CSTParser.EXPR.
#
# Arguments:
# * `source`   - the text is sliced out of this to obtain the `val` of leaves
# * `raw_node` - the green tree node to convert
# * `position` - index into `source` at which `raw_node` begins (default 1)
#
# Children which themselves have children are converted recursively and always
# become `args`. Leaf tokens absorb every directly following sibling for which
# `is_cst_trailing_trivia` holds: the absorbed tokens count towards the leaf's
# fullspan but not towards its span. A leaf goes into `trivia` when
# `is_trivia` says so, and into `args` otherwise.
function cst(source::SourceFile, raw_node::GreenNode{SyntaxHead}, position::Integer=1)
    origin = position
    kids = children(raw_node)
    args = CSTParser.EXPR[]
    trivia = CSTParser.EXPR[]
    # Width of the trivia hanging off the end of the most recently converted
    # child; subtracted from the node's fullspan to obtain its span.
    tail_width = 0

    idx = 1
    while idx <= length(kids)
        child = kids[idx]
        if haschildren(child)
            # Interior node: recurse.
            sub = cst(source, child, position)
            push!(args, sub)
            tail_width = sub.fullspan - sub.span
            position += span(child)
        else
            leaf_begin = position
            leaf_width = span(child)
            position += leaf_width
            # Here we append any trailing trivia tokens to the node.
            while idx < length(kids) && is_cst_trailing_trivia(kids[idx + 1])
                idx += 1
                position += span(kids[idx])
            end
            padded_width = position - leaf_begin
            tail_width = padded_width - leaf_width

            # Leaf node. The value is sliced using the width *without* trivia.
            leaf_kind = kind(child)
            leaf_text = source[leaf_begin:(leaf_begin + leaf_width - 1)]

            if leaf_kind == K"nothing"
                # First `nothing` token in file seems to require this. Why I don't know.
                leaf_width = padded_width
            end

            # See CSTParser.literalmap. Which we can't use directly because we've
            # customized Tokenize.jl :-(
            leaf_head = if leaf_kind === TzTokens.NOTHING
                :NOTHING
            elseif leaf_kind === TzTokens.MACRO_NAME ||
                   leaf_kind === TzTokens.CMD_MACRO_NAME ||
                   leaf_kind === TzTokens.STRING_MACRO_NAME
                # FIXME: These probably need special handling
                :IDENTIFIER
            elseif leaf_kind === TzTokens.DQUOTE
                :DQUOTE
            elseif leaf_kind === TzTokens.BACKTICK
                :BACKTICK
            elseif is_operator(leaf_kind)
                :OPERATOR
            elseif is_identifier(leaf_kind)
                :IDENTIFIER
            else
                tokenkindtoheadmap(leaf_kind)
            end
            # FIXME: STRING, TRIPLE_STRING, CMD, TRIPLE_CMD, need special handling:
            # * STRING doesn't include delimiters
            # * CMD doesn't include delimiters
            # * TRIPLE_STRING is a composite of STRING and TRIPLE_DQUOTE
            # * TRIPLE_CMD is a composite of CMD and TRIPLE_DQUOTE
            # They don't exist anymore as individual tokens

            destination = is_trivia(child) ? trivia : args
            push!(destination,
                  CSTParser.EXPR(leaf_head, nothing, nothing, padded_width, leaf_width,
                                 leaf_text, nothing, nothing))
        end
        idx += 1
    end

    if is_infix(raw_node)
        # Exchange the first two arguments of infix nodes.
        args[1], args[2] = args[2], args[1]
        # TODO: Other argument swizzling, as done in SyntaxNode -> Expr conversions
    end

    node_fullspan = position - origin
    node_span = node_fullspan - tail_width
    node_kind = kind(raw_node)
    node_head = if node_kind == K"toplevel"
        :file
    elseif is_operator(node_kind)
        # For operator-kinded nodes the first trivia EXPR becomes the head
        # (and is therefore removed from `trivia`).
        popfirst!(trivia)
    else
        Symbol(lowercase(string(kind(raw_node))))
    end

    x = CSTParser.EXPR(node_head, args,
                       isempty(trivia) ? nothing : trivia,
                       node_fullspan, node_span, nothing, nothing, nothing)
    # Link every remaining child (args first, then trivia) back to the new node.
    for member in Iterators.flatten((args, trivia))
        member.parent = x
    end
    return x
end
| 161 | + |
| 162 | + |
# Some steps of the conversion to CSTParser.EXPR are most conveniently done on
# the raw ParseStream representation. In particular, CSTParser.EXPR attaches
# some types of trivia to the end of nontrivia or trivia tokens.
#
# This function parses `text` as `:toplevel` and then reassociates trivia with
# nonterminal nodes, so that converting to CSTParser.EXPR becomes a *local*
# operation on green tree nodes. Returns the tree built by
# `JuliaSyntax.build_tree(JuliaSyntax.GreenNode, stream)`.
function parse_for_cst(text)
    stream = JuliaSyntax.ParseStream(text)

    # Insert initial nothing node if necessary to anchor trailing whitespace.
    if is_cst_trailing_trivia(peek(stream, skip_whitespace=false))
        JuliaSyntax.bump_invisible(stream, K"nothing")
    end
    JuliaSyntax.parse(stream, rule=:toplevel)

    # Fix up start of stream: make the final (toplevel) range begin at token 1.
    ranges = stream.ranges
    @assert kind(ranges[end]) == K"toplevel"
    toplevel_range = ranges[end]
    ranges[end] = TaggedRange(toplevel_range.head, 1, toplevel_range.last_token)

    # Rearrange whitespace trivia tokens so that they're always *trailing*
    # siblings of non-whitespace trivia tokens.
    #
    # This is required for later conversion to CSTParser.EXPR
    tokens = stream.tokens
    ntokens = length(tokens)
    for (slot, tagged) in enumerate(ranges)
        # Move the start forward past trailing-trivia tokens ...
        lo = tagged.first_token
        while lo < ntokens && is_cst_trailing_trivia(tokens[lo])
            lo += 1
        end
        # ... and grow the end over the trailing-trivia tokens which follow it.
        hi = tagged.last_token
        while hi < ntokens && is_cst_trailing_trivia(tokens[hi + 1])
            hi += 1
        end
        ranges[slot] = TaggedRange(head(tagged), lo, hi)
    end

    return JuliaSyntax.build_tree(JuliaSyntax.GreenNode, stream)
end
| 206 | + |
# CSTParser.EXPR equality; should be in CSTParser...
#
# Two EXPRs are equal when `head`, `args`, `trivia`, `fullspan`, `span`, `val`
# and `meta` all compare equal with `==`. The `parent` field is not compared.
#
# As a debugging aid, a mismatch in `head`, `trivia`, `fullspan`, `span` or
# `val` is logged with `@info` before the result is computed. Each message
# names the field that actually differs.
function Base.:(==)(x::CSTParser.EXPR, y::CSTParser.EXPR)
    # Debugging hacks:
    if x.head != y.head
        @info "Head mismatch" x.head y.head
    end
    if x.trivia != y.trivia
        @info "Trivia mismatch" x.trivia y.trivia
    end
    if x.fullspan != y.fullspan
        @info "Fullspan mismatch" x y x.fullspan y.fullspan
    end
    if x.span != y.span
        @info "Span mismatch" x y x.span y.span
    end
    if x.val != y.val
        @info "Val mismatch" x.val y.val
    end

    return x.head == y.head &&
        x.args == y.args &&
        x.trivia == y.trivia &&
        x.fullspan == y.fullspan &&
        x.span == y.span &&
        x.val == y.val &&
        x.meta == y.meta
end
| 234 | + |
# Driver: convert one sample program with `cst` and compare the result against
# what CSTParser itself produces for the same text.
#
# Inputs known to work (swap one in for `text` below to try it):
#text = " 1 + 2 * 3 "
#text = "[ 1 ; 2 ;]"
#text = "for i=1:10\nx\ny\nend"
#text = "100.00"
#
# NOTE(review): the indentation inside this literal looks collapsed to a single
# space; confirm against the original file.
text = """
function f(x,y)
 s = 0
 for i = 1:10
 s += x - i^y
 end
end
"""

# Inputs which don't yet work:
#
# Macro names
# text = "@A.asdf x y"
#
# Bracket nodes don't exist yet in JuliaSyntax
# text = "(a + b)"
#
# Strings have separate delimiters. Will need to put them back together.
# text = "\"str\""

# Our side: parse with trivia reassociated, then convert the green tree.
source = SourceFile(text)
ex = parse_for_cst(text)
# show(stdout, MIME"text/plain"(), ex, text)
x = cst(source, ex)

# Reference side: CSTParser's own result for the same text.
y = CSTParser.parse(text, true)

# The comparison logs any mismatching fields via the `==` method defined above.
x == y
| 268 | + |
0 commit comments