python-hcl2/hcl2/transformer.py at d95885fdb63522e4d7c3d4f171f48a7c1724dea5 · amplify-education/python-hcl2 · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
"""A Lark Transformer for transforming a Lark parse tree into a Python dict"""
import json
import re
import sys
from collections import namedtuple
from typing import List, Dict, Any

from lark import Token
from lark.tree import Meta
from lark.visitors import Transformer, Discard, _DiscardType, v_args

from .reconstructor import reverse_quotes_within_interpolation


# Matches a heredoc token introduced by "<<": group 1 is the delimiter (a letter
# followed by one or more of [a-zA-Z0-9._-]); group 2 is everything between the
# first newline and the last occurrence of that delimiter.
HEREDOC_PATTERN = re.compile(r"<<([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S)
# Same shape as HEREDOC_PATTERN, but for the "<<-" form of heredoc.
HEREDOC_TRIM_PATTERN = re.compile(r"<<-([a-zA-Z][a-zA-Z0-9._-]+)\n([\s\S]*)\1", re.S)


# Keys under which DictTransformer.block stores line numbers when with_meta is set
START_LINE = "__start_line__"
END_LINE = "__end_line__"


# Name/value pair returned by DictTransformer.attribute; DictTransformer.body uses
# the type to tell attributes apart from blocks
Attribute = namedtuple("Attribute", ("key", "value"))


# pylint: disable=missing-function-docstring,unused-argument
class DictTransformer(Transformer):
    """Takes a syntax tree generated by the parser and
    transforms it to a dict.
    """

    with_meta: bool

    @staticmethod
    def is_type_keyword(value: str) -> bool:
        return value in {"bool", "number", "string"}

    def __init__(self, with_meta: bool = False):
        """
        :param with_meta: If set to true then adds `__start_line__` and `__end_line__`
        parameters to the output dict. Default to false.
        """
        self.with_meta = with_meta
        super().__init__()

    def float_lit(self, args: List) -> float:
        return float("".join([self.to_tf_inline(arg) for arg in args]))

    def int_lit(self, args: List) -> int:
        return int("".join([self.to_tf_inline(arg) for arg in args]))

    def expr_term(self, args: List) -> Any:
        args = self.strip_new_line_tokens(args)

        #
        if args[0] == "true":
            return True
        if args[0] == "false":
            return False
        if args[0] == "null":
            return None

        if args[0] == "(" and args[-1] == ")":
            return "".join(str(arg) for arg in args)

        return args[0]

    def index_expr_term(self, args: List) -> str:
        args = self.strip_new_line_tokens(args)
        return f"{args[0]}{args[1]}"

    def index(self, args: List) -> str:
        args = self.strip_new_line_tokens(args)
        return f"[{args[0]}]"

    def get_attr_expr_term(self, args: List) -> str:
        return f"{args[0]}{args[1]}"

    def get_attr(self, args: List) -> str:
        return f".{args[0]}"

    def attr_splat_expr_term(self, args: List) -> str:
        return f"{args[0]}{args[1]}"

    def attr_splat(self, args: List) -> str:
        args_str = "".join(self.to_tf_inline(arg) for arg in args)
        return f".*{args_str}"

    def full_splat_expr_term(self, args: List) -> str:
        return f"{args[0]}{args[1]}"

    def full_splat(self, args: List) -> str:
        args_str = "".join(self.to_tf_inline(arg) for arg in args)
        return f"[*]{args_str}"

    def tuple(self, args: List) -> List:
        return [self.to_string_dollar(arg) for arg in self.strip_new_line_tokens(args)]

    def object_elem(self, args: List) -> Dict:
        # This returns a dict with a single key/value pair to make it easier to merge these
        # into a bigger dict that is returned by the "object" function
        key = self.strip_quotes(str(args[0].children[0]))
        if len(args) == 3:
            value = args[2]
        else:
            value = args[1]

        value = self.to_string_dollar(value)
        return {key: value}

    def object(self, args: List) -> Dict:
        """Merge the single-entry dicts of all object elements into one dict."""
        merged: Dict[str, Any] = {}
        for element in self.strip_new_line_tokens(args):
            # the optional comma at the end of an object element carries no data
            is_comma = isinstance(element, Token) and element.type == "COMMA"
            if not is_comma:
                merged.update(element)
        return merged

    def function_call(self, args: List) -> str:
        """
        Render a function call as ``name(arg1, arg2, ...)``.

        The first element is the function name; the second, when present, is
        the iterable of arguments, each rendered with ``to_tf_inline``.
        """
        parts = self.strip_new_line_tokens(args)
        rendered: List[str] = []
        if len(parts) > 1:
            rendered = [
                self.to_tf_inline(arg) for arg in parts[1] if arg is not Discard
            ]
        joined = ", ".join(rendered)
        return f"{parts[0]}({joined})"

    def provider_function_call(self, args: List) -> str:
        """
        Render a provider function call as ``a::b::c(arg1, arg2, ...)``.

        Elements 0, 2 and 4 are joined with ``::`` to form the name; element 5,
        when present, is the iterable of arguments.
        """
        parts = self.strip_new_line_tokens(args)
        rendered: List[str] = []
        if len(parts) > 5:
            rendered = [
                self.to_tf_inline(arg) for arg in parts[5] if arg is not Discard
            ]
        qualified_name = "::".join([parts[0], parts[2], parts[4]])
        joined = ", ".join(rendered)
        return f"{qualified_name}({joined})"

    def arguments(self, args: List) -> List:
        """Return the argument list unchanged."""
        return args

    @v_args(meta=True)
    def block(self, meta: Meta, args: List) -> Dict:
        """
        Nest a block body under its labels.

        The last argument is the body dict and everything before it is treated
        as a label, giving ``{label1: {label2: {labelN: body}}}``. When
        ``with_meta`` is set the body dict is updated in place with the start
        and end line taken from ``meta``.
        """
        *labels, nested = args
        if self.with_meta:
            first_line, last_line = meta.line, meta.end_line
            nested[START_LINE] = first_line
            nested[END_LINE] = last_line

        # wrap from the innermost label outwards
        for label in labels[::-1]:
            nested = {self.strip_quotes(label): nested}

        return nested

    def attribute(self, args: List) -> Attribute:
        """
        Build an ``Attribute`` from a name (first argument) and value (third argument).

        Surrounding double quotes are dropped from the name; the value goes
        through ``to_string_dollar``.
        """
        name = str(args[0])
        is_quoted = name.startswith('"') and name.endswith('"')
        if is_quoted:
            name = name[1:-1]
        return Attribute(name, self.to_string_dollar(args[2]))

    def conditional(self, args: List) -> str:
        args = self.strip_new_line_tokens(args)
        return f"{args[0]} ? {args[1]} : {args[2]}"

    def binary_op(self, args: List) -> str:
        return " ".join([self.to_tf_inline(arg) for arg in args])

    def unary_op(self, args: List) -> str:
        return "".join([self.to_tf_inline(arg) for arg in args])

    def binary_term(self, args: List) -> str:
        args = self.strip_new_line_tokens(args)
        return " ".join([self.to_tf_inline(arg) for arg in args])

    def body(self, args: List) -> Dict[str, List]:
        """
        Combine the attributes and blocks of a body into one dict.

        See https://github.com/hashicorp/hcl/blob/main/hclsyntax/spec.md#bodies

        An attribute name may be defined only once within a body, so a repeated
        name raises ``RuntimeError``. Blocks, on the other hand, may repeat with
        the same type and labels, so every block is stored in a list keyed by
        its type, even when there is only one. A block whose type collides with
        an attribute name also raises ``RuntimeError``.
        """
        merged: Dict[str, Any] = {}
        attribute_names = set()
        for item in self.strip_new_line_tokens(args):
            if not isinstance(item, Attribute):
                # Not an attribute, so this is a block: collect it under its type.
                for block_type, block_content in item.items():
                    block_type = str(block_type)
                    if block_type not in merged:
                        merged[block_type] = [block_content]
                        continue
                    if block_type in attribute_names:
                        raise RuntimeError(f"{block_type} already defined")
                    merged[block_type].append(block_content)
                continue

            if item.key in merged:
                raise RuntimeError(f"{item.key} already defined")
            merged[item.key] = item.value
            attribute_names.add(item.key)

        return merged

    def start(self, args: List) -> Dict:
        args = self.strip_new_line_tokens(args)
        return args[0]

    def binary_operator(self, args: List) -> str:
        return str(args[0])

    def heredoc_template(self, args: List) -> str:
        """
        Extract the text of a "<<" heredoc and return it in double quotes.

        Trailing new lines, tabs and spaces are removed from the text.
        Raises ``RuntimeError`` when the token does not match ``HEREDOC_PATTERN``.
        """
        token_text = str(args[0])
        matched = HEREDOC_PATTERN.match(token_text)
        if matched is None:
            raise RuntimeError(f"Invalid Heredoc token: {args[0]}")

        content = matched.group(2).rstrip("\n\t ")
        return f'"{content}"'

    def heredoc_template_trim(self, args: List) -> str:
        """
        Extract the text of a "<<-" heredoc, strip the common indentation and
        return the result in double quotes.

        See https://github.com/hashicorp/hcl2/blob/master/hcl/hclsyntax/spec.md#template-expressions
        The minimum number of leading spaces over all non-blank lines is
        calculated and that many characters are removed from the start of
        every line.

        :param args: list whose first element is the heredoc token.
        :return: the de-indented heredoc text wrapped in double quotes.
        :raises RuntimeError: when the token does not match ``HEREDOC_TRIM_PATTERN``.
        """
        match = HEREDOC_TRIM_PATTERN.match(str(args[0]))
        if not match:
            raise RuntimeError(f"Invalid Heredoc token: {args[0]}")

        trim_chars = "\n\t "
        text = match.group(2).rstrip(trim_chars)
        lines = text.split("\n")

        # Blank lines are left out of the calculation: they have no indentation
        # of their own, and counting them as "zero leading spaces" would switch
        # trimming off for the whole heredoc as soon as it has an empty line.
        indents = [
            len(line) - len(line.lstrip(" ")) for line in lines if line.strip()
        ]
        min_spaces = min(indents, default=0)

        # trim off that number of leading characters from each line
        # (a blank line shorter than that simply becomes empty)
        trimmed = "\n".join(line[min_spaces:] for line in lines)
        return f'"{trimmed}"'

    def new_line_or_comment(self, args: List) -> _DiscardType:
        """Return the ``Discard`` marker imported from ``lark.visitors``; ``args`` is ignored."""
        return Discard

    def for_tuple_expr(self, args: List) -> str:
        args = self.strip_new_line_tokens(args)
        for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]])
        return f"[{for_expr}]"

    def for_intro(self, args: List) -> str:
        args = self.strip_new_line_tokens(args)
        return " ".join([self.to_tf_inline(arg) for arg in args])

    def for_cond(self, args: List) -> str:
        args = self.strip_new_line_tokens(args)
        return " ".join([self.to_tf_inline(arg) for arg in args])

    def for_object_expr(self, args: List) -> str:
        args = self.strip_new_line_tokens(args)
        for_expr = " ".join([self.to_tf_inline(arg) for arg in args[1:-1]])
        # doubled curly braces stands for inlining the braces
        # and the third pair of braces is for the interpolation
        # e.g. f"{2 + 2} {{2 + 2}}" == "4 {2 + 2}"
        return f"{{{for_expr}}}"

    def string_with_interpolation(self, args: List) -> str:
        return '"' + ("".join(args)) + '"'

    def interpolation_maybe_nested(self, args: List) -> str:
        # return "".join(args)
        return "${" + ("".join(args)) + "}"

    def strip_new_line_tokens(self, args: List) -> List:
        """
        Return a new list without new line tokens and ``Discard`` markers.

        The parser sometimes leaves these in the tree, so they are filtered
        out here before the remaining arguments are used.
        """
        kept = []
        for arg in args:
            if arg != "\n" and arg is not Discard:
                kept.append(arg)
        return kept

    def to_string_dollar(self, value: Any) -> Any:
        """Wrap a string in ${ and }"""
        if isinstance(value, str):
            # if it's already wrapped, pass it unmodified
            if value.startswith("${") and value.endswith("}"):
                return value

            if value.startswith('"') and value.endswith('"'):
                value = str(value)[1:-1]
                return self.process_escape_sequences(value)

            if self.is_type_keyword(value):
                return value

            return f"${{{value}}}"
        return value

    def strip_quotes(self, value: Any) -> Any:
        """Remove quote characters from the start and end of a string"""
        if isinstance(value, str):
            if value.startswith('"') and value.endswith('"'):
                value = str(value)[1:-1]
                return self.process_escape_sequences(value)
        return value

    def process_escape_sequences(self, value: str) -> str:
        """
        Process HCL escape sequences within quoted template expressions.

        Handles ``\\n``, ``\\r``, ``\\t``, ``\\"`` and ``\\\\``. Unicode escapes
        (``\\uNNNN`` and ``\\UNNNNNNNN``) are left untouched for now, but this
        method can be extended in the future.

        :param value: the text between the quotes; non-string values are
        returned unchanged.
        :return: the text with the supported escape sequences decoded.
        """
        if not isinstance(value, str):
            return value

        decoded = {"n": "\n", "r": "\r", "t": "\t", '"': '"', "\\": "\\"}
        # Decode in a single left-to-right pass. Chaining str.replace() calls
        # would rescan already-decoded text, so an escaped backslash followed by
        # "n", "r" or "t" (e.g. c:\\temp) would wrongly become a control character.
        return re.sub(
            r'\\([nrt"\\])',
            lambda match: decoded[match.group(1)],
            value,
        )

    def to_tf_inline(self, value: Any) -> str:
        """
        Converts complex objects (e.g.) dicts to an "inline" HCL syntax
        for use in function calls and ${interpolation} strings
        """
        if isinstance(value, dict):
            dict_v = json.dumps(value)
            return reverse_quotes_within_interpolation(dict_v)
        if isinstance(value, list):
            value = [self.to_tf_inline(item) for item in value]
            return f"[{', '.join(value)}]"
        if isinstance(value, bool):
            return "true" if value else "false"
        if isinstance(value, str):
            return value
        if isinstance(value, (int, float)):
            return str(value)
        if value is None:
            return "None"

        raise RuntimeError(f"Invalid type to convert to inline HCL: {type(value)}")

    def identifier(self, value: Any) -> Any:
        # Making identifier a token by capitalizing it to IDENTIFIER
        # seems to return a token object instead of the str
        # So treat it like a regular rule
        # In this case we just convert the whole thing to a string
        return str(value[0])