Skip to content

Commit 8edc8a7

Browse files
some refactoring of reconstructor.py and api.py (#202)
1 parent 9693d42 commit 8edc8a7

File tree

3 files changed

+79
-100
lines changed

3 files changed

+79
-100
lines changed

hcl2/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
transform,
1414
reverse_transform,
1515
writes,
16-
AST,
1716
)
1817

1918
from .builder import Builder

hcl2/api.py

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
"""The API that will be exposed to users of this package"""
22
from typing import TextIO
33

4-
from lark.tree import Tree as AST
5-
from hcl2.parser import parser
4+
from lark.tree import Tree
5+
from hcl2.parser import parser, reconstruction_parser
66
from hcl2.transformer import DictTransformer
7+
from hcl2.reconstructor import HCLReconstructor, HCLReverseTransformer
78

89

910
def load(file: TextIO, with_meta=False) -> dict:
@@ -22,61 +23,45 @@ def loads(text: str, with_meta=False) -> dict:
2223
parameters to the output dict. Defaults to false.
2324
"""
2425
# append new line as a workaround for https://github.com/lark-parser/lark/issues/237
25-
# Lark doesn't support a EOF token so our grammar can't look for "new line or end of file"
26+
# Lark doesn't support an EOF token, so our grammar can't look for "new line or end of file"
2627
# This means that all blocks must end in a new line even if the file ends
2728
# Append a new line as a temporary fix
2829
tree = parser().parse(text + "\n")
2930
return DictTransformer(with_meta=with_meta).transform(tree)
3031

3132

32-
def parse(file: TextIO) -> AST:
33+
def parse(file: TextIO) -> Tree:
3334
"""Load HCL2 syntax tree from a file.
3435
:param file: File with hcl2 to be loaded as a syntax tree.
3536
"""
3637
return parses(file.read())
3738

3839

39-
def parses(text: str) -> AST:
40+
def parses(text: str) -> Tree:
4041
"""Load HCL2 syntax tree from a string.
4142
:param text: Text with hcl2 to be loaded as a syntax tree.
4243
"""
43-
# defer this import until this method is called, due to the performance hit
44-
# of rebuilding the grammar without cache
45-
from hcl2.parser import ( # pylint: disable=import-outside-toplevel
46-
reconstruction_parser,
47-
)
48-
4944
return reconstruction_parser().parse(text)
5045

5146

52-
def transform(ast: AST, with_meta=False) -> dict:
47+
def transform(ast: Tree, with_meta=False) -> dict:
5348
"""Convert an HCL2 AST to a dictionary.
5449
:param ast: HCL2 syntax tree, output from `parse` or `parses`
50+
:param with_meta: If set to true then adds `__start_line__` and `__end_line__`
51+
parameters to the output dict. Defaults to false.
5552
"""
5653
return DictTransformer(with_meta=with_meta).transform(ast)
5754

5855

59-
def reverse_transform(hcl2_dict: dict) -> AST:
56+
def reverse_transform(hcl2_dict: dict) -> Tree:
6057
"""Convert a dictionary to an HCL2 AST.
61-
:param dict: a dictionary produced by `load` or `transform`
58+
:param hcl2_dict: a dictionary produced by `load` or `transform`
6259
"""
63-
# defer this import until this method is called, due to the performance hit
64-
# of rebuilding the grammar without cache
65-
from hcl2.reconstructor import ( # pylint: disable=import-outside-toplevel
66-
hcl2_reverse_transformer,
67-
)
60+
return HCLReverseTransformer().transform(hcl2_dict)
6861

69-
return hcl2_reverse_transformer.transform(hcl2_dict)
7062

71-
72-
def writes(ast: AST) -> str:
63+
def writes(ast: Tree) -> str:
7364
"""Convert an HCL2 syntax tree to a string.
7465
:param ast: HCL2 syntax tree, output from `parse` or `parses`
7566
"""
76-
# defer this import until this method is called, due to the performance hit
77-
# of rebuilding the grammar without cache
78-
from hcl2.reconstructor import ( # pylint: disable=import-outside-toplevel
79-
hcl2_reconstructor,
80-
)
81-
82-
return hcl2_reconstructor.reconstruct(ast)
67+
return HCLReconstructor(reconstruction_parser()).reconstruct(ast)

hcl2/reconstructor.py

Lines changed: 65 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def reverse_quotes_within_interpolation(interp_s: str) -> str:
2222
method removes any erroneous escapes within interpolated segments of a
2323
string.
2424
"""
25-
return re.sub(r"\$\{(.*)\}", lambda m: m.group(0).replace('\\"', '"'), interp_s)
25+
return re.sub(r"\$\{(.*)}", lambda m: m.group(0).replace('\\"', '"'), interp_s)
2626

2727

2828
class WriteTokensAndMetaTransformer(Transformer_InPlace):
@@ -43,6 +43,7 @@ def __init__(
4343
tokens: Dict[str, TerminalDef],
4444
term_subs: Dict[str, Callable[[Symbol], str]],
4545
) -> None:
46+
super().__init__()
4647
self.tokens = tokens
4748
self.term_subs = term_subs
4849

@@ -91,47 +92,45 @@ def __default__(self, data, children, meta):
9192
class HCLReconstructor(Reconstructor):
9293
"""This class converts a Lark.Tree AST back into a string representing the underlying HCL code."""
9394

94-
# these variables track state during reconstruction to enable us to make
95-
# informed decisions about formatting output. They are primarily used
96-
# by the _should_add_space(...) method.
97-
last_char_space = True
98-
last_terminal = None
99-
last_rule = None
100-
deferred_item = None
101-
10295
def __init__(
10396
self,
10497
parser: Lark,
10598
term_subs: Optional[Dict[str, Callable[[Symbol], str]]] = None,
10699
):
107100
Reconstructor.__init__(self, parser, term_subs)
108101

109-
self.write_tokens = WriteTokensAndMetaTransformer(
110-
{token.name: token for token in self.tokens}, term_subs or {}
102+
self.write_tokens: WriteTokensAndMetaTransformer = (
103+
WriteTokensAndMetaTransformer(
104+
{token.name: token for token in self.tokens}, term_subs or {}
105+
)
111106
)
112107

113-
# space around these terminals if they're within for or if statements
114-
FOR_IF_KEYWORDS = [
115-
Terminal("IF"),
116-
Terminal("IN"),
117-
Terminal("FOR"),
118-
Terminal("FOR_EACH"),
119-
Terminal("FOR_OBJECT_ARROW"),
120-
Terminal("COLON"),
121-
]
122-
123-
# space on both sides, in ternaries and binary operators
124-
BINARY_OPS = [
125-
Terminal("QMARK"),
126-
Terminal("COLON"),
127-
Terminal("BINARY_OP"),
128-
]
108+
# these variables track state during reconstruction to enable us to make
109+
# informed decisions about formatting output. They are primarily used
110+
# by the _should_add_space(...) method.
111+
self._last_char_space = True
112+
self._last_terminal: Union[Terminal, None] = None
113+
self._last_rule: Union[Tree, Token, None] = None
114+
self._deferred_item = None
115+
116+
def should_be_wrapped_in_spaces(self, terminal: Terminal) -> bool:
117+
"""Whether the given terminal should be wrapped in spaces."""
118+
return terminal.name in {
119+
"IF",
120+
"IN",
121+
"FOR",
122+
"FOR_EACH",
123+
"FOR_OBJECT_ARROW",
124+
"COLON",
125+
"QMARK",
126+
"BINARY_OP",
127+
}
129128

130129
def _is_equals_sign(self, terminal) -> bool:
131130
return (
132-
isinstance(self.last_rule, Token)
133-
and self.last_rule.value in ("attribute", "object_elem")
134-
and self.last_terminal == Terminal("EQ")
131+
isinstance(self._last_rule, Token)
132+
and self._last_rule.value in ("attribute", "object_elem")
133+
and self._last_terminal == Terminal("EQ")
135134
and terminal != Terminal("NL_OR_COMMENT")
136135
)
137136

@@ -155,11 +154,11 @@ def _should_add_space(self, rule, current_terminal):
155154
This should be sufficient to make a spacing decision.
156155
"""
157156
# we don't need to add multiple spaces
158-
if self.last_char_space:
157+
if self._last_char_space:
159158
return False
160159

161160
# we don't add a space at the start of the file
162-
if not self.last_terminal or not self.last_rule:
161+
if not self._last_terminal or not self._last_rule:
163162
return False
164163

165164
if self._is_equals_sign(current_terminal):
@@ -173,20 +172,20 @@ def _should_add_space(self, rule, current_terminal):
173172
"conditional",
174173
"binary_operator",
175174
]
176-
and current_terminal in self.BINARY_OPS
175+
and self.should_be_wrapped_in_spaces(current_terminal)
177176
):
178177
return True
179178

180179
# if we just left a ternary or binary operator, add space around the
181180
# operator unless there's a newline already
182181
if (
183-
isinstance(self.last_rule, Token)
184-
and self.last_rule.value
182+
isinstance(self._last_rule, Token)
183+
and self._last_rule.value
185184
in [
186185
"conditional",
187186
"binary_operator",
188187
]
189-
and self.last_terminal in self.BINARY_OPS
188+
and self.should_be_wrapped_in_spaces(self._last_terminal)
190189
and current_terminal != Terminal("NL_OR_COMMENT")
191190
):
192191
return True
@@ -200,21 +199,21 @@ def _should_add_space(self, rule, current_terminal):
200199
"for_cond",
201200
"for_intro",
202201
]
203-
and current_terminal in self.FOR_IF_KEYWORDS
202+
and self.should_be_wrapped_in_spaces(current_terminal)
204203
):
205204
return True
206205

207206
# if we've just left a for or if statement and find a keyword, add a
208207
# space, unless we have a newline
209208
if (
210-
isinstance(self.last_rule, Token)
211-
and self.last_rule.value
209+
isinstance(self._last_rule, Token)
210+
and self._last_rule.value
212211
in [
213212
"for_object_expr",
214213
"for_cond",
215214
"for_intro",
216215
]
217-
and self.last_terminal in self.FOR_IF_KEYWORDS
216+
and self.should_be_wrapped_in_spaces(self._last_terminal)
218217
and current_terminal != Terminal("NL_OR_COMMENT")
219218
):
220219
return True
@@ -230,7 +229,7 @@ def _should_add_space(self, rule, current_terminal):
230229
# always add space before the closing brace
231230
if current_terminal == Terminal(
232231
"RBRACE"
233-
) and self.last_terminal != Terminal("LBRACE"):
232+
) and self._last_terminal != Terminal("LBRACE"):
234233
return True
235234

236235
# always add space between string literals
@@ -240,20 +239,20 @@ def _should_add_space(self, rule, current_terminal):
240239
# if we just opened a block, add a space, unless the block is empty
241240
# or has a newline
242241
if (
243-
isinstance(self.last_rule, Token)
244-
and self.last_rule.value == "block"
245-
and self.last_terminal == Terminal("LBRACE")
242+
isinstance(self._last_rule, Token)
243+
and self._last_rule.value == "block"
244+
and self._last_terminal == Terminal("LBRACE")
246245
and current_terminal not in [Terminal("RBRACE"), Terminal("NL_OR_COMMENT")]
247246
):
248247
return True
249248

250249
# if we're in a tuple or function arguments (this rule matches commas between items)
251-
if isinstance(self.last_rule, str) and re.match(
252-
r"^__(tuple|arguments)_(star|plus)_.*", self.last_rule
250+
if isinstance(self._last_rule, str) and re.match(
251+
r"^__(tuple|arguments)_(star|plus)_.*", self._last_rule
253252
):
254253

255254
# string literals, decimals, and identifiers should always be
256-
# preceeded by a space if they're following a comma in a tuple or
255+
# preceded by a space if they're following a comma in a tuple or
257256
# function arg
258257
if current_terminal in [
259258
Terminal("STRING_LIT"),
@@ -279,12 +278,12 @@ def _reconstruct(self, tree):
279278
rule, terminal, value = item
280279

281280
# first, handle any deferred items
282-
if self.deferred_item is not None:
281+
if self._deferred_item is not None:
283282
(
284283
deferred_rule,
285284
deferred_terminal,
286285
deferred_value,
287-
) = self.deferred_item
286+
) = self._deferred_item
288287

289288
# if we deferred a comma and the next character ends a
290289
# parenthesis or block, we can throw it out
@@ -298,32 +297,32 @@ def _reconstruct(self, tree):
298297
yield deferred_value
299298

300299
# and do our bookkeeping
301-
self.last_terminal = deferred_terminal
302-
self.last_rule = deferred_rule
300+
self._last_terminal = deferred_terminal
301+
self._last_rule = deferred_rule
303302
if deferred_value and not deferred_value[-1].isspace():
304-
self.last_char_space = False
303+
self._last_char_space = False
305304

306305
# clear the deferred item
307-
self.deferred_item = None
306+
self._deferred_item = None
308307

309308
# potentially add a space before the next token
310309
if self._should_add_space(rule, terminal):
311310
yield " "
312-
self.last_char_space = True
311+
self._last_char_space = True
313312

314-
# potentially defer the item if needs to be
313+
# potentially defer the item if needed
315314
if terminal in [Terminal("COMMA")]:
316-
self.deferred_item = item
315+
self._deferred_item = item
317316
else:
318317
# otherwise print the next token
319318
yield value
320319

321320
# and do our bookkeeping so we can make an informed
322321
# decision about formatting next time
323-
self.last_terminal = terminal
324-
self.last_rule = rule
322+
self._last_terminal = terminal
323+
self._last_rule = rule
325324
if value:
326-
self.last_char_space = value[-1].isspace()
325+
self._last_char_space = value[-1].isspace()
327326

328327
else:
329328
raise RuntimeError(f"Unknown bare token type: {item}")
@@ -375,7 +374,7 @@ def transform(self, hcl_dict: dict) -> Tree:
375374
@staticmethod
376375
def _is_string_wrapped_tf(interp_s: str) -> bool:
377376
"""
378-
Determines whether a string is a complex HCL datastructure
377+
Determines whether a string is a complex HCL data structure
379378
wrapped in ${ interpolation } characters.
380379
"""
381380
if not interp_s.startswith("${") or not interp_s.endswith("}"):
@@ -549,19 +548,19 @@ def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]:
549548
# for dicts, recursively turn the child k/v pairs into object elements
550549
# and store within an object
551550
if isinstance(value, dict):
552-
elems = []
551+
elements = []
553552

554553
# if the object has elements, put it on a newline
555554
if len(value) > 0:
556-
elems.append(self._newline(level + 1))
555+
elements.append(self._newline(level + 1))
557556

558557
# iterate through the items and add them to the object
559558
for i, (k, dict_v) in enumerate(value.items()):
560559
if k in ["__start_line__", "__end_line__"]:
561560
continue
562561

563562
value_expr_term = self._transform_value_to_expr_term(dict_v, level + 1)
564-
elems.append(
563+
elements.append(
565564
Tree(
566565
Token("RULE", "object_elem"),
567566
[
@@ -577,11 +576,11 @@ def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]:
577576

578577
# add indentation appropriately
579578
if i < len(value) - 1:
580-
elems.append(self._newline(level + 1))
579+
elements.append(self._newline(level + 1))
581580
else:
582-
elems.append(self._newline(level))
581+
elements.append(self._newline(level))
583582
return Tree(
584-
Token("RULE", "expr_term"), [Tree(Token("RULE", "object"), elems)]
583+
Token("RULE", "expr_term"), [Tree(Token("RULE", "object"), elements)]
585584
)
586585

587586
# treat booleans appropriately
@@ -641,7 +640,3 @@ def _transform_value_to_expr_term(self, value, level) -> Union[Token, Tree]:
641640

642641
# otherwise, we don't know the type
643642
raise RuntimeError(f"Unknown type to transform {type(value)}")
644-
645-
646-
hcl2_reconstructor = HCLReconstructor(reconstruction_parser())
647-
hcl2_reverse_transformer = HCLReverseTransformer()

0 commit comments

Comments
 (0)