Skip to content

Commit c3e4baf

Browse files
authored
formatter: support for a few more forms and fix some bugs, update tree-sitter grammar (#3317)
1 parent 85039fe commit c3e4baf

File tree

13 files changed

+314
-63
lines changed

13 files changed

+314
-63
lines changed

common/formatter/formatter.cpp

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,8 @@ void apply_formatting_config(
6767
//
6868
// Otherwise, we always default to a hang.
6969
//
70-
// NOTE - any modifications here to child elements could be superseeded later in the recursion
71-
// in order to maintain your sanity, only modify things here that _arent_ touched by default
70+
// NOTE - any modifications here to child elements could be superseeded later in the recursion!
71+
// In order to maintain your sanity, only modify things here that _arent_ touched by default
7272
// configurations. These are explicitly prepended with `parent_mutable_`
7373
if (!predefined_config) {
7474
if (curr_node.metadata.is_top_level) {
@@ -104,6 +104,8 @@ void apply_formatting_config(
104104
}
105105
// If we are hanging, lets determine the indentation width since it is based on the form itself
106106
if (curr_node.formatting_config.hang_forms) {
107+
// TODO - this isn't being calculated for a pre-defined config
108+
// TODO - another idea is to do this during consolidation
107109
curr_node.formatting_config.indentation_width = hang_indentation_width(curr_node);
108110
}
109111
// iterate through the refs
@@ -123,6 +125,7 @@ void apply_formatting_config(
123125
}
124126
}
125127

128+
// TODO - this doesn't account for paren's width contribution!
126129
int get_total_form_inlined_width(const FormatterTreeNode& curr_node) {
127130
if (curr_node.token) {
128131
return curr_node.token->length();
@@ -196,12 +199,15 @@ std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
196199
using namespace formatter_rules;
197200
if (!curr_node.token && curr_node.refs.empty()) {
198201
// special case to handle an empty list
202+
if (curr_node.node_prefix) {
203+
return {fmt::format("{}()", curr_node.node_prefix.value())};
204+
}
199205
return {"()"};
200206
}
201207

202208
// If its a token, just print the token and move on
203209
if (curr_node.token) {
204-
return {curr_node.token.value()};
210+
return {curr_node.token_str()};
205211
}
206212

207213
bool inline_form = can_node_be_inlined(curr_node, cursor_pos);
@@ -220,13 +226,19 @@ std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
220226
// Add new line entry
221227
if (ref.token) {
222228
// Cleanup block-comments
223-
std::string val = ref.token.value();
229+
std::string val = ref.token_str();
224230
if (ref.metadata.node_type == "block_comment") {
225231
// TODO - change this sanitization to return a list of lines instead of a single new-lined
226232
// line
227-
val = comments::format_block_comment(ref.token.value());
233+
val = comments::format_block_comment(ref.token_str());
228234
}
229235
form_lines.push_back(val);
236+
if (!curr_node.metadata.is_top_level && i == curr_node.refs.size() - 1 &&
237+
(ref.metadata.is_comment)) {
238+
// if there's an inline comment at the end of a form, we have to force the paren to the next
239+
// line and do a new-line paren this is ugly, but we have no choice!
240+
form_lines.push_back("");
241+
}
230242
} else {
231243
// If it's not a token, we have to recursively build up the form
232244
// TODO - add the cursor_pos here
@@ -250,6 +262,9 @@ std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
250262
if ((next_ref.metadata.node_type == "comment" && next_ref.metadata.is_inline) ||
251263
(curr_node.formatting_config.has_constant_pairs &&
252264
constant_pairs::is_element_second_in_constant_pair(curr_node, next_ref, i + 1))) {
265+
// TODO
266+
// has issues with not consolidating first lines, this should probably just be moved to
267+
// outside this loop for simplicity, do it later
253268
if (next_ref.token) {
254269
form_lines.at(form_lines.size() - 1) += fmt::format(" {}", next_ref.token.value());
255270
i++;
@@ -260,6 +275,10 @@ std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
260275
}
261276
i++;
262277
}
278+
if (!curr_node.metadata.is_top_level && next_ref.metadata.node_type == "comment" &&
279+
(i + 1) == (int)curr_node.refs.size()) {
280+
form_lines.push_back("");
281+
}
263282
}
264283
}
265284
// If we are at the top level, potential separate with a new line
@@ -288,6 +307,9 @@ std::vector<std::string> apply_formatting(const FormatterTreeNode& curr_node,
288307
// Apply necessary indentation to each line and add parens
289308
if (!curr_node.metadata.is_top_level) {
290309
std::string form_surround_start = "(";
310+
if (curr_node.node_prefix) {
311+
form_surround_start = fmt::format("{}(", curr_node.node_prefix.value());
312+
}
291313
std::string form_surround_end = ")";
292314
form_lines[0] = fmt::format("{}{}", form_surround_start, form_lines[0]);
293315
form_lines[form_lines.size() - 1] =

common/formatter/formatter_tree.cpp

Lines changed: 35 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -78,16 +78,24 @@ FormatterTree::FormatterTree(const std::string& source, const TSNode& root_node)
7878
construct_formatter_tree_recursive(source, root_node, root);
7979
}
8080

81+
const std::unordered_map<std::string, std::vector<std::string>> node_type_ignorable_contents = {
82+
{"list_lit", {"(", ")"}},
83+
{"quoting_lit", {"'"}},
84+
{"unquoting_lit", {","}},
85+
{"quasi_quoting_lit", {"`"}}};
86+
8187
// TODO make an imperative version eventually
8288
void FormatterTree::construct_formatter_tree_recursive(const std::string& source,
8389
TSNode curr_node,
84-
FormatterTreeNode& tree_node) {
90+
FormatterTreeNode& tree_node,
91+
std::optional<std::string> node_prefix) {
8592
if (ts_node_child_count(curr_node) == 0) {
8693
tree_node.refs.push_back(FormatterTreeNode(source, curr_node));
8794
return;
8895
}
8996
const std::string curr_node_type = ts_node_type(curr_node);
9097
FormatterTreeNode list_node;
98+
std::optional<std::string> next_node_prefix;
9199
if (curr_node_type == "list_lit") {
92100
list_node = FormatterTreeNode();
93101
} else if (curr_node_type == "str_lit") {
@@ -97,22 +105,40 @@ void FormatterTree::construct_formatter_tree_recursive(const std::string& source
97105
tree_node.refs.push_back(FormatterTreeNode(source, curr_node));
98106
return;
99107
} else if (curr_node_type == "quoting_lit") {
100-
// same story for quoted symbols
101-
// TODO - expect to have to add more here
102-
tree_node.refs.push_back(FormatterTreeNode(source, curr_node));
103-
return;
108+
next_node_prefix = "'";
109+
} else if (curr_node_type == "unquoting_lit") {
110+
next_node_prefix = ",";
111+
} else if (curr_node_type == "quasi_quoting_lit") {
112+
next_node_prefix = "`";
113+
}
114+
std::vector<std::string> skippable_nodes = {};
115+
if (node_type_ignorable_contents.find(curr_node_type) != node_type_ignorable_contents.end()) {
116+
skippable_nodes = node_type_ignorable_contents.at(curr_node_type);
104117
}
105118
for (size_t i = 0; i < ts_node_child_count(curr_node); i++) {
106119
const auto child_node = ts_node_child(curr_node, i);
107-
// We skip parens
108120
const auto contents = get_source_code(source, child_node);
109-
if (contents == "(" || contents == ")") {
121+
bool skip_node = false;
122+
for (const auto& skippable_content : skippable_nodes) {
123+
if (skippable_content == contents) {
124+
skip_node = true;
125+
break;
126+
}
127+
}
128+
if (skip_node) {
110129
continue;
111130
}
112131
if (curr_node_type == "list_lit") {
113-
construct_formatter_tree_recursive(source, child_node, list_node);
132+
construct_formatter_tree_recursive(source, child_node, list_node, next_node_prefix);
133+
if (node_prefix) {
134+
list_node.node_prefix = node_prefix;
135+
}
114136
} else {
115-
construct_formatter_tree_recursive(source, child_node, tree_node);
137+
construct_formatter_tree_recursive(source, child_node, tree_node, next_node_prefix);
138+
// TODO - im not sure if this is correct
139+
if (node_prefix && !tree_node.refs.empty()) {
140+
tree_node.refs.at(tree_node.refs.size() - 1).node_prefix = node_prefix;
141+
}
116142
}
117143
}
118144
if (curr_node_type == "list_lit") {

common/formatter/formatter_tree.h

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@
1818
// we really care about is:
1919
// - getting all the text tokens for the source code
2020
// - having them in a proper, nested format
21-
// The treesitter format is complicated and highly nested, leading to some very hard to understand
22-
// code. So my solution is a 2-pass format.
21+
//
22+
// TLDR - The treesitter format is complicated and highly nested, leading to some very hard to
23+
// understand code. So my solution is atleast a 2-pass format.
2324
//
2425
// Pass 1 - convert the AST into a simplified FormatterTree
2526
// Pass 2 - use the simplified tree to output the final code
@@ -37,8 +38,9 @@ class FormatterTreeNode {
3738
std::vector<FormatterTreeNode> refs;
3839
Metadata metadata;
3940
// The token is optional because list nodes do not contain a token, they just contain a bunch of
40-
// eventually token node refs
41+
// eventually-containing token node refs
4142
std::optional<std::string> token;
43+
std::optional<std::string> node_prefix;
4244

4345
formatter_rules::config::FormFormattingConfig formatting_config;
4446

@@ -47,6 +49,15 @@ class FormatterTreeNode {
4749
FormatterTreeNode(const Metadata& _metadata) : metadata(_metadata){};
4850

4951
bool is_list() const { return !token.has_value(); }
52+
std::string token_str() const {
53+
if (node_prefix && token) {
54+
return node_prefix.value() + token.value();
55+
}
56+
if (token) {
57+
return token.value();
58+
}
59+
return "";
60+
}
5061
};
5162

5263
// A FormatterTree has a very simple and crude tree structure where:
@@ -62,5 +73,6 @@ class FormatterTree {
6273
private:
6374
void construct_formatter_tree_recursive(const std::string& source,
6475
TSNode curr_node,
65-
FormatterTreeNode& tree_node);
76+
FormatterTreeNode& tree_node,
77+
std::optional<std::string> node_prefix = {});
6678
};

common/formatter/rules/rule_config.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,13 @@ namespace formatter_rules {
66
namespace config {
77

88
// TODO - this could be greatly simplified with C++20's designated initialization
9+
FormFormattingConfig new_permissive_flow_rule() {
10+
FormFormattingConfig cfg;
11+
cfg.hang_forms = false;
12+
cfg.combine_first_two_lines = true;
13+
return cfg;
14+
}
15+
916
FormFormattingConfig new_flow_rule(int start_index) {
1017
FormFormattingConfig cfg;
1118
cfg.hang_forms = false;
@@ -80,6 +87,11 @@ const std::unordered_map<std::string, FormFormattingConfig> opengoal_form_config
8087
{"defmethod", new_flow_rule(3)},
8188
{"deftype", new_flow_rule_prevent_inlining_indexes(3, {3, 4, 5})},
8289
{"defun", new_flow_rule(3)},
90+
{"defbehavior", new_flow_rule(4)},
91+
{"if", new_permissive_flow_rule()},
92+
{"define", new_permissive_flow_rule()},
93+
{"define-extern", new_permissive_flow_rule()},
94+
{"defmacro", new_flow_rule(3)},
8395
{"dotimes", new_flow_rule(2)},
8496
{"let", new_binding_rule()},
8597
{"when", new_flow_rule(2)},

test/common/formatter/corpus/comments.test.gc

Lines changed: 48 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,7 @@ Block Comment - Allow Annotations
105105
(println "test")
106106

107107
===
108-
Block Comment - In Form
108+
Block Comment - In Form - TODO Improve
109109
===
110110

111111
(println
@@ -122,3 +122,50 @@ Block Comment - In Form
122122
test
123123
|#
124124
"test")
125+
126+
===
127+
At the end of a form
128+
===
129+
130+
(println
131+
"hello world"
132+
;; this is a comment, don't forget the paren!
133+
)
134+
135+
---
136+
137+
(println "hello world"
138+
;; this is a comment, don't forget the paren!
139+
)
140+
141+
===
142+
Block at the end of a form - TODO Improve
143+
===
144+
145+
(println
146+
"hello world"
147+
#| wow look at that block comment |#
148+
)
149+
150+
---
151+
152+
(println "hello world"
153+
#|
154+
wow look at that block comment
155+
|#
156+
)
157+
158+
===
159+
Inline at the end of a form - TODO-A handle hanging in this instance better
160+
===
161+
162+
(println
163+
"hello world" ;; this is a comment
164+
)
165+
166+
---
167+
168+
(println
169+
"hello world" ;; this is a comment
170+
)
171+
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
===
2+
Inlinable If
3+
===
4+
5+
(if arg1
6+
arg1
7+
(symbol->string (-> arg0 type symbol))
8+
)
9+
10+
---
11+
12+
(if arg1 arg1 (symbol->string (-> arg0 type symbol)))
13+
14+
===
15+
Non-Inlinable If
16+
===
17+
18+
(if arg1
19+
(symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol))
20+
(symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol))
21+
)
22+
23+
---
24+
25+
(if arg1
26+
(symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol))
27+
(symbol->string (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol) (-> arg0 type symbol)))
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
===
2+
Define with docstring - inlinable
3+
===
4+
5+
(define enable-level-text-file-loading "Disables [[*level-text-file-load-flag*]]" (function none))
6+
7+
---
8+
9+
(define enable-level-text-file-loading "Disables [[*level-text-file-load-flag*]]" (function none))
10+
11+
===
12+
Define with docstring - not inlinable
13+
===
14+
15+
(define enable-level-text-file-loading "Disables [[*level-text-file-load-flag*]] lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar" (function none))
16+
17+
---
18+
19+
(define enable-level-text-file-loading
20+
"Disables [[*level-text-file-load-flag*]] lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar lorem ipsum dolar"
21+
(function none))

0 commit comments

Comments
 (0)