Skip to content

Commit facc9f4

Browse files
committed
Refactor wat-fmt by removing outdated comments and improving inline signature handling in format_node
1 parent 22ce006 commit facc9f4

File tree

1 file changed

+13
-53
lines changed

1 file changed

+13
-53
lines changed

wat-fmt/src/lib.rs

Lines changed: 13 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,12 @@ extern crate alloc;
33
use alloc::string::String;
44
use alloc::vec::Vec;
55

6-
/// A simple token type.
76
enum Token {
87
LParen,
98
RParen,
109
Atom(String),
1110
}
1211

13-
/// Given an input string slice, break it into tokens.
14-
/// String literals (delimited by quotes) are kept as a single atom.
1512
fn tokenize(input: &str) -> Vec<Token> {
1613
let mut tokens = Vec::new();
1714
let mut chars = input.chars().peekable();
@@ -24,7 +21,6 @@ fn tokenize(input: &str) -> Vec<Token> {
2421
} else if c == ')' {
2522
tokens.push(Token::RParen);
2623
} else if c == '"' {
27-
// Read a string literal including the quotes.
2824
let mut s = String::new();
2925
s.push('"');
3026
while let Some(&next) = chars.peek() {
@@ -36,7 +32,6 @@ fn tokenize(input: &str) -> Vec<Token> {
3632
}
3733
tokens.push(Token::Atom(s));
3834
} else {
39-
// Read an atom until whitespace or a parenthesis is encountered.
4035
let mut s = String::new();
4136
s.push(c);
4237
while let Some(&next) = chars.peek() {
@@ -53,14 +48,11 @@ fn tokenize(input: &str) -> Vec<Token> {
5348
tokens
5449
}
5550

56-
/// An AST node representing either an atom or a list of nodes.
5751
enum Node {
5852
Atom(String),
5953
List(Vec<Node>),
6054
}
6155

62-
/// A recursive parser that builds a (possibly malformed) AST.
63-
/// It is tolerant – if extra closing parentheses occur it simply produces an Atom.
6456
fn parse_node(tokens: &[Token], mut i: usize) -> (Node, usize) {
6557
if i >= tokens.len() {
6658
return (Node::Atom(String::new()), i);
@@ -84,15 +76,11 @@ fn parse_node(tokens: &[Token], mut i: usize) -> (Node, usize) {
8476
}
8577
(Node::List(children), i)
8678
}
87-
Token::RParen => {
88-
// Stray closing parenthesis: output it as an atom.
89-
(Node::Atom(String::from(")")), i + 1)
90-
}
79+
Token::RParen => (Node::Atom(String::from(")")), i + 1),
9180
Token::Atom(ref s) => (Node::Atom(s.clone()), i + 1),
9281
}
9382
}
9483

95-
/// Parse all tokens into a vector of nodes.
9684
fn parse_all(tokens: &[Token]) -> Vec<Node> {
9785
let mut nodes = Vec::new();
9886
let mut i = 0;
@@ -104,7 +92,6 @@ fn parse_all(tokens: &[Token]) -> Vec<Node> {
10492
nodes
10593
}
10694

107-
/// Returns a string with `indent` levels (2 spaces per level).
10895
fn indent_str(indent: usize) -> String {
10996
let mut s = String::new();
11097
for _ in 0..indent {
@@ -113,20 +100,17 @@ fn indent_str(indent: usize) -> String {
113100
s
114101
}
115102

116-
/// Returns true if the given node is “flat” (an atom or a list whose children are all flat).
117103
fn is_flat_node(node: &Node) -> bool {
118104
match node {
119105
Node::Atom(_) => true,
120106
Node::List(children) => children.iter().all(is_flat_node),
121107
}
122108
}
123109

124-
/// Returns true if every node in the slice is flat.
125110
fn is_flat_list(nodes: &[Node]) -> bool {
126111
nodes.iter().all(is_flat_node)
127112
}
128113

129-
/// Format a node “inline” (without inserting any newlines).
130114
fn format_node_inline(node: &Node) -> String {
131115
match node {
132116
Node::Atom(s) => s.clone(),
@@ -147,7 +131,6 @@ fn format_node_inline(node: &Node) -> String {
147131
}
148132
}
149133

150-
/// If a node is flat then return its inline formatting.
151134
#[allow(dead_code)]
152135
fn format_inline(node: &Node) -> Option<String> {
153136
if is_flat_node(node) {
@@ -157,8 +140,6 @@ fn format_inline(node: &Node) -> Option<String> {
157140
}
158141
}
159142

160-
/// Returns true if the first atom of a list node is one of these keywords.
161-
/// Such nodes (like `(export ...)`, `(param ...)`, `(result ...)`) are inlined when part of a func signature.
162143
fn is_inline_signature(node: &Node) -> bool {
163144
if let Node::List(children) = node {
164145
if let Some(Node::Atom(ref keyword)) = children.first() {
@@ -168,9 +149,6 @@ fn is_inline_signature(node: &Node) -> bool {
168149
false
169150
}
170151

171-
/// A simple heuristic: returns true if the given token is an opcode.
172-
/// In our case, an opcode is an atom that does not start with '$',
173-
/// is not a numeric literal, and is not a string literal.
174152
fn is_opcode(token: &str) -> bool {
175153
if token.starts_with('$') {
176154
return false;
@@ -180,7 +158,6 @@ fn is_opcode(token: &str) -> bool {
180158
}
181159
let mut chars = token.chars();
182160
if let Some(first) = chars.next() {
183-
// Check for a numeric literal: allow an optional sign then digits.
184161
if (first == '-' || first == '+') && chars.clone().all(|c| c.is_ascii_digit()) {
185162
return false;
186163
}
@@ -191,16 +168,13 @@ fn is_opcode(token: &str) -> bool {
191168
true
192169
}
193170

194-
/// Helper that groups consecutive instruction tokens (atoms) in a list.
195-
/// For each opcode atom, it appends any following atoms that are not opcodes.
196171
fn format_instructions(nodes: &[Node], indent: usize) -> String {
197172
let mut result = String::new();
198173
let mut i = 0;
199174
while i < nodes.len() {
200175
match &nodes[i] {
201176
Node::Atom(token) => {
202177
if is_opcode(token) {
203-
// Start a new instruction line.
204178
let mut line = token.clone();
205179
i += 1;
206180
while i < nodes.len() {
@@ -220,15 +194,13 @@ fn format_instructions(nodes: &[Node], indent: usize) -> String {
220194
result.push_str(&indent_str(indent));
221195
result.push_str(&line);
222196
} else {
223-
// Non-opcode atom printed on its own line.
224197
result.push('\n');
225198
result.push_str(&indent_str(indent));
226199
result.push_str(token);
227200
i += 1;
228201
}
229202
}
230203
Node::List(_) => {
231-
// For a nested list, simply delegate to format_node.
232204
result.push('\n');
233205
result.push_str(&indent_str(indent));
234206
result.push_str(&format_node(&nodes[i], indent));
@@ -239,14 +211,6 @@ fn format_instructions(nodes: &[Node], indent: usize) -> String {
239211
result
240212
}
241213

242-
/// Recursively format a node with the given indent level.
243-
///
244-
/// Special rules:
245-
/// - A `(module …)` prints its children on new indented lines.
246-
/// - A `(func …)` prints its signature groups inline and then uses `format_instructions`
247-
/// for the remaining function body.
248-
/// - A `(forall …)` is handled similarly, grouping its children (after the first "forall" atom)
249-
/// as instructions.
250214
fn format_node(node: &Node, indent: usize) -> String {
251215
match node {
252216
Node::Atom(s) => s.clone(),
@@ -271,28 +235,29 @@ fn format_node(node: &Node, indent: usize) -> String {
271235
} else if ident == "func" {
272236
let mut s = String::new();
273237
s.push('(');
274-
// Always inline the first element ("func")
275238
s.push_str(&format_node_inline(&children[0]));
276239
let mut i = 1;
277-
// Inline any signature tokens like (export ...), (param ...), (result ...).
278240
while i < children.len() {
279-
if is_inline_signature(&children[i]) {
280-
s.push(' ');
281-
s.push_str(&format_node_inline(&children[i]));
282-
i += 1;
283-
} else {
284-
break;
241+
match &children[i] {
242+
Node::Atom(_) => {
243+
s.push(' ');
244+
s.push_str(&format_node_inline(&children[i]));
245+
i += 1;
246+
}
247+
Node::List(_) if is_inline_signature(&children[i]) => {
248+
s.push(' ');
249+
s.push_str(&format_node_inline(&children[i]));
250+
i += 1;
251+
}
252+
_ => break,
285253
}
286254
}
287-
// Process the remaining children as the function body,
288-
// grouping consecutive instruction tokens.
289255
s.push_str(&format_instructions(&children[i..], indent + 1));
290256
s.push('\n');
291257
s.push_str(&indent_str(indent));
292258
s.push(')');
293259
return s;
294260
} else if ["forall", "exists", "assume", "unique"].contains(&ident.as_str()) {
295-
// For a forall block, print the first atom inline then group its remaining children.
296261
let mut s = String::new();
297262
s.push('(');
298263
s.push_str(ident);
@@ -303,7 +268,6 @@ fn format_node(node: &Node, indent: usize) -> String {
303268
return s;
304269
}
305270
}
306-
// For any other list: if it is flat, print inline; otherwise, one element per line.
307271
if is_flat_list(children) {
308272
format_node_inline(node)
309273
} else {
@@ -329,10 +293,6 @@ fn format_node(node: &Node, indent: usize) -> String {
329293
}
330294
}
331295

332-
/// The public function to format unformatted WAT code.
333-
///
334-
/// This function receives unformatted WAT as an `&str` and returns a formatted `String`.
335-
/// Even if the input is malformed, the formatter does its best.
336296
pub fn format(input: &str) -> String {
337297
let tokens = tokenize(input);
338298
let nodes = parse_all(&tokens);

0 commit comments

Comments
 (0)